Namespace(aa='rand-m9-mstd0.5-inc1', auto_resume=True, batch_size=128, cfg_path='iFormer_h.yaml', clip_grad=None, color_jitter=0.4, crop_pct=None, cutmix=1.0, cutmix_minmax=None, data_path='imagenet', data_set='IMNET', device='cuda', disable_eval=False, dist_backend='nccl', dist_eval=True, dist_on_itp=False, dist_url='env://', distillation_alpha=0.5, distillation_tau=1.0, distillation_type='none', distributed=True, drop_path=0.6, enable_wandb=False, epochs=300, eval=False, eval_data_path=None, finetune='', gpu=0, head_init_scale=1.0, imagenet_default_mean_and_std=True, input_size=224, layer_decay=1.0, layer_scale_init_value=1e-06, local_rank=-1, log_dir=None, lr=0.008, min_lr=1e-06, mixup=0.8, mixup_mode='batch', mixup_prob=1.0, mixup_switch_prob=0.5, model='iFormer_h', model_ema=True, model_ema_decay=0.9999, model_ema_eval=True, model_ema_force_cpu=False, model_key='model|module', model_prefix='', momentum=0.9, nb_classes=1000, num_workers=16, opt='adamw', opt_betas=None, opt_eps=1e-08, output_dir='', pin_mem=True, project='iFormer', rank=0, recount=1, remode='pixel', reprob=0.25, resplit=False, resume='', save_ckpt=True, save_ckpt_freq=1, save_ckpt_num=3, seed=0, smoothing=0.1, start_epoch=0, teacher_model='regnety_160', teacher_path='regnety_160-a5fe301d.pth', train_interpolation='bicubic', update_freq=1, use_amp=False, wandb_ckpt=False, warmup_epochs=20, warmup_steps=-1, weight_decay=0.05, weight_decay_end=None, world_size=64) Transform = RandomResizedCropAndInterpolation(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BICUBIC) RandomHorizontalFlip(p=0.5) ToTensor() Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250])) --------------------------- reading from datapath imagenet Number of the class = 1000 Transform = Resize(size=256, interpolation=bicubic, max_size=None, antialias=warn) CenterCrop(size=(224, 224)) ToTensor() Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) --------------------------- reading from datapath imagenet Number of the class = 1000 Sampler_train = Warning: Enabling distributed evaluation with an eval dataset not divisible by process number. This will slightly alter validation results as extra duplicate entries are added to achieve equal num of samples per-process. Mixup is activated! Using EMA with decay = 0.99990000 Model = iFormer( (downsample_layers): ModuleList( (0): Sequential( (0): Conv2d_BN( (c): Conv2d(3, 48, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False) (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): EdgeResidual( (conv_exp_bn1): Conv2d_BN( (c): Conv2d(48, 192, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (act): GELU(approximate='none') (conv_pwl_bn2): Conv2d_BN( (c): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (1): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (2): Sequential( (0): Conv2d_BN( (c): Conv2d(192, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (3): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (stages): ModuleList( (0): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96, bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): Identity() ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96, bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (2): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96, bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (3): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96, bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (4): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96, bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) ) (1): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192, bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192, bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (2): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192, bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (3): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192, bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (4): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192, bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) ) (2): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (2): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (3): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (4): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (5): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (6): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (7): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (8): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (9): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (10): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (11): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (12): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (13): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (14): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (15): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (16): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (17): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (18): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (19): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (20): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (21): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (22): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (23): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (24): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (25): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (26): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (27): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (28): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (29): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (30): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (31): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (32): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (33): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (34): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (35): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (36): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (37): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (38): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (39): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (40): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (41): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (42): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (43): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (44): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (45): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (46): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (47): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (48): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (49): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (50): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (51): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (52): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (53): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (54): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (55): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (56): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (57): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (58): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (59): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) ) (3): Sequential( (0): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (1): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(768, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (2): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(3072, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (3): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (4): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(768, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (5): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(3072, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (6): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (7): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(768, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (8): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(3072, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (9): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (10): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(768, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (11): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(3072, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (12): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (13): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(768, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (14): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(3072, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (15): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (16): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual_drop( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(768, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (17): BasicBlock( (block): FFN2d( (channel_mixer): Residual_drop( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(3072, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) ) ) (classifier): Classfier( (classifier): BN_Linear( (bn): BatchNorm1d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (l): Linear(in_features=768, out_features=1000, bias=True) ) ) ) number of params: 99596632 LR = 0.00800000 Batch size = 8192 Update frequent = 1 Number of training examples = 1281167 Number of training training per epoch = 156 Param groups = { "decay": { "weight_decay": 0.05, "params": [ "downsample_layers.0.0.c.weight", "downsample_layers.0.2.conv_exp_bn1.c.weight", "downsample_layers.0.2.conv_pwl_bn2.c.weight", "downsample_layers.1.0.c.weight", "downsample_layers.2.0.c.weight", "downsample_layers.3.0.c.weight", "stages.0.0.block.token_channel_mixer.gamma", "stages.0.0.block.token_channel_mixer.m.0.c.weight", "stages.0.0.block.token_channel_mixer.m.1.c.weight", "stages.0.0.block.token_channel_mixer.m.3.c.weight", "stages.0.1.block.token_channel_mixer.gamma", "stages.0.1.block.token_channel_mixer.m.0.c.weight", "stages.0.1.block.token_channel_mixer.m.1.c.weight", "stages.0.1.block.token_channel_mixer.m.3.c.weight", "stages.0.2.block.token_channel_mixer.gamma", "stages.0.2.block.token_channel_mixer.m.0.c.weight", "stages.0.2.block.token_channel_mixer.m.1.c.weight", "stages.0.2.block.token_channel_mixer.m.3.c.weight", "stages.0.3.block.token_channel_mixer.gamma", "stages.0.3.block.token_channel_mixer.m.0.c.weight", "stages.0.3.block.token_channel_mixer.m.1.c.weight", "stages.0.3.block.token_channel_mixer.m.3.c.weight", "stages.0.4.block.token_channel_mixer.gamma", "stages.0.4.block.token_channel_mixer.m.0.c.weight", "stages.0.4.block.token_channel_mixer.m.1.c.weight", "stages.0.4.block.token_channel_mixer.m.3.c.weight", "stages.1.0.block.token_channel_mixer.gamma", "stages.1.0.block.token_channel_mixer.m.0.c.weight", "stages.1.0.block.token_channel_mixer.m.1.c.weight", "stages.1.0.block.token_channel_mixer.m.3.c.weight", "stages.1.1.block.token_channel_mixer.gamma", "stages.1.1.block.token_channel_mixer.m.0.c.weight", "stages.1.1.block.token_channel_mixer.m.1.c.weight", "stages.1.1.block.token_channel_mixer.m.3.c.weight", "stages.1.2.block.token_channel_mixer.gamma", "stages.1.2.block.token_channel_mixer.m.0.c.weight", "stages.1.2.block.token_channel_mixer.m.1.c.weight", "stages.1.2.block.token_channel_mixer.m.3.c.weight", "stages.1.3.block.token_channel_mixer.gamma", "stages.1.3.block.token_channel_mixer.m.0.c.weight", "stages.1.3.block.token_channel_mixer.m.1.c.weight", "stages.1.3.block.token_channel_mixer.m.3.c.weight", "stages.1.4.block.token_channel_mixer.gamma", "stages.1.4.block.token_channel_mixer.m.0.c.weight", "stages.1.4.block.token_channel_mixer.m.1.c.weight", "stages.1.4.block.token_channel_mixer.m.3.c.weight", "stages.2.0.block.token_channel_mixer.gamma", "stages.2.0.block.token_channel_mixer.m.0.c.weight", "stages.2.0.block.token_channel_mixer.m.1.c.weight", "stages.2.0.block.token_channel_mixer.m.3.c.weight", "stages.2.1.block.token_channel_mixer.gamma", "stages.2.1.block.token_channel_mixer.m.0.c.weight", "stages.2.1.block.token_channel_mixer.m.1.c.weight", "stages.2.1.block.token_channel_mixer.m.3.c.weight", "stages.2.2.block.token_channel_mixer.gamma", "stages.2.2.block.token_channel_mixer.m.0.c.weight", "stages.2.2.block.token_channel_mixer.m.1.c.weight", "stages.2.2.block.token_channel_mixer.m.3.c.weight", "stages.2.3.block.token_channel_mixer.gamma", "stages.2.3.block.token_channel_mixer.m.0.c.weight", "stages.2.3.block.token_channel_mixer.m.1.c.weight", "stages.2.3.block.token_channel_mixer.m.3.c.weight", "stages.2.4.block.token_channel_mixer.gamma", "stages.2.4.block.token_channel_mixer.m.0.c.weight", "stages.2.4.block.token_channel_mixer.m.1.c.weight", "stages.2.4.block.token_channel_mixer.m.3.c.weight", "stages.2.5.block.token_channel_mixer.gamma", "stages.2.5.block.token_channel_mixer.m.0.c.weight", "stages.2.5.block.token_channel_mixer.m.1.c.weight", "stages.2.5.block.token_channel_mixer.m.3.c.weight", "stages.2.6.block.token_channel_mixer.gamma", "stages.2.6.block.token_channel_mixer.m.0.c.weight", "stages.2.6.block.token_channel_mixer.m.1.c.weight", "stages.2.6.block.token_channel_mixer.m.3.c.weight", "stages.2.7.block.token_channel_mixer.gamma", "stages.2.7.block.token_channel_mixer.m.0.c.weight", "stages.2.7.block.token_channel_mixer.m.1.c.weight", "stages.2.7.block.token_channel_mixer.m.3.c.weight", "stages.2.8.block.token_channel_mixer.gamma", "stages.2.8.block.token_channel_mixer.m.0.c.weight", "stages.2.8.block.token_channel_mixer.m.1.c.weight", "stages.2.8.block.token_channel_mixer.m.3.c.weight", "stages.2.9.block.token_channel_mixer.gamma", "stages.2.9.block.token_channel_mixer.m.0.c.weight", "stages.2.9.block.token_channel_mixer.m.1.c.weight", "stages.2.9.block.token_channel_mixer.m.3.c.weight", "stages.2.10.block.token_channel_mixer.gamma", "stages.2.10.block.token_channel_mixer.m.0.c.weight", "stages.2.10.block.token_channel_mixer.m.1.c.weight", "stages.2.10.block.token_channel_mixer.m.3.c.weight", "stages.2.11.block.token_channel_mixer.gamma", "stages.2.11.block.token_channel_mixer.m.0.c.weight", "stages.2.11.block.token_channel_mixer.m.1.c.weight", "stages.2.11.block.token_channel_mixer.m.3.c.weight", "stages.2.12.block.token_channel_mixer.gamma", "stages.2.12.block.token_channel_mixer.m.0.c.weight", "stages.2.12.block.token_channel_mixer.m.1.c.weight", "stages.2.12.block.token_channel_mixer.m.3.c.weight", "stages.2.13.block.token_channel_mixer.gamma", "stages.2.13.block.token_channel_mixer.m.0.c.weight", "stages.2.13.block.token_channel_mixer.m.1.c.weight", "stages.2.13.block.token_channel_mixer.m.3.c.weight", "stages.2.14.block.cpe.m.c.weight", "stages.2.15.block.token_channel_mixer.gamma", "stages.2.15.block.token_channel_mixer.m.q.c.weight", "stages.2.15.block.token_channel_mixer.m.k.c.weight", "stages.2.15.block.token_channel_mixer.m.proj.c.weight", "stages.2.15.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.16.block.channel_mixer.gamma", "stages.2.16.block.channel_mixer.m.0.c.weight", "stages.2.16.block.channel_mixer.m.2.c.weight", "stages.2.17.block.cpe.m.c.weight", "stages.2.18.block.token_channel_mixer.gamma", "stages.2.18.block.token_channel_mixer.m.q.c.weight", "stages.2.18.block.token_channel_mixer.m.k.c.weight", "stages.2.18.block.token_channel_mixer.m.proj.c.weight", "stages.2.18.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.19.block.channel_mixer.gamma", "stages.2.19.block.channel_mixer.m.0.c.weight", "stages.2.19.block.channel_mixer.m.2.c.weight", "stages.2.20.block.cpe.m.c.weight", "stages.2.21.block.token_channel_mixer.gamma", "stages.2.21.block.token_channel_mixer.m.q.c.weight", "stages.2.21.block.token_channel_mixer.m.k.c.weight", "stages.2.21.block.token_channel_mixer.m.proj.c.weight", "stages.2.21.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.22.block.channel_mixer.gamma", "stages.2.22.block.channel_mixer.m.0.c.weight", "stages.2.22.block.channel_mixer.m.2.c.weight", "stages.2.23.block.cpe.m.c.weight", "stages.2.24.block.token_channel_mixer.gamma", "stages.2.24.block.token_channel_mixer.m.q.c.weight", "stages.2.24.block.token_channel_mixer.m.k.c.weight", "stages.2.24.block.token_channel_mixer.m.proj.c.weight", "stages.2.24.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.25.block.channel_mixer.gamma", "stages.2.25.block.channel_mixer.m.0.c.weight", "stages.2.25.block.channel_mixer.m.2.c.weight", "stages.2.26.block.cpe.m.c.weight", "stages.2.27.block.token_channel_mixer.gamma", "stages.2.27.block.token_channel_mixer.m.q.c.weight", "stages.2.27.block.token_channel_mixer.m.k.c.weight", "stages.2.27.block.token_channel_mixer.m.proj.c.weight", "stages.2.27.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.28.block.channel_mixer.gamma", "stages.2.28.block.channel_mixer.m.0.c.weight", "stages.2.28.block.channel_mixer.m.2.c.weight", "stages.2.29.block.cpe.m.c.weight", "stages.2.30.block.token_channel_mixer.gamma", "stages.2.30.block.token_channel_mixer.m.q.c.weight", "stages.2.30.block.token_channel_mixer.m.k.c.weight", "stages.2.30.block.token_channel_mixer.m.proj.c.weight", "stages.2.30.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.31.block.channel_mixer.gamma", "stages.2.31.block.channel_mixer.m.0.c.weight", "stages.2.31.block.channel_mixer.m.2.c.weight", "stages.2.32.block.cpe.m.c.weight", "stages.2.33.block.token_channel_mixer.gamma", "stages.2.33.block.token_channel_mixer.m.q.c.weight", "stages.2.33.block.token_channel_mixer.m.k.c.weight", "stages.2.33.block.token_channel_mixer.m.proj.c.weight", "stages.2.33.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.34.block.channel_mixer.gamma", "stages.2.34.block.channel_mixer.m.0.c.weight", "stages.2.34.block.channel_mixer.m.2.c.weight", "stages.2.35.block.cpe.m.c.weight", "stages.2.36.block.token_channel_mixer.gamma", "stages.2.36.block.token_channel_mixer.m.q.c.weight", "stages.2.36.block.token_channel_mixer.m.k.c.weight", "stages.2.36.block.token_channel_mixer.m.proj.c.weight", "stages.2.36.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.37.block.channel_mixer.gamma", "stages.2.37.block.channel_mixer.m.0.c.weight", "stages.2.37.block.channel_mixer.m.2.c.weight", "stages.2.38.block.cpe.m.c.weight", "stages.2.39.block.token_channel_mixer.gamma", "stages.2.39.block.token_channel_mixer.m.q.c.weight", "stages.2.39.block.token_channel_mixer.m.k.c.weight", "stages.2.39.block.token_channel_mixer.m.proj.c.weight", "stages.2.39.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.40.block.channel_mixer.gamma", "stages.2.40.block.channel_mixer.m.0.c.weight", "stages.2.40.block.channel_mixer.m.2.c.weight", "stages.2.41.block.cpe.m.c.weight", "stages.2.42.block.token_channel_mixer.gamma", "stages.2.42.block.token_channel_mixer.m.q.c.weight", "stages.2.42.block.token_channel_mixer.m.k.c.weight", "stages.2.42.block.token_channel_mixer.m.proj.c.weight", "stages.2.42.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.43.block.channel_mixer.gamma", "stages.2.43.block.channel_mixer.m.0.c.weight", "stages.2.43.block.channel_mixer.m.2.c.weight", "stages.2.44.block.cpe.m.c.weight", "stages.2.45.block.token_channel_mixer.gamma", "stages.2.45.block.token_channel_mixer.m.q.c.weight", "stages.2.45.block.token_channel_mixer.m.k.c.weight", "stages.2.45.block.token_channel_mixer.m.proj.c.weight", "stages.2.45.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.46.block.channel_mixer.gamma", "stages.2.46.block.channel_mixer.m.0.c.weight", "stages.2.46.block.channel_mixer.m.2.c.weight", "stages.2.47.block.cpe.m.c.weight", "stages.2.48.block.token_channel_mixer.gamma", "stages.2.48.block.token_channel_mixer.m.q.c.weight", "stages.2.48.block.token_channel_mixer.m.k.c.weight", "stages.2.48.block.token_channel_mixer.m.proj.c.weight", "stages.2.48.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.49.block.channel_mixer.gamma", "stages.2.49.block.channel_mixer.m.0.c.weight", "stages.2.49.block.channel_mixer.m.2.c.weight", "stages.2.50.block.cpe.m.c.weight", "stages.2.51.block.token_channel_mixer.gamma", "stages.2.51.block.token_channel_mixer.m.q.c.weight", "stages.2.51.block.token_channel_mixer.m.k.c.weight", "stages.2.51.block.token_channel_mixer.m.proj.c.weight", "stages.2.51.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.52.block.channel_mixer.gamma", "stages.2.52.block.channel_mixer.m.0.c.weight", "stages.2.52.block.channel_mixer.m.2.c.weight", "stages.2.53.block.cpe.m.c.weight", "stages.2.54.block.token_channel_mixer.gamma", "stages.2.54.block.token_channel_mixer.m.q.c.weight", "stages.2.54.block.token_channel_mixer.m.k.c.weight", "stages.2.54.block.token_channel_mixer.m.proj.c.weight", "stages.2.54.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.55.block.channel_mixer.gamma", "stages.2.55.block.channel_mixer.m.0.c.weight", "stages.2.55.block.channel_mixer.m.2.c.weight", "stages.2.56.block.cpe.m.c.weight", "stages.2.57.block.token_channel_mixer.gamma", "stages.2.57.block.token_channel_mixer.m.q.c.weight", "stages.2.57.block.token_channel_mixer.m.k.c.weight", "stages.2.57.block.token_channel_mixer.m.proj.c.weight", "stages.2.57.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.58.block.channel_mixer.gamma", "stages.2.58.block.channel_mixer.m.0.c.weight", "stages.2.58.block.channel_mixer.m.2.c.weight", "stages.2.59.block.token_channel_mixer.gamma", "stages.2.59.block.token_channel_mixer.m.0.c.weight", "stages.2.59.block.token_channel_mixer.m.1.c.weight", "stages.2.59.block.token_channel_mixer.m.3.c.weight", "stages.3.0.block.cpe.m.c.weight", "stages.3.1.block.token_channel_mixer.gamma", "stages.3.1.block.token_channel_mixer.m.q.c.weight", "stages.3.1.block.token_channel_mixer.m.k.c.weight", "stages.3.1.block.token_channel_mixer.m.proj.c.weight", "stages.3.1.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.2.block.channel_mixer.gamma", "stages.3.2.block.channel_mixer.m.0.c.weight", "stages.3.2.block.channel_mixer.m.2.c.weight", "stages.3.3.block.cpe.m.c.weight", "stages.3.4.block.token_channel_mixer.gamma", "stages.3.4.block.token_channel_mixer.m.q.c.weight", "stages.3.4.block.token_channel_mixer.m.k.c.weight", "stages.3.4.block.token_channel_mixer.m.proj.c.weight", "stages.3.4.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.5.block.channel_mixer.gamma", "stages.3.5.block.channel_mixer.m.0.c.weight", "stages.3.5.block.channel_mixer.m.2.c.weight", "stages.3.6.block.cpe.m.c.weight", "stages.3.7.block.token_channel_mixer.gamma", "stages.3.7.block.token_channel_mixer.m.q.c.weight", "stages.3.7.block.token_channel_mixer.m.k.c.weight", "stages.3.7.block.token_channel_mixer.m.proj.c.weight", "stages.3.7.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.8.block.channel_mixer.gamma", "stages.3.8.block.channel_mixer.m.0.c.weight", "stages.3.8.block.channel_mixer.m.2.c.weight", "stages.3.9.block.cpe.m.c.weight", "stages.3.10.block.token_channel_mixer.gamma", "stages.3.10.block.token_channel_mixer.m.q.c.weight", "stages.3.10.block.token_channel_mixer.m.k.c.weight", "stages.3.10.block.token_channel_mixer.m.proj.c.weight", "stages.3.10.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.11.block.channel_mixer.gamma", "stages.3.11.block.channel_mixer.m.0.c.weight", "stages.3.11.block.channel_mixer.m.2.c.weight", "stages.3.12.block.cpe.m.c.weight", "stages.3.13.block.token_channel_mixer.gamma", "stages.3.13.block.token_channel_mixer.m.q.c.weight", "stages.3.13.block.token_channel_mixer.m.k.c.weight", "stages.3.13.block.token_channel_mixer.m.proj.c.weight", "stages.3.13.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.14.block.channel_mixer.gamma", "stages.3.14.block.channel_mixer.m.0.c.weight", "stages.3.14.block.channel_mixer.m.2.c.weight", "stages.3.15.block.cpe.m.c.weight", "stages.3.16.block.token_channel_mixer.gamma", "stages.3.16.block.token_channel_mixer.m.q.c.weight", "stages.3.16.block.token_channel_mixer.m.k.c.weight", "stages.3.16.block.token_channel_mixer.m.proj.c.weight", "stages.3.16.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.17.block.channel_mixer.gamma", "stages.3.17.block.channel_mixer.m.0.c.weight", "stages.3.17.block.channel_mixer.m.2.c.weight", "classifier.classifier.l.weight" ], "lr_scale": 1.0 }, "no_decay": { "weight_decay": 0.0, "params": [ "downsample_layers.0.0.bn.weight", "downsample_layers.0.0.bn.bias", "downsample_layers.0.2.conv_exp_bn1.bn.weight", "downsample_layers.0.2.conv_exp_bn1.bn.bias", "downsample_layers.0.2.conv_pwl_bn2.bn.weight", "downsample_layers.0.2.conv_pwl_bn2.bn.bias", "downsample_layers.1.0.bn.weight", "downsample_layers.1.0.bn.bias", "downsample_layers.2.0.bn.weight", "downsample_layers.2.0.bn.bias", "downsample_layers.3.0.bn.weight", "downsample_layers.3.0.bn.bias", "stages.0.0.block.token_channel_mixer.m.0.bn.weight", "stages.0.0.block.token_channel_mixer.m.0.bn.bias", "stages.0.0.block.token_channel_mixer.m.1.bn.weight", "stages.0.0.block.token_channel_mixer.m.1.bn.bias", "stages.0.0.block.token_channel_mixer.m.3.bn.weight", "stages.0.0.block.token_channel_mixer.m.3.bn.bias", "stages.0.1.block.token_channel_mixer.m.0.bn.weight", "stages.0.1.block.token_channel_mixer.m.0.bn.bias", "stages.0.1.block.token_channel_mixer.m.1.bn.weight", "stages.0.1.block.token_channel_mixer.m.1.bn.bias", "stages.0.1.block.token_channel_mixer.m.3.bn.weight", "stages.0.1.block.token_channel_mixer.m.3.bn.bias", "stages.0.2.block.token_channel_mixer.m.0.bn.weight", "stages.0.2.block.token_channel_mixer.m.0.bn.bias", "stages.0.2.block.token_channel_mixer.m.1.bn.weight", "stages.0.2.block.token_channel_mixer.m.1.bn.bias", "stages.0.2.block.token_channel_mixer.m.3.bn.weight", "stages.0.2.block.token_channel_mixer.m.3.bn.bias", "stages.0.3.block.token_channel_mixer.m.0.bn.weight", "stages.0.3.block.token_channel_mixer.m.0.bn.bias", "stages.0.3.block.token_channel_mixer.m.1.bn.weight", "stages.0.3.block.token_channel_mixer.m.1.bn.bias", "stages.0.3.block.token_channel_mixer.m.3.bn.weight", "stages.0.3.block.token_channel_mixer.m.3.bn.bias", "stages.0.4.block.token_channel_mixer.m.0.bn.weight", "stages.0.4.block.token_channel_mixer.m.0.bn.bias", "stages.0.4.block.token_channel_mixer.m.1.bn.weight", "stages.0.4.block.token_channel_mixer.m.1.bn.bias", "stages.0.4.block.token_channel_mixer.m.3.bn.weight", "stages.0.4.block.token_channel_mixer.m.3.bn.bias", "stages.1.0.block.token_channel_mixer.m.0.bn.weight", "stages.1.0.block.token_channel_mixer.m.0.bn.bias", "stages.1.0.block.token_channel_mixer.m.1.bn.weight", "stages.1.0.block.token_channel_mixer.m.1.bn.bias", "stages.1.0.block.token_channel_mixer.m.3.bn.weight", "stages.1.0.block.token_channel_mixer.m.3.bn.bias", "stages.1.1.block.token_channel_mixer.m.0.bn.weight", "stages.1.1.block.token_channel_mixer.m.0.bn.bias", "stages.1.1.block.token_channel_mixer.m.1.bn.weight", "stages.1.1.block.token_channel_mixer.m.1.bn.bias", "stages.1.1.block.token_channel_mixer.m.3.bn.weight", "stages.1.1.block.token_channel_mixer.m.3.bn.bias", "stages.1.2.block.token_channel_mixer.m.0.bn.weight", "stages.1.2.block.token_channel_mixer.m.0.bn.bias", "stages.1.2.block.token_channel_mixer.m.1.bn.weight", "stages.1.2.block.token_channel_mixer.m.1.bn.bias", "stages.1.2.block.token_channel_mixer.m.3.bn.weight", "stages.1.2.block.token_channel_mixer.m.3.bn.bias", "stages.1.3.block.token_channel_mixer.m.0.bn.weight", "stages.1.3.block.token_channel_mixer.m.0.bn.bias", "stages.1.3.block.token_channel_mixer.m.1.bn.weight", "stages.1.3.block.token_channel_mixer.m.1.bn.bias", "stages.1.3.block.token_channel_mixer.m.3.bn.weight", "stages.1.3.block.token_channel_mixer.m.3.bn.bias", "stages.1.4.block.token_channel_mixer.m.0.bn.weight", "stages.1.4.block.token_channel_mixer.m.0.bn.bias", "stages.1.4.block.token_channel_mixer.m.1.bn.weight", "stages.1.4.block.token_channel_mixer.m.1.bn.bias", "stages.1.4.block.token_channel_mixer.m.3.bn.weight", "stages.1.4.block.token_channel_mixer.m.3.bn.bias", "stages.2.0.block.token_channel_mixer.m.0.bn.weight", "stages.2.0.block.token_channel_mixer.m.0.bn.bias", "stages.2.0.block.token_channel_mixer.m.1.bn.weight", "stages.2.0.block.token_channel_mixer.m.1.bn.bias", "stages.2.0.block.token_channel_mixer.m.3.bn.weight", "stages.2.0.block.token_channel_mixer.m.3.bn.bias", "stages.2.1.block.token_channel_mixer.m.0.bn.weight", "stages.2.1.block.token_channel_mixer.m.0.bn.bias", "stages.2.1.block.token_channel_mixer.m.1.bn.weight", "stages.2.1.block.token_channel_mixer.m.1.bn.bias", "stages.2.1.block.token_channel_mixer.m.3.bn.weight", "stages.2.1.block.token_channel_mixer.m.3.bn.bias", "stages.2.2.block.token_channel_mixer.m.0.bn.weight", "stages.2.2.block.token_channel_mixer.m.0.bn.bias", "stages.2.2.block.token_channel_mixer.m.1.bn.weight", "stages.2.2.block.token_channel_mixer.m.1.bn.bias", "stages.2.2.block.token_channel_mixer.m.3.bn.weight", "stages.2.2.block.token_channel_mixer.m.3.bn.bias", "stages.2.3.block.token_channel_mixer.m.0.bn.weight", "stages.2.3.block.token_channel_mixer.m.0.bn.bias", "stages.2.3.block.token_channel_mixer.m.1.bn.weight", "stages.2.3.block.token_channel_mixer.m.1.bn.bias", "stages.2.3.block.token_channel_mixer.m.3.bn.weight", "stages.2.3.block.token_channel_mixer.m.3.bn.bias", "stages.2.4.block.token_channel_mixer.m.0.bn.weight", "stages.2.4.block.token_channel_mixer.m.0.bn.bias", "stages.2.4.block.token_channel_mixer.m.1.bn.weight", "stages.2.4.block.token_channel_mixer.m.1.bn.bias", "stages.2.4.block.token_channel_mixer.m.3.bn.weight", "stages.2.4.block.token_channel_mixer.m.3.bn.bias", "stages.2.5.block.token_channel_mixer.m.0.bn.weight", "stages.2.5.block.token_channel_mixer.m.0.bn.bias", "stages.2.5.block.token_channel_mixer.m.1.bn.weight", "stages.2.5.block.token_channel_mixer.m.1.bn.bias", "stages.2.5.block.token_channel_mixer.m.3.bn.weight", "stages.2.5.block.token_channel_mixer.m.3.bn.bias", "stages.2.6.block.token_channel_mixer.m.0.bn.weight", "stages.2.6.block.token_channel_mixer.m.0.bn.bias", "stages.2.6.block.token_channel_mixer.m.1.bn.weight", "stages.2.6.block.token_channel_mixer.m.1.bn.bias", "stages.2.6.block.token_channel_mixer.m.3.bn.weight", "stages.2.6.block.token_channel_mixer.m.3.bn.bias", "stages.2.7.block.token_channel_mixer.m.0.bn.weight", "stages.2.7.block.token_channel_mixer.m.0.bn.bias", "stages.2.7.block.token_channel_mixer.m.1.bn.weight", "stages.2.7.block.token_channel_mixer.m.1.bn.bias", "stages.2.7.block.token_channel_mixer.m.3.bn.weight", "stages.2.7.block.token_channel_mixer.m.3.bn.bias", "stages.2.8.block.token_channel_mixer.m.0.bn.weight", "stages.2.8.block.token_channel_mixer.m.0.bn.bias", "stages.2.8.block.token_channel_mixer.m.1.bn.weight", "stages.2.8.block.token_channel_mixer.m.1.bn.bias", "stages.2.8.block.token_channel_mixer.m.3.bn.weight", "stages.2.8.block.token_channel_mixer.m.3.bn.bias", "stages.2.9.block.token_channel_mixer.m.0.bn.weight", "stages.2.9.block.token_channel_mixer.m.0.bn.bias", "stages.2.9.block.token_channel_mixer.m.1.bn.weight", "stages.2.9.block.token_channel_mixer.m.1.bn.bias", "stages.2.9.block.token_channel_mixer.m.3.bn.weight", "stages.2.9.block.token_channel_mixer.m.3.bn.bias", "stages.2.10.block.token_channel_mixer.m.0.bn.weight", "stages.2.10.block.token_channel_mixer.m.0.bn.bias", "stages.2.10.block.token_channel_mixer.m.1.bn.weight", "stages.2.10.block.token_channel_mixer.m.1.bn.bias", "stages.2.10.block.token_channel_mixer.m.3.bn.weight", "stages.2.10.block.token_channel_mixer.m.3.bn.bias", "stages.2.11.block.token_channel_mixer.m.0.bn.weight", "stages.2.11.block.token_channel_mixer.m.0.bn.bias", "stages.2.11.block.token_channel_mixer.m.1.bn.weight", "stages.2.11.block.token_channel_mixer.m.1.bn.bias", "stages.2.11.block.token_channel_mixer.m.3.bn.weight", "stages.2.11.block.token_channel_mixer.m.3.bn.bias", "stages.2.12.block.token_channel_mixer.m.0.bn.weight", "stages.2.12.block.token_channel_mixer.m.0.bn.bias", "stages.2.12.block.token_channel_mixer.m.1.bn.weight", "stages.2.12.block.token_channel_mixer.m.1.bn.bias", "stages.2.12.block.token_channel_mixer.m.3.bn.weight", "stages.2.12.block.token_channel_mixer.m.3.bn.bias", "stages.2.13.block.token_channel_mixer.m.0.bn.weight", "stages.2.13.block.token_channel_mixer.m.0.bn.bias", "stages.2.13.block.token_channel_mixer.m.1.bn.weight", "stages.2.13.block.token_channel_mixer.m.1.bn.bias", "stages.2.13.block.token_channel_mixer.m.3.bn.weight", "stages.2.13.block.token_channel_mixer.m.3.bn.bias", "stages.2.14.block.cpe.m.bn.weight", "stages.2.14.block.cpe.m.bn.bias", "stages.2.15.block.token_channel_mixer.m.q.bn.weight", "stages.2.15.block.token_channel_mixer.m.q.bn.bias", "stages.2.15.block.token_channel_mixer.m.k.bn.weight", "stages.2.15.block.token_channel_mixer.m.k.bn.bias", "stages.2.15.block.token_channel_mixer.m.proj.bn.weight", "stages.2.15.block.token_channel_mixer.m.proj.bn.bias", "stages.2.15.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.15.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.16.block.channel_mixer.m.0.bn.weight", "stages.2.16.block.channel_mixer.m.0.bn.bias", "stages.2.16.block.channel_mixer.m.2.bn.weight", "stages.2.16.block.channel_mixer.m.2.bn.bias", "stages.2.17.block.cpe.m.bn.weight", "stages.2.17.block.cpe.m.bn.bias", "stages.2.18.block.token_channel_mixer.m.q.bn.weight", "stages.2.18.block.token_channel_mixer.m.q.bn.bias", "stages.2.18.block.token_channel_mixer.m.k.bn.weight", "stages.2.18.block.token_channel_mixer.m.k.bn.bias", "stages.2.18.block.token_channel_mixer.m.proj.bn.weight", "stages.2.18.block.token_channel_mixer.m.proj.bn.bias", "stages.2.18.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.18.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.19.block.channel_mixer.m.0.bn.weight", "stages.2.19.block.channel_mixer.m.0.bn.bias", "stages.2.19.block.channel_mixer.m.2.bn.weight", "stages.2.19.block.channel_mixer.m.2.bn.bias", "stages.2.20.block.cpe.m.bn.weight", "stages.2.20.block.cpe.m.bn.bias", "stages.2.21.block.token_channel_mixer.m.q.bn.weight", "stages.2.21.block.token_channel_mixer.m.q.bn.bias", "stages.2.21.block.token_channel_mixer.m.k.bn.weight", "stages.2.21.block.token_channel_mixer.m.k.bn.bias", "stages.2.21.block.token_channel_mixer.m.proj.bn.weight", "stages.2.21.block.token_channel_mixer.m.proj.bn.bias", "stages.2.21.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.21.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.22.block.channel_mixer.m.0.bn.weight", "stages.2.22.block.channel_mixer.m.0.bn.bias", "stages.2.22.block.channel_mixer.m.2.bn.weight", "stages.2.22.block.channel_mixer.m.2.bn.bias", "stages.2.23.block.cpe.m.bn.weight", "stages.2.23.block.cpe.m.bn.bias", "stages.2.24.block.token_channel_mixer.m.q.bn.weight", "stages.2.24.block.token_channel_mixer.m.q.bn.bias", "stages.2.24.block.token_channel_mixer.m.k.bn.weight", "stages.2.24.block.token_channel_mixer.m.k.bn.bias", "stages.2.24.block.token_channel_mixer.m.proj.bn.weight", "stages.2.24.block.token_channel_mixer.m.proj.bn.bias", "stages.2.24.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.24.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.25.block.channel_mixer.m.0.bn.weight", "stages.2.25.block.channel_mixer.m.0.bn.bias", "stages.2.25.block.channel_mixer.m.2.bn.weight", "stages.2.25.block.channel_mixer.m.2.bn.bias", "stages.2.26.block.cpe.m.bn.weight", "stages.2.26.block.cpe.m.bn.bias", "stages.2.27.block.token_channel_mixer.m.q.bn.weight", "stages.2.27.block.token_channel_mixer.m.q.bn.bias", "stages.2.27.block.token_channel_mixer.m.k.bn.weight", "stages.2.27.block.token_channel_mixer.m.k.bn.bias", "stages.2.27.block.token_channel_mixer.m.proj.bn.weight", "stages.2.27.block.token_channel_mixer.m.proj.bn.bias", "stages.2.27.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.27.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.28.block.channel_mixer.m.0.bn.weight", "stages.2.28.block.channel_mixer.m.0.bn.bias", "stages.2.28.block.channel_mixer.m.2.bn.weight", "stages.2.28.block.channel_mixer.m.2.bn.bias", "stages.2.29.block.cpe.m.bn.weight", "stages.2.29.block.cpe.m.bn.bias", "stages.2.30.block.token_channel_mixer.m.q.bn.weight", "stages.2.30.block.token_channel_mixer.m.q.bn.bias", "stages.2.30.block.token_channel_mixer.m.k.bn.weight", "stages.2.30.block.token_channel_mixer.m.k.bn.bias", "stages.2.30.block.token_channel_mixer.m.proj.bn.weight", "stages.2.30.block.token_channel_mixer.m.proj.bn.bias", "stages.2.30.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.30.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.31.block.channel_mixer.m.0.bn.weight", "stages.2.31.block.channel_mixer.m.0.bn.bias", "stages.2.31.block.channel_mixer.m.2.bn.weight", "stages.2.31.block.channel_mixer.m.2.bn.bias", "stages.2.32.block.cpe.m.bn.weight", "stages.2.32.block.cpe.m.bn.bias", "stages.2.33.block.token_channel_mixer.m.q.bn.weight", "stages.2.33.block.token_channel_mixer.m.q.bn.bias", "stages.2.33.block.token_channel_mixer.m.k.bn.weight", "stages.2.33.block.token_channel_mixer.m.k.bn.bias", "stages.2.33.block.token_channel_mixer.m.proj.bn.weight", "stages.2.33.block.token_channel_mixer.m.proj.bn.bias", "stages.2.33.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.33.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.34.block.channel_mixer.m.0.bn.weight", "stages.2.34.block.channel_mixer.m.0.bn.bias", "stages.2.34.block.channel_mixer.m.2.bn.weight", "stages.2.34.block.channel_mixer.m.2.bn.bias", "stages.2.35.block.cpe.m.bn.weight", "stages.2.35.block.cpe.m.bn.bias", "stages.2.36.block.token_channel_mixer.m.q.bn.weight", "stages.2.36.block.token_channel_mixer.m.q.bn.bias", "stages.2.36.block.token_channel_mixer.m.k.bn.weight", "stages.2.36.block.token_channel_mixer.m.k.bn.bias", "stages.2.36.block.token_channel_mixer.m.proj.bn.weight", "stages.2.36.block.token_channel_mixer.m.proj.bn.bias", "stages.2.36.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.36.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.37.block.channel_mixer.m.0.bn.weight", "stages.2.37.block.channel_mixer.m.0.bn.bias", "stages.2.37.block.channel_mixer.m.2.bn.weight", "stages.2.37.block.channel_mixer.m.2.bn.bias", "stages.2.38.block.cpe.m.bn.weight", "stages.2.38.block.cpe.m.bn.bias", "stages.2.39.block.token_channel_mixer.m.q.bn.weight", "stages.2.39.block.token_channel_mixer.m.q.bn.bias", "stages.2.39.block.token_channel_mixer.m.k.bn.weight", "stages.2.39.block.token_channel_mixer.m.k.bn.bias", "stages.2.39.block.token_channel_mixer.m.proj.bn.weight", "stages.2.39.block.token_channel_mixer.m.proj.bn.bias", "stages.2.39.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.39.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.40.block.channel_mixer.m.0.bn.weight", "stages.2.40.block.channel_mixer.m.0.bn.bias", "stages.2.40.block.channel_mixer.m.2.bn.weight", "stages.2.40.block.channel_mixer.m.2.bn.bias", "stages.2.41.block.cpe.m.bn.weight", "stages.2.41.block.cpe.m.bn.bias", "stages.2.42.block.token_channel_mixer.m.q.bn.weight", "stages.2.42.block.token_channel_mixer.m.q.bn.bias", "stages.2.42.block.token_channel_mixer.m.k.bn.weight", "stages.2.42.block.token_channel_mixer.m.k.bn.bias", "stages.2.42.block.token_channel_mixer.m.proj.bn.weight", "stages.2.42.block.token_channel_mixer.m.proj.bn.bias", "stages.2.42.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.42.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.43.block.channel_mixer.m.0.bn.weight", "stages.2.43.block.channel_mixer.m.0.bn.bias", "stages.2.43.block.channel_mixer.m.2.bn.weight", "stages.2.43.block.channel_mixer.m.2.bn.bias", "stages.2.44.block.cpe.m.bn.weight", "stages.2.44.block.cpe.m.bn.bias", "stages.2.45.block.token_channel_mixer.m.q.bn.weight", "stages.2.45.block.token_channel_mixer.m.q.bn.bias", "stages.2.45.block.token_channel_mixer.m.k.bn.weight", "stages.2.45.block.token_channel_mixer.m.k.bn.bias", "stages.2.45.block.token_channel_mixer.m.proj.bn.weight", "stages.2.45.block.token_channel_mixer.m.proj.bn.bias", "stages.2.45.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.45.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.46.block.channel_mixer.m.0.bn.weight", "stages.2.46.block.channel_mixer.m.0.bn.bias", "stages.2.46.block.channel_mixer.m.2.bn.weight", "stages.2.46.block.channel_mixer.m.2.bn.bias", "stages.2.47.block.cpe.m.bn.weight", "stages.2.47.block.cpe.m.bn.bias", "stages.2.48.block.token_channel_mixer.m.q.bn.weight", "stages.2.48.block.token_channel_mixer.m.q.bn.bias", "stages.2.48.block.token_channel_mixer.m.k.bn.weight", "stages.2.48.block.token_channel_mixer.m.k.bn.bias", "stages.2.48.block.token_channel_mixer.m.proj.bn.weight", "stages.2.48.block.token_channel_mixer.m.proj.bn.bias", "stages.2.48.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.48.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.49.block.channel_mixer.m.0.bn.weight", "stages.2.49.block.channel_mixer.m.0.bn.bias", "stages.2.49.block.channel_mixer.m.2.bn.weight", "stages.2.49.block.channel_mixer.m.2.bn.bias", "stages.2.50.block.cpe.m.bn.weight", "stages.2.50.block.cpe.m.bn.bias", "stages.2.51.block.token_channel_mixer.m.q.bn.weight", "stages.2.51.block.token_channel_mixer.m.q.bn.bias", "stages.2.51.block.token_channel_mixer.m.k.bn.weight", "stages.2.51.block.token_channel_mixer.m.k.bn.bias", "stages.2.51.block.token_channel_mixer.m.proj.bn.weight", "stages.2.51.block.token_channel_mixer.m.proj.bn.bias", "stages.2.51.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.51.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.52.block.channel_mixer.m.0.bn.weight", "stages.2.52.block.channel_mixer.m.0.bn.bias", "stages.2.52.block.channel_mixer.m.2.bn.weight", "stages.2.52.block.channel_mixer.m.2.bn.bias", "stages.2.53.block.cpe.m.bn.weight", "stages.2.53.block.cpe.m.bn.bias", "stages.2.54.block.token_channel_mixer.m.q.bn.weight", "stages.2.54.block.token_channel_mixer.m.q.bn.bias", "stages.2.54.block.token_channel_mixer.m.k.bn.weight", "stages.2.54.block.token_channel_mixer.m.k.bn.bias", "stages.2.54.block.token_channel_mixer.m.proj.bn.weight", "stages.2.54.block.token_channel_mixer.m.proj.bn.bias", "stages.2.54.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.54.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.55.block.channel_mixer.m.0.bn.weight", "stages.2.55.block.channel_mixer.m.0.bn.bias", "stages.2.55.block.channel_mixer.m.2.bn.weight", "stages.2.55.block.channel_mixer.m.2.bn.bias", "stages.2.56.block.cpe.m.bn.weight", "stages.2.56.block.cpe.m.bn.bias", "stages.2.57.block.token_channel_mixer.m.q.bn.weight", "stages.2.57.block.token_channel_mixer.m.q.bn.bias", "stages.2.57.block.token_channel_mixer.m.k.bn.weight", "stages.2.57.block.token_channel_mixer.m.k.bn.bias", "stages.2.57.block.token_channel_mixer.m.proj.bn.weight", "stages.2.57.block.token_channel_mixer.m.proj.bn.bias", "stages.2.57.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.57.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.58.block.channel_mixer.m.0.bn.weight", "stages.2.58.block.channel_mixer.m.0.bn.bias", "stages.2.58.block.channel_mixer.m.2.bn.weight", "stages.2.58.block.channel_mixer.m.2.bn.bias", "stages.2.59.block.token_channel_mixer.m.0.bn.weight", "stages.2.59.block.token_channel_mixer.m.0.bn.bias", "stages.2.59.block.token_channel_mixer.m.1.bn.weight", "stages.2.59.block.token_channel_mixer.m.1.bn.bias", "stages.2.59.block.token_channel_mixer.m.3.bn.weight", "stages.2.59.block.token_channel_mixer.m.3.bn.bias", "stages.3.0.block.cpe.m.bn.weight", "stages.3.0.block.cpe.m.bn.bias", "stages.3.1.block.token_channel_mixer.m.q.bn.weight", "stages.3.1.block.token_channel_mixer.m.q.bn.bias", "stages.3.1.block.token_channel_mixer.m.k.bn.weight", "stages.3.1.block.token_channel_mixer.m.k.bn.bias", "stages.3.1.block.token_channel_mixer.m.proj.bn.weight", "stages.3.1.block.token_channel_mixer.m.proj.bn.bias", "stages.3.1.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.1.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.2.block.channel_mixer.m.0.bn.weight", "stages.3.2.block.channel_mixer.m.0.bn.bias", "stages.3.2.block.channel_mixer.m.2.bn.weight", "stages.3.2.block.channel_mixer.m.2.bn.bias", "stages.3.3.block.cpe.m.bn.weight", "stages.3.3.block.cpe.m.bn.bias", "stages.3.4.block.token_channel_mixer.m.q.bn.weight", "stages.3.4.block.token_channel_mixer.m.q.bn.bias", "stages.3.4.block.token_channel_mixer.m.k.bn.weight", "stages.3.4.block.token_channel_mixer.m.k.bn.bias", "stages.3.4.block.token_channel_mixer.m.proj.bn.weight", "stages.3.4.block.token_channel_mixer.m.proj.bn.bias", "stages.3.4.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.4.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.5.block.channel_mixer.m.0.bn.weight", "stages.3.5.block.channel_mixer.m.0.bn.bias", "stages.3.5.block.channel_mixer.m.2.bn.weight", "stages.3.5.block.channel_mixer.m.2.bn.bias", "stages.3.6.block.cpe.m.bn.weight", "stages.3.6.block.cpe.m.bn.bias", "stages.3.7.block.token_channel_mixer.m.q.bn.weight", "stages.3.7.block.token_channel_mixer.m.q.bn.bias", "stages.3.7.block.token_channel_mixer.m.k.bn.weight", "stages.3.7.block.token_channel_mixer.m.k.bn.bias", "stages.3.7.block.token_channel_mixer.m.proj.bn.weight", "stages.3.7.block.token_channel_mixer.m.proj.bn.bias", "stages.3.7.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.7.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.8.block.channel_mixer.m.0.bn.weight", "stages.3.8.block.channel_mixer.m.0.bn.bias", "stages.3.8.block.channel_mixer.m.2.bn.weight", "stages.3.8.block.channel_mixer.m.2.bn.bias", "stages.3.9.block.cpe.m.bn.weight", "stages.3.9.block.cpe.m.bn.bias", "stages.3.10.block.token_channel_mixer.m.q.bn.weight", "stages.3.10.block.token_channel_mixer.m.q.bn.bias", "stages.3.10.block.token_channel_mixer.m.k.bn.weight", "stages.3.10.block.token_channel_mixer.m.k.bn.bias", "stages.3.10.block.token_channel_mixer.m.proj.bn.weight", "stages.3.10.block.token_channel_mixer.m.proj.bn.bias", "stages.3.10.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.10.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.11.block.channel_mixer.m.0.bn.weight", "stages.3.11.block.channel_mixer.m.0.bn.bias", "stages.3.11.block.channel_mixer.m.2.bn.weight", "stages.3.11.block.channel_mixer.m.2.bn.bias", "stages.3.12.block.cpe.m.bn.weight", "stages.3.12.block.cpe.m.bn.bias", "stages.3.13.block.token_channel_mixer.m.q.bn.weight", "stages.3.13.block.token_channel_mixer.m.q.bn.bias", "stages.3.13.block.token_channel_mixer.m.k.bn.weight", "stages.3.13.block.token_channel_mixer.m.k.bn.bias", "stages.3.13.block.token_channel_mixer.m.proj.bn.weight", "stages.3.13.block.token_channel_mixer.m.proj.bn.bias", "stages.3.13.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.13.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.14.block.channel_mixer.m.0.bn.weight", "stages.3.14.block.channel_mixer.m.0.bn.bias", "stages.3.14.block.channel_mixer.m.2.bn.weight", "stages.3.14.block.channel_mixer.m.2.bn.bias", "stages.3.15.block.cpe.m.bn.weight", "stages.3.15.block.cpe.m.bn.bias", "stages.3.16.block.token_channel_mixer.m.q.bn.weight", "stages.3.16.block.token_channel_mixer.m.q.bn.bias", "stages.3.16.block.token_channel_mixer.m.k.bn.weight", "stages.3.16.block.token_channel_mixer.m.k.bn.bias", "stages.3.16.block.token_channel_mixer.m.proj.bn.weight", "stages.3.16.block.token_channel_mixer.m.proj.bn.bias", "stages.3.16.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.16.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.17.block.channel_mixer.m.0.bn.weight", "stages.3.17.block.channel_mixer.m.0.bn.bias", "stages.3.17.block.channel_mixer.m.2.bn.weight", "stages.3.17.block.channel_mixer.m.2.bn.bias", "classifier.classifier.bn.weight", "classifier.classifier.bn.bias", "classifier.classifier.l.bias" ], "lr_scale": 1.0 } } Use Cosine LR scheduler Set warmup steps = 3120 Set warmup steps = 0 Max WD = 0.0500000, Min WD = 0.0500000 criterion = SoftTargetCrossEntropy() Auto resume checkpoint: Start training for 300 epochs Epoch: [0] [ 0/156] eta: 1:39:53 lr: 0.000000 min_lr: 0.000000 loss: 7.1039 (7.1039) weight_decay: 0.0500 (0.0500) time: 38.4193 data: 3.7894 max mem: 57114 Epoch: [0] [ 10/156] eta: 0:13:49 lr: 0.000026 min_lr: 0.000026 loss: 7.0620 (7.0630) weight_decay: 0.0500 (0.0500) time: 5.6824 data: 0.3449 max mem: 57114 Epoch: [0] [ 20/156] eta: 0:08:13 lr: 0.000051 min_lr: 0.000051 loss: 7.0482 (7.0528) weight_decay: 0.0500 (0.0500) time: 1.8881 data: 0.0004 max mem: 57114 Epoch: [0] [ 30/156] eta: 0:06:04 lr: 0.000077 min_lr: 0.000077 loss: 7.0064 (7.0232) weight_decay: 0.0500 (0.0500) time: 1.3567 data: 0.0004 max mem: 57114 Epoch: [0] [ 40/156] eta: 0:04:43 lr: 0.000103 min_lr: 0.000103 loss: 6.9370 (7.0009) weight_decay: 0.0500 (0.0500) time: 1.1988 data: 0.0005 max mem: 57114 Epoch: [0] [ 50/156] eta: 0:03:42 lr: 0.000128 min_lr: 0.000128 loss: 6.9130 (6.9794) weight_decay: 0.0500 (0.0500) time: 0.8605 data: 0.0004 max mem: 57114 Epoch: [0] [ 60/156] eta: 0:02:58 lr: 0.000154 min_lr: 0.000154 loss: 6.8703 (6.9557) weight_decay: 0.0500 (0.0500) time: 0.6704 data: 0.0004 max mem: 57114 Epoch: [0] [ 70/156] eta: 0:02:25 lr: 0.000180 min_lr: 0.000180 loss: 6.8283 (6.9370) weight_decay: 0.0500 (0.0500) time: 0.6716 data: 0.0004 max mem: 57114 Epoch: [0] [ 80/156] eta: 0:01:59 lr: 0.000205 min_lr: 0.000205 loss: 6.8090 (6.9192) weight_decay: 0.0500 (0.0500) time: 0.6742 data: 0.0004 max mem: 57114 Epoch: [0] [ 90/156] eta: 0:01:37 lr: 0.000231 min_lr: 0.000231 loss: 6.7934 (6.9051) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0004 max mem: 57114 Epoch: [0] [100/156] eta: 0:01:18 lr: 0.000256 min_lr: 0.000256 loss: 6.7964 (6.8928) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0004 max mem: 57114 Epoch: [0] [110/156] eta: 0:01:01 lr: 0.000282 min_lr: 0.000282 loss: 6.7762 (6.8792) weight_decay: 0.0500 (0.0500) time: 0.6761 data: 0.0004 max mem: 57114 Epoch: [0] [120/156] eta: 0:00:45 lr: 0.000308 min_lr: 0.000308 loss: 6.7329 (6.8658) weight_decay: 0.0500 (0.0500) time: 0.6750 data: 0.0004 max mem: 57114 Epoch: [0] [130/156] eta: 0:00:31 lr: 0.000333 min_lr: 0.000333 loss: 6.6943 (6.8526) weight_decay: 0.0500 (0.0500) time: 0.6743 data: 0.0009 max mem: 57114 Epoch: [0] [140/156] eta: 0:00:19 lr: 0.000359 min_lr: 0.000359 loss: 6.6845 (6.8383) weight_decay: 0.0500 (0.0500) time: 0.6695 data: 0.0007 max mem: 57114 Epoch: [0] [150/156] eta: 0:00:06 lr: 0.000385 min_lr: 0.000385 loss: 6.6398 (6.8251) weight_decay: 0.0500 (0.0500) time: 0.6648 data: 0.0001 max mem: 57114 Epoch: [0] [155/156] eta: 0:00:01 lr: 0.000398 min_lr: 0.000398 loss: 6.6470 (6.8193) weight_decay: 0.0500 (0.0500) time: 0.6691 data: 0.0001 max mem: 57114 Epoch: [0] Total time: 0:02:58 (1.1420 s / it) Averaged stats: lr: 0.000398 min_lr: 0.000398 loss: 6.6470 (6.8177) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:27 loss: 5.8772 (5.8772) acc1: 2.6042 (2.6042) acc5: 12.5000 (12.5000) time: 5.4747 data: 1.8190 max mem: 57114 Test: [4/5] eta: 0:00:01 loss: 5.9074 (5.7426) acc1: 4.6875 (4.0921) acc5: 12.5000 (12.7877) time: 1.3068 data: 0.3640 max mem: 57114 Test: Total time: 0:00:06 (1.3182 s / it) * Acc@1 3.587 Acc@5 10.746 loss 5.833 Accuracy of the model on the 50000 test images: 3.6% Max accuracy: 3.59% Test: [0/5] eta: 0:00:12 loss: 6.9078 (6.9078) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.4422 data: 1.8873 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9078 (6.9079) acc1: 0.0000 (0.1279) acc5: 0.5208 (0.5115) time: 0.6391 data: 0.3776 max mem: 57114 Test: Total time: 0:00:03 (0.6505 s / it) * Acc@1 0.104 Acc@5 0.523 loss 6.908 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.10% Epoch: [1] [ 0/156] eta: 0:26:53 lr: 0.000400 min_lr: 0.000400 loss: 6.5778 (6.5778) weight_decay: 0.0500 (0.0500) time: 10.3454 data: 3.3689 max mem: 57114 Epoch: [1] [ 10/156] eta: 0:05:11 lr: 0.000426 min_lr: 0.000426 loss: 6.6556 (6.6238) weight_decay: 0.0500 (0.0500) time: 2.1363 data: 0.3067 max mem: 57114 Epoch: [1] [ 20/156] eta: 0:03:57 lr: 0.000451 min_lr: 0.000451 loss: 6.6123 (6.5961) weight_decay: 0.0500 (0.0500) time: 1.3144 data: 0.0004 max mem: 57114 Epoch: [1] [ 30/156] eta: 0:03:22 lr: 0.000477 min_lr: 0.000477 loss: 6.5493 (6.5990) weight_decay: 0.0500 (0.0500) time: 1.3129 data: 0.0003 max mem: 57114 Epoch: [1] [ 40/156] eta: 0:02:47 lr: 0.000503 min_lr: 0.000503 loss: 6.5787 (6.5863) weight_decay: 0.0500 (0.0500) time: 1.1293 data: 0.0004 max mem: 57114 Epoch: [1] [ 50/156] eta: 0:02:17 lr: 0.000528 min_lr: 0.000528 loss: 6.4575 (6.5580) weight_decay: 0.0500 (0.0500) time: 0.8211 data: 0.0004 max mem: 57114 Epoch: [1] [ 60/156] eta: 0:01:55 lr: 0.000554 min_lr: 0.000554 loss: 6.4459 (6.5423) weight_decay: 0.0500 (0.0500) time: 0.6933 data: 0.0003 max mem: 57114 Epoch: [1] [ 70/156] eta: 0:01:37 lr: 0.000580 min_lr: 0.000580 loss: 6.4919 (6.5374) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0005 max mem: 57114 Epoch: [1] [ 80/156] eta: 0:01:21 lr: 0.000605 min_lr: 0.000605 loss: 6.4891 (6.5235) weight_decay: 0.0500 (0.0500) time: 0.6913 data: 0.0005 max mem: 57114 Epoch: [1] [ 90/156] eta: 0:01:07 lr: 0.000631 min_lr: 0.000631 loss: 6.4321 (6.5096) weight_decay: 0.0500 (0.0500) time: 0.6753 data: 0.0003 max mem: 57114 Epoch: [1] [100/156] eta: 0:00:55 lr: 0.000657 min_lr: 0.000657 loss: 6.3939 (6.4890) weight_decay: 0.0500 (0.0500) time: 0.6728 data: 0.0003 max mem: 57114 Epoch: [1] [110/156] eta: 0:00:44 lr: 0.000682 min_lr: 0.000682 loss: 6.2998 (6.4741) weight_decay: 0.0500 (0.0500) time: 0.6755 data: 0.0004 max mem: 57114 Epoch: [1] [120/156] eta: 0:00:33 lr: 0.000708 min_lr: 0.000708 loss: 6.4889 (6.4771) weight_decay: 0.0500 (0.0500) time: 0.6786 data: 0.0003 max mem: 57114 Epoch: [1] [130/156] eta: 0:00:23 lr: 0.000734 min_lr: 0.000734 loss: 6.4917 (6.4677) weight_decay: 0.0500 (0.0500) time: 0.6735 data: 0.0004 max mem: 57114 Epoch: [1] [140/156] eta: 0:00:14 lr: 0.000759 min_lr: 0.000759 loss: 6.2513 (6.4513) weight_decay: 0.0500 (0.0500) time: 0.6699 data: 0.0003 max mem: 57114 Epoch: [1] [150/156] eta: 0:00:05 lr: 0.000785 min_lr: 0.000785 loss: 6.2406 (6.4399) weight_decay: 0.0500 (0.0500) time: 0.6670 data: 0.0001 max mem: 57114 Epoch: [1] [155/156] eta: 0:00:00 lr: 0.000798 min_lr: 0.000798 loss: 6.2406 (6.4370) weight_decay: 0.0500 (0.0500) time: 0.6667 data: 0.0001 max mem: 57114 Epoch: [1] Total time: 0:02:17 (0.8824 s / it) Averaged stats: lr: 0.000798 min_lr: 0.000798 loss: 6.2406 (6.4632) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 4.8223 (4.8223) acc1: 9.8958 (9.8958) acc5: 31.2500 (31.2500) time: 2.0794 data: 1.8237 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 4.9562 (4.8510) acc1: 10.9375 (11.2532) acc5: 26.5625 (26.5985) time: 0.5665 data: 0.3648 max mem: 57114 Test: Total time: 0:00:02 (0.5876 s / it) * Acc@1 9.883 Acc@5 24.692 loss 4.918 Accuracy of the model on the 50000 test images: 9.9% Max accuracy: 9.88% Test: [0/5] eta: 0:00:10 loss: 6.9077 (6.9077) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.0879 data: 1.8443 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9079 (6.9079) acc1: 0.0000 (0.1279) acc5: 0.5208 (0.6394) time: 0.5682 data: 0.3690 max mem: 57114 Test: Total time: 0:00:02 (0.5821 s / it) * Acc@1 0.094 Acc@5 0.478 loss 6.908 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [2] [ 0/156] eta: 0:22:07 lr: 0.000800 min_lr: 0.000800 loss: 6.5611 (6.5611) weight_decay: 0.0500 (0.0500) time: 8.5087 data: 4.1362 max mem: 57114 Epoch: [2] [ 10/156] eta: 0:03:23 lr: 0.000826 min_lr: 0.000826 loss: 6.5089 (6.4570) weight_decay: 0.0500 (0.0500) time: 1.3905 data: 0.3763 max mem: 57114 Epoch: [2] [ 20/156] eta: 0:02:24 lr: 0.000852 min_lr: 0.000852 loss: 6.3899 (6.3559) weight_decay: 0.0500 (0.0500) time: 0.6885 data: 0.0003 max mem: 57114 Epoch: [2] [ 30/156] eta: 0:01:58 lr: 0.000877 min_lr: 0.000877 loss: 6.3528 (6.3473) weight_decay: 0.0500 (0.0500) time: 0.6876 data: 0.0003 max mem: 57114 Epoch: [2] [ 40/156] eta: 0:01:42 lr: 0.000903 min_lr: 0.000903 loss: 6.2996 (6.3262) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0003 max mem: 57114 Epoch: [2] [ 50/156] eta: 0:01:29 lr: 0.000929 min_lr: 0.000929 loss: 6.2132 (6.2965) weight_decay: 0.0500 (0.0500) time: 0.6973 data: 0.0004 max mem: 57114 Epoch: [2] [ 60/156] eta: 0:01:18 lr: 0.000954 min_lr: 0.000954 loss: 6.2350 (6.2734) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.0004 max mem: 57114 Epoch: [2] [ 70/156] eta: 0:01:08 lr: 0.000980 min_lr: 0.000980 loss: 6.2350 (6.2559) weight_decay: 0.0500 (0.0500) time: 0.6729 data: 0.0004 max mem: 57114 Epoch: [2] [ 80/156] eta: 0:00:59 lr: 0.001005 min_lr: 0.001005 loss: 6.1314 (6.2363) weight_decay: 0.0500 (0.0500) time: 0.6757 data: 0.0004 max mem: 57114 Epoch: [2] [ 90/156] eta: 0:00:50 lr: 0.001031 min_lr: 0.001031 loss: 6.2395 (6.2381) weight_decay: 0.0500 (0.0500) time: 0.6736 data: 0.0004 max mem: 57114 Epoch: [2] [100/156] eta: 0:00:42 lr: 0.001057 min_lr: 0.001057 loss: 6.2119 (6.2300) weight_decay: 0.0500 (0.0500) time: 0.6773 data: 0.0004 max mem: 57114 Epoch: [2] [110/156] eta: 0:00:34 lr: 0.001082 min_lr: 0.001082 loss: 6.0858 (6.2110) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0004 max mem: 57114 Epoch: [2] [120/156] eta: 0:00:26 lr: 0.001108 min_lr: 0.001108 loss: 6.2229 (6.2165) weight_decay: 0.0500 (0.0500) time: 0.6780 data: 0.0004 max mem: 57114 Epoch: [2] [130/156] eta: 0:00:19 lr: 0.001134 min_lr: 0.001134 loss: 6.2307 (6.2152) weight_decay: 0.0500 (0.0500) time: 0.6729 data: 0.0009 max mem: 57114 Epoch: [2] [140/156] eta: 0:00:11 lr: 0.001159 min_lr: 0.001159 loss: 6.0501 (6.1945) weight_decay: 0.0500 (0.0500) time: 0.6713 data: 0.0007 max mem: 57114 Epoch: [2] [150/156] eta: 0:00:04 lr: 0.001185 min_lr: 0.001185 loss: 5.8137 (6.1751) weight_decay: 0.0500 (0.0500) time: 0.6685 data: 0.0001 max mem: 57114 Epoch: [2] [155/156] eta: 0:00:00 lr: 0.001198 min_lr: 0.001198 loss: 5.8137 (6.1651) weight_decay: 0.0500 (0.0500) time: 0.6667 data: 0.0001 max mem: 57114 Epoch: [2] Total time: 0:01:54 (0.7312 s / it) Averaged stats: lr: 0.001198 min_lr: 0.001198 loss: 5.8137 (6.1638) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 4.4752 (4.4752) acc1: 14.5833 (14.5833) acc5: 37.5000 (37.5000) time: 2.1620 data: 1.9065 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 4.5564 (4.4272) acc1: 17.1875 (15.6010) acc5: 35.4167 (33.8875) time: 0.5832 data: 0.3814 max mem: 57114 Test: Total time: 0:00:03 (0.6065 s / it) * Acc@1 13.621 Acc@5 30.882 loss 4.501 Accuracy of the model on the 50000 test images: 13.6% Max accuracy: 13.62% Test: [0/5] eta: 0:00:10 loss: 6.9080 (6.9080) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.0615 data: 1.8180 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9080 (6.9080) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.2558) time: 0.5628 data: 0.3637 max mem: 57114 Test: Total time: 0:00:02 (0.5757 s / it) * Acc@1 0.078 Acc@5 0.512 loss 6.908 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [3] [ 0/156] eta: 0:21:25 lr: 0.001200 min_lr: 0.001200 loss: 5.1468 (5.1468) weight_decay: 0.0500 (0.0500) time: 8.2406 data: 4.8250 max mem: 57114 Epoch: [3] [ 10/156] eta: 0:03:23 lr: 0.001226 min_lr: 0.001226 loss: 5.9493 (5.8822) weight_decay: 0.0500 (0.0500) time: 1.3938 data: 0.4389 max mem: 57114 Epoch: [3] [ 20/156] eta: 0:02:22 lr: 0.001252 min_lr: 0.001252 loss: 6.0072 (5.9925) weight_decay: 0.0500 (0.0500) time: 0.6916 data: 0.0003 max mem: 57114 Epoch: [3] [ 30/156] eta: 0:01:58 lr: 0.001277 min_lr: 0.001277 loss: 6.0051 (5.9736) weight_decay: 0.0500 (0.0500) time: 0.6865 data: 0.0004 max mem: 57114 Epoch: [3] [ 40/156] eta: 0:01:41 lr: 0.001303 min_lr: 0.001303 loss: 6.0435 (5.9921) weight_decay: 0.0500 (0.0500) time: 0.6860 data: 0.0004 max mem: 57114 Epoch: [3] [ 50/156] eta: 0:01:28 lr: 0.001329 min_lr: 0.001329 loss: 6.1303 (6.0105) weight_decay: 0.0500 (0.0500) time: 0.6790 data: 0.0003 max mem: 57114 Epoch: [3] [ 60/156] eta: 0:01:17 lr: 0.001354 min_lr: 0.001354 loss: 6.0615 (5.9625) weight_decay: 0.0500 (0.0500) time: 0.6833 data: 0.0004 max mem: 57114 Epoch: [3] [ 70/156] eta: 0:01:08 lr: 0.001380 min_lr: 0.001380 loss: 5.9806 (5.9518) weight_decay: 0.0500 (0.0500) time: 0.6775 data: 0.0004 max mem: 57114 Epoch: [3] [ 80/156] eta: 0:00:59 lr: 0.001406 min_lr: 0.001406 loss: 6.0051 (5.9461) weight_decay: 0.0500 (0.0500) time: 0.6758 data: 0.0004 max mem: 57114 Epoch: [3] [ 90/156] eta: 0:00:50 lr: 0.001431 min_lr: 0.001431 loss: 6.0051 (5.9506) weight_decay: 0.0500 (0.0500) time: 0.6840 data: 0.0004 max mem: 57114 Epoch: [3] [100/156] eta: 0:00:42 lr: 0.001457 min_lr: 0.001457 loss: 6.1309 (5.9557) weight_decay: 0.0500 (0.0500) time: 0.6834 data: 0.0004 max mem: 57114 Epoch: [3] [110/156] eta: 0:00:34 lr: 0.001483 min_lr: 0.001483 loss: 5.9996 (5.9503) weight_decay: 0.0500 (0.0500) time: 0.6792 data: 0.0004 max mem: 57114 Epoch: [3] [120/156] eta: 0:00:26 lr: 0.001508 min_lr: 0.001508 loss: 5.9397 (5.9434) weight_decay: 0.0500 (0.0500) time: 0.6816 data: 0.0004 max mem: 57114 Epoch: [3] [130/156] eta: 0:00:19 lr: 0.001534 min_lr: 0.001534 loss: 5.9180 (5.9328) weight_decay: 0.0500 (0.0500) time: 0.6792 data: 0.0008 max mem: 57114 Epoch: [3] [140/156] eta: 0:00:11 lr: 0.001559 min_lr: 0.001559 loss: 5.7780 (5.9187) weight_decay: 0.0500 (0.0500) time: 0.6721 data: 0.0007 max mem: 57114 Epoch: [3] [150/156] eta: 0:00:04 lr: 0.001585 min_lr: 0.001585 loss: 5.8363 (5.9122) weight_decay: 0.0500 (0.0500) time: 0.6667 data: 0.0001 max mem: 57114 Epoch: [3] [155/156] eta: 0:00:00 lr: 0.001598 min_lr: 0.001598 loss: 5.8973 (5.9189) weight_decay: 0.0500 (0.0500) time: 0.6672 data: 0.0001 max mem: 57114 Epoch: [3] Total time: 0:01:54 (0.7311 s / it) Averaged stats: lr: 0.001598 min_lr: 0.001598 loss: 5.8973 (5.8915) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 3.2672 (3.2672) acc1: 28.6458 (28.6458) acc5: 60.4167 (60.4167) time: 2.2613 data: 1.9999 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 3.7789 (3.4579) acc1: 27.0833 (27.4936) acc5: 48.4375 (49.7442) time: 0.6031 data: 0.4001 max mem: 57114 Test: Total time: 0:00:03 (0.6271 s / it) * Acc@1 25.659 Acc@5 49.043 loss 3.565 Accuracy of the model on the 50000 test images: 25.7% Max accuracy: 25.66% Test: [0/5] eta: 0:00:10 loss: 6.9082 (6.9082) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.1406 data: 1.8973 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9082 (6.9081) acc1: 0.0000 (0.1279) acc5: 0.5208 (0.6394) time: 0.5787 data: 0.3796 max mem: 57114 Test: Total time: 0:00:02 (0.5905 s / it) * Acc@1 0.110 Acc@5 0.512 loss 6.908 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.11% Epoch: [4] [ 0/156] eta: 0:10:34 lr: 0.001601 min_lr: 0.001601 loss: 5.9406 (5.9406) weight_decay: 0.0500 (0.0500) time: 4.0668 data: 3.4213 max mem: 57114 Epoch: [4] [ 10/156] eta: 0:02:29 lr: 0.001626 min_lr: 0.001626 loss: 5.5890 (5.7499) weight_decay: 0.0500 (0.0500) time: 1.0266 data: 0.3113 max mem: 57114 Epoch: [4] [ 20/156] eta: 0:01:58 lr: 0.001652 min_lr: 0.001652 loss: 5.5055 (5.6140) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0003 max mem: 57114 Epoch: [4] [ 30/156] eta: 0:01:42 lr: 0.001677 min_lr: 0.001677 loss: 5.5502 (5.6366) weight_decay: 0.0500 (0.0500) time: 0.6976 data: 0.0003 max mem: 57114 Epoch: [4] [ 40/156] eta: 0:01:30 lr: 0.001703 min_lr: 0.001703 loss: 5.5502 (5.6043) weight_decay: 0.0500 (0.0500) time: 0.6862 data: 0.0003 max mem: 57114 Epoch: [4] [ 50/156] eta: 0:01:20 lr: 0.001729 min_lr: 0.001729 loss: 5.8068 (5.6708) weight_decay: 0.0500 (0.0500) time: 0.6759 data: 0.0003 max mem: 57114 Epoch: [4] [ 60/156] eta: 0:01:11 lr: 0.001754 min_lr: 0.001754 loss: 5.8324 (5.6714) weight_decay: 0.0500 (0.0500) time: 0.6740 data: 0.0004 max mem: 57114 Epoch: [4] [ 70/156] eta: 0:01:03 lr: 0.001780 min_lr: 0.001780 loss: 5.5681 (5.6467) weight_decay: 0.0500 (0.0500) time: 0.6770 data: 0.0004 max mem: 57114 Epoch: [4] [ 80/156] eta: 0:00:55 lr: 0.001806 min_lr: 0.001806 loss: 5.3869 (5.6387) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0004 max mem: 57114 Epoch: [4] [ 90/156] eta: 0:00:47 lr: 0.001831 min_lr: 0.001831 loss: 5.3869 (5.6104) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.0004 max mem: 57114 Epoch: [4] [100/156] eta: 0:00:40 lr: 0.001857 min_lr: 0.001857 loss: 5.4439 (5.6037) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0004 max mem: 57114 Epoch: [4] [110/156] eta: 0:00:32 lr: 0.001883 min_lr: 0.001883 loss: 5.7090 (5.6176) weight_decay: 0.0500 (0.0500) time: 0.6778 data: 0.0004 max mem: 57114 Epoch: [4] [120/156] eta: 0:00:25 lr: 0.001908 min_lr: 0.001908 loss: 5.9107 (5.6281) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.0003 max mem: 57114 Epoch: [4] [130/156] eta: 0:00:18 lr: 0.001934 min_lr: 0.001934 loss: 5.7810 (5.6136) weight_decay: 0.0500 (0.0500) time: 0.6779 data: 0.0008 max mem: 57114 Epoch: [4] [140/156] eta: 0:00:11 lr: 0.001960 min_lr: 0.001960 loss: 5.7441 (5.6202) weight_decay: 0.0500 (0.0500) time: 0.6714 data: 0.0007 max mem: 57114 Epoch: [4] [150/156] eta: 0:00:04 lr: 0.001985 min_lr: 0.001985 loss: 5.7324 (5.6209) weight_decay: 0.0500 (0.0500) time: 0.6713 data: 0.0001 max mem: 57114 Epoch: [4] [155/156] eta: 0:00:00 lr: 0.001998 min_lr: 0.001998 loss: 5.7291 (5.6157) weight_decay: 0.0500 (0.0500) time: 0.6699 data: 0.0001 max mem: 57114 Epoch: [4] Total time: 0:01:50 (0.7068 s / it) Averaged stats: lr: 0.001998 min_lr: 0.001998 loss: 5.7291 (5.6442) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 2.8919 (2.8919) acc1: 33.8542 (33.8542) acc5: 63.0208 (63.0208) time: 2.2529 data: 1.9971 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 3.3534 (3.0626) acc1: 29.6875 (30.6905) acc5: 57.2917 (57.0333) time: 0.6013 data: 0.3995 max mem: 57114 Test: Total time: 0:00:03 (0.6224 s / it) * Acc@1 32.021 Acc@5 56.811 loss 3.097 Accuracy of the model on the 50000 test images: 32.0% Max accuracy: 32.02% Test: [0/5] eta: 0:00:10 loss: 6.9083 (6.9083) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.0619 data: 1.8185 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9083 (6.9083) acc1: 0.0000 (0.1279) acc5: 0.5208 (0.3836) time: 0.5629 data: 0.3638 max mem: 57114 Test: Total time: 0:00:03 (0.6032 s / it) * Acc@1 0.110 Acc@5 0.514 loss 6.908 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [5] [ 0/156] eta: 0:20:02 lr: 0.002001 min_lr: 0.002001 loss: 5.9639 (5.9639) weight_decay: 0.0500 (0.0500) time: 7.7084 data: 4.3664 max mem: 57114 Epoch: [5] [ 10/156] eta: 0:03:12 lr: 0.002026 min_lr: 0.002026 loss: 5.6525 (5.5266) weight_decay: 0.0500 (0.0500) time: 1.3191 data: 0.3972 max mem: 57114 Epoch: [5] [ 20/156] eta: 0:02:19 lr: 0.002052 min_lr: 0.002052 loss: 5.6525 (5.5212) weight_decay: 0.0500 (0.0500) time: 0.6947 data: 0.0003 max mem: 57114 Epoch: [5] [ 30/156] eta: 0:01:55 lr: 0.002078 min_lr: 0.002078 loss: 5.1874 (5.3290) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0003 max mem: 57114 Epoch: [5] [ 40/156] eta: 0:01:39 lr: 0.002103 min_lr: 0.002103 loss: 5.1355 (5.4068) weight_decay: 0.0500 (0.0500) time: 0.6781 data: 0.0003 max mem: 57114 Epoch: [5] [ 50/156] eta: 0:01:27 lr: 0.002129 min_lr: 0.002129 loss: 5.6139 (5.3739) weight_decay: 0.0500 (0.0500) time: 0.6833 data: 0.0004 max mem: 57114 Epoch: [5] [ 60/156] eta: 0:01:17 lr: 0.002155 min_lr: 0.002155 loss: 5.2794 (5.3784) weight_decay: 0.0500 (0.0500) time: 0.6953 data: 0.0004 max mem: 57114 Epoch: [5] [ 70/156] eta: 0:01:07 lr: 0.002180 min_lr: 0.002180 loss: 5.5758 (5.3820) weight_decay: 0.0500 (0.0500) time: 0.6951 data: 0.0004 max mem: 57114 Epoch: [5] [ 80/156] eta: 0:00:58 lr: 0.002206 min_lr: 0.002206 loss: 5.6194 (5.3913) weight_decay: 0.0500 (0.0500) time: 0.6880 data: 0.0004 max mem: 57114 Epoch: [5] [ 90/156] eta: 0:00:50 lr: 0.002231 min_lr: 0.002231 loss: 5.5139 (5.3997) weight_decay: 0.0500 (0.0500) time: 0.6875 data: 0.0004 max mem: 57114 Epoch: [5] [100/156] eta: 0:00:42 lr: 0.002257 min_lr: 0.002257 loss: 5.3715 (5.3808) weight_decay: 0.0500 (0.0500) time: 0.6919 data: 0.0004 max mem: 57114 Epoch: [5] [110/156] eta: 0:00:34 lr: 0.002283 min_lr: 0.002283 loss: 5.3134 (5.3762) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0004 max mem: 57114 Epoch: [5] [120/156] eta: 0:00:26 lr: 0.002308 min_lr: 0.002308 loss: 5.4709 (5.3777) weight_decay: 0.0500 (0.0500) time: 0.6787 data: 0.0004 max mem: 57114 Epoch: [5] [130/156] eta: 0:00:19 lr: 0.002334 min_lr: 0.002334 loss: 5.4709 (5.3749) weight_decay: 0.0500 (0.0500) time: 0.6777 data: 0.0009 max mem: 57114 Epoch: [5] [140/156] eta: 0:00:11 lr: 0.002360 min_lr: 0.002360 loss: 5.6401 (5.3770) weight_decay: 0.0500 (0.0500) time: 0.6742 data: 0.0007 max mem: 57114 Epoch: [5] [150/156] eta: 0:00:04 lr: 0.002385 min_lr: 0.002385 loss: 5.6315 (5.3940) weight_decay: 0.0500 (0.0500) time: 0.6757 data: 0.0001 max mem: 57114 Epoch: [5] [155/156] eta: 0:00:00 lr: 0.002398 min_lr: 0.002398 loss: 5.4568 (5.3803) weight_decay: 0.0500 (0.0500) time: 0.6732 data: 0.0001 max mem: 57114 Epoch: [5] Total time: 0:01:54 (0.7322 s / it) Averaged stats: lr: 0.002398 min_lr: 0.002398 loss: 5.4568 (5.4283) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:09 loss: 2.4418 (2.4418) acc1: 44.7917 (44.7917) acc5: 71.8750 (71.8750) time: 1.9912 data: 1.7351 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.8786 (2.7478) acc1: 39.0625 (38.3632) acc5: 65.1042 (62.9156) time: 0.5489 data: 0.3471 max mem: 57114 Test: Total time: 0:00:02 (0.5726 s / it) * Acc@1 38.615 Acc@5 63.949 loss 2.790 Accuracy of the model on the 50000 test images: 38.6% Max accuracy: 38.61% Test: [0/5] eta: 0:00:09 loss: 6.9084 (6.9084) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 1.9533 data: 1.7097 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9084 (6.9083) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.5413 data: 0.3420 max mem: 57114 Test: Total time: 0:00:02 (0.5526 s / it) * Acc@1 0.108 Acc@5 0.518 loss 6.909 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [6] [ 0/156] eta: 0:18:58 lr: 0.002401 min_lr: 0.002401 loss: 5.8492 (5.8492) weight_decay: 0.0500 (0.0500) time: 7.2979 data: 4.1359 max mem: 57114 Epoch: [6] [ 10/156] eta: 0:03:12 lr: 0.002426 min_lr: 0.002426 loss: 5.1618 (5.0687) weight_decay: 0.0500 (0.0500) time: 1.3190 data: 0.3762 max mem: 57114 Epoch: [6] [ 20/156] eta: 0:02:18 lr: 0.002452 min_lr: 0.002452 loss: 5.1542 (5.0744) weight_decay: 0.0500 (0.0500) time: 0.7023 data: 0.0003 max mem: 57114 Epoch: [6] [ 30/156] eta: 0:01:54 lr: 0.002478 min_lr: 0.002478 loss: 5.3058 (5.1887) weight_decay: 0.0500 (0.0500) time: 0.6788 data: 0.0003 max mem: 57114 Epoch: [6] [ 40/156] eta: 0:01:39 lr: 0.002503 min_lr: 0.002503 loss: 5.5512 (5.2456) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0004 max mem: 57114 Epoch: [6] [ 50/156] eta: 0:01:26 lr: 0.002529 min_lr: 0.002529 loss: 5.5493 (5.2467) weight_decay: 0.0500 (0.0500) time: 0.6875 data: 0.0004 max mem: 57114 Epoch: [6] [ 60/156] eta: 0:01:17 lr: 0.002555 min_lr: 0.002555 loss: 5.4700 (5.2303) weight_decay: 0.0500 (0.0500) time: 0.7004 data: 0.0004 max mem: 57114 Epoch: [6] [ 70/156] eta: 0:01:07 lr: 0.002580 min_lr: 0.002580 loss: 5.2491 (5.2380) weight_decay: 0.0500 (0.0500) time: 0.7143 data: 0.0004 max mem: 57114 Epoch: [6] [ 80/156] eta: 0:00:58 lr: 0.002606 min_lr: 0.002606 loss: 5.5533 (5.2621) weight_decay: 0.0500 (0.0500) time: 0.6914 data: 0.0004 max mem: 57114 Epoch: [6] [ 90/156] eta: 0:00:50 lr: 0.002632 min_lr: 0.002632 loss: 5.4331 (5.2234) weight_decay: 0.0500 (0.0500) time: 0.6774 data: 0.0003 max mem: 57114 Epoch: [6] [100/156] eta: 0:00:42 lr: 0.002657 min_lr: 0.002657 loss: 5.1729 (5.2242) weight_decay: 0.0500 (0.0500) time: 0.6826 data: 0.0004 max mem: 57114 Epoch: [6] [110/156] eta: 0:00:34 lr: 0.002683 min_lr: 0.002683 loss: 5.4187 (5.2287) weight_decay: 0.0500 (0.0500) time: 0.6791 data: 0.0004 max mem: 57114 Epoch: [6] [120/156] eta: 0:00:26 lr: 0.002709 min_lr: 0.002709 loss: 5.5138 (5.2394) weight_decay: 0.0500 (0.0500) time: 0.6846 data: 0.0004 max mem: 57114 Epoch: [6] [130/156] eta: 0:00:19 lr: 0.002734 min_lr: 0.002734 loss: 5.4750 (5.2390) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0009 max mem: 57114 Epoch: [6] [140/156] eta: 0:00:11 lr: 0.002760 min_lr: 0.002760 loss: 5.1393 (5.2231) weight_decay: 0.0500 (0.0500) time: 0.6761 data: 0.0007 max mem: 57114 Epoch: [6] [150/156] eta: 0:00:04 lr: 0.002786 min_lr: 0.002786 loss: 5.0780 (5.2167) weight_decay: 0.0500 (0.0500) time: 0.6727 data: 0.0001 max mem: 57114 Epoch: [6] [155/156] eta: 0:00:00 lr: 0.002798 min_lr: 0.002798 loss: 5.1271 (5.2078) weight_decay: 0.0500 (0.0500) time: 0.6721 data: 0.0001 max mem: 57114 Epoch: [6] Total time: 0:01:54 (0.7311 s / it) Averaged stats: lr: 0.002798 min_lr: 0.002798 loss: 5.1271 (5.2297) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 2.2497 (2.2497) acc1: 49.4792 (49.4792) acc5: 78.6458 (78.6458) time: 2.0373 data: 1.7817 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.5918 (2.4409) acc1: 45.3125 (44.3734) acc5: 72.9167 (70.3325) time: 0.5581 data: 0.3564 max mem: 57114 Test: Total time: 0:00:02 (0.5793 s / it) * Acc@1 44.721 Acc@5 70.482 loss 2.504 Accuracy of the model on the 50000 test images: 44.7% Max accuracy: 44.72% Test: [0/5] eta: 0:00:10 loss: 6.9086 (6.9086) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.0737 data: 1.8304 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9086 (6.9088) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.3836) time: 0.5653 data: 0.3662 max mem: 57114 Test: Total time: 0:00:02 (0.5752 s / it) * Acc@1 0.104 Acc@5 0.527 loss 6.909 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [7] [ 0/156] eta: 0:17:21 lr: 0.002801 min_lr: 0.002801 loss: 5.3940 (5.3940) weight_decay: 0.0500 (0.0500) time: 6.6789 data: 4.6488 max mem: 57114 Epoch: [7] [ 10/156] eta: 0:03:00 lr: 0.002827 min_lr: 0.002827 loss: 5.3940 (5.0810) weight_decay: 0.0500 (0.0500) time: 1.2373 data: 0.4228 max mem: 57114 Epoch: [7] [ 20/156] eta: 0:02:12 lr: 0.002852 min_lr: 0.002852 loss: 5.1281 (5.1146) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0003 max mem: 57114 Epoch: [7] [ 30/156] eta: 0:01:52 lr: 0.002878 min_lr: 0.002878 loss: 5.3059 (5.1869) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0003 max mem: 57114 Epoch: [7] [ 40/156] eta: 0:01:37 lr: 0.002903 min_lr: 0.002903 loss: 5.4876 (5.2008) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0003 max mem: 57114 Epoch: [7] [ 50/156] eta: 0:01:26 lr: 0.002929 min_lr: 0.002929 loss: 5.4197 (5.2047) weight_decay: 0.0500 (0.0500) time: 0.6916 data: 0.0004 max mem: 57114 Epoch: [7] [ 60/156] eta: 0:01:16 lr: 0.002955 min_lr: 0.002955 loss: 5.3034 (5.1837) weight_decay: 0.0500 (0.0500) time: 0.6947 data: 0.0004 max mem: 57114 Epoch: [7] [ 70/156] eta: 0:01:07 lr: 0.002980 min_lr: 0.002980 loss: 4.9472 (5.1409) weight_decay: 0.0500 (0.0500) time: 0.6963 data: 0.0004 max mem: 57114 Epoch: [7] [ 80/156] eta: 0:00:58 lr: 0.003006 min_lr: 0.003006 loss: 5.0532 (5.1394) weight_decay: 0.0500 (0.0500) time: 0.6892 data: 0.0004 max mem: 57114 Epoch: [7] [ 90/156] eta: 0:00:50 lr: 0.003032 min_lr: 0.003032 loss: 5.4746 (5.1880) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0004 max mem: 57114 Epoch: [7] [100/156] eta: 0:00:42 lr: 0.003057 min_lr: 0.003057 loss: 5.4798 (5.1866) weight_decay: 0.0500 (0.0500) time: 0.6801 data: 0.0004 max mem: 57114 Epoch: [7] [110/156] eta: 0:00:34 lr: 0.003083 min_lr: 0.003083 loss: 5.2562 (5.1851) weight_decay: 0.0500 (0.0500) time: 0.6872 data: 0.0004 max mem: 57114 Epoch: [7] [120/156] eta: 0:00:26 lr: 0.003109 min_lr: 0.003109 loss: 5.2149 (5.1792) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0003 max mem: 57114 Epoch: [7] [130/156] eta: 0:00:19 lr: 0.003134 min_lr: 0.003134 loss: 5.0034 (5.1518) weight_decay: 0.0500 (0.0500) time: 0.6732 data: 0.0008 max mem: 57114 Epoch: [7] [140/156] eta: 0:00:11 lr: 0.003160 min_lr: 0.003160 loss: 4.9023 (5.1306) weight_decay: 0.0500 (0.0500) time: 0.6687 data: 0.0007 max mem: 57114 Epoch: [7] [150/156] eta: 0:00:04 lr: 0.003186 min_lr: 0.003186 loss: 5.1397 (5.1284) weight_decay: 0.0500 (0.0500) time: 0.6663 data: 0.0001 max mem: 57114 Epoch: [7] [155/156] eta: 0:00:00 lr: 0.003198 min_lr: 0.003198 loss: 5.1397 (5.1186) weight_decay: 0.0500 (0.0500) time: 0.6676 data: 0.0001 max mem: 57114 Epoch: [7] Total time: 0:01:53 (0.7261 s / it) Averaged stats: lr: 0.003198 min_lr: 0.003198 loss: 5.1397 (5.0694) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.9626 (1.9626) acc1: 55.7292 (55.7292) acc5: 80.7292 (80.7292) time: 2.1840 data: 1.9283 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.4342 (2.2472) acc1: 48.4375 (48.5934) acc5: 76.0417 (74.0409) time: 0.5875 data: 0.3857 max mem: 57114 Test: Total time: 0:00:03 (0.6103 s / it) * Acc@1 48.298 Acc@5 73.699 loss 2.308 Accuracy of the model on the 50000 test images: 48.3% Max accuracy: 48.30% Test: [0/5] eta: 0:00:10 loss: 6.9091 (6.9091) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.0315 data: 1.7880 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9091 (6.9097) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.3836) time: 0.5571 data: 0.3577 max mem: 57114 Test: Total time: 0:00:02 (0.5678 s / it) * Acc@1 0.102 Acc@5 0.523 loss 6.909 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [8] [ 0/156] eta: 0:16:14 lr: 0.003201 min_lr: 0.003201 loss: 3.8847 (3.8847) weight_decay: 0.0500 (0.0500) time: 6.2495 data: 4.3225 max mem: 57114 Epoch: [8] [ 10/156] eta: 0:02:57 lr: 0.003227 min_lr: 0.003227 loss: 5.1398 (4.9664) weight_decay: 0.0500 (0.0500) time: 1.2152 data: 0.3933 max mem: 57114 Epoch: [8] [ 20/156] eta: 0:02:10 lr: 0.003252 min_lr: 0.003252 loss: 5.1865 (5.0731) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0004 max mem: 57114 Epoch: [8] [ 30/156] eta: 0:01:51 lr: 0.003278 min_lr: 0.003278 loss: 5.3653 (5.0536) weight_decay: 0.0500 (0.0500) time: 0.7028 data: 0.0004 max mem: 57114 Epoch: [8] [ 40/156] eta: 0:01:37 lr: 0.003304 min_lr: 0.003304 loss: 5.1962 (5.0740) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [8] [ 50/156] eta: 0:01:25 lr: 0.003329 min_lr: 0.003329 loss: 5.0863 (5.0388) weight_decay: 0.0500 (0.0500) time: 0.6913 data: 0.0004 max mem: 57114 Epoch: [8] [ 60/156] eta: 0:01:15 lr: 0.003355 min_lr: 0.003355 loss: 4.8664 (5.0143) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0003 max mem: 57114 Epoch: [8] [ 70/156] eta: 0:01:06 lr: 0.003381 min_lr: 0.003381 loss: 4.6681 (4.9577) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0004 max mem: 57114 Epoch: [8] [ 80/156] eta: 0:00:58 lr: 0.003406 min_lr: 0.003406 loss: 4.6681 (4.9490) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0004 max mem: 57114 Epoch: [8] [ 90/156] eta: 0:00:49 lr: 0.003432 min_lr: 0.003432 loss: 5.1346 (4.9512) weight_decay: 0.0500 (0.0500) time: 0.6871 data: 0.0004 max mem: 57114 Epoch: [8] [100/156] eta: 0:00:42 lr: 0.003458 min_lr: 0.003458 loss: 5.1225 (4.9559) weight_decay: 0.0500 (0.0500) time: 0.6928 data: 0.0004 max mem: 57114 Epoch: [8] [110/156] eta: 0:00:34 lr: 0.003483 min_lr: 0.003483 loss: 5.1225 (4.9665) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0004 max mem: 57114 Epoch: [8] [120/156] eta: 0:00:26 lr: 0.003509 min_lr: 0.003509 loss: 5.2188 (4.9796) weight_decay: 0.0500 (0.0500) time: 0.6765 data: 0.0004 max mem: 57114 Epoch: [8] [130/156] eta: 0:00:19 lr: 0.003534 min_lr: 0.003534 loss: 5.2217 (4.9881) weight_decay: 0.0500 (0.0500) time: 0.6754 data: 0.0008 max mem: 57114 Epoch: [8] [140/156] eta: 0:00:11 lr: 0.003560 min_lr: 0.003560 loss: 5.1238 (4.9875) weight_decay: 0.0500 (0.0500) time: 0.6700 data: 0.0007 max mem: 57114 Epoch: [8] [150/156] eta: 0:00:04 lr: 0.003586 min_lr: 0.003586 loss: 4.9722 (4.9793) weight_decay: 0.0500 (0.0500) time: 0.6684 data: 0.0001 max mem: 57114 Epoch: [8] [155/156] eta: 0:00:00 lr: 0.003599 min_lr: 0.003599 loss: 5.1238 (4.9875) weight_decay: 0.0500 (0.0500) time: 0.6718 data: 0.0001 max mem: 57114 Epoch: [8] Total time: 0:01:53 (0.7253 s / it) Averaged stats: lr: 0.003599 min_lr: 0.003599 loss: 5.1238 (4.8906) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 2.1142 (2.1142) acc1: 52.6042 (52.6042) acc5: 80.2083 (80.2083) time: 2.1189 data: 1.8622 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.2420 (2.1686) acc1: 52.6042 (48.2097) acc5: 79.1667 (75.0639) time: 0.5744 data: 0.3725 max mem: 57114 Test: Total time: 0:00:02 (0.5966 s / it) * Acc@1 51.586 Acc@5 76.401 loss 2.229 Accuracy of the model on the 50000 test images: 51.6% Max accuracy: 51.59% Test: [0/5] eta: 0:00:09 loss: 6.9093 (6.9093) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 1.9969 data: 1.7534 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9093 (6.9103) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.3836) time: 0.5500 data: 0.3508 max mem: 57114 Test: Total time: 0:00:02 (0.5621 s / it) * Acc@1 0.098 Acc@5 0.518 loss 6.909 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [9] [ 0/156] eta: 0:15:47 lr: 0.003601 min_lr: 0.003601 loss: 5.1432 (5.1432) weight_decay: 0.0500 (0.0500) time: 6.0715 data: 4.0109 max mem: 57114 Epoch: [9] [ 10/156] eta: 0:02:59 lr: 0.003627 min_lr: 0.003627 loss: 5.0773 (4.9432) weight_decay: 0.0500 (0.0500) time: 1.2269 data: 0.3649 max mem: 57114 Epoch: [9] [ 20/156] eta: 0:02:10 lr: 0.003652 min_lr: 0.003652 loss: 4.9699 (4.8684) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [9] [ 30/156] eta: 0:01:49 lr: 0.003678 min_lr: 0.003678 loss: 4.9699 (4.8163) weight_decay: 0.0500 (0.0500) time: 0.6767 data: 0.0004 max mem: 57114 Epoch: [9] [ 40/156] eta: 0:01:35 lr: 0.003704 min_lr: 0.003704 loss: 4.8599 (4.8146) weight_decay: 0.0500 (0.0500) time: 0.6837 data: 0.0004 max mem: 57114 Epoch: [9] [ 50/156] eta: 0:01:24 lr: 0.003729 min_lr: 0.003729 loss: 4.8600 (4.8159) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0004 max mem: 57114 Epoch: [9] [ 60/156] eta: 0:01:15 lr: 0.003755 min_lr: 0.003755 loss: 5.0165 (4.8562) weight_decay: 0.0500 (0.0500) time: 0.6951 data: 0.0004 max mem: 57114 Epoch: [9] [ 70/156] eta: 0:01:06 lr: 0.003781 min_lr: 0.003781 loss: 5.1001 (4.8128) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0004 max mem: 57114 Epoch: [9] [ 80/156] eta: 0:00:57 lr: 0.003806 min_lr: 0.003806 loss: 4.1551 (4.7379) weight_decay: 0.0500 (0.0500) time: 0.6963 data: 0.0004 max mem: 57114 Epoch: [9] [ 90/156] eta: 0:00:49 lr: 0.003832 min_lr: 0.003832 loss: 4.1551 (4.7057) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.0003 max mem: 57114 Epoch: [9] [100/156] eta: 0:00:41 lr: 0.003858 min_lr: 0.003858 loss: 4.7114 (4.7220) weight_decay: 0.0500 (0.0500) time: 0.6798 data: 0.0003 max mem: 57114 Epoch: [9] [110/156] eta: 0:00:34 lr: 0.003883 min_lr: 0.003883 loss: 4.8707 (4.7305) weight_decay: 0.0500 (0.0500) time: 0.6818 data: 0.0004 max mem: 57114 Epoch: [9] [120/156] eta: 0:00:26 lr: 0.003909 min_lr: 0.003909 loss: 4.8271 (4.7453) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0003 max mem: 57114 Epoch: [9] [130/156] eta: 0:00:19 lr: 0.003935 min_lr: 0.003935 loss: 4.8126 (4.7299) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.0008 max mem: 57114 Epoch: [9] [140/156] eta: 0:00:11 lr: 0.003960 min_lr: 0.003960 loss: 4.7693 (4.7257) weight_decay: 0.0500 (0.0500) time: 0.6773 data: 0.0007 max mem: 57114 Epoch: [9] [150/156] eta: 0:00:04 lr: 0.003986 min_lr: 0.003986 loss: 4.8290 (4.7242) weight_decay: 0.0500 (0.0500) time: 0.6642 data: 0.0001 max mem: 57114 Epoch: [9] [155/156] eta: 0:00:00 lr: 0.003999 min_lr: 0.003999 loss: 4.7403 (4.7190) weight_decay: 0.0500 (0.0500) time: 0.6647 data: 0.0001 max mem: 57114 Epoch: [9] Total time: 0:01:52 (0.7233 s / it) Averaged stats: lr: 0.003999 min_lr: 0.003999 loss: 4.7403 (4.7397) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.7449 (1.7449) acc1: 62.5000 (62.5000) acc5: 85.9375 (85.9375) time: 2.1259 data: 1.8692 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.9608 (1.9027) acc1: 54.1667 (55.3708) acc5: 82.2917 (79.2839) time: 0.5760 data: 0.3739 max mem: 57114 Test: Total time: 0:00:03 (0.6000 s / it) * Acc@1 55.525 Acc@5 80.125 loss 1.961 Accuracy of the model on the 50000 test images: 55.5% Max accuracy: 55.52% Test: [0/5] eta: 0:00:10 loss: 6.9096 (6.9096) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.0321 data: 1.7886 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9096 (6.9110) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.3836) time: 0.5571 data: 0.3578 max mem: 57114 Test: Total time: 0:00:02 (0.5691 s / it) * Acc@1 0.098 Acc@5 0.510 loss 6.910 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [10] [ 0/156] eta: 0:15:44 lr: 0.004001 min_lr: 0.004001 loss: 3.6188 (3.6188) weight_decay: 0.0500 (0.0500) time: 6.0555 data: 4.6256 max mem: 57114 Epoch: [10] [ 10/156] eta: 0:02:54 lr: 0.004027 min_lr: 0.004027 loss: 4.2387 (4.2581) weight_decay: 0.0500 (0.0500) time: 1.1932 data: 0.4208 max mem: 57114 Epoch: [10] [ 20/156] eta: 0:02:09 lr: 0.004053 min_lr: 0.004053 loss: 4.4351 (4.4081) weight_decay: 0.0500 (0.0500) time: 0.6960 data: 0.0003 max mem: 57114 Epoch: [10] [ 30/156] eta: 0:01:49 lr: 0.004078 min_lr: 0.004078 loss: 4.8167 (4.5418) weight_decay: 0.0500 (0.0500) time: 0.6951 data: 0.0003 max mem: 57114 Epoch: [10] [ 40/156] eta: 0:01:36 lr: 0.004104 min_lr: 0.004104 loss: 4.8801 (4.6249) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0003 max mem: 57114 Epoch: [10] [ 50/156] eta: 0:01:25 lr: 0.004130 min_lr: 0.004130 loss: 4.8801 (4.6611) weight_decay: 0.0500 (0.0500) time: 0.7007 data: 0.0004 max mem: 57114 Epoch: [10] [ 60/156] eta: 0:01:15 lr: 0.004155 min_lr: 0.004155 loss: 4.7794 (4.6616) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0004 max mem: 57114 Epoch: [10] [ 70/156] eta: 0:01:06 lr: 0.004181 min_lr: 0.004181 loss: 4.9151 (4.6728) weight_decay: 0.0500 (0.0500) time: 0.6800 data: 0.0004 max mem: 57114 Epoch: [10] [ 80/156] eta: 0:00:57 lr: 0.004206 min_lr: 0.004206 loss: 4.8788 (4.6724) weight_decay: 0.0500 (0.0500) time: 0.6777 data: 0.0004 max mem: 57114 Epoch: [10] [ 90/156] eta: 0:00:49 lr: 0.004232 min_lr: 0.004232 loss: 4.5888 (4.6042) weight_decay: 0.0500 (0.0500) time: 0.6822 data: 0.0004 max mem: 57114 Epoch: [10] [100/156] eta: 0:00:41 lr: 0.004258 min_lr: 0.004258 loss: 4.0322 (4.5689) weight_decay: 0.0500 (0.0500) time: 0.6853 data: 0.0004 max mem: 57114 Epoch: [10] [110/156] eta: 0:00:33 lr: 0.004283 min_lr: 0.004283 loss: 4.4005 (4.5794) weight_decay: 0.0500 (0.0500) time: 0.6855 data: 0.0004 max mem: 57114 Epoch: [10] [120/156] eta: 0:00:26 lr: 0.004309 min_lr: 0.004309 loss: 4.8883 (4.5790) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0004 max mem: 57114 Epoch: [10] [130/156] eta: 0:00:19 lr: 0.004335 min_lr: 0.004335 loss: 4.6563 (4.5763) weight_decay: 0.0500 (0.0500) time: 0.6887 data: 0.0008 max mem: 57114 Epoch: [10] [140/156] eta: 0:00:11 lr: 0.004360 min_lr: 0.004360 loss: 4.8933 (4.6079) weight_decay: 0.0500 (0.0500) time: 0.6773 data: 0.0007 max mem: 57114 Epoch: [10] [150/156] eta: 0:00:04 lr: 0.004386 min_lr: 0.004386 loss: 4.7839 (4.5990) weight_decay: 0.0500 (0.0500) time: 0.6656 data: 0.0001 max mem: 57114 Epoch: [10] [155/156] eta: 0:00:00 lr: 0.004399 min_lr: 0.004399 loss: 4.6794 (4.5962) weight_decay: 0.0500 (0.0500) time: 0.6655 data: 0.0001 max mem: 57114 Epoch: [10] Total time: 0:01:52 (0.7229 s / it) Averaged stats: lr: 0.004399 min_lr: 0.004399 loss: 4.6794 (4.6198) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.6451 (1.6451) acc1: 63.5417 (63.5417) acc5: 89.0625 (89.0625) time: 2.0794 data: 1.8240 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.8384 (1.7284) acc1: 58.8542 (58.6957) acc5: 82.8125 (82.2251) time: 0.5665 data: 0.3649 max mem: 57114 Test: Total time: 0:00:02 (0.5897 s / it) * Acc@1 58.588 Acc@5 82.223 loss 1.777 Accuracy of the model on the 50000 test images: 58.6% Max accuracy: 58.59% Test: [0/5] eta: 0:00:10 loss: 6.9098 (6.9098) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.0778 data: 1.8343 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9098 (6.9116) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.3836) time: 0.5661 data: 0.3669 max mem: 57114 Test: Total time: 0:00:02 (0.5786 s / it) * Acc@1 0.098 Acc@5 0.518 loss 6.910 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [11] [ 0/156] eta: 0:17:13 lr: 0.004401 min_lr: 0.004401 loss: 4.7723 (4.7723) weight_decay: 0.0500 (0.0500) time: 6.6228 data: 4.3958 max mem: 57114 Epoch: [11] [ 10/156] eta: 0:03:04 lr: 0.004427 min_lr: 0.004427 loss: 4.5982 (4.4395) weight_decay: 0.0500 (0.0500) time: 1.2617 data: 0.3999 max mem: 57114 Epoch: [11] [ 20/156] eta: 0:02:14 lr: 0.004453 min_lr: 0.004453 loss: 4.6713 (4.5439) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.0003 max mem: 57114 Epoch: [11] [ 30/156] eta: 0:01:52 lr: 0.004478 min_lr: 0.004478 loss: 4.8827 (4.5346) weight_decay: 0.0500 (0.0500) time: 0.6940 data: 0.0003 max mem: 57114 Epoch: [11] [ 40/156] eta: 0:01:39 lr: 0.004504 min_lr: 0.004504 loss: 4.2066 (4.4452) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0004 max mem: 57114 Epoch: [11] [ 50/156] eta: 0:01:27 lr: 0.004530 min_lr: 0.004530 loss: 4.3688 (4.5010) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0004 max mem: 57114 Epoch: [11] [ 60/156] eta: 0:01:16 lr: 0.004555 min_lr: 0.004555 loss: 4.9140 (4.5416) weight_decay: 0.0500 (0.0500) time: 0.6801 data: 0.0005 max mem: 57114 Epoch: [11] [ 70/156] eta: 0:01:07 lr: 0.004581 min_lr: 0.004581 loss: 4.4657 (4.5074) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0005 max mem: 57114 Epoch: [11] [ 80/156] eta: 0:00:58 lr: 0.004607 min_lr: 0.004607 loss: 4.6939 (4.5458) weight_decay: 0.0500 (0.0500) time: 0.6847 data: 0.0004 max mem: 57114 Epoch: [11] [ 90/156] eta: 0:00:50 lr: 0.004632 min_lr: 0.004632 loss: 4.6545 (4.5347) weight_decay: 0.0500 (0.0500) time: 0.6837 data: 0.0004 max mem: 57114 Epoch: [11] [100/156] eta: 0:00:42 lr: 0.004658 min_lr: 0.004658 loss: 4.3636 (4.5236) weight_decay: 0.0500 (0.0500) time: 0.6949 data: 0.0004 max mem: 57114 Epoch: [11] [110/156] eta: 0:00:34 lr: 0.004684 min_lr: 0.004684 loss: 4.6084 (4.5358) weight_decay: 0.0500 (0.0500) time: 0.6987 data: 0.0004 max mem: 57114 Epoch: [11] [120/156] eta: 0:00:26 lr: 0.004709 min_lr: 0.004709 loss: 4.7154 (4.5508) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0004 max mem: 57114 Epoch: [11] [130/156] eta: 0:00:19 lr: 0.004735 min_lr: 0.004735 loss: 4.6399 (4.5273) weight_decay: 0.0500 (0.0500) time: 0.6786 data: 0.0009 max mem: 57114 Epoch: [11] [140/156] eta: 0:00:11 lr: 0.004761 min_lr: 0.004761 loss: 4.3947 (4.5158) weight_decay: 0.0500 (0.0500) time: 0.6695 data: 0.0007 max mem: 57114 Epoch: [11] [150/156] eta: 0:00:04 lr: 0.004786 min_lr: 0.004786 loss: 4.4052 (4.5043) weight_decay: 0.0500 (0.0500) time: 0.6671 data: 0.0001 max mem: 57114 Epoch: [11] [155/156] eta: 0:00:00 lr: 0.004799 min_lr: 0.004799 loss: 4.4484 (4.4977) weight_decay: 0.0500 (0.0500) time: 0.6693 data: 0.0001 max mem: 57114 Epoch: [11] Total time: 0:01:53 (0.7296 s / it) Averaged stats: lr: 0.004799 min_lr: 0.004799 loss: 4.4484 (4.5192) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.6277 (1.6277) acc1: 63.0208 (63.0208) acc5: 91.1458 (91.1458) time: 2.0886 data: 1.8326 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7335 (1.7245) acc1: 62.5000 (58.9514) acc5: 84.8958 (83.1202) time: 0.5684 data: 0.3666 max mem: 57114 Test: Total time: 0:00:02 (0.5922 s / it) * Acc@1 59.039 Acc@5 82.757 loss 1.766 Accuracy of the model on the 50000 test images: 59.0% Max accuracy: 59.04% Test: [0/5] eta: 0:00:10 loss: 6.9102 (6.9102) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.0590 data: 1.8155 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9102 (6.9122) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.3836) time: 0.5624 data: 0.3632 max mem: 57114 Test: Total time: 0:00:02 (0.5747 s / it) * Acc@1 0.094 Acc@5 0.506 loss 6.911 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [12] [ 0/156] eta: 0:16:07 lr: 0.004802 min_lr: 0.004802 loss: 5.3681 (5.3681) weight_decay: 0.0500 (0.0500) time: 6.2009 data: 4.7998 max mem: 57114 Epoch: [12] [ 10/156] eta: 0:03:02 lr: 0.004827 min_lr: 0.004827 loss: 4.7481 (4.6400) weight_decay: 0.0500 (0.0500) time: 1.2522 data: 0.4366 max mem: 57114 Epoch: [12] [ 20/156] eta: 0:02:15 lr: 0.004853 min_lr: 0.004853 loss: 4.5595 (4.5553) weight_decay: 0.0500 (0.0500) time: 0.7330 data: 0.0003 max mem: 57114 Epoch: [12] [ 30/156] eta: 0:01:53 lr: 0.004878 min_lr: 0.004878 loss: 4.5595 (4.5059) weight_decay: 0.0500 (0.0500) time: 0.7022 data: 0.0003 max mem: 57114 Epoch: [12] [ 40/156] eta: 0:01:37 lr: 0.004904 min_lr: 0.004904 loss: 4.5331 (4.4985) weight_decay: 0.0500 (0.0500) time: 0.6846 data: 0.0003 max mem: 57114 Epoch: [12] [ 50/156] eta: 0:01:26 lr: 0.004930 min_lr: 0.004930 loss: 4.1933 (4.3986) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0003 max mem: 57114 Epoch: [12] [ 60/156] eta: 0:01:16 lr: 0.004955 min_lr: 0.004955 loss: 4.0547 (4.3727) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0004 max mem: 57114 Epoch: [12] [ 70/156] eta: 0:01:06 lr: 0.004981 min_lr: 0.004981 loss: 4.4223 (4.3865) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0004 max mem: 57114 Epoch: [12] [ 80/156] eta: 0:00:58 lr: 0.005007 min_lr: 0.005007 loss: 4.3126 (4.3548) weight_decay: 0.0500 (0.0500) time: 0.6933 data: 0.0004 max mem: 57114 Epoch: [12] [ 90/156] eta: 0:00:50 lr: 0.005032 min_lr: 0.005032 loss: 4.2843 (4.3579) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0004 max mem: 57114 Epoch: [12] [100/156] eta: 0:00:42 lr: 0.005058 min_lr: 0.005058 loss: 4.3074 (4.3378) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0004 max mem: 57114 Epoch: [12] [110/156] eta: 0:00:34 lr: 0.005084 min_lr: 0.005084 loss: 4.2232 (4.3271) weight_decay: 0.0500 (0.0500) time: 0.6797 data: 0.0003 max mem: 57114 Epoch: [12] [120/156] eta: 0:00:26 lr: 0.005109 min_lr: 0.005109 loss: 4.2948 (4.3255) weight_decay: 0.0500 (0.0500) time: 0.6778 data: 0.0004 max mem: 57114 Epoch: [12] [130/156] eta: 0:00:19 lr: 0.005135 min_lr: 0.005135 loss: 4.5300 (4.3249) weight_decay: 0.0500 (0.0500) time: 0.6777 data: 0.0008 max mem: 57114 Epoch: [12] [140/156] eta: 0:00:11 lr: 0.005161 min_lr: 0.005161 loss: 4.6315 (4.3539) weight_decay: 0.0500 (0.0500) time: 0.6767 data: 0.0007 max mem: 57114 Epoch: [12] [150/156] eta: 0:00:04 lr: 0.005186 min_lr: 0.005186 loss: 4.5261 (4.3539) weight_decay: 0.0500 (0.0500) time: 0.6757 data: 0.0001 max mem: 57114 Epoch: [12] [155/156] eta: 0:00:00 lr: 0.005199 min_lr: 0.005199 loss: 4.4675 (4.3541) weight_decay: 0.0500 (0.0500) time: 0.6780 data: 0.0001 max mem: 57114 Epoch: [12] Total time: 0:01:53 (0.7285 s / it) Averaged stats: lr: 0.005199 min_lr: 0.005199 loss: 4.4675 (4.4300) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.5187 (1.5187) acc1: 63.5417 (63.5417) acc5: 91.6667 (91.6667) time: 2.0309 data: 1.7753 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.6583 (1.6781) acc1: 63.0208 (59.7187) acc5: 89.5833 (84.6547) time: 0.5568 data: 0.3551 max mem: 57114 Test: Total time: 0:00:02 (0.5790 s / it) * Acc@1 59.996 Acc@5 83.600 loss 1.734 Accuracy of the model on the 50000 test images: 60.0% Max accuracy: 60.00% Test: [0/5] eta: 0:00:10 loss: 6.9107 (6.9107) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.0618 data: 1.8183 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9107 (6.9128) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.3836) time: 0.5630 data: 0.3637 max mem: 57114 Test: Total time: 0:00:02 (0.5751 s / it) * Acc@1 0.092 Acc@5 0.510 loss 6.911 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [13] [ 0/156] eta: 0:16:35 lr: 0.005202 min_lr: 0.005202 loss: 4.5402 (4.5402) weight_decay: 0.0500 (0.0500) time: 6.3793 data: 3.8439 max mem: 57114 Epoch: [13] [ 10/156] eta: 0:03:02 lr: 0.005227 min_lr: 0.005227 loss: 4.6508 (4.5840) weight_decay: 0.0500 (0.0500) time: 1.2469 data: 0.3498 max mem: 57114 Epoch: [13] [ 20/156] eta: 0:02:16 lr: 0.005253 min_lr: 0.005253 loss: 4.5138 (4.4470) weight_decay: 0.0500 (0.0500) time: 0.7321 data: 0.0003 max mem: 57114 Epoch: [13] [ 30/156] eta: 0:01:53 lr: 0.005279 min_lr: 0.005279 loss: 4.3785 (4.3992) weight_decay: 0.0500 (0.0500) time: 0.7061 data: 0.0003 max mem: 57114 Epoch: [13] [ 40/156] eta: 0:01:37 lr: 0.005304 min_lr: 0.005304 loss: 4.2946 (4.3747) weight_decay: 0.0500 (0.0500) time: 0.6801 data: 0.0004 max mem: 57114 Epoch: [13] [ 50/156] eta: 0:01:27 lr: 0.005330 min_lr: 0.005330 loss: 4.4639 (4.3911) weight_decay: 0.0500 (0.0500) time: 0.7028 data: 0.0004 max mem: 57114 Epoch: [13] [ 60/156] eta: 0:01:17 lr: 0.005356 min_lr: 0.005356 loss: 4.5273 (4.4103) weight_decay: 0.0500 (0.0500) time: 0.7310 data: 0.0004 max mem: 57114 Epoch: [13] [ 70/156] eta: 0:01:08 lr: 0.005381 min_lr: 0.005381 loss: 4.6722 (4.3915) weight_decay: 0.0500 (0.0500) time: 0.7160 data: 0.0004 max mem: 57114 Epoch: [13] [ 80/156] eta: 0:00:59 lr: 0.005407 min_lr: 0.005407 loss: 4.4926 (4.3853) weight_decay: 0.0500 (0.0500) time: 0.6993 data: 0.0003 max mem: 57114 Epoch: [13] [ 90/156] eta: 0:00:50 lr: 0.005433 min_lr: 0.005433 loss: 4.4926 (4.3788) weight_decay: 0.0500 (0.0500) time: 0.6968 data: 0.0004 max mem: 57114 Epoch: [13] [100/156] eta: 0:00:42 lr: 0.005458 min_lr: 0.005458 loss: 4.3244 (4.3757) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0004 max mem: 57114 Epoch: [13] [110/156] eta: 0:00:34 lr: 0.005484 min_lr: 0.005484 loss: 4.6445 (4.4035) weight_decay: 0.0500 (0.0500) time: 0.6773 data: 0.0004 max mem: 57114 Epoch: [13] [120/156] eta: 0:00:26 lr: 0.005509 min_lr: 0.005509 loss: 4.7577 (4.4009) weight_decay: 0.0500 (0.0500) time: 0.6891 data: 0.0004 max mem: 57114 Epoch: [13] [130/156] eta: 0:00:19 lr: 0.005535 min_lr: 0.005535 loss: 4.6034 (4.4121) weight_decay: 0.0500 (0.0500) time: 0.6940 data: 0.0008 max mem: 57114 Epoch: [13] [140/156] eta: 0:00:11 lr: 0.005561 min_lr: 0.005561 loss: 4.5908 (4.4131) weight_decay: 0.0500 (0.0500) time: 0.6881 data: 0.0007 max mem: 57114 Epoch: [13] [150/156] eta: 0:00:04 lr: 0.005586 min_lr: 0.005586 loss: 4.5576 (4.4045) weight_decay: 0.0500 (0.0500) time: 0.6846 data: 0.0001 max mem: 57114 Epoch: [13] [155/156] eta: 0:00:00 lr: 0.005599 min_lr: 0.005599 loss: 4.4230 (4.3926) weight_decay: 0.0500 (0.0500) time: 0.6893 data: 0.0001 max mem: 57114 Epoch: [13] Total time: 0:01:55 (0.7376 s / it) Averaged stats: lr: 0.005599 min_lr: 0.005599 loss: 4.4230 (4.3569) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.5302 (1.5302) acc1: 67.7083 (67.7083) acc5: 91.6667 (91.6667) time: 2.1176 data: 1.8621 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7642 (1.6972) acc1: 64.5833 (60.9974) acc5: 88.0208 (85.6778) time: 0.5742 data: 0.3725 max mem: 57114 Test: Total time: 0:00:02 (0.5939 s / it) * Acc@1 61.141 Acc@5 83.905 loss 1.781 Accuracy of the model on the 50000 test images: 61.1% Max accuracy: 61.14% Test: [0/5] eta: 0:00:10 loss: 6.9113 (6.9113) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.0059 data: 1.7624 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9113 (6.9134) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.3836) time: 0.5518 data: 0.3526 max mem: 57114 Test: Total time: 0:00:02 (0.5648 s / it) * Acc@1 0.092 Acc@5 0.516 loss 6.912 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [14] [ 0/156] eta: 0:13:57 lr: 0.005602 min_lr: 0.005602 loss: 4.7302 (4.7302) weight_decay: 0.0500 (0.0500) time: 5.3714 data: 4.1448 max mem: 57114 Epoch: [14] [ 10/156] eta: 0:02:45 lr: 0.005627 min_lr: 0.005627 loss: 4.7302 (4.5816) weight_decay: 0.0500 (0.0500) time: 1.1327 data: 0.3771 max mem: 57114 Epoch: [14] [ 20/156] eta: 0:02:04 lr: 0.005653 min_lr: 0.005653 loss: 4.3651 (4.2984) weight_decay: 0.0500 (0.0500) time: 0.6939 data: 0.0003 max mem: 57114 Epoch: [14] [ 30/156] eta: 0:01:47 lr: 0.005679 min_lr: 0.005679 loss: 4.0908 (4.3066) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.0004 max mem: 57114 Epoch: [14] [ 40/156] eta: 0:01:35 lr: 0.005704 min_lr: 0.005704 loss: 4.0908 (4.2181) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0004 max mem: 57114 Epoch: [14] [ 50/156] eta: 0:01:24 lr: 0.005730 min_lr: 0.005730 loss: 4.1430 (4.2114) weight_decay: 0.0500 (0.0500) time: 0.7206 data: 0.0005 max mem: 57114 Epoch: [14] [ 60/156] eta: 0:01:16 lr: 0.005756 min_lr: 0.005756 loss: 4.1348 (4.2134) weight_decay: 0.0500 (0.0500) time: 0.7394 data: 0.0005 max mem: 57114 Epoch: [14] [ 70/156] eta: 0:01:07 lr: 0.005781 min_lr: 0.005781 loss: 4.1295 (4.1964) weight_decay: 0.0500 (0.0500) time: 0.7250 data: 0.0004 max mem: 57114 Epoch: [14] [ 80/156] eta: 0:00:58 lr: 0.005807 min_lr: 0.005807 loss: 4.3387 (4.2263) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0004 max mem: 57114 Epoch: [14] [ 90/156] eta: 0:00:50 lr: 0.005833 min_lr: 0.005833 loss: 4.3387 (4.2498) weight_decay: 0.0500 (0.0500) time: 0.6952 data: 0.0004 max mem: 57114 Epoch: [14] [100/156] eta: 0:00:42 lr: 0.005858 min_lr: 0.005858 loss: 4.2877 (4.2393) weight_decay: 0.0500 (0.0500) time: 0.6905 data: 0.0004 max mem: 57114 Epoch: [14] [110/156] eta: 0:00:34 lr: 0.005884 min_lr: 0.005884 loss: 4.2700 (4.2423) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0004 max mem: 57114 Epoch: [14] [120/156] eta: 0:00:26 lr: 0.005910 min_lr: 0.005910 loss: 4.2505 (4.2355) weight_decay: 0.0500 (0.0500) time: 0.7036 data: 0.0004 max mem: 57114 Epoch: [14] [130/156] eta: 0:00:19 lr: 0.005935 min_lr: 0.005935 loss: 4.2505 (4.2404) weight_decay: 0.0500 (0.0500) time: 0.6977 data: 0.0005 max mem: 57114 Epoch: [14] [140/156] eta: 0:00:11 lr: 0.005961 min_lr: 0.005961 loss: 4.4454 (4.2562) weight_decay: 0.0500 (0.0500) time: 0.6801 data: 0.0003 max mem: 57114 Epoch: [14] [150/156] eta: 0:00:04 lr: 0.005987 min_lr: 0.005987 loss: 4.5180 (4.2661) weight_decay: 0.0500 (0.0500) time: 0.6773 data: 0.0001 max mem: 57114 Epoch: [14] [155/156] eta: 0:00:00 lr: 0.005999 min_lr: 0.005999 loss: 4.5180 (4.2597) weight_decay: 0.0500 (0.0500) time: 0.6766 data: 0.0001 max mem: 57114 Epoch: [14] Total time: 0:01:54 (0.7328 s / it) Averaged stats: lr: 0.005999 min_lr: 0.005999 loss: 4.5180 (4.2944) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.5211 (1.5211) acc1: 70.3125 (70.3125) acc5: 89.5833 (89.5833) time: 2.1059 data: 1.8504 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7095 (1.6033) acc1: 65.6250 (64.5780) acc5: 89.5833 (85.2941) time: 0.5718 data: 0.3701 max mem: 57114 Test: Total time: 0:00:02 (0.5955 s / it) * Acc@1 61.515 Acc@5 83.989 loss 1.768 Accuracy of the model on the 50000 test images: 61.5% Max accuracy: 61.51% Test: [0/5] eta: 0:00:09 loss: 6.9120 (6.9120) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 1.9540 data: 1.7104 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9120 (6.9139) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.5415 data: 0.3422 max mem: 57114 Test: Total time: 0:00:02 (0.5543 s / it) * Acc@1 0.094 Acc@5 0.508 loss 6.912 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [15] [ 0/156] eta: 0:14:44 lr: 0.006002 min_lr: 0.006002 loss: 4.3763 (4.3763) weight_decay: 0.0500 (0.0500) time: 5.6721 data: 3.7482 max mem: 57114 Epoch: [15] [ 10/156] eta: 0:02:49 lr: 0.006028 min_lr: 0.006028 loss: 4.2275 (4.2601) weight_decay: 0.0500 (0.0500) time: 1.1619 data: 0.3410 max mem: 57114 Epoch: [15] [ 20/156] eta: 0:02:08 lr: 0.006053 min_lr: 0.006053 loss: 4.1850 (4.1846) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0003 max mem: 57114 Epoch: [15] [ 30/156] eta: 0:01:50 lr: 0.006079 min_lr: 0.006079 loss: 4.2131 (4.1899) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0003 max mem: 57114 Epoch: [15] [ 40/156] eta: 0:01:37 lr: 0.006105 min_lr: 0.006105 loss: 4.3292 (4.2263) weight_decay: 0.0500 (0.0500) time: 0.7266 data: 0.0004 max mem: 57114 Epoch: [15] [ 50/156] eta: 0:01:26 lr: 0.006130 min_lr: 0.006130 loss: 4.3614 (4.1919) weight_decay: 0.0500 (0.0500) time: 0.7205 data: 0.0004 max mem: 57114 Epoch: [15] [ 60/156] eta: 0:01:16 lr: 0.006156 min_lr: 0.006156 loss: 3.8396 (4.1721) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0003 max mem: 57114 Epoch: [15] [ 70/156] eta: 0:01:07 lr: 0.006181 min_lr: 0.006181 loss: 4.5889 (4.2220) weight_decay: 0.0500 (0.0500) time: 0.7095 data: 0.0003 max mem: 57114 Epoch: [15] [ 80/156] eta: 0:00:58 lr: 0.006207 min_lr: 0.006207 loss: 4.4255 (4.1946) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0003 max mem: 57114 Epoch: [15] [ 90/156] eta: 0:00:50 lr: 0.006233 min_lr: 0.006233 loss: 4.2510 (4.2136) weight_decay: 0.0500 (0.0500) time: 0.6929 data: 0.0004 max mem: 57114 Epoch: [15] [100/156] eta: 0:00:42 lr: 0.006258 min_lr: 0.006258 loss: 4.4419 (4.2302) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0003 max mem: 57114 Epoch: [15] [110/156] eta: 0:00:34 lr: 0.006284 min_lr: 0.006284 loss: 4.4036 (4.2478) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [15] [120/156] eta: 0:00:27 lr: 0.006310 min_lr: 0.006310 loss: 4.4928 (4.2541) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0004 max mem: 57114 Epoch: [15] [130/156] eta: 0:00:19 lr: 0.006335 min_lr: 0.006335 loss: 4.4928 (4.2451) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0005 max mem: 57114 Epoch: [15] [140/156] eta: 0:00:11 lr: 0.006361 min_lr: 0.006361 loss: 3.9288 (4.2330) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0003 max mem: 57114 Epoch: [15] [150/156] eta: 0:00:04 lr: 0.006387 min_lr: 0.006387 loss: 4.3350 (4.2466) weight_decay: 0.0500 (0.0500) time: 0.6780 data: 0.0001 max mem: 57114 Epoch: [15] [155/156] eta: 0:00:00 lr: 0.006399 min_lr: 0.006399 loss: 4.1822 (4.2338) weight_decay: 0.0500 (0.0500) time: 0.6764 data: 0.0001 max mem: 57114 Epoch: [15] Total time: 0:01:55 (0.7376 s / it) Averaged stats: lr: 0.006399 min_lr: 0.006399 loss: 4.1822 (4.2456) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.4190 (1.4190) acc1: 73.4375 (73.4375) acc5: 92.1875 (92.1875) time: 2.0678 data: 1.8123 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7325 (1.5931) acc1: 61.9792 (63.6829) acc5: 86.9792 (85.8056) time: 0.5644 data: 0.3625 max mem: 57114 Test: Total time: 0:00:02 (0.5872 s / it) * Acc@1 62.182 Acc@5 84.877 loss 1.682 Accuracy of the model on the 50000 test images: 62.2% Max accuracy: 62.18% Test: [0/5] eta: 0:00:10 loss: 6.9124 (6.9124) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.1030 data: 1.8598 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9124 (6.9145) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.5712 data: 0.3720 max mem: 57114 Test: Total time: 0:00:02 (0.5825 s / it) * Acc@1 0.094 Acc@5 0.500 loss 6.913 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [16] [ 0/156] eta: 0:14:10 lr: 0.006402 min_lr: 0.006402 loss: 3.5861 (3.5861) weight_decay: 0.0500 (0.0500) time: 5.4492 data: 4.0975 max mem: 57114 Epoch: [16] [ 10/156] eta: 0:02:50 lr: 0.006428 min_lr: 0.006428 loss: 3.7842 (4.0122) weight_decay: 0.0500 (0.0500) time: 1.1709 data: 0.3728 max mem: 57114 Epoch: [16] [ 20/156] eta: 0:02:10 lr: 0.006453 min_lr: 0.006453 loss: 4.2437 (4.2029) weight_decay: 0.0500 (0.0500) time: 0.7357 data: 0.0004 max mem: 57114 Epoch: [16] [ 30/156] eta: 0:01:51 lr: 0.006479 min_lr: 0.006479 loss: 4.4830 (4.2684) weight_decay: 0.0500 (0.0500) time: 0.7325 data: 0.0003 max mem: 57114 Epoch: [16] [ 40/156] eta: 0:01:38 lr: 0.006505 min_lr: 0.006505 loss: 4.4500 (4.2827) weight_decay: 0.0500 (0.0500) time: 0.7308 data: 0.0004 max mem: 57114 Epoch: [16] [ 50/156] eta: 0:01:26 lr: 0.006530 min_lr: 0.006530 loss: 4.2034 (4.2377) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0004 max mem: 57114 Epoch: [16] [ 60/156] eta: 0:01:17 lr: 0.006556 min_lr: 0.006556 loss: 4.4842 (4.2893) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [16] [ 70/156] eta: 0:01:07 lr: 0.006582 min_lr: 0.006582 loss: 4.6258 (4.3134) weight_decay: 0.0500 (0.0500) time: 0.7244 data: 0.0004 max mem: 57114 Epoch: [16] [ 80/156] eta: 0:00:59 lr: 0.006607 min_lr: 0.006607 loss: 4.4099 (4.3037) weight_decay: 0.0500 (0.0500) time: 0.7164 data: 0.0004 max mem: 57114 Epoch: [16] [ 90/156] eta: 0:00:50 lr: 0.006633 min_lr: 0.006633 loss: 4.2877 (4.2977) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [16] [100/156] eta: 0:00:42 lr: 0.006659 min_lr: 0.006659 loss: 4.0884 (4.2702) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [16] [110/156] eta: 0:00:35 lr: 0.006684 min_lr: 0.006684 loss: 4.1894 (4.2780) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [16] [120/156] eta: 0:00:27 lr: 0.006710 min_lr: 0.006710 loss: 4.3576 (4.2654) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0004 max mem: 57114 Epoch: [16] [130/156] eta: 0:00:19 lr: 0.006735 min_lr: 0.006735 loss: 4.1939 (4.2604) weight_decay: 0.0500 (0.0500) time: 0.6947 data: 0.0009 max mem: 57114 Epoch: [16] [140/156] eta: 0:00:11 lr: 0.006761 min_lr: 0.006761 loss: 4.1939 (4.2625) weight_decay: 0.0500 (0.0500) time: 0.6858 data: 0.0007 max mem: 57114 Epoch: [16] [150/156] eta: 0:00:04 lr: 0.006787 min_lr: 0.006787 loss: 4.3985 (4.2804) weight_decay: 0.0500 (0.0500) time: 0.6803 data: 0.0001 max mem: 57114 Epoch: [16] [155/156] eta: 0:00:00 lr: 0.006800 min_lr: 0.006800 loss: 4.4220 (4.2621) weight_decay: 0.0500 (0.0500) time: 0.6811 data: 0.0001 max mem: 57114 Epoch: [16] Total time: 0:01:55 (0.7415 s / it) Averaged stats: lr: 0.006800 min_lr: 0.006800 loss: 4.4220 (4.1987) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.3990 (1.3990) acc1: 71.3542 (71.3542) acc5: 92.7083 (92.7083) time: 2.0732 data: 1.8177 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7268 (1.6207) acc1: 64.0625 (61.8926) acc5: 88.0208 (85.5499) time: 0.5653 data: 0.3636 max mem: 57114 Test: Total time: 0:00:02 (0.5886 s / it) * Acc@1 62.158 Acc@5 84.984 loss 1.724 Accuracy of the model on the 50000 test images: 62.2% Max accuracy: 62.18% Test: [0/5] eta: 0:00:11 loss: 6.9127 (6.9127) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.3583 data: 2.1150 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9127 (6.9151) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.6223 data: 0.4231 max mem: 57114 Test: Total time: 0:00:03 (0.6324 s / it) * Acc@1 0.094 Acc@5 0.496 loss 6.913 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [17] [ 0/156] eta: 0:14:45 lr: 0.006802 min_lr: 0.006802 loss: 3.2083 (3.2083) weight_decay: 0.0500 (0.0500) time: 5.6747 data: 4.4156 max mem: 57114 Epoch: [17] [ 10/156] eta: 0:02:55 lr: 0.006828 min_lr: 0.006828 loss: 4.3897 (4.2701) weight_decay: 0.0500 (0.0500) time: 1.2043 data: 0.4017 max mem: 57114 Epoch: [17] [ 20/156] eta: 0:02:12 lr: 0.006853 min_lr: 0.006853 loss: 4.3897 (4.2103) weight_decay: 0.0500 (0.0500) time: 0.7371 data: 0.0003 max mem: 57114 Epoch: [17] [ 30/156] eta: 0:01:52 lr: 0.006879 min_lr: 0.006879 loss: 4.3925 (4.2199) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0003 max mem: 57114 Epoch: [17] [ 40/156] eta: 0:01:38 lr: 0.006905 min_lr: 0.006905 loss: 4.3702 (4.2318) weight_decay: 0.0500 (0.0500) time: 0.7194 data: 0.0003 max mem: 57114 Epoch: [17] [ 50/156] eta: 0:01:27 lr: 0.006930 min_lr: 0.006930 loss: 4.4279 (4.2763) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0004 max mem: 57114 Epoch: [17] [ 60/156] eta: 0:01:17 lr: 0.006956 min_lr: 0.006956 loss: 4.4699 (4.2928) weight_decay: 0.0500 (0.0500) time: 0.7137 data: 0.0004 max mem: 57114 Epoch: [17] [ 70/156] eta: 0:01:08 lr: 0.006982 min_lr: 0.006982 loss: 4.2058 (4.2642) weight_decay: 0.0500 (0.0500) time: 0.7231 data: 0.0004 max mem: 57114 Epoch: [17] [ 80/156] eta: 0:00:59 lr: 0.007007 min_lr: 0.007007 loss: 4.3176 (4.2954) weight_decay: 0.0500 (0.0500) time: 0.7216 data: 0.0004 max mem: 57114 Epoch: [17] [ 90/156] eta: 0:00:51 lr: 0.007033 min_lr: 0.007033 loss: 4.2535 (4.2346) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [17] [100/156] eta: 0:00:42 lr: 0.007059 min_lr: 0.007059 loss: 3.8849 (4.2152) weight_decay: 0.0500 (0.0500) time: 0.6968 data: 0.0004 max mem: 57114 Epoch: [17] [110/156] eta: 0:00:34 lr: 0.007084 min_lr: 0.007084 loss: 4.2134 (4.2026) weight_decay: 0.0500 (0.0500) time: 0.6952 data: 0.0003 max mem: 57114 Epoch: [17] [120/156] eta: 0:00:27 lr: 0.007110 min_lr: 0.007110 loss: 4.4383 (4.2190) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0003 max mem: 57114 Epoch: [17] [130/156] eta: 0:00:19 lr: 0.007136 min_lr: 0.007136 loss: 4.2249 (4.2090) weight_decay: 0.0500 (0.0500) time: 0.6976 data: 0.0009 max mem: 57114 Epoch: [17] [140/156] eta: 0:00:11 lr: 0.007161 min_lr: 0.007161 loss: 3.9044 (4.1813) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0007 max mem: 57114 Epoch: [17] [150/156] eta: 0:00:04 lr: 0.007187 min_lr: 0.007187 loss: 3.7496 (4.1717) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0002 max mem: 57114 Epoch: [17] [155/156] eta: 0:00:00 lr: 0.007200 min_lr: 0.007200 loss: 4.1252 (4.1722) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0002 max mem: 57114 Epoch: [17] Total time: 0:01:55 (0.7427 s / it) Averaged stats: lr: 0.007200 min_lr: 0.007200 loss: 4.1252 (4.1599) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.6515 (1.6515) acc1: 70.3125 (70.3125) acc5: 92.1875 (92.1875) time: 2.1043 data: 1.8484 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.8337 (1.8879) acc1: 65.6250 (61.5090) acc5: 88.0208 (84.7826) time: 0.5715 data: 0.3698 max mem: 57114 Test: Total time: 0:00:02 (0.5942 s / it) * Acc@1 60.578 Acc@5 83.506 loss 2.018 Accuracy of the model on the 50000 test images: 60.6% Max accuracy: 62.18% Test: [0/5] eta: 0:00:11 loss: 6.9131 (6.9131) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.2941 data: 2.0506 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9131 (6.9154) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.6096 data: 0.4103 max mem: 57114 Test: Total time: 0:00:03 (0.6237 s / it) * Acc@1 0.090 Acc@5 0.488 loss 6.914 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [18] [ 0/156] eta: 0:14:08 lr: 0.007202 min_lr: 0.007202 loss: 4.2097 (4.2097) weight_decay: 0.0500 (0.0500) time: 5.4412 data: 4.3787 max mem: 57114 Epoch: [18] [ 10/156] eta: 0:02:48 lr: 0.007228 min_lr: 0.007228 loss: 4.2543 (4.2210) weight_decay: 0.0500 (0.0500) time: 1.1520 data: 0.3984 max mem: 57114 Epoch: [18] [ 20/156] eta: 0:02:07 lr: 0.007254 min_lr: 0.007254 loss: 4.2294 (4.1376) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0003 max mem: 57114 Epoch: [18] [ 30/156] eta: 0:01:49 lr: 0.007279 min_lr: 0.007279 loss: 4.0090 (4.0851) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0003 max mem: 57114 Epoch: [18] [ 40/156] eta: 0:01:37 lr: 0.007305 min_lr: 0.007305 loss: 3.9226 (4.0563) weight_decay: 0.0500 (0.0500) time: 0.7344 data: 0.0004 max mem: 57114 Epoch: [18] [ 50/156] eta: 0:01:26 lr: 0.007331 min_lr: 0.007331 loss: 3.7113 (3.9872) weight_decay: 0.0500 (0.0500) time: 0.7194 data: 0.0004 max mem: 57114 Epoch: [18] [ 60/156] eta: 0:01:16 lr: 0.007356 min_lr: 0.007356 loss: 3.6777 (3.9632) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [18] [ 70/156] eta: 0:01:07 lr: 0.007382 min_lr: 0.007382 loss: 3.7919 (3.9487) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [18] [ 80/156] eta: 0:00:58 lr: 0.007408 min_lr: 0.007408 loss: 3.9074 (3.9410) weight_decay: 0.0500 (0.0500) time: 0.7115 data: 0.0004 max mem: 57114 Epoch: [18] [ 90/156] eta: 0:00:50 lr: 0.007433 min_lr: 0.007433 loss: 4.3708 (3.9900) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0004 max mem: 57114 Epoch: [18] [100/156] eta: 0:00:42 lr: 0.007459 min_lr: 0.007459 loss: 4.2398 (3.9775) weight_decay: 0.0500 (0.0500) time: 0.7040 data: 0.0004 max mem: 57114 Epoch: [18] [110/156] eta: 0:00:34 lr: 0.007484 min_lr: 0.007484 loss: 4.0923 (3.9994) weight_decay: 0.0500 (0.0500) time: 0.7030 data: 0.0004 max mem: 57114 Epoch: [18] [120/156] eta: 0:00:27 lr: 0.007510 min_lr: 0.007510 loss: 4.3381 (4.0135) weight_decay: 0.0500 (0.0500) time: 0.6999 data: 0.0004 max mem: 57114 Epoch: [18] [130/156] eta: 0:00:19 lr: 0.007536 min_lr: 0.007536 loss: 4.4067 (4.0474) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0005 max mem: 57114 Epoch: [18] [140/156] eta: 0:00:11 lr: 0.007561 min_lr: 0.007561 loss: 4.2909 (4.0437) weight_decay: 0.0500 (0.0500) time: 0.6801 data: 0.0003 max mem: 57114 Epoch: [18] [150/156] eta: 0:00:04 lr: 0.007587 min_lr: 0.007587 loss: 4.0057 (4.0427) weight_decay: 0.0500 (0.0500) time: 0.6806 data: 0.0001 max mem: 57114 Epoch: [18] [155/156] eta: 0:00:00 lr: 0.007600 min_lr: 0.007600 loss: 3.9905 (4.0403) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0001 max mem: 57114 Epoch: [18] Total time: 0:01:55 (0.7380 s / it) Averaged stats: lr: 0.007600 min_lr: 0.007600 loss: 3.9905 (4.1269) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.4056 (1.4056) acc1: 68.7500 (68.7500) acc5: 91.6667 (91.6667) time: 2.1299 data: 1.8744 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.6288 (1.5463) acc1: 66.1458 (62.6598) acc5: 89.5833 (86.8286) time: 0.5766 data: 0.3749 max mem: 57114 Test: Total time: 0:00:03 (0.6030 s / it) * Acc@1 62.582 Acc@5 85.322 loss 1.673 Accuracy of the model on the 50000 test images: 62.6% Max accuracy: 62.58% Test: [0/5] eta: 0:00:10 loss: 6.9131 (6.9131) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.0376 data: 1.7942 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9131 (6.9156) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.1279) time: 0.5581 data: 0.3589 max mem: 57114 Test: Total time: 0:00:02 (0.5729 s / it) * Acc@1 0.092 Acc@5 0.482 loss 6.914 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [19] [ 0/156] eta: 0:13:26 lr: 0.007602 min_lr: 0.007602 loss: 3.2453 (3.2453) weight_decay: 0.0500 (0.0500) time: 5.1717 data: 4.3763 max mem: 57114 Epoch: [19] [ 10/156] eta: 0:02:44 lr: 0.007628 min_lr: 0.007628 loss: 4.3939 (4.2462) weight_decay: 0.0500 (0.0500) time: 1.1252 data: 0.3981 max mem: 57114 Epoch: [19] [ 20/156] eta: 0:02:08 lr: 0.007654 min_lr: 0.007654 loss: 4.3011 (4.0756) weight_decay: 0.0500 (0.0500) time: 0.7299 data: 0.0003 max mem: 57114 Epoch: [19] [ 30/156] eta: 0:01:51 lr: 0.007679 min_lr: 0.007679 loss: 4.3011 (4.1555) weight_decay: 0.0500 (0.0500) time: 0.7481 data: 0.0003 max mem: 57114 Epoch: [19] [ 40/156] eta: 0:01:37 lr: 0.007705 min_lr: 0.007705 loss: 4.2574 (4.1396) weight_decay: 0.0500 (0.0500) time: 0.7299 data: 0.0004 max mem: 57114 Epoch: [19] [ 50/156] eta: 0:01:26 lr: 0.007731 min_lr: 0.007731 loss: 4.1608 (4.1317) weight_decay: 0.0500 (0.0500) time: 0.7040 data: 0.0004 max mem: 57114 Epoch: [19] [ 60/156] eta: 0:01:17 lr: 0.007756 min_lr: 0.007756 loss: 4.1131 (4.1097) weight_decay: 0.0500 (0.0500) time: 0.7332 data: 0.0004 max mem: 57114 Epoch: [19] [ 70/156] eta: 0:01:08 lr: 0.007782 min_lr: 0.007782 loss: 4.1131 (4.1099) weight_decay: 0.0500 (0.0500) time: 0.7390 data: 0.0003 max mem: 57114 Epoch: [19] [ 80/156] eta: 0:00:59 lr: 0.007808 min_lr: 0.007808 loss: 4.1617 (4.1117) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0004 max mem: 57114 Epoch: [19] [ 90/156] eta: 0:00:50 lr: 0.007833 min_lr: 0.007833 loss: 4.3236 (4.1172) weight_decay: 0.0500 (0.0500) time: 0.6962 data: 0.0004 max mem: 57114 Epoch: [19] [100/156] eta: 0:00:42 lr: 0.007859 min_lr: 0.007859 loss: 4.1616 (4.0887) weight_decay: 0.0500 (0.0500) time: 0.6986 data: 0.0004 max mem: 57114 Epoch: [19] [110/156] eta: 0:00:34 lr: 0.007885 min_lr: 0.007885 loss: 4.0414 (4.0874) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0004 max mem: 57114 Epoch: [19] [120/156] eta: 0:00:27 lr: 0.007910 min_lr: 0.007910 loss: 4.0414 (4.0655) weight_decay: 0.0500 (0.0500) time: 0.7009 data: 0.0004 max mem: 57114 Epoch: [19] [130/156] eta: 0:00:19 lr: 0.007936 min_lr: 0.007936 loss: 4.3433 (4.0810) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0005 max mem: 57114 Epoch: [19] [140/156] eta: 0:00:11 lr: 0.007962 min_lr: 0.007962 loss: 4.2178 (4.0658) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0003 max mem: 57114 Epoch: [19] [150/156] eta: 0:00:04 lr: 0.007987 min_lr: 0.007987 loss: 4.2096 (4.0730) weight_decay: 0.0500 (0.0500) time: 0.6795 data: 0.0001 max mem: 57114 Epoch: [19] [155/156] eta: 0:00:00 lr: 0.008000 min_lr: 0.008000 loss: 4.1025 (4.0801) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0001 max mem: 57114 Epoch: [19] Total time: 0:01:55 (0.7402 s / it) Averaged stats: lr: 0.008000 min_lr: 0.008000 loss: 4.1025 (4.0941) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.5892 (1.5892) acc1: 68.7500 (68.7500) acc5: 94.7917 (94.7917) time: 2.0027 data: 1.7473 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7985 (1.7919) acc1: 64.0625 (62.4041) acc5: 86.4583 (86.4450) time: 0.5512 data: 0.3495 max mem: 57114 Test: Total time: 0:00:02 (0.5723 s / it) * Acc@1 61.281 Acc@5 84.861 loss 1.904 Accuracy of the model on the 50000 test images: 61.3% Max accuracy: 62.58% Test: [0/5] eta: 0:00:11 loss: 6.9136 (6.9136) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.3153 data: 2.0720 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9136 (6.9161) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.1279) time: 0.6137 data: 0.4145 max mem: 57114 Test: Total time: 0:00:03 (0.6251 s / it) * Acc@1 0.096 Acc@5 0.480 loss 6.915 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [20] [ 0/156] eta: 0:14:13 lr: 0.008000 min_lr: 0.008000 loss: 4.0290 (4.0290) weight_decay: 0.0500 (0.0500) time: 5.4737 data: 3.7498 max mem: 57114 Epoch: [20] [ 10/156] eta: 0:02:53 lr: 0.008000 min_lr: 0.008000 loss: 4.0290 (4.0520) weight_decay: 0.0500 (0.0500) time: 1.1872 data: 0.3411 max mem: 57114 Epoch: [20] [ 20/156] eta: 0:02:10 lr: 0.008000 min_lr: 0.008000 loss: 3.9414 (3.8907) weight_decay: 0.0500 (0.0500) time: 0.7340 data: 0.0003 max mem: 57114 Epoch: [20] [ 30/156] eta: 0:01:51 lr: 0.008000 min_lr: 0.008000 loss: 3.9078 (3.9340) weight_decay: 0.0500 (0.0500) time: 0.7232 data: 0.0003 max mem: 57114 Epoch: [20] [ 40/156] eta: 0:01:37 lr: 0.008000 min_lr: 0.008000 loss: 4.0936 (3.9520) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0003 max mem: 57114 Epoch: [20] [ 50/156] eta: 0:01:26 lr: 0.008000 min_lr: 0.008000 loss: 4.1379 (3.9879) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0004 max mem: 57114 Epoch: [20] [ 60/156] eta: 0:01:16 lr: 0.008000 min_lr: 0.008000 loss: 4.2262 (3.9890) weight_decay: 0.0500 (0.0500) time: 0.7151 data: 0.0004 max mem: 57114 Epoch: [20] [ 70/156] eta: 0:01:07 lr: 0.008000 min_lr: 0.008000 loss: 3.9458 (3.9696) weight_decay: 0.0500 (0.0500) time: 0.7189 data: 0.0004 max mem: 57114 Epoch: [20] [ 80/156] eta: 0:00:59 lr: 0.008000 min_lr: 0.008000 loss: 4.0986 (4.0000) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0003 max mem: 57114 Epoch: [20] [ 90/156] eta: 0:00:50 lr: 0.008000 min_lr: 0.008000 loss: 4.2416 (4.0135) weight_decay: 0.0500 (0.0500) time: 0.7048 data: 0.0003 max mem: 57114 Epoch: [20] [100/156] eta: 0:00:42 lr: 0.008000 min_lr: 0.008000 loss: 4.3286 (4.0286) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [20] [110/156] eta: 0:00:34 lr: 0.008000 min_lr: 0.008000 loss: 4.1594 (4.0466) weight_decay: 0.0500 (0.0500) time: 0.6995 data: 0.0004 max mem: 57114 Epoch: [20] [120/156] eta: 0:00:27 lr: 0.008000 min_lr: 0.008000 loss: 4.3322 (4.0781) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0004 max mem: 57114 Epoch: [20] [130/156] eta: 0:00:19 lr: 0.008000 min_lr: 0.008000 loss: 4.2505 (4.0834) weight_decay: 0.0500 (0.0500) time: 0.6908 data: 0.0008 max mem: 57114 Epoch: [20] [140/156] eta: 0:00:11 lr: 0.008000 min_lr: 0.008000 loss: 3.9719 (4.0811) weight_decay: 0.0500 (0.0500) time: 0.6864 data: 0.0007 max mem: 57114 Epoch: [20] [150/156] eta: 0:00:04 lr: 0.008000 min_lr: 0.008000 loss: 3.9719 (4.0729) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0001 max mem: 57114 Epoch: [20] [155/156] eta: 0:00:00 lr: 0.008000 min_lr: 0.008000 loss: 3.9583 (4.0731) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0001 max mem: 57114 Epoch: [20] Total time: 0:01:55 (0.7390 s / it) Averaged stats: lr: 0.008000 min_lr: 0.008000 loss: 3.9583 (4.0587) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.3891 (1.3891) acc1: 74.4792 (74.4792) acc5: 92.7083 (92.7083) time: 2.1088 data: 1.8531 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.6327 (1.5960) acc1: 66.1458 (63.9386) acc5: 89.0625 (85.9335) time: 0.5724 data: 0.3707 max mem: 57114 Test: Total time: 0:00:02 (0.5921 s / it) * Acc@1 62.622 Acc@5 85.326 loss 1.728 Accuracy of the model on the 50000 test images: 62.6% Max accuracy: 62.62% Test: [0/5] eta: 0:00:09 loss: 6.9137 (6.9137) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 1.9627 data: 1.7193 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9137 (6.9167) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.1279) time: 0.5432 data: 0.3439 max mem: 57114 Test: Total time: 0:00:02 (0.5538 s / it) * Acc@1 0.094 Acc@5 0.488 loss 6.915 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [21] [ 0/156] eta: 0:14:11 lr: 0.008000 min_lr: 0.008000 loss: 4.0519 (4.0519) weight_decay: 0.0500 (0.0500) time: 5.4586 data: 3.8907 max mem: 57114 Epoch: [21] [ 10/156] eta: 0:02:51 lr: 0.008000 min_lr: 0.008000 loss: 3.9884 (3.8412) weight_decay: 0.0500 (0.0500) time: 1.1773 data: 0.3540 max mem: 57114 Epoch: [21] [ 20/156] eta: 0:02:08 lr: 0.008000 min_lr: 0.008000 loss: 3.9884 (4.0779) weight_decay: 0.0500 (0.0500) time: 0.7180 data: 0.0003 max mem: 57114 Epoch: [21] [ 30/156] eta: 0:01:49 lr: 0.008000 min_lr: 0.008000 loss: 4.3641 (4.0831) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0003 max mem: 57114 Epoch: [21] [ 40/156] eta: 0:01:36 lr: 0.008000 min_lr: 0.008000 loss: 4.1796 (4.0676) weight_decay: 0.0500 (0.0500) time: 0.7228 data: 0.0004 max mem: 57114 Epoch: [21] [ 50/156] eta: 0:01:27 lr: 0.008000 min_lr: 0.008000 loss: 3.8136 (3.9918) weight_decay: 0.0500 (0.0500) time: 0.7466 data: 0.0004 max mem: 57114 Epoch: [21] [ 60/156] eta: 0:01:18 lr: 0.008000 min_lr: 0.008000 loss: 3.6541 (3.9356) weight_decay: 0.0500 (0.0500) time: 0.7653 data: 0.0004 max mem: 57114 Epoch: [21] [ 70/156] eta: 0:01:08 lr: 0.007999 min_lr: 0.007999 loss: 3.8990 (3.9479) weight_decay: 0.0500 (0.0500) time: 0.7435 data: 0.0004 max mem: 57114 Epoch: [21] [ 80/156] eta: 0:00:59 lr: 0.007999 min_lr: 0.007999 loss: 4.2274 (3.9567) weight_decay: 0.0500 (0.0500) time: 0.7172 data: 0.0004 max mem: 57114 Epoch: [21] [ 90/156] eta: 0:00:51 lr: 0.007999 min_lr: 0.007999 loss: 4.1621 (3.9629) weight_decay: 0.0500 (0.0500) time: 0.7007 data: 0.0004 max mem: 57114 Epoch: [21] [100/156] eta: 0:00:43 lr: 0.007999 min_lr: 0.007999 loss: 4.1397 (3.9620) weight_decay: 0.0500 (0.0500) time: 0.6945 data: 0.0004 max mem: 57114 Epoch: [21] [110/156] eta: 0:00:35 lr: 0.007999 min_lr: 0.007999 loss: 4.1632 (3.9831) weight_decay: 0.0500 (0.0500) time: 0.6950 data: 0.0004 max mem: 57114 Epoch: [21] [120/156] eta: 0:00:27 lr: 0.007999 min_lr: 0.007999 loss: 4.0781 (3.9852) weight_decay: 0.0500 (0.0500) time: 0.7001 data: 0.0004 max mem: 57114 Epoch: [21] [130/156] eta: 0:00:19 lr: 0.007999 min_lr: 0.007999 loss: 4.0781 (3.9930) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0009 max mem: 57114 Epoch: [21] [140/156] eta: 0:00:11 lr: 0.007999 min_lr: 0.007999 loss: 4.2535 (3.9940) weight_decay: 0.0500 (0.0500) time: 0.6893 data: 0.0007 max mem: 57114 Epoch: [21] [150/156] eta: 0:00:04 lr: 0.007999 min_lr: 0.007999 loss: 4.1484 (3.9903) weight_decay: 0.0500 (0.0500) time: 0.6799 data: 0.0001 max mem: 57114 Epoch: [21] [155/156] eta: 0:00:00 lr: 0.007999 min_lr: 0.007999 loss: 4.0884 (3.9782) weight_decay: 0.0500 (0.0500) time: 0.6810 data: 0.0001 max mem: 57114 Epoch: [21] Total time: 0:01:56 (0.7443 s / it) Averaged stats: lr: 0.007999 min_lr: 0.007999 loss: 4.0884 (4.0108) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 1.2933 (1.2933) acc1: 73.9583 (73.9583) acc5: 94.7917 (94.7917) time: 2.2525 data: 1.9962 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5546 (1.5008) acc1: 64.5833 (65.0895) acc5: 91.6667 (87.7238) time: 0.6012 data: 0.3993 max mem: 57114 Test: Total time: 0:00:03 (0.6242 s / it) * Acc@1 63.785 Acc@5 85.979 loss 1.653 Accuracy of the model on the 50000 test images: 63.8% Max accuracy: 63.78% Test: [0/5] eta: 0:00:10 loss: 6.9138 (6.9138) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 2.0521 data: 1.8086 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9138 (6.9179) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.5610 data: 0.3618 max mem: 57114 Test: Total time: 0:00:02 (0.5731 s / it) * Acc@1 0.102 Acc@5 0.504 loss 6.917 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [22] [ 0/156] eta: 0:13:26 lr: 0.007999 min_lr: 0.007999 loss: 4.5682 (4.5682) weight_decay: 0.0500 (0.0500) time: 5.1676 data: 4.1159 max mem: 57114 Epoch: [22] [ 10/156] eta: 0:02:45 lr: 0.007999 min_lr: 0.007999 loss: 3.6516 (3.9382) weight_decay: 0.0500 (0.0500) time: 1.1334 data: 0.3747 max mem: 57114 Epoch: [22] [ 20/156] eta: 0:02:08 lr: 0.007999 min_lr: 0.007999 loss: 3.8041 (3.9448) weight_decay: 0.0500 (0.0500) time: 0.7369 data: 0.0005 max mem: 57114 Epoch: [22] [ 30/156] eta: 0:01:51 lr: 0.007999 min_lr: 0.007999 loss: 4.2082 (4.0119) weight_decay: 0.0500 (0.0500) time: 0.7486 data: 0.0003 max mem: 57114 Epoch: [22] [ 40/156] eta: 0:01:38 lr: 0.007999 min_lr: 0.007999 loss: 3.7499 (3.9217) weight_decay: 0.0500 (0.0500) time: 0.7380 data: 0.0004 max mem: 57114 Epoch: [22] [ 50/156] eta: 0:01:27 lr: 0.007999 min_lr: 0.007999 loss: 3.7499 (3.9247) weight_decay: 0.0500 (0.0500) time: 0.7402 data: 0.0004 max mem: 57114 Epoch: [22] [ 60/156] eta: 0:01:17 lr: 0.007999 min_lr: 0.007999 loss: 3.9444 (3.9500) weight_decay: 0.0500 (0.0500) time: 0.7359 data: 0.0004 max mem: 57114 Epoch: [22] [ 70/156] eta: 0:01:08 lr: 0.007998 min_lr: 0.007998 loss: 4.0086 (3.9408) weight_decay: 0.0500 (0.0500) time: 0.7016 data: 0.0004 max mem: 57114 Epoch: [22] [ 80/156] eta: 0:00:59 lr: 0.007998 min_lr: 0.007998 loss: 4.0617 (3.9537) weight_decay: 0.0500 (0.0500) time: 0.6938 data: 0.0004 max mem: 57114 Epoch: [22] [ 90/156] eta: 0:00:50 lr: 0.007998 min_lr: 0.007998 loss: 3.8378 (3.9335) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0004 max mem: 57114 Epoch: [22] [100/156] eta: 0:00:42 lr: 0.007998 min_lr: 0.007998 loss: 3.8562 (3.9492) weight_decay: 0.0500 (0.0500) time: 0.7030 data: 0.0004 max mem: 57114 Epoch: [22] [110/156] eta: 0:00:34 lr: 0.007998 min_lr: 0.007998 loss: 3.9819 (3.9369) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0004 max mem: 57114 Epoch: [22] [120/156] eta: 0:00:27 lr: 0.007998 min_lr: 0.007998 loss: 3.9819 (3.9404) weight_decay: 0.0500 (0.0500) time: 0.7019 data: 0.0004 max mem: 57114 Epoch: [22] [130/156] eta: 0:00:19 lr: 0.007998 min_lr: 0.007998 loss: 4.3525 (3.9628) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0008 max mem: 57114 Epoch: [22] [140/156] eta: 0:00:11 lr: 0.007998 min_lr: 0.007998 loss: 4.1706 (3.9624) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.0007 max mem: 57114 Epoch: [22] [150/156] eta: 0:00:04 lr: 0.007998 min_lr: 0.007998 loss: 4.1671 (3.9729) weight_decay: 0.0500 (0.0500) time: 0.6816 data: 0.0001 max mem: 57114 Epoch: [22] [155/156] eta: 0:00:00 lr: 0.007998 min_lr: 0.007998 loss: 4.2100 (3.9858) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [22] Total time: 0:01:55 (0.7417 s / it) Averaged stats: lr: 0.007998 min_lr: 0.007998 loss: 4.2100 (3.9777) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.6116 (1.6116) acc1: 70.3125 (70.3125) acc5: 91.1458 (91.1458) time: 2.1167 data: 1.8611 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7703 (1.6374) acc1: 67.1875 (65.8568) acc5: 87.5000 (85.8056) time: 0.5741 data: 0.3723 max mem: 57114 Test: Total time: 0:00:02 (0.5953 s / it) * Acc@1 64.468 Acc@5 86.283 loss 1.784 Accuracy of the model on the 50000 test images: 64.5% Max accuracy: 64.47% Test: [0/5] eta: 0:00:10 loss: 6.9145 (6.9145) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.0886 data: 1.8451 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9145 (6.9190) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.5683 data: 0.3691 max mem: 57114 Test: Total time: 0:00:02 (0.5798 s / it) * Acc@1 0.098 Acc@5 0.496 loss 6.917 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [23] [ 0/156] eta: 0:13:33 lr: 0.007998 min_lr: 0.007998 loss: 4.2850 (4.2850) weight_decay: 0.0500 (0.0500) time: 5.2164 data: 3.6513 max mem: 57114 Epoch: [23] [ 10/156] eta: 0:02:49 lr: 0.007998 min_lr: 0.007998 loss: 3.9023 (3.8744) weight_decay: 0.0500 (0.0500) time: 1.1579 data: 0.3322 max mem: 57114 Epoch: [23] [ 20/156] eta: 0:02:10 lr: 0.007998 min_lr: 0.007998 loss: 3.9023 (3.8452) weight_decay: 0.0500 (0.0500) time: 0.7471 data: 0.0003 max mem: 57114 Epoch: [23] [ 30/156] eta: 0:01:52 lr: 0.007997 min_lr: 0.007997 loss: 4.1270 (3.9887) weight_decay: 0.0500 (0.0500) time: 0.7469 data: 0.0003 max mem: 57114 Epoch: [23] [ 40/156] eta: 0:01:39 lr: 0.007997 min_lr: 0.007997 loss: 4.2081 (4.0422) weight_decay: 0.0500 (0.0500) time: 0.7461 data: 0.0004 max mem: 57114 Epoch: [23] [ 50/156] eta: 0:01:28 lr: 0.007997 min_lr: 0.007997 loss: 4.1507 (4.0211) weight_decay: 0.0500 (0.0500) time: 0.7384 data: 0.0004 max mem: 57114 Epoch: [23] [ 60/156] eta: 0:01:18 lr: 0.007997 min_lr: 0.007997 loss: 4.0525 (4.0320) weight_decay: 0.0500 (0.0500) time: 0.7299 data: 0.0004 max mem: 57114 Epoch: [23] [ 70/156] eta: 0:01:08 lr: 0.007997 min_lr: 0.007997 loss: 4.0994 (4.0063) weight_decay: 0.0500 (0.0500) time: 0.7194 data: 0.0004 max mem: 57114 Epoch: [23] [ 80/156] eta: 0:00:59 lr: 0.007997 min_lr: 0.007997 loss: 4.0994 (4.0192) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0003 max mem: 57114 Epoch: [23] [ 90/156] eta: 0:00:51 lr: 0.007997 min_lr: 0.007997 loss: 3.9851 (4.0034) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0003 max mem: 57114 Epoch: [23] [100/156] eta: 0:00:43 lr: 0.007997 min_lr: 0.007997 loss: 3.9851 (4.0119) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [23] [110/156] eta: 0:00:35 lr: 0.007997 min_lr: 0.007997 loss: 4.1765 (3.9958) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0004 max mem: 57114 Epoch: [23] [120/156] eta: 0:00:27 lr: 0.007996 min_lr: 0.007996 loss: 4.0171 (3.9963) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0004 max mem: 57114 Epoch: [23] [130/156] eta: 0:00:19 lr: 0.007996 min_lr: 0.007996 loss: 4.0171 (4.0047) weight_decay: 0.0500 (0.0500) time: 0.6979 data: 0.0008 max mem: 57114 Epoch: [23] [140/156] eta: 0:00:11 lr: 0.007996 min_lr: 0.007996 loss: 4.3166 (4.0061) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0007 max mem: 57114 Epoch: [23] [150/156] eta: 0:00:04 lr: 0.007996 min_lr: 0.007996 loss: 4.0111 (4.0030) weight_decay: 0.0500 (0.0500) time: 0.6776 data: 0.0001 max mem: 57114 Epoch: [23] [155/156] eta: 0:00:00 lr: 0.007996 min_lr: 0.007996 loss: 4.1693 (4.0112) weight_decay: 0.0500 (0.0500) time: 0.6815 data: 0.0001 max mem: 57114 Epoch: [23] Total time: 0:01:56 (0.7450 s / it) Averaged stats: lr: 0.007996 min_lr: 0.007996 loss: 4.1693 (3.9493) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.6036 (1.6036) acc1: 66.6667 (66.6667) acc5: 91.6667 (91.6667) time: 2.0510 data: 1.7953 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.6036 (1.7054) acc1: 66.6667 (63.0435) acc5: 91.6667 (87.5959) time: 0.5609 data: 0.3591 max mem: 57114 Test: Total time: 0:00:02 (0.5844 s / it) * Acc@1 63.297 Acc@5 85.922 loss 1.748 Accuracy of the model on the 50000 test images: 63.3% Max accuracy: 64.47% Test: [0/5] eta: 0:00:11 loss: 6.9158 (6.9158) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.2776 data: 2.0341 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9158 (6.9201) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.6061 data: 0.4069 max mem: 57114 Test: Total time: 0:00:03 (0.6189 s / it) * Acc@1 0.096 Acc@5 0.496 loss 6.919 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [24] [ 0/156] eta: 0:13:33 lr: 0.007996 min_lr: 0.007996 loss: 4.1404 (4.1404) weight_decay: 0.0500 (0.0500) time: 5.2132 data: 3.6261 max mem: 57114 Epoch: [24] [ 10/156] eta: 0:02:46 lr: 0.007996 min_lr: 0.007996 loss: 3.7474 (3.7405) weight_decay: 0.0500 (0.0500) time: 1.1410 data: 0.3300 max mem: 57114 Epoch: [24] [ 20/156] eta: 0:02:08 lr: 0.007996 min_lr: 0.007996 loss: 4.0824 (3.9129) weight_decay: 0.0500 (0.0500) time: 0.7298 data: 0.0003 max mem: 57114 Epoch: [24] [ 30/156] eta: 0:01:49 lr: 0.007996 min_lr: 0.007996 loss: 4.1396 (3.9327) weight_decay: 0.0500 (0.0500) time: 0.7221 data: 0.0004 max mem: 57114 Epoch: [24] [ 40/156] eta: 0:01:36 lr: 0.007995 min_lr: 0.007995 loss: 4.1908 (3.9482) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0004 max mem: 57114 Epoch: [24] [ 50/156] eta: 0:01:25 lr: 0.007995 min_lr: 0.007995 loss: 4.0363 (3.9439) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [24] [ 60/156] eta: 0:01:16 lr: 0.007995 min_lr: 0.007995 loss: 3.9619 (3.9425) weight_decay: 0.0500 (0.0500) time: 0.7115 data: 0.0004 max mem: 57114 Epoch: [24] [ 70/156] eta: 0:01:07 lr: 0.007995 min_lr: 0.007995 loss: 3.9817 (3.9041) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0004 max mem: 57114 Epoch: [24] [ 80/156] eta: 0:00:58 lr: 0.007995 min_lr: 0.007995 loss: 4.0483 (3.9322) weight_decay: 0.0500 (0.0500) time: 0.7241 data: 0.0004 max mem: 57114 Epoch: [24] [ 90/156] eta: 0:00:50 lr: 0.007995 min_lr: 0.007995 loss: 4.2413 (3.9454) weight_decay: 0.0500 (0.0500) time: 0.7050 data: 0.0004 max mem: 57114 Epoch: [24] [100/156] eta: 0:00:42 lr: 0.007995 min_lr: 0.007995 loss: 4.0651 (3.9332) weight_decay: 0.0500 (0.0500) time: 0.6979 data: 0.0004 max mem: 57114 Epoch: [24] [110/156] eta: 0:00:34 lr: 0.007994 min_lr: 0.007994 loss: 4.0115 (3.9372) weight_decay: 0.0500 (0.0500) time: 0.7017 data: 0.0004 max mem: 57114 Epoch: [24] [120/156] eta: 0:00:27 lr: 0.007994 min_lr: 0.007994 loss: 3.9867 (3.9167) weight_decay: 0.0500 (0.0500) time: 0.7041 data: 0.0003 max mem: 57114 Epoch: [24] [130/156] eta: 0:00:19 lr: 0.007994 min_lr: 0.007994 loss: 3.9867 (3.9148) weight_decay: 0.0500 (0.0500) time: 0.6964 data: 0.0008 max mem: 57114 Epoch: [24] [140/156] eta: 0:00:11 lr: 0.007994 min_lr: 0.007994 loss: 4.0414 (3.9053) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0007 max mem: 57114 Epoch: [24] [150/156] eta: 0:00:04 lr: 0.007994 min_lr: 0.007994 loss: 3.9165 (3.9004) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [24] [155/156] eta: 0:00:00 lr: 0.007994 min_lr: 0.007994 loss: 3.9165 (3.8867) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [24] Total time: 0:01:55 (0.7388 s / it) Averaged stats: lr: 0.007994 min_lr: 0.007994 loss: 3.9165 (3.9126) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.4251 (1.4251) acc1: 72.3958 (72.3958) acc5: 93.7500 (93.7500) time: 2.0772 data: 1.8211 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5030 (1.5455) acc1: 69.7917 (66.1125) acc5: 92.1875 (88.8747) time: 0.5662 data: 0.3643 max mem: 57114 Test: Total time: 0:00:02 (0.5897 s / it) * Acc@1 65.147 Acc@5 86.981 loss 1.692 Accuracy of the model on the 50000 test images: 65.1% Max accuracy: 65.15% Test: [0/5] eta: 0:00:10 loss: 6.9173 (6.9173) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.0411 data: 1.7976 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9173 (6.9211) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.5588 data: 0.3596 max mem: 57114 Test: Total time: 0:00:02 (0.5699 s / it) * Acc@1 0.088 Acc@5 0.492 loss 6.920 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [25] [ 0/156] eta: 0:13:25 lr: 0.007994 min_lr: 0.007994 loss: 4.2126 (4.2126) weight_decay: 0.0500 (0.0500) time: 5.1660 data: 3.8111 max mem: 57114 Epoch: [25] [ 10/156] eta: 0:02:45 lr: 0.007994 min_lr: 0.007994 loss: 3.9795 (3.9035) weight_decay: 0.0500 (0.0500) time: 1.1312 data: 0.3468 max mem: 57114 Epoch: [25] [ 20/156] eta: 0:02:06 lr: 0.007993 min_lr: 0.007993 loss: 3.9772 (3.9579) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0005 max mem: 57114 Epoch: [25] [ 30/156] eta: 0:01:49 lr: 0.007993 min_lr: 0.007993 loss: 3.8957 (3.8764) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0005 max mem: 57114 Epoch: [25] [ 40/156] eta: 0:01:37 lr: 0.007993 min_lr: 0.007993 loss: 3.6146 (3.8661) weight_decay: 0.0500 (0.0500) time: 0.7514 data: 0.0004 max mem: 57114 Epoch: [25] [ 50/156] eta: 0:01:27 lr: 0.007993 min_lr: 0.007993 loss: 3.7940 (3.8319) weight_decay: 0.0500 (0.0500) time: 0.7512 data: 0.0004 max mem: 57114 Epoch: [25] [ 60/156] eta: 0:01:18 lr: 0.007993 min_lr: 0.007993 loss: 3.7940 (3.8420) weight_decay: 0.0500 (0.0500) time: 0.7491 data: 0.0004 max mem: 57114 Epoch: [25] [ 70/156] eta: 0:01:08 lr: 0.007993 min_lr: 0.007993 loss: 4.1369 (3.8978) weight_decay: 0.0500 (0.0500) time: 0.7349 data: 0.0004 max mem: 57114 Epoch: [25] [ 80/156] eta: 0:00:59 lr: 0.007992 min_lr: 0.007992 loss: 4.0223 (3.9081) weight_decay: 0.0500 (0.0500) time: 0.7153 data: 0.0004 max mem: 57114 Epoch: [25] [ 90/156] eta: 0:00:51 lr: 0.007992 min_lr: 0.007992 loss: 3.8032 (3.9023) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0004 max mem: 57114 Epoch: [25] [100/156] eta: 0:00:43 lr: 0.007992 min_lr: 0.007992 loss: 3.7742 (3.8904) weight_decay: 0.0500 (0.0500) time: 0.6959 data: 0.0004 max mem: 57114 Epoch: [25] [110/156] eta: 0:00:35 lr: 0.007992 min_lr: 0.007992 loss: 3.5502 (3.8794) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0003 max mem: 57114 Epoch: [25] [120/156] eta: 0:00:27 lr: 0.007992 min_lr: 0.007992 loss: 3.7894 (3.8827) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0004 max mem: 57114 Epoch: [25] [130/156] eta: 0:00:19 lr: 0.007991 min_lr: 0.007991 loss: 3.9673 (3.8848) weight_decay: 0.0500 (0.0500) time: 0.6951 data: 0.0008 max mem: 57114 Epoch: [25] [140/156] eta: 0:00:11 lr: 0.007991 min_lr: 0.007991 loss: 3.9829 (3.8987) weight_decay: 0.0500 (0.0500) time: 0.6884 data: 0.0007 max mem: 57114 Epoch: [25] [150/156] eta: 0:00:04 lr: 0.007991 min_lr: 0.007991 loss: 4.1517 (3.9199) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [25] [155/156] eta: 0:00:00 lr: 0.007991 min_lr: 0.007991 loss: 4.1301 (3.9266) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0001 max mem: 57114 Epoch: [25] Total time: 0:01:56 (0.7452 s / it) Averaged stats: lr: 0.007991 min_lr: 0.007991 loss: 4.1301 (3.8837) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.4728 (1.4728) acc1: 72.3958 (72.3958) acc5: 91.6667 (91.6667) time: 2.0499 data: 1.7922 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5900 (1.5778) acc1: 70.3125 (65.8568) acc5: 89.0625 (86.4450) time: 0.5607 data: 0.3585 max mem: 57114 Test: Total time: 0:00:02 (0.5835 s / it) * Acc@1 64.282 Acc@5 86.451 loss 1.703 Accuracy of the model on the 50000 test images: 64.3% Max accuracy: 65.15% Test: [0/5] eta: 0:00:11 loss: 6.9198 (6.9198) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.2215 data: 1.9780 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9198 (6.9221) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.5949 data: 0.3957 max mem: 57114 Test: Total time: 0:00:03 (0.6068 s / it) * Acc@1 0.090 Acc@5 0.484 loss 6.920 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [26] [ 0/156] eta: 0:13:31 lr: 0.007991 min_lr: 0.007991 loss: 3.9487 (3.9487) weight_decay: 0.0500 (0.0500) time: 5.2004 data: 3.8813 max mem: 57114 Epoch: [26] [ 10/156] eta: 0:02:42 lr: 0.007991 min_lr: 0.007991 loss: 3.2667 (3.5472) weight_decay: 0.0500 (0.0500) time: 1.1142 data: 0.3531 max mem: 57114 Epoch: [26] [ 20/156] eta: 0:02:07 lr: 0.007991 min_lr: 0.007991 loss: 3.2667 (3.6112) weight_decay: 0.0500 (0.0500) time: 0.7235 data: 0.0003 max mem: 57114 Epoch: [26] [ 30/156] eta: 0:01:49 lr: 0.007990 min_lr: 0.007990 loss: 3.4242 (3.5425) weight_decay: 0.0500 (0.0500) time: 0.7291 data: 0.0003 max mem: 57114 Epoch: [26] [ 40/156] eta: 0:01:36 lr: 0.007990 min_lr: 0.007990 loss: 3.7398 (3.6327) weight_decay: 0.0500 (0.0500) time: 0.7240 data: 0.0004 max mem: 57114 Epoch: [26] [ 50/156] eta: 0:01:26 lr: 0.007990 min_lr: 0.007990 loss: 3.7710 (3.6103) weight_decay: 0.0500 (0.0500) time: 0.7295 data: 0.0004 max mem: 57114 Epoch: [26] [ 60/156] eta: 0:01:16 lr: 0.007990 min_lr: 0.007990 loss: 3.7517 (3.6553) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0004 max mem: 57114 Epoch: [26] [ 70/156] eta: 0:01:07 lr: 0.007990 min_lr: 0.007990 loss: 3.8275 (3.6950) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0004 max mem: 57114 Epoch: [26] [ 80/156] eta: 0:00:58 lr: 0.007989 min_lr: 0.007989 loss: 3.7945 (3.6913) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0004 max mem: 57114 Epoch: [26] [ 90/156] eta: 0:00:50 lr: 0.007989 min_lr: 0.007989 loss: 3.7708 (3.6993) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0003 max mem: 57114 Epoch: [26] [100/156] eta: 0:00:42 lr: 0.007989 min_lr: 0.007989 loss: 3.9531 (3.7295) weight_decay: 0.0500 (0.0500) time: 0.7049 data: 0.0003 max mem: 57114 Epoch: [26] [110/156] eta: 0:00:34 lr: 0.007989 min_lr: 0.007989 loss: 4.0270 (3.7453) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0004 max mem: 57114 Epoch: [26] [120/156] eta: 0:00:27 lr: 0.007988 min_lr: 0.007988 loss: 4.0129 (3.7761) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [26] [130/156] eta: 0:00:19 lr: 0.007988 min_lr: 0.007988 loss: 4.0129 (3.7778) weight_decay: 0.0500 (0.0500) time: 0.6950 data: 0.0009 max mem: 57114 Epoch: [26] [140/156] eta: 0:00:11 lr: 0.007988 min_lr: 0.007988 loss: 3.8276 (3.7750) weight_decay: 0.0500 (0.0500) time: 0.6919 data: 0.0008 max mem: 57114 Epoch: [26] [150/156] eta: 0:00:04 lr: 0.007988 min_lr: 0.007988 loss: 3.7458 (3.7771) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.0001 max mem: 57114 Epoch: [26] [155/156] eta: 0:00:00 lr: 0.007988 min_lr: 0.007988 loss: 3.8459 (3.7806) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [26] Total time: 0:01:55 (0.7401 s / it) Averaged stats: lr: 0.007988 min_lr: 0.007988 loss: 3.8459 (3.8570) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.3004 (1.3004) acc1: 74.4792 (74.4792) acc5: 94.7917 (94.7917) time: 2.1010 data: 1.8455 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5600 (1.4948) acc1: 68.7500 (66.1125) acc5: 90.6250 (87.7238) time: 0.5709 data: 0.3692 max mem: 57114 Test: Total time: 0:00:02 (0.5937 s / it) * Acc@1 64.922 Acc@5 86.943 loss 1.597 Accuracy of the model on the 50000 test images: 64.9% Max accuracy: 65.15% Test: [0/5] eta: 0:00:11 loss: 6.9223 (6.9223) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.3030 data: 2.0596 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9223 (6.9232) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.6112 data: 0.4120 max mem: 57114 Test: Total time: 0:00:03 (0.6256 s / it) * Acc@1 0.092 Acc@5 0.482 loss 6.921 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [27] [ 0/156] eta: 0:13:57 lr: 0.007988 min_lr: 0.007988 loss: 3.1739 (3.1739) weight_decay: 0.0500 (0.0500) time: 5.3714 data: 3.8088 max mem: 57114 Epoch: [27] [ 10/156] eta: 0:02:47 lr: 0.007987 min_lr: 0.007987 loss: 4.0995 (3.8914) weight_decay: 0.0500 (0.0500) time: 1.1460 data: 0.3465 max mem: 57114 Epoch: [27] [ 20/156] eta: 0:02:08 lr: 0.007987 min_lr: 0.007987 loss: 3.9258 (3.7148) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0003 max mem: 57114 Epoch: [27] [ 30/156] eta: 0:01:50 lr: 0.007987 min_lr: 0.007987 loss: 3.7404 (3.7730) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0003 max mem: 57114 Epoch: [27] [ 40/156] eta: 0:01:36 lr: 0.007987 min_lr: 0.007987 loss: 3.9083 (3.8083) weight_decay: 0.0500 (0.0500) time: 0.7251 data: 0.0003 max mem: 57114 Epoch: [27] [ 50/156] eta: 0:01:25 lr: 0.007987 min_lr: 0.007987 loss: 3.9083 (3.8145) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0003 max mem: 57114 Epoch: [27] [ 60/156] eta: 0:01:16 lr: 0.007986 min_lr: 0.007986 loss: 3.7070 (3.7857) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0003 max mem: 57114 Epoch: [27] [ 70/156] eta: 0:01:07 lr: 0.007986 min_lr: 0.007986 loss: 3.7851 (3.7948) weight_decay: 0.0500 (0.0500) time: 0.7049 data: 0.0004 max mem: 57114 Epoch: [27] [ 80/156] eta: 0:00:58 lr: 0.007986 min_lr: 0.007986 loss: 3.8384 (3.7953) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [27] [ 90/156] eta: 0:00:50 lr: 0.007986 min_lr: 0.007986 loss: 3.6555 (3.7831) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [27] [100/156] eta: 0:00:42 lr: 0.007985 min_lr: 0.007985 loss: 3.9999 (3.8158) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0005 max mem: 57114 Epoch: [27] [110/156] eta: 0:00:34 lr: 0.007985 min_lr: 0.007985 loss: 4.0418 (3.8278) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0005 max mem: 57114 Epoch: [27] [120/156] eta: 0:00:26 lr: 0.007985 min_lr: 0.007985 loss: 4.0402 (3.8251) weight_decay: 0.0500 (0.0500) time: 0.6962 data: 0.0004 max mem: 57114 Epoch: [27] [130/156] eta: 0:00:19 lr: 0.007985 min_lr: 0.007985 loss: 3.8465 (3.8032) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0008 max mem: 57114 Epoch: [27] [140/156] eta: 0:00:11 lr: 0.007984 min_lr: 0.007984 loss: 3.8465 (3.7975) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0007 max mem: 57114 Epoch: [27] [150/156] eta: 0:00:04 lr: 0.007984 min_lr: 0.007984 loss: 3.9235 (3.7977) weight_decay: 0.0500 (0.0500) time: 0.6785 data: 0.0001 max mem: 57114 Epoch: [27] [155/156] eta: 0:00:00 lr: 0.007984 min_lr: 0.007984 loss: 3.9803 (3.7971) weight_decay: 0.0500 (0.0500) time: 0.6796 data: 0.0001 max mem: 57114 Epoch: [27] Total time: 0:01:54 (0.7367 s / it) Averaged stats: lr: 0.007984 min_lr: 0.007984 loss: 3.9803 (3.8432) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.2778 (1.2778) acc1: 78.1250 (78.1250) acc5: 93.7500 (93.7500) time: 2.1081 data: 1.8525 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.4551 (1.4668) acc1: 71.8750 (69.0537) acc5: 90.6250 (89.2583) time: 0.5722 data: 0.3706 max mem: 57114 Test: Total time: 0:00:02 (0.5942 s / it) * Acc@1 66.764 Acc@5 87.882 loss 1.578 Accuracy of the model on the 50000 test images: 66.8% Max accuracy: 66.76% Test: [0/5] eta: 0:00:10 loss: 6.9241 (6.9241) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.0302 data: 1.7865 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9241 (6.9248) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.1279) time: 0.5567 data: 0.3574 max mem: 57114 Test: Total time: 0:00:02 (0.5669 s / it) * Acc@1 0.092 Acc@5 0.488 loss 6.923 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [28] [ 0/156] eta: 0:13:41 lr: 0.007984 min_lr: 0.007984 loss: 4.0943 (4.0943) weight_decay: 0.0500 (0.0500) time: 5.2656 data: 4.1147 max mem: 57114 Epoch: [28] [ 10/156] eta: 0:02:53 lr: 0.007984 min_lr: 0.007984 loss: 4.0293 (3.8238) weight_decay: 0.0500 (0.0500) time: 1.1882 data: 0.3744 max mem: 57114 Epoch: [28] [ 20/156] eta: 0:02:10 lr: 0.007983 min_lr: 0.007983 loss: 4.0149 (3.8182) weight_decay: 0.0500 (0.0500) time: 0.7455 data: 0.0004 max mem: 57114 Epoch: [28] [ 30/156] eta: 0:01:50 lr: 0.007983 min_lr: 0.007983 loss: 3.9766 (3.8891) weight_decay: 0.0500 (0.0500) time: 0.7096 data: 0.0004 max mem: 57114 Epoch: [28] [ 40/156] eta: 0:01:38 lr: 0.007983 min_lr: 0.007983 loss: 3.9766 (3.9157) weight_decay: 0.0500 (0.0500) time: 0.7274 data: 0.0004 max mem: 57114 Epoch: [28] [ 50/156] eta: 0:01:26 lr: 0.007983 min_lr: 0.007983 loss: 3.8478 (3.8458) weight_decay: 0.0500 (0.0500) time: 0.7283 data: 0.0003 max mem: 57114 Epoch: [28] [ 60/156] eta: 0:01:17 lr: 0.007982 min_lr: 0.007982 loss: 3.5695 (3.8396) weight_decay: 0.0500 (0.0500) time: 0.7204 data: 0.0004 max mem: 57114 Epoch: [28] [ 70/156] eta: 0:01:08 lr: 0.007982 min_lr: 0.007982 loss: 4.0957 (3.8609) weight_decay: 0.0500 (0.0500) time: 0.7350 data: 0.0004 max mem: 57114 Epoch: [28] [ 80/156] eta: 0:00:59 lr: 0.007982 min_lr: 0.007982 loss: 4.1326 (3.8706) weight_decay: 0.0500 (0.0500) time: 0.7316 data: 0.0004 max mem: 57114 Epoch: [28] [ 90/156] eta: 0:00:51 lr: 0.007981 min_lr: 0.007981 loss: 3.9249 (3.8332) weight_decay: 0.0500 (0.0500) time: 0.7132 data: 0.0004 max mem: 57114 Epoch: [28] [100/156] eta: 0:00:43 lr: 0.007981 min_lr: 0.007981 loss: 3.9615 (3.8547) weight_decay: 0.0500 (0.0500) time: 0.6967 data: 0.0004 max mem: 57114 Epoch: [28] [110/156] eta: 0:00:35 lr: 0.007981 min_lr: 0.007981 loss: 4.1304 (3.8606) weight_decay: 0.0500 (0.0500) time: 0.6938 data: 0.0004 max mem: 57114 Epoch: [28] [120/156] eta: 0:00:27 lr: 0.007981 min_lr: 0.007981 loss: 4.0432 (3.8563) weight_decay: 0.0500 (0.0500) time: 0.6957 data: 0.0004 max mem: 57114 Epoch: [28] [130/156] eta: 0:00:19 lr: 0.007980 min_lr: 0.007980 loss: 3.9167 (3.8439) weight_decay: 0.0500 (0.0500) time: 0.6912 data: 0.0005 max mem: 57114 Epoch: [28] [140/156] eta: 0:00:11 lr: 0.007980 min_lr: 0.007980 loss: 3.9521 (3.8470) weight_decay: 0.0500 (0.0500) time: 0.6853 data: 0.0003 max mem: 57114 Epoch: [28] [150/156] eta: 0:00:04 lr: 0.007980 min_lr: 0.007980 loss: 3.9595 (3.8371) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0001 max mem: 57114 Epoch: [28] [155/156] eta: 0:00:00 lr: 0.007980 min_lr: 0.007980 loss: 4.0059 (3.8384) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0001 max mem: 57114 Epoch: [28] Total time: 0:01:56 (0.7437 s / it) Averaged stats: lr: 0.007980 min_lr: 0.007980 loss: 4.0059 (3.8187) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.3139 (1.3139) acc1: 74.4792 (74.4792) acc5: 90.1042 (90.1042) time: 2.0650 data: 1.8094 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.6599 (1.5776) acc1: 71.3542 (67.3913) acc5: 89.5833 (86.8286) time: 0.5637 data: 0.3620 max mem: 57114 Test: Total time: 0:00:02 (0.5886 s / it) * Acc@1 65.617 Acc@5 86.883 loss 1.658 Accuracy of the model on the 50000 test images: 65.6% Max accuracy: 66.76% Test: [0/5] eta: 0:00:11 loss: 6.9262 (6.9262) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.2900 data: 2.0465 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9262 (6.9262) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.6086 data: 0.4094 max mem: 57114 Test: Total time: 0:00:03 (0.6202 s / it) * Acc@1 0.096 Acc@5 0.480 loss 6.924 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [29] [ 0/156] eta: 0:12:59 lr: 0.007980 min_lr: 0.007980 loss: 2.7767 (2.7767) weight_decay: 0.0500 (0.0500) time: 4.9962 data: 3.5853 max mem: 57114 Epoch: [29] [ 10/156] eta: 0:02:40 lr: 0.007979 min_lr: 0.007979 loss: 4.0994 (3.9965) weight_decay: 0.0500 (0.0500) time: 1.0977 data: 0.3262 max mem: 57114 Epoch: [29] [ 20/156] eta: 0:02:04 lr: 0.007979 min_lr: 0.007979 loss: 4.0306 (3.7974) weight_decay: 0.0500 (0.0500) time: 0.7080 data: 0.0003 max mem: 57114 Epoch: [29] [ 30/156] eta: 0:01:46 lr: 0.007979 min_lr: 0.007979 loss: 3.7502 (3.8583) weight_decay: 0.0500 (0.0500) time: 0.7007 data: 0.0003 max mem: 57114 Epoch: [29] [ 40/156] eta: 0:01:34 lr: 0.007978 min_lr: 0.007978 loss: 3.9325 (3.8727) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0004 max mem: 57114 Epoch: [29] [ 50/156] eta: 0:01:24 lr: 0.007978 min_lr: 0.007978 loss: 3.9080 (3.8426) weight_decay: 0.0500 (0.0500) time: 0.7276 data: 0.0004 max mem: 57114 Epoch: [29] [ 60/156] eta: 0:01:15 lr: 0.007978 min_lr: 0.007978 loss: 3.9287 (3.8769) weight_decay: 0.0500 (0.0500) time: 0.7381 data: 0.0004 max mem: 57114 Epoch: [29] [ 70/156] eta: 0:01:06 lr: 0.007978 min_lr: 0.007978 loss: 3.9287 (3.8769) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0004 max mem: 57114 Epoch: [29] [ 80/156] eta: 0:00:58 lr: 0.007977 min_lr: 0.007977 loss: 3.9033 (3.8793) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [29] [ 90/156] eta: 0:00:50 lr: 0.007977 min_lr: 0.007977 loss: 3.8942 (3.8557) weight_decay: 0.0500 (0.0500) time: 0.6990 data: 0.0004 max mem: 57114 Epoch: [29] [100/156] eta: 0:00:42 lr: 0.007977 min_lr: 0.007977 loss: 3.8193 (3.8431) weight_decay: 0.0500 (0.0500) time: 0.6957 data: 0.0004 max mem: 57114 Epoch: [29] [110/156] eta: 0:00:34 lr: 0.007976 min_lr: 0.007976 loss: 3.9441 (3.8464) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0004 max mem: 57114 Epoch: [29] [120/156] eta: 0:00:26 lr: 0.007976 min_lr: 0.007976 loss: 3.8500 (3.8474) weight_decay: 0.0500 (0.0500) time: 0.6986 data: 0.0004 max mem: 57114 Epoch: [29] [130/156] eta: 0:00:19 lr: 0.007976 min_lr: 0.007976 loss: 3.8500 (3.8515) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0009 max mem: 57114 Epoch: [29] [140/156] eta: 0:00:11 lr: 0.007975 min_lr: 0.007975 loss: 4.0755 (3.8607) weight_decay: 0.0500 (0.0500) time: 0.6825 data: 0.0008 max mem: 57114 Epoch: [29] [150/156] eta: 0:00:04 lr: 0.007975 min_lr: 0.007975 loss: 4.0132 (3.8714) weight_decay: 0.0500 (0.0500) time: 0.6776 data: 0.0001 max mem: 57114 Epoch: [29] [155/156] eta: 0:00:00 lr: 0.007975 min_lr: 0.007975 loss: 3.9953 (3.8654) weight_decay: 0.0500 (0.0500) time: 0.6789 data: 0.0001 max mem: 57114 Epoch: [29] Total time: 0:01:54 (0.7323 s / it) Averaged stats: lr: 0.007975 min_lr: 0.007975 loss: 3.9953 (3.7956) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.6090 (1.6090) acc1: 75.5208 (75.5208) acc5: 93.2292 (93.2292) time: 2.0708 data: 1.8147 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.8204 (1.7830) acc1: 63.5417 (63.0435) acc5: 89.5833 (86.8286) time: 0.5648 data: 0.3630 max mem: 57114 Test: Total time: 0:00:02 (0.5850 s / it) * Acc@1 63.999 Acc@5 86.225 loss 1.884 Accuracy of the model on the 50000 test images: 64.0% Max accuracy: 66.76% Test: [0/5] eta: 0:00:11 loss: 6.9285 (6.9285) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.2696 data: 2.0261 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9285 (6.9275) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.6045 data: 0.4053 max mem: 57114 Test: Total time: 0:00:03 (0.6157 s / it) * Acc@1 0.098 Acc@5 0.472 loss 6.925 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [30] [ 0/156] eta: 0:13:14 lr: 0.007975 min_lr: 0.007975 loss: 3.9167 (3.9167) weight_decay: 0.0500 (0.0500) time: 5.0952 data: 3.8931 max mem: 57114 Epoch: [30] [ 10/156] eta: 0:02:45 lr: 0.007975 min_lr: 0.007975 loss: 3.9715 (3.8715) weight_decay: 0.0500 (0.0500) time: 1.1330 data: 0.3542 max mem: 57114 Epoch: [30] [ 20/156] eta: 0:02:07 lr: 0.007974 min_lr: 0.007974 loss: 4.1074 (4.0107) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0003 max mem: 57114 Epoch: [30] [ 30/156] eta: 0:01:50 lr: 0.007974 min_lr: 0.007974 loss: 3.8612 (3.9097) weight_decay: 0.0500 (0.0500) time: 0.7337 data: 0.0004 max mem: 57114 Epoch: [30] [ 40/156] eta: 0:01:37 lr: 0.007974 min_lr: 0.007974 loss: 3.9601 (3.8988) weight_decay: 0.0500 (0.0500) time: 0.7405 data: 0.0004 max mem: 57114 Epoch: [30] [ 50/156] eta: 0:01:26 lr: 0.007973 min_lr: 0.007973 loss: 4.0111 (3.8440) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0004 max mem: 57114 Epoch: [30] [ 60/156] eta: 0:01:16 lr: 0.007973 min_lr: 0.007973 loss: 3.8022 (3.8480) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [30] [ 70/156] eta: 0:01:07 lr: 0.007973 min_lr: 0.007973 loss: 4.0092 (3.8435) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [30] [ 80/156] eta: 0:00:58 lr: 0.007972 min_lr: 0.007972 loss: 3.9162 (3.8069) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0004 max mem: 57114 Epoch: [30] [ 90/156] eta: 0:00:50 lr: 0.007972 min_lr: 0.007972 loss: 3.7935 (3.8057) weight_decay: 0.0500 (0.0500) time: 0.6932 data: 0.0004 max mem: 57114 Epoch: [30] [100/156] eta: 0:00:42 lr: 0.007972 min_lr: 0.007972 loss: 3.7712 (3.8077) weight_decay: 0.0500 (0.0500) time: 0.6995 data: 0.0004 max mem: 57114 Epoch: [30] [110/156] eta: 0:00:34 lr: 0.007971 min_lr: 0.007971 loss: 3.9713 (3.8170) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [30] [120/156] eta: 0:00:26 lr: 0.007971 min_lr: 0.007971 loss: 3.9713 (3.8164) weight_decay: 0.0500 (0.0500) time: 0.7019 data: 0.0004 max mem: 57114 Epoch: [30] [130/156] eta: 0:00:19 lr: 0.007970 min_lr: 0.007970 loss: 3.8336 (3.8117) weight_decay: 0.0500 (0.0500) time: 0.6924 data: 0.0009 max mem: 57114 Epoch: [30] [140/156] eta: 0:00:11 lr: 0.007970 min_lr: 0.007970 loss: 3.8460 (3.8135) weight_decay: 0.0500 (0.0500) time: 0.6827 data: 0.0007 max mem: 57114 Epoch: [30] [150/156] eta: 0:00:04 lr: 0.007970 min_lr: 0.007970 loss: 3.8431 (3.8036) weight_decay: 0.0500 (0.0500) time: 0.6809 data: 0.0001 max mem: 57114 Epoch: [30] [155/156] eta: 0:00:00 lr: 0.007970 min_lr: 0.007970 loss: 3.7716 (3.8097) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0001 max mem: 57114 Epoch: [30] Total time: 0:01:54 (0.7369 s / it) Averaged stats: lr: 0.007970 min_lr: 0.007970 loss: 3.7716 (3.7764) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.3972 (1.3972) acc1: 72.3958 (72.3958) acc5: 92.7083 (92.7083) time: 2.1536 data: 1.8981 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5573 (1.5089) acc1: 71.3542 (66.7519) acc5: 91.1458 (88.6189) time: 0.5813 data: 0.3797 max mem: 57114 Test: Total time: 0:00:03 (0.6056 s / it) * Acc@1 65.072 Acc@5 87.302 loss 1.654 Accuracy of the model on the 50000 test images: 65.1% Max accuracy: 66.76% Test: [0/5] eta: 0:00:11 loss: 6.9323 (6.9323) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.3620 data: 2.1185 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9308 (6.9290) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.6231 data: 0.4238 max mem: 57114 Test: Total time: 0:00:03 (0.6346 s / it) * Acc@1 0.098 Acc@5 0.470 loss 6.926 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [31] [ 0/156] eta: 0:12:54 lr: 0.007970 min_lr: 0.007970 loss: 4.5630 (4.5630) weight_decay: 0.0500 (0.0500) time: 4.9625 data: 3.9091 max mem: 57114 Epoch: [31] [ 10/156] eta: 0:02:43 lr: 0.007969 min_lr: 0.007969 loss: 3.9857 (3.7954) weight_decay: 0.0500 (0.0500) time: 1.1231 data: 0.3557 max mem: 57114 Epoch: [31] [ 20/156] eta: 0:02:05 lr: 0.007969 min_lr: 0.007969 loss: 3.7119 (3.6234) weight_decay: 0.0500 (0.0500) time: 0.7234 data: 0.0003 max mem: 57114 Epoch: [31] [ 30/156] eta: 0:01:48 lr: 0.007969 min_lr: 0.007969 loss: 3.3602 (3.5191) weight_decay: 0.0500 (0.0500) time: 0.7197 data: 0.0003 max mem: 57114 Epoch: [31] [ 40/156] eta: 0:01:36 lr: 0.007968 min_lr: 0.007968 loss: 3.5574 (3.5746) weight_decay: 0.0500 (0.0500) time: 0.7256 data: 0.0004 max mem: 57114 Epoch: [31] [ 50/156] eta: 0:01:25 lr: 0.007968 min_lr: 0.007968 loss: 3.9190 (3.6047) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0004 max mem: 57114 Epoch: [31] [ 60/156] eta: 0:01:16 lr: 0.007967 min_lr: 0.007967 loss: 3.6770 (3.5983) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0004 max mem: 57114 Epoch: [31] [ 70/156] eta: 0:01:06 lr: 0.007967 min_lr: 0.007967 loss: 3.7717 (3.6339) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [31] [ 80/156] eta: 0:00:58 lr: 0.007967 min_lr: 0.007967 loss: 3.8264 (3.6386) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [31] [ 90/156] eta: 0:00:50 lr: 0.007966 min_lr: 0.007966 loss: 3.7124 (3.6486) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0004 max mem: 57114 Epoch: [31] [100/156] eta: 0:00:42 lr: 0.007966 min_lr: 0.007966 loss: 3.9621 (3.6906) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [31] [110/156] eta: 0:00:34 lr: 0.007966 min_lr: 0.007966 loss: 4.0759 (3.7058) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [31] [120/156] eta: 0:00:26 lr: 0.007965 min_lr: 0.007965 loss: 4.0728 (3.7140) weight_decay: 0.0500 (0.0500) time: 0.7023 data: 0.0004 max mem: 57114 Epoch: [31] [130/156] eta: 0:00:19 lr: 0.007965 min_lr: 0.007965 loss: 3.9085 (3.7064) weight_decay: 0.0500 (0.0500) time: 0.6977 data: 0.0008 max mem: 57114 Epoch: [31] [140/156] eta: 0:00:11 lr: 0.007964 min_lr: 0.007964 loss: 3.9463 (3.7239) weight_decay: 0.0500 (0.0500) time: 0.6871 data: 0.0007 max mem: 57114 Epoch: [31] [150/156] eta: 0:00:04 lr: 0.007964 min_lr: 0.007964 loss: 4.0518 (3.7420) weight_decay: 0.0500 (0.0500) time: 0.6807 data: 0.0001 max mem: 57114 Epoch: [31] [155/156] eta: 0:00:00 lr: 0.007964 min_lr: 0.007964 loss: 3.9463 (3.7370) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [31] Total time: 0:01:55 (0.7374 s / it) Averaged stats: lr: 0.007964 min_lr: 0.007964 loss: 3.9463 (3.7560) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.5930 (1.5930) acc1: 71.3542 (71.3542) acc5: 92.1875 (92.1875) time: 2.1231 data: 1.8676 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.6890 (1.6459) acc1: 69.7917 (66.4962) acc5: 89.5833 (86.9565) time: 0.5753 data: 0.3736 max mem: 57114 Test: Total time: 0:00:02 (0.5988 s / it) * Acc@1 66.027 Acc@5 87.188 loss 1.738 Accuracy of the model on the 50000 test images: 66.0% Max accuracy: 66.76% Test: [0/5] eta: 0:00:11 loss: 6.9361 (6.9361) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.3802 data: 2.1367 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9328 (6.9308) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.6267 data: 0.4274 max mem: 57114 Test: Total time: 0:00:03 (0.6398 s / it) * Acc@1 0.094 Acc@5 0.470 loss 6.928 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [32] [ 0/156] eta: 0:12:38 lr: 0.007964 min_lr: 0.007964 loss: 3.6025 (3.6025) weight_decay: 0.0500 (0.0500) time: 4.8632 data: 3.1122 max mem: 57114 Epoch: [32] [ 10/156] eta: 0:02:40 lr: 0.007963 min_lr: 0.007963 loss: 3.9496 (3.7952) weight_decay: 0.0500 (0.0500) time: 1.0977 data: 0.2833 max mem: 57114 Epoch: [32] [ 20/156] eta: 0:02:04 lr: 0.007963 min_lr: 0.007963 loss: 3.9496 (3.8267) weight_decay: 0.0500 (0.0500) time: 0.7157 data: 0.0004 max mem: 57114 Epoch: [32] [ 30/156] eta: 0:01:47 lr: 0.007963 min_lr: 0.007963 loss: 3.7261 (3.7474) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0003 max mem: 57114 Epoch: [32] [ 40/156] eta: 0:01:35 lr: 0.007962 min_lr: 0.007962 loss: 3.7250 (3.7110) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0004 max mem: 57114 Epoch: [32] [ 50/156] eta: 0:01:24 lr: 0.007962 min_lr: 0.007962 loss: 3.7250 (3.7123) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0004 max mem: 57114 Epoch: [32] [ 60/156] eta: 0:01:14 lr: 0.007961 min_lr: 0.007961 loss: 3.7062 (3.6961) weight_decay: 0.0500 (0.0500) time: 0.7004 data: 0.0004 max mem: 57114 Epoch: [32] [ 70/156] eta: 0:01:06 lr: 0.007961 min_lr: 0.007961 loss: 3.9869 (3.7317) weight_decay: 0.0500 (0.0500) time: 0.7146 data: 0.0004 max mem: 57114 Epoch: [32] [ 80/156] eta: 0:00:58 lr: 0.007961 min_lr: 0.007961 loss: 3.8539 (3.7093) weight_decay: 0.0500 (0.0500) time: 0.7145 data: 0.0004 max mem: 57114 Epoch: [32] [ 90/156] eta: 0:00:50 lr: 0.007960 min_lr: 0.007960 loss: 3.8242 (3.7193) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0004 max mem: 57114 Epoch: [32] [100/156] eta: 0:00:42 lr: 0.007960 min_lr: 0.007960 loss: 3.8242 (3.6967) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0004 max mem: 57114 Epoch: [32] [110/156] eta: 0:00:34 lr: 0.007959 min_lr: 0.007959 loss: 3.7176 (3.7019) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0004 max mem: 57114 Epoch: [32] [120/156] eta: 0:00:26 lr: 0.007959 min_lr: 0.007959 loss: 3.9420 (3.7101) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0004 max mem: 57114 Epoch: [32] [130/156] eta: 0:00:19 lr: 0.007959 min_lr: 0.007959 loss: 3.7780 (3.7061) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0008 max mem: 57114 Epoch: [32] [140/156] eta: 0:00:11 lr: 0.007958 min_lr: 0.007958 loss: 3.7780 (3.7245) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0007 max mem: 57114 Epoch: [32] [150/156] eta: 0:00:04 lr: 0.007958 min_lr: 0.007958 loss: 3.8998 (3.7362) weight_decay: 0.0500 (0.0500) time: 0.6778 data: 0.0001 max mem: 57114 Epoch: [32] [155/156] eta: 0:00:00 lr: 0.007958 min_lr: 0.007958 loss: 3.8506 (3.7230) weight_decay: 0.0500 (0.0500) time: 0.6794 data: 0.0001 max mem: 57114 Epoch: [32] Total time: 0:01:54 (0.7326 s / it) Averaged stats: lr: 0.007958 min_lr: 0.007958 loss: 3.8506 (3.7440) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.2513 (1.2513) acc1: 75.0000 (75.0000) acc5: 94.7917 (94.7917) time: 2.0910 data: 1.8351 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5180 (1.4404) acc1: 69.2708 (68.2864) acc5: 90.1042 (88.4911) time: 0.5688 data: 0.3671 max mem: 57114 Test: Total time: 0:00:02 (0.5936 s / it) * Acc@1 68.071 Acc@5 88.681 loss 1.540 Accuracy of the model on the 50000 test images: 68.1% Max accuracy: 68.07% Test: [0/5] eta: 0:00:10 loss: 6.9416 (6.9416) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.0046 data: 1.7611 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9344 (6.9327) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.2558) time: 0.5515 data: 0.3523 max mem: 57114 Test: Total time: 0:00:02 (0.5635 s / it) * Acc@1 0.102 Acc@5 0.474 loss 6.930 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [33] [ 0/156] eta: 0:13:56 lr: 0.007958 min_lr: 0.007958 loss: 4.4507 (4.4507) weight_decay: 0.0500 (0.0500) time: 5.3620 data: 3.8129 max mem: 57114 Epoch: [33] [ 10/156] eta: 0:02:52 lr: 0.007957 min_lr: 0.007957 loss: 3.9710 (3.9178) weight_decay: 0.0500 (0.0500) time: 1.1810 data: 0.3469 max mem: 57114 Epoch: [33] [ 20/156] eta: 0:02:13 lr: 0.007957 min_lr: 0.007957 loss: 3.7970 (3.8800) weight_decay: 0.0500 (0.0500) time: 0.7619 data: 0.0003 max mem: 57114 Epoch: [33] [ 30/156] eta: 0:01:53 lr: 0.007956 min_lr: 0.007956 loss: 3.7739 (3.8201) weight_decay: 0.0500 (0.0500) time: 0.7414 data: 0.0003 max mem: 57114 Epoch: [33] [ 40/156] eta: 0:01:38 lr: 0.007956 min_lr: 0.007956 loss: 3.6692 (3.7751) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0003 max mem: 57114 Epoch: [33] [ 50/156] eta: 0:01:28 lr: 0.007955 min_lr: 0.007955 loss: 3.6692 (3.7686) weight_decay: 0.0500 (0.0500) time: 0.7278 data: 0.0003 max mem: 57114 Epoch: [33] [ 60/156] eta: 0:01:18 lr: 0.007955 min_lr: 0.007955 loss: 3.9719 (3.8019) weight_decay: 0.0500 (0.0500) time: 0.7387 data: 0.0003 max mem: 57114 Epoch: [33] [ 70/156] eta: 0:01:08 lr: 0.007955 min_lr: 0.007955 loss: 4.0939 (3.8239) weight_decay: 0.0500 (0.0500) time: 0.7228 data: 0.0003 max mem: 57114 Epoch: [33] [ 80/156] eta: 0:01:00 lr: 0.007954 min_lr: 0.007954 loss: 3.9007 (3.8031) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [33] [ 90/156] eta: 0:00:51 lr: 0.007954 min_lr: 0.007954 loss: 3.9007 (3.8229) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0004 max mem: 57114 Epoch: [33] [100/156] eta: 0:00:43 lr: 0.007953 min_lr: 0.007953 loss: 3.8879 (3.8079) weight_decay: 0.0500 (0.0500) time: 0.7013 data: 0.0004 max mem: 57114 Epoch: [33] [110/156] eta: 0:00:35 lr: 0.007953 min_lr: 0.007953 loss: 3.7898 (3.8142) weight_decay: 0.0500 (0.0500) time: 0.7028 data: 0.0004 max mem: 57114 Epoch: [33] [120/156] eta: 0:00:27 lr: 0.007952 min_lr: 0.007952 loss: 3.7898 (3.8045) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0004 max mem: 57114 Epoch: [33] [130/156] eta: 0:00:19 lr: 0.007952 min_lr: 0.007952 loss: 3.7862 (3.8044) weight_decay: 0.0500 (0.0500) time: 0.6985 data: 0.0008 max mem: 57114 Epoch: [33] [140/156] eta: 0:00:12 lr: 0.007951 min_lr: 0.007951 loss: 3.8975 (3.8078) weight_decay: 0.0500 (0.0500) time: 0.6916 data: 0.0007 max mem: 57114 Epoch: [33] [150/156] eta: 0:00:04 lr: 0.007951 min_lr: 0.007951 loss: 3.8975 (3.8037) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0001 max mem: 57114 Epoch: [33] [155/156] eta: 0:00:00 lr: 0.007951 min_lr: 0.007951 loss: 3.9580 (3.8116) weight_decay: 0.0500 (0.0500) time: 0.6853 data: 0.0001 max mem: 57114 Epoch: [33] Total time: 0:01:56 (0.7470 s / it) Averaged stats: lr: 0.007951 min_lr: 0.007951 loss: 3.9580 (3.7344) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1119 (1.1119) acc1: 81.2500 (81.2500) acc5: 94.7917 (94.7917) time: 2.0279 data: 1.7725 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3044 (1.3333) acc1: 72.3958 (70.7161) acc5: 92.7083 (89.3862) time: 0.5563 data: 0.3546 max mem: 57114 Test: Total time: 0:00:02 (0.5822 s / it) * Acc@1 68.598 Acc@5 89.186 loss 1.411 Accuracy of the model on the 50000 test images: 68.6% Max accuracy: 68.60% Test: [0/5] eta: 0:00:10 loss: 6.9479 (6.9479) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.1013 data: 1.8578 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9359 (6.9349) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.3836) time: 0.5709 data: 0.3717 max mem: 57114 Test: Total time: 0:00:02 (0.5837 s / it) * Acc@1 0.098 Acc@5 0.474 loss 6.931 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [34] [ 0/156] eta: 0:13:21 lr: 0.007951 min_lr: 0.007951 loss: 3.4468 (3.4468) weight_decay: 0.0500 (0.0500) time: 5.1360 data: 4.2764 max mem: 57114 Epoch: [34] [ 10/156] eta: 0:02:48 lr: 0.007950 min_lr: 0.007950 loss: 3.4468 (3.4604) weight_decay: 0.0500 (0.0500) time: 1.1537 data: 0.3891 max mem: 57114 Epoch: [34] [ 20/156] eta: 0:02:09 lr: 0.007950 min_lr: 0.007950 loss: 3.5855 (3.5394) weight_decay: 0.0500 (0.0500) time: 0.7452 data: 0.0003 max mem: 57114 Epoch: [34] [ 30/156] eta: 0:01:51 lr: 0.007949 min_lr: 0.007949 loss: 3.5290 (3.5308) weight_decay: 0.0500 (0.0500) time: 0.7345 data: 0.0003 max mem: 57114 Epoch: [34] [ 40/156] eta: 0:01:38 lr: 0.007949 min_lr: 0.007949 loss: 3.8317 (3.6150) weight_decay: 0.0500 (0.0500) time: 0.7316 data: 0.0003 max mem: 57114 Epoch: [34] [ 50/156] eta: 0:01:27 lr: 0.007948 min_lr: 0.007948 loss: 3.9799 (3.6551) weight_decay: 0.0500 (0.0500) time: 0.7261 data: 0.0004 max mem: 57114 Epoch: [34] [ 60/156] eta: 0:01:17 lr: 0.007948 min_lr: 0.007948 loss: 3.9158 (3.6535) weight_decay: 0.0500 (0.0500) time: 0.7294 data: 0.0004 max mem: 57114 Epoch: [34] [ 70/156] eta: 0:01:08 lr: 0.007948 min_lr: 0.007948 loss: 3.6094 (3.6431) weight_decay: 0.0500 (0.0500) time: 0.7269 data: 0.0004 max mem: 57114 Epoch: [34] [ 80/156] eta: 0:00:59 lr: 0.007947 min_lr: 0.007947 loss: 3.6094 (3.6344) weight_decay: 0.0500 (0.0500) time: 0.7308 data: 0.0004 max mem: 57114 Epoch: [34] [ 90/156] eta: 0:00:51 lr: 0.007947 min_lr: 0.007947 loss: 3.7552 (3.6392) weight_decay: 0.0500 (0.0500) time: 0.7257 data: 0.0004 max mem: 57114 Epoch: [34] [100/156] eta: 0:00:43 lr: 0.007946 min_lr: 0.007946 loss: 3.9331 (3.6647) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0004 max mem: 57114 Epoch: [34] [110/156] eta: 0:00:35 lr: 0.007946 min_lr: 0.007946 loss: 3.9067 (3.6760) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0004 max mem: 57114 Epoch: [34] [120/156] eta: 0:00:27 lr: 0.007945 min_lr: 0.007945 loss: 3.6396 (3.6538) weight_decay: 0.0500 (0.0500) time: 0.7128 data: 0.0004 max mem: 57114 Epoch: [34] [130/156] eta: 0:00:19 lr: 0.007945 min_lr: 0.007945 loss: 3.8057 (3.6621) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0009 max mem: 57114 Epoch: [34] [140/156] eta: 0:00:12 lr: 0.007944 min_lr: 0.007944 loss: 3.7750 (3.6666) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0007 max mem: 57114 Epoch: [34] [150/156] eta: 0:00:04 lr: 0.007944 min_lr: 0.007944 loss: 3.7750 (3.6724) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0001 max mem: 57114 Epoch: [34] [155/156] eta: 0:00:00 lr: 0.007944 min_lr: 0.007944 loss: 3.8798 (3.6700) weight_decay: 0.0500 (0.0500) time: 0.6900 data: 0.0001 max mem: 57114 Epoch: [34] Total time: 0:01:56 (0.7485 s / it) Averaged stats: lr: 0.007944 min_lr: 0.007944 loss: 3.8798 (3.7164) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1092 (1.1092) acc1: 78.1250 (78.1250) acc5: 95.3125 (95.3125) time: 2.0324 data: 1.7768 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3172 (1.3167) acc1: 76.5625 (69.6931) acc5: 92.1875 (89.7698) time: 0.5572 data: 0.3554 max mem: 57114 Test: Total time: 0:00:02 (0.5768 s / it) * Acc@1 68.824 Acc@5 89.520 loss 1.415 Accuracy of the model on the 50000 test images: 68.8% Max accuracy: 68.82% Test: [0/5] eta: 0:00:10 loss: 6.9542 (6.9542) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.0794 data: 1.8359 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9381 (6.9373) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.5664 data: 0.3673 max mem: 57114 Test: Total time: 0:00:02 (0.5778 s / it) * Acc@1 0.100 Acc@5 0.476 loss 6.934 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [35] [ 0/156] eta: 0:14:03 lr: 0.007943 min_lr: 0.007943 loss: 4.0256 (4.0256) weight_decay: 0.0500 (0.0500) time: 5.4058 data: 3.9358 max mem: 57114 Epoch: [35] [ 10/156] eta: 0:02:49 lr: 0.007943 min_lr: 0.007943 loss: 3.9828 (3.9388) weight_decay: 0.0500 (0.0500) time: 1.1611 data: 0.3581 max mem: 57114 Epoch: [35] [ 20/156] eta: 0:02:09 lr: 0.007943 min_lr: 0.007943 loss: 3.9179 (3.8044) weight_decay: 0.0500 (0.0500) time: 0.7295 data: 0.0003 max mem: 57114 Epoch: [35] [ 30/156] eta: 0:01:52 lr: 0.007942 min_lr: 0.007942 loss: 3.6061 (3.7141) weight_decay: 0.0500 (0.0500) time: 0.7413 data: 0.0003 max mem: 57114 Epoch: [35] [ 40/156] eta: 0:01:38 lr: 0.007942 min_lr: 0.007942 loss: 3.5662 (3.6798) weight_decay: 0.0500 (0.0500) time: 0.7464 data: 0.0004 max mem: 57114 Epoch: [35] [ 50/156] eta: 0:01:27 lr: 0.007941 min_lr: 0.007941 loss: 3.8813 (3.7079) weight_decay: 0.0500 (0.0500) time: 0.7340 data: 0.0004 max mem: 57114 Epoch: [35] [ 60/156] eta: 0:01:18 lr: 0.007941 min_lr: 0.007941 loss: 3.8476 (3.6967) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0004 max mem: 57114 Epoch: [35] [ 70/156] eta: 0:01:08 lr: 0.007940 min_lr: 0.007940 loss: 3.8438 (3.6944) weight_decay: 0.0500 (0.0500) time: 0.7307 data: 0.0004 max mem: 57114 Epoch: [35] [ 80/156] eta: 0:00:59 lr: 0.007940 min_lr: 0.007940 loss: 3.6790 (3.6813) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [35] [ 90/156] eta: 0:00:51 lr: 0.007939 min_lr: 0.007939 loss: 3.6624 (3.6524) weight_decay: 0.0500 (0.0500) time: 0.6995 data: 0.0005 max mem: 57114 Epoch: [35] [100/156] eta: 0:00:43 lr: 0.007939 min_lr: 0.007939 loss: 3.7012 (3.6442) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0005 max mem: 57114 Epoch: [35] [110/156] eta: 0:00:35 lr: 0.007938 min_lr: 0.007938 loss: 3.7478 (3.6628) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [35] [120/156] eta: 0:00:27 lr: 0.007938 min_lr: 0.007938 loss: 3.9049 (3.6522) weight_decay: 0.0500 (0.0500) time: 0.6969 data: 0.0004 max mem: 57114 Epoch: [35] [130/156] eta: 0:00:19 lr: 0.007937 min_lr: 0.007937 loss: 3.9049 (3.6717) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0008 max mem: 57114 Epoch: [35] [140/156] eta: 0:00:11 lr: 0.007937 min_lr: 0.007937 loss: 3.7660 (3.6665) weight_decay: 0.0500 (0.0500) time: 0.6869 data: 0.0007 max mem: 57114 Epoch: [35] [150/156] eta: 0:00:04 lr: 0.007936 min_lr: 0.007936 loss: 3.7223 (3.6618) weight_decay: 0.0500 (0.0500) time: 0.6757 data: 0.0001 max mem: 57114 Epoch: [35] [155/156] eta: 0:00:00 lr: 0.007936 min_lr: 0.007936 loss: 3.8752 (3.6588) weight_decay: 0.0500 (0.0500) time: 0.6761 data: 0.0001 max mem: 57114 Epoch: [35] Total time: 0:01:56 (0.7449 s / it) Averaged stats: lr: 0.007936 min_lr: 0.007936 loss: 3.8752 (3.6940) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.2144 (1.2144) acc1: 77.6042 (77.6042) acc5: 93.7500 (93.7500) time: 2.0428 data: 1.7870 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5003 (1.4708) acc1: 73.4375 (67.9028) acc5: 90.6250 (88.6189) time: 0.5592 data: 0.3575 max mem: 57114 Test: Total time: 0:00:02 (0.5833 s / it) * Acc@1 67.565 Acc@5 88.505 loss 1.538 Accuracy of the model on the 50000 test images: 67.6% Max accuracy: 68.82% Test: [0/5] eta: 0:00:11 loss: 6.9620 (6.9620) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.3284 data: 2.0849 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9404 (6.9403) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.6163 data: 0.4171 max mem: 57114 Test: Total time: 0:00:03 (0.6298 s / it) * Acc@1 0.096 Acc@5 0.488 loss 6.936 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [36] [ 0/156] eta: 0:13:35 lr: 0.007936 min_lr: 0.007936 loss: 2.9476 (2.9476) weight_decay: 0.0500 (0.0500) time: 5.2257 data: 3.4122 max mem: 57114 Epoch: [36] [ 10/156] eta: 0:02:46 lr: 0.007935 min_lr: 0.007935 loss: 4.0111 (3.8144) weight_decay: 0.0500 (0.0500) time: 1.1397 data: 0.3105 max mem: 57114 Epoch: [36] [ 20/156] eta: 0:02:08 lr: 0.007935 min_lr: 0.007935 loss: 4.0503 (3.8929) weight_decay: 0.0500 (0.0500) time: 0.7341 data: 0.0003 max mem: 57114 Epoch: [36] [ 30/156] eta: 0:01:50 lr: 0.007934 min_lr: 0.007934 loss: 3.8711 (3.7776) weight_decay: 0.0500 (0.0500) time: 0.7384 data: 0.0003 max mem: 57114 Epoch: [36] [ 40/156] eta: 0:01:37 lr: 0.007934 min_lr: 0.007934 loss: 3.7295 (3.7160) weight_decay: 0.0500 (0.0500) time: 0.7277 data: 0.0003 max mem: 57114 Epoch: [36] [ 50/156] eta: 0:01:26 lr: 0.007933 min_lr: 0.007933 loss: 3.6506 (3.7392) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [36] [ 60/156] eta: 0:01:16 lr: 0.007933 min_lr: 0.007933 loss: 3.7391 (3.7254) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [36] [ 70/156] eta: 0:01:07 lr: 0.007932 min_lr: 0.007932 loss: 3.7391 (3.7271) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [36] [ 80/156] eta: 0:00:58 lr: 0.007932 min_lr: 0.007932 loss: 3.8442 (3.7338) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0004 max mem: 57114 Epoch: [36] [ 90/156] eta: 0:00:50 lr: 0.007931 min_lr: 0.007931 loss: 4.0621 (3.7321) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0004 max mem: 57114 Epoch: [36] [100/156] eta: 0:00:42 lr: 0.007930 min_lr: 0.007930 loss: 3.7939 (3.7091) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0003 max mem: 57114 Epoch: [36] [110/156] eta: 0:00:34 lr: 0.007930 min_lr: 0.007930 loss: 3.7172 (3.7034) weight_decay: 0.0500 (0.0500) time: 0.7048 data: 0.0003 max mem: 57114 Epoch: [36] [120/156] eta: 0:00:27 lr: 0.007929 min_lr: 0.007929 loss: 3.6282 (3.6786) weight_decay: 0.0500 (0.0500) time: 0.6957 data: 0.0003 max mem: 57114 Epoch: [36] [130/156] eta: 0:00:19 lr: 0.007929 min_lr: 0.007929 loss: 3.6600 (3.6855) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0008 max mem: 57114 Epoch: [36] [140/156] eta: 0:00:11 lr: 0.007928 min_lr: 0.007928 loss: 3.7901 (3.6927) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0007 max mem: 57114 Epoch: [36] [150/156] eta: 0:00:04 lr: 0.007928 min_lr: 0.007928 loss: 3.8182 (3.6970) weight_decay: 0.0500 (0.0500) time: 0.6825 data: 0.0001 max mem: 57114 Epoch: [36] [155/156] eta: 0:00:00 lr: 0.007928 min_lr: 0.007928 loss: 3.7901 (3.6960) weight_decay: 0.0500 (0.0500) time: 0.6768 data: 0.0001 max mem: 57114 Epoch: [36] Total time: 0:01:55 (0.7390 s / it) Averaged stats: lr: 0.007928 min_lr: 0.007928 loss: 3.7901 (3.6858) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1260 (1.1260) acc1: 79.1667 (79.1667) acc5: 94.7917 (94.7917) time: 2.1516 data: 1.8953 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2824 (1.2360) acc1: 72.9167 (72.2506) acc5: 92.1875 (90.7928) time: 0.5811 data: 0.3791 max mem: 57114 Test: Total time: 0:00:03 (0.6050 s / it) * Acc@1 68.736 Acc@5 89.100 loss 1.415 Accuracy of the model on the 50000 test images: 68.7% Max accuracy: 68.82% Test: [0/5] eta: 0:00:11 loss: 6.9694 (6.9694) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.2865 data: 2.0431 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9421 (6.9423) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.6079 data: 0.4087 max mem: 57114 Test: Total time: 0:00:03 (0.6179 s / it) * Acc@1 0.090 Acc@5 0.494 loss 6.938 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [37] [ 0/156] eta: 0:13:01 lr: 0.007927 min_lr: 0.007927 loss: 2.7302 (2.7302) weight_decay: 0.0500 (0.0500) time: 5.0096 data: 4.0693 max mem: 57114 Epoch: [37] [ 10/156] eta: 0:02:46 lr: 0.007927 min_lr: 0.007927 loss: 3.6475 (3.6055) weight_decay: 0.0500 (0.0500) time: 1.1376 data: 0.3702 max mem: 57114 Epoch: [37] [ 20/156] eta: 0:02:07 lr: 0.007926 min_lr: 0.007926 loss: 3.6475 (3.5942) weight_decay: 0.0500 (0.0500) time: 0.7367 data: 0.0004 max mem: 57114 Epoch: [37] [ 30/156] eta: 0:01:49 lr: 0.007926 min_lr: 0.007926 loss: 3.5704 (3.6485) weight_decay: 0.0500 (0.0500) time: 0.7183 data: 0.0003 max mem: 57114 Epoch: [37] [ 40/156] eta: 0:01:36 lr: 0.007925 min_lr: 0.007925 loss: 3.6311 (3.6666) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0003 max mem: 57114 Epoch: [37] [ 50/156] eta: 0:01:26 lr: 0.007925 min_lr: 0.007925 loss: 3.8632 (3.6784) weight_decay: 0.0500 (0.0500) time: 0.7273 data: 0.0004 max mem: 57114 Epoch: [37] [ 60/156] eta: 0:01:16 lr: 0.007924 min_lr: 0.007924 loss: 3.9054 (3.7118) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0004 max mem: 57114 Epoch: [37] [ 70/156] eta: 0:01:07 lr: 0.007924 min_lr: 0.007924 loss: 3.7960 (3.6902) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0005 max mem: 57114 Epoch: [37] [ 80/156] eta: 0:00:59 lr: 0.007923 min_lr: 0.007923 loss: 3.6517 (3.6811) weight_decay: 0.0500 (0.0500) time: 0.7128 data: 0.0004 max mem: 57114 Epoch: [37] [ 90/156] eta: 0:00:50 lr: 0.007922 min_lr: 0.007922 loss: 3.7964 (3.7048) weight_decay: 0.0500 (0.0500) time: 0.7017 data: 0.0004 max mem: 57114 Epoch: [37] [100/156] eta: 0:00:42 lr: 0.007922 min_lr: 0.007922 loss: 4.0008 (3.7446) weight_decay: 0.0500 (0.0500) time: 0.6990 data: 0.0004 max mem: 57114 Epoch: [37] [110/156] eta: 0:00:34 lr: 0.007921 min_lr: 0.007921 loss: 4.0830 (3.7582) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [37] [120/156] eta: 0:00:27 lr: 0.007921 min_lr: 0.007921 loss: 3.9228 (3.7531) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0004 max mem: 57114 Epoch: [37] [130/156] eta: 0:00:19 lr: 0.007920 min_lr: 0.007920 loss: 3.7931 (3.7270) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0009 max mem: 57114 Epoch: [37] [140/156] eta: 0:00:11 lr: 0.007920 min_lr: 0.007920 loss: 3.7863 (3.7402) weight_decay: 0.0500 (0.0500) time: 0.6887 data: 0.0007 max mem: 57114 Epoch: [37] [150/156] eta: 0:00:04 lr: 0.007919 min_lr: 0.007919 loss: 3.8762 (3.7439) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0001 max mem: 57114 Epoch: [37] [155/156] eta: 0:00:00 lr: 0.007919 min_lr: 0.007919 loss: 3.9530 (3.7340) weight_decay: 0.0500 (0.0500) time: 0.6857 data: 0.0001 max mem: 57114 Epoch: [37] Total time: 0:01:55 (0.7403 s / it) Averaged stats: lr: 0.007919 min_lr: 0.007919 loss: 3.9530 (3.6728) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.2886 (1.2886) acc1: 74.4792 (74.4792) acc5: 96.3542 (96.3542) time: 2.0736 data: 1.8179 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.4685 (1.4304) acc1: 71.8750 (68.7980) acc5: 92.1875 (91.3044) time: 0.5655 data: 0.3637 max mem: 57114 Test: Total time: 0:00:02 (0.5889 s / it) * Acc@1 69.545 Acc@5 89.808 loss 1.554 Accuracy of the model on the 50000 test images: 69.5% Max accuracy: 69.55% Test: [0/5] eta: 0:00:10 loss: 6.9769 (6.9769) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.0587 data: 1.8152 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9456 (6.9448) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.5624 data: 0.3632 max mem: 57114 Test: Total time: 0:00:02 (0.5719 s / it) * Acc@1 0.098 Acc@5 0.494 loss 6.941 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [38] [ 0/156] eta: 0:14:14 lr: 0.007919 min_lr: 0.007919 loss: 4.0006 (4.0006) weight_decay: 0.0500 (0.0500) time: 5.4778 data: 4.4062 max mem: 57114 Epoch: [38] [ 10/156] eta: 0:02:53 lr: 0.007918 min_lr: 0.007918 loss: 3.8761 (3.8576) weight_decay: 0.0500 (0.0500) time: 1.1864 data: 0.4008 max mem: 57114 Epoch: [38] [ 20/156] eta: 0:02:13 lr: 0.007918 min_lr: 0.007918 loss: 3.8439 (3.7777) weight_decay: 0.0500 (0.0500) time: 0.7541 data: 0.0003 max mem: 57114 Epoch: [38] [ 30/156] eta: 0:01:53 lr: 0.007917 min_lr: 0.007917 loss: 3.6773 (3.6552) weight_decay: 0.0500 (0.0500) time: 0.7495 data: 0.0003 max mem: 57114 Epoch: [38] [ 40/156] eta: 0:01:40 lr: 0.007916 min_lr: 0.007916 loss: 3.7866 (3.7159) weight_decay: 0.0500 (0.0500) time: 0.7411 data: 0.0004 max mem: 57114 Epoch: [38] [ 50/156] eta: 0:01:28 lr: 0.007916 min_lr: 0.007916 loss: 3.8734 (3.6918) weight_decay: 0.0500 (0.0500) time: 0.7325 data: 0.0004 max mem: 57114 Epoch: [38] [ 60/156] eta: 0:01:19 lr: 0.007915 min_lr: 0.007915 loss: 3.8225 (3.7211) weight_decay: 0.0500 (0.0500) time: 0.7425 data: 0.0004 max mem: 57114 Epoch: [38] [ 70/156] eta: 0:01:09 lr: 0.007915 min_lr: 0.007915 loss: 3.7721 (3.7046) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0004 max mem: 57114 Epoch: [38] [ 80/156] eta: 0:01:00 lr: 0.007914 min_lr: 0.007914 loss: 3.7687 (3.7101) weight_decay: 0.0500 (0.0500) time: 0.7023 data: 0.0003 max mem: 57114 Epoch: [38] [ 90/156] eta: 0:00:51 lr: 0.007913 min_lr: 0.007913 loss: 3.7687 (3.7136) weight_decay: 0.0500 (0.0500) time: 0.7071 data: 0.0003 max mem: 57114 Epoch: [38] [100/156] eta: 0:00:43 lr: 0.007913 min_lr: 0.007913 loss: 3.7519 (3.6917) weight_decay: 0.0500 (0.0500) time: 0.7130 data: 0.0004 max mem: 57114 Epoch: [38] [110/156] eta: 0:00:35 lr: 0.007912 min_lr: 0.007912 loss: 3.5196 (3.6666) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0004 max mem: 57114 Epoch: [38] [120/156] eta: 0:00:27 lr: 0.007912 min_lr: 0.007912 loss: 3.6486 (3.6591) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0003 max mem: 57114 Epoch: [38] [130/156] eta: 0:00:19 lr: 0.007911 min_lr: 0.007911 loss: 3.6486 (3.6569) weight_decay: 0.0500 (0.0500) time: 0.6953 data: 0.0008 max mem: 57114 Epoch: [38] [140/156] eta: 0:00:12 lr: 0.007910 min_lr: 0.007910 loss: 3.9140 (3.6817) weight_decay: 0.0500 (0.0500) time: 0.6901 data: 0.0007 max mem: 57114 Epoch: [38] [150/156] eta: 0:00:04 lr: 0.007910 min_lr: 0.007910 loss: 3.9453 (3.6917) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [38] [155/156] eta: 0:00:00 lr: 0.007910 min_lr: 0.007910 loss: 3.9093 (3.6902) weight_decay: 0.0500 (0.0500) time: 0.6822 data: 0.0001 max mem: 57114 Epoch: [38] Total time: 0:01:56 (0.7500 s / it) Averaged stats: lr: 0.007910 min_lr: 0.007910 loss: 3.9093 (3.6585) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.2714 (1.2714) acc1: 78.1250 (78.1250) acc5: 96.3542 (96.3542) time: 2.0516 data: 1.7959 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.4674 (1.4146) acc1: 73.9583 (70.4604) acc5: 93.7500 (91.0486) time: 0.5609 data: 0.3593 max mem: 57114 Test: Total time: 0:00:02 (0.5842 s / it) * Acc@1 69.519 Acc@5 89.932 loss 1.532 Accuracy of the model on the 50000 test images: 69.5% Max accuracy: 69.55% Test: [0/5] eta: 0:00:11 loss: 6.9843 (6.9843) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.2873 data: 2.0438 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9496 (6.9479) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.6081 data: 0.4089 max mem: 57114 Test: Total time: 0:00:03 (0.6204 s / it) * Acc@1 0.096 Acc@5 0.502 loss 6.944 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [39] [ 0/156] eta: 0:13:12 lr: 0.007909 min_lr: 0.007909 loss: 3.2476 (3.2476) weight_decay: 0.0500 (0.0500) time: 5.0824 data: 3.8984 max mem: 57114 Epoch: [39] [ 10/156] eta: 0:02:46 lr: 0.007909 min_lr: 0.007909 loss: 4.1147 (3.7284) weight_decay: 0.0500 (0.0500) time: 1.1414 data: 0.3547 max mem: 57114 Epoch: [39] [ 20/156] eta: 0:02:08 lr: 0.007908 min_lr: 0.007908 loss: 3.8930 (3.6963) weight_decay: 0.0500 (0.0500) time: 0.7398 data: 0.0003 max mem: 57114 Epoch: [39] [ 30/156] eta: 0:01:50 lr: 0.007908 min_lr: 0.007908 loss: 3.3102 (3.6115) weight_decay: 0.0500 (0.0500) time: 0.7323 data: 0.0003 max mem: 57114 Epoch: [39] [ 40/156] eta: 0:01:37 lr: 0.007907 min_lr: 0.007907 loss: 3.4952 (3.6177) weight_decay: 0.0500 (0.0500) time: 0.7303 data: 0.0003 max mem: 57114 Epoch: [39] [ 50/156] eta: 0:01:26 lr: 0.007906 min_lr: 0.007906 loss: 3.7782 (3.6425) weight_decay: 0.0500 (0.0500) time: 0.7165 data: 0.0004 max mem: 57114 Epoch: [39] [ 60/156] eta: 0:01:16 lr: 0.007906 min_lr: 0.007906 loss: 3.5006 (3.5844) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0004 max mem: 57114 Epoch: [39] [ 70/156] eta: 0:01:07 lr: 0.007905 min_lr: 0.007905 loss: 3.5576 (3.6099) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [39] [ 80/156] eta: 0:00:59 lr: 0.007905 min_lr: 0.007905 loss: 3.7617 (3.6226) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0004 max mem: 57114 Epoch: [39] [ 90/156] eta: 0:00:50 lr: 0.007904 min_lr: 0.007904 loss: 3.9628 (3.6560) weight_decay: 0.0500 (0.0500) time: 0.7130 data: 0.0003 max mem: 57114 Epoch: [39] [100/156] eta: 0:00:42 lr: 0.007903 min_lr: 0.007903 loss: 3.8155 (3.6574) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0003 max mem: 57114 Epoch: [39] [110/156] eta: 0:00:34 lr: 0.007903 min_lr: 0.007903 loss: 3.8155 (3.6710) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0003 max mem: 57114 Epoch: [39] [120/156] eta: 0:00:27 lr: 0.007902 min_lr: 0.007902 loss: 3.9575 (3.6831) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0003 max mem: 57114 Epoch: [39] [130/156] eta: 0:00:19 lr: 0.007901 min_lr: 0.007901 loss: 3.3313 (3.6401) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0007 max mem: 57114 Epoch: [39] [140/156] eta: 0:00:11 lr: 0.007901 min_lr: 0.007901 loss: 3.4074 (3.6484) weight_decay: 0.0500 (0.0500) time: 0.6911 data: 0.0006 max mem: 57114 Epoch: [39] [150/156] eta: 0:00:04 lr: 0.007900 min_lr: 0.007900 loss: 3.7368 (3.6378) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0001 max mem: 57114 Epoch: [39] [155/156] eta: 0:00:00 lr: 0.007900 min_lr: 0.007900 loss: 3.7819 (3.6481) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0001 max mem: 57114 Epoch: [39] Total time: 0:01:55 (0.7410 s / it) Averaged stats: lr: 0.007900 min_lr: 0.007900 loss: 3.7819 (3.6425) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.2522 (1.2522) acc1: 74.4792 (74.4792) acc5: 95.3125 (95.3125) time: 2.1595 data: 1.9036 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.4446 (1.4281) acc1: 71.8750 (67.1356) acc5: 92.1875 (89.6419) time: 0.5826 data: 0.3808 max mem: 57114 Test: Total time: 0:00:03 (0.6080 s / it) * Acc@1 68.926 Acc@5 89.158 loss 1.525 Accuracy of the model on the 50000 test images: 68.9% Max accuracy: 69.55% Test: [0/5] eta: 0:00:11 loss: 6.9944 (6.9944) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.2585 data: 2.0152 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9536 (6.9514) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.6024 data: 0.4031 max mem: 57114 Test: Total time: 0:00:03 (0.6150 s / it) * Acc@1 0.102 Acc@5 0.506 loss 6.947 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [40] [ 0/156] eta: 0:13:20 lr: 0.007900 min_lr: 0.007900 loss: 3.8104 (3.8104) weight_decay: 0.0500 (0.0500) time: 5.1321 data: 4.0331 max mem: 57114 Epoch: [40] [ 10/156] eta: 0:02:47 lr: 0.007899 min_lr: 0.007899 loss: 3.8899 (3.9169) weight_decay: 0.0500 (0.0500) time: 1.1501 data: 0.3669 max mem: 57114 Epoch: [40] [ 20/156] eta: 0:02:07 lr: 0.007898 min_lr: 0.007898 loss: 3.8182 (3.7871) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0003 max mem: 57114 Epoch: [40] [ 30/156] eta: 0:01:49 lr: 0.007898 min_lr: 0.007898 loss: 3.8446 (3.8629) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0003 max mem: 57114 Epoch: [40] [ 40/156] eta: 0:01:37 lr: 0.007897 min_lr: 0.007897 loss: 3.9420 (3.7968) weight_decay: 0.0500 (0.0500) time: 0.7305 data: 0.0003 max mem: 57114 Epoch: [40] [ 50/156] eta: 0:01:26 lr: 0.007896 min_lr: 0.007896 loss: 3.7360 (3.8059) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0003 max mem: 57114 Epoch: [40] [ 60/156] eta: 0:01:16 lr: 0.007896 min_lr: 0.007896 loss: 3.8362 (3.8227) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0004 max mem: 57114 Epoch: [40] [ 70/156] eta: 0:01:07 lr: 0.007895 min_lr: 0.007895 loss: 4.0095 (3.8444) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [40] [ 80/156] eta: 0:00:59 lr: 0.007895 min_lr: 0.007895 loss: 3.8131 (3.7958) weight_decay: 0.0500 (0.0500) time: 0.7097 data: 0.0004 max mem: 57114 Epoch: [40] [ 90/156] eta: 0:00:50 lr: 0.007894 min_lr: 0.007894 loss: 3.7564 (3.7851) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0004 max mem: 57114 Epoch: [40] [100/156] eta: 0:00:42 lr: 0.007893 min_lr: 0.007893 loss: 3.7735 (3.7714) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0003 max mem: 57114 Epoch: [40] [110/156] eta: 0:00:34 lr: 0.007893 min_lr: 0.007893 loss: 3.7735 (3.7601) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0003 max mem: 57114 Epoch: [40] [120/156] eta: 0:00:27 lr: 0.007892 min_lr: 0.007892 loss: 3.8581 (3.7480) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [40] [130/156] eta: 0:00:19 lr: 0.007891 min_lr: 0.007891 loss: 3.8581 (3.7438) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0008 max mem: 57114 Epoch: [40] [140/156] eta: 0:00:11 lr: 0.007891 min_lr: 0.007891 loss: 3.8682 (3.7549) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0007 max mem: 57114 Epoch: [40] [150/156] eta: 0:00:04 lr: 0.007890 min_lr: 0.007890 loss: 3.9180 (3.7498) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0001 max mem: 57114 Epoch: [40] [155/156] eta: 0:00:00 lr: 0.007890 min_lr: 0.007890 loss: 3.7298 (3.7506) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0001 max mem: 57114 Epoch: [40] Total time: 0:01:55 (0.7408 s / it) Averaged stats: lr: 0.007890 min_lr: 0.007890 loss: 3.7298 (3.6347) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1549 (1.1549) acc1: 77.0833 (77.0833) acc5: 95.8333 (95.8333) time: 2.0459 data: 1.7871 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3533 (1.3181) acc1: 72.3958 (72.1228) acc5: 93.2292 (91.5601) time: 0.5598 data: 0.3575 max mem: 57114 Test: Total time: 0:00:02 (0.5846 s / it) * Acc@1 68.808 Acc@5 89.284 loss 1.480 Accuracy of the model on the 50000 test images: 68.8% Max accuracy: 69.55% Test: [0/5] eta: 0:00:11 loss: 7.0058 (7.0058) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.2465 data: 2.0029 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9577 (6.9557) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.6000 data: 0.4007 max mem: 57114 Test: Total time: 0:00:03 (0.6146 s / it) * Acc@1 0.100 Acc@5 0.500 loss 6.951 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [41] [ 0/156] eta: 0:14:09 lr: 0.007889 min_lr: 0.007889 loss: 3.9790 (3.9790) weight_decay: 0.0500 (0.0500) time: 5.4451 data: 3.5614 max mem: 57114 Epoch: [41] [ 10/156] eta: 0:02:47 lr: 0.007889 min_lr: 0.007889 loss: 3.4852 (3.6099) weight_decay: 0.0500 (0.0500) time: 1.1448 data: 0.3241 max mem: 57114 Epoch: [41] [ 20/156] eta: 0:02:08 lr: 0.007888 min_lr: 0.007888 loss: 3.4852 (3.6177) weight_decay: 0.0500 (0.0500) time: 0.7186 data: 0.0004 max mem: 57114 Epoch: [41] [ 30/156] eta: 0:01:50 lr: 0.007887 min_lr: 0.007887 loss: 3.7686 (3.6746) weight_decay: 0.0500 (0.0500) time: 0.7353 data: 0.0003 max mem: 57114 Epoch: [41] [ 40/156] eta: 0:01:37 lr: 0.007887 min_lr: 0.007887 loss: 3.5939 (3.6114) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0004 max mem: 57114 Epoch: [41] [ 50/156] eta: 0:01:26 lr: 0.007886 min_lr: 0.007886 loss: 3.4629 (3.5622) weight_decay: 0.0500 (0.0500) time: 0.7254 data: 0.0004 max mem: 57114 Epoch: [41] [ 60/156] eta: 0:01:17 lr: 0.007885 min_lr: 0.007885 loss: 3.6673 (3.5676) weight_decay: 0.0500 (0.0500) time: 0.7232 data: 0.0004 max mem: 57114 Epoch: [41] [ 70/156] eta: 0:01:07 lr: 0.007885 min_lr: 0.007885 loss: 3.6673 (3.5388) weight_decay: 0.0500 (0.0500) time: 0.7165 data: 0.0004 max mem: 57114 Epoch: [41] [ 80/156] eta: 0:00:59 lr: 0.007884 min_lr: 0.007884 loss: 3.1708 (3.5236) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0003 max mem: 57114 Epoch: [41] [ 90/156] eta: 0:00:51 lr: 0.007883 min_lr: 0.007883 loss: 3.6706 (3.5451) weight_decay: 0.0500 (0.0500) time: 0.7114 data: 0.0004 max mem: 57114 Epoch: [41] [100/156] eta: 0:00:43 lr: 0.007883 min_lr: 0.007883 loss: 3.7885 (3.5523) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0004 max mem: 57114 Epoch: [41] [110/156] eta: 0:00:35 lr: 0.007882 min_lr: 0.007882 loss: 3.7885 (3.5547) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0004 max mem: 57114 Epoch: [41] [120/156] eta: 0:00:27 lr: 0.007881 min_lr: 0.007881 loss: 3.6860 (3.5617) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0004 max mem: 57114 Epoch: [41] [130/156] eta: 0:00:19 lr: 0.007881 min_lr: 0.007881 loss: 3.5894 (3.5593) weight_decay: 0.0500 (0.0500) time: 0.6981 data: 0.0009 max mem: 57114 Epoch: [41] [140/156] eta: 0:00:11 lr: 0.007880 min_lr: 0.007880 loss: 3.7426 (3.5680) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0007 max mem: 57114 Epoch: [41] [150/156] eta: 0:00:04 lr: 0.007879 min_lr: 0.007879 loss: 3.7426 (3.5786) weight_decay: 0.0500 (0.0500) time: 0.6797 data: 0.0001 max mem: 57114 Epoch: [41] [155/156] eta: 0:00:00 lr: 0.007879 min_lr: 0.007879 loss: 3.7551 (3.5891) weight_decay: 0.0500 (0.0500) time: 0.6797 data: 0.0001 max mem: 57114 Epoch: [41] Total time: 0:01:56 (0.7437 s / it) Averaged stats: lr: 0.007879 min_lr: 0.007879 loss: 3.7551 (3.6234) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.3543 (1.3543) acc1: 72.9167 (72.9167) acc5: 93.7500 (93.7500) time: 2.0973 data: 1.8416 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.4055 (1.4174) acc1: 72.3958 (67.6471) acc5: 93.7500 (89.5141) time: 0.5701 data: 0.3684 max mem: 57114 Test: Total time: 0:00:02 (0.5928 s / it) * Acc@1 69.068 Acc@5 89.336 loss 1.504 Accuracy of the model on the 50000 test images: 69.1% Max accuracy: 69.55% Test: [0/5] eta: 0:00:11 loss: 7.0176 (7.0176) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.3838 data: 2.1404 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9627 (6.9603) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.6274 data: 0.4281 max mem: 57114 Test: Total time: 0:00:03 (0.6421 s / it) * Acc@1 0.100 Acc@5 0.496 loss 6.955 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [42] [ 0/156] eta: 0:13:56 lr: 0.007879 min_lr: 0.007879 loss: 3.2153 (3.2153) weight_decay: 0.0500 (0.0500) time: 5.3606 data: 3.8897 max mem: 57114 Epoch: [42] [ 10/156] eta: 0:02:51 lr: 0.007878 min_lr: 0.007878 loss: 3.3203 (3.4754) weight_decay: 0.0500 (0.0500) time: 1.1742 data: 0.3539 max mem: 57114 Epoch: [42] [ 20/156] eta: 0:02:10 lr: 0.007877 min_lr: 0.007877 loss: 3.6967 (3.7078) weight_decay: 0.0500 (0.0500) time: 0.7406 data: 0.0003 max mem: 57114 Epoch: [42] [ 30/156] eta: 0:01:51 lr: 0.007877 min_lr: 0.007877 loss: 3.7087 (3.6115) weight_decay: 0.0500 (0.0500) time: 0.7311 data: 0.0003 max mem: 57114 Epoch: [42] [ 40/156] eta: 0:01:38 lr: 0.007876 min_lr: 0.007876 loss: 3.7086 (3.6690) weight_decay: 0.0500 (0.0500) time: 0.7241 data: 0.0004 max mem: 57114 Epoch: [42] [ 50/156] eta: 0:01:27 lr: 0.007875 min_lr: 0.007875 loss: 3.7758 (3.6499) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0004 max mem: 57114 Epoch: [42] [ 60/156] eta: 0:01:17 lr: 0.007875 min_lr: 0.007875 loss: 3.3371 (3.5934) weight_decay: 0.0500 (0.0500) time: 0.7247 data: 0.0004 max mem: 57114 Epoch: [42] [ 70/156] eta: 0:01:08 lr: 0.007874 min_lr: 0.007874 loss: 3.5893 (3.6325) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [42] [ 80/156] eta: 0:00:59 lr: 0.007873 min_lr: 0.007873 loss: 3.7439 (3.6026) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0004 max mem: 57114 Epoch: [42] [ 90/156] eta: 0:00:51 lr: 0.007872 min_lr: 0.007872 loss: 3.7175 (3.6119) weight_decay: 0.0500 (0.0500) time: 0.7180 data: 0.0004 max mem: 57114 Epoch: [42] [100/156] eta: 0:00:43 lr: 0.007872 min_lr: 0.007872 loss: 3.7175 (3.6179) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0004 max mem: 57114 Epoch: [42] [110/156] eta: 0:00:35 lr: 0.007871 min_lr: 0.007871 loss: 3.6399 (3.6070) weight_decay: 0.0500 (0.0500) time: 0.7061 data: 0.0004 max mem: 57114 Epoch: [42] [120/156] eta: 0:00:27 lr: 0.007870 min_lr: 0.007870 loss: 3.5312 (3.5951) weight_decay: 0.0500 (0.0500) time: 0.6975 data: 0.0004 max mem: 57114 Epoch: [42] [130/156] eta: 0:00:19 lr: 0.007869 min_lr: 0.007869 loss: 3.5312 (3.5783) weight_decay: 0.0500 (0.0500) time: 0.6969 data: 0.0010 max mem: 57114 Epoch: [42] [140/156] eta: 0:00:11 lr: 0.007869 min_lr: 0.007869 loss: 3.5007 (3.5719) weight_decay: 0.0500 (0.0500) time: 0.6939 data: 0.0008 max mem: 57114 Epoch: [42] [150/156] eta: 0:00:04 lr: 0.007868 min_lr: 0.007868 loss: 3.7559 (3.5911) weight_decay: 0.0500 (0.0500) time: 0.6857 data: 0.0001 max mem: 57114 Epoch: [42] [155/156] eta: 0:00:00 lr: 0.007868 min_lr: 0.007868 loss: 3.7994 (3.5976) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [42] Total time: 0:01:56 (0.7452 s / it) Averaged stats: lr: 0.007868 min_lr: 0.007868 loss: 3.7994 (3.6128) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.2029 (1.2029) acc1: 75.5208 (75.5208) acc5: 93.7500 (93.7500) time: 2.0562 data: 1.8002 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3718 (1.3995) acc1: 72.9167 (70.5882) acc5: 93.2292 (90.4092) time: 0.5619 data: 0.3601 max mem: 57114 Test: Total time: 0:00:02 (0.5838 s / it) * Acc@1 69.537 Acc@5 89.866 loss 1.489 Accuracy of the model on the 50000 test images: 69.5% Max accuracy: 69.55% Test: [0/5] eta: 0:00:11 loss: 7.0295 (7.0295) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.3127 data: 2.0692 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9684 (6.9654) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.6132 data: 0.4139 max mem: 57114 Test: Total time: 0:00:03 (0.6245 s / it) * Acc@1 0.098 Acc@5 0.498 loss 6.960 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [43] [ 0/156] eta: 0:13:17 lr: 0.007868 min_lr: 0.007868 loss: 3.4967 (3.4967) weight_decay: 0.0500 (0.0500) time: 5.1103 data: 3.5846 max mem: 57114 Epoch: [43] [ 10/156] eta: 0:02:44 lr: 0.007867 min_lr: 0.007867 loss: 3.6675 (3.7243) weight_decay: 0.0500 (0.0500) time: 1.1249 data: 0.3262 max mem: 57114 Epoch: [43] [ 20/156] eta: 0:02:07 lr: 0.007866 min_lr: 0.007866 loss: 3.8050 (3.7579) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0003 max mem: 57114 Epoch: [43] [ 30/156] eta: 0:01:48 lr: 0.007865 min_lr: 0.007865 loss: 3.8254 (3.7123) weight_decay: 0.0500 (0.0500) time: 0.7218 data: 0.0003 max mem: 57114 Epoch: [43] [ 40/156] eta: 0:01:35 lr: 0.007865 min_lr: 0.007865 loss: 3.3205 (3.5467) weight_decay: 0.0500 (0.0500) time: 0.7143 data: 0.0004 max mem: 57114 Epoch: [43] [ 50/156] eta: 0:01:25 lr: 0.007864 min_lr: 0.007864 loss: 3.3079 (3.5655) weight_decay: 0.0500 (0.0500) time: 0.7234 data: 0.0004 max mem: 57114 Epoch: [43] [ 60/156] eta: 0:01:16 lr: 0.007863 min_lr: 0.007863 loss: 3.6824 (3.5640) weight_decay: 0.0500 (0.0500) time: 0.7309 data: 0.0004 max mem: 57114 Epoch: [43] [ 70/156] eta: 0:01:07 lr: 0.007862 min_lr: 0.007862 loss: 3.7014 (3.5957) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [43] [ 80/156] eta: 0:00:58 lr: 0.007862 min_lr: 0.007862 loss: 3.9096 (3.6213) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [43] [ 90/156] eta: 0:00:50 lr: 0.007861 min_lr: 0.007861 loss: 3.7983 (3.6157) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [43] [100/156] eta: 0:00:42 lr: 0.007860 min_lr: 0.007860 loss: 3.7839 (3.6155) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0004 max mem: 57114 Epoch: [43] [110/156] eta: 0:00:34 lr: 0.007859 min_lr: 0.007859 loss: 3.8582 (3.6148) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0004 max mem: 57114 Epoch: [43] [120/156] eta: 0:00:27 lr: 0.007859 min_lr: 0.007859 loss: 3.6100 (3.6181) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0003 max mem: 57114 Epoch: [43] [130/156] eta: 0:00:19 lr: 0.007858 min_lr: 0.007858 loss: 3.5787 (3.6063) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0008 max mem: 57114 Epoch: [43] [140/156] eta: 0:00:11 lr: 0.007857 min_lr: 0.007857 loss: 3.5397 (3.6076) weight_decay: 0.0500 (0.0500) time: 0.6884 data: 0.0007 max mem: 57114 Epoch: [43] [150/156] eta: 0:00:04 lr: 0.007856 min_lr: 0.007856 loss: 3.3224 (3.5886) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0001 max mem: 57114 Epoch: [43] [155/156] eta: 0:00:00 lr: 0.007856 min_lr: 0.007856 loss: 3.2921 (3.5903) weight_decay: 0.0500 (0.0500) time: 0.6821 data: 0.0001 max mem: 57114 Epoch: [43] Total time: 0:01:55 (0.7383 s / it) Averaged stats: lr: 0.007856 min_lr: 0.007856 loss: 3.2921 (3.5992) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.3191 (1.3191) acc1: 76.5625 (76.5625) acc5: 93.7500 (93.7500) time: 2.1010 data: 1.8454 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3256 (1.3487) acc1: 76.5625 (70.3325) acc5: 93.7500 (89.8977) time: 0.5709 data: 0.3691 max mem: 57114 Test: Total time: 0:00:02 (0.5923 s / it) * Acc@1 70.031 Acc@5 89.816 loss 1.464 Accuracy of the model on the 50000 test images: 70.0% Max accuracy: 70.03% Test: [0/5] eta: 0:00:10 loss: 7.0438 (7.0438) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.0352 data: 1.7920 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9745 (6.9703) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.5115) time: 0.5574 data: 0.3585 max mem: 57114 Test: Total time: 0:00:02 (0.5699 s / it) * Acc@1 0.098 Acc@5 0.502 loss 6.965 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [44] [ 0/156] eta: 0:13:27 lr: 0.007856 min_lr: 0.007856 loss: 3.3795 (3.3795) weight_decay: 0.0500 (0.0500) time: 5.1790 data: 4.0850 max mem: 57114 Epoch: [44] [ 10/156] eta: 0:02:44 lr: 0.007855 min_lr: 0.007855 loss: 3.8556 (3.6566) weight_decay: 0.0500 (0.0500) time: 1.1297 data: 0.3717 max mem: 57114 Epoch: [44] [ 20/156] eta: 0:02:07 lr: 0.007854 min_lr: 0.007854 loss: 3.6876 (3.5175) weight_decay: 0.0500 (0.0500) time: 0.7259 data: 0.0003 max mem: 57114 Epoch: [44] [ 30/156] eta: 0:01:49 lr: 0.007854 min_lr: 0.007854 loss: 3.5665 (3.5469) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0004 max mem: 57114 Epoch: [44] [ 40/156] eta: 0:01:37 lr: 0.007853 min_lr: 0.007853 loss: 3.5665 (3.5056) weight_decay: 0.0500 (0.0500) time: 0.7332 data: 0.0004 max mem: 57114 Epoch: [44] [ 50/156] eta: 0:01:26 lr: 0.007852 min_lr: 0.007852 loss: 3.5503 (3.5254) weight_decay: 0.0500 (0.0500) time: 0.7399 data: 0.0004 max mem: 57114 Epoch: [44] [ 60/156] eta: 0:01:17 lr: 0.007851 min_lr: 0.007851 loss: 3.6035 (3.5265) weight_decay: 0.0500 (0.0500) time: 0.7442 data: 0.0003 max mem: 57114 Epoch: [44] [ 70/156] eta: 0:01:08 lr: 0.007850 min_lr: 0.007850 loss: 3.4794 (3.5465) weight_decay: 0.0500 (0.0500) time: 0.7420 data: 0.0003 max mem: 57114 Epoch: [44] [ 80/156] eta: 0:00:59 lr: 0.007850 min_lr: 0.007850 loss: 3.4794 (3.5285) weight_decay: 0.0500 (0.0500) time: 0.7210 data: 0.0004 max mem: 57114 Epoch: [44] [ 90/156] eta: 0:00:51 lr: 0.007849 min_lr: 0.007849 loss: 3.4771 (3.5185) weight_decay: 0.0500 (0.0500) time: 0.7064 data: 0.0004 max mem: 57114 Epoch: [44] [100/156] eta: 0:00:43 lr: 0.007848 min_lr: 0.007848 loss: 3.6269 (3.5370) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0004 max mem: 57114 Epoch: [44] [110/156] eta: 0:00:35 lr: 0.007847 min_lr: 0.007847 loss: 3.6269 (3.5234) weight_decay: 0.0500 (0.0500) time: 0.7156 data: 0.0004 max mem: 57114 Epoch: [44] [120/156] eta: 0:00:27 lr: 0.007847 min_lr: 0.007847 loss: 3.6316 (3.5315) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0004 max mem: 57114 Epoch: [44] [130/156] eta: 0:00:19 lr: 0.007846 min_lr: 0.007846 loss: 3.6254 (3.5283) weight_decay: 0.0500 (0.0500) time: 0.6946 data: 0.0005 max mem: 57114 Epoch: [44] [140/156] eta: 0:00:11 lr: 0.007845 min_lr: 0.007845 loss: 3.2878 (3.5009) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.0003 max mem: 57114 Epoch: [44] [150/156] eta: 0:00:04 lr: 0.007844 min_lr: 0.007844 loss: 3.2281 (3.5061) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0001 max mem: 57114 Epoch: [44] [155/156] eta: 0:00:00 lr: 0.007844 min_lr: 0.007844 loss: 3.6013 (3.5118) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [44] Total time: 0:01:56 (0.7458 s / it) Averaged stats: lr: 0.007844 min_lr: 0.007844 loss: 3.6013 (3.5893) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:09 loss: 1.0860 (1.0860) acc1: 75.0000 (75.0000) acc5: 94.2708 (94.2708) time: 1.9977 data: 1.7421 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1862 (1.2253) acc1: 73.4375 (70.9719) acc5: 94.2708 (90.6650) time: 0.5503 data: 0.3485 max mem: 57114 Test: Total time: 0:00:02 (0.5694 s / it) * Acc@1 70.684 Acc@5 90.375 loss 1.321 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 70.68% Test: [0/5] eta: 0:00:09 loss: 7.0582 (7.0582) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 1.9834 data: 1.7399 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9759 (6.9763) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.5472 data: 0.3481 max mem: 57114 Test: Total time: 0:00:02 (0.5569 s / it) * Acc@1 0.100 Acc@5 0.512 loss 6.971 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [45] [ 0/156] eta: 0:14:01 lr: 0.007844 min_lr: 0.007844 loss: 3.0866 (3.0866) weight_decay: 0.0500 (0.0500) time: 5.3972 data: 4.3448 max mem: 57114 Epoch: [45] [ 10/156] eta: 0:02:52 lr: 0.007843 min_lr: 0.007843 loss: 3.8410 (3.5653) weight_decay: 0.0500 (0.0500) time: 1.1798 data: 0.3953 max mem: 57114 Epoch: [45] [ 20/156] eta: 0:02:10 lr: 0.007842 min_lr: 0.007842 loss: 3.6438 (3.5080) weight_decay: 0.0500 (0.0500) time: 0.7394 data: 0.0004 max mem: 57114 Epoch: [45] [ 30/156] eta: 0:01:51 lr: 0.007841 min_lr: 0.007841 loss: 3.7275 (3.4953) weight_decay: 0.0500 (0.0500) time: 0.7284 data: 0.0004 max mem: 57114 Epoch: [45] [ 40/156] eta: 0:01:38 lr: 0.007840 min_lr: 0.007840 loss: 3.8051 (3.5321) weight_decay: 0.0500 (0.0500) time: 0.7332 data: 0.0004 max mem: 57114 Epoch: [45] [ 50/156] eta: 0:01:27 lr: 0.007840 min_lr: 0.007840 loss: 3.7874 (3.5696) weight_decay: 0.0500 (0.0500) time: 0.7274 data: 0.0003 max mem: 57114 Epoch: [45] [ 60/156] eta: 0:01:18 lr: 0.007839 min_lr: 0.007839 loss: 3.5106 (3.4939) weight_decay: 0.0500 (0.0500) time: 0.7395 data: 0.0004 max mem: 57114 Epoch: [45] [ 70/156] eta: 0:01:08 lr: 0.007838 min_lr: 0.007838 loss: 3.2942 (3.5102) weight_decay: 0.0500 (0.0500) time: 0.7267 data: 0.0004 max mem: 57114 Epoch: [45] [ 80/156] eta: 0:00:59 lr: 0.007837 min_lr: 0.007837 loss: 3.6893 (3.5276) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0004 max mem: 57114 Epoch: [45] [ 90/156] eta: 0:00:51 lr: 0.007836 min_lr: 0.007836 loss: 3.4270 (3.5015) weight_decay: 0.0500 (0.0500) time: 0.7200 data: 0.0004 max mem: 57114 Epoch: [45] [100/156] eta: 0:00:43 lr: 0.007836 min_lr: 0.007836 loss: 3.3269 (3.5155) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0004 max mem: 57114 Epoch: [45] [110/156] eta: 0:00:35 lr: 0.007835 min_lr: 0.007835 loss: 3.7344 (3.5191) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0004 max mem: 57114 Epoch: [45] [120/156] eta: 0:00:27 lr: 0.007834 min_lr: 0.007834 loss: 3.6706 (3.5156) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [45] [130/156] eta: 0:00:19 lr: 0.007833 min_lr: 0.007833 loss: 3.6800 (3.5411) weight_decay: 0.0500 (0.0500) time: 0.6946 data: 0.0009 max mem: 57114 Epoch: [45] [140/156] eta: 0:00:12 lr: 0.007832 min_lr: 0.007832 loss: 4.1154 (3.5764) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0007 max mem: 57114 Epoch: [45] [150/156] eta: 0:00:04 lr: 0.007832 min_lr: 0.007832 loss: 3.9132 (3.5897) weight_decay: 0.0500 (0.0500) time: 0.6768 data: 0.0001 max mem: 57114 Epoch: [45] [155/156] eta: 0:00:00 lr: 0.007831 min_lr: 0.007831 loss: 3.8282 (3.5867) weight_decay: 0.0500 (0.0500) time: 0.6793 data: 0.0001 max mem: 57114 Epoch: [45] Total time: 0:01:56 (0.7465 s / it) Averaged stats: lr: 0.007831 min_lr: 0.007831 loss: 3.8282 (3.5742) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.2124 (1.2124) acc1: 79.1667 (79.1667) acc5: 95.3125 (95.3125) time: 2.0264 data: 1.7708 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3490 (1.3634) acc1: 75.0000 (72.3785) acc5: 95.3125 (91.9437) time: 0.5559 data: 0.3542 max mem: 57114 Test: Total time: 0:00:02 (0.5767 s / it) * Acc@1 70.598 Acc@5 90.715 loss 1.465 Accuracy of the model on the 50000 test images: 70.6% Max accuracy: 70.68% Test: [0/5] eta: 0:00:11 loss: 7.0714 (7.0714) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.3234 data: 2.0799 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9770 (6.9824) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.6153 data: 0.4161 max mem: 57114 Test: Total time: 0:00:03 (0.6298 s / it) * Acc@1 0.100 Acc@5 0.512 loss 6.977 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [46] [ 0/156] eta: 0:13:53 lr: 0.007831 min_lr: 0.007831 loss: 3.4829 (3.4829) weight_decay: 0.0500 (0.0500) time: 5.3442 data: 3.6409 max mem: 57114 Epoch: [46] [ 10/156] eta: 0:02:53 lr: 0.007830 min_lr: 0.007830 loss: 3.3244 (3.1717) weight_decay: 0.0500 (0.0500) time: 1.1853 data: 0.3313 max mem: 57114 Epoch: [46] [ 20/156] eta: 0:02:10 lr: 0.007829 min_lr: 0.007829 loss: 3.5124 (3.4196) weight_decay: 0.0500 (0.0500) time: 0.7438 data: 0.0004 max mem: 57114 Epoch: [46] [ 30/156] eta: 0:01:51 lr: 0.007829 min_lr: 0.007829 loss: 3.5360 (3.4263) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0003 max mem: 57114 Epoch: [46] [ 40/156] eta: 0:01:37 lr: 0.007828 min_lr: 0.007828 loss: 3.5360 (3.4806) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0003 max mem: 57114 Epoch: [46] [ 50/156] eta: 0:01:26 lr: 0.007827 min_lr: 0.007827 loss: 3.6534 (3.5082) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0003 max mem: 57114 Epoch: [46] [ 60/156] eta: 0:01:16 lr: 0.007826 min_lr: 0.007826 loss: 3.5558 (3.5273) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [46] [ 70/156] eta: 0:01:07 lr: 0.007825 min_lr: 0.007825 loss: 3.4867 (3.5296) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [46] [ 80/156] eta: 0:00:59 lr: 0.007824 min_lr: 0.007824 loss: 3.6246 (3.5410) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0004 max mem: 57114 Epoch: [46] [ 90/156] eta: 0:00:50 lr: 0.007823 min_lr: 0.007823 loss: 3.8498 (3.5862) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [46] [100/156] eta: 0:00:42 lr: 0.007823 min_lr: 0.007823 loss: 3.8869 (3.5959) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0004 max mem: 57114 Epoch: [46] [110/156] eta: 0:00:34 lr: 0.007822 min_lr: 0.007822 loss: 3.2626 (3.5625) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0003 max mem: 57114 Epoch: [46] [120/156] eta: 0:00:27 lr: 0.007821 min_lr: 0.007821 loss: 3.5459 (3.5618) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0003 max mem: 57114 Epoch: [46] [130/156] eta: 0:00:19 lr: 0.007820 min_lr: 0.007820 loss: 3.6516 (3.5691) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0008 max mem: 57114 Epoch: [46] [140/156] eta: 0:00:11 lr: 0.007819 min_lr: 0.007819 loss: 3.7504 (3.5823) weight_decay: 0.0500 (0.0500) time: 0.6931 data: 0.0007 max mem: 57114 Epoch: [46] [150/156] eta: 0:00:04 lr: 0.007818 min_lr: 0.007818 loss: 3.5332 (3.5738) weight_decay: 0.0500 (0.0500) time: 0.6892 data: 0.0001 max mem: 57114 Epoch: [46] [155/156] eta: 0:00:00 lr: 0.007818 min_lr: 0.007818 loss: 3.4486 (3.5703) weight_decay: 0.0500 (0.0500) time: 0.6823 data: 0.0001 max mem: 57114 Epoch: [46] Total time: 0:01:55 (0.7416 s / it) Averaged stats: lr: 0.007818 min_lr: 0.007818 loss: 3.4486 (3.5762) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1581 (1.1581) acc1: 79.1667 (79.1667) acc5: 96.8750 (96.8750) time: 2.0740 data: 1.8178 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2742 (1.2721) acc1: 73.9583 (71.3555) acc5: 93.7500 (91.1765) time: 0.5655 data: 0.3636 max mem: 57114 Test: Total time: 0:00:02 (0.5914 s / it) * Acc@1 71.589 Acc@5 91.546 loss 1.318 Accuracy of the model on the 50000 test images: 71.6% Max accuracy: 71.59% Test: [0/5] eta: 0:00:10 loss: 7.0848 (7.0848) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.1032 data: 1.8597 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9783 (6.9889) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.5713 data: 0.3720 max mem: 57114 Test: Total time: 0:00:02 (0.5847 s / it) * Acc@1 0.100 Acc@5 0.510 loss 6.983 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [47] [ 0/156] eta: 0:13:34 lr: 0.007818 min_lr: 0.007818 loss: 3.7394 (3.7394) weight_decay: 0.0500 (0.0500) time: 5.2241 data: 3.7711 max mem: 57114 Epoch: [47] [ 10/156] eta: 0:02:47 lr: 0.007817 min_lr: 0.007817 loss: 3.7394 (3.6541) weight_decay: 0.0500 (0.0500) time: 1.1470 data: 0.3432 max mem: 57114 Epoch: [47] [ 20/156] eta: 0:02:07 lr: 0.007816 min_lr: 0.007816 loss: 3.7039 (3.7079) weight_decay: 0.0500 (0.0500) time: 0.7247 data: 0.0004 max mem: 57114 Epoch: [47] [ 30/156] eta: 0:01:49 lr: 0.007815 min_lr: 0.007815 loss: 3.7375 (3.6779) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0003 max mem: 57114 Epoch: [47] [ 40/156] eta: 0:01:37 lr: 0.007814 min_lr: 0.007814 loss: 3.5832 (3.5874) weight_decay: 0.0500 (0.0500) time: 0.7334 data: 0.0003 max mem: 57114 Epoch: [47] [ 50/156] eta: 0:01:27 lr: 0.007814 min_lr: 0.007814 loss: 3.5676 (3.5995) weight_decay: 0.0500 (0.0500) time: 0.7479 data: 0.0004 max mem: 57114 Epoch: [47] [ 60/156] eta: 0:01:17 lr: 0.007813 min_lr: 0.007813 loss: 3.8986 (3.6283) weight_decay: 0.0500 (0.0500) time: 0.7412 data: 0.0004 max mem: 57114 Epoch: [47] [ 70/156] eta: 0:01:08 lr: 0.007812 min_lr: 0.007812 loss: 3.5264 (3.5612) weight_decay: 0.0500 (0.0500) time: 0.7255 data: 0.0004 max mem: 57114 Epoch: [47] [ 80/156] eta: 0:00:59 lr: 0.007811 min_lr: 0.007811 loss: 3.5264 (3.5762) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0004 max mem: 57114 Epoch: [47] [ 90/156] eta: 0:00:51 lr: 0.007810 min_lr: 0.007810 loss: 3.7241 (3.5780) weight_decay: 0.0500 (0.0500) time: 0.6976 data: 0.0004 max mem: 57114 Epoch: [47] [100/156] eta: 0:00:42 lr: 0.007809 min_lr: 0.007809 loss: 3.7491 (3.5894) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [47] [110/156] eta: 0:00:35 lr: 0.007808 min_lr: 0.007808 loss: 3.6008 (3.5706) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0004 max mem: 57114 Epoch: [47] [120/156] eta: 0:00:27 lr: 0.007807 min_lr: 0.007807 loss: 3.3627 (3.5599) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.0004 max mem: 57114 Epoch: [47] [130/156] eta: 0:00:19 lr: 0.007807 min_lr: 0.007807 loss: 3.6494 (3.5679) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0008 max mem: 57114 Epoch: [47] [140/156] eta: 0:00:11 lr: 0.007806 min_lr: 0.007806 loss: 3.7773 (3.5764) weight_decay: 0.0500 (0.0500) time: 0.6922 data: 0.0007 max mem: 57114 Epoch: [47] [150/156] eta: 0:00:04 lr: 0.007805 min_lr: 0.007805 loss: 3.5707 (3.5857) weight_decay: 0.0500 (0.0500) time: 0.6741 data: 0.0001 max mem: 57114 Epoch: [47] [155/156] eta: 0:00:00 lr: 0.007804 min_lr: 0.007804 loss: 3.5384 (3.5727) weight_decay: 0.0500 (0.0500) time: 0.6756 data: 0.0001 max mem: 57114 Epoch: [47] Total time: 0:01:55 (0.7434 s / it) Averaged stats: lr: 0.007804 min_lr: 0.007804 loss: 3.5384 (3.5648) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8955 (0.8955) acc1: 84.3750 (84.3750) acc5: 95.8333 (95.8333) time: 2.0146 data: 1.7577 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1862 (1.0820) acc1: 76.0417 (73.9130) acc5: 93.2292 (91.5601) time: 0.5538 data: 0.3516 max mem: 57114 Test: Total time: 0:00:02 (0.5757 s / it) * Acc@1 71.551 Acc@5 90.639 loss 1.232 Accuracy of the model on the 50000 test images: 71.6% Max accuracy: 71.59% Test: [0/5] eta: 0:00:11 loss: 7.0984 (7.0984) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.2707 data: 2.0272 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9818 (6.9962) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6048 data: 0.4055 max mem: 57114 Test: Total time: 0:00:03 (0.6159 s / it) * Acc@1 0.100 Acc@5 0.508 loss 6.989 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [48] [ 0/156] eta: 0:12:19 lr: 0.007804 min_lr: 0.007804 loss: 3.8780 (3.8780) weight_decay: 0.0500 (0.0500) time: 4.7397 data: 3.4335 max mem: 57114 Epoch: [48] [ 10/156] eta: 0:02:42 lr: 0.007803 min_lr: 0.007803 loss: 3.7685 (3.5928) weight_decay: 0.0500 (0.0500) time: 1.1121 data: 0.3126 max mem: 57114 Epoch: [48] [ 20/156] eta: 0:02:06 lr: 0.007802 min_lr: 0.007802 loss: 3.5447 (3.5063) weight_decay: 0.0500 (0.0500) time: 0.7376 data: 0.0004 max mem: 57114 Epoch: [48] [ 30/156] eta: 0:01:48 lr: 0.007802 min_lr: 0.007802 loss: 3.5447 (3.5225) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0003 max mem: 57114 Epoch: [48] [ 40/156] eta: 0:01:36 lr: 0.007801 min_lr: 0.007801 loss: 3.7305 (3.5265) weight_decay: 0.0500 (0.0500) time: 0.7285 data: 0.0004 max mem: 57114 Epoch: [48] [ 50/156] eta: 0:01:25 lr: 0.007800 min_lr: 0.007800 loss: 3.4907 (3.5015) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0004 max mem: 57114 Epoch: [48] [ 60/156] eta: 0:01:16 lr: 0.007799 min_lr: 0.007799 loss: 3.6561 (3.5470) weight_decay: 0.0500 (0.0500) time: 0.7211 data: 0.0004 max mem: 57114 Epoch: [48] [ 70/156] eta: 0:01:07 lr: 0.007798 min_lr: 0.007798 loss: 3.7180 (3.5136) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [48] [ 80/156] eta: 0:00:58 lr: 0.007797 min_lr: 0.007797 loss: 3.3927 (3.4752) weight_decay: 0.0500 (0.0500) time: 0.6964 data: 0.0003 max mem: 57114 Epoch: [48] [ 90/156] eta: 0:00:50 lr: 0.007796 min_lr: 0.007796 loss: 3.5652 (3.4910) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0003 max mem: 57114 Epoch: [48] [100/156] eta: 0:00:42 lr: 0.007795 min_lr: 0.007795 loss: 3.6404 (3.5012) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0003 max mem: 57114 Epoch: [48] [110/156] eta: 0:00:34 lr: 0.007794 min_lr: 0.007794 loss: 3.5655 (3.4974) weight_decay: 0.0500 (0.0500) time: 0.7105 data: 0.0003 max mem: 57114 Epoch: [48] [120/156] eta: 0:00:27 lr: 0.007793 min_lr: 0.007793 loss: 3.6479 (3.5066) weight_decay: 0.0500 (0.0500) time: 0.7114 data: 0.0003 max mem: 57114 Epoch: [48] [130/156] eta: 0:00:19 lr: 0.007793 min_lr: 0.007793 loss: 3.7302 (3.5066) weight_decay: 0.0500 (0.0500) time: 0.7038 data: 0.0008 max mem: 57114 Epoch: [48] [140/156] eta: 0:00:11 lr: 0.007792 min_lr: 0.007792 loss: 3.6277 (3.5064) weight_decay: 0.0500 (0.0500) time: 0.6897 data: 0.0007 max mem: 57114 Epoch: [48] [150/156] eta: 0:00:04 lr: 0.007791 min_lr: 0.007791 loss: 3.6275 (3.5062) weight_decay: 0.0500 (0.0500) time: 0.6795 data: 0.0001 max mem: 57114 Epoch: [48] [155/156] eta: 0:00:00 lr: 0.007790 min_lr: 0.007790 loss: 3.5336 (3.5037) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [48] Total time: 0:01:55 (0.7382 s / it) Averaged stats: lr: 0.007790 min_lr: 0.007790 loss: 3.5336 (3.5486) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0421 (1.0421) acc1: 81.7708 (81.7708) acc5: 96.3542 (96.3542) time: 2.0362 data: 1.7806 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2486 (1.3213) acc1: 76.5625 (72.6343) acc5: 92.8571 (91.9437) time: 0.5579 data: 0.3562 max mem: 57114 Test: Total time: 0:00:02 (0.5790 s / it) * Acc@1 71.371 Acc@5 90.763 loss 1.394 Accuracy of the model on the 50000 test images: 71.4% Max accuracy: 71.59% Test: [0/5] eta: 0:00:11 loss: 7.1122 (7.1122) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.2981 data: 2.0546 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9881 (7.0046) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6103 data: 0.4110 max mem: 57114 Test: Total time: 0:00:03 (0.6232 s / it) * Acc@1 0.100 Acc@5 0.508 loss 6.997 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [49] [ 0/156] eta: 0:12:45 lr: 0.007790 min_lr: 0.007790 loss: 3.5170 (3.5170) weight_decay: 0.0500 (0.0500) time: 4.9080 data: 4.0643 max mem: 57114 Epoch: [49] [ 10/156] eta: 0:02:41 lr: 0.007789 min_lr: 0.007789 loss: 3.5170 (3.5283) weight_decay: 0.0500 (0.0500) time: 1.1042 data: 0.3697 max mem: 57114 Epoch: [49] [ 20/156] eta: 0:02:06 lr: 0.007788 min_lr: 0.007788 loss: 3.7628 (3.6611) weight_decay: 0.0500 (0.0500) time: 0.7331 data: 0.0003 max mem: 57114 Epoch: [49] [ 30/156] eta: 0:01:48 lr: 0.007787 min_lr: 0.007787 loss: 3.8029 (3.6040) weight_decay: 0.0500 (0.0500) time: 0.7308 data: 0.0003 max mem: 57114 Epoch: [49] [ 40/156] eta: 0:01:36 lr: 0.007786 min_lr: 0.007786 loss: 3.4493 (3.5590) weight_decay: 0.0500 (0.0500) time: 0.7320 data: 0.0004 max mem: 57114 Epoch: [49] [ 50/156] eta: 0:01:25 lr: 0.007786 min_lr: 0.007786 loss: 3.5084 (3.5739) weight_decay: 0.0500 (0.0500) time: 0.7276 data: 0.0004 max mem: 57114 Epoch: [49] [ 60/156] eta: 0:01:16 lr: 0.007785 min_lr: 0.007785 loss: 3.7999 (3.6006) weight_decay: 0.0500 (0.0500) time: 0.7073 data: 0.0004 max mem: 57114 Epoch: [49] [ 70/156] eta: 0:01:07 lr: 0.007784 min_lr: 0.007784 loss: 3.6271 (3.5737) weight_decay: 0.0500 (0.0500) time: 0.7071 data: 0.0005 max mem: 57114 Epoch: [49] [ 80/156] eta: 0:00:58 lr: 0.007783 min_lr: 0.007783 loss: 3.1113 (3.4985) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [49] [ 90/156] eta: 0:00:50 lr: 0.007782 min_lr: 0.007782 loss: 3.3080 (3.5074) weight_decay: 0.0500 (0.0500) time: 0.7146 data: 0.0004 max mem: 57114 Epoch: [49] [100/156] eta: 0:00:42 lr: 0.007781 min_lr: 0.007781 loss: 3.4198 (3.4928) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [49] [110/156] eta: 0:00:34 lr: 0.007780 min_lr: 0.007780 loss: 3.3955 (3.5039) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.0004 max mem: 57114 Epoch: [49] [120/156] eta: 0:00:27 lr: 0.007779 min_lr: 0.007779 loss: 3.3604 (3.4962) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0004 max mem: 57114 Epoch: [49] [130/156] eta: 0:00:19 lr: 0.007778 min_lr: 0.007778 loss: 3.3581 (3.4988) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0009 max mem: 57114 Epoch: [49] [140/156] eta: 0:00:11 lr: 0.007777 min_lr: 0.007777 loss: 3.4590 (3.5032) weight_decay: 0.0500 (0.0500) time: 0.6858 data: 0.0007 max mem: 57114 Epoch: [49] [150/156] eta: 0:00:04 lr: 0.007776 min_lr: 0.007776 loss: 3.5042 (3.5110) weight_decay: 0.0500 (0.0500) time: 0.6855 data: 0.0001 max mem: 57114 Epoch: [49] [155/156] eta: 0:00:00 lr: 0.007776 min_lr: 0.007776 loss: 3.5358 (3.5012) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0001 max mem: 57114 Epoch: [49] Total time: 0:01:55 (0.7396 s / it) Averaged stats: lr: 0.007776 min_lr: 0.007776 loss: 3.5358 (3.5531) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 1.0978 (1.0978) acc1: 80.7292 (80.7292) acc5: 97.3958 (97.3958) time: 2.2650 data: 2.0092 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2945 (1.3493) acc1: 75.0000 (73.5294) acc5: 92.8571 (91.5601) time: 0.6037 data: 0.4019 max mem: 57114 Test: Total time: 0:00:03 (0.6271 s / it) * Acc@1 71.741 Acc@5 91.160 loss 1.438 Accuracy of the model on the 50000 test images: 71.7% Max accuracy: 71.74% Test: [0/5] eta: 0:00:10 loss: 7.1272 (7.1272) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.0330 data: 1.7896 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9894 (7.0124) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.5572 data: 0.3580 max mem: 57114 Test: Total time: 0:00:02 (0.5679 s / it) * Acc@1 0.100 Acc@5 0.506 loss 7.004 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [50] [ 0/156] eta: 0:13:46 lr: 0.007776 min_lr: 0.007776 loss: 4.3633 (4.3633) weight_decay: 0.0500 (0.0500) time: 5.2990 data: 3.7504 max mem: 57114 Epoch: [50] [ 10/156] eta: 0:02:52 lr: 0.007775 min_lr: 0.007775 loss: 3.8204 (3.6912) weight_decay: 0.0500 (0.0500) time: 1.1842 data: 0.3412 max mem: 57114 Epoch: [50] [ 20/156] eta: 0:02:11 lr: 0.007774 min_lr: 0.007774 loss: 3.6086 (3.7001) weight_decay: 0.0500 (0.0500) time: 0.7510 data: 0.0003 max mem: 57114 Epoch: [50] [ 30/156] eta: 0:01:51 lr: 0.007773 min_lr: 0.007773 loss: 3.5601 (3.5936) weight_decay: 0.0500 (0.0500) time: 0.7257 data: 0.0004 max mem: 57114 Epoch: [50] [ 40/156] eta: 0:01:39 lr: 0.007772 min_lr: 0.007772 loss: 3.5352 (3.5641) weight_decay: 0.0500 (0.0500) time: 0.7413 data: 0.0004 max mem: 57114 Epoch: [50] [ 50/156] eta: 0:01:28 lr: 0.007771 min_lr: 0.007771 loss: 3.7451 (3.5782) weight_decay: 0.0500 (0.0500) time: 0.7450 data: 0.0004 max mem: 57114 Epoch: [50] [ 60/156] eta: 0:01:18 lr: 0.007770 min_lr: 0.007770 loss: 3.6609 (3.5456) weight_decay: 0.0500 (0.0500) time: 0.7350 data: 0.0004 max mem: 57114 Epoch: [50] [ 70/156] eta: 0:01:09 lr: 0.007769 min_lr: 0.007769 loss: 3.7746 (3.5961) weight_decay: 0.0500 (0.0500) time: 0.7278 data: 0.0004 max mem: 57114 Epoch: [50] [ 80/156] eta: 0:01:00 lr: 0.007768 min_lr: 0.007768 loss: 3.7746 (3.5892) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [50] [ 90/156] eta: 0:00:51 lr: 0.007767 min_lr: 0.007767 loss: 3.6053 (3.5882) weight_decay: 0.0500 (0.0500) time: 0.7132 data: 0.0004 max mem: 57114 Epoch: [50] [100/156] eta: 0:00:43 lr: 0.007766 min_lr: 0.007766 loss: 3.7891 (3.6015) weight_decay: 0.0500 (0.0500) time: 0.7096 data: 0.0004 max mem: 57114 Epoch: [50] [110/156] eta: 0:00:35 lr: 0.007765 min_lr: 0.007765 loss: 3.7578 (3.6183) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0004 max mem: 57114 Epoch: [50] [120/156] eta: 0:00:27 lr: 0.007764 min_lr: 0.007764 loss: 3.4600 (3.6009) weight_decay: 0.0500 (0.0500) time: 0.6979 data: 0.0004 max mem: 57114 Epoch: [50] [130/156] eta: 0:00:19 lr: 0.007763 min_lr: 0.007763 loss: 3.5661 (3.5957) weight_decay: 0.0500 (0.0500) time: 0.6939 data: 0.0008 max mem: 57114 Epoch: [50] [140/156] eta: 0:00:12 lr: 0.007762 min_lr: 0.007762 loss: 3.6797 (3.6018) weight_decay: 0.0500 (0.0500) time: 0.6859 data: 0.0007 max mem: 57114 Epoch: [50] [150/156] eta: 0:00:04 lr: 0.007761 min_lr: 0.007761 loss: 3.7217 (3.6016) weight_decay: 0.0500 (0.0500) time: 0.6814 data: 0.0001 max mem: 57114 Epoch: [50] [155/156] eta: 0:00:00 lr: 0.007761 min_lr: 0.007761 loss: 3.6315 (3.5819) weight_decay: 0.0500 (0.0500) time: 0.6816 data: 0.0001 max mem: 57114 Epoch: [50] Total time: 0:01:56 (0.7474 s / it) Averaged stats: lr: 0.007761 min_lr: 0.007761 loss: 3.6315 (3.5365) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0245 (1.0245) acc1: 79.1667 (79.1667) acc5: 94.7917 (94.7917) time: 2.1677 data: 1.9117 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1999 (1.2385) acc1: 78.1250 (72.6343) acc5: 94.7917 (91.3044) time: 0.5842 data: 0.3824 max mem: 57114 Test: Total time: 0:00:03 (0.6098 s / it) * Acc@1 71.735 Acc@5 91.194 loss 1.316 Accuracy of the model on the 50000 test images: 71.7% Max accuracy: 71.74% Test: [0/5] eta: 0:00:11 loss: 7.1426 (7.1426) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.3284 data: 2.0848 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9919 (7.0208) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6163 data: 0.4170 max mem: 57114 Test: Total time: 0:00:03 (0.6263 s / it) * Acc@1 0.100 Acc@5 0.504 loss 7.012 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [51] [ 0/156] eta: 0:13:34 lr: 0.007761 min_lr: 0.007761 loss: 3.7779 (3.7779) weight_decay: 0.0500 (0.0500) time: 5.2190 data: 3.5217 max mem: 57114 Epoch: [51] [ 10/156] eta: 0:02:48 lr: 0.007760 min_lr: 0.007760 loss: 3.6214 (3.4166) weight_decay: 0.0500 (0.0500) time: 1.1517 data: 0.3205 max mem: 57114 Epoch: [51] [ 20/156] eta: 0:02:08 lr: 0.007759 min_lr: 0.007759 loss: 3.5170 (3.4408) weight_decay: 0.0500 (0.0500) time: 0.7292 data: 0.0003 max mem: 57114 Epoch: [51] [ 30/156] eta: 0:01:49 lr: 0.007758 min_lr: 0.007758 loss: 3.4948 (3.4471) weight_decay: 0.0500 (0.0500) time: 0.7197 data: 0.0003 max mem: 57114 Epoch: [51] [ 40/156] eta: 0:01:36 lr: 0.007757 min_lr: 0.007757 loss: 3.4948 (3.4561) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0003 max mem: 57114 Epoch: [51] [ 50/156] eta: 0:01:25 lr: 0.007756 min_lr: 0.007756 loss: 3.6575 (3.4868) weight_decay: 0.0500 (0.0500) time: 0.7154 data: 0.0004 max mem: 57114 Epoch: [51] [ 60/156] eta: 0:01:16 lr: 0.007755 min_lr: 0.007755 loss: 3.4215 (3.4651) weight_decay: 0.0500 (0.0500) time: 0.7157 data: 0.0004 max mem: 57114 Epoch: [51] [ 70/156] eta: 0:01:07 lr: 0.007754 min_lr: 0.007754 loss: 3.3497 (3.4331) weight_decay: 0.0500 (0.0500) time: 0.7236 data: 0.0004 max mem: 57114 Epoch: [51] [ 80/156] eta: 0:00:58 lr: 0.007753 min_lr: 0.007753 loss: 3.5868 (3.4571) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0003 max mem: 57114 Epoch: [51] [ 90/156] eta: 0:00:50 lr: 0.007752 min_lr: 0.007752 loss: 3.6269 (3.4564) weight_decay: 0.0500 (0.0500) time: 0.7019 data: 0.0003 max mem: 57114 Epoch: [51] [100/156] eta: 0:00:42 lr: 0.007751 min_lr: 0.007751 loss: 3.3410 (3.4223) weight_decay: 0.0500 (0.0500) time: 0.7019 data: 0.0004 max mem: 57114 Epoch: [51] [110/156] eta: 0:00:34 lr: 0.007750 min_lr: 0.007750 loss: 3.5691 (3.4359) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0004 max mem: 57114 Epoch: [51] [120/156] eta: 0:00:27 lr: 0.007749 min_lr: 0.007749 loss: 3.8314 (3.4646) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0004 max mem: 57114 Epoch: [51] [130/156] eta: 0:00:19 lr: 0.007748 min_lr: 0.007748 loss: 3.7877 (3.4668) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0008 max mem: 57114 Epoch: [51] [140/156] eta: 0:00:11 lr: 0.007747 min_lr: 0.007747 loss: 3.7364 (3.4904) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0007 max mem: 57114 Epoch: [51] [150/156] eta: 0:00:04 lr: 0.007746 min_lr: 0.007746 loss: 3.6250 (3.4768) weight_decay: 0.0500 (0.0500) time: 0.6811 data: 0.0001 max mem: 57114 Epoch: [51] [155/156] eta: 0:00:00 lr: 0.007745 min_lr: 0.007745 loss: 3.5607 (3.4768) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [51] Total time: 0:01:55 (0.7410 s / it) Averaged stats: lr: 0.007745 min_lr: 0.007745 loss: 3.5607 (3.5392) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0980 (1.0980) acc1: 76.5625 (76.5625) acc5: 96.3542 (96.3542) time: 2.0932 data: 1.8370 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2148 (1.2129) acc1: 73.4375 (72.3785) acc5: 96.3542 (92.8389) time: 0.5693 data: 0.3675 max mem: 57114 Test: Total time: 0:00:02 (0.5944 s / it) * Acc@1 72.235 Acc@5 91.097 loss 1.310 Accuracy of the model on the 50000 test images: 72.2% Max accuracy: 72.23% Test: [0/5] eta: 0:00:09 loss: 7.1607 (7.1607) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 1.9825 data: 1.7390 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9991 (7.0307) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.5471 data: 0.3479 max mem: 57114 Test: Total time: 0:00:02 (0.5587 s / it) * Acc@1 0.100 Acc@5 0.508 loss 7.022 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [52] [ 0/156] eta: 0:13:03 lr: 0.007745 min_lr: 0.007745 loss: 3.9533 (3.9533) weight_decay: 0.0500 (0.0500) time: 5.0196 data: 3.8772 max mem: 57114 Epoch: [52] [ 10/156] eta: 0:02:49 lr: 0.007744 min_lr: 0.007744 loss: 3.5453 (3.4957) weight_decay: 0.0500 (0.0500) time: 1.1640 data: 0.3527 max mem: 57114 Epoch: [52] [ 20/156] eta: 0:02:09 lr: 0.007743 min_lr: 0.007743 loss: 3.4076 (3.4864) weight_decay: 0.0500 (0.0500) time: 0.7496 data: 0.0003 max mem: 57114 Epoch: [52] [ 30/156] eta: 0:01:51 lr: 0.007742 min_lr: 0.007742 loss: 3.3618 (3.4660) weight_decay: 0.0500 (0.0500) time: 0.7345 data: 0.0003 max mem: 57114 Epoch: [52] [ 40/156] eta: 0:01:38 lr: 0.007741 min_lr: 0.007741 loss: 3.3675 (3.4391) weight_decay: 0.0500 (0.0500) time: 0.7375 data: 0.0004 max mem: 57114 Epoch: [52] [ 50/156] eta: 0:01:27 lr: 0.007740 min_lr: 0.007740 loss: 3.4022 (3.4283) weight_decay: 0.0500 (0.0500) time: 0.7376 data: 0.0004 max mem: 57114 Epoch: [52] [ 60/156] eta: 0:01:17 lr: 0.007739 min_lr: 0.007739 loss: 3.4897 (3.4512) weight_decay: 0.0500 (0.0500) time: 0.7359 data: 0.0004 max mem: 57114 Epoch: [52] [ 70/156] eta: 0:01:08 lr: 0.007738 min_lr: 0.007738 loss: 3.6668 (3.4705) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0003 max mem: 57114 Epoch: [52] [ 80/156] eta: 0:00:59 lr: 0.007737 min_lr: 0.007737 loss: 3.7344 (3.4810) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0003 max mem: 57114 Epoch: [52] [ 90/156] eta: 0:00:51 lr: 0.007736 min_lr: 0.007736 loss: 3.6370 (3.4849) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [52] [100/156] eta: 0:00:43 lr: 0.007735 min_lr: 0.007735 loss: 3.5115 (3.4943) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0004 max mem: 57114 Epoch: [52] [110/156] eta: 0:00:35 lr: 0.007734 min_lr: 0.007734 loss: 3.5115 (3.4883) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [52] [120/156] eta: 0:00:27 lr: 0.007733 min_lr: 0.007733 loss: 3.3252 (3.4691) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0004 max mem: 57114 Epoch: [52] [130/156] eta: 0:00:19 lr: 0.007732 min_lr: 0.007732 loss: 3.3252 (3.4668) weight_decay: 0.0500 (0.0500) time: 0.7063 data: 0.0009 max mem: 57114 Epoch: [52] [140/156] eta: 0:00:12 lr: 0.007731 min_lr: 0.007731 loss: 3.7761 (3.4726) weight_decay: 0.0500 (0.0500) time: 0.6974 data: 0.0007 max mem: 57114 Epoch: [52] [150/156] eta: 0:00:04 lr: 0.007730 min_lr: 0.007730 loss: 3.7741 (3.4851) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [52] [155/156] eta: 0:00:00 lr: 0.007729 min_lr: 0.007729 loss: 3.7800 (3.5007) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [52] Total time: 0:01:56 (0.7464 s / it) Averaged stats: lr: 0.007729 min_lr: 0.007729 loss: 3.7800 (3.5188) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1849 (1.1849) acc1: 79.1667 (79.1667) acc5: 95.3125 (95.3125) time: 2.0518 data: 1.7962 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3330 (1.3451) acc1: 75.0000 (73.4015) acc5: 93.2292 (91.0486) time: 0.5611 data: 0.3593 max mem: 57114 Test: Total time: 0:00:02 (0.5828 s / it) * Acc@1 72.051 Acc@5 91.073 loss 1.424 Accuracy of the model on the 50000 test images: 72.1% Max accuracy: 72.23% Test: [0/5] eta: 0:00:11 loss: 7.1776 (7.1776) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.2536 data: 2.0102 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.0003 (7.0398) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6013 data: 0.4021 max mem: 57114 Test: Total time: 0:00:03 (0.6146 s / it) * Acc@1 0.100 Acc@5 0.508 loss 7.031 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [53] [ 0/156] eta: 0:13:03 lr: 0.007729 min_lr: 0.007729 loss: 3.8379 (3.8379) weight_decay: 0.0500 (0.0500) time: 5.0214 data: 3.0864 max mem: 57114 Epoch: [53] [ 10/156] eta: 0:02:45 lr: 0.007728 min_lr: 0.007728 loss: 3.6787 (3.6091) weight_decay: 0.0500 (0.0500) time: 1.1320 data: 0.2811 max mem: 57114 Epoch: [53] [ 20/156] eta: 0:02:06 lr: 0.007727 min_lr: 0.007727 loss: 3.6396 (3.5858) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0004 max mem: 57114 Epoch: [53] [ 30/156] eta: 0:01:49 lr: 0.007726 min_lr: 0.007726 loss: 3.5191 (3.5297) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0003 max mem: 57114 Epoch: [53] [ 40/156] eta: 0:01:36 lr: 0.007725 min_lr: 0.007725 loss: 3.3988 (3.4916) weight_decay: 0.0500 (0.0500) time: 0.7286 data: 0.0003 max mem: 57114 Epoch: [53] [ 50/156] eta: 0:01:25 lr: 0.007724 min_lr: 0.007724 loss: 3.3988 (3.4732) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0004 max mem: 57114 Epoch: [53] [ 60/156] eta: 0:01:16 lr: 0.007723 min_lr: 0.007723 loss: 3.6269 (3.5007) weight_decay: 0.0500 (0.0500) time: 0.7197 data: 0.0004 max mem: 57114 Epoch: [53] [ 70/156] eta: 0:01:07 lr: 0.007722 min_lr: 0.007722 loss: 3.7664 (3.5117) weight_decay: 0.0500 (0.0500) time: 0.7128 data: 0.0004 max mem: 57114 Epoch: [53] [ 80/156] eta: 0:00:58 lr: 0.007721 min_lr: 0.007721 loss: 3.7013 (3.4798) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0003 max mem: 57114 Epoch: [53] [ 90/156] eta: 0:00:50 lr: 0.007720 min_lr: 0.007720 loss: 3.4244 (3.4984) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0004 max mem: 57114 Epoch: [53] [100/156] eta: 0:00:42 lr: 0.007718 min_lr: 0.007718 loss: 3.7287 (3.4971) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0003 max mem: 57114 Epoch: [53] [110/156] eta: 0:00:34 lr: 0.007717 min_lr: 0.007717 loss: 3.5550 (3.5145) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [53] [120/156] eta: 0:00:27 lr: 0.007716 min_lr: 0.007716 loss: 3.5550 (3.5267) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [53] [130/156] eta: 0:00:19 lr: 0.007715 min_lr: 0.007715 loss: 3.6271 (3.5271) weight_decay: 0.0500 (0.0500) time: 0.7050 data: 0.0008 max mem: 57114 Epoch: [53] [140/156] eta: 0:00:11 lr: 0.007714 min_lr: 0.007714 loss: 3.6611 (3.5360) weight_decay: 0.0500 (0.0500) time: 0.6929 data: 0.0007 max mem: 57114 Epoch: [53] [150/156] eta: 0:00:04 lr: 0.007713 min_lr: 0.007713 loss: 3.5675 (3.5188) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [53] [155/156] eta: 0:00:00 lr: 0.007713 min_lr: 0.007713 loss: 3.5675 (3.5205) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0001 max mem: 57114 Epoch: [53] Total time: 0:01:55 (0.7396 s / it) Averaged stats: lr: 0.007713 min_lr: 0.007713 loss: 3.5675 (3.5205) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0100 (1.0100) acc1: 77.0833 (77.0833) acc5: 94.2708 (94.2708) time: 2.1529 data: 1.8970 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2150 (1.1545) acc1: 73.4375 (72.7622) acc5: 93.2292 (91.6880) time: 0.5813 data: 0.3795 max mem: 57114 Test: Total time: 0:00:03 (0.6052 s / it) * Acc@1 73.416 Acc@5 91.820 loss 1.223 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 73.42% Test: [0/5] eta: 0:00:09 loss: 7.1942 (7.1942) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 1.9569 data: 1.7135 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.0092 (7.0503) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.5420 data: 0.3428 max mem: 57114 Test: Total time: 0:00:02 (0.5515 s / it) * Acc@1 0.100 Acc@5 0.510 loss 7.041 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [54] [ 0/156] eta: 0:13:53 lr: 0.007712 min_lr: 0.007712 loss: 3.5323 (3.5323) weight_decay: 0.0500 (0.0500) time: 5.3444 data: 3.9844 max mem: 57114 Epoch: [54] [ 10/156] eta: 0:02:47 lr: 0.007711 min_lr: 0.007711 loss: 3.5323 (3.3232) weight_decay: 0.0500 (0.0500) time: 1.1477 data: 0.3625 max mem: 57114 Epoch: [54] [ 20/156] eta: 0:02:07 lr: 0.007710 min_lr: 0.007710 loss: 3.7126 (3.5257) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0004 max mem: 57114 Epoch: [54] [ 30/156] eta: 0:01:50 lr: 0.007709 min_lr: 0.007709 loss: 3.7359 (3.5583) weight_decay: 0.0500 (0.0500) time: 0.7284 data: 0.0003 max mem: 57114 Epoch: [54] [ 40/156] eta: 0:01:37 lr: 0.007708 min_lr: 0.007708 loss: 3.6262 (3.5398) weight_decay: 0.0500 (0.0500) time: 0.7377 data: 0.0003 max mem: 57114 Epoch: [54] [ 50/156] eta: 0:01:27 lr: 0.007707 min_lr: 0.007707 loss: 3.6262 (3.4954) weight_decay: 0.0500 (0.0500) time: 0.7401 data: 0.0004 max mem: 57114 Epoch: [54] [ 60/156] eta: 0:01:17 lr: 0.007706 min_lr: 0.007706 loss: 3.3456 (3.4752) weight_decay: 0.0500 (0.0500) time: 0.7388 data: 0.0004 max mem: 57114 Epoch: [54] [ 70/156] eta: 0:01:08 lr: 0.007705 min_lr: 0.007705 loss: 3.5471 (3.5198) weight_decay: 0.0500 (0.0500) time: 0.7207 data: 0.0004 max mem: 57114 Epoch: [54] [ 80/156] eta: 0:00:59 lr: 0.007704 min_lr: 0.007704 loss: 3.7100 (3.5361) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0004 max mem: 57114 Epoch: [54] [ 90/156] eta: 0:00:51 lr: 0.007703 min_lr: 0.007703 loss: 3.7015 (3.5434) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0004 max mem: 57114 Epoch: [54] [100/156] eta: 0:00:43 lr: 0.007702 min_lr: 0.007702 loss: 3.6382 (3.5383) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0006 max mem: 57114 Epoch: [54] [110/156] eta: 0:00:35 lr: 0.007701 min_lr: 0.007701 loss: 3.6382 (3.5443) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0006 max mem: 57114 Epoch: [54] [120/156] eta: 0:00:27 lr: 0.007700 min_lr: 0.007700 loss: 3.6984 (3.5555) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0003 max mem: 57114 Epoch: [54] [130/156] eta: 0:00:19 lr: 0.007698 min_lr: 0.007698 loss: 3.7329 (3.5670) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0009 max mem: 57114 Epoch: [54] [140/156] eta: 0:00:11 lr: 0.007697 min_lr: 0.007697 loss: 3.7329 (3.5723) weight_decay: 0.0500 (0.0500) time: 0.6932 data: 0.0007 max mem: 57114 Epoch: [54] [150/156] eta: 0:00:04 lr: 0.007696 min_lr: 0.007696 loss: 3.6960 (3.5696) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0001 max mem: 57114 Epoch: [54] [155/156] eta: 0:00:00 lr: 0.007696 min_lr: 0.007696 loss: 3.6609 (3.5606) weight_decay: 0.0500 (0.0500) time: 0.6765 data: 0.0001 max mem: 57114 Epoch: [54] Total time: 0:01:56 (0.7444 s / it) Averaged stats: lr: 0.007696 min_lr: 0.007696 loss: 3.6609 (3.5185) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9536 (0.9536) acc1: 79.6875 (79.6875) acc5: 95.8333 (95.8333) time: 2.1727 data: 1.9169 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1526 (1.1209) acc1: 72.9167 (73.0179) acc5: 93.7500 (91.6880) time: 0.5852 data: 0.3835 max mem: 57114 Test: Total time: 0:00:03 (0.6082 s / it) * Acc@1 72.159 Acc@5 91.242 loss 1.255 Accuracy of the model on the 50000 test images: 72.2% Max accuracy: 73.42% Test: [0/5] eta: 0:00:11 loss: 7.2119 (7.2119) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.3523 data: 2.1088 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.0198 (7.0619) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6212 data: 0.4219 max mem: 57114 Test: Total time: 0:00:03 (0.6563 s / it) * Acc@1 0.100 Acc@5 0.516 loss 7.053 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [55] [ 0/156] eta: 0:16:46 lr: 0.007696 min_lr: 0.007696 loss: 3.8874 (3.8874) weight_decay: 0.0500 (0.0500) time: 6.4550 data: 5.1667 max mem: 57114 Epoch: [55] [ 10/156] eta: 0:03:08 lr: 0.007694 min_lr: 0.007694 loss: 3.8099 (3.4303) weight_decay: 0.0500 (0.0500) time: 1.2928 data: 0.4701 max mem: 57114 Epoch: [55] [ 20/156] eta: 0:02:18 lr: 0.007693 min_lr: 0.007693 loss: 3.6382 (3.5367) weight_decay: 0.0500 (0.0500) time: 0.7465 data: 0.0004 max mem: 57114 Epoch: [55] [ 30/156] eta: 0:01:56 lr: 0.007692 min_lr: 0.007692 loss: 3.6673 (3.5795) weight_decay: 0.0500 (0.0500) time: 0.7206 data: 0.0004 max mem: 57114 Epoch: [55] [ 40/156] eta: 0:01:41 lr: 0.007691 min_lr: 0.007691 loss: 3.6673 (3.6020) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0004 max mem: 57114 Epoch: [55] [ 50/156] eta: 0:01:29 lr: 0.007690 min_lr: 0.007690 loss: 3.6614 (3.5937) weight_decay: 0.0500 (0.0500) time: 0.7221 data: 0.0003 max mem: 57114 Epoch: [55] [ 60/156] eta: 0:01:19 lr: 0.007689 min_lr: 0.007689 loss: 3.4770 (3.5678) weight_decay: 0.0500 (0.0500) time: 0.7236 data: 0.0003 max mem: 57114 Epoch: [55] [ 70/156] eta: 0:01:09 lr: 0.007688 min_lr: 0.007688 loss: 3.6285 (3.5706) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0004 max mem: 57114 Epoch: [55] [ 80/156] eta: 0:01:00 lr: 0.007687 min_lr: 0.007687 loss: 3.6285 (3.5289) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [55] [ 90/156] eta: 0:00:51 lr: 0.007686 min_lr: 0.007686 loss: 3.3940 (3.5283) weight_decay: 0.0500 (0.0500) time: 0.7020 data: 0.0004 max mem: 57114 Epoch: [55] [100/156] eta: 0:00:43 lr: 0.007684 min_lr: 0.007684 loss: 3.5662 (3.5185) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [55] [110/156] eta: 0:00:35 lr: 0.007683 min_lr: 0.007683 loss: 3.4568 (3.5043) weight_decay: 0.0500 (0.0500) time: 0.7096 data: 0.0004 max mem: 57114 Epoch: [55] [120/156] eta: 0:00:27 lr: 0.007682 min_lr: 0.007682 loss: 3.4568 (3.4971) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0004 max mem: 57114 Epoch: [55] [130/156] eta: 0:00:19 lr: 0.007681 min_lr: 0.007681 loss: 3.6941 (3.5044) weight_decay: 0.0500 (0.0500) time: 0.6969 data: 0.0008 max mem: 57114 Epoch: [55] [140/156] eta: 0:00:12 lr: 0.007680 min_lr: 0.007680 loss: 3.5527 (3.5089) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0007 max mem: 57114 Epoch: [55] [150/156] eta: 0:00:04 lr: 0.007679 min_lr: 0.007679 loss: 3.5527 (3.5106) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [55] [155/156] eta: 0:00:00 lr: 0.007678 min_lr: 0.007678 loss: 3.7050 (3.5128) weight_decay: 0.0500 (0.0500) time: 0.6840 data: 0.0001 max mem: 57114 Epoch: [55] Total time: 0:01:57 (0.7505 s / it) Averaged stats: lr: 0.007678 min_lr: 0.007678 loss: 3.7050 (3.5055) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1624 (1.1624) acc1: 81.7708 (81.7708) acc5: 95.3125 (95.3125) time: 2.0821 data: 1.8264 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3949 (1.3190) acc1: 73.9583 (72.5064) acc5: 92.1875 (90.7928) time: 0.5671 data: 0.3653 max mem: 57114 Test: Total time: 0:00:02 (0.5908 s / it) * Acc@1 71.555 Acc@5 90.945 loss 1.415 Accuracy of the model on the 50000 test images: 71.6% Max accuracy: 73.42% Test: [0/5] eta: 0:00:11 loss: 7.2335 (7.2335) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.3985 data: 2.1550 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.0320 (7.0751) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.6304 data: 0.4311 max mem: 57114 Test: Total time: 0:00:03 (0.6430 s / it) * Acc@1 0.100 Acc@5 0.525 loss 7.066 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [56] [ 0/156] eta: 0:13:45 lr: 0.007678 min_lr: 0.007678 loss: 4.2096 (4.2096) weight_decay: 0.0500 (0.0500) time: 5.2892 data: 4.1597 max mem: 57114 Epoch: [56] [ 10/156] eta: 0:02:48 lr: 0.007677 min_lr: 0.007677 loss: 3.8185 (3.7317) weight_decay: 0.0500 (0.0500) time: 1.1542 data: 0.3785 max mem: 57114 Epoch: [56] [ 20/156] eta: 0:02:09 lr: 0.007676 min_lr: 0.007676 loss: 3.7670 (3.6989) weight_decay: 0.0500 (0.0500) time: 0.7375 data: 0.0003 max mem: 57114 Epoch: [56] [ 30/156] eta: 0:01:49 lr: 0.007675 min_lr: 0.007675 loss: 3.6170 (3.6762) weight_decay: 0.0500 (0.0500) time: 0.7176 data: 0.0003 max mem: 57114 Epoch: [56] [ 40/156] eta: 0:01:37 lr: 0.007674 min_lr: 0.007674 loss: 3.5612 (3.6508) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [56] [ 50/156] eta: 0:01:26 lr: 0.007672 min_lr: 0.007672 loss: 3.6405 (3.6273) weight_decay: 0.0500 (0.0500) time: 0.7194 data: 0.0004 max mem: 57114 Epoch: [56] [ 60/156] eta: 0:01:16 lr: 0.007671 min_lr: 0.007671 loss: 3.6405 (3.6171) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [56] [ 70/156] eta: 0:01:07 lr: 0.007670 min_lr: 0.007670 loss: 3.4231 (3.5719) weight_decay: 0.0500 (0.0500) time: 0.7171 data: 0.0004 max mem: 57114 Epoch: [56] [ 80/156] eta: 0:00:58 lr: 0.007669 min_lr: 0.007669 loss: 3.4231 (3.5601) weight_decay: 0.0500 (0.0500) time: 0.7137 data: 0.0004 max mem: 57114 Epoch: [56] [ 90/156] eta: 0:00:50 lr: 0.007668 min_lr: 0.007668 loss: 3.4820 (3.5559) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0004 max mem: 57114 Epoch: [56] [100/156] eta: 0:00:42 lr: 0.007667 min_lr: 0.007667 loss: 3.5966 (3.5510) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.0004 max mem: 57114 Epoch: [56] [110/156] eta: 0:00:34 lr: 0.007666 min_lr: 0.007666 loss: 3.6356 (3.5597) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [56] [120/156] eta: 0:00:27 lr: 0.007664 min_lr: 0.007664 loss: 3.5847 (3.5459) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0004 max mem: 57114 Epoch: [56] [130/156] eta: 0:00:19 lr: 0.007663 min_lr: 0.007663 loss: 3.4700 (3.5377) weight_decay: 0.0500 (0.0500) time: 0.7023 data: 0.0008 max mem: 57114 Epoch: [56] [140/156] eta: 0:00:11 lr: 0.007662 min_lr: 0.007662 loss: 3.8018 (3.5371) weight_decay: 0.0500 (0.0500) time: 0.6935 data: 0.0007 max mem: 57114 Epoch: [56] [150/156] eta: 0:00:04 lr: 0.007661 min_lr: 0.007661 loss: 3.7980 (3.5411) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0001 max mem: 57114 Epoch: [56] [155/156] eta: 0:00:00 lr: 0.007660 min_lr: 0.007660 loss: 3.5942 (3.5325) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0001 max mem: 57114 Epoch: [56] Total time: 0:01:55 (0.7403 s / it) Averaged stats: lr: 0.007660 min_lr: 0.007660 loss: 3.5942 (3.4893) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0405 (1.0405) acc1: 78.6458 (78.6458) acc5: 95.3125 (95.3125) time: 2.0991 data: 1.8434 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2091 (1.1908) acc1: 75.5208 (73.6573) acc5: 93.2292 (91.1765) time: 0.5705 data: 0.3688 max mem: 57114 Test: Total time: 0:00:02 (0.5945 s / it) * Acc@1 71.457 Acc@5 90.783 loss 1.337 Accuracy of the model on the 50000 test images: 71.5% Max accuracy: 73.42% Test: [0/5] eta: 0:00:11 loss: 7.2559 (7.2559) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.2787 data: 2.0352 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.0449 (7.0877) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.6064 data: 0.4071 max mem: 57114 Test: Total time: 0:00:03 (0.6202 s / it) * Acc@1 0.100 Acc@5 0.527 loss 7.078 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [57] [ 0/156] eta: 0:13:04 lr: 0.007660 min_lr: 0.007660 loss: 2.7292 (2.7292) weight_decay: 0.0500 (0.0500) time: 5.0319 data: 3.4790 max mem: 57114 Epoch: [57] [ 10/156] eta: 0:02:44 lr: 0.007659 min_lr: 0.007659 loss: 3.4683 (3.2837) weight_decay: 0.0500 (0.0500) time: 1.1250 data: 0.3166 max mem: 57114 Epoch: [57] [ 20/156] eta: 0:02:07 lr: 0.007658 min_lr: 0.007658 loss: 3.6138 (3.4387) weight_decay: 0.0500 (0.0500) time: 0.7323 data: 0.0003 max mem: 57114 Epoch: [57] [ 30/156] eta: 0:01:48 lr: 0.007657 min_lr: 0.007657 loss: 3.5899 (3.4703) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0003 max mem: 57114 Epoch: [57] [ 40/156] eta: 0:01:36 lr: 0.007656 min_lr: 0.007656 loss: 3.5899 (3.5136) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0004 max mem: 57114 Epoch: [57] [ 50/156] eta: 0:01:25 lr: 0.007654 min_lr: 0.007654 loss: 3.6538 (3.5122) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0005 max mem: 57114 Epoch: [57] [ 60/156] eta: 0:01:16 lr: 0.007653 min_lr: 0.007653 loss: 3.1860 (3.4792) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0005 max mem: 57114 Epoch: [57] [ 70/156] eta: 0:01:07 lr: 0.007652 min_lr: 0.007652 loss: 3.5683 (3.5230) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [57] [ 80/156] eta: 0:00:58 lr: 0.007651 min_lr: 0.007651 loss: 3.8468 (3.5582) weight_decay: 0.0500 (0.0500) time: 0.7071 data: 0.0004 max mem: 57114 Epoch: [57] [ 90/156] eta: 0:00:50 lr: 0.007650 min_lr: 0.007650 loss: 3.6798 (3.5200) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [57] [100/156] eta: 0:00:42 lr: 0.007649 min_lr: 0.007649 loss: 3.4855 (3.5251) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0004 max mem: 57114 Epoch: [57] [110/156] eta: 0:00:34 lr: 0.007647 min_lr: 0.007647 loss: 3.4371 (3.4842) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [57] [120/156] eta: 0:00:27 lr: 0.007646 min_lr: 0.007646 loss: 3.2043 (3.4612) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0004 max mem: 57114 Epoch: [57] [130/156] eta: 0:00:19 lr: 0.007645 min_lr: 0.007645 loss: 3.7246 (3.4873) weight_decay: 0.0500 (0.0500) time: 0.6965 data: 0.0009 max mem: 57114 Epoch: [57] [140/156] eta: 0:00:11 lr: 0.007644 min_lr: 0.007644 loss: 3.7803 (3.4893) weight_decay: 0.0500 (0.0500) time: 0.6859 data: 0.0007 max mem: 57114 Epoch: [57] [150/156] eta: 0:00:04 lr: 0.007643 min_lr: 0.007643 loss: 3.4048 (3.4848) weight_decay: 0.0500 (0.0500) time: 0.6797 data: 0.0001 max mem: 57114 Epoch: [57] [155/156] eta: 0:00:00 lr: 0.007642 min_lr: 0.007642 loss: 3.3662 (3.4739) weight_decay: 0.0500 (0.0500) time: 0.6792 data: 0.0001 max mem: 57114 Epoch: [57] Total time: 0:01:55 (0.7375 s / it) Averaged stats: lr: 0.007642 min_lr: 0.007642 loss: 3.3662 (3.4826) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8720 (0.8720) acc1: 79.1667 (79.1667) acc5: 96.8750 (96.8750) time: 2.0528 data: 1.7975 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0847 (1.0073) acc1: 76.0417 (75.1918) acc5: 93.7500 (92.5831) time: 0.5612 data: 0.3596 max mem: 57114 Test: Total time: 0:00:02 (0.5984 s / it) * Acc@1 73.108 Acc@5 91.694 loss 1.121 Accuracy of the model on the 50000 test images: 73.1% Max accuracy: 73.42% Test: [0/5] eta: 0:00:11 loss: 7.2810 (7.2810) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.3510 data: 2.1075 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.0594 (7.1023) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.6208 data: 0.4216 max mem: 57114 Test: Total time: 0:00:03 (0.6374 s / it) * Acc@1 0.100 Acc@5 0.527 loss 7.092 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [58] [ 0/156] eta: 0:12:44 lr: 0.007642 min_lr: 0.007642 loss: 2.8505 (2.8505) weight_decay: 0.0500 (0.0500) time: 4.8988 data: 3.6749 max mem: 57114 Epoch: [58] [ 10/156] eta: 0:02:45 lr: 0.007641 min_lr: 0.007641 loss: 3.4130 (3.3069) weight_decay: 0.0500 (0.0500) time: 1.1362 data: 0.3344 max mem: 57114 Epoch: [58] [ 20/156] eta: 0:02:07 lr: 0.007640 min_lr: 0.007640 loss: 3.4341 (3.3099) weight_decay: 0.0500 (0.0500) time: 0.7405 data: 0.0003 max mem: 57114 Epoch: [58] [ 30/156] eta: 0:01:49 lr: 0.007638 min_lr: 0.007638 loss: 3.4704 (3.3767) weight_decay: 0.0500 (0.0500) time: 0.7275 data: 0.0003 max mem: 57114 Epoch: [58] [ 40/156] eta: 0:01:36 lr: 0.007637 min_lr: 0.007637 loss: 3.5553 (3.4066) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0003 max mem: 57114 Epoch: [58] [ 50/156] eta: 0:01:25 lr: 0.007636 min_lr: 0.007636 loss: 3.4882 (3.4224) weight_decay: 0.0500 (0.0500) time: 0.7073 data: 0.0004 max mem: 57114 Epoch: [58] [ 60/156] eta: 0:01:16 lr: 0.007635 min_lr: 0.007635 loss: 3.6109 (3.4823) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0004 max mem: 57114 Epoch: [58] [ 70/156] eta: 0:01:07 lr: 0.007634 min_lr: 0.007634 loss: 3.5749 (3.4509) weight_decay: 0.0500 (0.0500) time: 0.7258 data: 0.0004 max mem: 57114 Epoch: [58] [ 80/156] eta: 0:00:58 lr: 0.007632 min_lr: 0.007632 loss: 3.3519 (3.4490) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0004 max mem: 57114 Epoch: [58] [ 90/156] eta: 0:00:50 lr: 0.007631 min_lr: 0.007631 loss: 3.3687 (3.4415) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0004 max mem: 57114 Epoch: [58] [100/156] eta: 0:00:42 lr: 0.007630 min_lr: 0.007630 loss: 3.5949 (3.4419) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0004 max mem: 57114 Epoch: [58] [110/156] eta: 0:00:34 lr: 0.007629 min_lr: 0.007629 loss: 3.3722 (3.4312) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0004 max mem: 57114 Epoch: [58] [120/156] eta: 0:00:27 lr: 0.007628 min_lr: 0.007628 loss: 3.3722 (3.4450) weight_decay: 0.0500 (0.0500) time: 0.7281 data: 0.0004 max mem: 57114 Epoch: [58] [130/156] eta: 0:00:19 lr: 0.007626 min_lr: 0.007626 loss: 3.4744 (3.4262) weight_decay: 0.0500 (0.0500) time: 0.7312 data: 0.0010 max mem: 57114 Epoch: [58] [140/156] eta: 0:00:11 lr: 0.007625 min_lr: 0.007625 loss: 3.3738 (3.4282) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0008 max mem: 57114 Epoch: [58] [150/156] eta: 0:00:04 lr: 0.007624 min_lr: 0.007624 loss: 3.4512 (3.4331) weight_decay: 0.0500 (0.0500) time: 0.6847 data: 0.0001 max mem: 57114 Epoch: [58] [155/156] eta: 0:00:00 lr: 0.007623 min_lr: 0.007623 loss: 3.4512 (3.4306) weight_decay: 0.0500 (0.0500) time: 0.6796 data: 0.0001 max mem: 57114 Epoch: [58] Total time: 0:01:55 (0.7430 s / it) Averaged stats: lr: 0.007623 min_lr: 0.007623 loss: 3.4512 (3.4823) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.3089 (1.3089) acc1: 78.6458 (78.6458) acc5: 93.7500 (93.7500) time: 2.0679 data: 1.8121 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3878 (1.4330) acc1: 73.9583 (70.7161) acc5: 91.1458 (89.7698) time: 0.5642 data: 0.3625 max mem: 57114 Test: Total time: 0:00:02 (0.5870 s / it) * Acc@1 69.377 Acc@5 89.314 loss 1.484 Accuracy of the model on the 50000 test images: 69.4% Max accuracy: 73.42% Test: [0/5] eta: 0:00:11 loss: 7.3049 (7.3049) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 2.3106 data: 2.0671 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.0749 (7.1174) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.6127 data: 0.4135 max mem: 57114 Test: Total time: 0:00:03 (0.6285 s / it) * Acc@1 0.100 Acc@5 0.535 loss 7.106 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [59] [ 0/156] eta: 0:12:53 lr: 0.007623 min_lr: 0.007623 loss: 3.7330 (3.7330) weight_decay: 0.0500 (0.0500) time: 4.9558 data: 3.6774 max mem: 57114 Epoch: [59] [ 10/156] eta: 0:02:44 lr: 0.007622 min_lr: 0.007622 loss: 3.8479 (3.5451) weight_decay: 0.0500 (0.0500) time: 1.1275 data: 0.3346 max mem: 57114 Epoch: [59] [ 20/156] eta: 0:02:07 lr: 0.007621 min_lr: 0.007621 loss: 3.8362 (3.6544) weight_decay: 0.0500 (0.0500) time: 0.7354 data: 0.0003 max mem: 57114 Epoch: [59] [ 30/156] eta: 0:01:49 lr: 0.007620 min_lr: 0.007620 loss: 3.4887 (3.5440) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0004 max mem: 57114 Epoch: [59] [ 40/156] eta: 0:01:37 lr: 0.007618 min_lr: 0.007618 loss: 3.3003 (3.4926) weight_decay: 0.0500 (0.0500) time: 0.7326 data: 0.0004 max mem: 57114 Epoch: [59] [ 50/156] eta: 0:01:25 lr: 0.007617 min_lr: 0.007617 loss: 3.4792 (3.5045) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [59] [ 60/156] eta: 0:01:16 lr: 0.007616 min_lr: 0.007616 loss: 3.7432 (3.4953) weight_decay: 0.0500 (0.0500) time: 0.7097 data: 0.0004 max mem: 57114 Epoch: [59] [ 70/156] eta: 0:01:07 lr: 0.007615 min_lr: 0.007615 loss: 3.5582 (3.4916) weight_decay: 0.0500 (0.0500) time: 0.7144 data: 0.0004 max mem: 57114 Epoch: [59] [ 80/156] eta: 0:00:59 lr: 0.007613 min_lr: 0.007613 loss: 3.5936 (3.4959) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0004 max mem: 57114 Epoch: [59] [ 90/156] eta: 0:00:50 lr: 0.007612 min_lr: 0.007612 loss: 3.7791 (3.5023) weight_decay: 0.0500 (0.0500) time: 0.7334 data: 0.0005 max mem: 57114 Epoch: [59] [100/156] eta: 0:00:43 lr: 0.007611 min_lr: 0.007611 loss: 3.6539 (3.4815) weight_decay: 0.0500 (0.0500) time: 0.7412 data: 0.0004 max mem: 57114 Epoch: [59] [110/156] eta: 0:00:35 lr: 0.007610 min_lr: 0.007610 loss: 3.3252 (3.4771) weight_decay: 0.0500 (0.0500) time: 0.7442 data: 0.0004 max mem: 57114 Epoch: [59] [120/156] eta: 0:00:27 lr: 0.007608 min_lr: 0.007608 loss: 3.4706 (3.4768) weight_decay: 0.0500 (0.0500) time: 0.7325 data: 0.0004 max mem: 57114 Epoch: [59] [130/156] eta: 0:00:19 lr: 0.007607 min_lr: 0.007607 loss: 3.3815 (3.4530) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0009 max mem: 57114 Epoch: [59] [140/156] eta: 0:00:12 lr: 0.007606 min_lr: 0.007606 loss: 3.1326 (3.4448) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0007 max mem: 57114 Epoch: [59] [150/156] eta: 0:00:04 lr: 0.007605 min_lr: 0.007605 loss: 3.5765 (3.4461) weight_decay: 0.0500 (0.0500) time: 0.7165 data: 0.0001 max mem: 57114 Epoch: [59] [155/156] eta: 0:00:00 lr: 0.007604 min_lr: 0.007604 loss: 3.5765 (3.4547) weight_decay: 0.0500 (0.0500) time: 0.7164 data: 0.0001 max mem: 57114 Epoch: [59] Total time: 0:01:57 (0.7558 s / it) Averaged stats: lr: 0.007604 min_lr: 0.007604 loss: 3.5765 (3.4714) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:15 loss: 1.1505 (1.1505) acc1: 80.2083 (80.2083) acc5: 95.3125 (95.3125) time: 3.0447 data: 2.7854 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2777 (1.2145) acc1: 71.3542 (71.4834) acc5: 94.7917 (91.6880) time: 0.7615 data: 0.5572 max mem: 57114 Test: Total time: 0:00:04 (0.8108 s / it) * Acc@1 71.825 Acc@5 91.202 loss 1.300 Accuracy of the model on the 50000 test images: 71.8% Max accuracy: 73.42% Test: [0/5] eta: 0:00:16 loss: 7.3262 (7.3262) acc1: 0.5208 (0.5208) acc5: 1.0417 (1.0417) time: 3.3372 data: 3.0938 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.0908 (7.1314) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.8181 data: 0.6189 max mem: 57114 Test: Total time: 0:00:04 (0.8406 s / it) * Acc@1 0.100 Acc@5 0.529 loss 7.120 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [60] [ 0/156] eta: 0:14:01 lr: 0.007604 min_lr: 0.007604 loss: 4.3764 (4.3764) weight_decay: 0.0500 (0.0500) time: 5.3932 data: 4.3267 max mem: 57114 Epoch: [60] [ 10/156] eta: 0:02:51 lr: 0.007603 min_lr: 0.007603 loss: 3.8104 (3.7247) weight_decay: 0.0500 (0.0500) time: 1.1749 data: 0.3936 max mem: 57114 Epoch: [60] [ 20/156] eta: 0:02:12 lr: 0.007601 min_lr: 0.007601 loss: 3.6127 (3.5432) weight_decay: 0.0500 (0.0500) time: 0.7535 data: 0.0004 max mem: 57114 Epoch: [60] [ 30/156] eta: 0:01:53 lr: 0.007600 min_lr: 0.007600 loss: 3.4359 (3.5370) weight_decay: 0.0500 (0.0500) time: 0.7527 data: 0.0007 max mem: 57114 Epoch: [60] [ 40/156] eta: 0:01:40 lr: 0.007599 min_lr: 0.007599 loss: 3.7351 (3.5424) weight_decay: 0.0500 (0.0500) time: 0.7517 data: 0.0007 max mem: 57114 Epoch: [60] [ 50/156] eta: 0:01:30 lr: 0.007598 min_lr: 0.007598 loss: 3.7471 (3.5655) weight_decay: 0.0500 (0.0500) time: 0.7757 data: 0.0004 max mem: 57114 Epoch: [60] [ 60/156] eta: 0:01:20 lr: 0.007596 min_lr: 0.007596 loss: 3.8245 (3.5919) weight_decay: 0.0500 (0.0500) time: 0.7891 data: 0.0004 max mem: 57114 Epoch: [60] [ 70/156] eta: 0:01:11 lr: 0.007595 min_lr: 0.007595 loss: 3.7669 (3.5942) weight_decay: 0.0500 (0.0500) time: 0.7683 data: 0.0006 max mem: 57114 Epoch: [60] [ 80/156] eta: 0:01:02 lr: 0.007594 min_lr: 0.007594 loss: 3.5981 (3.5846) weight_decay: 0.0500 (0.0500) time: 0.7660 data: 0.0006 max mem: 57114 Epoch: [60] [ 90/156] eta: 0:00:54 lr: 0.007593 min_lr: 0.007593 loss: 3.5995 (3.5774) weight_decay: 0.0500 (0.0500) time: 0.8024 data: 0.0004 max mem: 57114 Epoch: [60] [100/156] eta: 0:00:45 lr: 0.007591 min_lr: 0.007591 loss: 3.3381 (3.5473) weight_decay: 0.0500 (0.0500) time: 0.7891 data: 0.0004 max mem: 57114 Epoch: [60] [110/156] eta: 0:00:37 lr: 0.007590 min_lr: 0.007590 loss: 3.4493 (3.5526) weight_decay: 0.0500 (0.0500) time: 0.7426 data: 0.0004 max mem: 57114 Epoch: [60] [120/156] eta: 0:00:28 lr: 0.007589 min_lr: 0.007589 loss: 3.6022 (3.5595) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0004 max mem: 57114 Epoch: [60] [130/156] eta: 0:00:20 lr: 0.007588 min_lr: 0.007588 loss: 3.5330 (3.5645) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0009 max mem: 57114 Epoch: [60] [140/156] eta: 0:00:12 lr: 0.007586 min_lr: 0.007586 loss: 3.4426 (3.5615) weight_decay: 0.0500 (0.0500) time: 0.6855 data: 0.0007 max mem: 57114 Epoch: [60] [150/156] eta: 0:00:04 lr: 0.007585 min_lr: 0.007585 loss: 3.0824 (3.5164) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.0001 max mem: 57114 Epoch: [60] [155/156] eta: 0:00:00 lr: 0.007584 min_lr: 0.007584 loss: 2.8628 (3.4963) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0001 max mem: 57114 Epoch: [60] Total time: 0:02:01 (0.7769 s / it) Averaged stats: lr: 0.007584 min_lr: 0.007584 loss: 2.8628 (3.4692) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8561 (0.8561) acc1: 79.1667 (79.1667) acc5: 96.8750 (96.8750) time: 2.0885 data: 1.8332 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0686 (1.0470) acc1: 76.0417 (74.6803) acc5: 94.2708 (91.8159) time: 0.5684 data: 0.3667 max mem: 57114 Test: Total time: 0:00:02 (0.5907 s / it) * Acc@1 73.422 Acc@5 91.886 loss 1.138 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 73.42% Test: [0/5] eta: 0:00:10 loss: 7.3463 (7.3463) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.0588 data: 1.8156 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.1069 (7.1467) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.5623 data: 0.3632 max mem: 57114 Test: Total time: 0:00:02 (0.5734 s / it) * Acc@1 0.100 Acc@5 0.518 loss 7.135 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [61] [ 0/156] eta: 0:13:07 lr: 0.007584 min_lr: 0.007584 loss: 4.0122 (4.0122) weight_decay: 0.0500 (0.0500) time: 5.0476 data: 3.6372 max mem: 57114 Epoch: [61] [ 10/156] eta: 0:02:49 lr: 0.007583 min_lr: 0.007583 loss: 3.6508 (3.5175) weight_decay: 0.0500 (0.0500) time: 1.1636 data: 0.3310 max mem: 57114 Epoch: [61] [ 20/156] eta: 0:02:10 lr: 0.007582 min_lr: 0.007582 loss: 3.6128 (3.6000) weight_decay: 0.0500 (0.0500) time: 0.7565 data: 0.0003 max mem: 57114 Epoch: [61] [ 30/156] eta: 0:01:51 lr: 0.007580 min_lr: 0.007580 loss: 3.5233 (3.5489) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0003 max mem: 57114 Epoch: [61] [ 40/156] eta: 0:01:37 lr: 0.007579 min_lr: 0.007579 loss: 3.4238 (3.5258) weight_decay: 0.0500 (0.0500) time: 0.7222 data: 0.0004 max mem: 57114 Epoch: [61] [ 50/156] eta: 0:01:27 lr: 0.007578 min_lr: 0.007578 loss: 3.5470 (3.5180) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0004 max mem: 57114 Epoch: [61] [ 60/156] eta: 0:01:17 lr: 0.007577 min_lr: 0.007577 loss: 3.5212 (3.5071) weight_decay: 0.0500 (0.0500) time: 0.7203 data: 0.0004 max mem: 57114 Epoch: [61] [ 70/156] eta: 0:01:08 lr: 0.007575 min_lr: 0.007575 loss: 3.5857 (3.5034) weight_decay: 0.0500 (0.0500) time: 0.7244 data: 0.0004 max mem: 57114 Epoch: [61] [ 80/156] eta: 0:00:59 lr: 0.007574 min_lr: 0.007574 loss: 3.4190 (3.4629) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0004 max mem: 57114 Epoch: [61] [ 90/156] eta: 0:00:51 lr: 0.007573 min_lr: 0.007573 loss: 3.3506 (3.4644) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [61] [100/156] eta: 0:00:43 lr: 0.007571 min_lr: 0.007571 loss: 3.3977 (3.4543) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [61] [110/156] eta: 0:00:35 lr: 0.007570 min_lr: 0.007570 loss: 3.6111 (3.4824) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0004 max mem: 57114 Epoch: [61] [120/156] eta: 0:00:27 lr: 0.007569 min_lr: 0.007569 loss: 3.6642 (3.4701) weight_decay: 0.0500 (0.0500) time: 0.7019 data: 0.0004 max mem: 57114 Epoch: [61] [130/156] eta: 0:00:19 lr: 0.007567 min_lr: 0.007567 loss: 3.2973 (3.4506) weight_decay: 0.0500 (0.0500) time: 0.6976 data: 0.0009 max mem: 57114 Epoch: [61] [140/156] eta: 0:00:11 lr: 0.007566 min_lr: 0.007566 loss: 3.4831 (3.4643) weight_decay: 0.0500 (0.0500) time: 0.6896 data: 0.0007 max mem: 57114 Epoch: [61] [150/156] eta: 0:00:04 lr: 0.007565 min_lr: 0.007565 loss: 3.6495 (3.4723) weight_decay: 0.0500 (0.0500) time: 0.6855 data: 0.0001 max mem: 57114 Epoch: [61] [155/156] eta: 0:00:00 lr: 0.007564 min_lr: 0.007564 loss: 3.6455 (3.4829) weight_decay: 0.0500 (0.0500) time: 0.6805 data: 0.0001 max mem: 57114 Epoch: [61] Total time: 0:01:55 (0.7431 s / it) Averaged stats: lr: 0.007564 min_lr: 0.007564 loss: 3.6455 (3.4590) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0752 (1.0752) acc1: 78.6458 (78.6458) acc5: 96.8750 (96.8750) time: 2.0551 data: 1.7993 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2075 (1.1987) acc1: 76.5625 (73.9130) acc5: 95.3125 (93.4783) time: 0.5617 data: 0.3599 max mem: 57114 Test: Total time: 0:00:02 (0.5832 s / it) * Acc@1 73.801 Acc@5 92.150 loss 1.281 Accuracy of the model on the 50000 test images: 73.8% Max accuracy: 73.80% Test: [0/5] eta: 0:00:11 loss: 7.3661 (7.3661) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.2012 data: 1.9577 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.1230 (7.1601) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.5909 data: 0.3916 max mem: 57114 Test: Total time: 0:00:03 (0.6047 s / it) * Acc@1 0.100 Acc@5 0.525 loss 7.148 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [62] [ 0/156] eta: 0:12:43 lr: 0.007564 min_lr: 0.007564 loss: 2.9844 (2.9844) weight_decay: 0.0500 (0.0500) time: 4.8956 data: 3.2870 max mem: 57114 Epoch: [62] [ 10/156] eta: 0:02:48 lr: 0.007563 min_lr: 0.007563 loss: 3.6327 (3.5558) weight_decay: 0.0500 (0.0500) time: 1.1544 data: 0.2992 max mem: 57114 Epoch: [62] [ 20/156] eta: 0:02:11 lr: 0.007561 min_lr: 0.007561 loss: 3.5925 (3.5325) weight_decay: 0.0500 (0.0500) time: 0.7678 data: 0.0004 max mem: 57114 Epoch: [62] [ 30/156] eta: 0:01:52 lr: 0.007560 min_lr: 0.007560 loss: 3.5738 (3.5556) weight_decay: 0.0500 (0.0500) time: 0.7517 data: 0.0004 max mem: 57114 Epoch: [62] [ 40/156] eta: 0:01:39 lr: 0.007559 min_lr: 0.007559 loss: 3.5944 (3.5825) weight_decay: 0.0500 (0.0500) time: 0.7411 data: 0.0003 max mem: 57114 Epoch: [62] [ 50/156] eta: 0:01:28 lr: 0.007558 min_lr: 0.007558 loss: 3.5674 (3.5248) weight_decay: 0.0500 (0.0500) time: 0.7419 data: 0.0003 max mem: 57114 Epoch: [62] [ 60/156] eta: 0:01:18 lr: 0.007556 min_lr: 0.007556 loss: 3.5975 (3.5515) weight_decay: 0.0500 (0.0500) time: 0.7370 data: 0.0004 max mem: 57114 Epoch: [62] [ 70/156] eta: 0:01:09 lr: 0.007555 min_lr: 0.007555 loss: 3.7241 (3.5397) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0004 max mem: 57114 Epoch: [62] [ 80/156] eta: 0:00:59 lr: 0.007554 min_lr: 0.007554 loss: 3.4491 (3.5215) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0004 max mem: 57114 Epoch: [62] [ 90/156] eta: 0:00:51 lr: 0.007552 min_lr: 0.007552 loss: 3.5814 (3.5194) weight_decay: 0.0500 (0.0500) time: 0.7030 data: 0.0004 max mem: 57114 Epoch: [62] [100/156] eta: 0:00:43 lr: 0.007551 min_lr: 0.007551 loss: 3.3620 (3.4887) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0004 max mem: 57114 Epoch: [62] [110/156] eta: 0:00:35 lr: 0.007550 min_lr: 0.007550 loss: 3.3053 (3.4779) weight_decay: 0.0500 (0.0500) time: 0.6998 data: 0.0003 max mem: 57114 Epoch: [62] [120/156] eta: 0:00:27 lr: 0.007548 min_lr: 0.007548 loss: 3.5120 (3.4829) weight_decay: 0.0500 (0.0500) time: 0.7015 data: 0.0004 max mem: 57114 Epoch: [62] [130/156] eta: 0:00:19 lr: 0.007547 min_lr: 0.007547 loss: 3.5767 (3.4784) weight_decay: 0.0500 (0.0500) time: 0.7002 data: 0.0008 max mem: 57114 Epoch: [62] [140/156] eta: 0:00:12 lr: 0.007546 min_lr: 0.007546 loss: 3.5426 (3.4684) weight_decay: 0.0500 (0.0500) time: 0.6905 data: 0.0007 max mem: 57114 Epoch: [62] [150/156] eta: 0:00:04 lr: 0.007544 min_lr: 0.007544 loss: 3.6128 (3.4696) weight_decay: 0.0500 (0.0500) time: 0.6834 data: 0.0001 max mem: 57114 Epoch: [62] [155/156] eta: 0:00:00 lr: 0.007544 min_lr: 0.007544 loss: 3.6076 (3.4761) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0001 max mem: 57114 Epoch: [62] Total time: 0:01:56 (0.7459 s / it) Averaged stats: lr: 0.007544 min_lr: 0.007544 loss: 3.6076 (3.4614) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1269 (1.1269) acc1: 79.1667 (79.1667) acc5: 97.3958 (97.3958) time: 2.1350 data: 1.8783 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2209 (1.2244) acc1: 78.1250 (74.9361) acc5: 96.3542 (93.0946) time: 0.5777 data: 0.3757 max mem: 57114 Test: Total time: 0:00:03 (0.6022 s / it) * Acc@1 73.260 Acc@5 91.912 loss 1.382 Accuracy of the model on the 50000 test images: 73.3% Max accuracy: 73.80% Test: [0/5] eta: 0:00:11 loss: 7.3849 (7.3849) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.3378 data: 2.0943 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.1383 (7.1730) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6182 data: 0.4190 max mem: 57114 Test: Total time: 0:00:03 (0.6319 s / it) * Acc@1 0.100 Acc@5 0.533 loss 7.162 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [63] [ 0/156] eta: 0:13:31 lr: 0.007543 min_lr: 0.007543 loss: 2.5639 (2.5639) weight_decay: 0.0500 (0.0500) time: 5.2018 data: 4.1853 max mem: 57114 Epoch: [63] [ 10/156] eta: 0:02:48 lr: 0.007542 min_lr: 0.007542 loss: 3.2097 (3.3851) weight_decay: 0.0500 (0.0500) time: 1.1508 data: 0.3807 max mem: 57114 Epoch: [63] [ 20/156] eta: 0:02:09 lr: 0.007541 min_lr: 0.007541 loss: 3.4171 (3.3697) weight_decay: 0.0500 (0.0500) time: 0.7378 data: 0.0003 max mem: 57114 Epoch: [63] [ 30/156] eta: 0:01:50 lr: 0.007539 min_lr: 0.007539 loss: 3.3896 (3.3123) weight_decay: 0.0500 (0.0500) time: 0.7295 data: 0.0003 max mem: 57114 Epoch: [63] [ 40/156] eta: 0:01:37 lr: 0.007538 min_lr: 0.007538 loss: 3.3896 (3.3321) weight_decay: 0.0500 (0.0500) time: 0.7288 data: 0.0004 max mem: 57114 Epoch: [63] [ 50/156] eta: 0:01:26 lr: 0.007537 min_lr: 0.007537 loss: 3.4281 (3.3427) weight_decay: 0.0500 (0.0500) time: 0.7254 data: 0.0004 max mem: 57114 Epoch: [63] [ 60/156] eta: 0:01:16 lr: 0.007535 min_lr: 0.007535 loss: 3.4281 (3.3406) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0004 max mem: 57114 Epoch: [63] [ 70/156] eta: 0:01:07 lr: 0.007534 min_lr: 0.007534 loss: 3.4519 (3.3423) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0004 max mem: 57114 Epoch: [63] [ 80/156] eta: 0:00:59 lr: 0.007533 min_lr: 0.007533 loss: 3.4113 (3.3529) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0004 max mem: 57114 Epoch: [63] [ 90/156] eta: 0:00:50 lr: 0.007531 min_lr: 0.007531 loss: 3.5383 (3.3651) weight_decay: 0.0500 (0.0500) time: 0.7130 data: 0.0004 max mem: 57114 Epoch: [63] [100/156] eta: 0:00:42 lr: 0.007530 min_lr: 0.007530 loss: 3.5383 (3.3597) weight_decay: 0.0500 (0.0500) time: 0.7086 data: 0.0003 max mem: 57114 Epoch: [63] [110/156] eta: 0:00:34 lr: 0.007529 min_lr: 0.007529 loss: 3.4910 (3.3698) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [63] [120/156] eta: 0:00:27 lr: 0.007527 min_lr: 0.007527 loss: 3.5920 (3.3941) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [63] [130/156] eta: 0:00:19 lr: 0.007526 min_lr: 0.007526 loss: 3.4403 (3.3786) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0010 max mem: 57114 Epoch: [63] [140/156] eta: 0:00:11 lr: 0.007525 min_lr: 0.007525 loss: 3.2826 (3.3875) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.0008 max mem: 57114 Epoch: [63] [150/156] eta: 0:00:04 lr: 0.007523 min_lr: 0.007523 loss: 3.4751 (3.3854) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0001 max mem: 57114 Epoch: [63] [155/156] eta: 0:00:00 lr: 0.007523 min_lr: 0.007523 loss: 3.6069 (3.3943) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0001 max mem: 57114 Epoch: [63] Total time: 0:01:55 (0.7421 s / it) Averaged stats: lr: 0.007523 min_lr: 0.007523 loss: 3.6069 (3.4436) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 1.0044 (1.0044) acc1: 78.6458 (78.6458) acc5: 96.3542 (96.3542) time: 2.2185 data: 1.9588 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1195 (1.1347) acc1: 76.5625 (73.7852) acc5: 93.7500 (92.4552) time: 0.5944 data: 0.3918 max mem: 57114 Test: Total time: 0:00:03 (0.6147 s / it) * Acc@1 73.961 Acc@5 92.347 loss 1.227 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 73.96% Test: [0/5] eta: 0:00:09 loss: 7.4068 (7.4068) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 1.9703 data: 1.7269 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.1530 (7.1904) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.5447 data: 0.3455 max mem: 57114 Test: Total time: 0:00:02 (0.5595 s / it) * Acc@1 0.100 Acc@5 0.533 loss 7.179 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [64] [ 0/156] eta: 0:12:51 lr: 0.007522 min_lr: 0.007522 loss: 3.2357 (3.2357) weight_decay: 0.0500 (0.0500) time: 4.9437 data: 3.5323 max mem: 57114 Epoch: [64] [ 10/156] eta: 0:02:45 lr: 0.007521 min_lr: 0.007521 loss: 3.2718 (3.3631) weight_decay: 0.0500 (0.0500) time: 1.1340 data: 0.3214 max mem: 57114 Epoch: [64] [ 20/156] eta: 0:02:09 lr: 0.007520 min_lr: 0.007520 loss: 3.4827 (3.4199) weight_decay: 0.0500 (0.0500) time: 0.7533 data: 0.0003 max mem: 57114 Epoch: [64] [ 30/156] eta: 0:01:51 lr: 0.007518 min_lr: 0.007518 loss: 3.6737 (3.5071) weight_decay: 0.0500 (0.0500) time: 0.7501 data: 0.0003 max mem: 57114 Epoch: [64] [ 40/156] eta: 0:01:37 lr: 0.007517 min_lr: 0.007517 loss: 3.7694 (3.5266) weight_decay: 0.0500 (0.0500) time: 0.7250 data: 0.0003 max mem: 57114 Epoch: [64] [ 50/156] eta: 0:01:26 lr: 0.007516 min_lr: 0.007516 loss: 3.7877 (3.5639) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [64] [ 60/156] eta: 0:01:17 lr: 0.007514 min_lr: 0.007514 loss: 3.8431 (3.5676) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0004 max mem: 57114 Epoch: [64] [ 70/156] eta: 0:01:08 lr: 0.007513 min_lr: 0.007513 loss: 3.5258 (3.5419) weight_decay: 0.0500 (0.0500) time: 0.7348 data: 0.0004 max mem: 57114 Epoch: [64] [ 80/156] eta: 0:00:59 lr: 0.007511 min_lr: 0.007511 loss: 3.5626 (3.5602) weight_decay: 0.0500 (0.0500) time: 0.7269 data: 0.0004 max mem: 57114 Epoch: [64] [ 90/156] eta: 0:00:51 lr: 0.007510 min_lr: 0.007510 loss: 3.6489 (3.5656) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [64] [100/156] eta: 0:00:43 lr: 0.007509 min_lr: 0.007509 loss: 3.6489 (3.5697) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0004 max mem: 57114 Epoch: [64] [110/156] eta: 0:00:35 lr: 0.007507 min_lr: 0.007507 loss: 3.5678 (3.5568) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [64] [120/156] eta: 0:00:27 lr: 0.007506 min_lr: 0.007506 loss: 3.5512 (3.5332) weight_decay: 0.0500 (0.0500) time: 0.7028 data: 0.0004 max mem: 57114 Epoch: [64] [130/156] eta: 0:00:19 lr: 0.007505 min_lr: 0.007505 loss: 3.5746 (3.5381) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0009 max mem: 57114 Epoch: [64] [140/156] eta: 0:00:11 lr: 0.007503 min_lr: 0.007503 loss: 3.6485 (3.5350) weight_decay: 0.0500 (0.0500) time: 0.6922 data: 0.0007 max mem: 57114 Epoch: [64] [150/156] eta: 0:00:04 lr: 0.007502 min_lr: 0.007502 loss: 3.4407 (3.5268) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0001 max mem: 57114 Epoch: [64] [155/156] eta: 0:00:00 lr: 0.007501 min_lr: 0.007501 loss: 3.4646 (3.5253) weight_decay: 0.0500 (0.0500) time: 0.6892 data: 0.0001 max mem: 57114 Epoch: [64] Total time: 0:01:56 (0.7444 s / it) Averaged stats: lr: 0.007501 min_lr: 0.007501 loss: 3.4646 (3.4449) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1217 (1.1217) acc1: 81.7708 (81.7708) acc5: 96.3542 (96.3542) time: 2.0772 data: 1.8217 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2825 (1.2477) acc1: 75.5208 (73.5294) acc5: 93.7500 (91.4322) time: 0.5661 data: 0.3644 max mem: 57114 Test: Total time: 0:00:02 (0.5902 s / it) * Acc@1 72.500 Acc@5 91.616 loss 1.351 Accuracy of the model on the 50000 test images: 72.5% Max accuracy: 73.96% Test: [0/5] eta: 0:00:11 loss: 7.4306 (7.4306) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.3603 data: 2.1167 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.1650 (7.2067) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6227 data: 0.4234 max mem: 57114 Test: Total time: 0:00:03 (0.6381 s / it) * Acc@1 0.100 Acc@5 0.531 loss 7.195 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [65] [ 0/156] eta: 0:12:50 lr: 0.007501 min_lr: 0.007501 loss: 3.7206 (3.7206) weight_decay: 0.0500 (0.0500) time: 4.9401 data: 3.0192 max mem: 57114 Epoch: [65] [ 10/156] eta: 0:02:42 lr: 0.007500 min_lr: 0.007500 loss: 3.2739 (3.3084) weight_decay: 0.0500 (0.0500) time: 1.1140 data: 0.2748 max mem: 57114 Epoch: [65] [ 20/156] eta: 0:02:05 lr: 0.007498 min_lr: 0.007498 loss: 3.1618 (3.2829) weight_decay: 0.0500 (0.0500) time: 0.7233 data: 0.0004 max mem: 57114 Epoch: [65] [ 30/156] eta: 0:01:48 lr: 0.007497 min_lr: 0.007497 loss: 3.5126 (3.3928) weight_decay: 0.0500 (0.0500) time: 0.7203 data: 0.0004 max mem: 57114 Epoch: [65] [ 40/156] eta: 0:01:35 lr: 0.007495 min_lr: 0.007495 loss: 3.6479 (3.4477) weight_decay: 0.0500 (0.0500) time: 0.7154 data: 0.0003 max mem: 57114 Epoch: [65] [ 50/156] eta: 0:01:24 lr: 0.007494 min_lr: 0.007494 loss: 3.5806 (3.4075) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0004 max mem: 57114 Epoch: [65] [ 60/156] eta: 0:01:15 lr: 0.007493 min_lr: 0.007493 loss: 3.5806 (3.4481) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0004 max mem: 57114 Epoch: [65] [ 70/156] eta: 0:01:07 lr: 0.007491 min_lr: 0.007491 loss: 3.5620 (3.4558) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0004 max mem: 57114 Epoch: [65] [ 80/156] eta: 0:00:58 lr: 0.007490 min_lr: 0.007490 loss: 3.3744 (3.4199) weight_decay: 0.0500 (0.0500) time: 0.7275 data: 0.0004 max mem: 57114 Epoch: [65] [ 90/156] eta: 0:00:50 lr: 0.007488 min_lr: 0.007488 loss: 3.2310 (3.4115) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [65] [100/156] eta: 0:00:42 lr: 0.007487 min_lr: 0.007487 loss: 3.4211 (3.4027) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0004 max mem: 57114 Epoch: [65] [110/156] eta: 0:00:34 lr: 0.007486 min_lr: 0.007486 loss: 3.8165 (3.4504) weight_decay: 0.0500 (0.0500) time: 0.6977 data: 0.0004 max mem: 57114 Epoch: [65] [120/156] eta: 0:00:26 lr: 0.007484 min_lr: 0.007484 loss: 3.8554 (3.4741) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.0004 max mem: 57114 Epoch: [65] [130/156] eta: 0:00:19 lr: 0.007483 min_lr: 0.007483 loss: 3.7752 (3.4802) weight_decay: 0.0500 (0.0500) time: 0.6997 data: 0.0005 max mem: 57114 Epoch: [65] [140/156] eta: 0:00:11 lr: 0.007481 min_lr: 0.007481 loss: 3.4284 (3.4708) weight_decay: 0.0500 (0.0500) time: 0.6858 data: 0.0003 max mem: 57114 Epoch: [65] [150/156] eta: 0:00:04 lr: 0.007480 min_lr: 0.007480 loss: 3.4686 (3.4747) weight_decay: 0.0500 (0.0500) time: 0.6825 data: 0.0001 max mem: 57114 Epoch: [65] [155/156] eta: 0:00:00 lr: 0.007479 min_lr: 0.007479 loss: 3.5462 (3.4722) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0001 max mem: 57114 Epoch: [65] Total time: 0:01:55 (0.7373 s / it) Averaged stats: lr: 0.007479 min_lr: 0.007479 loss: 3.5462 (3.4379) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 1.0170 (1.0170) acc1: 82.8125 (82.8125) acc5: 96.8750 (96.8750) time: 2.2462 data: 1.9908 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1371 (1.1430) acc1: 78.1250 (75.8312) acc5: 95.3125 (93.2225) time: 0.5999 data: 0.3982 max mem: 57114 Test: Total time: 0:00:03 (0.6222 s / it) * Acc@1 73.849 Acc@5 92.255 loss 1.227 Accuracy of the model on the 50000 test images: 73.8% Max accuracy: 73.96% Test: [0/5] eta: 0:00:11 loss: 7.4525 (7.4525) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.3436 data: 2.0996 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.1782 (7.2235) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6196 data: 0.4200 max mem: 57114 Test: Total time: 0:00:03 (0.6347 s / it) * Acc@1 0.100 Acc@5 0.529 loss 7.211 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [66] [ 0/156] eta: 0:12:32 lr: 0.007479 min_lr: 0.007479 loss: 3.4151 (3.4151) weight_decay: 0.0500 (0.0500) time: 4.8223 data: 3.9308 max mem: 57114 Epoch: [66] [ 10/156] eta: 0:02:40 lr: 0.007478 min_lr: 0.007478 loss: 3.7948 (3.7117) weight_decay: 0.0500 (0.0500) time: 1.1021 data: 0.3577 max mem: 57114 Epoch: [66] [ 20/156] eta: 0:02:05 lr: 0.007476 min_lr: 0.007476 loss: 3.5231 (3.4225) weight_decay: 0.0500 (0.0500) time: 0.7265 data: 0.0004 max mem: 57114 Epoch: [66] [ 30/156] eta: 0:01:48 lr: 0.007475 min_lr: 0.007475 loss: 3.2047 (3.4074) weight_decay: 0.0500 (0.0500) time: 0.7296 data: 0.0003 max mem: 57114 Epoch: [66] [ 40/156] eta: 0:01:36 lr: 0.007473 min_lr: 0.007473 loss: 3.5045 (3.4406) weight_decay: 0.0500 (0.0500) time: 0.7329 data: 0.0004 max mem: 57114 Epoch: [66] [ 50/156] eta: 0:01:25 lr: 0.007472 min_lr: 0.007472 loss: 3.5550 (3.4860) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [66] [ 60/156] eta: 0:01:16 lr: 0.007470 min_lr: 0.007470 loss: 3.6775 (3.4922) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0004 max mem: 57114 Epoch: [66] [ 70/156] eta: 0:01:07 lr: 0.007469 min_lr: 0.007469 loss: 3.5866 (3.5010) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0004 max mem: 57114 Epoch: [66] [ 80/156] eta: 0:00:58 lr: 0.007468 min_lr: 0.007468 loss: 3.5174 (3.4624) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0005 max mem: 57114 Epoch: [66] [ 90/156] eta: 0:00:50 lr: 0.007466 min_lr: 0.007466 loss: 2.9512 (3.4377) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0004 max mem: 57114 Epoch: [66] [100/156] eta: 0:00:42 lr: 0.007465 min_lr: 0.007465 loss: 3.5550 (3.4481) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0003 max mem: 57114 Epoch: [66] [110/156] eta: 0:00:34 lr: 0.007463 min_lr: 0.007463 loss: 3.5550 (3.4207) weight_decay: 0.0500 (0.0500) time: 0.7139 data: 0.0003 max mem: 57114 Epoch: [66] [120/156] eta: 0:00:27 lr: 0.007462 min_lr: 0.007462 loss: 3.2264 (3.4356) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0004 max mem: 57114 Epoch: [66] [130/156] eta: 0:00:19 lr: 0.007460 min_lr: 0.007460 loss: 3.3241 (3.4312) weight_decay: 0.0500 (0.0500) time: 0.7025 data: 0.0009 max mem: 57114 Epoch: [66] [140/156] eta: 0:00:11 lr: 0.007459 min_lr: 0.007459 loss: 3.4067 (3.4273) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0007 max mem: 57114 Epoch: [66] [150/156] eta: 0:00:04 lr: 0.007458 min_lr: 0.007458 loss: 3.4456 (3.4241) weight_decay: 0.0500 (0.0500) time: 0.6774 data: 0.0001 max mem: 57114 Epoch: [66] [155/156] eta: 0:00:00 lr: 0.007457 min_lr: 0.007457 loss: 3.4456 (3.4275) weight_decay: 0.0500 (0.0500) time: 0.6787 data: 0.0001 max mem: 57114 Epoch: [66] Total time: 0:01:55 (0.7379 s / it) Averaged stats: lr: 0.007457 min_lr: 0.007457 loss: 3.4456 (3.4368) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 1.1311 (1.1311) acc1: 79.1667 (79.1667) acc5: 95.3125 (95.3125) time: 2.2302 data: 1.9736 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2768 (1.2977) acc1: 76.0417 (72.8900) acc5: 94.2708 (91.8159) time: 0.5967 data: 0.3948 max mem: 57114 Test: Total time: 0:00:03 (0.6214 s / it) * Acc@1 73.430 Acc@5 91.946 loss 1.375 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 73.96% Test: [0/5] eta: 0:00:11 loss: 7.4778 (7.4778) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.2781 data: 2.0346 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.1930 (7.2420) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6063 data: 0.4070 max mem: 57114 Test: Total time: 0:00:03 (0.6201 s / it) * Acc@1 0.104 Acc@5 0.525 loss 7.229 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [67] [ 0/156] eta: 0:12:28 lr: 0.007457 min_lr: 0.007457 loss: 3.1947 (3.1947) weight_decay: 0.0500 (0.0500) time: 4.7958 data: 3.7243 max mem: 57114 Epoch: [67] [ 10/156] eta: 0:02:41 lr: 0.007455 min_lr: 0.007455 loss: 3.1947 (3.1808) weight_decay: 0.0500 (0.0500) time: 1.1040 data: 0.3389 max mem: 57114 Epoch: [67] [ 20/156] eta: 0:02:05 lr: 0.007454 min_lr: 0.007454 loss: 3.5212 (3.3867) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0003 max mem: 57114 Epoch: [67] [ 30/156] eta: 0:01:47 lr: 0.007452 min_lr: 0.007452 loss: 3.6829 (3.4177) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0003 max mem: 57114 Epoch: [67] [ 40/156] eta: 0:01:36 lr: 0.007451 min_lr: 0.007451 loss: 3.5996 (3.4615) weight_decay: 0.0500 (0.0500) time: 0.7299 data: 0.0005 max mem: 57114 Epoch: [67] [ 50/156] eta: 0:01:25 lr: 0.007449 min_lr: 0.007449 loss: 3.6586 (3.4483) weight_decay: 0.0500 (0.0500) time: 0.7413 data: 0.0005 max mem: 57114 Epoch: [67] [ 60/156] eta: 0:01:16 lr: 0.007448 min_lr: 0.007448 loss: 3.5703 (3.4739) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0004 max mem: 57114 Epoch: [67] [ 70/156] eta: 0:01:07 lr: 0.007446 min_lr: 0.007446 loss: 3.4212 (3.4457) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0003 max mem: 57114 Epoch: [67] [ 80/156] eta: 0:00:58 lr: 0.007445 min_lr: 0.007445 loss: 3.3742 (3.4520) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [67] [ 90/156] eta: 0:00:50 lr: 0.007444 min_lr: 0.007444 loss: 3.3742 (3.4418) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0004 max mem: 57114 Epoch: [67] [100/156] eta: 0:00:42 lr: 0.007442 min_lr: 0.007442 loss: 3.4478 (3.4452) weight_decay: 0.0500 (0.0500) time: 0.7045 data: 0.0004 max mem: 57114 Epoch: [67] [110/156] eta: 0:00:34 lr: 0.007441 min_lr: 0.007441 loss: 3.1942 (3.4215) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [67] [120/156] eta: 0:00:27 lr: 0.007439 min_lr: 0.007439 loss: 3.0578 (3.4013) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [67] [130/156] eta: 0:00:19 lr: 0.007438 min_lr: 0.007438 loss: 3.4314 (3.4125) weight_decay: 0.0500 (0.0500) time: 0.7022 data: 0.0008 max mem: 57114 Epoch: [67] [140/156] eta: 0:00:11 lr: 0.007436 min_lr: 0.007436 loss: 3.4523 (3.4023) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0007 max mem: 57114 Epoch: [67] [150/156] eta: 0:00:04 lr: 0.007435 min_lr: 0.007435 loss: 3.2824 (3.3920) weight_decay: 0.0500 (0.0500) time: 0.6801 data: 0.0001 max mem: 57114 Epoch: [67] [155/156] eta: 0:00:00 lr: 0.007434 min_lr: 0.007434 loss: 3.3300 (3.3932) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.0001 max mem: 57114 Epoch: [67] Total time: 0:01:55 (0.7382 s / it) Averaged stats: lr: 0.007434 min_lr: 0.007434 loss: 3.3300 (3.4355) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 1.0726 (1.0726) acc1: 81.7708 (81.7708) acc5: 94.7917 (94.7917) time: 2.3634 data: 2.1067 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2586 (1.1945) acc1: 76.0417 (73.4015) acc5: 94.7917 (92.7110) time: 0.6234 data: 0.4214 max mem: 57114 Test: Total time: 0:00:03 (0.6435 s / it) * Acc@1 73.735 Acc@5 92.191 loss 1.279 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.96% Test: [0/5] eta: 0:00:11 loss: 7.5027 (7.5027) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.3188 data: 2.0754 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.2131 (7.2617) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6144 data: 0.4152 max mem: 57114 Test: Total time: 0:00:03 (0.6279 s / it) * Acc@1 0.114 Acc@5 0.521 loss 7.247 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.11% Epoch: [68] [ 0/156] eta: 0:08:13 lr: 0.007434 min_lr: 0.007434 loss: 3.8776 (3.8776) weight_decay: 0.0500 (0.0500) time: 3.1639 data: 2.3849 max mem: 57114 Epoch: [68] [ 10/156] eta: 0:02:17 lr: 0.007432 min_lr: 0.007432 loss: 3.4229 (3.3311) weight_decay: 0.0500 (0.0500) time: 0.9447 data: 0.2171 max mem: 57114 Epoch: [68] [ 20/156] eta: 0:01:53 lr: 0.007431 min_lr: 0.007431 loss: 3.3011 (3.3362) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0003 max mem: 57114 Epoch: [68] [ 30/156] eta: 0:01:40 lr: 0.007429 min_lr: 0.007429 loss: 3.6624 (3.4606) weight_decay: 0.0500 (0.0500) time: 0.7223 data: 0.0003 max mem: 57114 Epoch: [68] [ 40/156] eta: 0:01:30 lr: 0.007428 min_lr: 0.007428 loss: 3.6806 (3.4232) weight_decay: 0.0500 (0.0500) time: 0.7238 data: 0.0004 max mem: 57114 Epoch: [68] [ 50/156] eta: 0:01:21 lr: 0.007426 min_lr: 0.007426 loss: 3.2184 (3.3939) weight_decay: 0.0500 (0.0500) time: 0.7180 data: 0.0004 max mem: 57114 Epoch: [68] [ 60/156] eta: 0:01:12 lr: 0.007425 min_lr: 0.007425 loss: 3.2522 (3.4027) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0004 max mem: 57114 Epoch: [68] [ 70/156] eta: 0:01:04 lr: 0.007423 min_lr: 0.007423 loss: 3.5909 (3.4034) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0004 max mem: 57114 Epoch: [68] [ 80/156] eta: 0:00:56 lr: 0.007422 min_lr: 0.007422 loss: 3.6773 (3.4438) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0005 max mem: 57114 Epoch: [68] [ 90/156] eta: 0:00:49 lr: 0.007421 min_lr: 0.007421 loss: 3.6812 (3.4470) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0004 max mem: 57114 Epoch: [68] [100/156] eta: 0:00:41 lr: 0.007419 min_lr: 0.007419 loss: 3.4098 (3.4509) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0004 max mem: 57114 Epoch: [68] [110/156] eta: 0:00:33 lr: 0.007418 min_lr: 0.007418 loss: 3.4707 (3.4447) weight_decay: 0.0500 (0.0500) time: 0.7017 data: 0.0004 max mem: 57114 Epoch: [68] [120/156] eta: 0:00:26 lr: 0.007416 min_lr: 0.007416 loss: 3.4748 (3.4418) weight_decay: 0.0500 (0.0500) time: 0.6967 data: 0.0004 max mem: 57114 Epoch: [68] [130/156] eta: 0:00:18 lr: 0.007415 min_lr: 0.007415 loss: 3.7084 (3.4567) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0009 max mem: 57114 Epoch: [68] [140/156] eta: 0:00:11 lr: 0.007413 min_lr: 0.007413 loss: 3.6410 (3.4501) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0007 max mem: 57114 Epoch: [68] [150/156] eta: 0:00:04 lr: 0.007412 min_lr: 0.007412 loss: 3.4945 (3.4524) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0001 max mem: 57114 Epoch: [68] [155/156] eta: 0:00:00 lr: 0.007411 min_lr: 0.007411 loss: 3.4945 (3.4443) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0001 max mem: 57114 Epoch: [68] Total time: 0:01:53 (0.7253 s / it) Averaged stats: lr: 0.007411 min_lr: 0.007411 loss: 3.4945 (3.4341) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.9635 (0.9635) acc1: 79.1667 (79.1667) acc5: 95.3125 (95.3125) time: 2.2952 data: 2.0334 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1549 (1.0925) acc1: 77.0833 (75.3197) acc5: 94.7917 (92.4552) time: 0.6097 data: 0.4067 max mem: 57114 Test: Total time: 0:00:03 (0.6554 s / it) * Acc@1 73.258 Acc@5 91.544 loss 1.239 Accuracy of the model on the 50000 test images: 73.3% Max accuracy: 73.96% Test: [0/5] eta: 0:00:12 loss: 7.5248 (7.5248) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.5785 data: 2.3350 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.2360 (7.2831) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6663 data: 0.4671 max mem: 57114 Test: Total time: 0:00:03 (0.6777 s / it) * Acc@1 0.126 Acc@5 0.521 loss 7.267 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.13% Epoch: [69] [ 0/156] eta: 0:07:04 lr: 0.007411 min_lr: 0.007411 loss: 3.3803 (3.3803) weight_decay: 0.0500 (0.0500) time: 2.7182 data: 2.0675 max mem: 57114 Epoch: [69] [ 10/156] eta: 0:02:12 lr: 0.007409 min_lr: 0.007409 loss: 3.3803 (3.4505) weight_decay: 0.0500 (0.0500) time: 0.9082 data: 0.1882 max mem: 57114 Epoch: [69] [ 20/156] eta: 0:01:51 lr: 0.007408 min_lr: 0.007408 loss: 3.3606 (3.3675) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0003 max mem: 57114 Epoch: [69] [ 30/156] eta: 0:01:39 lr: 0.007406 min_lr: 0.007406 loss: 3.4896 (3.3995) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0003 max mem: 57114 Epoch: [69] [ 40/156] eta: 0:01:29 lr: 0.007405 min_lr: 0.007405 loss: 3.4455 (3.3609) weight_decay: 0.0500 (0.0500) time: 0.7157 data: 0.0004 max mem: 57114 Epoch: [69] [ 50/156] eta: 0:01:20 lr: 0.007403 min_lr: 0.007403 loss: 3.4155 (3.3698) weight_decay: 0.0500 (0.0500) time: 0.7156 data: 0.0004 max mem: 57114 Epoch: [69] [ 60/156] eta: 0:01:12 lr: 0.007402 min_lr: 0.007402 loss: 3.3953 (3.3344) weight_decay: 0.0500 (0.0500) time: 0.7288 data: 0.0004 max mem: 57114 Epoch: [69] [ 70/156] eta: 0:01:04 lr: 0.007400 min_lr: 0.007400 loss: 3.3953 (3.3396) weight_decay: 0.0500 (0.0500) time: 0.7261 data: 0.0004 max mem: 57114 Epoch: [69] [ 80/156] eta: 0:00:56 lr: 0.007399 min_lr: 0.007399 loss: 3.6050 (3.3702) weight_decay: 0.0500 (0.0500) time: 0.7272 data: 0.0004 max mem: 57114 Epoch: [69] [ 90/156] eta: 0:00:49 lr: 0.007397 min_lr: 0.007397 loss: 3.7078 (3.3885) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0004 max mem: 57114 Epoch: [69] [100/156] eta: 0:00:41 lr: 0.007396 min_lr: 0.007396 loss: 3.5926 (3.4006) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0004 max mem: 57114 Epoch: [69] [110/156] eta: 0:00:33 lr: 0.007394 min_lr: 0.007394 loss: 3.6156 (3.4156) weight_decay: 0.0500 (0.0500) time: 0.6969 data: 0.0003 max mem: 57114 Epoch: [69] [120/156] eta: 0:00:26 lr: 0.007392 min_lr: 0.007392 loss: 3.6532 (3.4217) weight_decay: 0.0500 (0.0500) time: 0.6963 data: 0.0004 max mem: 57114 Epoch: [69] [130/156] eta: 0:00:18 lr: 0.007391 min_lr: 0.007391 loss: 3.5699 (3.4175) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0009 max mem: 57114 Epoch: [69] [140/156] eta: 0:00:11 lr: 0.007389 min_lr: 0.007389 loss: 3.4763 (3.4113) weight_decay: 0.0500 (0.0500) time: 0.6917 data: 0.0007 max mem: 57114 Epoch: [69] [150/156] eta: 0:00:04 lr: 0.007388 min_lr: 0.007388 loss: 3.5009 (3.4107) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0001 max mem: 57114 Epoch: [69] [155/156] eta: 0:00:00 lr: 0.007387 min_lr: 0.007387 loss: 3.5009 (3.3952) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0001 max mem: 57114 Epoch: [69] Total time: 0:01:53 (0.7247 s / it) Averaged stats: lr: 0.007387 min_lr: 0.007387 loss: 3.5009 (3.4346) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8554 (0.8554) acc1: 79.6875 (79.6875) acc5: 95.8333 (95.8333) time: 2.1198 data: 1.8634 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9411 (1.0148) acc1: 76.5625 (73.7852) acc5: 95.3125 (92.7110) time: 0.5746 data: 0.3728 max mem: 57114 Test: Total time: 0:00:02 (0.5985 s / it) * Acc@1 74.423 Acc@5 92.475 loss 1.059 Accuracy of the model on the 50000 test images: 74.4% Max accuracy: 74.42% Test: [0/5] eta: 0:00:10 loss: 7.5487 (7.5487) acc1: 0.5208 (0.5208) acc5: 0.5208 (0.5208) time: 2.1057 data: 1.8623 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.2587 (7.3053) acc1: 0.0000 (0.2558) acc5: 0.0000 (0.3836) time: 0.5718 data: 0.3726 max mem: 57114 Test: Total time: 0:00:02 (0.5816 s / it) * Acc@1 0.130 Acc@5 0.516 loss 7.288 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.13% Epoch: [70] [ 0/156] eta: 0:07:34 lr: 0.007387 min_lr: 0.007387 loss: 3.0696 (3.0696) weight_decay: 0.0500 (0.0500) time: 2.9105 data: 2.2611 max mem: 57114 Epoch: [70] [ 10/156] eta: 0:02:16 lr: 0.007385 min_lr: 0.007385 loss: 3.2893 (3.2758) weight_decay: 0.0500 (0.0500) time: 0.9343 data: 0.2059 max mem: 57114 Epoch: [70] [ 20/156] eta: 0:01:52 lr: 0.007384 min_lr: 0.007384 loss: 3.2893 (3.3014) weight_decay: 0.0500 (0.0500) time: 0.7260 data: 0.0004 max mem: 57114 Epoch: [70] [ 30/156] eta: 0:01:40 lr: 0.007382 min_lr: 0.007382 loss: 3.5321 (3.4280) weight_decay: 0.0500 (0.0500) time: 0.7281 data: 0.0004 max mem: 57114 Epoch: [70] [ 40/156] eta: 0:01:30 lr: 0.007381 min_lr: 0.007381 loss: 3.6007 (3.4228) weight_decay: 0.0500 (0.0500) time: 0.7354 data: 0.0004 max mem: 57114 Epoch: [70] [ 50/156] eta: 0:01:21 lr: 0.007379 min_lr: 0.007379 loss: 3.5148 (3.4388) weight_decay: 0.0500 (0.0500) time: 0.7269 data: 0.0003 max mem: 57114 Epoch: [70] [ 60/156] eta: 0:01:13 lr: 0.007378 min_lr: 0.007378 loss: 3.6861 (3.4696) weight_decay: 0.0500 (0.0500) time: 0.7437 data: 0.0004 max mem: 57114 Epoch: [70] [ 70/156] eta: 0:01:05 lr: 0.007376 min_lr: 0.007376 loss: 3.5586 (3.4301) weight_decay: 0.0500 (0.0500) time: 0.7343 data: 0.0004 max mem: 57114 Epoch: [70] [ 80/156] eta: 0:00:57 lr: 0.007375 min_lr: 0.007375 loss: 3.4280 (3.4354) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [70] [ 90/156] eta: 0:00:49 lr: 0.007373 min_lr: 0.007373 loss: 3.5417 (3.4551) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0004 max mem: 57114 Epoch: [70] [100/156] eta: 0:00:41 lr: 0.007372 min_lr: 0.007372 loss: 3.6126 (3.4725) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [70] [110/156] eta: 0:00:34 lr: 0.007370 min_lr: 0.007370 loss: 3.6173 (3.4810) weight_decay: 0.0500 (0.0500) time: 0.7084 data: 0.0004 max mem: 57114 Epoch: [70] [120/156] eta: 0:00:26 lr: 0.007368 min_lr: 0.007368 loss: 3.4245 (3.4690) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0003 max mem: 57114 Epoch: [70] [130/156] eta: 0:00:19 lr: 0.007367 min_lr: 0.007367 loss: 3.3337 (3.4682) weight_decay: 0.0500 (0.0500) time: 0.6994 data: 0.0008 max mem: 57114 Epoch: [70] [140/156] eta: 0:00:11 lr: 0.007365 min_lr: 0.007365 loss: 3.3634 (3.4615) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0007 max mem: 57114 Epoch: [70] [150/156] eta: 0:00:04 lr: 0.007364 min_lr: 0.007364 loss: 3.6305 (3.4732) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [70] [155/156] eta: 0:00:00 lr: 0.007363 min_lr: 0.007363 loss: 3.6302 (3.4648) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [70] Total time: 0:01:53 (0.7305 s / it) Averaged stats: lr: 0.007363 min_lr: 0.007363 loss: 3.6302 (3.4150) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8982 (0.8982) acc1: 81.7708 (81.7708) acc5: 95.8333 (95.8333) time: 2.0676 data: 1.8119 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1335 (1.0617) acc1: 76.5625 (74.9361) acc5: 94.7917 (92.9668) time: 0.5641 data: 0.3624 max mem: 57114 Test: Total time: 0:00:02 (0.5885 s / it) * Acc@1 74.405 Acc@5 92.485 loss 1.181 Accuracy of the model on the 50000 test images: 74.4% Max accuracy: 74.42% Test: [0/5] eta: 0:00:11 loss: 7.5719 (7.5719) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.2880 data: 2.0439 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.2782 (7.3271) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.3836) time: 0.6085 data: 0.4089 max mem: 57114 Test: Total time: 0:00:03 (0.6219 s / it) * Acc@1 0.118 Acc@5 0.521 loss 7.309 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [71] [ 0/156] eta: 0:12:41 lr: 0.007363 min_lr: 0.007363 loss: 3.7077 (3.7077) weight_decay: 0.0500 (0.0500) time: 4.8786 data: 3.7355 max mem: 57114 Epoch: [71] [ 10/156] eta: 0:02:42 lr: 0.007361 min_lr: 0.007361 loss: 3.5406 (3.3085) weight_decay: 0.0500 (0.0500) time: 1.1139 data: 0.3399 max mem: 57114 Epoch: [71] [ 20/156] eta: 0:02:06 lr: 0.007360 min_lr: 0.007360 loss: 3.5019 (3.3968) weight_decay: 0.0500 (0.0500) time: 0.7338 data: 0.0004 max mem: 57114 Epoch: [71] [ 30/156] eta: 0:01:47 lr: 0.007358 min_lr: 0.007358 loss: 3.3750 (3.3422) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [71] [ 40/156] eta: 0:01:35 lr: 0.007357 min_lr: 0.007357 loss: 3.3742 (3.3633) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [71] [ 50/156] eta: 0:01:25 lr: 0.007355 min_lr: 0.007355 loss: 3.4563 (3.3562) weight_decay: 0.0500 (0.0500) time: 0.7297 data: 0.0004 max mem: 57114 Epoch: [71] [ 60/156] eta: 0:01:15 lr: 0.007354 min_lr: 0.007354 loss: 3.4644 (3.3700) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0004 max mem: 57114 Epoch: [71] [ 70/156] eta: 0:01:07 lr: 0.007352 min_lr: 0.007352 loss: 3.6308 (3.4032) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0004 max mem: 57114 Epoch: [71] [ 80/156] eta: 0:00:58 lr: 0.007350 min_lr: 0.007350 loss: 3.5342 (3.3944) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0004 max mem: 57114 Epoch: [71] [ 90/156] eta: 0:00:50 lr: 0.007349 min_lr: 0.007349 loss: 3.3851 (3.3938) weight_decay: 0.0500 (0.0500) time: 0.6995 data: 0.0004 max mem: 57114 Epoch: [71] [100/156] eta: 0:00:42 lr: 0.007347 min_lr: 0.007347 loss: 3.3291 (3.3785) weight_decay: 0.0500 (0.0500) time: 0.6970 data: 0.0004 max mem: 57114 Epoch: [71] [110/156] eta: 0:00:34 lr: 0.007346 min_lr: 0.007346 loss: 3.3030 (3.3765) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0004 max mem: 57114 Epoch: [71] [120/156] eta: 0:00:26 lr: 0.007344 min_lr: 0.007344 loss: 3.5478 (3.3752) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [71] [130/156] eta: 0:00:19 lr: 0.007342 min_lr: 0.007342 loss: 3.5956 (3.3747) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0009 max mem: 57114 Epoch: [71] [140/156] eta: 0:00:11 lr: 0.007341 min_lr: 0.007341 loss: 3.6307 (3.3856) weight_decay: 0.0500 (0.0500) time: 0.6944 data: 0.0007 max mem: 57114 Epoch: [71] [150/156] eta: 0:00:04 lr: 0.007339 min_lr: 0.007339 loss: 3.3059 (3.3791) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0001 max mem: 57114 Epoch: [71] [155/156] eta: 0:00:00 lr: 0.007339 min_lr: 0.007339 loss: 3.6493 (3.3923) weight_decay: 0.0500 (0.0500) time: 0.6854 data: 0.0001 max mem: 57114 Epoch: [71] Total time: 0:01:55 (0.7376 s / it) Averaged stats: lr: 0.007339 min_lr: 0.007339 loss: 3.6493 (3.4021) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 1.0732 (1.0732) acc1: 80.7292 (80.7292) acc5: 95.3125 (95.3125) time: 2.2064 data: 1.9508 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3182 (1.2362) acc1: 72.3958 (72.2506) acc5: 92.1875 (91.3044) time: 0.5919 data: 0.3902 max mem: 57114 Test: Total time: 0:00:03 (0.6179 s / it) * Acc@1 73.573 Acc@5 92.076 loss 1.292 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 74.42% Test: [0/5] eta: 0:00:11 loss: 7.5982 (7.5982) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.3788 data: 2.1353 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.2977 (7.3516) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.6264 data: 0.4271 max mem: 57114 Test: Total time: 0:00:03 (0.6365 s / it) * Acc@1 0.108 Acc@5 0.523 loss 7.331 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [72] [ 0/156] eta: 0:12:00 lr: 0.007338 min_lr: 0.007338 loss: 3.5625 (3.5625) weight_decay: 0.0500 (0.0500) time: 4.6203 data: 3.5817 max mem: 57114 Epoch: [72] [ 10/156] eta: 0:02:38 lr: 0.007337 min_lr: 0.007337 loss: 3.3503 (3.2530) weight_decay: 0.0500 (0.0500) time: 1.0870 data: 0.3259 max mem: 57114 Epoch: [72] [ 20/156] eta: 0:02:04 lr: 0.007335 min_lr: 0.007335 loss: 3.3098 (3.2698) weight_decay: 0.0500 (0.0500) time: 0.7316 data: 0.0003 max mem: 57114 Epoch: [72] [ 30/156] eta: 0:01:47 lr: 0.007334 min_lr: 0.007334 loss: 3.3779 (3.2812) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0003 max mem: 57114 Epoch: [72] [ 40/156] eta: 0:01:35 lr: 0.007332 min_lr: 0.007332 loss: 3.4339 (3.3173) weight_decay: 0.0500 (0.0500) time: 0.7247 data: 0.0003 max mem: 57114 Epoch: [72] [ 50/156] eta: 0:01:25 lr: 0.007330 min_lr: 0.007330 loss: 3.3904 (3.2606) weight_decay: 0.0500 (0.0500) time: 0.7282 data: 0.0004 max mem: 57114 Epoch: [72] [ 60/156] eta: 0:01:15 lr: 0.007329 min_lr: 0.007329 loss: 3.3904 (3.3106) weight_decay: 0.0500 (0.0500) time: 0.7259 data: 0.0004 max mem: 57114 Epoch: [72] [ 70/156] eta: 0:01:06 lr: 0.007327 min_lr: 0.007327 loss: 3.6223 (3.3461) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [72] [ 80/156] eta: 0:00:58 lr: 0.007326 min_lr: 0.007326 loss: 3.6267 (3.3682) weight_decay: 0.0500 (0.0500) time: 0.7157 data: 0.0004 max mem: 57114 Epoch: [72] [ 90/156] eta: 0:00:50 lr: 0.007324 min_lr: 0.007324 loss: 3.5976 (3.3766) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0004 max mem: 57114 Epoch: [72] [100/156] eta: 0:00:42 lr: 0.007322 min_lr: 0.007322 loss: 3.5473 (3.3821) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [72] [110/156] eta: 0:00:34 lr: 0.007321 min_lr: 0.007321 loss: 3.6312 (3.4062) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0003 max mem: 57114 Epoch: [72] [120/156] eta: 0:00:27 lr: 0.007319 min_lr: 0.007319 loss: 3.7051 (3.4159) weight_decay: 0.0500 (0.0500) time: 0.7035 data: 0.0004 max mem: 57114 Epoch: [72] [130/156] eta: 0:00:19 lr: 0.007318 min_lr: 0.007318 loss: 3.5175 (3.4122) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0009 max mem: 57114 Epoch: [72] [140/156] eta: 0:00:11 lr: 0.007316 min_lr: 0.007316 loss: 3.5364 (3.4256) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0008 max mem: 57114 Epoch: [72] [150/156] eta: 0:00:04 lr: 0.007314 min_lr: 0.007314 loss: 3.5289 (3.4255) weight_decay: 0.0500 (0.0500) time: 0.6890 data: 0.0001 max mem: 57114 Epoch: [72] [155/156] eta: 0:00:00 lr: 0.007314 min_lr: 0.007314 loss: 3.4713 (3.4218) weight_decay: 0.0500 (0.0500) time: 0.6897 data: 0.0001 max mem: 57114 Epoch: [72] Total time: 0:01:55 (0.7383 s / it) Averaged stats: lr: 0.007314 min_lr: 0.007314 loss: 3.4713 (3.4059) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8885 (0.8885) acc1: 82.2917 (82.2917) acc5: 95.8333 (95.8333) time: 2.0480 data: 1.7914 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0892 (1.0384) acc1: 77.0833 (76.5985) acc5: 94.7917 (93.2225) time: 0.5604 data: 0.3584 max mem: 57114 Test: Total time: 0:00:02 (0.5816 s / it) * Acc@1 74.944 Acc@5 92.817 loss 1.149 Accuracy of the model on the 50000 test images: 74.9% Max accuracy: 74.94% Test: [0/5] eta: 0:00:10 loss: 7.6266 (7.6266) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.0085 data: 1.7650 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.3222 (7.3782) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.5523 data: 0.3531 max mem: 57114 Test: Total time: 0:00:02 (0.5625 s / it) * Acc@1 0.108 Acc@5 0.521 loss 7.355 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [73] [ 0/156] eta: 0:12:50 lr: 0.007313 min_lr: 0.007313 loss: 3.6480 (3.6480) weight_decay: 0.0500 (0.0500) time: 4.9399 data: 3.9538 max mem: 57114 Epoch: [73] [ 10/156] eta: 0:02:46 lr: 0.007312 min_lr: 0.007312 loss: 3.5674 (3.3805) weight_decay: 0.0500 (0.0500) time: 1.1388 data: 0.3597 max mem: 57114 Epoch: [73] [ 20/156] eta: 0:02:08 lr: 0.007310 min_lr: 0.007310 loss: 3.4584 (3.3068) weight_decay: 0.0500 (0.0500) time: 0.7472 data: 0.0003 max mem: 57114 Epoch: [73] [ 30/156] eta: 0:01:50 lr: 0.007309 min_lr: 0.007309 loss: 3.6368 (3.4568) weight_decay: 0.0500 (0.0500) time: 0.7341 data: 0.0003 max mem: 57114 Epoch: [73] [ 40/156] eta: 0:01:37 lr: 0.007307 min_lr: 0.007307 loss: 3.6366 (3.4226) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0003 max mem: 57114 Epoch: [73] [ 50/156] eta: 0:01:26 lr: 0.007305 min_lr: 0.007305 loss: 3.4884 (3.4438) weight_decay: 0.0500 (0.0500) time: 0.7276 data: 0.0003 max mem: 57114 Epoch: [73] [ 60/156] eta: 0:01:17 lr: 0.007304 min_lr: 0.007304 loss: 3.5102 (3.4379) weight_decay: 0.0500 (0.0500) time: 0.7277 data: 0.0004 max mem: 57114 Epoch: [73] [ 70/156] eta: 0:01:08 lr: 0.007302 min_lr: 0.007302 loss: 3.5508 (3.4525) weight_decay: 0.0500 (0.0500) time: 0.7341 data: 0.0004 max mem: 57114 Epoch: [73] [ 80/156] eta: 0:00:59 lr: 0.007300 min_lr: 0.007300 loss: 3.5908 (3.4629) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [73] [ 90/156] eta: 0:00:51 lr: 0.007299 min_lr: 0.007299 loss: 3.5085 (3.4442) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [73] [100/156] eta: 0:00:43 lr: 0.007297 min_lr: 0.007297 loss: 3.6071 (3.4639) weight_decay: 0.0500 (0.0500) time: 0.7047 data: 0.0004 max mem: 57114 Epoch: [73] [110/156] eta: 0:00:35 lr: 0.007296 min_lr: 0.007296 loss: 3.7296 (3.4842) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0003 max mem: 57114 Epoch: [73] [120/156] eta: 0:00:27 lr: 0.007294 min_lr: 0.007294 loss: 3.7604 (3.4984) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0003 max mem: 57114 Epoch: [73] [130/156] eta: 0:00:19 lr: 0.007292 min_lr: 0.007292 loss: 3.5359 (3.4939) weight_decay: 0.0500 (0.0500) time: 0.7114 data: 0.0008 max mem: 57114 Epoch: [73] [140/156] eta: 0:00:11 lr: 0.007291 min_lr: 0.007291 loss: 3.4196 (3.4798) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0007 max mem: 57114 Epoch: [73] [150/156] eta: 0:00:04 lr: 0.007289 min_lr: 0.007289 loss: 3.4097 (3.4704) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.0001 max mem: 57114 Epoch: [73] [155/156] eta: 0:00:00 lr: 0.007288 min_lr: 0.007288 loss: 3.4097 (3.4671) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.0001 max mem: 57114 Epoch: [73] Total time: 0:01:56 (0.7454 s / it) Averaged stats: lr: 0.007288 min_lr: 0.007288 loss: 3.4097 (3.4037) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0045 (1.0045) acc1: 83.3333 (83.3333) acc5: 96.3542 (96.3542) time: 2.0478 data: 1.7922 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1430 (1.1016) acc1: 78.1250 (76.9821) acc5: 95.3125 (93.2225) time: 0.5602 data: 0.3585 max mem: 57114 Test: Total time: 0:00:02 (0.5799 s / it) * Acc@1 74.650 Acc@5 92.795 loss 1.205 Accuracy of the model on the 50000 test images: 74.7% Max accuracy: 74.94% Test: [0/5] eta: 0:00:11 loss: 7.6553 (7.6553) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.3239 data: 2.0806 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.3499 (7.4063) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.5115) time: 0.6154 data: 0.4162 max mem: 57114 Test: Total time: 0:00:03 (0.6282 s / it) * Acc@1 0.108 Acc@5 0.529 loss 7.381 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [74] [ 0/156] eta: 0:12:24 lr: 0.007288 min_lr: 0.007288 loss: 3.8801 (3.8801) weight_decay: 0.0500 (0.0500) time: 4.7700 data: 3.7131 max mem: 57114 Epoch: [74] [ 10/156] eta: 0:02:38 lr: 0.007286 min_lr: 0.007286 loss: 3.6656 (3.5517) weight_decay: 0.0500 (0.0500) time: 1.0866 data: 0.3378 max mem: 57114 Epoch: [74] [ 20/156] eta: 0:02:04 lr: 0.007285 min_lr: 0.007285 loss: 3.4759 (3.3748) weight_decay: 0.0500 (0.0500) time: 0.7231 data: 0.0003 max mem: 57114 Epoch: [74] [ 30/156] eta: 0:01:47 lr: 0.007283 min_lr: 0.007283 loss: 3.2539 (3.3652) weight_decay: 0.0500 (0.0500) time: 0.7250 data: 0.0003 max mem: 57114 Epoch: [74] [ 40/156] eta: 0:01:34 lr: 0.007282 min_lr: 0.007282 loss: 3.2014 (3.2893) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0003 max mem: 57114 Epoch: [74] [ 50/156] eta: 0:01:24 lr: 0.007280 min_lr: 0.007280 loss: 3.2729 (3.2861) weight_decay: 0.0500 (0.0500) time: 0.7186 data: 0.0003 max mem: 57114 Epoch: [74] [ 60/156] eta: 0:01:15 lr: 0.007278 min_lr: 0.007278 loss: 3.3757 (3.3151) weight_decay: 0.0500 (0.0500) time: 0.7313 data: 0.0004 max mem: 57114 Epoch: [74] [ 70/156] eta: 0:01:06 lr: 0.007277 min_lr: 0.007277 loss: 3.4843 (3.3282) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [74] [ 80/156] eta: 0:00:58 lr: 0.007275 min_lr: 0.007275 loss: 3.2850 (3.3170) weight_decay: 0.0500 (0.0500) time: 0.7128 data: 0.0003 max mem: 57114 Epoch: [74] [ 90/156] eta: 0:00:50 lr: 0.007273 min_lr: 0.007273 loss: 3.3134 (3.3234) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0003 max mem: 57114 Epoch: [74] [100/156] eta: 0:00:42 lr: 0.007272 min_lr: 0.007272 loss: 3.3263 (3.3297) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [74] [110/156] eta: 0:00:34 lr: 0.007270 min_lr: 0.007270 loss: 3.4845 (3.3192) weight_decay: 0.0500 (0.0500) time: 0.7038 data: 0.0004 max mem: 57114 Epoch: [74] [120/156] eta: 0:00:26 lr: 0.007268 min_lr: 0.007268 loss: 3.6721 (3.3529) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0004 max mem: 57114 Epoch: [74] [130/156] eta: 0:00:19 lr: 0.007267 min_lr: 0.007267 loss: 3.4697 (3.3452) weight_decay: 0.0500 (0.0500) time: 0.6902 data: 0.0009 max mem: 57114 Epoch: [74] [140/156] eta: 0:00:11 lr: 0.007265 min_lr: 0.007265 loss: 3.3989 (3.3475) weight_decay: 0.0500 (0.0500) time: 0.6875 data: 0.0007 max mem: 57114 Epoch: [74] [150/156] eta: 0:00:04 lr: 0.007263 min_lr: 0.007263 loss: 3.4621 (3.3573) weight_decay: 0.0500 (0.0500) time: 0.6858 data: 0.0001 max mem: 57114 Epoch: [74] [155/156] eta: 0:00:00 lr: 0.007263 min_lr: 0.007263 loss: 3.4621 (3.3633) weight_decay: 0.0500 (0.0500) time: 0.6847 data: 0.0001 max mem: 57114 Epoch: [74] Total time: 0:01:54 (0.7370 s / it) Averaged stats: lr: 0.007263 min_lr: 0.007263 loss: 3.4621 (3.3994) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0235 (1.0235) acc1: 81.2500 (81.2500) acc5: 95.8333 (95.8333) time: 2.0362 data: 1.7802 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2162 (1.1961) acc1: 76.0417 (74.6803) acc5: 95.8333 (93.2225) time: 0.5580 data: 0.3561 max mem: 57114 Test: Total time: 0:00:02 (0.5795 s / it) * Acc@1 74.516 Acc@5 92.435 loss 1.281 Accuracy of the model on the 50000 test images: 74.5% Max accuracy: 74.94% Test: [0/5] eta: 0:00:11 loss: 7.6854 (7.6854) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.3188 data: 2.0751 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.3809 (7.4360) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.6394) time: 0.6146 data: 0.4151 max mem: 57114 Test: Total time: 0:00:03 (0.6272 s / it) * Acc@1 0.108 Acc@5 0.537 loss 7.408 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [75] [ 0/156] eta: 0:12:08 lr: 0.007262 min_lr: 0.007262 loss: 3.6863 (3.6863) weight_decay: 0.0500 (0.0500) time: 4.6730 data: 3.4636 max mem: 57114 Epoch: [75] [ 10/156] eta: 0:02:41 lr: 0.007261 min_lr: 0.007261 loss: 3.5516 (3.4425) weight_decay: 0.0500 (0.0500) time: 1.1063 data: 0.3151 max mem: 57114 Epoch: [75] [ 20/156] eta: 0:02:04 lr: 0.007259 min_lr: 0.007259 loss: 3.3416 (3.2228) weight_decay: 0.0500 (0.0500) time: 0.7305 data: 0.0003 max mem: 57114 Epoch: [75] [ 30/156] eta: 0:01:46 lr: 0.007257 min_lr: 0.007257 loss: 3.2483 (3.2326) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [75] [ 40/156] eta: 0:01:35 lr: 0.007256 min_lr: 0.007256 loss: 3.3003 (3.2540) weight_decay: 0.0500 (0.0500) time: 0.7242 data: 0.0003 max mem: 57114 Epoch: [75] [ 50/156] eta: 0:01:26 lr: 0.007254 min_lr: 0.007254 loss: 3.3346 (3.2359) weight_decay: 0.0500 (0.0500) time: 0.7567 data: 0.0004 max mem: 57114 Epoch: [75] [ 60/156] eta: 0:01:16 lr: 0.007252 min_lr: 0.007252 loss: 3.3492 (3.2634) weight_decay: 0.0500 (0.0500) time: 0.7449 data: 0.0004 max mem: 57114 Epoch: [75] [ 70/156] eta: 0:01:07 lr: 0.007251 min_lr: 0.007251 loss: 3.4983 (3.2932) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [75] [ 80/156] eta: 0:00:58 lr: 0.007249 min_lr: 0.007249 loss: 3.4983 (3.3191) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0004 max mem: 57114 Epoch: [75] [ 90/156] eta: 0:00:50 lr: 0.007247 min_lr: 0.007247 loss: 3.3846 (3.3201) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0004 max mem: 57114 Epoch: [75] [100/156] eta: 0:00:42 lr: 0.007246 min_lr: 0.007246 loss: 3.2806 (3.3162) weight_decay: 0.0500 (0.0500) time: 0.7028 data: 0.0004 max mem: 57114 Epoch: [75] [110/156] eta: 0:00:34 lr: 0.007244 min_lr: 0.007244 loss: 3.4372 (3.3453) weight_decay: 0.0500 (0.0500) time: 0.7067 data: 0.0004 max mem: 57114 Epoch: [75] [120/156] eta: 0:00:27 lr: 0.007242 min_lr: 0.007242 loss: 3.6445 (3.3519) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0004 max mem: 57114 Epoch: [75] [130/156] eta: 0:00:19 lr: 0.007241 min_lr: 0.007241 loss: 3.5218 (3.3533) weight_decay: 0.0500 (0.0500) time: 0.7016 data: 0.0008 max mem: 57114 Epoch: [75] [140/156] eta: 0:00:11 lr: 0.007239 min_lr: 0.007239 loss: 3.3644 (3.3527) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0007 max mem: 57114 Epoch: [75] [150/156] eta: 0:00:04 lr: 0.007237 min_lr: 0.007237 loss: 3.5353 (3.3674) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0001 max mem: 57114 Epoch: [75] [155/156] eta: 0:00:00 lr: 0.007236 min_lr: 0.007236 loss: 3.4918 (3.3643) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0001 max mem: 57114 Epoch: [75] Total time: 0:01:55 (0.7423 s / it) Averaged stats: lr: 0.007236 min_lr: 0.007236 loss: 3.4918 (3.3947) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1273 (1.1273) acc1: 80.7292 (80.7292) acc5: 93.7500 (93.7500) time: 2.0659 data: 1.8101 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2137 (1.2649) acc1: 79.1667 (75.7033) acc5: 93.7500 (92.7110) time: 0.5639 data: 0.3621 max mem: 57114 Test: Total time: 0:00:02 (0.5865 s / it) * Acc@1 74.500 Acc@5 92.431 loss 1.346 Accuracy of the model on the 50000 test images: 74.5% Max accuracy: 74.94% Test: [0/5] eta: 0:00:11 loss: 7.7223 (7.7223) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.2899 data: 2.0465 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.4254 (7.4734) acc1: 0.0000 (0.1279) acc5: 0.0000 (0.6394) time: 0.6087 data: 0.4094 max mem: 57114 Test: Total time: 0:00:03 (0.6202 s / it) * Acc@1 0.108 Acc@5 0.591 loss 7.442 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [76] [ 0/156] eta: 0:12:45 lr: 0.007236 min_lr: 0.007236 loss: 2.4127 (2.4127) weight_decay: 0.0500 (0.0500) time: 4.9078 data: 2.7365 max mem: 57114 Epoch: [76] [ 10/156] eta: 0:02:42 lr: 0.007234 min_lr: 0.007234 loss: 3.2767 (3.1805) weight_decay: 0.0500 (0.0500) time: 1.1154 data: 0.2491 max mem: 57114 Epoch: [76] [ 20/156] eta: 0:02:07 lr: 0.007233 min_lr: 0.007233 loss: 3.3795 (3.2597) weight_decay: 0.0500 (0.0500) time: 0.7393 data: 0.0004 max mem: 57114 Epoch: [76] [ 30/156] eta: 0:01:49 lr: 0.007231 min_lr: 0.007231 loss: 3.5217 (3.2492) weight_decay: 0.0500 (0.0500) time: 0.7343 data: 0.0004 max mem: 57114 Epoch: [76] [ 40/156] eta: 0:01:37 lr: 0.007229 min_lr: 0.007229 loss: 3.5803 (3.2838) weight_decay: 0.0500 (0.0500) time: 0.7380 data: 0.0004 max mem: 57114 Epoch: [76] [ 50/156] eta: 0:01:26 lr: 0.007228 min_lr: 0.007228 loss: 3.5872 (3.3316) weight_decay: 0.0500 (0.0500) time: 0.7360 data: 0.0004 max mem: 57114 Epoch: [76] [ 60/156] eta: 0:01:16 lr: 0.007226 min_lr: 0.007226 loss: 3.5278 (3.3447) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0004 max mem: 57114 Epoch: [76] [ 70/156] eta: 0:01:07 lr: 0.007224 min_lr: 0.007224 loss: 3.4442 (3.3722) weight_decay: 0.0500 (0.0500) time: 0.7194 data: 0.0003 max mem: 57114 Epoch: [76] [ 80/156] eta: 0:00:59 lr: 0.007223 min_lr: 0.007223 loss: 3.3559 (3.3448) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0004 max mem: 57114 Epoch: [76] [ 90/156] eta: 0:00:50 lr: 0.007221 min_lr: 0.007221 loss: 3.3498 (3.3556) weight_decay: 0.0500 (0.0500) time: 0.7025 data: 0.0004 max mem: 57114 Epoch: [76] [100/156] eta: 0:00:42 lr: 0.007219 min_lr: 0.007219 loss: 3.5026 (3.3470) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0003 max mem: 57114 Epoch: [76] [110/156] eta: 0:00:34 lr: 0.007217 min_lr: 0.007217 loss: 3.4909 (3.3687) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0004 max mem: 57114 Epoch: [76] [120/156] eta: 0:00:27 lr: 0.007216 min_lr: 0.007216 loss: 3.5152 (3.3806) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0004 max mem: 57114 Epoch: [76] [130/156] eta: 0:00:19 lr: 0.007214 min_lr: 0.007214 loss: 3.3381 (3.3651) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0008 max mem: 57114 Epoch: [76] [140/156] eta: 0:00:11 lr: 0.007212 min_lr: 0.007212 loss: 3.1609 (3.3624) weight_decay: 0.0500 (0.0500) time: 0.6888 data: 0.0007 max mem: 57114 Epoch: [76] [150/156] eta: 0:00:04 lr: 0.007211 min_lr: 0.007211 loss: 3.6188 (3.3742) weight_decay: 0.0500 (0.0500) time: 0.6803 data: 0.0001 max mem: 57114 Epoch: [76] [155/156] eta: 0:00:00 lr: 0.007210 min_lr: 0.007210 loss: 3.5637 (3.3735) weight_decay: 0.0500 (0.0500) time: 0.6823 data: 0.0001 max mem: 57114 Epoch: [76] Total time: 0:01:55 (0.7419 s / it) Averaged stats: lr: 0.007210 min_lr: 0.007210 loss: 3.5637 (3.3908) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9252 (0.9252) acc1: 80.7292 (80.7292) acc5: 96.8750 (96.8750) time: 2.0993 data: 1.8435 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1567 (1.0841) acc1: 74.4792 (74.0409) acc5: 96.8750 (93.2225) time: 0.5706 data: 0.3688 max mem: 57114 Test: Total time: 0:00:02 (0.5933 s / it) * Acc@1 73.881 Acc@5 92.223 loss 1.197 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 74.94% Test: [0/5] eta: 0:00:11 loss: 7.7666 (7.7666) acc1: 0.0000 (0.0000) acc5: 0.5208 (0.5208) time: 2.3384 data: 2.0950 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.4726 (7.5157) acc1: 0.0000 (0.1279) acc5: 0.5208 (0.8951) time: 0.6183 data: 0.4191 max mem: 57114 Test: Total time: 0:00:03 (0.6292 s / it) * Acc@1 0.108 Acc@5 0.601 loss 7.482 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [77] [ 0/156] eta: 0:12:49 lr: 0.007210 min_lr: 0.007210 loss: 3.0532 (3.0532) weight_decay: 0.0500 (0.0500) time: 4.9306 data: 3.7539 max mem: 57114 Epoch: [77] [ 10/156] eta: 0:02:44 lr: 0.007208 min_lr: 0.007208 loss: 3.3726 (3.3054) weight_decay: 0.0500 (0.0500) time: 1.1280 data: 0.3416 max mem: 57114 Epoch: [77] [ 20/156] eta: 0:02:07 lr: 0.007206 min_lr: 0.007206 loss: 3.3726 (3.3355) weight_decay: 0.0500 (0.0500) time: 0.7373 data: 0.0004 max mem: 57114 Epoch: [77] [ 30/156] eta: 0:01:49 lr: 0.007204 min_lr: 0.007204 loss: 3.5579 (3.4069) weight_decay: 0.0500 (0.0500) time: 0.7265 data: 0.0004 max mem: 57114 Epoch: [77] [ 40/156] eta: 0:01:37 lr: 0.007203 min_lr: 0.007203 loss: 3.5579 (3.3825) weight_decay: 0.0500 (0.0500) time: 0.7318 data: 0.0004 max mem: 57114 Epoch: [77] [ 50/156] eta: 0:01:26 lr: 0.007201 min_lr: 0.007201 loss: 3.5207 (3.4121) weight_decay: 0.0500 (0.0500) time: 0.7259 data: 0.0004 max mem: 57114 Epoch: [77] [ 60/156] eta: 0:01:16 lr: 0.007199 min_lr: 0.007199 loss: 3.6478 (3.4324) weight_decay: 0.0500 (0.0500) time: 0.7119 data: 0.0004 max mem: 57114 Epoch: [77] [ 70/156] eta: 0:01:07 lr: 0.007198 min_lr: 0.007198 loss: 3.4340 (3.4129) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [77] [ 80/156] eta: 0:00:59 lr: 0.007196 min_lr: 0.007196 loss: 3.2723 (3.3916) weight_decay: 0.0500 (0.0500) time: 0.7187 data: 0.0004 max mem: 57114 Epoch: [77] [ 90/156] eta: 0:00:50 lr: 0.007194 min_lr: 0.007194 loss: 3.2001 (3.3545) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [77] [100/156] eta: 0:00:42 lr: 0.007192 min_lr: 0.007192 loss: 3.2062 (3.3555) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0004 max mem: 57114 Epoch: [77] [110/156] eta: 0:00:34 lr: 0.007191 min_lr: 0.007191 loss: 3.3032 (3.3529) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.0004 max mem: 57114 Epoch: [77] [120/156] eta: 0:00:27 lr: 0.007189 min_lr: 0.007189 loss: 3.4047 (3.3459) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.0004 max mem: 57114 Epoch: [77] [130/156] eta: 0:00:19 lr: 0.007187 min_lr: 0.007187 loss: 3.2840 (3.3279) weight_decay: 0.0500 (0.0500) time: 0.6975 data: 0.0008 max mem: 57114 Epoch: [77] [140/156] eta: 0:00:11 lr: 0.007185 min_lr: 0.007185 loss: 3.2165 (3.3269) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0007 max mem: 57114 Epoch: [77] [150/156] eta: 0:00:04 lr: 0.007184 min_lr: 0.007184 loss: 3.4293 (3.3313) weight_decay: 0.0500 (0.0500) time: 0.6947 data: 0.0001 max mem: 57114 Epoch: [77] [155/156] eta: 0:00:00 lr: 0.007183 min_lr: 0.007183 loss: 3.4671 (3.3353) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0001 max mem: 57114 Epoch: [77] Total time: 0:01:55 (0.7408 s / it) Averaged stats: lr: 0.007183 min_lr: 0.007183 loss: 3.4671 (3.3810) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0738 (1.0738) acc1: 78.6458 (78.6458) acc5: 94.7917 (94.7917) time: 2.0595 data: 1.8026 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1493 (1.1800) acc1: 78.1250 (73.7852) acc5: 94.7917 (93.2225) time: 0.5626 data: 0.3606 max mem: 57114 Test: Total time: 0:00:02 (0.5820 s / it) * Acc@1 73.308 Acc@5 92.040 loss 1.265 Accuracy of the model on the 50000 test images: 73.3% Max accuracy: 74.94% Test: [0/5] eta: 0:00:11 loss: 7.8056 (7.8056) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.2946 data: 2.0510 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.5318 (7.5613) acc1: 0.0000 (0.1279) acc5: 1.0417 (1.1509) time: 0.6096 data: 0.4103 max mem: 57114 Test: Total time: 0:00:03 (0.6234 s / it) * Acc@1 0.106 Acc@5 0.685 loss 7.523 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [78] [ 0/156] eta: 0:12:27 lr: 0.007183 min_lr: 0.007183 loss: 3.8536 (3.8536) weight_decay: 0.0500 (0.0500) time: 4.7910 data: 3.2710 max mem: 57114 Epoch: [78] [ 10/156] eta: 0:02:41 lr: 0.007181 min_lr: 0.007181 loss: 3.3207 (3.2888) weight_decay: 0.0500 (0.0500) time: 1.1055 data: 0.2977 max mem: 57114 Epoch: [78] [ 20/156] eta: 0:02:04 lr: 0.007179 min_lr: 0.007179 loss: 3.1133 (3.1960) weight_decay: 0.0500 (0.0500) time: 0.7251 data: 0.0003 max mem: 57114 Epoch: [78] [ 30/156] eta: 0:01:47 lr: 0.007177 min_lr: 0.007177 loss: 3.0731 (3.1808) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0003 max mem: 57114 Epoch: [78] [ 40/156] eta: 0:01:35 lr: 0.007176 min_lr: 0.007176 loss: 3.1950 (3.1856) weight_decay: 0.0500 (0.0500) time: 0.7241 data: 0.0004 max mem: 57114 Epoch: [78] [ 50/156] eta: 0:01:25 lr: 0.007174 min_lr: 0.007174 loss: 3.3601 (3.2145) weight_decay: 0.0500 (0.0500) time: 0.7394 data: 0.0004 max mem: 57114 Epoch: [78] [ 60/156] eta: 0:01:16 lr: 0.007172 min_lr: 0.007172 loss: 3.2530 (3.2339) weight_decay: 0.0500 (0.0500) time: 0.7285 data: 0.0004 max mem: 57114 Epoch: [78] [ 70/156] eta: 0:01:07 lr: 0.007170 min_lr: 0.007170 loss: 3.2944 (3.2625) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [78] [ 80/156] eta: 0:00:58 lr: 0.007169 min_lr: 0.007169 loss: 3.2944 (3.2754) weight_decay: 0.0500 (0.0500) time: 0.7154 data: 0.0003 max mem: 57114 Epoch: [78] [ 90/156] eta: 0:00:50 lr: 0.007167 min_lr: 0.007167 loss: 3.3977 (3.2919) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0003 max mem: 57114 Epoch: [78] [100/156] eta: 0:00:42 lr: 0.007165 min_lr: 0.007165 loss: 3.4754 (3.3115) weight_decay: 0.0500 (0.0500) time: 0.7035 data: 0.0004 max mem: 57114 Epoch: [78] [110/156] eta: 0:00:34 lr: 0.007163 min_lr: 0.007163 loss: 3.4334 (3.3001) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [78] [120/156] eta: 0:00:27 lr: 0.007162 min_lr: 0.007162 loss: 3.5088 (3.3266) weight_decay: 0.0500 (0.0500) time: 0.7105 data: 0.0004 max mem: 57114 Epoch: [78] [130/156] eta: 0:00:19 lr: 0.007160 min_lr: 0.007160 loss: 3.5965 (3.3429) weight_decay: 0.0500 (0.0500) time: 0.6987 data: 0.0007 max mem: 57114 Epoch: [78] [140/156] eta: 0:00:11 lr: 0.007158 min_lr: 0.007158 loss: 3.5551 (3.3464) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0006 max mem: 57114 Epoch: [78] [150/156] eta: 0:00:04 lr: 0.007156 min_lr: 0.007156 loss: 3.3650 (3.3485) weight_decay: 0.0500 (0.0500) time: 0.6835 data: 0.0001 max mem: 57114 Epoch: [78] [155/156] eta: 0:00:00 lr: 0.007155 min_lr: 0.007155 loss: 3.3841 (3.3547) weight_decay: 0.0500 (0.0500) time: 0.6789 data: 0.0001 max mem: 57114 Epoch: [78] Total time: 0:01:55 (0.7386 s / it) Averaged stats: lr: 0.007155 min_lr: 0.007155 loss: 3.3841 (3.3742) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1189 (1.1189) acc1: 80.2083 (80.2083) acc5: 95.3125 (95.3125) time: 2.1335 data: 1.8773 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2619 (1.2465) acc1: 77.0833 (73.9130) acc5: 94.7917 (91.6880) time: 0.5773 data: 0.3755 max mem: 57114 Test: Total time: 0:00:03 (0.6019 s / it) * Acc@1 72.542 Acc@5 91.536 loss 1.333 Accuracy of the model on the 50000 test images: 72.5% Max accuracy: 74.94% Test: [0/5] eta: 0:00:11 loss: 7.8473 (7.8473) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.3236 data: 2.0797 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.5922 (7.6094) acc1: 0.0000 (0.1279) acc5: 1.0417 (1.0230) time: 0.6155 data: 0.4160 max mem: 57114 Test: Total time: 0:00:03 (0.6300 s / it) * Acc@1 0.106 Acc@5 0.727 loss 7.567 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [79] [ 0/156] eta: 0:11:56 lr: 0.007155 min_lr: 0.007155 loss: 3.7084 (3.7084) weight_decay: 0.0500 (0.0500) time: 4.5956 data: 3.3689 max mem: 57114 Epoch: [79] [ 10/156] eta: 0:02:39 lr: 0.007153 min_lr: 0.007153 loss: 3.2150 (3.1123) weight_decay: 0.0500 (0.0500) time: 1.0918 data: 0.3065 max mem: 57114 Epoch: [79] [ 20/156] eta: 0:02:05 lr: 0.007152 min_lr: 0.007152 loss: 3.5602 (3.3778) weight_decay: 0.0500 (0.0500) time: 0.7426 data: 0.0003 max mem: 57114 Epoch: [79] [ 30/156] eta: 0:01:50 lr: 0.007150 min_lr: 0.007150 loss: 3.4743 (3.3134) weight_decay: 0.0500 (0.0500) time: 0.7618 data: 0.0003 max mem: 57114 Epoch: [79] [ 40/156] eta: 0:01:38 lr: 0.007148 min_lr: 0.007148 loss: 3.1619 (3.2799) weight_decay: 0.0500 (0.0500) time: 0.7705 data: 0.0005 max mem: 57114 Epoch: [79] [ 50/156] eta: 0:01:28 lr: 0.007146 min_lr: 0.007146 loss: 3.0429 (3.2369) weight_decay: 0.0500 (0.0500) time: 0.7594 data: 0.0007 max mem: 57114 Epoch: [79] [ 60/156] eta: 0:01:18 lr: 0.007145 min_lr: 0.007145 loss: 3.4668 (3.2661) weight_decay: 0.0500 (0.0500) time: 0.7442 data: 0.0006 max mem: 57114 Epoch: [79] [ 70/156] eta: 0:01:09 lr: 0.007143 min_lr: 0.007143 loss: 3.4685 (3.2710) weight_decay: 0.0500 (0.0500) time: 0.7433 data: 0.0005 max mem: 57114 Epoch: [79] [ 80/156] eta: 0:01:00 lr: 0.007141 min_lr: 0.007141 loss: 3.2430 (3.2701) weight_decay: 0.0500 (0.0500) time: 0.7507 data: 0.0004 max mem: 57114 Epoch: [79] [ 90/156] eta: 0:00:52 lr: 0.007139 min_lr: 0.007139 loss: 3.3776 (3.2866) weight_decay: 0.0500 (0.0500) time: 0.7369 data: 0.0004 max mem: 57114 Epoch: [79] [100/156] eta: 0:00:43 lr: 0.007137 min_lr: 0.007137 loss: 3.4196 (3.2867) weight_decay: 0.0500 (0.0500) time: 0.7194 data: 0.0004 max mem: 57114 Epoch: [79] [110/156] eta: 0:00:35 lr: 0.007136 min_lr: 0.007136 loss: 3.2954 (3.2793) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [79] [120/156] eta: 0:00:27 lr: 0.007134 min_lr: 0.007134 loss: 3.3295 (3.2784) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0004 max mem: 57114 Epoch: [79] [130/156] eta: 0:00:19 lr: 0.007132 min_lr: 0.007132 loss: 3.3374 (3.2767) weight_decay: 0.0500 (0.0500) time: 0.6959 data: 0.0009 max mem: 57114 Epoch: [79] [140/156] eta: 0:00:12 lr: 0.007130 min_lr: 0.007130 loss: 3.4701 (3.3029) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0007 max mem: 57114 Epoch: [79] [150/156] eta: 0:00:04 lr: 0.007129 min_lr: 0.007129 loss: 3.5398 (3.3073) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [79] [155/156] eta: 0:00:00 lr: 0.007128 min_lr: 0.007128 loss: 3.5398 (3.3102) weight_decay: 0.0500 (0.0500) time: 0.6852 data: 0.0001 max mem: 57114 Epoch: [79] Total time: 0:01:57 (0.7534 s / it) Averaged stats: lr: 0.007128 min_lr: 0.007128 loss: 3.5398 (3.3807) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0097 (1.0097) acc1: 83.8542 (83.8542) acc5: 95.3125 (95.3125) time: 2.0436 data: 1.7876 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1356 (1.1937) acc1: 75.5208 (74.2967) acc5: 95.3125 (92.1995) time: 0.5594 data: 0.3576 max mem: 57114 Test: Total time: 0:00:02 (0.5818 s / it) * Acc@1 73.783 Acc@5 92.128 loss 1.272 Accuracy of the model on the 50000 test images: 73.8% Max accuracy: 74.94% Test: [0/5] eta: 0:00:11 loss: 7.8978 (7.8978) acc1: 0.0000 (0.0000) acc5: 1.0417 (1.0417) time: 2.2998 data: 2.0564 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.6606 (7.6674) acc1: 0.0000 (0.1279) acc5: 0.5208 (0.7673) time: 0.6105 data: 0.4113 max mem: 57114 Test: Total time: 0:00:03 (0.6262 s / it) * Acc@1 0.106 Acc@5 0.717 loss 7.619 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [80] [ 0/156] eta: 0:12:36 lr: 0.007127 min_lr: 0.007127 loss: 3.0597 (3.0597) weight_decay: 0.0500 (0.0500) time: 4.8489 data: 3.6960 max mem: 57114 Epoch: [80] [ 10/156] eta: 0:02:45 lr: 0.007126 min_lr: 0.007126 loss: 3.4141 (3.2385) weight_decay: 0.0500 (0.0500) time: 1.1337 data: 0.3363 max mem: 57114 Epoch: [80] [ 20/156] eta: 0:02:07 lr: 0.007124 min_lr: 0.007124 loss: 3.3649 (3.2751) weight_decay: 0.0500 (0.0500) time: 0.7394 data: 0.0003 max mem: 57114 Epoch: [80] [ 30/156] eta: 0:01:49 lr: 0.007122 min_lr: 0.007122 loss: 3.4325 (3.3401) weight_decay: 0.0500 (0.0500) time: 0.7234 data: 0.0003 max mem: 57114 Epoch: [80] [ 40/156] eta: 0:01:36 lr: 0.007120 min_lr: 0.007120 loss: 3.5569 (3.3978) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0004 max mem: 57114 Epoch: [80] [ 50/156] eta: 0:01:25 lr: 0.007118 min_lr: 0.007118 loss: 3.4837 (3.4041) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0004 max mem: 57114 Epoch: [80] [ 60/156] eta: 0:01:16 lr: 0.007117 min_lr: 0.007117 loss: 3.2810 (3.3756) weight_decay: 0.0500 (0.0500) time: 0.7213 data: 0.0005 max mem: 57114 Epoch: [80] [ 70/156] eta: 0:01:07 lr: 0.007115 min_lr: 0.007115 loss: 3.2810 (3.3876) weight_decay: 0.0500 (0.0500) time: 0.7189 data: 0.0005 max mem: 57114 Epoch: [80] [ 80/156] eta: 0:00:59 lr: 0.007113 min_lr: 0.007113 loss: 3.5536 (3.3814) weight_decay: 0.0500 (0.0500) time: 0.7188 data: 0.0004 max mem: 57114 Epoch: [80] [ 90/156] eta: 0:00:50 lr: 0.007111 min_lr: 0.007111 loss: 3.2782 (3.3669) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0004 max mem: 57114 Epoch: [80] [100/156] eta: 0:00:42 lr: 0.007109 min_lr: 0.007109 loss: 3.3539 (3.3918) weight_decay: 0.0500 (0.0500) time: 0.7048 data: 0.0004 max mem: 57114 Epoch: [80] [110/156] eta: 0:00:34 lr: 0.007108 min_lr: 0.007108 loss: 3.7064 (3.4149) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [80] [120/156] eta: 0:00:27 lr: 0.007106 min_lr: 0.007106 loss: 3.5052 (3.4100) weight_decay: 0.0500 (0.0500) time: 0.6997 data: 0.0004 max mem: 57114 Epoch: [80] [130/156] eta: 0:00:19 lr: 0.007104 min_lr: 0.007104 loss: 3.3434 (3.4020) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0008 max mem: 57114 Epoch: [80] [140/156] eta: 0:00:11 lr: 0.007102 min_lr: 0.007102 loss: 3.4530 (3.3984) weight_decay: 0.0500 (0.0500) time: 0.6885 data: 0.0007 max mem: 57114 Epoch: [80] [150/156] eta: 0:00:04 lr: 0.007100 min_lr: 0.007100 loss: 3.4530 (3.4007) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [80] [155/156] eta: 0:00:00 lr: 0.007099 min_lr: 0.007099 loss: 3.3411 (3.3963) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0001 max mem: 57114 Epoch: [80] Total time: 0:01:55 (0.7399 s / it) Averaged stats: lr: 0.007099 min_lr: 0.007099 loss: 3.3411 (3.3719) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9277 (0.9277) acc1: 82.8125 (82.8125) acc5: 95.8333 (95.8333) time: 2.0358 data: 1.7798 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0143 (1.0781) acc1: 81.7708 (76.7263) acc5: 95.8333 (93.6061) time: 0.5579 data: 0.3560 max mem: 57114 Test: Total time: 0:00:02 (0.5798 s / it) * Acc@1 75.370 Acc@5 92.935 loss 1.170 Accuracy of the model on the 50000 test images: 75.4% Max accuracy: 75.37% Test: [0/5] eta: 0:00:10 loss: 7.9493 (7.9493) acc1: 0.0000 (0.0000) acc5: 1.5625 (1.5625) time: 2.0411 data: 1.7978 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.7311 (7.7242) acc1: 0.0000 (0.1279) acc5: 0.5208 (0.8951) time: 0.5588 data: 0.3596 max mem: 57114 Test: Total time: 0:00:02 (0.5718 s / it) * Acc@1 0.110 Acc@5 0.691 loss 7.670 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [81] [ 0/156] eta: 0:12:54 lr: 0.007099 min_lr: 0.007099 loss: 2.3271 (2.3271) weight_decay: 0.0500 (0.0500) time: 4.9677 data: 3.9402 max mem: 57114 Epoch: [81] [ 10/156] eta: 0:02:46 lr: 0.007097 min_lr: 0.007097 loss: 3.5035 (3.3105) weight_decay: 0.0500 (0.0500) time: 1.1378 data: 0.3585 max mem: 57114 Epoch: [81] [ 20/156] eta: 0:02:07 lr: 0.007096 min_lr: 0.007096 loss: 3.5647 (3.3735) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0003 max mem: 57114 Epoch: [81] [ 30/156] eta: 0:01:49 lr: 0.007094 min_lr: 0.007094 loss: 3.4847 (3.3802) weight_decay: 0.0500 (0.0500) time: 0.7266 data: 0.0003 max mem: 57114 Epoch: [81] [ 40/156] eta: 0:01:37 lr: 0.007092 min_lr: 0.007092 loss: 3.4847 (3.4226) weight_decay: 0.0500 (0.0500) time: 0.7382 data: 0.0003 max mem: 57114 Epoch: [81] [ 50/156] eta: 0:01:27 lr: 0.007090 min_lr: 0.007090 loss: 3.5470 (3.4219) weight_decay: 0.0500 (0.0500) time: 0.7429 data: 0.0004 max mem: 57114 Epoch: [81] [ 60/156] eta: 0:01:17 lr: 0.007088 min_lr: 0.007088 loss: 3.5090 (3.4190) weight_decay: 0.0500 (0.0500) time: 0.7374 data: 0.0004 max mem: 57114 Epoch: [81] [ 70/156] eta: 0:01:08 lr: 0.007086 min_lr: 0.007086 loss: 3.5728 (3.4162) weight_decay: 0.0500 (0.0500) time: 0.7395 data: 0.0004 max mem: 57114 Epoch: [81] [ 80/156] eta: 0:00:59 lr: 0.007085 min_lr: 0.007085 loss: 3.6206 (3.4213) weight_decay: 0.0500 (0.0500) time: 0.7362 data: 0.0004 max mem: 57114 Epoch: [81] [ 90/156] eta: 0:00:51 lr: 0.007083 min_lr: 0.007083 loss: 3.6427 (3.4271) weight_decay: 0.0500 (0.0500) time: 0.7022 data: 0.0003 max mem: 57114 Epoch: [81] [100/156] eta: 0:00:43 lr: 0.007081 min_lr: 0.007081 loss: 3.5234 (3.4124) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0004 max mem: 57114 Epoch: [81] [110/156] eta: 0:00:35 lr: 0.007079 min_lr: 0.007079 loss: 3.1161 (3.3987) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0004 max mem: 57114 Epoch: [81] [120/156] eta: 0:00:27 lr: 0.007077 min_lr: 0.007077 loss: 3.2918 (3.3983) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [81] [130/156] eta: 0:00:19 lr: 0.007075 min_lr: 0.007075 loss: 3.4690 (3.4021) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0009 max mem: 57114 Epoch: [81] [140/156] eta: 0:00:12 lr: 0.007074 min_lr: 0.007074 loss: 3.3892 (3.3956) weight_decay: 0.0500 (0.0500) time: 0.6900 data: 0.0007 max mem: 57114 Epoch: [81] [150/156] eta: 0:00:04 lr: 0.007072 min_lr: 0.007072 loss: 3.1742 (3.3850) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [81] [155/156] eta: 0:00:00 lr: 0.007071 min_lr: 0.007071 loss: 3.1742 (3.3825) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.0001 max mem: 57114 Epoch: [81] Total time: 0:01:56 (0.7456 s / it) Averaged stats: lr: 0.007071 min_lr: 0.007071 loss: 3.1742 (3.3672) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0042 (1.0042) acc1: 82.2917 (82.2917) acc5: 94.2708 (94.2708) time: 2.0653 data: 1.8084 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0794 (1.1177) acc1: 76.0417 (74.5524) acc5: 94.2708 (92.1995) time: 0.5637 data: 0.3618 max mem: 57114 Test: Total time: 0:00:02 (0.5847 s / it) * Acc@1 74.345 Acc@5 92.493 loss 1.217 Accuracy of the model on the 50000 test images: 74.3% Max accuracy: 75.37% Test: [0/5] eta: 0:00:11 loss: 8.0114 (8.0114) acc1: 0.0000 (0.0000) acc5: 1.5625 (1.5625) time: 2.3537 data: 2.1102 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.8060 (7.7903) acc1: 0.0000 (0.1279) acc5: 0.5208 (0.8951) time: 0.6215 data: 0.4222 max mem: 57114 Test: Total time: 0:00:03 (0.6327 s / it) * Acc@1 0.112 Acc@5 0.697 loss 7.730 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [82] [ 0/156] eta: 0:13:07 lr: 0.007071 min_lr: 0.007071 loss: 2.6856 (2.6856) weight_decay: 0.0500 (0.0500) time: 5.0452 data: 4.0070 max mem: 57114 Epoch: [82] [ 10/156] eta: 0:02:43 lr: 0.007069 min_lr: 0.007069 loss: 2.7322 (3.1164) weight_decay: 0.0500 (0.0500) time: 1.1201 data: 0.3645 max mem: 57114 Epoch: [82] [ 20/156] eta: 0:02:06 lr: 0.007067 min_lr: 0.007067 loss: 3.4465 (3.3029) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0003 max mem: 57114 Epoch: [82] [ 30/156] eta: 0:01:48 lr: 0.007065 min_lr: 0.007065 loss: 3.5815 (3.3186) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0003 max mem: 57114 Epoch: [82] [ 40/156] eta: 0:01:36 lr: 0.007063 min_lr: 0.007063 loss: 3.5654 (3.3608) weight_decay: 0.0500 (0.0500) time: 0.7279 data: 0.0004 max mem: 57114 Epoch: [82] [ 50/156] eta: 0:01:25 lr: 0.007061 min_lr: 0.007061 loss: 3.3994 (3.3237) weight_decay: 0.0500 (0.0500) time: 0.7256 data: 0.0004 max mem: 57114 Epoch: [82] [ 60/156] eta: 0:01:16 lr: 0.007060 min_lr: 0.007060 loss: 3.3994 (3.3608) weight_decay: 0.0500 (0.0500) time: 0.7386 data: 0.0004 max mem: 57114 Epoch: [82] [ 70/156] eta: 0:01:07 lr: 0.007058 min_lr: 0.007058 loss: 3.2725 (3.3043) weight_decay: 0.0500 (0.0500) time: 0.7399 data: 0.0004 max mem: 57114 Epoch: [82] [ 80/156] eta: 0:00:59 lr: 0.007056 min_lr: 0.007056 loss: 3.4520 (3.3390) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0004 max mem: 57114 Epoch: [82] [ 90/156] eta: 0:00:50 lr: 0.007054 min_lr: 0.007054 loss: 3.5421 (3.3413) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [82] [100/156] eta: 0:00:42 lr: 0.007052 min_lr: 0.007052 loss: 3.4557 (3.3410) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0004 max mem: 57114 Epoch: [82] [110/156] eta: 0:00:34 lr: 0.007050 min_lr: 0.007050 loss: 3.5948 (3.3595) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0004 max mem: 57114 Epoch: [82] [120/156] eta: 0:00:27 lr: 0.007048 min_lr: 0.007048 loss: 3.6473 (3.3694) weight_decay: 0.0500 (0.0500) time: 0.7025 data: 0.0004 max mem: 57114 Epoch: [82] [130/156] eta: 0:00:19 lr: 0.007047 min_lr: 0.007047 loss: 3.5946 (3.3882) weight_decay: 0.0500 (0.0500) time: 0.6948 data: 0.0007 max mem: 57114 Epoch: [82] [140/156] eta: 0:00:11 lr: 0.007045 min_lr: 0.007045 loss: 3.4322 (3.3685) weight_decay: 0.0500 (0.0500) time: 0.6879 data: 0.0006 max mem: 57114 Epoch: [82] [150/156] eta: 0:00:04 lr: 0.007043 min_lr: 0.007043 loss: 3.3761 (3.3743) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0002 max mem: 57114 Epoch: [82] [155/156] eta: 0:00:00 lr: 0.007042 min_lr: 0.007042 loss: 3.1912 (3.3736) weight_decay: 0.0500 (0.0500) time: 0.6884 data: 0.0002 max mem: 57114 Epoch: [82] Total time: 0:01:55 (0.7412 s / it) Averaged stats: lr: 0.007042 min_lr: 0.007042 loss: 3.1912 (3.3668) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0460 (1.0460) acc1: 82.8125 (82.8125) acc5: 95.8333 (95.8333) time: 2.1316 data: 1.8758 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1457 (1.1111) acc1: 76.0417 (76.5985) acc5: 94.7917 (92.5831) time: 0.5770 data: 0.3752 max mem: 57114 Test: Total time: 0:00:02 (0.5995 s / it) * Acc@1 75.330 Acc@5 92.815 loss 1.209 Accuracy of the model on the 50000 test images: 75.3% Max accuracy: 75.37% Test: [0/5] eta: 0:00:11 loss: 8.0714 (8.0714) acc1: 0.0000 (0.0000) acc5: 1.5625 (1.5625) time: 2.3253 data: 2.0819 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.8823 (7.8583) acc1: 0.0000 (0.1279) acc5: 1.0417 (1.0230) time: 0.6157 data: 0.4165 max mem: 57114 Test: Total time: 0:00:03 (0.6385 s / it) * Acc@1 0.112 Acc@5 0.713 loss 7.793 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [83] [ 0/156] eta: 0:13:06 lr: 0.007042 min_lr: 0.007042 loss: 2.1852 (2.1852) weight_decay: 0.0500 (0.0500) time: 5.0427 data: 3.2440 max mem: 57114 Epoch: [83] [ 10/156] eta: 0:02:43 lr: 0.007040 min_lr: 0.007040 loss: 3.4595 (3.3855) weight_decay: 0.0500 (0.0500) time: 1.1195 data: 0.2952 max mem: 57114 Epoch: [83] [ 20/156] eta: 0:02:06 lr: 0.007038 min_lr: 0.007038 loss: 3.5536 (3.4473) weight_decay: 0.0500 (0.0500) time: 0.7265 data: 0.0003 max mem: 57114 Epoch: [83] [ 30/156] eta: 0:01:49 lr: 0.007036 min_lr: 0.007036 loss: 3.4404 (3.3892) weight_decay: 0.0500 (0.0500) time: 0.7278 data: 0.0003 max mem: 57114 Epoch: [83] [ 40/156] eta: 0:01:36 lr: 0.007034 min_lr: 0.007034 loss: 3.2832 (3.3064) weight_decay: 0.0500 (0.0500) time: 0.7264 data: 0.0003 max mem: 57114 Epoch: [83] [ 50/156] eta: 0:01:25 lr: 0.007032 min_lr: 0.007032 loss: 3.2443 (3.2997) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0004 max mem: 57114 Epoch: [83] [ 60/156] eta: 0:01:16 lr: 0.007031 min_lr: 0.007031 loss: 3.3920 (3.3275) weight_decay: 0.0500 (0.0500) time: 0.7150 data: 0.0004 max mem: 57114 Epoch: [83] [ 70/156] eta: 0:01:07 lr: 0.007029 min_lr: 0.007029 loss: 3.7123 (3.3743) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0004 max mem: 57114 Epoch: [83] [ 80/156] eta: 0:00:58 lr: 0.007027 min_lr: 0.007027 loss: 3.6018 (3.3990) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [83] [ 90/156] eta: 0:00:50 lr: 0.007025 min_lr: 0.007025 loss: 3.5460 (3.3911) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.0004 max mem: 57114 Epoch: [83] [100/156] eta: 0:00:42 lr: 0.007023 min_lr: 0.007023 loss: 3.6066 (3.4049) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0004 max mem: 57114 Epoch: [83] [110/156] eta: 0:00:34 lr: 0.007021 min_lr: 0.007021 loss: 3.5928 (3.4025) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [83] [120/156] eta: 0:00:27 lr: 0.007019 min_lr: 0.007019 loss: 3.2334 (3.3863) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [83] [130/156] eta: 0:00:19 lr: 0.007017 min_lr: 0.007017 loss: 3.0567 (3.3600) weight_decay: 0.0500 (0.0500) time: 0.6916 data: 0.0008 max mem: 57114 Epoch: [83] [140/156] eta: 0:00:11 lr: 0.007015 min_lr: 0.007015 loss: 3.3487 (3.3592) weight_decay: 0.0500 (0.0500) time: 0.6854 data: 0.0007 max mem: 57114 Epoch: [83] [150/156] eta: 0:00:04 lr: 0.007014 min_lr: 0.007014 loss: 3.5513 (3.3637) weight_decay: 0.0500 (0.0500) time: 0.6883 data: 0.0001 max mem: 57114 Epoch: [83] [155/156] eta: 0:00:00 lr: 0.007013 min_lr: 0.007013 loss: 3.5772 (3.3810) weight_decay: 0.0500 (0.0500) time: 0.6890 data: 0.0001 max mem: 57114 Epoch: [83] Total time: 0:01:55 (0.7394 s / it) Averaged stats: lr: 0.007013 min_lr: 0.007013 loss: 3.5772 (3.3659) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.1175 (1.1175) acc1: 79.1667 (79.1667) acc5: 97.9167 (97.9167) time: 2.0922 data: 1.8363 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1990 (1.2427) acc1: 76.5625 (73.7852) acc5: 95.3125 (93.0946) time: 0.5691 data: 0.3673 max mem: 57114 Test: Total time: 0:00:02 (0.5885 s / it) * Acc@1 74.411 Acc@5 92.525 loss 1.327 Accuracy of the model on the 50000 test images: 74.4% Max accuracy: 75.37% Test: [0/5] eta: 0:00:11 loss: 8.1309 (8.1309) acc1: 0.5208 (0.5208) acc5: 1.5625 (1.5625) time: 2.2657 data: 2.0222 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.9607 (7.9318) acc1: 0.0000 (0.2558) acc5: 1.0417 (1.0230) time: 0.6038 data: 0.4045 max mem: 57114 Test: Total time: 0:00:03 (0.6178 s / it) * Acc@1 0.118 Acc@5 0.733 loss 7.860 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [84] [ 0/156] eta: 0:12:27 lr: 0.007012 min_lr: 0.007012 loss: 3.7746 (3.7746) weight_decay: 0.0500 (0.0500) time: 4.7929 data: 3.5359 max mem: 57114 Epoch: [84] [ 10/156] eta: 0:02:40 lr: 0.007011 min_lr: 0.007011 loss: 3.1114 (3.2055) weight_decay: 0.0500 (0.0500) time: 1.0999 data: 0.3217 max mem: 57114 Epoch: [84] [ 20/156] eta: 0:02:05 lr: 0.007009 min_lr: 0.007009 loss: 3.1369 (3.2918) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0004 max mem: 57114 Epoch: [84] [ 30/156] eta: 0:01:48 lr: 0.007007 min_lr: 0.007007 loss: 3.3633 (3.2694) weight_decay: 0.0500 (0.0500) time: 0.7306 data: 0.0004 max mem: 57114 Epoch: [84] [ 40/156] eta: 0:01:35 lr: 0.007005 min_lr: 0.007005 loss: 3.3633 (3.2816) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0004 max mem: 57114 Epoch: [84] [ 50/156] eta: 0:01:24 lr: 0.007003 min_lr: 0.007003 loss: 3.5053 (3.3529) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [84] [ 60/156] eta: 0:01:16 lr: 0.007001 min_lr: 0.007001 loss: 3.4007 (3.3087) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0004 max mem: 57114 Epoch: [84] [ 70/156] eta: 0:01:07 lr: 0.006999 min_lr: 0.006999 loss: 3.1912 (3.3026) weight_decay: 0.0500 (0.0500) time: 0.7217 data: 0.0003 max mem: 57114 Epoch: [84] [ 80/156] eta: 0:00:58 lr: 0.006997 min_lr: 0.006997 loss: 3.4608 (3.3374) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0003 max mem: 57114 Epoch: [84] [ 90/156] eta: 0:00:50 lr: 0.006995 min_lr: 0.006995 loss: 3.2927 (3.3088) weight_decay: 0.0500 (0.0500) time: 0.7204 data: 0.0003 max mem: 57114 Epoch: [84] [100/156] eta: 0:00:42 lr: 0.006993 min_lr: 0.006993 loss: 3.2927 (3.3135) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [84] [110/156] eta: 0:00:34 lr: 0.006991 min_lr: 0.006991 loss: 3.6858 (3.3520) weight_decay: 0.0500 (0.0500) time: 0.6987 data: 0.0003 max mem: 57114 Epoch: [84] [120/156] eta: 0:00:27 lr: 0.006990 min_lr: 0.006990 loss: 3.5720 (3.3587) weight_decay: 0.0500 (0.0500) time: 0.7007 data: 0.0004 max mem: 57114 Epoch: [84] [130/156] eta: 0:00:19 lr: 0.006988 min_lr: 0.006988 loss: 3.4017 (3.3587) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0009 max mem: 57114 Epoch: [84] [140/156] eta: 0:00:11 lr: 0.006986 min_lr: 0.006986 loss: 3.3183 (3.3364) weight_decay: 0.0500 (0.0500) time: 0.6901 data: 0.0008 max mem: 57114 Epoch: [84] [150/156] eta: 0:00:04 lr: 0.006984 min_lr: 0.006984 loss: 3.3605 (3.3375) weight_decay: 0.0500 (0.0500) time: 0.6903 data: 0.0001 max mem: 57114 Epoch: [84] [155/156] eta: 0:00:00 lr: 0.006983 min_lr: 0.006983 loss: 3.3159 (3.3327) weight_decay: 0.0500 (0.0500) time: 0.6914 data: 0.0001 max mem: 57114 Epoch: [84] Total time: 0:01:55 (0.7398 s / it) Averaged stats: lr: 0.006983 min_lr: 0.006983 loss: 3.3159 (3.3523) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.9403 (0.9403) acc1: 80.2083 (80.2083) acc5: 96.3542 (96.3542) time: 2.2108 data: 1.9555 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1187 (1.0873) acc1: 77.6042 (75.1918) acc5: 95.3125 (92.4552) time: 0.5928 data: 0.3912 max mem: 57114 Test: Total time: 0:00:03 (0.6168 s / it) * Acc@1 75.278 Acc@5 92.871 loss 1.167 Accuracy of the model on the 50000 test images: 75.3% Max accuracy: 75.37% Test: [0/5] eta: 0:00:11 loss: 8.2027 (8.2027) acc1: 0.5208 (0.5208) acc5: 1.5625 (1.5625) time: 2.3114 data: 2.0679 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.0499 (8.0133) acc1: 0.0000 (0.2558) acc5: 1.0417 (1.1509) time: 0.6129 data: 0.4137 max mem: 57114 Test: Total time: 0:00:03 (0.6227 s / it) * Acc@1 0.134 Acc@5 0.769 loss 7.935 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.13% Epoch: [85] [ 0/156] eta: 0:07:02 lr: 0.006983 min_lr: 0.006983 loss: 3.3636 (3.3636) weight_decay: 0.0500 (0.0500) time: 2.7111 data: 2.0514 max mem: 57114 Epoch: [85] [ 10/156] eta: 0:02:13 lr: 0.006981 min_lr: 0.006981 loss: 3.3636 (3.2338) weight_decay: 0.0500 (0.0500) time: 0.9155 data: 0.1868 max mem: 57114 Epoch: [85] [ 20/156] eta: 0:01:50 lr: 0.006979 min_lr: 0.006979 loss: 3.1278 (3.2392) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0004 max mem: 57114 Epoch: [85] [ 30/156] eta: 0:01:39 lr: 0.006977 min_lr: 0.006977 loss: 3.3016 (3.3167) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0004 max mem: 57114 Epoch: [85] [ 40/156] eta: 0:01:30 lr: 0.006975 min_lr: 0.006975 loss: 3.3016 (3.2994) weight_decay: 0.0500 (0.0500) time: 0.7393 data: 0.0004 max mem: 57114 Epoch: [85] [ 50/156] eta: 0:01:21 lr: 0.006973 min_lr: 0.006973 loss: 3.1313 (3.2013) weight_decay: 0.0500 (0.0500) time: 0.7380 data: 0.0004 max mem: 57114 Epoch: [85] [ 60/156] eta: 0:01:12 lr: 0.006971 min_lr: 0.006971 loss: 2.9835 (3.2125) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0004 max mem: 57114 Epoch: [85] [ 70/156] eta: 0:01:04 lr: 0.006969 min_lr: 0.006969 loss: 3.3656 (3.2391) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0004 max mem: 57114 Epoch: [85] [ 80/156] eta: 0:00:56 lr: 0.006967 min_lr: 0.006967 loss: 3.4139 (3.2481) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0004 max mem: 57114 Epoch: [85] [ 90/156] eta: 0:00:48 lr: 0.006965 min_lr: 0.006965 loss: 3.4337 (3.2671) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0004 max mem: 57114 Epoch: [85] [100/156] eta: 0:00:41 lr: 0.006963 min_lr: 0.006963 loss: 3.4951 (3.2749) weight_decay: 0.0500 (0.0500) time: 0.6961 data: 0.0003 max mem: 57114 Epoch: [85] [110/156] eta: 0:00:33 lr: 0.006962 min_lr: 0.006962 loss: 3.6256 (3.3047) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [85] [120/156] eta: 0:00:26 lr: 0.006960 min_lr: 0.006960 loss: 3.7180 (3.3326) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [85] [130/156] eta: 0:00:18 lr: 0.006958 min_lr: 0.006958 loss: 3.3959 (3.3325) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0008 max mem: 57114 Epoch: [85] [140/156] eta: 0:00:11 lr: 0.006956 min_lr: 0.006956 loss: 3.2210 (3.3024) weight_decay: 0.0500 (0.0500) time: 0.6887 data: 0.0007 max mem: 57114 Epoch: [85] [150/156] eta: 0:00:04 lr: 0.006954 min_lr: 0.006954 loss: 2.5802 (3.2696) weight_decay: 0.0500 (0.0500) time: 0.6811 data: 0.0001 max mem: 57114 Epoch: [85] [155/156] eta: 0:00:00 lr: 0.006953 min_lr: 0.006953 loss: 2.9878 (3.2815) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0001 max mem: 57114 Epoch: [85] Total time: 0:01:52 (0.7243 s / it) Averaged stats: lr: 0.006953 min_lr: 0.006953 loss: 2.9878 (3.3520) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8885 (0.8885) acc1: 82.2917 (82.2917) acc5: 97.3958 (97.3958) time: 2.0351 data: 1.7794 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0229 (1.0037) acc1: 75.5208 (75.7033) acc5: 95.8333 (94.2455) time: 0.5577 data: 0.3559 max mem: 57114 Test: Total time: 0:00:02 (0.5797 s / it) * Acc@1 75.284 Acc@5 93.197 loss 1.107 Accuracy of the model on the 50000 test images: 75.3% Max accuracy: 75.37% Test: [0/5] eta: 0:00:11 loss: 8.2771 (8.2771) acc1: 0.5208 (0.5208) acc5: 1.5625 (1.5625) time: 2.3281 data: 2.0847 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.1433 (8.0974) acc1: 0.0000 (0.2558) acc5: 1.0417 (1.1509) time: 0.6162 data: 0.4170 max mem: 57114 Test: Total time: 0:00:03 (0.6294 s / it) * Acc@1 0.148 Acc@5 0.799 loss 8.012 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.15% Epoch: [86] [ 0/156] eta: 0:06:09 lr: 0.006953 min_lr: 0.006953 loss: 3.6072 (3.6072) weight_decay: 0.0500 (0.0500) time: 2.3692 data: 1.7097 max mem: 57114 Epoch: [86] [ 10/156] eta: 0:02:05 lr: 0.006951 min_lr: 0.006951 loss: 3.3059 (3.2104) weight_decay: 0.0500 (0.0500) time: 0.8602 data: 0.1557 max mem: 57114 Epoch: [86] [ 20/156] eta: 0:01:48 lr: 0.006949 min_lr: 0.006949 loss: 3.4895 (3.4170) weight_decay: 0.0500 (0.0500) time: 0.7156 data: 0.0004 max mem: 57114 Epoch: [86] [ 30/156] eta: 0:01:37 lr: 0.006947 min_lr: 0.006947 loss: 3.4895 (3.4356) weight_decay: 0.0500 (0.0500) time: 0.7267 data: 0.0004 max mem: 57114 Epoch: [86] [ 40/156] eta: 0:01:27 lr: 0.006945 min_lr: 0.006945 loss: 3.3060 (3.4031) weight_decay: 0.0500 (0.0500) time: 0.7181 data: 0.0004 max mem: 57114 Epoch: [86] [ 50/156] eta: 0:01:19 lr: 0.006943 min_lr: 0.006943 loss: 3.3393 (3.4147) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0004 max mem: 57114 Epoch: [86] [ 60/156] eta: 0:01:11 lr: 0.006941 min_lr: 0.006941 loss: 3.3393 (3.3794) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0004 max mem: 57114 Epoch: [86] [ 70/156] eta: 0:01:03 lr: 0.006939 min_lr: 0.006939 loss: 3.0807 (3.3456) weight_decay: 0.0500 (0.0500) time: 0.7269 data: 0.0004 max mem: 57114 Epoch: [86] [ 80/156] eta: 0:00:56 lr: 0.006937 min_lr: 0.006937 loss: 3.3370 (3.3712) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [86] [ 90/156] eta: 0:00:48 lr: 0.006935 min_lr: 0.006935 loss: 3.5401 (3.3714) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0004 max mem: 57114 Epoch: [86] [100/156] eta: 0:00:41 lr: 0.006933 min_lr: 0.006933 loss: 3.5401 (3.3702) weight_decay: 0.0500 (0.0500) time: 0.7032 data: 0.0004 max mem: 57114 Epoch: [86] [110/156] eta: 0:00:33 lr: 0.006931 min_lr: 0.006931 loss: 3.0258 (3.3196) weight_decay: 0.0500 (0.0500) time: 0.7124 data: 0.0004 max mem: 57114 Epoch: [86] [120/156] eta: 0:00:26 lr: 0.006929 min_lr: 0.006929 loss: 3.1517 (3.3304) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [86] [130/156] eta: 0:00:18 lr: 0.006927 min_lr: 0.006927 loss: 3.2784 (3.3173) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0008 max mem: 57114 Epoch: [86] [140/156] eta: 0:00:11 lr: 0.006925 min_lr: 0.006925 loss: 3.2867 (3.3230) weight_decay: 0.0500 (0.0500) time: 0.6881 data: 0.0007 max mem: 57114 Epoch: [86] [150/156] eta: 0:00:04 lr: 0.006923 min_lr: 0.006923 loss: 3.2867 (3.3187) weight_decay: 0.0500 (0.0500) time: 0.6869 data: 0.0001 max mem: 57114 Epoch: [86] [155/156] eta: 0:00:00 lr: 0.006922 min_lr: 0.006922 loss: 3.2994 (3.3139) weight_decay: 0.0500 (0.0500) time: 0.6835 data: 0.0001 max mem: 57114 Epoch: [86] Total time: 0:01:52 (0.7220 s / it) Averaged stats: lr: 0.006922 min_lr: 0.006922 loss: 3.2994 (3.3546) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9147 (0.9147) acc1: 80.7292 (80.7292) acc5: 95.8333 (95.8333) time: 2.1270 data: 1.8710 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9759 (1.0272) acc1: 79.1667 (75.4476) acc5: 95.8333 (92.9668) time: 0.5760 data: 0.3743 max mem: 57114 Test: Total time: 0:00:02 (0.5986 s / it) * Acc@1 75.885 Acc@5 93.394 loss 1.092 Accuracy of the model on the 50000 test images: 75.9% Max accuracy: 75.89% Test: [0/5] eta: 0:00:10 loss: 8.3458 (8.3458) acc1: 0.5208 (0.5208) acc5: 1.5625 (1.5625) time: 2.0475 data: 1.8039 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.2306 (8.1753) acc1: 0.0000 (0.3836) acc5: 1.0417 (1.1509) time: 0.5601 data: 0.3609 max mem: 57114 Test: Total time: 0:00:02 (0.5700 s / it) * Acc@1 0.156 Acc@5 0.843 loss 8.085 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.16% Epoch: [87] [ 0/156] eta: 0:05:52 lr: 0.006922 min_lr: 0.006922 loss: 3.6836 (3.6836) weight_decay: 0.0500 (0.0500) time: 2.2593 data: 1.6061 max mem: 57114 Epoch: [87] [ 10/156] eta: 0:02:08 lr: 0.006920 min_lr: 0.006920 loss: 3.6836 (3.4491) weight_decay: 0.0500 (0.0500) time: 0.8828 data: 0.1463 max mem: 57114 Epoch: [87] [ 20/156] eta: 0:01:52 lr: 0.006918 min_lr: 0.006918 loss: 3.2180 (3.2804) weight_decay: 0.0500 (0.0500) time: 0.7554 data: 0.0003 max mem: 57114 Epoch: [87] [ 30/156] eta: 0:01:40 lr: 0.006916 min_lr: 0.006916 loss: 3.0075 (3.2879) weight_decay: 0.0500 (0.0500) time: 0.7542 data: 0.0003 max mem: 57114 Epoch: [87] [ 40/156] eta: 0:01:31 lr: 0.006914 min_lr: 0.006914 loss: 3.1474 (3.2660) weight_decay: 0.0500 (0.0500) time: 0.7468 data: 0.0004 max mem: 57114 Epoch: [87] [ 50/156] eta: 0:01:22 lr: 0.006912 min_lr: 0.006912 loss: 3.4774 (3.2818) weight_decay: 0.0500 (0.0500) time: 0.7482 data: 0.0004 max mem: 57114 Epoch: [87] [ 60/156] eta: 0:01:13 lr: 0.006910 min_lr: 0.006910 loss: 3.6267 (3.3238) weight_decay: 0.0500 (0.0500) time: 0.7298 data: 0.0004 max mem: 57114 Epoch: [87] [ 70/156] eta: 0:01:05 lr: 0.006908 min_lr: 0.006908 loss: 3.1696 (3.2880) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [87] [ 80/156] eta: 0:00:57 lr: 0.006906 min_lr: 0.006906 loss: 2.9066 (3.2562) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [87] [ 90/156] eta: 0:00:49 lr: 0.006904 min_lr: 0.006904 loss: 3.2455 (3.2505) weight_decay: 0.0500 (0.0500) time: 0.7124 data: 0.0004 max mem: 57114 Epoch: [87] [100/156] eta: 0:00:41 lr: 0.006902 min_lr: 0.006902 loss: 3.4065 (3.2699) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [87] [110/156] eta: 0:00:34 lr: 0.006900 min_lr: 0.006900 loss: 3.5731 (3.2878) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0004 max mem: 57114 Epoch: [87] [120/156] eta: 0:00:26 lr: 0.006898 min_lr: 0.006898 loss: 3.4015 (3.2741) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0004 max mem: 57114 Epoch: [87] [130/156] eta: 0:00:19 lr: 0.006896 min_lr: 0.006896 loss: 3.1994 (3.2662) weight_decay: 0.0500 (0.0500) time: 0.6967 data: 0.0009 max mem: 57114 Epoch: [87] [140/156] eta: 0:00:11 lr: 0.006894 min_lr: 0.006894 loss: 3.4719 (3.2810) weight_decay: 0.0500 (0.0500) time: 0.6876 data: 0.0007 max mem: 57114 Epoch: [87] [150/156] eta: 0:00:04 lr: 0.006893 min_lr: 0.006893 loss: 3.4954 (3.2892) weight_decay: 0.0500 (0.0500) time: 0.6887 data: 0.0001 max mem: 57114 Epoch: [87] [155/156] eta: 0:00:00 lr: 0.006892 min_lr: 0.006892 loss: 3.4377 (3.2858) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0001 max mem: 57114 Epoch: [87] Total time: 0:01:53 (0.7295 s / it) Averaged stats: lr: 0.006892 min_lr: 0.006892 loss: 3.4377 (3.3342) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8878 (0.8878) acc1: 80.7292 (80.7292) acc5: 97.3958 (97.3958) time: 2.1799 data: 1.9237 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9657 (0.9726) acc1: 80.2083 (77.1100) acc5: 96.8750 (94.7570) time: 0.5867 data: 0.3848 max mem: 57114 Test: Total time: 0:00:03 (0.6102 s / it) * Acc@1 76.019 Acc@5 93.568 loss 1.083 Accuracy of the model on the 50000 test images: 76.0% Max accuracy: 76.02% Test: [0/5] eta: 0:00:09 loss: 8.4240 (8.4240) acc1: 0.5208 (0.5208) acc5: 1.5625 (1.5625) time: 1.9783 data: 1.7348 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.3296 (8.2618) acc1: 0.0000 (0.3836) acc5: 1.0417 (1.1509) time: 0.5462 data: 0.3470 max mem: 57114 Test: Total time: 0:00:02 (0.5572 s / it) * Acc@1 0.168 Acc@5 0.881 loss 8.164 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.17% Epoch: [88] [ 0/156] eta: 0:06:51 lr: 0.006891 min_lr: 0.006891 loss: 3.5162 (3.5162) weight_decay: 0.0500 (0.0500) time: 2.6356 data: 1.9797 max mem: 57114 Epoch: [88] [ 10/156] eta: 0:02:10 lr: 0.006889 min_lr: 0.006889 loss: 3.3240 (3.3092) weight_decay: 0.0500 (0.0500) time: 0.8943 data: 0.1803 max mem: 57114 Epoch: [88] [ 20/156] eta: 0:01:50 lr: 0.006887 min_lr: 0.006887 loss: 3.4080 (3.3655) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0003 max mem: 57114 Epoch: [88] [ 30/156] eta: 0:01:39 lr: 0.006885 min_lr: 0.006885 loss: 3.4080 (3.3848) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0003 max mem: 57114 Epoch: [88] [ 40/156] eta: 0:01:30 lr: 0.006883 min_lr: 0.006883 loss: 3.3575 (3.3941) weight_decay: 0.0500 (0.0500) time: 0.7421 data: 0.0004 max mem: 57114 Epoch: [88] [ 50/156] eta: 0:01:21 lr: 0.006881 min_lr: 0.006881 loss: 3.6209 (3.4555) weight_decay: 0.0500 (0.0500) time: 0.7422 data: 0.0004 max mem: 57114 Epoch: [88] [ 60/156] eta: 0:01:14 lr: 0.006879 min_lr: 0.006879 loss: 3.7725 (3.4324) weight_decay: 0.0500 (0.0500) time: 0.7561 data: 0.0004 max mem: 57114 Epoch: [88] [ 70/156] eta: 0:01:05 lr: 0.006877 min_lr: 0.006877 loss: 3.4847 (3.4326) weight_decay: 0.0500 (0.0500) time: 0.7394 data: 0.0004 max mem: 57114 Epoch: [88] [ 80/156] eta: 0:00:57 lr: 0.006875 min_lr: 0.006875 loss: 3.1978 (3.3915) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [88] [ 90/156] eta: 0:00:49 lr: 0.006873 min_lr: 0.006873 loss: 3.2129 (3.3788) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0004 max mem: 57114 Epoch: [88] [100/156] eta: 0:00:41 lr: 0.006871 min_lr: 0.006871 loss: 3.4032 (3.3684) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.0004 max mem: 57114 Epoch: [88] [110/156] eta: 0:00:34 lr: 0.006869 min_lr: 0.006869 loss: 3.4032 (3.3584) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0004 max mem: 57114 Epoch: [88] [120/156] eta: 0:00:26 lr: 0.006867 min_lr: 0.006867 loss: 3.4302 (3.3593) weight_decay: 0.0500 (0.0500) time: 0.7047 data: 0.0004 max mem: 57114 Epoch: [88] [130/156] eta: 0:00:19 lr: 0.006865 min_lr: 0.006865 loss: 3.5730 (3.3799) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0005 max mem: 57114 Epoch: [88] [140/156] eta: 0:00:11 lr: 0.006863 min_lr: 0.006863 loss: 3.5294 (3.3752) weight_decay: 0.0500 (0.0500) time: 0.6932 data: 0.0004 max mem: 57114 Epoch: [88] [150/156] eta: 0:00:04 lr: 0.006861 min_lr: 0.006861 loss: 3.4107 (3.3731) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.0001 max mem: 57114 Epoch: [88] [155/156] eta: 0:00:00 lr: 0.006860 min_lr: 0.006860 loss: 3.4107 (3.3644) weight_decay: 0.0500 (0.0500) time: 0.6857 data: 0.0001 max mem: 57114 Epoch: [88] Total time: 0:01:53 (0.7300 s / it) Averaged stats: lr: 0.006860 min_lr: 0.006860 loss: 3.4107 (3.3474) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9475 (0.9475) acc1: 79.6875 (79.6875) acc5: 96.3542 (96.3542) time: 2.0609 data: 1.8052 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0785 (1.0500) acc1: 77.0833 (74.5524) acc5: 93.7500 (93.3504) time: 0.5629 data: 0.3611 max mem: 57114 Test: Total time: 0:00:02 (0.5837 s / it) * Acc@1 74.257 Acc@5 92.445 loss 1.163 Accuracy of the model on the 50000 test images: 74.3% Max accuracy: 76.02% Test: [0/5] eta: 0:00:11 loss: 8.4950 (8.4950) acc1: 1.0417 (1.0417) acc5: 1.5625 (1.5625) time: 2.3362 data: 2.0922 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.4219 (8.3371) acc1: 0.0000 (0.5115) acc5: 1.5625 (1.2788) time: 0.6181 data: 0.4185 max mem: 57114 Test: Total time: 0:00:03 (0.6314 s / it) * Acc@1 0.172 Acc@5 0.909 loss 8.235 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.17% Epoch: [89] [ 0/156] eta: 0:08:22 lr: 0.006860 min_lr: 0.006860 loss: 2.4674 (2.4674) weight_decay: 0.0500 (0.0500) time: 3.2204 data: 2.5712 max mem: 57114 Epoch: [89] [ 10/156] eta: 0:02:17 lr: 0.006858 min_lr: 0.006858 loss: 3.5643 (3.4154) weight_decay: 0.0500 (0.0500) time: 0.9388 data: 0.2340 max mem: 57114 Epoch: [89] [ 20/156] eta: 0:01:52 lr: 0.006856 min_lr: 0.006856 loss: 3.4821 (3.3844) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [89] [ 30/156] eta: 0:01:41 lr: 0.006854 min_lr: 0.006854 loss: 3.4252 (3.4163) weight_decay: 0.0500 (0.0500) time: 0.7301 data: 0.0004 max mem: 57114 Epoch: [89] [ 40/156] eta: 0:01:31 lr: 0.006852 min_lr: 0.006852 loss: 3.3995 (3.3269) weight_decay: 0.0500 (0.0500) time: 0.7420 data: 0.0004 max mem: 57114 Epoch: [89] [ 50/156] eta: 0:01:22 lr: 0.006850 min_lr: 0.006850 loss: 3.0389 (3.3170) weight_decay: 0.0500 (0.0500) time: 0.7329 data: 0.0004 max mem: 57114 Epoch: [89] [ 60/156] eta: 0:01:13 lr: 0.006848 min_lr: 0.006848 loss: 3.3878 (3.3320) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [89] [ 70/156] eta: 0:01:05 lr: 0.006846 min_lr: 0.006846 loss: 3.4037 (3.3065) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0004 max mem: 57114 Epoch: [89] [ 80/156] eta: 0:00:57 lr: 0.006844 min_lr: 0.006844 loss: 3.1794 (3.2633) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [89] [ 90/156] eta: 0:00:49 lr: 0.006842 min_lr: 0.006842 loss: 3.3455 (3.2955) weight_decay: 0.0500 (0.0500) time: 0.7020 data: 0.0005 max mem: 57114 Epoch: [89] [100/156] eta: 0:00:41 lr: 0.006840 min_lr: 0.006840 loss: 3.5809 (3.3015) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.0004 max mem: 57114 Epoch: [89] [110/156] eta: 0:00:33 lr: 0.006838 min_lr: 0.006838 loss: 3.5809 (3.3288) weight_decay: 0.0500 (0.0500) time: 0.7022 data: 0.0003 max mem: 57114 Epoch: [89] [120/156] eta: 0:00:26 lr: 0.006836 min_lr: 0.006836 loss: 3.5982 (3.3520) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0003 max mem: 57114 Epoch: [89] [130/156] eta: 0:00:19 lr: 0.006834 min_lr: 0.006834 loss: 3.4805 (3.3483) weight_decay: 0.0500 (0.0500) time: 0.7016 data: 0.0008 max mem: 57114 Epoch: [89] [140/156] eta: 0:00:11 lr: 0.006832 min_lr: 0.006832 loss: 3.4603 (3.3626) weight_decay: 0.0500 (0.0500) time: 0.6893 data: 0.0007 max mem: 57114 Epoch: [89] [150/156] eta: 0:00:04 lr: 0.006830 min_lr: 0.006830 loss: 3.4572 (3.3645) weight_decay: 0.0500 (0.0500) time: 0.6892 data: 0.0001 max mem: 57114 Epoch: [89] [155/156] eta: 0:00:00 lr: 0.006829 min_lr: 0.006829 loss: 3.5007 (3.3688) weight_decay: 0.0500 (0.0500) time: 0.6871 data: 0.0001 max mem: 57114 Epoch: [89] Total time: 0:01:53 (0.7275 s / it) Averaged stats: lr: 0.006829 min_lr: 0.006829 loss: 3.5007 (3.3418) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0958 (1.0958) acc1: 81.2500 (81.2500) acc5: 95.3125 (95.3125) time: 2.0293 data: 1.7735 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2014 (1.1774) acc1: 78.1250 (76.3427) acc5: 95.3125 (93.3504) time: 0.5565 data: 0.3548 max mem: 57114 Test: Total time: 0:00:02 (0.5811 s / it) * Acc@1 75.657 Acc@5 93.187 loss 1.258 Accuracy of the model on the 50000 test images: 75.7% Max accuracy: 76.02% Test: [0/5] eta: 0:00:11 loss: 8.5391 (8.5391) acc1: 1.0417 (1.0417) acc5: 2.0833 (2.0833) time: 2.3254 data: 2.0820 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.4888 (8.3821) acc1: 0.5208 (0.6394) acc5: 1.5625 (1.2788) time: 0.6158 data: 0.4165 max mem: 57114 Test: Total time: 0:00:03 (0.6268 s / it) * Acc@1 0.188 Acc@5 0.951 loss 8.279 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.19% Epoch: [90] [ 0/156] eta: 0:05:45 lr: 0.006829 min_lr: 0.006829 loss: 2.4796 (2.4796) weight_decay: 0.0500 (0.0500) time: 2.2136 data: 1.5578 max mem: 57114 Epoch: [90] [ 10/156] eta: 0:02:07 lr: 0.006827 min_lr: 0.006827 loss: 3.4346 (3.3406) weight_decay: 0.0500 (0.0500) time: 0.8740 data: 0.1419 max mem: 57114 Epoch: [90] [ 20/156] eta: 0:01:47 lr: 0.006825 min_lr: 0.006825 loss: 3.3110 (3.2883) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0003 max mem: 57114 Epoch: [90] [ 30/156] eta: 0:01:37 lr: 0.006822 min_lr: 0.006822 loss: 3.3902 (3.3772) weight_decay: 0.0500 (0.0500) time: 0.7145 data: 0.0004 max mem: 57114 Epoch: [90] [ 40/156] eta: 0:01:28 lr: 0.006820 min_lr: 0.006820 loss: 3.5559 (3.4115) weight_decay: 0.0500 (0.0500) time: 0.7276 data: 0.0004 max mem: 57114 Epoch: [90] [ 50/156] eta: 0:01:20 lr: 0.006818 min_lr: 0.006818 loss: 3.5393 (3.4500) weight_decay: 0.0500 (0.0500) time: 0.7374 data: 0.0004 max mem: 57114 Epoch: [90] [ 60/156] eta: 0:01:12 lr: 0.006816 min_lr: 0.006816 loss: 3.5393 (3.4386) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0004 max mem: 57114 Epoch: [90] [ 70/156] eta: 0:01:04 lr: 0.006814 min_lr: 0.006814 loss: 3.2070 (3.4107) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0005 max mem: 57114 Epoch: [90] [ 80/156] eta: 0:00:56 lr: 0.006812 min_lr: 0.006812 loss: 3.1956 (3.3858) weight_decay: 0.0500 (0.0500) time: 0.7063 data: 0.0004 max mem: 57114 Epoch: [90] [ 90/156] eta: 0:00:48 lr: 0.006810 min_lr: 0.006810 loss: 3.3299 (3.3844) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0004 max mem: 57114 Epoch: [90] [100/156] eta: 0:00:41 lr: 0.006808 min_lr: 0.006808 loss: 3.4970 (3.3799) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [90] [110/156] eta: 0:00:33 lr: 0.006806 min_lr: 0.006806 loss: 3.6161 (3.3971) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0004 max mem: 57114 Epoch: [90] [120/156] eta: 0:00:26 lr: 0.006804 min_lr: 0.006804 loss: 3.5940 (3.3918) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0004 max mem: 57114 Epoch: [90] [130/156] eta: 0:00:18 lr: 0.006802 min_lr: 0.006802 loss: 3.3344 (3.3753) weight_decay: 0.0500 (0.0500) time: 0.6933 data: 0.0009 max mem: 57114 Epoch: [90] [140/156] eta: 0:00:11 lr: 0.006800 min_lr: 0.006800 loss: 3.0691 (3.3652) weight_decay: 0.0500 (0.0500) time: 0.6896 data: 0.0008 max mem: 57114 Epoch: [90] [150/156] eta: 0:00:04 lr: 0.006798 min_lr: 0.006798 loss: 3.5251 (3.3783) weight_decay: 0.0500 (0.0500) time: 0.6928 data: 0.0001 max mem: 57114 Epoch: [90] [155/156] eta: 0:00:00 lr: 0.006797 min_lr: 0.006797 loss: 3.5424 (3.3832) weight_decay: 0.0500 (0.0500) time: 0.6877 data: 0.0001 max mem: 57114 Epoch: [90] Total time: 0:01:52 (0.7220 s / it) Averaged stats: lr: 0.006797 min_lr: 0.006797 loss: 3.5424 (3.3372) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0259 (1.0259) acc1: 78.6458 (78.6458) acc5: 95.8333 (95.8333) time: 2.0650 data: 1.8093 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0495 (1.0896) acc1: 77.6042 (72.8900) acc5: 94.7917 (92.3274) time: 0.5637 data: 0.3619 max mem: 57114 Test: Total time: 0:00:02 (0.5856 s / it) * Acc@1 74.351 Acc@5 92.399 loss 1.145 Accuracy of the model on the 50000 test images: 74.4% Max accuracy: 76.02% Test: [0/5] eta: 0:00:12 loss: 8.5887 (8.5887) acc1: 1.0417 (1.0417) acc5: 2.0833 (2.0833) time: 2.4111 data: 2.1676 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.5592 (8.4319) acc1: 0.5208 (0.6394) acc5: 1.5625 (1.4066) time: 0.6328 data: 0.4336 max mem: 57114 Test: Total time: 0:00:03 (0.6442 s / it) * Acc@1 0.202 Acc@5 0.987 loss 8.328 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.20% Epoch: [91] [ 0/156] eta: 0:07:20 lr: 0.006797 min_lr: 0.006797 loss: 3.4553 (3.4553) weight_decay: 0.0500 (0.0500) time: 2.8221 data: 2.1739 max mem: 57114 Epoch: [91] [ 10/156] eta: 0:02:13 lr: 0.006795 min_lr: 0.006795 loss: 3.2555 (3.1751) weight_decay: 0.0500 (0.0500) time: 0.9128 data: 0.1979 max mem: 57114 Epoch: [91] [ 20/156] eta: 0:01:52 lr: 0.006793 min_lr: 0.006793 loss: 3.1643 (3.1632) weight_decay: 0.0500 (0.0500) time: 0.7241 data: 0.0003 max mem: 57114 Epoch: [91] [ 30/156] eta: 0:01:39 lr: 0.006790 min_lr: 0.006790 loss: 3.2747 (3.1393) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0003 max mem: 57114 Epoch: [91] [ 40/156] eta: 0:01:29 lr: 0.006788 min_lr: 0.006788 loss: 3.4876 (3.1919) weight_decay: 0.0500 (0.0500) time: 0.7246 data: 0.0004 max mem: 57114 Epoch: [91] [ 50/156] eta: 0:01:21 lr: 0.006786 min_lr: 0.006786 loss: 3.3844 (3.2358) weight_decay: 0.0500 (0.0500) time: 0.7421 data: 0.0004 max mem: 57114 Epoch: [91] [ 60/156] eta: 0:01:12 lr: 0.006784 min_lr: 0.006784 loss: 3.2967 (3.2357) weight_decay: 0.0500 (0.0500) time: 0.7244 data: 0.0004 max mem: 57114 Epoch: [91] [ 70/156] eta: 0:01:04 lr: 0.006782 min_lr: 0.006782 loss: 3.2967 (3.2324) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0003 max mem: 57114 Epoch: [91] [ 80/156] eta: 0:00:56 lr: 0.006780 min_lr: 0.006780 loss: 3.3543 (3.2517) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0004 max mem: 57114 Epoch: [91] [ 90/156] eta: 0:00:49 lr: 0.006778 min_lr: 0.006778 loss: 3.1231 (3.2473) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0004 max mem: 57114 Epoch: [91] [100/156] eta: 0:00:41 lr: 0.006776 min_lr: 0.006776 loss: 3.2493 (3.2595) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [91] [110/156] eta: 0:00:33 lr: 0.006774 min_lr: 0.006774 loss: 3.2493 (3.2599) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0004 max mem: 57114 Epoch: [91] [120/156] eta: 0:00:26 lr: 0.006772 min_lr: 0.006772 loss: 3.1689 (3.2514) weight_decay: 0.0500 (0.0500) time: 0.7115 data: 0.0004 max mem: 57114 Epoch: [91] [130/156] eta: 0:00:19 lr: 0.006770 min_lr: 0.006770 loss: 3.4324 (3.2703) weight_decay: 0.0500 (0.0500) time: 0.7115 data: 0.0009 max mem: 57114 Epoch: [91] [140/156] eta: 0:00:11 lr: 0.006768 min_lr: 0.006768 loss: 3.4398 (3.2720) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0007 max mem: 57114 Epoch: [91] [150/156] eta: 0:00:04 lr: 0.006766 min_lr: 0.006766 loss: 3.3963 (3.2823) weight_decay: 0.0500 (0.0500) time: 0.6794 data: 0.0001 max mem: 57114 Epoch: [91] [155/156] eta: 0:00:00 lr: 0.006765 min_lr: 0.006765 loss: 3.3963 (3.2895) weight_decay: 0.0500 (0.0500) time: 0.6822 data: 0.0001 max mem: 57114 Epoch: [91] Total time: 0:01:53 (0.7261 s / it) Averaged stats: lr: 0.006765 min_lr: 0.006765 loss: 3.3963 (3.3334) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.8927 (0.8927) acc1: 80.2083 (80.2083) acc5: 95.8333 (95.8333) time: 2.2228 data: 1.9670 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0464 (1.0961) acc1: 80.2083 (76.0870) acc5: 95.8333 (94.3734) time: 0.5952 data: 0.3935 max mem: 57114 Test: Total time: 0:00:03 (0.6155 s / it) * Acc@1 75.567 Acc@5 93.217 loss 1.188 Accuracy of the model on the 50000 test images: 75.6% Max accuracy: 76.02% Test: [0/5] eta: 0:00:11 loss: 8.6305 (8.6305) acc1: 1.0417 (1.0417) acc5: 2.0833 (2.0833) time: 2.3268 data: 2.0832 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.6245 (8.4727) acc1: 0.5208 (0.6394) acc5: 1.5625 (1.5345) time: 0.6160 data: 0.4167 max mem: 57114 Test: Total time: 0:00:03 (0.6483 s / it) * Acc@1 0.230 Acc@5 1.019 loss 8.369 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.23% Epoch: [92] [ 0/156] eta: 0:06:44 lr: 0.006764 min_lr: 0.006764 loss: 3.4130 (3.4130) weight_decay: 0.0500 (0.0500) time: 2.5915 data: 1.9420 max mem: 57114 Epoch: [92] [ 10/156] eta: 0:02:10 lr: 0.006762 min_lr: 0.006762 loss: 3.4130 (3.1983) weight_decay: 0.0500 (0.0500) time: 0.8972 data: 0.1769 max mem: 57114 Epoch: [92] [ 20/156] eta: 0:01:51 lr: 0.006760 min_lr: 0.006760 loss: 3.3623 (3.3063) weight_decay: 0.0500 (0.0500) time: 0.7285 data: 0.0004 max mem: 57114 Epoch: [92] [ 30/156] eta: 0:01:38 lr: 0.006758 min_lr: 0.006758 loss: 3.4359 (3.3368) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0004 max mem: 57114 Epoch: [92] [ 40/156] eta: 0:01:28 lr: 0.006756 min_lr: 0.006756 loss: 3.4070 (3.3158) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [92] [ 50/156] eta: 0:01:19 lr: 0.006754 min_lr: 0.006754 loss: 3.3425 (3.3473) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0004 max mem: 57114 Epoch: [92] [ 60/156] eta: 0:01:12 lr: 0.006752 min_lr: 0.006752 loss: 3.5074 (3.3689) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [92] [ 70/156] eta: 0:01:04 lr: 0.006750 min_lr: 0.006750 loss: 3.5738 (3.3570) weight_decay: 0.0500 (0.0500) time: 0.7277 data: 0.0003 max mem: 57114 Epoch: [92] [ 80/156] eta: 0:00:56 lr: 0.006748 min_lr: 0.006748 loss: 3.5079 (3.3794) weight_decay: 0.0500 (0.0500) time: 0.7127 data: 0.0004 max mem: 57114 Epoch: [92] [ 90/156] eta: 0:00:48 lr: 0.006746 min_lr: 0.006746 loss: 3.5119 (3.3697) weight_decay: 0.0500 (0.0500) time: 0.7095 data: 0.0004 max mem: 57114 Epoch: [92] [100/156] eta: 0:00:41 lr: 0.006744 min_lr: 0.006744 loss: 3.5365 (3.3862) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0004 max mem: 57114 Epoch: [92] [110/156] eta: 0:00:33 lr: 0.006741 min_lr: 0.006741 loss: 3.5491 (3.3904) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0004 max mem: 57114 Epoch: [92] [120/156] eta: 0:00:26 lr: 0.006739 min_lr: 0.006739 loss: 3.6702 (3.4182) weight_decay: 0.0500 (0.0500) time: 0.6909 data: 0.0004 max mem: 57114 Epoch: [92] [130/156] eta: 0:00:18 lr: 0.006737 min_lr: 0.006737 loss: 3.5731 (3.4090) weight_decay: 0.0500 (0.0500) time: 0.6954 data: 0.0008 max mem: 57114 Epoch: [92] [140/156] eta: 0:00:11 lr: 0.006735 min_lr: 0.006735 loss: 3.4875 (3.4129) weight_decay: 0.0500 (0.0500) time: 0.6911 data: 0.0007 max mem: 57114 Epoch: [92] [150/156] eta: 0:00:04 lr: 0.006733 min_lr: 0.006733 loss: 3.3611 (3.4036) weight_decay: 0.0500 (0.0500) time: 0.6880 data: 0.0001 max mem: 57114 Epoch: [92] [155/156] eta: 0:00:00 lr: 0.006732 min_lr: 0.006732 loss: 3.3464 (3.4008) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0001 max mem: 57114 Epoch: [92] Total time: 0:01:52 (0.7215 s / it) Averaged stats: lr: 0.006732 min_lr: 0.006732 loss: 3.3464 (3.3311) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9286 (0.9286) acc1: 80.2083 (80.2083) acc5: 95.8333 (95.8333) time: 2.0503 data: 1.7944 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1111 (1.0430) acc1: 79.1667 (76.0870) acc5: 93.2292 (92.3274) time: 0.5607 data: 0.3589 max mem: 57114 Test: Total time: 0:00:02 (0.5825 s / it) * Acc@1 75.328 Acc@5 93.015 loss 1.133 Accuracy of the model on the 50000 test images: 75.3% Max accuracy: 76.02% Test: [0/5] eta: 0:00:11 loss: 8.6669 (8.6669) acc1: 1.0417 (1.0417) acc5: 2.0833 (2.0833) time: 2.2963 data: 2.0528 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.6669 (8.5080) acc1: 0.5208 (0.6394) acc5: 1.5625 (1.5345) time: 0.6099 data: 0.4107 max mem: 57114 Test: Total time: 0:00:03 (0.6227 s / it) * Acc@1 0.256 Acc@5 1.055 loss 8.403 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.26% Epoch: [93] [ 0/156] eta: 0:08:13 lr: 0.006732 min_lr: 0.006732 loss: 3.0813 (3.0813) weight_decay: 0.0500 (0.0500) time: 3.1651 data: 2.5185 max mem: 57114 Epoch: [93] [ 10/156] eta: 0:02:15 lr: 0.006730 min_lr: 0.006730 loss: 3.5230 (3.4400) weight_decay: 0.0500 (0.0500) time: 0.9252 data: 0.2293 max mem: 57114 Epoch: [93] [ 20/156] eta: 0:01:53 lr: 0.006728 min_lr: 0.006728 loss: 3.4650 (3.3548) weight_decay: 0.0500 (0.0500) time: 0.7187 data: 0.0004 max mem: 57114 Epoch: [93] [ 30/156] eta: 0:01:39 lr: 0.006725 min_lr: 0.006725 loss: 3.3879 (3.3905) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0003 max mem: 57114 Epoch: [93] [ 40/156] eta: 0:01:30 lr: 0.006723 min_lr: 0.006723 loss: 3.3194 (3.3435) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0004 max mem: 57114 Epoch: [93] [ 50/156] eta: 0:01:21 lr: 0.006721 min_lr: 0.006721 loss: 3.3398 (3.3746) weight_decay: 0.0500 (0.0500) time: 0.7408 data: 0.0004 max mem: 57114 Epoch: [93] [ 60/156] eta: 0:01:13 lr: 0.006719 min_lr: 0.006719 loss: 3.3631 (3.3560) weight_decay: 0.0500 (0.0500) time: 0.7298 data: 0.0004 max mem: 57114 Epoch: [93] [ 70/156] eta: 0:01:04 lr: 0.006717 min_lr: 0.006717 loss: 3.3810 (3.3528) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0004 max mem: 57114 Epoch: [93] [ 80/156] eta: 0:00:56 lr: 0.006715 min_lr: 0.006715 loss: 3.3810 (3.3470) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [93] [ 90/156] eta: 0:00:49 lr: 0.006713 min_lr: 0.006713 loss: 3.4445 (3.3544) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [93] [100/156] eta: 0:00:41 lr: 0.006711 min_lr: 0.006711 loss: 3.6138 (3.3655) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [93] [110/156] eta: 0:00:33 lr: 0.006709 min_lr: 0.006709 loss: 3.6529 (3.3941) weight_decay: 0.0500 (0.0500) time: 0.6994 data: 0.0004 max mem: 57114 Epoch: [93] [120/156] eta: 0:00:26 lr: 0.006706 min_lr: 0.006706 loss: 3.6194 (3.3873) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [93] [130/156] eta: 0:00:19 lr: 0.006704 min_lr: 0.006704 loss: 3.3410 (3.3719) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0005 max mem: 57114 Epoch: [93] [140/156] eta: 0:00:11 lr: 0.006702 min_lr: 0.006702 loss: 3.3214 (3.3569) weight_decay: 0.0500 (0.0500) time: 0.6986 data: 0.0004 max mem: 57114 Epoch: [93] [150/156] eta: 0:00:04 lr: 0.006700 min_lr: 0.006700 loss: 3.3625 (3.3709) weight_decay: 0.0500 (0.0500) time: 0.6865 data: 0.0001 max mem: 57114 Epoch: [93] [155/156] eta: 0:00:00 lr: 0.006699 min_lr: 0.006699 loss: 3.3993 (3.3579) weight_decay: 0.0500 (0.0500) time: 0.6880 data: 0.0001 max mem: 57114 Epoch: [93] Total time: 0:01:53 (0.7277 s / it) Averaged stats: lr: 0.006699 min_lr: 0.006699 loss: 3.3993 (3.3205) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8188 (0.8188) acc1: 81.2500 (81.2500) acc5: 97.9167 (97.9167) time: 2.0806 data: 1.8247 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0167 (0.9900) acc1: 79.1667 (76.4706) acc5: 95.3125 (93.4783) time: 0.5668 data: 0.3650 max mem: 57114 Test: Total time: 0:00:02 (0.5902 s / it) * Acc@1 76.073 Acc@5 93.534 loss 1.061 Accuracy of the model on the 50000 test images: 76.1% Max accuracy: 76.07% Test: [0/5] eta: 0:00:10 loss: 8.6677 (8.6677) acc1: 1.0417 (1.0417) acc5: 2.0833 (2.0833) time: 2.1167 data: 1.8732 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.6677 (8.5089) acc1: 0.5208 (0.6394) acc5: 1.5625 (1.5345) time: 0.5740 data: 0.3748 max mem: 57114 Test: Total time: 0:00:02 (0.5863 s / it) * Acc@1 0.268 Acc@5 1.101 loss 8.408 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.27% Epoch: [94] [ 0/156] eta: 0:06:30 lr: 0.006699 min_lr: 0.006699 loss: 3.5354 (3.5354) weight_decay: 0.0500 (0.0500) time: 2.5050 data: 1.8539 max mem: 57114 Epoch: [94] [ 10/156] eta: 0:02:12 lr: 0.006697 min_lr: 0.006697 loss: 3.4774 (3.1522) weight_decay: 0.0500 (0.0500) time: 0.9042 data: 0.1689 max mem: 57114 Epoch: [94] [ 20/156] eta: 0:01:53 lr: 0.006695 min_lr: 0.006695 loss: 3.2245 (3.1456) weight_decay: 0.0500 (0.0500) time: 0.7489 data: 0.0004 max mem: 57114 Epoch: [94] [ 30/156] eta: 0:01:40 lr: 0.006692 min_lr: 0.006692 loss: 2.9417 (3.0314) weight_decay: 0.0500 (0.0500) time: 0.7361 data: 0.0004 max mem: 57114 Epoch: [94] [ 40/156] eta: 0:01:30 lr: 0.006690 min_lr: 0.006690 loss: 2.9417 (3.0689) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0003 max mem: 57114 Epoch: [94] [ 50/156] eta: 0:01:22 lr: 0.006688 min_lr: 0.006688 loss: 3.3049 (3.1009) weight_decay: 0.0500 (0.0500) time: 0.7463 data: 0.0004 max mem: 57114 Epoch: [94] [ 60/156] eta: 0:01:14 lr: 0.006686 min_lr: 0.006686 loss: 3.3836 (3.1201) weight_decay: 0.0500 (0.0500) time: 0.7470 data: 0.0004 max mem: 57114 Epoch: [94] [ 70/156] eta: 0:01:05 lr: 0.006684 min_lr: 0.006684 loss: 3.4135 (3.1392) weight_decay: 0.0500 (0.0500) time: 0.7366 data: 0.0004 max mem: 57114 Epoch: [94] [ 80/156] eta: 0:00:57 lr: 0.006682 min_lr: 0.006682 loss: 3.4179 (3.1622) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [94] [ 90/156] eta: 0:00:49 lr: 0.006680 min_lr: 0.006680 loss: 3.3675 (3.1813) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0004 max mem: 57114 Epoch: [94] [100/156] eta: 0:00:41 lr: 0.006678 min_lr: 0.006678 loss: 3.4503 (3.1769) weight_decay: 0.0500 (0.0500) time: 0.6943 data: 0.0004 max mem: 57114 Epoch: [94] [110/156] eta: 0:00:34 lr: 0.006675 min_lr: 0.006675 loss: 2.9425 (3.1628) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0004 max mem: 57114 Epoch: [94] [120/156] eta: 0:00:26 lr: 0.006673 min_lr: 0.006673 loss: 3.1839 (3.1742) weight_decay: 0.0500 (0.0500) time: 0.7132 data: 0.0004 max mem: 57114 Epoch: [94] [130/156] eta: 0:00:19 lr: 0.006671 min_lr: 0.006671 loss: 3.4123 (3.2022) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0009 max mem: 57114 Epoch: [94] [140/156] eta: 0:00:11 lr: 0.006669 min_lr: 0.006669 loss: 3.4139 (3.2144) weight_decay: 0.0500 (0.0500) time: 0.6938 data: 0.0007 max mem: 57114 Epoch: [94] [150/156] eta: 0:00:04 lr: 0.006667 min_lr: 0.006667 loss: 3.3607 (3.2186) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [94] [155/156] eta: 0:00:00 lr: 0.006666 min_lr: 0.006666 loss: 3.3312 (3.2197) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [94] Total time: 0:01:53 (0.7306 s / it) Averaged stats: lr: 0.006666 min_lr: 0.006666 loss: 3.3312 (3.3239) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0070 (1.0070) acc1: 79.1667 (79.1667) acc5: 95.3125 (95.3125) time: 2.1201 data: 1.8583 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0550 (1.0800) acc1: 79.1667 (76.0870) acc5: 95.3125 (93.9898) time: 0.5748 data: 0.3717 max mem: 57114 Test: Total time: 0:00:02 (0.5992 s / it) * Acc@1 76.838 Acc@5 93.606 loss 1.130 Accuracy of the model on the 50000 test images: 76.8% Max accuracy: 76.84% Test: [0/5] eta: 0:00:10 loss: 8.6619 (8.6619) acc1: 1.0417 (1.0417) acc5: 2.6042 (2.6042) time: 2.0772 data: 1.8336 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.6619 (8.4996) acc1: 0.5208 (0.7673) acc5: 1.5625 (1.6624) time: 0.5660 data: 0.3668 max mem: 57114 Test: Total time: 0:00:02 (0.5796 s / it) * Acc@1 0.286 Acc@5 1.189 loss 8.403 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.29% Epoch: [95] [ 0/156] eta: 0:06:36 lr: 0.006666 min_lr: 0.006666 loss: 2.7945 (2.7945) weight_decay: 0.0500 (0.0500) time: 2.5434 data: 1.8920 max mem: 57114 Epoch: [95] [ 10/156] eta: 0:02:09 lr: 0.006663 min_lr: 0.006663 loss: 3.1164 (3.1628) weight_decay: 0.0500 (0.0500) time: 0.8876 data: 0.1724 max mem: 57114 Epoch: [95] [ 20/156] eta: 0:01:51 lr: 0.006661 min_lr: 0.006661 loss: 3.2932 (3.2460) weight_decay: 0.0500 (0.0500) time: 0.7325 data: 0.0004 max mem: 57114 Epoch: [95] [ 30/156] eta: 0:01:40 lr: 0.006659 min_lr: 0.006659 loss: 3.4667 (3.3085) weight_decay: 0.0500 (0.0500) time: 0.7522 data: 0.0004 max mem: 57114 Epoch: [95] [ 40/156] eta: 0:01:31 lr: 0.006657 min_lr: 0.006657 loss: 3.2972 (3.2844) weight_decay: 0.0500 (0.0500) time: 0.7591 data: 0.0004 max mem: 57114 Epoch: [95] [ 50/156] eta: 0:01:22 lr: 0.006655 min_lr: 0.006655 loss: 3.2688 (3.2860) weight_decay: 0.0500 (0.0500) time: 0.7534 data: 0.0004 max mem: 57114 Epoch: [95] [ 60/156] eta: 0:01:14 lr: 0.006653 min_lr: 0.006653 loss: 3.4777 (3.3080) weight_decay: 0.0500 (0.0500) time: 0.7371 data: 0.0004 max mem: 57114 Epoch: [95] [ 70/156] eta: 0:01:05 lr: 0.006651 min_lr: 0.006651 loss: 3.4850 (3.3102) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0004 max mem: 57114 Epoch: [95] [ 80/156] eta: 0:00:57 lr: 0.006648 min_lr: 0.006648 loss: 3.5471 (3.3078) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [95] [ 90/156] eta: 0:00:49 lr: 0.006646 min_lr: 0.006646 loss: 3.4317 (3.3089) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [95] [100/156] eta: 0:00:41 lr: 0.006644 min_lr: 0.006644 loss: 3.4166 (3.3142) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [95] [110/156] eta: 0:00:34 lr: 0.006642 min_lr: 0.006642 loss: 3.3969 (3.2963) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0004 max mem: 57114 Epoch: [95] [120/156] eta: 0:00:26 lr: 0.006640 min_lr: 0.006640 loss: 3.4274 (3.3081) weight_decay: 0.0500 (0.0500) time: 0.7038 data: 0.0004 max mem: 57114 Epoch: [95] [130/156] eta: 0:00:19 lr: 0.006638 min_lr: 0.006638 loss: 3.4455 (3.3038) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0008 max mem: 57114 Epoch: [95] [140/156] eta: 0:00:11 lr: 0.006635 min_lr: 0.006635 loss: 3.3605 (3.3040) weight_decay: 0.0500 (0.0500) time: 0.6900 data: 0.0007 max mem: 57114 Epoch: [95] [150/156] eta: 0:00:04 lr: 0.006633 min_lr: 0.006633 loss: 3.3583 (3.3001) weight_decay: 0.0500 (0.0500) time: 0.6855 data: 0.0001 max mem: 57114 Epoch: [95] [155/156] eta: 0:00:00 lr: 0.006632 min_lr: 0.006632 loss: 3.3042 (3.2973) weight_decay: 0.0500 (0.0500) time: 0.6863 data: 0.0001 max mem: 57114 Epoch: [95] Total time: 0:01:54 (0.7309 s / it) Averaged stats: lr: 0.006632 min_lr: 0.006632 loss: 3.3042 (3.3104) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0602 (1.0602) acc1: 81.7708 (81.7708) acc5: 95.8333 (95.8333) time: 2.0922 data: 1.8363 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0602 (1.0604) acc1: 81.7708 (77.8772) acc5: 95.8333 (93.8619) time: 0.5691 data: 0.3673 max mem: 57114 Test: Total time: 0:00:02 (0.5909 s / it) * Acc@1 76.037 Acc@5 93.300 loss 1.154 Accuracy of the model on the 50000 test images: 76.0% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.6527 (8.6527) acc1: 1.0417 (1.0417) acc5: 2.6042 (2.6042) time: 2.3105 data: 2.0671 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.6527 (8.4879) acc1: 1.0417 (0.8951) acc5: 1.5625 (1.6624) time: 0.6128 data: 0.4135 max mem: 57114 Test: Total time: 0:00:03 (0.6256 s / it) * Acc@1 0.310 Acc@5 1.247 loss 8.396 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.31% Epoch: [96] [ 0/156] eta: 0:06:59 lr: 0.006632 min_lr: 0.006632 loss: 3.3014 (3.3014) weight_decay: 0.0500 (0.0500) time: 2.6865 data: 2.0382 max mem: 57114 Epoch: [96] [ 10/156] eta: 0:02:11 lr: 0.006630 min_lr: 0.006630 loss: 3.4443 (3.3893) weight_decay: 0.0500 (0.0500) time: 0.8987 data: 0.1855 max mem: 57114 Epoch: [96] [ 20/156] eta: 0:01:50 lr: 0.006628 min_lr: 0.006628 loss: 3.5465 (3.3883) weight_decay: 0.0500 (0.0500) time: 0.7197 data: 0.0003 max mem: 57114 Epoch: [96] [ 30/156] eta: 0:01:38 lr: 0.006625 min_lr: 0.006625 loss: 3.6255 (3.4240) weight_decay: 0.0500 (0.0500) time: 0.7216 data: 0.0004 max mem: 57114 Epoch: [96] [ 40/156] eta: 0:01:29 lr: 0.006623 min_lr: 0.006623 loss: 3.5527 (3.3964) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0003 max mem: 57114 Epoch: [96] [ 50/156] eta: 0:01:20 lr: 0.006621 min_lr: 0.006621 loss: 3.4182 (3.3660) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0004 max mem: 57114 Epoch: [96] [ 60/156] eta: 0:01:12 lr: 0.006619 min_lr: 0.006619 loss: 3.2887 (3.3527) weight_decay: 0.0500 (0.0500) time: 0.7204 data: 0.0004 max mem: 57114 Epoch: [96] [ 70/156] eta: 0:01:04 lr: 0.006617 min_lr: 0.006617 loss: 3.3759 (3.3378) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0004 max mem: 57114 Epoch: [96] [ 80/156] eta: 0:00:56 lr: 0.006615 min_lr: 0.006615 loss: 3.0694 (3.3207) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [96] [ 90/156] eta: 0:00:48 lr: 0.006612 min_lr: 0.006612 loss: 3.0430 (3.2892) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [96] [100/156] eta: 0:00:41 lr: 0.006610 min_lr: 0.006610 loss: 3.3477 (3.3016) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [96] [110/156] eta: 0:00:33 lr: 0.006608 min_lr: 0.006608 loss: 3.3186 (3.2931) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0004 max mem: 57114 Epoch: [96] [120/156] eta: 0:00:26 lr: 0.006606 min_lr: 0.006606 loss: 3.4886 (3.3095) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0004 max mem: 57114 Epoch: [96] [130/156] eta: 0:00:18 lr: 0.006604 min_lr: 0.006604 loss: 3.4886 (3.2920) weight_decay: 0.0500 (0.0500) time: 0.6994 data: 0.0009 max mem: 57114 Epoch: [96] [140/156] eta: 0:00:11 lr: 0.006601 min_lr: 0.006601 loss: 3.4095 (3.3098) weight_decay: 0.0500 (0.0500) time: 0.6891 data: 0.0007 max mem: 57114 Epoch: [96] [150/156] eta: 0:00:04 lr: 0.006599 min_lr: 0.006599 loss: 3.4399 (3.3123) weight_decay: 0.0500 (0.0500) time: 0.6826 data: 0.0001 max mem: 57114 Epoch: [96] [155/156] eta: 0:00:00 lr: 0.006598 min_lr: 0.006598 loss: 3.3327 (3.2972) weight_decay: 0.0500 (0.0500) time: 0.6877 data: 0.0001 max mem: 57114 Epoch: [96] Total time: 0:01:52 (0.7238 s / it) Averaged stats: lr: 0.006598 min_lr: 0.006598 loss: 3.3327 (3.3118) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9430 (0.9430) acc1: 81.7708 (81.7708) acc5: 96.8750 (96.8750) time: 2.0677 data: 1.8121 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0664 (0.9891) acc1: 78.1250 (77.4936) acc5: 96.3542 (94.3734) time: 0.5643 data: 0.3625 max mem: 57114 Test: Total time: 0:00:02 (0.5866 s / it) * Acc@1 76.341 Acc@5 93.648 loss 1.113 Accuracy of the model on the 50000 test images: 76.3% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.6135 (8.6135) acc1: 1.0417 (1.0417) acc5: 2.6042 (2.6042) time: 2.3389 data: 2.0954 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.6135 (8.4464) acc1: 1.0417 (0.8951) acc5: 1.5625 (1.9182) time: 0.6184 data: 0.4192 max mem: 57114 Test: Total time: 0:00:03 (0.6302 s / it) * Acc@1 0.348 Acc@5 1.309 loss 8.362 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.35% Epoch: [97] [ 0/156] eta: 0:06:21 lr: 0.006598 min_lr: 0.006598 loss: 3.1405 (3.1405) weight_decay: 0.0500 (0.0500) time: 2.4435 data: 1.7853 max mem: 57114 Epoch: [97] [ 10/156] eta: 0:02:09 lr: 0.006596 min_lr: 0.006596 loss: 3.4610 (3.3284) weight_decay: 0.0500 (0.0500) time: 0.8899 data: 0.1626 max mem: 57114 Epoch: [97] [ 20/156] eta: 0:01:50 lr: 0.006594 min_lr: 0.006594 loss: 3.4610 (3.4505) weight_decay: 0.0500 (0.0500) time: 0.7283 data: 0.0004 max mem: 57114 Epoch: [97] [ 30/156] eta: 0:01:38 lr: 0.006591 min_lr: 0.006591 loss: 3.4497 (3.3677) weight_decay: 0.0500 (0.0500) time: 0.7206 data: 0.0003 max mem: 57114 Epoch: [97] [ 40/156] eta: 0:01:29 lr: 0.006589 min_lr: 0.006589 loss: 3.4432 (3.3786) weight_decay: 0.0500 (0.0500) time: 0.7331 data: 0.0003 max mem: 57114 Epoch: [97] [ 50/156] eta: 0:01:20 lr: 0.006587 min_lr: 0.006587 loss: 3.5426 (3.4057) weight_decay: 0.0500 (0.0500) time: 0.7228 data: 0.0004 max mem: 57114 Epoch: [97] [ 60/156] eta: 0:01:12 lr: 0.006585 min_lr: 0.006585 loss: 3.5388 (3.3662) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [97] [ 70/156] eta: 0:01:04 lr: 0.006583 min_lr: 0.006583 loss: 3.3006 (3.3614) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [97] [ 80/156] eta: 0:00:56 lr: 0.006580 min_lr: 0.006580 loss: 3.5002 (3.3756) weight_decay: 0.0500 (0.0500) time: 0.7071 data: 0.0004 max mem: 57114 Epoch: [97] [ 90/156] eta: 0:00:48 lr: 0.006578 min_lr: 0.006578 loss: 3.5100 (3.3773) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [97] [100/156] eta: 0:00:41 lr: 0.006576 min_lr: 0.006576 loss: 3.3220 (3.3792) weight_decay: 0.0500 (0.0500) time: 0.6974 data: 0.0003 max mem: 57114 Epoch: [97] [110/156] eta: 0:00:33 lr: 0.006574 min_lr: 0.006574 loss: 3.2016 (3.3571) weight_decay: 0.0500 (0.0500) time: 0.6941 data: 0.0003 max mem: 57114 Epoch: [97] [120/156] eta: 0:00:26 lr: 0.006572 min_lr: 0.006572 loss: 3.3950 (3.3638) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [97] [130/156] eta: 0:00:18 lr: 0.006569 min_lr: 0.006569 loss: 3.5056 (3.3644) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0009 max mem: 57114 Epoch: [97] [140/156] eta: 0:00:11 lr: 0.006567 min_lr: 0.006567 loss: 3.3976 (3.3607) weight_decay: 0.0500 (0.0500) time: 0.6879 data: 0.0007 max mem: 57114 Epoch: [97] [150/156] eta: 0:00:04 lr: 0.006565 min_lr: 0.006565 loss: 3.5383 (3.3755) weight_decay: 0.0500 (0.0500) time: 0.6800 data: 0.0001 max mem: 57114 Epoch: [97] [155/156] eta: 0:00:00 lr: 0.006564 min_lr: 0.006564 loss: 3.5383 (3.3687) weight_decay: 0.0500 (0.0500) time: 0.6821 data: 0.0001 max mem: 57114 Epoch: [97] Total time: 0:01:52 (0.7213 s / it) Averaged stats: lr: 0.006564 min_lr: 0.006564 loss: 3.5383 (3.3065) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8050 (0.8050) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.0416 data: 1.7857 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0612 (0.9871) acc1: 79.6875 (77.7494) acc5: 95.8333 (94.8849) time: 0.5591 data: 0.3572 max mem: 57114 Test: Total time: 0:00:02 (0.5811 s / it) * Acc@1 76.794 Acc@5 93.494 loss 1.086 Accuracy of the model on the 50000 test images: 76.8% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.5834 (8.5834) acc1: 1.0417 (1.0417) acc5: 3.1250 (3.1250) time: 2.2967 data: 2.0533 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.5834 (8.4178) acc1: 1.0417 (1.0230) acc5: 2.0833 (2.0460) time: 0.6099 data: 0.4107 max mem: 57114 Test: Total time: 0:00:03 (0.6242 s / it) * Acc@1 0.398 Acc@5 1.399 loss 8.339 Accuracy of the model EMA on 50000 test images: 0.4% Max EMA accuracy: 0.40% Epoch: [98] [ 0/156] eta: 0:06:29 lr: 0.006564 min_lr: 0.006564 loss: 3.7017 (3.7017) weight_decay: 0.0500 (0.0500) time: 2.4999 data: 1.8409 max mem: 57114 Epoch: [98] [ 10/156] eta: 0:02:07 lr: 0.006561 min_lr: 0.006561 loss: 3.6032 (3.4748) weight_decay: 0.0500 (0.0500) time: 0.8744 data: 0.1677 max mem: 57114 Epoch: [98] [ 20/156] eta: 0:01:47 lr: 0.006559 min_lr: 0.006559 loss: 3.2883 (3.2473) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0005 max mem: 57114 Epoch: [98] [ 30/156] eta: 0:01:36 lr: 0.006557 min_lr: 0.006557 loss: 3.2408 (3.2341) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0006 max mem: 57114 Epoch: [98] [ 40/156] eta: 0:01:28 lr: 0.006555 min_lr: 0.006555 loss: 3.2687 (3.1793) weight_decay: 0.0500 (0.0500) time: 0.7266 data: 0.0004 max mem: 57114 Epoch: [98] [ 50/156] eta: 0:01:20 lr: 0.006553 min_lr: 0.006553 loss: 3.2826 (3.2081) weight_decay: 0.0500 (0.0500) time: 0.7361 data: 0.0004 max mem: 57114 Epoch: [98] [ 60/156] eta: 0:01:12 lr: 0.006550 min_lr: 0.006550 loss: 3.2826 (3.2035) weight_decay: 0.0500 (0.0500) time: 0.7372 data: 0.0004 max mem: 57114 Epoch: [98] [ 70/156] eta: 0:01:04 lr: 0.006548 min_lr: 0.006548 loss: 3.5201 (3.2644) weight_decay: 0.0500 (0.0500) time: 0.7299 data: 0.0004 max mem: 57114 Epoch: [98] [ 80/156] eta: 0:00:56 lr: 0.006546 min_lr: 0.006546 loss: 3.5303 (3.2760) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [98] [ 90/156] eta: 0:00:48 lr: 0.006544 min_lr: 0.006544 loss: 3.4466 (3.2857) weight_decay: 0.0500 (0.0500) time: 0.6935 data: 0.0004 max mem: 57114 Epoch: [98] [100/156] eta: 0:00:41 lr: 0.006542 min_lr: 0.006542 loss: 3.3549 (3.2850) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [98] [110/156] eta: 0:00:33 lr: 0.006539 min_lr: 0.006539 loss: 3.0947 (3.2570) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0004 max mem: 57114 Epoch: [98] [120/156] eta: 0:00:26 lr: 0.006537 min_lr: 0.006537 loss: 3.0947 (3.2571) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0004 max mem: 57114 Epoch: [98] [130/156] eta: 0:00:18 lr: 0.006535 min_lr: 0.006535 loss: 3.2492 (3.2652) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0008 max mem: 57114 Epoch: [98] [140/156] eta: 0:00:11 lr: 0.006533 min_lr: 0.006533 loss: 3.2492 (3.2483) weight_decay: 0.0500 (0.0500) time: 0.6951 data: 0.0007 max mem: 57114 Epoch: [98] [150/156] eta: 0:00:04 lr: 0.006530 min_lr: 0.006530 loss: 2.9832 (3.2497) weight_decay: 0.0500 (0.0500) time: 0.6908 data: 0.0001 max mem: 57114 Epoch: [98] [155/156] eta: 0:00:00 lr: 0.006529 min_lr: 0.006529 loss: 3.4519 (3.2584) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [98] Total time: 0:01:52 (0.7233 s / it) Averaged stats: lr: 0.006529 min_lr: 0.006529 loss: 3.4519 (3.3016) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9141 (0.9141) acc1: 80.7292 (80.7292) acc5: 96.8750 (96.8750) time: 2.1563 data: 1.9002 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0650 (0.9859) acc1: 73.4375 (75.5755) acc5: 95.3125 (93.7340) time: 0.5820 data: 0.3801 max mem: 57114 Test: Total time: 0:00:03 (0.6067 s / it) * Acc@1 76.171 Acc@5 93.608 loss 1.076 Accuracy of the model on the 50000 test images: 76.2% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.5396 (8.5396) acc1: 1.0417 (1.0417) acc5: 3.1250 (3.1250) time: 2.3735 data: 2.1301 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.5396 (8.3677) acc1: 1.0417 (1.1509) acc5: 3.1250 (2.3018) time: 0.6253 data: 0.4261 max mem: 57114 Test: Total time: 0:00:03 (0.6395 s / it) * Acc@1 0.450 Acc@5 1.481 loss 8.298 Accuracy of the model EMA on 50000 test images: 0.4% Max EMA accuracy: 0.45% Epoch: [99] [ 0/156] eta: 0:07:21 lr: 0.006529 min_lr: 0.006529 loss: 3.4267 (3.4267) weight_decay: 0.0500 (0.0500) time: 2.8310 data: 2.1777 max mem: 57114 Epoch: [99] [ 10/156] eta: 0:02:13 lr: 0.006527 min_lr: 0.006527 loss: 3.4267 (3.1913) weight_decay: 0.0500 (0.0500) time: 0.9162 data: 0.1982 max mem: 57114 Epoch: [99] [ 20/156] eta: 0:01:52 lr: 0.006525 min_lr: 0.006525 loss: 3.3909 (3.1820) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0003 max mem: 57114 Epoch: [99] [ 30/156] eta: 0:01:40 lr: 0.006522 min_lr: 0.006522 loss: 2.9604 (3.1351) weight_decay: 0.0500 (0.0500) time: 0.7329 data: 0.0004 max mem: 57114 Epoch: [99] [ 40/156] eta: 0:01:30 lr: 0.006520 min_lr: 0.006520 loss: 3.1409 (3.2122) weight_decay: 0.0500 (0.0500) time: 0.7265 data: 0.0003 max mem: 57114 Epoch: [99] [ 50/156] eta: 0:01:21 lr: 0.006518 min_lr: 0.006518 loss: 3.6889 (3.2846) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0004 max mem: 57114 Epoch: [99] [ 60/156] eta: 0:01:13 lr: 0.006516 min_lr: 0.006516 loss: 3.5372 (3.2992) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0004 max mem: 57114 Epoch: [99] [ 70/156] eta: 0:01:05 lr: 0.006513 min_lr: 0.006513 loss: 3.5074 (3.3068) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0004 max mem: 57114 Epoch: [99] [ 80/156] eta: 0:00:56 lr: 0.006511 min_lr: 0.006511 loss: 3.5880 (3.3252) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0004 max mem: 57114 Epoch: [99] [ 90/156] eta: 0:00:49 lr: 0.006509 min_lr: 0.006509 loss: 3.5476 (3.3210) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [99] [100/156] eta: 0:00:41 lr: 0.006507 min_lr: 0.006507 loss: 3.5476 (3.3215) weight_decay: 0.0500 (0.0500) time: 0.7172 data: 0.0003 max mem: 57114 Epoch: [99] [110/156] eta: 0:00:34 lr: 0.006504 min_lr: 0.006504 loss: 3.4372 (3.3271) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0003 max mem: 57114 Epoch: [99] [120/156] eta: 0:00:26 lr: 0.006502 min_lr: 0.006502 loss: 3.3509 (3.3229) weight_decay: 0.0500 (0.0500) time: 0.7045 data: 0.0004 max mem: 57114 Epoch: [99] [130/156] eta: 0:00:19 lr: 0.006500 min_lr: 0.006500 loss: 3.4321 (3.3319) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0009 max mem: 57114 Epoch: [99] [140/156] eta: 0:00:11 lr: 0.006498 min_lr: 0.006498 loss: 3.5198 (3.3225) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0007 max mem: 57114 Epoch: [99] [150/156] eta: 0:00:04 lr: 0.006495 min_lr: 0.006495 loss: 3.1098 (3.3101) weight_decay: 0.0500 (0.0500) time: 0.6815 data: 0.0001 max mem: 57114 Epoch: [99] [155/156] eta: 0:00:00 lr: 0.006494 min_lr: 0.006494 loss: 3.3487 (3.3161) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0001 max mem: 57114 Epoch: [99] Total time: 0:01:53 (0.7289 s / it) Averaged stats: lr: 0.006494 min_lr: 0.006494 loss: 3.3487 (3.2955) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9745 (0.9745) acc1: 80.2083 (80.2083) acc5: 96.3542 (96.3542) time: 2.0671 data: 1.8111 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1142 (1.0643) acc1: 77.6042 (75.7033) acc5: 93.2292 (92.0716) time: 0.5641 data: 0.3623 max mem: 57114 Test: Total time: 0:00:02 (0.5863 s / it) * Acc@1 75.422 Acc@5 92.829 loss 1.201 Accuracy of the model on the 50000 test images: 75.4% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.4626 (8.4626) acc1: 1.0417 (1.0417) acc5: 3.1250 (3.1250) time: 2.2750 data: 2.0316 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.4626 (8.2906) acc1: 1.0417 (1.1509) acc5: 3.1250 (2.3018) time: 0.6056 data: 0.4064 max mem: 57114 Test: Total time: 0:00:03 (0.6184 s / it) * Acc@1 0.523 Acc@5 1.582 loss 8.234 Accuracy of the model EMA on 50000 test images: 0.5% Max EMA accuracy: 0.52% Epoch: [100] [ 0/156] eta: 0:06:02 lr: 0.006494 min_lr: 0.006494 loss: 3.0842 (3.0842) weight_decay: 0.0500 (0.0500) time: 2.3250 data: 1.6733 max mem: 57114 Epoch: [100] [ 10/156] eta: 0:02:05 lr: 0.006492 min_lr: 0.006492 loss: 3.4575 (3.2703) weight_decay: 0.0500 (0.0500) time: 0.8585 data: 0.1523 max mem: 57114 Epoch: [100] [ 20/156] eta: 0:01:47 lr: 0.006490 min_lr: 0.006490 loss: 3.4746 (3.3803) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0003 max mem: 57114 Epoch: [100] [ 30/156] eta: 0:01:36 lr: 0.006487 min_lr: 0.006487 loss: 3.5336 (3.4268) weight_decay: 0.0500 (0.0500) time: 0.7132 data: 0.0003 max mem: 57114 Epoch: [100] [ 40/156] eta: 0:01:27 lr: 0.006485 min_lr: 0.006485 loss: 3.3295 (3.3794) weight_decay: 0.0500 (0.0500) time: 0.7235 data: 0.0004 max mem: 57114 Epoch: [100] [ 50/156] eta: 0:01:19 lr: 0.006483 min_lr: 0.006483 loss: 3.0811 (3.2969) weight_decay: 0.0500 (0.0500) time: 0.7284 data: 0.0004 max mem: 57114 Epoch: [100] [ 60/156] eta: 0:01:11 lr: 0.006481 min_lr: 0.006481 loss: 3.1776 (3.3177) weight_decay: 0.0500 (0.0500) time: 0.7223 data: 0.0004 max mem: 57114 Epoch: [100] [ 70/156] eta: 0:01:03 lr: 0.006478 min_lr: 0.006478 loss: 3.4032 (3.3127) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [100] [ 80/156] eta: 0:00:56 lr: 0.006476 min_lr: 0.006476 loss: 3.4032 (3.3180) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [100] [ 90/156] eta: 0:00:48 lr: 0.006474 min_lr: 0.006474 loss: 3.1744 (3.2891) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [100] [100/156] eta: 0:00:40 lr: 0.006472 min_lr: 0.006472 loss: 3.3496 (3.2867) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0004 max mem: 57114 Epoch: [100] [110/156] eta: 0:00:33 lr: 0.006469 min_lr: 0.006469 loss: 3.4193 (3.2941) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0004 max mem: 57114 Epoch: [100] [120/156] eta: 0:00:26 lr: 0.006467 min_lr: 0.006467 loss: 3.4251 (3.3068) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [100] [130/156] eta: 0:00:18 lr: 0.006465 min_lr: 0.006465 loss: 3.4193 (3.2902) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0009 max mem: 57114 Epoch: [100] [140/156] eta: 0:00:11 lr: 0.006463 min_lr: 0.006463 loss: 3.4428 (3.2968) weight_decay: 0.0500 (0.0500) time: 0.6939 data: 0.0007 max mem: 57114 Epoch: [100] [150/156] eta: 0:00:04 lr: 0.006460 min_lr: 0.006460 loss: 3.5182 (3.3049) weight_decay: 0.0500 (0.0500) time: 0.6869 data: 0.0001 max mem: 57114 Epoch: [100] [155/156] eta: 0:00:00 lr: 0.006459 min_lr: 0.006459 loss: 3.3813 (3.3024) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0001 max mem: 57114 Epoch: [100] Total time: 0:01:52 (0.7213 s / it) Averaged stats: lr: 0.006459 min_lr: 0.006459 loss: 3.3813 (3.2927) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8786 (0.8786) acc1: 83.8542 (83.8542) acc5: 96.3542 (96.3542) time: 2.1034 data: 1.8468 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9532 (0.9645) acc1: 78.6458 (76.4706) acc5: 96.3542 (93.6061) time: 0.5715 data: 0.3694 max mem: 57114 Test: Total time: 0:00:02 (0.5923 s / it) * Acc@1 76.592 Acc@5 93.552 loss 1.075 Accuracy of the model on the 50000 test images: 76.6% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.3737 (8.3737) acc1: 1.0417 (1.0417) acc5: 4.1667 (4.1667) time: 2.3032 data: 2.0599 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.3737 (8.2013) acc1: 1.0417 (1.1509) acc5: 3.1250 (2.5575) time: 0.6113 data: 0.4121 max mem: 57114 Test: Total time: 0:00:03 (0.6227 s / it) * Acc@1 0.583 Acc@5 1.702 loss 8.159 Accuracy of the model EMA on 50000 test images: 0.6% Max EMA accuracy: 0.58% Epoch: [101] [ 0/156] eta: 0:07:07 lr: 0.006459 min_lr: 0.006459 loss: 3.1896 (3.1896) weight_decay: 0.0500 (0.0500) time: 2.7382 data: 2.0843 max mem: 57114 Epoch: [101] [ 10/156] eta: 0:02:10 lr: 0.006457 min_lr: 0.006457 loss: 3.2544 (3.0789) weight_decay: 0.0500 (0.0500) time: 0.8915 data: 0.1898 max mem: 57114 Epoch: [101] [ 20/156] eta: 0:01:50 lr: 0.006454 min_lr: 0.006454 loss: 3.4935 (3.2709) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0004 max mem: 57114 Epoch: [101] [ 30/156] eta: 0:01:39 lr: 0.006452 min_lr: 0.006452 loss: 3.4935 (3.2541) weight_decay: 0.0500 (0.0500) time: 0.7354 data: 0.0004 max mem: 57114 Epoch: [101] [ 40/156] eta: 0:01:29 lr: 0.006450 min_lr: 0.006450 loss: 3.5026 (3.2692) weight_decay: 0.0500 (0.0500) time: 0.7278 data: 0.0004 max mem: 57114 Epoch: [101] [ 50/156] eta: 0:01:20 lr: 0.006448 min_lr: 0.006448 loss: 3.4286 (3.2625) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0004 max mem: 57114 Epoch: [101] [ 60/156] eta: 0:01:12 lr: 0.006445 min_lr: 0.006445 loss: 3.3349 (3.2848) weight_decay: 0.0500 (0.0500) time: 0.7256 data: 0.0004 max mem: 57114 Epoch: [101] [ 70/156] eta: 0:01:04 lr: 0.006443 min_lr: 0.006443 loss: 3.3077 (3.2906) weight_decay: 0.0500 (0.0500) time: 0.7187 data: 0.0005 max mem: 57114 Epoch: [101] [ 80/156] eta: 0:00:56 lr: 0.006441 min_lr: 0.006441 loss: 3.3148 (3.3066) weight_decay: 0.0500 (0.0500) time: 0.6995 data: 0.0005 max mem: 57114 Epoch: [101] [ 90/156] eta: 0:00:48 lr: 0.006438 min_lr: 0.006438 loss: 3.4606 (3.3182) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0004 max mem: 57114 Epoch: [101] [100/156] eta: 0:00:41 lr: 0.006436 min_lr: 0.006436 loss: 3.4117 (3.3105) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [101] [110/156] eta: 0:00:33 lr: 0.006434 min_lr: 0.006434 loss: 3.4117 (3.3179) weight_decay: 0.0500 (0.0500) time: 0.7067 data: 0.0004 max mem: 57114 Epoch: [101] [120/156] eta: 0:00:26 lr: 0.006432 min_lr: 0.006432 loss: 3.5245 (3.3338) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0004 max mem: 57114 Epoch: [101] [130/156] eta: 0:00:18 lr: 0.006429 min_lr: 0.006429 loss: 3.5563 (3.3378) weight_decay: 0.0500 (0.0500) time: 0.6968 data: 0.0008 max mem: 57114 Epoch: [101] [140/156] eta: 0:00:11 lr: 0.006427 min_lr: 0.006427 loss: 3.4777 (3.3411) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0007 max mem: 57114 Epoch: [101] [150/156] eta: 0:00:04 lr: 0.006425 min_lr: 0.006425 loss: 3.4777 (3.3520) weight_decay: 0.0500 (0.0500) time: 0.6858 data: 0.0001 max mem: 57114 Epoch: [101] [155/156] eta: 0:00:00 lr: 0.006424 min_lr: 0.006424 loss: 3.3110 (3.3334) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [101] Total time: 0:01:52 (0.7241 s / it) Averaged stats: lr: 0.006424 min_lr: 0.006424 loss: 3.3110 (3.2894) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8585 (0.8585) acc1: 83.3333 (83.3333) acc5: 95.8333 (95.8333) time: 2.0001 data: 1.7443 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0666 (1.0375) acc1: 77.0833 (75.9591) acc5: 94.7917 (92.8389) time: 0.5507 data: 0.3489 max mem: 57114 Test: Total time: 0:00:02 (0.5721 s / it) * Acc@1 74.946 Acc@5 92.821 loss 1.124 Accuracy of the model on the 50000 test images: 74.9% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.2829 (8.2829) acc1: 1.0417 (1.0417) acc5: 4.1667 (4.1667) time: 2.2745 data: 2.0309 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.2829 (8.1102) acc1: 1.0417 (1.1509) acc5: 3.1250 (2.5575) time: 0.6057 data: 0.4063 max mem: 57114 Test: Total time: 0:00:03 (0.6186 s / it) * Acc@1 0.661 Acc@5 1.868 loss 8.085 Accuracy of the model EMA on 50000 test images: 0.7% Max EMA accuracy: 0.66% Epoch: [102] [ 0/156] eta: 0:07:37 lr: 0.006423 min_lr: 0.006423 loss: 3.6148 (3.6148) weight_decay: 0.0500 (0.0500) time: 2.9307 data: 2.2816 max mem: 57114 Epoch: [102] [ 10/156] eta: 0:02:16 lr: 0.006421 min_lr: 0.006421 loss: 3.2027 (3.1697) weight_decay: 0.0500 (0.0500) time: 0.9316 data: 0.2077 max mem: 57114 Epoch: [102] [ 20/156] eta: 0:01:53 lr: 0.006419 min_lr: 0.006419 loss: 2.9268 (2.9921) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0003 max mem: 57114 Epoch: [102] [ 30/156] eta: 0:01:39 lr: 0.006416 min_lr: 0.006416 loss: 3.1548 (3.0585) weight_decay: 0.0500 (0.0500) time: 0.7150 data: 0.0003 max mem: 57114 Epoch: [102] [ 40/156] eta: 0:01:29 lr: 0.006414 min_lr: 0.006414 loss: 3.1897 (3.0785) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0003 max mem: 57114 Epoch: [102] [ 50/156] eta: 0:01:20 lr: 0.006412 min_lr: 0.006412 loss: 3.1848 (3.1309) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0004 max mem: 57114 Epoch: [102] [ 60/156] eta: 0:01:12 lr: 0.006410 min_lr: 0.006410 loss: 3.4762 (3.1363) weight_decay: 0.0500 (0.0500) time: 0.7236 data: 0.0004 max mem: 57114 Epoch: [102] [ 70/156] eta: 0:01:04 lr: 0.006407 min_lr: 0.006407 loss: 3.3943 (3.1414) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0004 max mem: 57114 Epoch: [102] [ 80/156] eta: 0:00:56 lr: 0.006405 min_lr: 0.006405 loss: 3.0738 (3.1338) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0004 max mem: 57114 Epoch: [102] [ 90/156] eta: 0:00:49 lr: 0.006403 min_lr: 0.006403 loss: 3.2409 (3.1727) weight_decay: 0.0500 (0.0500) time: 0.7150 data: 0.0004 max mem: 57114 Epoch: [102] [100/156] eta: 0:00:41 lr: 0.006400 min_lr: 0.006400 loss: 3.3236 (3.1641) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0004 max mem: 57114 Epoch: [102] [110/156] eta: 0:00:33 lr: 0.006398 min_lr: 0.006398 loss: 3.2706 (3.1727) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0004 max mem: 57114 Epoch: [102] [120/156] eta: 0:00:26 lr: 0.006396 min_lr: 0.006396 loss: 3.2706 (3.1799) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0004 max mem: 57114 Epoch: [102] [130/156] eta: 0:00:18 lr: 0.006393 min_lr: 0.006393 loss: 3.3320 (3.1912) weight_decay: 0.0500 (0.0500) time: 0.6991 data: 0.0006 max mem: 57114 Epoch: [102] [140/156] eta: 0:00:11 lr: 0.006391 min_lr: 0.006391 loss: 3.4235 (3.1933) weight_decay: 0.0500 (0.0500) time: 0.6902 data: 0.0005 max mem: 57114 Epoch: [102] [150/156] eta: 0:00:04 lr: 0.006389 min_lr: 0.006389 loss: 3.1836 (3.1909) weight_decay: 0.0500 (0.0500) time: 0.6809 data: 0.0001 max mem: 57114 Epoch: [102] [155/156] eta: 0:00:00 lr: 0.006388 min_lr: 0.006388 loss: 3.4235 (3.1977) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [102] Total time: 0:01:53 (0.7245 s / it) Averaged stats: lr: 0.006388 min_lr: 0.006388 loss: 3.4235 (3.2955) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9536 (0.9536) acc1: 82.2917 (82.2917) acc5: 97.3958 (97.3958) time: 2.0626 data: 1.8072 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1188 (1.0703) acc1: 77.6042 (76.3427) acc5: 95.8333 (94.3734) time: 0.5633 data: 0.3615 max mem: 57114 Test: Total time: 0:00:02 (0.5850 s / it) * Acc@1 76.363 Acc@5 93.446 loss 1.151 Accuracy of the model on the 50000 test images: 76.4% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.1945 (8.1945) acc1: 1.5625 (1.5625) acc5: 4.1667 (4.1667) time: 2.3866 data: 2.1431 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.1945 (8.0177) acc1: 1.5625 (1.2788) acc5: 3.1250 (2.8133) time: 0.6280 data: 0.4287 max mem: 57114 Test: Total time: 0:00:03 (0.6406 s / it) * Acc@1 0.733 Acc@5 2.032 loss 8.014 Accuracy of the model EMA on 50000 test images: 0.7% Max EMA accuracy: 0.73% Epoch: [103] [ 0/156] eta: 0:06:22 lr: 0.006388 min_lr: 0.006388 loss: 2.3485 (2.3485) weight_decay: 0.0500 (0.0500) time: 2.4530 data: 1.8027 max mem: 57114 Epoch: [103] [ 10/156] eta: 0:02:11 lr: 0.006385 min_lr: 0.006385 loss: 3.4534 (3.3692) weight_decay: 0.0500 (0.0500) time: 0.9002 data: 0.1642 max mem: 57114 Epoch: [103] [ 20/156] eta: 0:01:49 lr: 0.006383 min_lr: 0.006383 loss: 3.4710 (3.3424) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0003 max mem: 57114 Epoch: [103] [ 30/156] eta: 0:01:38 lr: 0.006381 min_lr: 0.006381 loss: 3.3585 (3.2857) weight_decay: 0.0500 (0.0500) time: 0.7148 data: 0.0003 max mem: 57114 Epoch: [103] [ 40/156] eta: 0:01:28 lr: 0.006378 min_lr: 0.006378 loss: 3.2629 (3.2705) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0004 max mem: 57114 Epoch: [103] [ 50/156] eta: 0:01:20 lr: 0.006376 min_lr: 0.006376 loss: 3.1898 (3.2800) weight_decay: 0.0500 (0.0500) time: 0.7221 data: 0.0004 max mem: 57114 Epoch: [103] [ 60/156] eta: 0:01:12 lr: 0.006374 min_lr: 0.006374 loss: 3.3343 (3.2796) weight_decay: 0.0500 (0.0500) time: 0.7320 data: 0.0004 max mem: 57114 Epoch: [103] [ 70/156] eta: 0:01:04 lr: 0.006371 min_lr: 0.006371 loss: 3.5966 (3.3192) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [103] [ 80/156] eta: 0:00:56 lr: 0.006369 min_lr: 0.006369 loss: 3.6276 (3.3393) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [103] [ 90/156] eta: 0:00:48 lr: 0.006367 min_lr: 0.006367 loss: 3.4978 (3.3449) weight_decay: 0.0500 (0.0500) time: 0.7105 data: 0.0004 max mem: 57114 Epoch: [103] [100/156] eta: 0:00:41 lr: 0.006364 min_lr: 0.006364 loss: 3.3906 (3.3331) weight_decay: 0.0500 (0.0500) time: 0.7007 data: 0.0003 max mem: 57114 Epoch: [103] [110/156] eta: 0:00:33 lr: 0.006362 min_lr: 0.006362 loss: 3.2835 (3.3018) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0004 max mem: 57114 Epoch: [103] [120/156] eta: 0:00:26 lr: 0.006360 min_lr: 0.006360 loss: 3.0972 (3.2892) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0004 max mem: 57114 Epoch: [103] [130/156] eta: 0:00:18 lr: 0.006357 min_lr: 0.006357 loss: 3.2277 (3.2979) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0009 max mem: 57114 Epoch: [103] [140/156] eta: 0:00:11 lr: 0.006355 min_lr: 0.006355 loss: 3.6596 (3.3239) weight_decay: 0.0500 (0.0500) time: 0.6885 data: 0.0007 max mem: 57114 Epoch: [103] [150/156] eta: 0:00:04 lr: 0.006353 min_lr: 0.006353 loss: 3.5220 (3.3053) weight_decay: 0.0500 (0.0500) time: 0.6840 data: 0.0001 max mem: 57114 Epoch: [103] [155/156] eta: 0:00:00 lr: 0.006352 min_lr: 0.006352 loss: 3.3954 (3.3100) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0001 max mem: 57114 Epoch: [103] Total time: 0:01:52 (0.7221 s / it) Averaged stats: lr: 0.006352 min_lr: 0.006352 loss: 3.3954 (3.2883) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8746 (0.8746) acc1: 83.3333 (83.3333) acc5: 96.3542 (96.3542) time: 2.0476 data: 1.7914 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0551 (1.0379) acc1: 75.5208 (75.9591) acc5: 95.8333 (93.4783) time: 0.5603 data: 0.3584 max mem: 57114 Test: Total time: 0:00:02 (0.5786 s / it) * Acc@1 76.457 Acc@5 93.632 loss 1.109 Accuracy of the model on the 50000 test images: 76.5% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 8.0971 (8.0971) acc1: 1.5625 (1.5625) acc5: 4.6875 (4.6875) time: 2.3024 data: 2.0588 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 8.0971 (7.9169) acc1: 1.5625 (1.4066) acc5: 3.1250 (3.4527) time: 0.6111 data: 0.4118 max mem: 57114 Test: Total time: 0:00:03 (0.6206 s / it) * Acc@1 0.825 Acc@5 2.322 loss 7.939 Accuracy of the model EMA on 50000 test images: 0.8% Max EMA accuracy: 0.83% Epoch: [104] [ 0/156] eta: 0:06:58 lr: 0.006351 min_lr: 0.006351 loss: 3.4217 (3.4217) weight_decay: 0.0500 (0.0500) time: 2.6807 data: 2.0314 max mem: 57114 Epoch: [104] [ 10/156] eta: 0:02:07 lr: 0.006349 min_lr: 0.006349 loss: 3.3741 (3.2625) weight_decay: 0.0500 (0.0500) time: 0.8755 data: 0.1850 max mem: 57114 Epoch: [104] [ 20/156] eta: 0:01:49 lr: 0.006347 min_lr: 0.006347 loss: 3.2843 (3.2167) weight_decay: 0.0500 (0.0500) time: 0.7119 data: 0.0004 max mem: 57114 Epoch: [104] [ 30/156] eta: 0:01:38 lr: 0.006344 min_lr: 0.006344 loss: 2.9149 (3.1418) weight_decay: 0.0500 (0.0500) time: 0.7261 data: 0.0003 max mem: 57114 Epoch: [104] [ 40/156] eta: 0:01:29 lr: 0.006342 min_lr: 0.006342 loss: 3.0900 (3.1957) weight_decay: 0.0500 (0.0500) time: 0.7291 data: 0.0004 max mem: 57114 Epoch: [104] [ 50/156] eta: 0:01:20 lr: 0.006340 min_lr: 0.006340 loss: 3.3372 (3.1644) weight_decay: 0.0500 (0.0500) time: 0.7282 data: 0.0004 max mem: 57114 Epoch: [104] [ 60/156] eta: 0:01:12 lr: 0.006337 min_lr: 0.006337 loss: 3.3372 (3.2028) weight_decay: 0.0500 (0.0500) time: 0.7181 data: 0.0004 max mem: 57114 Epoch: [104] [ 70/156] eta: 0:01:04 lr: 0.006335 min_lr: 0.006335 loss: 3.2609 (3.1947) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [104] [ 80/156] eta: 0:00:56 lr: 0.006333 min_lr: 0.006333 loss: 3.0610 (3.1897) weight_decay: 0.0500 (0.0500) time: 0.7124 data: 0.0004 max mem: 57114 Epoch: [104] [ 90/156] eta: 0:00:48 lr: 0.006330 min_lr: 0.006330 loss: 3.3641 (3.2058) weight_decay: 0.0500 (0.0500) time: 0.7167 data: 0.0004 max mem: 57114 Epoch: [104] [100/156] eta: 0:00:41 lr: 0.006328 min_lr: 0.006328 loss: 3.3474 (3.2130) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0004 max mem: 57114 Epoch: [104] [110/156] eta: 0:00:33 lr: 0.006326 min_lr: 0.006326 loss: 3.3874 (3.2205) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0004 max mem: 57114 Epoch: [104] [120/156] eta: 0:00:26 lr: 0.006323 min_lr: 0.006323 loss: 3.4106 (3.2153) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0003 max mem: 57114 Epoch: [104] [130/156] eta: 0:00:18 lr: 0.006321 min_lr: 0.006321 loss: 3.1572 (3.2054) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0008 max mem: 57114 Epoch: [104] [140/156] eta: 0:00:11 lr: 0.006319 min_lr: 0.006319 loss: 3.1905 (3.2135) weight_decay: 0.0500 (0.0500) time: 0.6877 data: 0.0007 max mem: 57114 Epoch: [104] [150/156] eta: 0:00:04 lr: 0.006316 min_lr: 0.006316 loss: 3.4174 (3.2195) weight_decay: 0.0500 (0.0500) time: 0.6837 data: 0.0001 max mem: 57114 Epoch: [104] [155/156] eta: 0:00:00 lr: 0.006315 min_lr: 0.006315 loss: 3.4560 (3.2249) weight_decay: 0.0500 (0.0500) time: 0.6897 data: 0.0001 max mem: 57114 Epoch: [104] Total time: 0:01:53 (0.7256 s / it) Averaged stats: lr: 0.006315 min_lr: 0.006315 loss: 3.4560 (3.2794) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9425 (0.9425) acc1: 82.2917 (82.2917) acc5: 96.3542 (96.3542) time: 2.0760 data: 1.8205 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0973 (1.0892) acc1: 80.2083 (76.8542) acc5: 96.3542 (93.2225) time: 0.5658 data: 0.3642 max mem: 57114 Test: Total time: 0:00:02 (0.5897 s / it) * Acc@1 76.037 Acc@5 93.203 loss 1.152 Accuracy of the model on the 50000 test images: 76.0% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 7.9901 (7.9901) acc1: 3.1250 (3.1250) acc5: 4.6875 (4.6875) time: 2.3381 data: 2.0947 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.9901 (7.8051) acc1: 2.0833 (1.9182) acc5: 3.6458 (3.8363) time: 0.6182 data: 0.4190 max mem: 57114 Test: Total time: 0:00:03 (0.6353 s / it) * Acc@1 0.957 Acc@5 2.619 loss 7.861 Accuracy of the model EMA on 50000 test images: 1.0% Max EMA accuracy: 0.96% Epoch: [105] [ 0/156] eta: 0:07:20 lr: 0.006315 min_lr: 0.006315 loss: 2.1503 (2.1503) weight_decay: 0.0500 (0.0500) time: 2.8211 data: 2.1625 max mem: 57114 Epoch: [105] [ 10/156] eta: 0:02:13 lr: 0.006313 min_lr: 0.006313 loss: 3.2903 (3.1135) weight_decay: 0.0500 (0.0500) time: 0.9125 data: 0.1968 max mem: 57114 Epoch: [105] [ 20/156] eta: 0:01:51 lr: 0.006310 min_lr: 0.006310 loss: 3.2834 (3.0990) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0003 max mem: 57114 Epoch: [105] [ 30/156] eta: 0:01:39 lr: 0.006308 min_lr: 0.006308 loss: 3.2782 (3.1636) weight_decay: 0.0500 (0.0500) time: 0.7203 data: 0.0003 max mem: 57114 Epoch: [105] [ 40/156] eta: 0:01:30 lr: 0.006306 min_lr: 0.006306 loss: 3.1726 (3.1594) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0003 max mem: 57114 Epoch: [105] [ 50/156] eta: 0:01:20 lr: 0.006303 min_lr: 0.006303 loss: 3.1829 (3.1954) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [105] [ 60/156] eta: 0:01:12 lr: 0.006301 min_lr: 0.006301 loss: 3.4861 (3.2450) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0004 max mem: 57114 Epoch: [105] [ 70/156] eta: 0:01:04 lr: 0.006298 min_lr: 0.006298 loss: 3.4443 (3.2456) weight_decay: 0.0500 (0.0500) time: 0.7261 data: 0.0003 max mem: 57114 Epoch: [105] [ 80/156] eta: 0:00:56 lr: 0.006296 min_lr: 0.006296 loss: 3.2428 (3.2367) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0004 max mem: 57114 Epoch: [105] [ 90/156] eta: 0:00:48 lr: 0.006294 min_lr: 0.006294 loss: 3.2428 (3.2345) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [105] [100/156] eta: 0:00:41 lr: 0.006291 min_lr: 0.006291 loss: 3.2994 (3.2444) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [105] [110/156] eta: 0:00:33 lr: 0.006289 min_lr: 0.006289 loss: 3.3244 (3.2421) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0004 max mem: 57114 Epoch: [105] [120/156] eta: 0:00:26 lr: 0.006287 min_lr: 0.006287 loss: 3.4886 (3.2680) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0004 max mem: 57114 Epoch: [105] [130/156] eta: 0:00:18 lr: 0.006284 min_lr: 0.006284 loss: 3.5186 (3.2793) weight_decay: 0.0500 (0.0500) time: 0.7036 data: 0.0009 max mem: 57114 Epoch: [105] [140/156] eta: 0:00:11 lr: 0.006282 min_lr: 0.006282 loss: 3.4790 (3.2816) weight_decay: 0.0500 (0.0500) time: 0.6957 data: 0.0007 max mem: 57114 Epoch: [105] [150/156] eta: 0:00:04 lr: 0.006280 min_lr: 0.006280 loss: 3.2522 (3.2720) weight_decay: 0.0500 (0.0500) time: 0.6805 data: 0.0001 max mem: 57114 Epoch: [105] [155/156] eta: 0:00:00 lr: 0.006278 min_lr: 0.006278 loss: 3.0658 (3.2625) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [105] Total time: 0:01:53 (0.7248 s / it) Averaged stats: lr: 0.006278 min_lr: 0.006278 loss: 3.0658 (3.2804) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8134 (0.8134) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.0082 data: 1.7524 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9374 (0.8985) acc1: 79.6875 (78.7724) acc5: 95.8333 (93.8619) time: 0.5524 data: 0.3506 max mem: 57114 Test: Total time: 0:00:02 (0.5727 s / it) * Acc@1 76.590 Acc@5 93.700 loss 1.032 Accuracy of the model on the 50000 test images: 76.6% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 7.8365 (7.8365) acc1: 3.6458 (3.6458) acc5: 5.2083 (5.2083) time: 2.2916 data: 2.0481 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.8365 (7.6530) acc1: 2.0833 (2.0460) acc5: 4.1667 (4.7315) time: 0.6090 data: 0.4097 max mem: 57114 Test: Total time: 0:00:03 (0.6186 s / it) * Acc@1 1.109 Acc@5 3.033 loss 7.752 Accuracy of the model EMA on 50000 test images: 1.1% Max EMA accuracy: 1.11% Epoch: [106] [ 0/156] eta: 0:06:35 lr: 0.006278 min_lr: 0.006278 loss: 3.4170 (3.4170) weight_decay: 0.0500 (0.0500) time: 2.5372 data: 1.8812 max mem: 57114 Epoch: [106] [ 10/156] eta: 0:02:09 lr: 0.006276 min_lr: 0.006276 loss: 3.3510 (3.1935) weight_decay: 0.0500 (0.0500) time: 0.8878 data: 0.1713 max mem: 57114 Epoch: [106] [ 20/156] eta: 0:01:48 lr: 0.006273 min_lr: 0.006273 loss: 3.3145 (3.1094) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0003 max mem: 57114 Epoch: [106] [ 30/156] eta: 0:01:37 lr: 0.006271 min_lr: 0.006271 loss: 3.3145 (3.1766) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0004 max mem: 57114 Epoch: [106] [ 40/156] eta: 0:01:28 lr: 0.006269 min_lr: 0.006269 loss: 3.3736 (3.2061) weight_decay: 0.0500 (0.0500) time: 0.7303 data: 0.0004 max mem: 57114 Epoch: [106] [ 50/156] eta: 0:01:20 lr: 0.006266 min_lr: 0.006266 loss: 3.4092 (3.2741) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0004 max mem: 57114 Epoch: [106] [ 60/156] eta: 0:01:12 lr: 0.006264 min_lr: 0.006264 loss: 3.4455 (3.2418) weight_decay: 0.0500 (0.0500) time: 0.7240 data: 0.0004 max mem: 57114 Epoch: [106] [ 70/156] eta: 0:01:04 lr: 0.006262 min_lr: 0.006262 loss: 3.2877 (3.2467) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0004 max mem: 57114 Epoch: [106] [ 80/156] eta: 0:00:56 lr: 0.006259 min_lr: 0.006259 loss: 3.3360 (3.2462) weight_decay: 0.0500 (0.0500) time: 0.7105 data: 0.0004 max mem: 57114 Epoch: [106] [ 90/156] eta: 0:00:48 lr: 0.006257 min_lr: 0.006257 loss: 3.3562 (3.2428) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0004 max mem: 57114 Epoch: [106] [100/156] eta: 0:00:41 lr: 0.006254 min_lr: 0.006254 loss: 3.2761 (3.2380) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0004 max mem: 57114 Epoch: [106] [110/156] eta: 0:00:33 lr: 0.006252 min_lr: 0.006252 loss: 3.4701 (3.2531) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0004 max mem: 57114 Epoch: [106] [120/156] eta: 0:00:26 lr: 0.006250 min_lr: 0.006250 loss: 3.4539 (3.2462) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0004 max mem: 57114 Epoch: [106] [130/156] eta: 0:00:18 lr: 0.006247 min_lr: 0.006247 loss: 3.4307 (3.2435) weight_decay: 0.0500 (0.0500) time: 0.7028 data: 0.0008 max mem: 57114 Epoch: [106] [140/156] eta: 0:00:11 lr: 0.006245 min_lr: 0.006245 loss: 3.2321 (3.2412) weight_decay: 0.0500 (0.0500) time: 0.6912 data: 0.0007 max mem: 57114 Epoch: [106] [150/156] eta: 0:00:04 lr: 0.006243 min_lr: 0.006243 loss: 3.2321 (3.2458) weight_decay: 0.0500 (0.0500) time: 0.6826 data: 0.0001 max mem: 57114 Epoch: [106] [155/156] eta: 0:00:00 lr: 0.006241 min_lr: 0.006241 loss: 3.2088 (3.2481) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [106] Total time: 0:01:52 (0.7216 s / it) Averaged stats: lr: 0.006241 min_lr: 0.006241 loss: 3.2088 (3.2746) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9000 (0.9000) acc1: 81.7708 (81.7708) acc5: 96.3542 (96.3542) time: 2.0307 data: 1.7749 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0351 (1.0091) acc1: 79.6875 (78.0051) acc5: 94.7917 (93.8619) time: 0.5571 data: 0.3551 max mem: 57114 Test: Total time: 0:00:02 (0.5791 s / it) * Acc@1 76.281 Acc@5 93.336 loss 1.108 Accuracy of the model on the 50000 test images: 76.3% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 7.6551 (7.6551) acc1: 3.6458 (3.6458) acc5: 5.2083 (5.2083) time: 2.2604 data: 2.0170 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.6551 (7.4754) acc1: 2.0833 (2.0460) acc5: 5.2083 (5.6266) time: 0.6027 data: 0.4035 max mem: 57114 Test: Total time: 0:00:03 (0.6196 s / it) * Acc@1 1.301 Acc@5 3.581 loss 7.624 Accuracy of the model EMA on 50000 test images: 1.3% Max EMA accuracy: 1.30% Epoch: [107] [ 0/156] eta: 0:06:05 lr: 0.006241 min_lr: 0.006241 loss: 3.7543 (3.7543) weight_decay: 0.0500 (0.0500) time: 2.3418 data: 1.6898 max mem: 57114 Epoch: [107] [ 10/156] eta: 0:02:05 lr: 0.006239 min_lr: 0.006239 loss: 3.5658 (3.4602) weight_decay: 0.0500 (0.0500) time: 0.8600 data: 0.1539 max mem: 57114 Epoch: [107] [ 20/156] eta: 0:01:47 lr: 0.006236 min_lr: 0.006236 loss: 3.5086 (3.4582) weight_decay: 0.0500 (0.0500) time: 0.7097 data: 0.0003 max mem: 57114 Epoch: [107] [ 30/156] eta: 0:01:36 lr: 0.006234 min_lr: 0.006234 loss: 3.2714 (3.3086) weight_decay: 0.0500 (0.0500) time: 0.7128 data: 0.0004 max mem: 57114 Epoch: [107] [ 40/156] eta: 0:01:27 lr: 0.006232 min_lr: 0.006232 loss: 3.1396 (3.2780) weight_decay: 0.0500 (0.0500) time: 0.7238 data: 0.0004 max mem: 57114 Epoch: [107] [ 50/156] eta: 0:01:19 lr: 0.006229 min_lr: 0.006229 loss: 3.4153 (3.2867) weight_decay: 0.0500 (0.0500) time: 0.7280 data: 0.0004 max mem: 57114 Epoch: [107] [ 60/156] eta: 0:01:11 lr: 0.006227 min_lr: 0.006227 loss: 3.4746 (3.3208) weight_decay: 0.0500 (0.0500) time: 0.7288 data: 0.0004 max mem: 57114 Epoch: [107] [ 70/156] eta: 0:01:03 lr: 0.006224 min_lr: 0.006224 loss: 3.5757 (3.3420) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0004 max mem: 57114 Epoch: [107] [ 80/156] eta: 0:00:56 lr: 0.006222 min_lr: 0.006222 loss: 3.5882 (3.3740) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [107] [ 90/156] eta: 0:00:48 lr: 0.006220 min_lr: 0.006220 loss: 3.4255 (3.3549) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [107] [100/156] eta: 0:00:40 lr: 0.006217 min_lr: 0.006217 loss: 3.1157 (3.3144) weight_decay: 0.0500 (0.0500) time: 0.7073 data: 0.0004 max mem: 57114 Epoch: [107] [110/156] eta: 0:00:33 lr: 0.006215 min_lr: 0.006215 loss: 3.1157 (3.3078) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0004 max mem: 57114 Epoch: [107] [120/156] eta: 0:00:26 lr: 0.006212 min_lr: 0.006212 loss: 3.4430 (3.2929) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [107] [130/156] eta: 0:00:18 lr: 0.006210 min_lr: 0.006210 loss: 3.3331 (3.2889) weight_decay: 0.0500 (0.0500) time: 0.6993 data: 0.0010 max mem: 57114 Epoch: [107] [140/156] eta: 0:00:11 lr: 0.006208 min_lr: 0.006208 loss: 3.4476 (3.3051) weight_decay: 0.0500 (0.0500) time: 0.6805 data: 0.0009 max mem: 57114 Epoch: [107] [150/156] eta: 0:00:04 lr: 0.006205 min_lr: 0.006205 loss: 3.4476 (3.3148) weight_decay: 0.0500 (0.0500) time: 0.6793 data: 0.0001 max mem: 57114 Epoch: [107] [155/156] eta: 0:00:00 lr: 0.006204 min_lr: 0.006204 loss: 3.4326 (3.3169) weight_decay: 0.0500 (0.0500) time: 0.6847 data: 0.0001 max mem: 57114 Epoch: [107] Total time: 0:01:52 (0.7212 s / it) Averaged stats: lr: 0.006204 min_lr: 0.006204 loss: 3.4326 (3.2650) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0247 (1.0247) acc1: 83.8542 (83.8542) acc5: 96.8750 (96.8750) time: 2.0200 data: 1.7642 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1218 (1.0633) acc1: 76.5625 (77.8772) acc5: 95.3125 (93.8619) time: 0.5547 data: 0.3529 max mem: 57114 Test: Total time: 0:00:02 (0.5757 s / it) * Acc@1 75.975 Acc@5 93.370 loss 1.170 Accuracy of the model on the 50000 test images: 76.0% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 7.4317 (7.4317) acc1: 3.6458 (3.6458) acc5: 6.2500 (6.2500) time: 2.2867 data: 2.0431 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.4317 (7.2645) acc1: 2.0833 (2.0460) acc5: 6.2500 (6.2660) time: 0.6080 data: 0.4087 max mem: 57114 Test: Total time: 0:00:03 (0.6225 s / it) * Acc@1 1.561 Acc@5 4.204 loss 7.471 Accuracy of the model EMA on 50000 test images: 1.6% Max EMA accuracy: 1.56% Epoch: [108] [ 0/156] eta: 0:07:35 lr: 0.006204 min_lr: 0.006204 loss: 4.0741 (4.0741) weight_decay: 0.0500 (0.0500) time: 2.9190 data: 2.2690 max mem: 57114 Epoch: [108] [ 10/156] eta: 0:02:15 lr: 0.006201 min_lr: 0.006201 loss: 3.2684 (3.1007) weight_decay: 0.0500 (0.0500) time: 0.9268 data: 0.2066 max mem: 57114 Epoch: [108] [ 20/156] eta: 0:01:53 lr: 0.006199 min_lr: 0.006199 loss: 3.3218 (3.2556) weight_decay: 0.0500 (0.0500) time: 0.7296 data: 0.0003 max mem: 57114 Epoch: [108] [ 30/156] eta: 0:01:41 lr: 0.006197 min_lr: 0.006197 loss: 3.4960 (3.2693) weight_decay: 0.0500 (0.0500) time: 0.7392 data: 0.0003 max mem: 57114 Epoch: [108] [ 40/156] eta: 0:01:30 lr: 0.006194 min_lr: 0.006194 loss: 3.3199 (3.2434) weight_decay: 0.0500 (0.0500) time: 0.7277 data: 0.0004 max mem: 57114 Epoch: [108] [ 50/156] eta: 0:01:21 lr: 0.006192 min_lr: 0.006192 loss: 3.2749 (3.2470) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0004 max mem: 57114 Epoch: [108] [ 60/156] eta: 0:01:13 lr: 0.006189 min_lr: 0.006189 loss: 3.1098 (3.2104) weight_decay: 0.0500 (0.0500) time: 0.7206 data: 0.0004 max mem: 57114 Epoch: [108] [ 70/156] eta: 0:01:05 lr: 0.006187 min_lr: 0.006187 loss: 3.4582 (3.2507) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0004 max mem: 57114 Epoch: [108] [ 80/156] eta: 0:00:57 lr: 0.006185 min_lr: 0.006185 loss: 3.4654 (3.2283) weight_decay: 0.0500 (0.0500) time: 0.7172 data: 0.0004 max mem: 57114 Epoch: [108] [ 90/156] eta: 0:00:49 lr: 0.006182 min_lr: 0.006182 loss: 2.9808 (3.2128) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [108] [100/156] eta: 0:00:41 lr: 0.006180 min_lr: 0.006180 loss: 3.2976 (3.2323) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0003 max mem: 57114 Epoch: [108] [110/156] eta: 0:00:34 lr: 0.006177 min_lr: 0.006177 loss: 3.4667 (3.2443) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0003 max mem: 57114 Epoch: [108] [120/156] eta: 0:00:26 lr: 0.006175 min_lr: 0.006175 loss: 3.4051 (3.2340) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [108] [130/156] eta: 0:00:19 lr: 0.006173 min_lr: 0.006173 loss: 3.4143 (3.2402) weight_decay: 0.0500 (0.0500) time: 0.7002 data: 0.0008 max mem: 57114 Epoch: [108] [140/156] eta: 0:00:11 lr: 0.006170 min_lr: 0.006170 loss: 3.3509 (3.2252) weight_decay: 0.0500 (0.0500) time: 0.6884 data: 0.0007 max mem: 57114 Epoch: [108] [150/156] eta: 0:00:04 lr: 0.006168 min_lr: 0.006168 loss: 3.3579 (3.2318) weight_decay: 0.0500 (0.0500) time: 0.6825 data: 0.0002 max mem: 57114 Epoch: [108] [155/156] eta: 0:00:00 lr: 0.006166 min_lr: 0.006166 loss: 3.3675 (3.2400) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0002 max mem: 57114 Epoch: [108] Total time: 0:01:53 (0.7290 s / it) Averaged stats: lr: 0.006166 min_lr: 0.006166 loss: 3.3675 (3.2764) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8376 (0.8376) acc1: 84.8958 (84.8958) acc5: 96.8750 (96.8750) time: 2.0755 data: 1.8196 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9625 (0.9623) acc1: 81.2500 (77.8772) acc5: 94.7917 (93.9898) time: 0.5658 data: 0.3640 max mem: 57114 Test: Total time: 0:00:02 (0.5879 s / it) * Acc@1 76.557 Acc@5 93.590 loss 1.074 Accuracy of the model on the 50000 test images: 76.6% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 7.1809 (7.1809) acc1: 4.1667 (4.1667) acc5: 7.8125 (7.8125) time: 2.2613 data: 2.0178 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 7.1809 (7.0319) acc1: 2.6042 (2.8133) acc5: 7.8125 (7.8005) time: 0.6029 data: 0.4036 max mem: 57114 Test: Total time: 0:00:03 (0.6146 s / it) * Acc@1 1.886 Acc@5 5.075 loss 7.300 Accuracy of the model EMA on 50000 test images: 1.9% Max EMA accuracy: 1.89% Epoch: [109] [ 0/156] eta: 0:05:27 lr: 0.006166 min_lr: 0.006166 loss: 3.4467 (3.4467) weight_decay: 0.0500 (0.0500) time: 2.1002 data: 1.4464 max mem: 57114 Epoch: [109] [ 10/156] eta: 0:02:03 lr: 0.006164 min_lr: 0.006164 loss: 3.3674 (3.0996) weight_decay: 0.0500 (0.0500) time: 0.8465 data: 0.1317 max mem: 57114 Epoch: [109] [ 20/156] eta: 0:01:47 lr: 0.006161 min_lr: 0.006161 loss: 2.9836 (3.0884) weight_decay: 0.0500 (0.0500) time: 0.7265 data: 0.0003 max mem: 57114 Epoch: [109] [ 30/156] eta: 0:01:37 lr: 0.006159 min_lr: 0.006159 loss: 2.9836 (3.0470) weight_decay: 0.0500 (0.0500) time: 0.7297 data: 0.0004 max mem: 57114 Epoch: [109] [ 40/156] eta: 0:01:28 lr: 0.006157 min_lr: 0.006157 loss: 3.2765 (3.1068) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0003 max mem: 57114 Epoch: [109] [ 50/156] eta: 0:01:19 lr: 0.006154 min_lr: 0.006154 loss: 3.3772 (3.1092) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0004 max mem: 57114 Epoch: [109] [ 60/156] eta: 0:01:11 lr: 0.006152 min_lr: 0.006152 loss: 2.9480 (3.1225) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0004 max mem: 57114 Epoch: [109] [ 70/156] eta: 0:01:03 lr: 0.006149 min_lr: 0.006149 loss: 3.3441 (3.1457) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0004 max mem: 57114 Epoch: [109] [ 80/156] eta: 0:00:55 lr: 0.006147 min_lr: 0.006147 loss: 3.4035 (3.1771) weight_decay: 0.0500 (0.0500) time: 0.7231 data: 0.0004 max mem: 57114 Epoch: [109] [ 90/156] eta: 0:00:48 lr: 0.006144 min_lr: 0.006144 loss: 3.3627 (3.1813) weight_decay: 0.0500 (0.0500) time: 0.7155 data: 0.0004 max mem: 57114 Epoch: [109] [100/156] eta: 0:00:40 lr: 0.006142 min_lr: 0.006142 loss: 3.3998 (3.1941) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [109] [110/156] eta: 0:00:33 lr: 0.006140 min_lr: 0.006140 loss: 3.4230 (3.2033) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [109] [120/156] eta: 0:00:26 lr: 0.006137 min_lr: 0.006137 loss: 3.4395 (3.2232) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0004 max mem: 57114 Epoch: [109] [130/156] eta: 0:00:18 lr: 0.006135 min_lr: 0.006135 loss: 3.5865 (3.2263) weight_decay: 0.0500 (0.0500) time: 0.6975 data: 0.0009 max mem: 57114 Epoch: [109] [140/156] eta: 0:00:11 lr: 0.006132 min_lr: 0.006132 loss: 3.4915 (3.2189) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0007 max mem: 57114 Epoch: [109] [150/156] eta: 0:00:04 lr: 0.006130 min_lr: 0.006130 loss: 3.3825 (3.2301) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [109] [155/156] eta: 0:00:00 lr: 0.006129 min_lr: 0.006129 loss: 3.4915 (3.2404) weight_decay: 0.0500 (0.0500) time: 0.6857 data: 0.0001 max mem: 57114 Epoch: [109] Total time: 0:01:52 (0.7205 s / it) Averaged stats: lr: 0.006129 min_lr: 0.006129 loss: 3.4915 (3.2662) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0960 (1.0960) acc1: 81.2500 (81.2500) acc5: 96.3542 (96.3542) time: 2.0373 data: 1.7813 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2023 (1.1841) acc1: 81.2500 (76.9821) acc5: 95.3125 (93.4783) time: 0.5582 data: 0.3563 max mem: 57114 Test: Total time: 0:00:02 (0.5781 s / it) * Acc@1 76.636 Acc@5 93.682 loss 1.289 Accuracy of the model on the 50000 test images: 76.6% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 6.9181 (6.9181) acc1: 5.7292 (5.7292) acc5: 10.9375 (10.9375) time: 2.2923 data: 2.0487 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.9365 (6.7919) acc1: 4.6875 (3.8363) acc5: 10.9375 (10.1023) time: 0.6091 data: 0.4098 max mem: 57114 Test: Total time: 0:00:03 (0.6208 s / it) * Acc@1 2.370 Acc@5 6.278 loss 7.118 Accuracy of the model EMA on 50000 test images: 2.4% Max EMA accuracy: 2.37% Epoch: [110] [ 0/156] eta: 0:07:29 lr: 0.006128 min_lr: 0.006128 loss: 4.0171 (4.0171) weight_decay: 0.0500 (0.0500) time: 2.8836 data: 2.2278 max mem: 57114 Epoch: [110] [ 10/156] eta: 0:02:14 lr: 0.006126 min_lr: 0.006126 loss: 3.3014 (3.3183) weight_decay: 0.0500 (0.0500) time: 0.9207 data: 0.2028 max mem: 57114 Epoch: [110] [ 20/156] eta: 0:01:51 lr: 0.006123 min_lr: 0.006123 loss: 3.1693 (3.2126) weight_decay: 0.0500 (0.0500) time: 0.7172 data: 0.0003 max mem: 57114 Epoch: [110] [ 30/156] eta: 0:01:38 lr: 0.006121 min_lr: 0.006121 loss: 3.0347 (3.1072) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0003 max mem: 57114 Epoch: [110] [ 40/156] eta: 0:01:29 lr: 0.006119 min_lr: 0.006119 loss: 3.3093 (3.1737) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0004 max mem: 57114 Epoch: [110] [ 50/156] eta: 0:01:21 lr: 0.006116 min_lr: 0.006116 loss: 3.4063 (3.2118) weight_decay: 0.0500 (0.0500) time: 0.7348 data: 0.0004 max mem: 57114 Epoch: [110] [ 60/156] eta: 0:01:12 lr: 0.006114 min_lr: 0.006114 loss: 3.4063 (3.2113) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0004 max mem: 57114 Epoch: [110] [ 70/156] eta: 0:01:04 lr: 0.006111 min_lr: 0.006111 loss: 3.1780 (3.1956) weight_decay: 0.0500 (0.0500) time: 0.7311 data: 0.0004 max mem: 57114 Epoch: [110] [ 80/156] eta: 0:00:56 lr: 0.006109 min_lr: 0.006109 loss: 3.2535 (3.2020) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [110] [ 90/156] eta: 0:00:49 lr: 0.006106 min_lr: 0.006106 loss: 3.4662 (3.2304) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [110] [100/156] eta: 0:00:41 lr: 0.006104 min_lr: 0.006104 loss: 3.4662 (3.2448) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0004 max mem: 57114 Epoch: [110] [110/156] eta: 0:00:33 lr: 0.006102 min_lr: 0.006102 loss: 3.4560 (3.2338) weight_decay: 0.0500 (0.0500) time: 0.6965 data: 0.0004 max mem: 57114 Epoch: [110] [120/156] eta: 0:00:26 lr: 0.006099 min_lr: 0.006099 loss: 3.2841 (3.2244) weight_decay: 0.0500 (0.0500) time: 0.7022 data: 0.0004 max mem: 57114 Epoch: [110] [130/156] eta: 0:00:19 lr: 0.006097 min_lr: 0.006097 loss: 3.5418 (3.2450) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0008 max mem: 57114 Epoch: [110] [140/156] eta: 0:00:11 lr: 0.006094 min_lr: 0.006094 loss: 3.3390 (3.2435) weight_decay: 0.0500 (0.0500) time: 0.6952 data: 0.0007 max mem: 57114 Epoch: [110] [150/156] eta: 0:00:04 lr: 0.006092 min_lr: 0.006092 loss: 3.3390 (3.2464) weight_decay: 0.0500 (0.0500) time: 0.6763 data: 0.0001 max mem: 57114 Epoch: [110] [155/156] eta: 0:00:00 lr: 0.006090 min_lr: 0.006090 loss: 3.3104 (3.2460) weight_decay: 0.0500 (0.0500) time: 0.6758 data: 0.0001 max mem: 57114 Epoch: [110] Total time: 0:01:53 (0.7260 s / it) Averaged stats: lr: 0.006090 min_lr: 0.006090 loss: 3.3104 (3.2679) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9250 (0.9250) acc1: 83.3333 (83.3333) acc5: 94.7917 (94.7917) time: 2.0253 data: 1.7677 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0178 (1.0100) acc1: 80.2083 (76.5985) acc5: 94.7917 (93.6061) time: 0.5558 data: 0.3536 max mem: 57114 Test: Total time: 0:00:02 (0.5814 s / it) * Acc@1 76.770 Acc@5 93.696 loss 1.103 Accuracy of the model on the 50000 test images: 76.8% Max accuracy: 76.84% Test: [0/5] eta: 0:00:11 loss: 6.6384 (6.6384) acc1: 6.2500 (6.2500) acc5: 13.5417 (13.5417) time: 2.2612 data: 2.0179 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.7214 (6.5367) acc1: 6.2500 (5.2430) acc5: 13.5417 (12.0205) time: 0.6035 data: 0.4038 max mem: 57114 Test: Total time: 0:00:03 (0.6153 s / it) * Acc@1 2.985 Acc@5 7.864 loss 6.916 Accuracy of the model EMA on 50000 test images: 3.0% Max EMA accuracy: 2.99% Epoch: [111] [ 0/156] eta: 0:07:08 lr: 0.006090 min_lr: 0.006090 loss: 3.5641 (3.5641) weight_decay: 0.0500 (0.0500) time: 2.7486 data: 2.0974 max mem: 57114 Epoch: [111] [ 10/156] eta: 0:02:11 lr: 0.006088 min_lr: 0.006088 loss: 3.4495 (3.3728) weight_decay: 0.0500 (0.0500) time: 0.8983 data: 0.1909 max mem: 57114 Epoch: [111] [ 20/156] eta: 0:01:51 lr: 0.006085 min_lr: 0.006085 loss: 3.3310 (3.2978) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0003 max mem: 57114 Epoch: [111] [ 30/156] eta: 0:01:39 lr: 0.006083 min_lr: 0.006083 loss: 3.2400 (3.2314) weight_decay: 0.0500 (0.0500) time: 0.7250 data: 0.0003 max mem: 57114 Epoch: [111] [ 40/156] eta: 0:01:30 lr: 0.006080 min_lr: 0.006080 loss: 3.2643 (3.2691) weight_decay: 0.0500 (0.0500) time: 0.7379 data: 0.0004 max mem: 57114 Epoch: [111] [ 50/156] eta: 0:01:21 lr: 0.006078 min_lr: 0.006078 loss: 3.3925 (3.2132) weight_decay: 0.0500 (0.0500) time: 0.7394 data: 0.0004 max mem: 57114 Epoch: [111] [ 60/156] eta: 0:01:12 lr: 0.006075 min_lr: 0.006075 loss: 3.3274 (3.2141) weight_decay: 0.0500 (0.0500) time: 0.7151 data: 0.0004 max mem: 57114 Epoch: [111] [ 70/156] eta: 0:01:04 lr: 0.006073 min_lr: 0.006073 loss: 3.3107 (3.1851) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [111] [ 80/156] eta: 0:00:56 lr: 0.006071 min_lr: 0.006071 loss: 3.1794 (3.1902) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0004 max mem: 57114 Epoch: [111] [ 90/156] eta: 0:00:49 lr: 0.006068 min_lr: 0.006068 loss: 3.2359 (3.2110) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0004 max mem: 57114 Epoch: [111] [100/156] eta: 0:00:41 lr: 0.006066 min_lr: 0.006066 loss: 3.3875 (3.2278) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [111] [110/156] eta: 0:00:33 lr: 0.006063 min_lr: 0.006063 loss: 3.3527 (3.2351) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0004 max mem: 57114 Epoch: [111] [120/156] eta: 0:00:26 lr: 0.006061 min_lr: 0.006061 loss: 3.3012 (3.2332) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [111] [130/156] eta: 0:00:19 lr: 0.006058 min_lr: 0.006058 loss: 3.2536 (3.2290) weight_decay: 0.0500 (0.0500) time: 0.7002 data: 0.0008 max mem: 57114 Epoch: [111] [140/156] eta: 0:00:11 lr: 0.006056 min_lr: 0.006056 loss: 3.0099 (3.2103) weight_decay: 0.0500 (0.0500) time: 0.6954 data: 0.0007 max mem: 57114 Epoch: [111] [150/156] eta: 0:00:04 lr: 0.006053 min_lr: 0.006053 loss: 3.1702 (3.2224) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [111] [155/156] eta: 0:00:00 lr: 0.006052 min_lr: 0.006052 loss: 3.3150 (3.2187) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.0001 max mem: 57114 Epoch: [111] Total time: 0:01:53 (0.7271 s / it) Averaged stats: lr: 0.006052 min_lr: 0.006052 loss: 3.3150 (3.2492) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8154 (0.8154) acc1: 82.8125 (82.8125) acc5: 96.3542 (96.3542) time: 2.0935 data: 1.8375 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9915 (0.9163) acc1: 78.6458 (77.3657) acc5: 95.8333 (94.1177) time: 0.5694 data: 0.3676 max mem: 57114 Test: Total time: 0:00:02 (0.5942 s / it) * Acc@1 77.340 Acc@5 93.938 loss 1.014 Accuracy of the model on the 50000 test images: 77.3% Max accuracy: 77.34% Test: [0/5] eta: 0:00:10 loss: 6.3525 (6.3525) acc1: 8.3333 (8.3333) acc5: 16.6667 (16.6667) time: 2.1238 data: 1.8805 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.4911 (6.2716) acc1: 8.3333 (6.6496) acc5: 16.6667 (13.8107) time: 0.5753 data: 0.3762 max mem: 57114 Test: Total time: 0:00:02 (0.5867 s / it) * Acc@1 3.834 Acc@5 9.797 loss 6.693 Accuracy of the model EMA on 50000 test images: 3.8% Max EMA accuracy: 3.83% Epoch: [112] [ 0/156] eta: 0:05:31 lr: 0.006052 min_lr: 0.006052 loss: 2.9165 (2.9165) weight_decay: 0.0500 (0.0500) time: 2.1270 data: 1.4758 max mem: 57114 Epoch: [112] [ 10/156] eta: 0:02:08 lr: 0.006049 min_lr: 0.006049 loss: 3.4394 (3.1793) weight_decay: 0.0500 (0.0500) time: 0.8788 data: 0.1344 max mem: 57114 Epoch: [112] [ 20/156] eta: 0:01:50 lr: 0.006047 min_lr: 0.006047 loss: 3.4227 (3.2027) weight_decay: 0.0500 (0.0500) time: 0.7489 data: 0.0003 max mem: 57114 Epoch: [112] [ 30/156] eta: 0:01:39 lr: 0.006044 min_lr: 0.006044 loss: 3.1249 (3.0978) weight_decay: 0.0500 (0.0500) time: 0.7448 data: 0.0004 max mem: 57114 Epoch: [112] [ 40/156] eta: 0:01:30 lr: 0.006042 min_lr: 0.006042 loss: 3.0628 (3.1285) weight_decay: 0.0500 (0.0500) time: 0.7448 data: 0.0004 max mem: 57114 Epoch: [112] [ 50/156] eta: 0:01:22 lr: 0.006039 min_lr: 0.006039 loss: 3.3909 (3.1918) weight_decay: 0.0500 (0.0500) time: 0.7463 data: 0.0004 max mem: 57114 Epoch: [112] [ 60/156] eta: 0:01:13 lr: 0.006037 min_lr: 0.006037 loss: 3.4074 (3.1713) weight_decay: 0.0500 (0.0500) time: 0.7321 data: 0.0004 max mem: 57114 Epoch: [112] [ 70/156] eta: 0:01:04 lr: 0.006035 min_lr: 0.006035 loss: 3.4432 (3.2065) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.0004 max mem: 57114 Epoch: [112] [ 80/156] eta: 0:00:57 lr: 0.006032 min_lr: 0.006032 loss: 3.4358 (3.1997) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [112] [ 90/156] eta: 0:00:49 lr: 0.006030 min_lr: 0.006030 loss: 3.1199 (3.1869) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0004 max mem: 57114 Epoch: [112] [100/156] eta: 0:00:41 lr: 0.006027 min_lr: 0.006027 loss: 3.4936 (3.2261) weight_decay: 0.0500 (0.0500) time: 0.7139 data: 0.0004 max mem: 57114 Epoch: [112] [110/156] eta: 0:00:34 lr: 0.006025 min_lr: 0.006025 loss: 3.4745 (3.2280) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0004 max mem: 57114 Epoch: [112] [120/156] eta: 0:00:26 lr: 0.006022 min_lr: 0.006022 loss: 3.4150 (3.2411) weight_decay: 0.0500 (0.0500) time: 0.7064 data: 0.0004 max mem: 57114 Epoch: [112] [130/156] eta: 0:00:19 lr: 0.006020 min_lr: 0.006020 loss: 3.4245 (3.2468) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0009 max mem: 57114 Epoch: [112] [140/156] eta: 0:00:11 lr: 0.006017 min_lr: 0.006017 loss: 3.3044 (3.2492) weight_decay: 0.0500 (0.0500) time: 0.6925 data: 0.0007 max mem: 57114 Epoch: [112] [150/156] eta: 0:00:04 lr: 0.006015 min_lr: 0.006015 loss: 3.3838 (3.2507) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [112] [155/156] eta: 0:00:00 lr: 0.006013 min_lr: 0.006013 loss: 3.3130 (3.2520) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0001 max mem: 57114 Epoch: [112] Total time: 0:01:53 (0.7293 s / it) Averaged stats: lr: 0.006013 min_lr: 0.006013 loss: 3.3130 (3.2466) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9929 (0.9929) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.1379 data: 1.8823 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0938 (1.0863) acc1: 79.1667 (78.1330) acc5: 94.7917 (93.6061) time: 0.5782 data: 0.3765 max mem: 57114 Test: Total time: 0:00:03 (0.6028 s / it) * Acc@1 76.367 Acc@5 93.636 loss 1.190 Accuracy of the model on the 50000 test images: 76.4% Max accuracy: 77.34% Test: [0/5] eta: 0:00:11 loss: 6.0599 (6.0599) acc1: 8.8542 (8.8542) acc5: 20.8333 (20.8333) time: 2.3957 data: 2.1520 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.2618 (6.0086) acc1: 8.8542 (7.6726) acc5: 19.7917 (16.4962) time: 0.6298 data: 0.4305 max mem: 57114 Test: Total time: 0:00:03 (0.6418 s / it) * Acc@1 4.965 Acc@5 12.108 loss 6.458 Accuracy of the model EMA on 50000 test images: 5.0% Max EMA accuracy: 4.97% Epoch: [113] [ 0/156] eta: 0:06:54 lr: 0.006013 min_lr: 0.006013 loss: 2.0871 (2.0871) weight_decay: 0.0500 (0.0500) time: 2.6602 data: 2.0111 max mem: 57114 Epoch: [113] [ 10/156] eta: 0:02:11 lr: 0.006011 min_lr: 0.006011 loss: 3.3408 (3.2755) weight_decay: 0.0500 (0.0500) time: 0.8983 data: 0.1832 max mem: 57114 Epoch: [113] [ 20/156] eta: 0:01:50 lr: 0.006008 min_lr: 0.006008 loss: 3.2733 (3.2021) weight_decay: 0.0500 (0.0500) time: 0.7180 data: 0.0004 max mem: 57114 Epoch: [113] [ 30/156] eta: 0:01:38 lr: 0.006006 min_lr: 0.006006 loss: 3.2631 (3.2493) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [113] [ 40/156] eta: 0:01:28 lr: 0.006003 min_lr: 0.006003 loss: 3.2476 (3.2045) weight_decay: 0.0500 (0.0500) time: 0.7189 data: 0.0005 max mem: 57114 Epoch: [113] [ 50/156] eta: 0:01:20 lr: 0.006001 min_lr: 0.006001 loss: 3.3565 (3.2181) weight_decay: 0.0500 (0.0500) time: 0.7203 data: 0.0005 max mem: 57114 Epoch: [113] [ 60/156] eta: 0:01:12 lr: 0.005998 min_lr: 0.005998 loss: 3.3259 (3.1582) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [113] [ 70/156] eta: 0:01:04 lr: 0.005996 min_lr: 0.005996 loss: 2.7268 (3.1291) weight_decay: 0.0500 (0.0500) time: 0.7187 data: 0.0005 max mem: 57114 Epoch: [113] [ 80/156] eta: 0:00:56 lr: 0.005993 min_lr: 0.005993 loss: 3.3484 (3.1628) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0005 max mem: 57114 Epoch: [113] [ 90/156] eta: 0:00:48 lr: 0.005991 min_lr: 0.005991 loss: 3.4658 (3.1751) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0006 max mem: 57114 Epoch: [113] [100/156] eta: 0:00:41 lr: 0.005988 min_lr: 0.005988 loss: 3.2450 (3.1605) weight_decay: 0.0500 (0.0500) time: 0.7036 data: 0.0007 max mem: 57114 Epoch: [113] [110/156] eta: 0:00:33 lr: 0.005986 min_lr: 0.005986 loss: 3.2450 (3.1754) weight_decay: 0.0500 (0.0500) time: 0.7139 data: 0.0007 max mem: 57114 Epoch: [113] [120/156] eta: 0:00:26 lr: 0.005983 min_lr: 0.005983 loss: 3.3250 (3.1766) weight_decay: 0.0500 (0.0500) time: 0.7215 data: 0.0007 max mem: 57114 Epoch: [113] [130/156] eta: 0:00:18 lr: 0.005981 min_lr: 0.005981 loss: 3.3243 (3.1850) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0018 max mem: 57114 Epoch: [113] [140/156] eta: 0:00:11 lr: 0.005978 min_lr: 0.005978 loss: 3.3692 (3.1869) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0015 max mem: 57114 Epoch: [113] [150/156] eta: 0:00:04 lr: 0.005976 min_lr: 0.005976 loss: 3.3692 (3.1895) weight_decay: 0.0500 (0.0500) time: 0.6798 data: 0.0001 max mem: 57114 Epoch: [113] [155/156] eta: 0:00:00 lr: 0.005975 min_lr: 0.005975 loss: 3.3671 (3.1859) weight_decay: 0.0500 (0.0500) time: 0.6814 data: 0.0001 max mem: 57114 Epoch: [113] Total time: 0:01:53 (0.7245 s / it) Averaged stats: lr: 0.005975 min_lr: 0.005975 loss: 3.3671 (3.2468) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7906 (0.7906) acc1: 82.2917 (82.2917) acc5: 95.8333 (95.8333) time: 2.1734 data: 1.9164 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8281 (0.8507) acc1: 81.2500 (77.8772) acc5: 95.8333 (94.5013) time: 0.5861 data: 0.3835 max mem: 57114 Test: Total time: 0:00:03 (0.6100 s / it) * Acc@1 77.508 Acc@5 93.968 loss 0.982 Accuracy of the model on the 50000 test images: 77.5% Max accuracy: 77.51% Test: [0/5] eta: 0:00:10 loss: 5.7622 (5.7622) acc1: 11.9792 (11.9792) acc5: 22.9167 (22.9167) time: 2.1121 data: 1.8684 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 6.0325 (5.7464) acc1: 11.9792 (9.7187) acc5: 21.8750 (19.0537) time: 0.5730 data: 0.3737 max mem: 57114 Test: Total time: 0:00:02 (0.5848 s / it) * Acc@1 6.252 Acc@5 14.796 loss 6.214 Accuracy of the model EMA on 50000 test images: 6.3% Max EMA accuracy: 6.25% Epoch: [114] [ 0/156] eta: 0:06:02 lr: 0.005974 min_lr: 0.005974 loss: 2.5972 (2.5972) weight_decay: 0.0500 (0.0500) time: 2.3221 data: 1.6727 max mem: 57114 Epoch: [114] [ 10/156] eta: 0:02:06 lr: 0.005972 min_lr: 0.005972 loss: 3.4731 (3.2128) weight_decay: 0.0500 (0.0500) time: 0.8661 data: 0.1527 max mem: 57114 Epoch: [114] [ 20/156] eta: 0:01:49 lr: 0.005969 min_lr: 0.005969 loss: 3.4647 (3.2572) weight_decay: 0.0500 (0.0500) time: 0.7328 data: 0.0007 max mem: 57114 Epoch: [114] [ 30/156] eta: 0:01:38 lr: 0.005967 min_lr: 0.005967 loss: 3.4647 (3.2546) weight_decay: 0.0500 (0.0500) time: 0.7356 data: 0.0007 max mem: 57114 Epoch: [114] [ 40/156] eta: 0:01:30 lr: 0.005964 min_lr: 0.005964 loss: 3.3346 (3.2102) weight_decay: 0.0500 (0.0500) time: 0.7505 data: 0.0006 max mem: 57114 Epoch: [114] [ 50/156] eta: 0:01:21 lr: 0.005962 min_lr: 0.005962 loss: 3.2401 (3.2165) weight_decay: 0.0500 (0.0500) time: 0.7414 data: 0.0005 max mem: 57114 Epoch: [114] [ 60/156] eta: 0:01:12 lr: 0.005959 min_lr: 0.005959 loss: 3.4305 (3.2349) weight_decay: 0.0500 (0.0500) time: 0.7145 data: 0.0004 max mem: 57114 Epoch: [114] [ 70/156] eta: 0:01:04 lr: 0.005957 min_lr: 0.005957 loss: 3.3289 (3.2150) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0004 max mem: 57114 Epoch: [114] [ 80/156] eta: 0:00:56 lr: 0.005954 min_lr: 0.005954 loss: 2.9937 (3.2167) weight_decay: 0.0500 (0.0500) time: 0.7118 data: 0.0004 max mem: 57114 Epoch: [114] [ 90/156] eta: 0:00:49 lr: 0.005952 min_lr: 0.005952 loss: 3.3875 (3.2285) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0004 max mem: 57114 Epoch: [114] [100/156] eta: 0:00:41 lr: 0.005949 min_lr: 0.005949 loss: 3.3819 (3.1975) weight_decay: 0.0500 (0.0500) time: 0.7156 data: 0.0004 max mem: 57114 Epoch: [114] [110/156] eta: 0:00:33 lr: 0.005947 min_lr: 0.005947 loss: 3.3740 (3.2019) weight_decay: 0.0500 (0.0500) time: 0.7040 data: 0.0004 max mem: 57114 Epoch: [114] [120/156] eta: 0:00:26 lr: 0.005944 min_lr: 0.005944 loss: 3.4010 (3.2052) weight_decay: 0.0500 (0.0500) time: 0.6986 data: 0.0004 max mem: 57114 Epoch: [114] [130/156] eta: 0:00:18 lr: 0.005942 min_lr: 0.005942 loss: 3.4326 (3.2306) weight_decay: 0.0500 (0.0500) time: 0.6952 data: 0.0005 max mem: 57114 Epoch: [114] [140/156] eta: 0:00:11 lr: 0.005939 min_lr: 0.005939 loss: 3.5456 (3.2361) weight_decay: 0.0500 (0.0500) time: 0.6809 data: 0.0004 max mem: 57114 Epoch: [114] [150/156] eta: 0:00:04 lr: 0.005937 min_lr: 0.005937 loss: 3.1859 (3.2221) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0001 max mem: 57114 Epoch: [114] [155/156] eta: 0:00:00 lr: 0.005935 min_lr: 0.005935 loss: 3.2581 (3.2256) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0001 max mem: 57114 Epoch: [114] Total time: 0:01:53 (0.7252 s / it) Averaged stats: lr: 0.005935 min_lr: 0.005935 loss: 3.2581 (3.2477) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8308 (0.8308) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.0722 data: 1.8161 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9656 (1.0002) acc1: 81.7708 (79.2839) acc5: 95.3125 (94.2455) time: 0.5650 data: 0.3633 max mem: 57114 Test: Total time: 0:00:02 (0.5902 s / it) * Acc@1 76.906 Acc@5 93.746 loss 1.114 Accuracy of the model on the 50000 test images: 76.9% Max accuracy: 77.51% Test: [0/5] eta: 0:00:11 loss: 5.4480 (5.4480) acc1: 14.5833 (14.5833) acc5: 25.0000 (25.0000) time: 2.3230 data: 2.0796 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 5.7997 (5.4779) acc1: 14.0625 (11.7647) acc5: 24.4792 (22.3785) time: 0.6153 data: 0.4160 max mem: 57114 Test: Total time: 0:00:03 (0.6267 s / it) * Acc@1 7.801 Acc@5 17.941 loss 5.950 Accuracy of the model EMA on 50000 test images: 7.8% Max EMA accuracy: 7.80% Epoch: [115] [ 0/156] eta: 0:07:35 lr: 0.005935 min_lr: 0.005935 loss: 3.2565 (3.2565) weight_decay: 0.0500 (0.0500) time: 2.9200 data: 2.2640 max mem: 57114 Epoch: [115] [ 10/156] eta: 0:02:13 lr: 0.005933 min_lr: 0.005933 loss: 3.2049 (3.1185) weight_decay: 0.0500 (0.0500) time: 0.9172 data: 0.2061 max mem: 57114 Epoch: [115] [ 20/156] eta: 0:01:52 lr: 0.005930 min_lr: 0.005930 loss: 3.1701 (3.1634) weight_decay: 0.0500 (0.0500) time: 0.7206 data: 0.0004 max mem: 57114 Epoch: [115] [ 30/156] eta: 0:01:39 lr: 0.005928 min_lr: 0.005928 loss: 3.2681 (3.2188) weight_decay: 0.0500 (0.0500) time: 0.7246 data: 0.0005 max mem: 57114 Epoch: [115] [ 40/156] eta: 0:01:29 lr: 0.005925 min_lr: 0.005925 loss: 3.2681 (3.1585) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0005 max mem: 57114 Epoch: [115] [ 50/156] eta: 0:01:20 lr: 0.005923 min_lr: 0.005923 loss: 3.1684 (3.1353) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0005 max mem: 57114 Epoch: [115] [ 60/156] eta: 0:01:12 lr: 0.005920 min_lr: 0.005920 loss: 3.2340 (3.1515) weight_decay: 0.0500 (0.0500) time: 0.7228 data: 0.0004 max mem: 57114 Epoch: [115] [ 70/156] eta: 0:01:04 lr: 0.005917 min_lr: 0.005917 loss: 3.2969 (3.1732) weight_decay: 0.0500 (0.0500) time: 0.7332 data: 0.0006 max mem: 57114 Epoch: [115] [ 80/156] eta: 0:00:56 lr: 0.005915 min_lr: 0.005915 loss: 3.4511 (3.1893) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0007 max mem: 57114 Epoch: [115] [ 90/156] eta: 0:00:49 lr: 0.005912 min_lr: 0.005912 loss: 3.3029 (3.1735) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0007 max mem: 57114 Epoch: [115] [100/156] eta: 0:00:41 lr: 0.005910 min_lr: 0.005910 loss: 3.2135 (3.1877) weight_decay: 0.0500 (0.0500) time: 0.7016 data: 0.0007 max mem: 57114 Epoch: [115] [110/156] eta: 0:00:33 lr: 0.005907 min_lr: 0.005907 loss: 3.3497 (3.1928) weight_decay: 0.0500 (0.0500) time: 0.6961 data: 0.0006 max mem: 57114 Epoch: [115] [120/156] eta: 0:00:26 lr: 0.005905 min_lr: 0.005905 loss: 3.4831 (3.1915) weight_decay: 0.0500 (0.0500) time: 0.7036 data: 0.0007 max mem: 57114 Epoch: [115] [130/156] eta: 0:00:19 lr: 0.005902 min_lr: 0.005902 loss: 3.4831 (3.1980) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.0007 max mem: 57114 Epoch: [115] [140/156] eta: 0:00:11 lr: 0.005900 min_lr: 0.005900 loss: 3.5050 (3.2124) weight_decay: 0.0500 (0.0500) time: 0.6909 data: 0.0004 max mem: 57114 Epoch: [115] [150/156] eta: 0:00:04 lr: 0.005897 min_lr: 0.005897 loss: 3.5050 (3.2229) weight_decay: 0.0500 (0.0500) time: 0.6880 data: 0.0001 max mem: 57114 Epoch: [115] [155/156] eta: 0:00:00 lr: 0.005896 min_lr: 0.005896 loss: 3.5010 (3.2360) weight_decay: 0.0500 (0.0500) time: 0.6908 data: 0.0001 max mem: 57114 Epoch: [115] Total time: 0:01:53 (0.7268 s / it) Averaged stats: lr: 0.005896 min_lr: 0.005896 loss: 3.5010 (3.2404) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.9813 (0.9813) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.2500 data: 1.9939 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0555 (1.0774) acc1: 83.8542 (78.6445) acc5: 96.3542 (94.3734) time: 0.6012 data: 0.3989 max mem: 57114 Test: Total time: 0:00:03 (0.6224 s / it) * Acc@1 76.966 Acc@5 93.740 loss 1.179 Accuracy of the model on the 50000 test images: 77.0% Max accuracy: 77.51% Test: [0/5] eta: 0:00:11 loss: 5.1427 (5.1427) acc1: 17.1875 (17.1875) acc5: 29.1667 (29.1667) time: 2.2287 data: 1.9853 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 5.5735 (5.2154) acc1: 15.1042 (13.8107) acc5: 27.6042 (26.0870) time: 0.5965 data: 0.3972 max mem: 57114 Test: Total time: 0:00:03 (0.6107 s / it) * Acc@1 9.721 Acc@5 21.260 loss 5.685 Accuracy of the model EMA on 50000 test images: 9.7% Max EMA accuracy: 9.72% Epoch: [116] [ 0/156] eta: 0:07:48 lr: 0.005896 min_lr: 0.005896 loss: 3.7110 (3.7110) weight_decay: 0.0500 (0.0500) time: 3.0048 data: 2.3557 max mem: 57114 Epoch: [116] [ 10/156] eta: 0:02:15 lr: 0.005893 min_lr: 0.005893 loss: 3.4444 (3.3539) weight_decay: 0.0500 (0.0500) time: 0.9284 data: 0.2144 max mem: 57114 Epoch: [116] [ 20/156] eta: 0:01:52 lr: 0.005891 min_lr: 0.005891 loss: 3.3831 (3.2869) weight_decay: 0.0500 (0.0500) time: 0.7154 data: 0.0003 max mem: 57114 Epoch: [116] [ 30/156] eta: 0:01:39 lr: 0.005888 min_lr: 0.005888 loss: 3.3002 (3.2643) weight_decay: 0.0500 (0.0500) time: 0.7164 data: 0.0004 max mem: 57114 Epoch: [116] [ 40/156] eta: 0:01:30 lr: 0.005886 min_lr: 0.005886 loss: 3.2821 (3.2377) weight_decay: 0.0500 (0.0500) time: 0.7278 data: 0.0004 max mem: 57114 Epoch: [116] [ 50/156] eta: 0:01:21 lr: 0.005883 min_lr: 0.005883 loss: 3.2932 (3.2449) weight_decay: 0.0500 (0.0500) time: 0.7290 data: 0.0004 max mem: 57114 Epoch: [116] [ 60/156] eta: 0:01:13 lr: 0.005881 min_lr: 0.005881 loss: 3.3420 (3.2478) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0004 max mem: 57114 Epoch: [116] [ 70/156] eta: 0:01:04 lr: 0.005878 min_lr: 0.005878 loss: 3.3667 (3.2350) weight_decay: 0.0500 (0.0500) time: 0.7167 data: 0.0004 max mem: 57114 Epoch: [116] [ 80/156] eta: 0:00:56 lr: 0.005875 min_lr: 0.005875 loss: 3.3829 (3.2515) weight_decay: 0.0500 (0.0500) time: 0.6996 data: 0.0004 max mem: 57114 Epoch: [116] [ 90/156] eta: 0:00:48 lr: 0.005873 min_lr: 0.005873 loss: 3.3128 (3.2491) weight_decay: 0.0500 (0.0500) time: 0.6936 data: 0.0004 max mem: 57114 Epoch: [116] [100/156] eta: 0:00:41 lr: 0.005870 min_lr: 0.005870 loss: 3.3099 (3.2601) weight_decay: 0.0500 (0.0500) time: 0.6973 data: 0.0004 max mem: 57114 Epoch: [116] [110/156] eta: 0:00:33 lr: 0.005868 min_lr: 0.005868 loss: 3.3099 (3.2663) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [116] [120/156] eta: 0:00:26 lr: 0.005865 min_lr: 0.005865 loss: 3.2868 (3.2789) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [116] [130/156] eta: 0:00:18 lr: 0.005863 min_lr: 0.005863 loss: 3.3873 (3.2827) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0009 max mem: 57114 Epoch: [116] [140/156] eta: 0:00:11 lr: 0.005860 min_lr: 0.005860 loss: 3.4318 (3.2842) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0007 max mem: 57114 Epoch: [116] [150/156] eta: 0:00:04 lr: 0.005858 min_lr: 0.005858 loss: 3.4136 (3.2844) weight_decay: 0.0500 (0.0500) time: 0.6775 data: 0.0001 max mem: 57114 Epoch: [116] [155/156] eta: 0:00:00 lr: 0.005856 min_lr: 0.005856 loss: 3.4136 (3.2837) weight_decay: 0.0500 (0.0500) time: 0.6764 data: 0.0001 max mem: 57114 Epoch: [116] Total time: 0:01:52 (0.7236 s / it) Averaged stats: lr: 0.005856 min_lr: 0.005856 loss: 3.4136 (3.2386) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8826 (0.8826) acc1: 82.2917 (82.2917) acc5: 96.3542 (96.3542) time: 2.0410 data: 1.7847 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0555 (1.0212) acc1: 79.6875 (78.6445) acc5: 96.3542 (93.7340) time: 0.5590 data: 0.3570 max mem: 57114 Test: Total time: 0:00:02 (0.5820 s / it) * Acc@1 77.272 Acc@5 93.886 loss 1.117 Accuracy of the model on the 50000 test images: 77.3% Max accuracy: 77.51% Test: [0/5] eta: 0:00:11 loss: 4.8327 (4.8327) acc1: 19.2708 (19.2708) acc5: 36.4583 (36.4583) time: 2.3368 data: 2.0933 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 5.3535 (4.9554) acc1: 17.1875 (15.2174) acc5: 28.6458 (30.1790) time: 0.6180 data: 0.4188 max mem: 57114 Test: Total time: 0:00:03 (0.6307 s / it) * Acc@1 11.743 Acc@5 24.838 loss 5.417 Accuracy of the model EMA on 50000 test images: 11.7% Max EMA accuracy: 11.74% Epoch: [117] [ 0/156] eta: 0:08:03 lr: 0.005856 min_lr: 0.005856 loss: 3.6707 (3.6707) weight_decay: 0.0500 (0.0500) time: 3.0962 data: 2.4397 max mem: 57114 Epoch: [117] [ 10/156] eta: 0:02:18 lr: 0.005854 min_lr: 0.005854 loss: 3.3780 (3.4774) weight_decay: 0.0500 (0.0500) time: 0.9503 data: 0.2222 max mem: 57114 Epoch: [117] [ 20/156] eta: 0:01:53 lr: 0.005851 min_lr: 0.005851 loss: 3.4520 (3.4969) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0005 max mem: 57114 Epoch: [117] [ 30/156] eta: 0:01:41 lr: 0.005848 min_lr: 0.005848 loss: 3.4314 (3.4403) weight_decay: 0.0500 (0.0500) time: 0.7284 data: 0.0004 max mem: 57114 Epoch: [117] [ 40/156] eta: 0:01:31 lr: 0.005846 min_lr: 0.005846 loss: 3.2226 (3.3461) weight_decay: 0.0500 (0.0500) time: 0.7364 data: 0.0004 max mem: 57114 Epoch: [117] [ 50/156] eta: 0:01:21 lr: 0.005843 min_lr: 0.005843 loss: 3.2125 (3.3526) weight_decay: 0.0500 (0.0500) time: 0.7205 data: 0.0004 max mem: 57114 Epoch: [117] [ 60/156] eta: 0:01:13 lr: 0.005841 min_lr: 0.005841 loss: 3.3107 (3.3364) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [117] [ 70/156] eta: 0:01:05 lr: 0.005838 min_lr: 0.005838 loss: 3.4338 (3.3330) weight_decay: 0.0500 (0.0500) time: 0.7155 data: 0.0005 max mem: 57114 Epoch: [117] [ 80/156] eta: 0:00:56 lr: 0.005836 min_lr: 0.005836 loss: 3.2639 (3.3058) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0004 max mem: 57114 Epoch: [117] [ 90/156] eta: 0:00:49 lr: 0.005833 min_lr: 0.005833 loss: 3.2560 (3.2943) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [117] [100/156] eta: 0:00:41 lr: 0.005831 min_lr: 0.005831 loss: 3.1563 (3.2747) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [117] [110/156] eta: 0:00:34 lr: 0.005828 min_lr: 0.005828 loss: 3.1563 (3.2588) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0004 max mem: 57114 Epoch: [117] [120/156] eta: 0:00:26 lr: 0.005825 min_lr: 0.005825 loss: 3.2732 (3.2545) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0003 max mem: 57114 Epoch: [117] [130/156] eta: 0:00:19 lr: 0.005823 min_lr: 0.005823 loss: 3.3021 (3.2591) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0006 max mem: 57114 Epoch: [117] [140/156] eta: 0:00:11 lr: 0.005820 min_lr: 0.005820 loss: 3.4731 (3.2747) weight_decay: 0.0500 (0.0500) time: 0.6884 data: 0.0005 max mem: 57114 Epoch: [117] [150/156] eta: 0:00:04 lr: 0.005818 min_lr: 0.005818 loss: 3.4512 (3.2693) weight_decay: 0.0500 (0.0500) time: 0.6806 data: 0.0001 max mem: 57114 Epoch: [117] [155/156] eta: 0:00:00 lr: 0.005816 min_lr: 0.005816 loss: 3.3563 (3.2679) weight_decay: 0.0500 (0.0500) time: 0.6802 data: 0.0001 max mem: 57114 Epoch: [117] Total time: 0:01:53 (0.7285 s / it) Averaged stats: lr: 0.005816 min_lr: 0.005816 loss: 3.3563 (3.2324) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0545 (1.0545) acc1: 81.2500 (81.2500) acc5: 96.8750 (96.8750) time: 2.0881 data: 1.8323 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1459 (1.1415) acc1: 80.2083 (77.8772) acc5: 96.8750 (95.0128) time: 0.5683 data: 0.3665 max mem: 57114 Test: Total time: 0:00:02 (0.5913 s / it) * Acc@1 76.712 Acc@5 93.380 loss 1.250 Accuracy of the model on the 50000 test images: 76.7% Max accuracy: 77.51% Test: [0/5] eta: 0:00:11 loss: 4.5118 (4.5118) acc1: 20.8333 (20.8333) acc5: 40.6250 (40.6250) time: 2.2887 data: 2.0453 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 5.0047 (4.6880) acc1: 19.2708 (17.6471) acc5: 32.2917 (34.1432) time: 0.6084 data: 0.4091 max mem: 57114 Test: Total time: 0:00:03 (0.6205 s / it) * Acc@1 14.148 Acc@5 28.748 loss 5.140 Accuracy of the model EMA on 50000 test images: 14.1% Max EMA accuracy: 14.15% Epoch: [118] [ 0/156] eta: 0:07:11 lr: 0.005816 min_lr: 0.005816 loss: 3.2462 (3.2462) weight_decay: 0.0500 (0.0500) time: 2.7638 data: 2.1106 max mem: 57114 Epoch: [118] [ 10/156] eta: 0:02:12 lr: 0.005814 min_lr: 0.005814 loss: 3.6729 (3.3563) weight_decay: 0.0500 (0.0500) time: 0.9100 data: 0.1921 max mem: 57114 Epoch: [118] [ 20/156] eta: 0:01:52 lr: 0.005811 min_lr: 0.005811 loss: 3.2051 (3.2230) weight_decay: 0.0500 (0.0500) time: 0.7296 data: 0.0003 max mem: 57114 Epoch: [118] [ 30/156] eta: 0:01:40 lr: 0.005809 min_lr: 0.005809 loss: 3.2051 (3.1991) weight_decay: 0.0500 (0.0500) time: 0.7411 data: 0.0003 max mem: 57114 Epoch: [118] [ 40/156] eta: 0:01:30 lr: 0.005806 min_lr: 0.005806 loss: 3.1968 (3.1943) weight_decay: 0.0500 (0.0500) time: 0.7349 data: 0.0004 max mem: 57114 Epoch: [118] [ 50/156] eta: 0:01:21 lr: 0.005803 min_lr: 0.005803 loss: 3.0053 (3.1655) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0003 max mem: 57114 Epoch: [118] [ 60/156] eta: 0:01:13 lr: 0.005801 min_lr: 0.005801 loss: 2.9455 (3.1490) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0003 max mem: 57114 Epoch: [118] [ 70/156] eta: 0:01:04 lr: 0.005798 min_lr: 0.005798 loss: 3.2647 (3.2039) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0003 max mem: 57114 Epoch: [118] [ 80/156] eta: 0:00:57 lr: 0.005796 min_lr: 0.005796 loss: 3.4230 (3.2066) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0004 max mem: 57114 Epoch: [118] [ 90/156] eta: 0:00:49 lr: 0.005793 min_lr: 0.005793 loss: 3.2841 (3.2224) weight_decay: 0.0500 (0.0500) time: 0.7131 data: 0.0004 max mem: 57114 Epoch: [118] [100/156] eta: 0:00:41 lr: 0.005791 min_lr: 0.005791 loss: 3.3627 (3.2350) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0004 max mem: 57114 Epoch: [118] [110/156] eta: 0:00:33 lr: 0.005788 min_lr: 0.005788 loss: 3.4687 (3.2534) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [118] [120/156] eta: 0:00:26 lr: 0.005785 min_lr: 0.005785 loss: 3.3151 (3.2518) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0004 max mem: 57114 Epoch: [118] [130/156] eta: 0:00:19 lr: 0.005783 min_lr: 0.005783 loss: 3.1067 (3.2280) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0008 max mem: 57114 Epoch: [118] [140/156] eta: 0:00:11 lr: 0.005780 min_lr: 0.005780 loss: 2.7950 (3.2122) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0007 max mem: 57114 Epoch: [118] [150/156] eta: 0:00:04 lr: 0.005778 min_lr: 0.005778 loss: 3.3387 (3.2272) weight_decay: 0.0500 (0.0500) time: 0.6837 data: 0.0001 max mem: 57114 Epoch: [118] [155/156] eta: 0:00:00 lr: 0.005776 min_lr: 0.005776 loss: 3.3387 (3.2225) weight_decay: 0.0500 (0.0500) time: 0.6813 data: 0.0001 max mem: 57114 Epoch: [118] Total time: 0:01:53 (0.7277 s / it) Averaged stats: lr: 0.005776 min_lr: 0.005776 loss: 3.3387 (3.2341) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8750 (0.8750) acc1: 85.9375 (85.9375) acc5: 97.3958 (97.3958) time: 2.0215 data: 1.7658 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0395 (0.9685) acc1: 80.2083 (80.0512) acc5: 96.8750 (95.0128) time: 0.5549 data: 0.3532 max mem: 57114 Test: Total time: 0:00:02 (0.5752 s / it) * Acc@1 77.190 Acc@5 93.902 loss 1.103 Accuracy of the model on the 50000 test images: 77.2% Max accuracy: 77.51% Test: [0/5] eta: 0:00:11 loss: 4.2051 (4.2051) acc1: 22.9167 (22.9167) acc5: 45.8333 (45.8333) time: 2.2770 data: 2.0337 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 4.6836 (4.4350) acc1: 21.3542 (19.8210) acc5: 37.5000 (39.0026) time: 0.6060 data: 0.4068 max mem: 57114 Test: Total time: 0:00:03 (0.6198 s / it) * Acc@1 16.496 Acc@5 32.695 loss 4.874 Accuracy of the model EMA on 50000 test images: 16.5% Max EMA accuracy: 16.50% Epoch: [119] [ 0/156] eta: 0:07:44 lr: 0.005776 min_lr: 0.005776 loss: 2.4153 (2.4153) weight_decay: 0.0500 (0.0500) time: 2.9747 data: 2.3094 max mem: 57114 Epoch: [119] [ 10/156] eta: 0:02:17 lr: 0.005774 min_lr: 0.005774 loss: 3.1715 (3.0960) weight_decay: 0.0500 (0.0500) time: 0.9402 data: 0.2102 max mem: 57114 Epoch: [119] [ 20/156] eta: 0:01:53 lr: 0.005771 min_lr: 0.005771 loss: 3.1715 (3.1574) weight_decay: 0.0500 (0.0500) time: 0.7287 data: 0.0003 max mem: 57114 Epoch: [119] [ 30/156] eta: 0:01:40 lr: 0.005768 min_lr: 0.005768 loss: 3.1557 (3.1371) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0003 max mem: 57114 Epoch: [119] [ 40/156] eta: 0:01:30 lr: 0.005766 min_lr: 0.005766 loss: 3.1682 (3.1432) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0004 max mem: 57114 Epoch: [119] [ 50/156] eta: 0:01:21 lr: 0.005763 min_lr: 0.005763 loss: 3.3127 (3.1638) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0003 max mem: 57114 Epoch: [119] [ 60/156] eta: 0:01:13 lr: 0.005761 min_lr: 0.005761 loss: 3.3646 (3.1442) weight_decay: 0.0500 (0.0500) time: 0.7231 data: 0.0003 max mem: 57114 Epoch: [119] [ 70/156] eta: 0:01:04 lr: 0.005758 min_lr: 0.005758 loss: 3.0004 (3.1360) weight_decay: 0.0500 (0.0500) time: 0.7227 data: 0.0004 max mem: 57114 Epoch: [119] [ 80/156] eta: 0:00:56 lr: 0.005755 min_lr: 0.005755 loss: 3.3776 (3.1677) weight_decay: 0.0500 (0.0500) time: 0.7144 data: 0.0004 max mem: 57114 Epoch: [119] [ 90/156] eta: 0:00:49 lr: 0.005753 min_lr: 0.005753 loss: 3.3776 (3.1755) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0004 max mem: 57114 Epoch: [119] [100/156] eta: 0:00:41 lr: 0.005750 min_lr: 0.005750 loss: 3.3168 (3.1834) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0004 max mem: 57114 Epoch: [119] [110/156] eta: 0:00:33 lr: 0.005748 min_lr: 0.005748 loss: 3.1922 (3.1842) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0003 max mem: 57114 Epoch: [119] [120/156] eta: 0:00:26 lr: 0.005745 min_lr: 0.005745 loss: 3.2854 (3.1834) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [119] [130/156] eta: 0:00:19 lr: 0.005743 min_lr: 0.005743 loss: 3.4111 (3.2110) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0009 max mem: 57114 Epoch: [119] [140/156] eta: 0:00:11 lr: 0.005740 min_lr: 0.005740 loss: 3.3827 (3.2033) weight_decay: 0.0500 (0.0500) time: 0.6933 data: 0.0007 max mem: 57114 Epoch: [119] [150/156] eta: 0:00:04 lr: 0.005737 min_lr: 0.005737 loss: 2.9390 (3.1908) weight_decay: 0.0500 (0.0500) time: 0.6890 data: 0.0001 max mem: 57114 Epoch: [119] [155/156] eta: 0:00:00 lr: 0.005736 min_lr: 0.005736 loss: 2.9390 (3.1860) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0001 max mem: 57114 Epoch: [119] Total time: 0:01:53 (0.7274 s / it) Averaged stats: lr: 0.005736 min_lr: 0.005736 loss: 2.9390 (3.2214) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9294 (0.9294) acc1: 81.7708 (81.7708) acc5: 94.7917 (94.7917) time: 2.0440 data: 1.7865 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9969 (1.0052) acc1: 80.2083 (77.2379) acc5: 94.7917 (94.1177) time: 0.5597 data: 0.3574 max mem: 57114 Test: Total time: 0:00:02 (0.5799 s / it) * Acc@1 77.707 Acc@5 94.046 loss 1.066 Accuracy of the model on the 50000 test images: 77.7% Max accuracy: 77.71% Test: [0/5] eta: 0:00:10 loss: 3.9186 (3.9186) acc1: 26.5625 (26.5625) acc5: 50.5208 (50.5208) time: 2.0721 data: 1.8285 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 4.3908 (4.1958) acc1: 22.3958 (22.5064) acc5: 42.7083 (42.5831) time: 0.5650 data: 0.3658 max mem: 57114 Test: Total time: 0:00:02 (0.5776 s / it) * Acc@1 19.006 Acc@5 36.423 loss 4.624 Accuracy of the model EMA on 50000 test images: 19.0% Max EMA accuracy: 19.01% Epoch: [120] [ 0/156] eta: 0:06:17 lr: 0.005736 min_lr: 0.005736 loss: 3.3410 (3.3410) weight_decay: 0.0500 (0.0500) time: 2.4227 data: 1.7713 max mem: 57114 Epoch: [120] [ 10/156] eta: 0:02:07 lr: 0.005733 min_lr: 0.005733 loss: 3.3410 (3.2433) weight_decay: 0.0500 (0.0500) time: 0.8725 data: 0.1613 max mem: 57114 Epoch: [120] [ 20/156] eta: 0:01:49 lr: 0.005731 min_lr: 0.005731 loss: 3.1090 (3.1411) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0003 max mem: 57114 Epoch: [120] [ 30/156] eta: 0:01:38 lr: 0.005728 min_lr: 0.005728 loss: 3.1321 (3.1581) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0004 max mem: 57114 Epoch: [120] [ 40/156] eta: 0:01:29 lr: 0.005725 min_lr: 0.005725 loss: 3.1106 (3.1162) weight_decay: 0.0500 (0.0500) time: 0.7383 data: 0.0004 max mem: 57114 Epoch: [120] [ 50/156] eta: 0:01:21 lr: 0.005723 min_lr: 0.005723 loss: 3.2023 (3.1544) weight_decay: 0.0500 (0.0500) time: 0.7509 data: 0.0004 max mem: 57114 Epoch: [120] [ 60/156] eta: 0:01:13 lr: 0.005720 min_lr: 0.005720 loss: 3.3103 (3.1801) weight_decay: 0.0500 (0.0500) time: 0.7449 data: 0.0004 max mem: 57114 Epoch: [120] [ 70/156] eta: 0:01:04 lr: 0.005718 min_lr: 0.005718 loss: 3.2202 (3.1780) weight_decay: 0.0500 (0.0500) time: 0.7187 data: 0.0004 max mem: 57114 Epoch: [120] [ 80/156] eta: 0:00:57 lr: 0.005715 min_lr: 0.005715 loss: 3.3135 (3.2013) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0004 max mem: 57114 Epoch: [120] [ 90/156] eta: 0:00:49 lr: 0.005712 min_lr: 0.005712 loss: 3.3770 (3.2111) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0003 max mem: 57114 Epoch: [120] [100/156] eta: 0:00:41 lr: 0.005710 min_lr: 0.005710 loss: 3.2196 (3.2002) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [120] [110/156] eta: 0:00:34 lr: 0.005707 min_lr: 0.005707 loss: 3.1503 (3.1852) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0004 max mem: 57114 Epoch: [120] [120/156] eta: 0:00:26 lr: 0.005705 min_lr: 0.005705 loss: 3.2504 (3.1752) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [120] [130/156] eta: 0:00:19 lr: 0.005702 min_lr: 0.005702 loss: 3.1842 (3.1541) weight_decay: 0.0500 (0.0500) time: 0.6997 data: 0.0009 max mem: 57114 Epoch: [120] [140/156] eta: 0:00:11 lr: 0.005699 min_lr: 0.005699 loss: 3.1842 (3.1552) weight_decay: 0.0500 (0.0500) time: 0.6879 data: 0.0007 max mem: 57114 Epoch: [120] [150/156] eta: 0:00:04 lr: 0.005697 min_lr: 0.005697 loss: 3.3468 (3.1717) weight_decay: 0.0500 (0.0500) time: 0.6814 data: 0.0001 max mem: 57114 Epoch: [120] [155/156] eta: 0:00:00 lr: 0.005696 min_lr: 0.005696 loss: 3.4301 (3.1715) weight_decay: 0.0500 (0.0500) time: 0.6813 data: 0.0001 max mem: 57114 Epoch: [120] Total time: 0:01:53 (0.7276 s / it) Averaged stats: lr: 0.005696 min_lr: 0.005696 loss: 3.4301 (3.2249) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9290 (0.9290) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.0551 data: 1.7995 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0135 (1.0649) acc1: 81.2500 (78.3887) acc5: 95.8333 (94.6292) time: 0.5617 data: 0.3600 max mem: 57114 Test: Total time: 0:00:02 (0.5838 s / it) * Acc@1 77.851 Acc@5 94.224 loss 1.101 Accuracy of the model on the 50000 test images: 77.9% Max accuracy: 77.85% Test: [0/5] eta: 0:00:10 loss: 3.6501 (3.6501) acc1: 31.2500 (31.2500) acc5: 53.1250 (53.1250) time: 2.0186 data: 1.7751 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 4.1155 (3.9654) acc1: 25.0000 (25.5754) acc5: 47.3958 (46.4194) time: 0.5544 data: 0.3551 max mem: 57114 Test: Total time: 0:00:02 (0.5653 s / it) * Acc@1 21.429 Acc@5 40.123 loss 4.382 Accuracy of the model EMA on 50000 test images: 21.4% Max EMA accuracy: 21.43% Epoch: [121] [ 0/156] eta: 0:07:03 lr: 0.005695 min_lr: 0.005695 loss: 3.4073 (3.4073) weight_decay: 0.0500 (0.0500) time: 2.7163 data: 1.9855 max mem: 57114 Epoch: [121] [ 10/156] eta: 0:02:11 lr: 0.005693 min_lr: 0.005693 loss: 3.5053 (3.3765) weight_decay: 0.0500 (0.0500) time: 0.9027 data: 0.1808 max mem: 57114 Epoch: [121] [ 20/156] eta: 0:01:53 lr: 0.005690 min_lr: 0.005690 loss: 3.5053 (3.4763) weight_decay: 0.0500 (0.0500) time: 0.7367 data: 0.0004 max mem: 57114 Epoch: [121] [ 30/156] eta: 0:01:41 lr: 0.005687 min_lr: 0.005687 loss: 3.4155 (3.3870) weight_decay: 0.0500 (0.0500) time: 0.7474 data: 0.0004 max mem: 57114 Epoch: [121] [ 40/156] eta: 0:01:31 lr: 0.005685 min_lr: 0.005685 loss: 3.2233 (3.3280) weight_decay: 0.0500 (0.0500) time: 0.7376 data: 0.0004 max mem: 57114 Epoch: [121] [ 50/156] eta: 0:01:22 lr: 0.005682 min_lr: 0.005682 loss: 3.2309 (3.3297) weight_decay: 0.0500 (0.0500) time: 0.7378 data: 0.0004 max mem: 57114 Epoch: [121] [ 60/156] eta: 0:01:13 lr: 0.005680 min_lr: 0.005680 loss: 3.2993 (3.3032) weight_decay: 0.0500 (0.0500) time: 0.7287 data: 0.0004 max mem: 57114 Epoch: [121] [ 70/156] eta: 0:01:05 lr: 0.005677 min_lr: 0.005677 loss: 3.1367 (3.2622) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0004 max mem: 57114 Epoch: [121] [ 80/156] eta: 0:00:57 lr: 0.005674 min_lr: 0.005674 loss: 3.1367 (3.2320) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0004 max mem: 57114 Epoch: [121] [ 90/156] eta: 0:00:49 lr: 0.005672 min_lr: 0.005672 loss: 3.1858 (3.2303) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [121] [100/156] eta: 0:00:41 lr: 0.005669 min_lr: 0.005669 loss: 3.3783 (3.2493) weight_decay: 0.0500 (0.0500) time: 0.7130 data: 0.0003 max mem: 57114 Epoch: [121] [110/156] eta: 0:00:34 lr: 0.005667 min_lr: 0.005667 loss: 3.4843 (3.2356) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [121] [120/156] eta: 0:00:26 lr: 0.005664 min_lr: 0.005664 loss: 3.3207 (3.2321) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0004 max mem: 57114 Epoch: [121] [130/156] eta: 0:00:19 lr: 0.005661 min_lr: 0.005661 loss: 3.3968 (3.2308) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0009 max mem: 57114 Epoch: [121] [140/156] eta: 0:00:11 lr: 0.005659 min_lr: 0.005659 loss: 3.3968 (3.2369) weight_decay: 0.0500 (0.0500) time: 0.6914 data: 0.0007 max mem: 57114 Epoch: [121] [150/156] eta: 0:00:04 lr: 0.005656 min_lr: 0.005656 loss: 3.3293 (3.2319) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0001 max mem: 57114 Epoch: [121] [155/156] eta: 0:00:00 lr: 0.005655 min_lr: 0.005655 loss: 3.3293 (3.2223) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0001 max mem: 57114 Epoch: [121] Total time: 0:01:54 (0.7309 s / it) Averaged stats: lr: 0.005655 min_lr: 0.005655 loss: 3.3293 (3.2155) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7952 (0.7952) acc1: 85.4167 (85.4167) acc5: 97.9167 (97.9167) time: 2.1141 data: 1.8583 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9399 (0.9621) acc1: 79.1667 (77.8772) acc5: 96.8750 (95.1407) time: 0.5736 data: 0.3717 max mem: 57114 Test: Total time: 0:00:02 (0.5959 s / it) * Acc@1 77.753 Acc@5 94.132 loss 1.044 Accuracy of the model on the 50000 test images: 77.8% Max accuracy: 77.85% Test: [0/5] eta: 0:00:11 loss: 3.3996 (3.3996) acc1: 34.3750 (34.3750) acc5: 56.7708 (56.7708) time: 2.3639 data: 2.1205 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 3.8537 (3.7477) acc1: 26.5625 (28.0051) acc5: 50.5208 (49.2327) time: 0.6234 data: 0.4242 max mem: 57114 Test: Total time: 0:00:03 (0.6410 s / it) * Acc@1 23.967 Acc@5 43.848 loss 4.152 Accuracy of the model EMA on 50000 test images: 24.0% Max EMA accuracy: 23.97% Epoch: [122] [ 0/156] eta: 0:07:13 lr: 0.005655 min_lr: 0.005655 loss: 3.6342 (3.6342) weight_decay: 0.0500 (0.0500) time: 2.7794 data: 2.1299 max mem: 57114 Epoch: [122] [ 10/156] eta: 0:02:14 lr: 0.005652 min_lr: 0.005652 loss: 2.8658 (2.8999) weight_decay: 0.0500 (0.0500) time: 0.9198 data: 0.1939 max mem: 57114 Epoch: [122] [ 20/156] eta: 0:01:52 lr: 0.005649 min_lr: 0.005649 loss: 3.1139 (3.0632) weight_decay: 0.0500 (0.0500) time: 0.7306 data: 0.0003 max mem: 57114 Epoch: [122] [ 30/156] eta: 0:01:39 lr: 0.005647 min_lr: 0.005647 loss: 3.2906 (3.1477) weight_decay: 0.0500 (0.0500) time: 0.7224 data: 0.0003 max mem: 57114 Epoch: [122] [ 40/156] eta: 0:01:29 lr: 0.005644 min_lr: 0.005644 loss: 3.2966 (3.2021) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0004 max mem: 57114 Epoch: [122] [ 50/156] eta: 0:01:21 lr: 0.005641 min_lr: 0.005641 loss: 3.5280 (3.2364) weight_decay: 0.0500 (0.0500) time: 0.7221 data: 0.0005 max mem: 57114 Epoch: [122] [ 60/156] eta: 0:01:12 lr: 0.005639 min_lr: 0.005639 loss: 3.2670 (3.1973) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0005 max mem: 57114 Epoch: [122] [ 70/156] eta: 0:01:04 lr: 0.005636 min_lr: 0.005636 loss: 3.2278 (3.2315) weight_decay: 0.0500 (0.0500) time: 0.7221 data: 0.0004 max mem: 57114 Epoch: [122] [ 80/156] eta: 0:00:56 lr: 0.005634 min_lr: 0.005634 loss: 3.4736 (3.2597) weight_decay: 0.0500 (0.0500) time: 0.7247 data: 0.0004 max mem: 57114 Epoch: [122] [ 90/156] eta: 0:00:49 lr: 0.005631 min_lr: 0.005631 loss: 3.3408 (3.2402) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0005 max mem: 57114 Epoch: [122] [100/156] eta: 0:00:41 lr: 0.005628 min_lr: 0.005628 loss: 3.0534 (3.2015) weight_decay: 0.0500 (0.0500) time: 0.6990 data: 0.0005 max mem: 57114 Epoch: [122] [110/156] eta: 0:00:33 lr: 0.005626 min_lr: 0.005626 loss: 3.0534 (3.1946) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0004 max mem: 57114 Epoch: [122] [120/156] eta: 0:00:26 lr: 0.005623 min_lr: 0.005623 loss: 3.1813 (3.1984) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0005 max mem: 57114 Epoch: [122] [130/156] eta: 0:00:19 lr: 0.005620 min_lr: 0.005620 loss: 3.3511 (3.1947) weight_decay: 0.0500 (0.0500) time: 0.7017 data: 0.0014 max mem: 57114 Epoch: [122] [140/156] eta: 0:00:11 lr: 0.005618 min_lr: 0.005618 loss: 3.4552 (3.2089) weight_decay: 0.0500 (0.0500) time: 0.6913 data: 0.0012 max mem: 57114 Epoch: [122] [150/156] eta: 0:00:04 lr: 0.005615 min_lr: 0.005615 loss: 3.5369 (3.2181) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0001 max mem: 57114 Epoch: [122] [155/156] eta: 0:00:00 lr: 0.005614 min_lr: 0.005614 loss: 3.4971 (3.2339) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [122] Total time: 0:01:53 (0.7260 s / it) Averaged stats: lr: 0.005614 min_lr: 0.005614 loss: 3.4971 (3.2154) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8712 (0.8712) acc1: 83.3333 (83.3333) acc5: 98.4375 (98.4375) time: 2.0781 data: 1.8222 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9839 (1.0192) acc1: 79.1667 (78.1330) acc5: 97.3958 (94.8849) time: 0.5663 data: 0.3645 max mem: 57114 Test: Total time: 0:00:02 (0.5883 s / it) * Acc@1 77.352 Acc@5 93.856 loss 1.101 Accuracy of the model on the 50000 test images: 77.4% Max accuracy: 77.85% Test: [0/5] eta: 0:00:11 loss: 3.1717 (3.1717) acc1: 36.9792 (36.9792) acc5: 63.5417 (63.5417) time: 2.3606 data: 2.1173 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 3.6089 (3.5412) acc1: 29.1667 (30.1790) acc5: 57.2917 (53.7084) time: 0.6228 data: 0.4236 max mem: 57114 Test: Total time: 0:00:03 (0.6365 s / it) * Acc@1 26.381 Acc@5 47.305 loss 3.934 Accuracy of the model EMA on 50000 test images: 26.4% Max EMA accuracy: 26.38% Epoch: [123] [ 0/156] eta: 0:07:07 lr: 0.005614 min_lr: 0.005614 loss: 3.3880 (3.3880) weight_decay: 0.0500 (0.0500) time: 2.7384 data: 2.0855 max mem: 57114 Epoch: [123] [ 10/156] eta: 0:02:14 lr: 0.005611 min_lr: 0.005611 loss: 3.1692 (3.1463) weight_decay: 0.0500 (0.0500) time: 0.9188 data: 0.1899 max mem: 57114 Epoch: [123] [ 20/156] eta: 0:01:51 lr: 0.005608 min_lr: 0.005608 loss: 3.1692 (3.2161) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0004 max mem: 57114 Epoch: [123] [ 30/156] eta: 0:01:39 lr: 0.005606 min_lr: 0.005606 loss: 3.5388 (3.2886) weight_decay: 0.0500 (0.0500) time: 0.7183 data: 0.0004 max mem: 57114 Epoch: [123] [ 40/156] eta: 0:01:30 lr: 0.005603 min_lr: 0.005603 loss: 3.4779 (3.3338) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0004 max mem: 57114 Epoch: [123] [ 50/156] eta: 0:01:21 lr: 0.005600 min_lr: 0.005600 loss: 3.4178 (3.3304) weight_decay: 0.0500 (0.0500) time: 0.7333 data: 0.0004 max mem: 57114 Epoch: [123] [ 60/156] eta: 0:01:13 lr: 0.005598 min_lr: 0.005598 loss: 3.3607 (3.3062) weight_decay: 0.0500 (0.0500) time: 0.7312 data: 0.0004 max mem: 57114 Epoch: [123] [ 70/156] eta: 0:01:04 lr: 0.005595 min_lr: 0.005595 loss: 3.0917 (3.2373) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [123] [ 80/156] eta: 0:00:56 lr: 0.005592 min_lr: 0.005592 loss: 2.9275 (3.2290) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [123] [ 90/156] eta: 0:00:49 lr: 0.005590 min_lr: 0.005590 loss: 3.3001 (3.2250) weight_decay: 0.0500 (0.0500) time: 0.7022 data: 0.0004 max mem: 57114 Epoch: [123] [100/156] eta: 0:00:41 lr: 0.005587 min_lr: 0.005587 loss: 3.2500 (3.2105) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0004 max mem: 57114 Epoch: [123] [110/156] eta: 0:00:33 lr: 0.005585 min_lr: 0.005585 loss: 3.2199 (3.2066) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0004 max mem: 57114 Epoch: [123] [120/156] eta: 0:00:26 lr: 0.005582 min_lr: 0.005582 loss: 3.2487 (3.2104) weight_decay: 0.0500 (0.0500) time: 0.7186 data: 0.0004 max mem: 57114 Epoch: [123] [130/156] eta: 0:00:19 lr: 0.005579 min_lr: 0.005579 loss: 3.2487 (3.2067) weight_decay: 0.0500 (0.0500) time: 0.7073 data: 0.0008 max mem: 57114 Epoch: [123] [140/156] eta: 0:00:11 lr: 0.005577 min_lr: 0.005577 loss: 3.1682 (3.2054) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0007 max mem: 57114 Epoch: [123] [150/156] eta: 0:00:04 lr: 0.005574 min_lr: 0.005574 loss: 3.1788 (3.2001) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0001 max mem: 57114 Epoch: [123] [155/156] eta: 0:00:00 lr: 0.005573 min_lr: 0.005573 loss: 3.2090 (3.2066) weight_decay: 0.0500 (0.0500) time: 0.6889 data: 0.0001 max mem: 57114 Epoch: [123] Total time: 0:01:53 (0.7272 s / it) Averaged stats: lr: 0.005573 min_lr: 0.005573 loss: 3.2090 (3.2146) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9214 (0.9214) acc1: 81.7708 (81.7708) acc5: 97.3958 (97.3958) time: 2.1101 data: 1.8538 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9833 (0.9550) acc1: 80.7292 (78.2609) acc5: 95.3125 (94.8849) time: 0.5728 data: 0.3708 max mem: 57114 Test: Total time: 0:00:02 (0.5971 s / it) * Acc@1 77.256 Acc@5 93.966 loss 1.066 Accuracy of the model on the 50000 test images: 77.3% Max accuracy: 77.85% Test: [0/5] eta: 0:00:11 loss: 2.9552 (2.9552) acc1: 40.1042 (40.1042) acc5: 67.7083 (67.7083) time: 2.2993 data: 2.0559 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 3.3822 (3.3423) acc1: 31.7708 (32.7366) acc5: 60.4167 (56.9054) time: 0.6106 data: 0.4113 max mem: 57114 Test: Total time: 0:00:03 (0.6238 s / it) * Acc@1 28.916 Acc@5 50.569 loss 3.726 Accuracy of the model EMA on 50000 test images: 28.9% Max EMA accuracy: 28.92% Epoch: [124] [ 0/156] eta: 0:07:13 lr: 0.005572 min_lr: 0.005572 loss: 3.4174 (3.4174) weight_decay: 0.0500 (0.0500) time: 2.7792 data: 2.1303 max mem: 57114 Epoch: [124] [ 10/156] eta: 0:02:10 lr: 0.005570 min_lr: 0.005570 loss: 3.3403 (3.2297) weight_decay: 0.0500 (0.0500) time: 0.8963 data: 0.1939 max mem: 57114 Epoch: [124] [ 20/156] eta: 0:01:50 lr: 0.005567 min_lr: 0.005567 loss: 3.3403 (3.2729) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0003 max mem: 57114 Epoch: [124] [ 30/156] eta: 0:01:39 lr: 0.005564 min_lr: 0.005564 loss: 3.1523 (3.1647) weight_decay: 0.0500 (0.0500) time: 0.7311 data: 0.0003 max mem: 57114 Epoch: [124] [ 40/156] eta: 0:01:29 lr: 0.005562 min_lr: 0.005562 loss: 3.0133 (3.1386) weight_decay: 0.0500 (0.0500) time: 0.7255 data: 0.0004 max mem: 57114 Epoch: [124] [ 50/156] eta: 0:01:20 lr: 0.005559 min_lr: 0.005559 loss: 3.0620 (3.0881) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0004 max mem: 57114 Epoch: [124] [ 60/156] eta: 0:01:12 lr: 0.005557 min_lr: 0.005557 loss: 3.0784 (3.1241) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0004 max mem: 57114 Epoch: [124] [ 70/156] eta: 0:01:04 lr: 0.005554 min_lr: 0.005554 loss: 3.4743 (3.1625) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0004 max mem: 57114 Epoch: [124] [ 80/156] eta: 0:00:56 lr: 0.005551 min_lr: 0.005551 loss: 3.5700 (3.2197) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [124] [ 90/156] eta: 0:00:48 lr: 0.005549 min_lr: 0.005549 loss: 3.4086 (3.2118) weight_decay: 0.0500 (0.0500) time: 0.7127 data: 0.0004 max mem: 57114 Epoch: [124] [100/156] eta: 0:00:41 lr: 0.005546 min_lr: 0.005546 loss: 3.3190 (3.2277) weight_decay: 0.0500 (0.0500) time: 0.7048 data: 0.0004 max mem: 57114 Epoch: [124] [110/156] eta: 0:00:33 lr: 0.005543 min_lr: 0.005543 loss: 3.5100 (3.2380) weight_decay: 0.0500 (0.0500) time: 0.7048 data: 0.0004 max mem: 57114 Epoch: [124] [120/156] eta: 0:00:26 lr: 0.005541 min_lr: 0.005541 loss: 3.3399 (3.2351) weight_decay: 0.0500 (0.0500) time: 0.7016 data: 0.0004 max mem: 57114 Epoch: [124] [130/156] eta: 0:00:18 lr: 0.005538 min_lr: 0.005538 loss: 3.2984 (3.2361) weight_decay: 0.0500 (0.0500) time: 0.6972 data: 0.0009 max mem: 57114 Epoch: [124] [140/156] eta: 0:00:11 lr: 0.005535 min_lr: 0.005535 loss: 3.4028 (3.2461) weight_decay: 0.0500 (0.0500) time: 0.6889 data: 0.0007 max mem: 57114 Epoch: [124] [150/156] eta: 0:00:04 lr: 0.005533 min_lr: 0.005533 loss: 3.4326 (3.2455) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.0001 max mem: 57114 Epoch: [124] [155/156] eta: 0:00:00 lr: 0.005531 min_lr: 0.005531 loss: 3.3589 (3.2441) weight_decay: 0.0500 (0.0500) time: 0.6863 data: 0.0001 max mem: 57114 Epoch: [124] Total time: 0:01:52 (0.7234 s / it) Averaged stats: lr: 0.005531 min_lr: 0.005531 loss: 3.3589 (3.2101) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.9680 (0.9680) acc1: 86.9792 (86.9792) acc5: 97.3958 (97.3958) time: 2.2047 data: 1.9492 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1202 (1.1449) acc1: 78.6458 (79.7954) acc5: 97.3958 (94.6292) time: 0.5915 data: 0.3899 max mem: 57114 Test: Total time: 0:00:03 (0.6115 s / it) * Acc@1 77.829 Acc@5 94.142 loss 1.259 Accuracy of the model on the 50000 test images: 77.8% Max accuracy: 77.85% Test: [0/5] eta: 0:00:12 loss: 2.7563 (2.7563) acc1: 43.2292 (43.2292) acc5: 71.8750 (71.8750) time: 2.4822 data: 2.2387 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 3.1783 (3.1542) acc1: 34.8958 (35.2941) acc5: 63.5417 (59.8465) time: 0.6471 data: 0.4478 max mem: 57114 Test: Total time: 0:00:03 (0.6594 s / it) * Acc@1 31.314 Acc@5 53.736 loss 3.529 Accuracy of the model EMA on 50000 test images: 31.3% Max EMA accuracy: 31.31% Epoch: [125] [ 0/156] eta: 0:07:26 lr: 0.005531 min_lr: 0.005531 loss: 3.0864 (3.0864) weight_decay: 0.0500 (0.0500) time: 2.8619 data: 2.2075 max mem: 57114 Epoch: [125] [ 10/156] eta: 0:02:13 lr: 0.005528 min_lr: 0.005528 loss: 3.0864 (3.0006) weight_decay: 0.0500 (0.0500) time: 0.9121 data: 0.2009 max mem: 57114 Epoch: [125] [ 20/156] eta: 0:01:51 lr: 0.005526 min_lr: 0.005526 loss: 3.2645 (3.1157) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0003 max mem: 57114 Epoch: [125] [ 30/156] eta: 0:01:39 lr: 0.005523 min_lr: 0.005523 loss: 3.2645 (3.1341) weight_decay: 0.0500 (0.0500) time: 0.7258 data: 0.0003 max mem: 57114 Epoch: [125] [ 40/156] eta: 0:01:29 lr: 0.005520 min_lr: 0.005520 loss: 3.3568 (3.2049) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0004 max mem: 57114 Epoch: [125] [ 50/156] eta: 0:01:21 lr: 0.005518 min_lr: 0.005518 loss: 3.3219 (3.1898) weight_decay: 0.0500 (0.0500) time: 0.7284 data: 0.0004 max mem: 57114 Epoch: [125] [ 60/156] eta: 0:01:12 lr: 0.005515 min_lr: 0.005515 loss: 3.2714 (3.1875) weight_decay: 0.0500 (0.0500) time: 0.7338 data: 0.0004 max mem: 57114 Epoch: [125] [ 70/156] eta: 0:01:05 lr: 0.005512 min_lr: 0.005512 loss: 3.4745 (3.2480) weight_decay: 0.0500 (0.0500) time: 0.7283 data: 0.0004 max mem: 57114 Epoch: [125] [ 80/156] eta: 0:00:56 lr: 0.005510 min_lr: 0.005510 loss: 3.5404 (3.2588) weight_decay: 0.0500 (0.0500) time: 0.7189 data: 0.0004 max mem: 57114 Epoch: [125] [ 90/156] eta: 0:00:49 lr: 0.005507 min_lr: 0.005507 loss: 3.3392 (3.2371) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0004 max mem: 57114 Epoch: [125] [100/156] eta: 0:00:41 lr: 0.005504 min_lr: 0.005504 loss: 3.2377 (3.2283) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0004 max mem: 57114 Epoch: [125] [110/156] eta: 0:00:33 lr: 0.005502 min_lr: 0.005502 loss: 3.3526 (3.2420) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0004 max mem: 57114 Epoch: [125] [120/156] eta: 0:00:26 lr: 0.005499 min_lr: 0.005499 loss: 3.3812 (3.2440) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [125] [130/156] eta: 0:00:19 lr: 0.005496 min_lr: 0.005496 loss: 3.3812 (3.2505) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0008 max mem: 57114 Epoch: [125] [140/156] eta: 0:00:11 lr: 0.005494 min_lr: 0.005494 loss: 3.4270 (3.2470) weight_decay: 0.0500 (0.0500) time: 0.6917 data: 0.0007 max mem: 57114 Epoch: [125] [150/156] eta: 0:00:04 lr: 0.005491 min_lr: 0.005491 loss: 3.1040 (3.2312) weight_decay: 0.0500 (0.0500) time: 0.6883 data: 0.0001 max mem: 57114 Epoch: [125] [155/156] eta: 0:00:00 lr: 0.005490 min_lr: 0.005490 loss: 3.0640 (3.2227) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0001 max mem: 57114 Epoch: [125] Total time: 0:01:53 (0.7282 s / it) Averaged stats: lr: 0.005490 min_lr: 0.005490 loss: 3.0640 (3.2055) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7994 (0.7994) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.0773 data: 1.8215 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8952 (0.9035) acc1: 79.6875 (80.0512) acc5: 96.3542 (93.8619) time: 0.5661 data: 0.3644 max mem: 57114 Test: Total time: 0:00:02 (0.5921 s / it) * Acc@1 78.203 Acc@5 94.443 loss 1.010 Accuracy of the model on the 50000 test images: 78.2% Max accuracy: 78.20% Test: [0/5] eta: 0:00:10 loss: 2.5731 (2.5731) acc1: 44.7917 (44.7917) acc5: 74.4792 (74.4792) time: 2.0528 data: 1.8093 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.9978 (2.9804) acc1: 39.0625 (38.2353) acc5: 65.6250 (62.1483) time: 0.5612 data: 0.3619 max mem: 57114 Test: Total time: 0:00:02 (0.5732 s / it) * Acc@1 33.758 Acc@5 56.624 loss 3.350 Accuracy of the model EMA on 50000 test images: 33.8% Max EMA accuracy: 33.76% Epoch: [126] [ 0/156] eta: 0:08:23 lr: 0.005489 min_lr: 0.005489 loss: 2.7455 (2.7455) weight_decay: 0.0500 (0.0500) time: 3.2281 data: 2.5795 max mem: 57114 Epoch: [126] [ 10/156] eta: 0:02:16 lr: 0.005487 min_lr: 0.005487 loss: 3.4330 (3.1263) weight_decay: 0.0500 (0.0500) time: 0.9367 data: 0.2347 max mem: 57114 Epoch: [126] [ 20/156] eta: 0:01:53 lr: 0.005484 min_lr: 0.005484 loss: 3.3097 (3.1746) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0003 max mem: 57114 Epoch: [126] [ 30/156] eta: 0:01:41 lr: 0.005481 min_lr: 0.005481 loss: 3.1832 (3.1534) weight_decay: 0.0500 (0.0500) time: 0.7363 data: 0.0003 max mem: 57114 Epoch: [126] [ 40/156] eta: 0:01:31 lr: 0.005479 min_lr: 0.005479 loss: 3.2290 (3.1666) weight_decay: 0.0500 (0.0500) time: 0.7444 data: 0.0004 max mem: 57114 Epoch: [126] [ 50/156] eta: 0:01:22 lr: 0.005476 min_lr: 0.005476 loss: 3.3707 (3.2053) weight_decay: 0.0500 (0.0500) time: 0.7374 data: 0.0004 max mem: 57114 Epoch: [126] [ 60/156] eta: 0:01:14 lr: 0.005473 min_lr: 0.005473 loss: 3.3211 (3.1621) weight_decay: 0.0500 (0.0500) time: 0.7480 data: 0.0004 max mem: 57114 Epoch: [126] [ 70/156] eta: 0:01:05 lr: 0.005471 min_lr: 0.005471 loss: 3.2061 (3.1963) weight_decay: 0.0500 (0.0500) time: 0.7324 data: 0.0004 max mem: 57114 Epoch: [126] [ 80/156] eta: 0:00:57 lr: 0.005468 min_lr: 0.005468 loss: 3.3601 (3.2029) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0004 max mem: 57114 Epoch: [126] [ 90/156] eta: 0:00:49 lr: 0.005465 min_lr: 0.005465 loss: 3.4123 (3.2075) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0004 max mem: 57114 Epoch: [126] [100/156] eta: 0:00:41 lr: 0.005463 min_lr: 0.005463 loss: 3.2928 (3.2002) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0003 max mem: 57114 Epoch: [126] [110/156] eta: 0:00:34 lr: 0.005460 min_lr: 0.005460 loss: 3.4460 (3.2234) weight_decay: 0.0500 (0.0500) time: 0.7171 data: 0.0003 max mem: 57114 Epoch: [126] [120/156] eta: 0:00:26 lr: 0.005457 min_lr: 0.005457 loss: 3.4077 (3.2039) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [126] [130/156] eta: 0:00:19 lr: 0.005455 min_lr: 0.005455 loss: 3.1595 (3.2059) weight_decay: 0.0500 (0.0500) time: 0.6943 data: 0.0009 max mem: 57114 Epoch: [126] [140/156] eta: 0:00:11 lr: 0.005452 min_lr: 0.005452 loss: 3.2204 (3.1923) weight_decay: 0.0500 (0.0500) time: 0.6888 data: 0.0007 max mem: 57114 Epoch: [126] [150/156] eta: 0:00:04 lr: 0.005449 min_lr: 0.005449 loss: 3.2204 (3.1926) weight_decay: 0.0500 (0.0500) time: 0.6892 data: 0.0001 max mem: 57114 Epoch: [126] [155/156] eta: 0:00:00 lr: 0.005448 min_lr: 0.005448 loss: 3.1203 (3.1886) weight_decay: 0.0500 (0.0500) time: 0.6927 data: 0.0001 max mem: 57114 Epoch: [126] Total time: 0:01:54 (0.7331 s / it) Averaged stats: lr: 0.005448 min_lr: 0.005448 loss: 3.1203 (3.2026) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8597 (0.8597) acc1: 81.7708 (81.7708) acc5: 96.8750 (96.8750) time: 2.1080 data: 1.8521 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9322 (0.9139) acc1: 79.6875 (77.7494) acc5: 95.3125 (94.7570) time: 0.5722 data: 0.3705 max mem: 57114 Test: Total time: 0:00:02 (0.5911 s / it) * Acc@1 78.207 Acc@5 94.463 loss 0.995 Accuracy of the model on the 50000 test images: 78.2% Max accuracy: 78.21% Test: [0/5] eta: 0:00:10 loss: 2.4107 (2.4107) acc1: 47.9167 (47.9167) acc5: 75.5208 (75.5208) time: 2.0265 data: 1.7829 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.8385 (2.8238) acc1: 40.1042 (39.8977) acc5: 68.7500 (64.8338) time: 0.5562 data: 0.3568 max mem: 57114 Test: Total time: 0:00:02 (0.5687 s / it) * Acc@1 35.955 Acc@5 59.275 loss 3.186 Accuracy of the model EMA on 50000 test images: 36.0% Max EMA accuracy: 35.96% Epoch: [127] [ 0/156] eta: 0:07:13 lr: 0.005448 min_lr: 0.005448 loss: 3.1202 (3.1202) weight_decay: 0.0500 (0.0500) time: 2.7809 data: 2.1179 max mem: 57114 Epoch: [127] [ 10/156] eta: 0:02:12 lr: 0.005445 min_lr: 0.005445 loss: 3.3376 (3.1800) weight_decay: 0.0500 (0.0500) time: 0.9088 data: 0.1929 max mem: 57114 Epoch: [127] [ 20/156] eta: 0:01:51 lr: 0.005442 min_lr: 0.005442 loss: 3.2205 (3.0701) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0004 max mem: 57114 Epoch: [127] [ 30/156] eta: 0:01:39 lr: 0.005440 min_lr: 0.005440 loss: 3.2422 (3.1039) weight_decay: 0.0500 (0.0500) time: 0.7255 data: 0.0003 max mem: 57114 Epoch: [127] [ 40/156] eta: 0:01:30 lr: 0.005437 min_lr: 0.005437 loss: 3.2987 (3.0458) weight_decay: 0.0500 (0.0500) time: 0.7329 data: 0.0004 max mem: 57114 Epoch: [127] [ 50/156] eta: 0:01:21 lr: 0.005434 min_lr: 0.005434 loss: 3.1676 (3.1070) weight_decay: 0.0500 (0.0500) time: 0.7434 data: 0.0004 max mem: 57114 Epoch: [127] [ 60/156] eta: 0:01:13 lr: 0.005432 min_lr: 0.005432 loss: 3.2970 (3.1231) weight_decay: 0.0500 (0.0500) time: 0.7442 data: 0.0004 max mem: 57114 Epoch: [127] [ 70/156] eta: 0:01:05 lr: 0.005429 min_lr: 0.005429 loss: 3.4689 (3.1573) weight_decay: 0.0500 (0.0500) time: 0.7407 data: 0.0004 max mem: 57114 Epoch: [127] [ 80/156] eta: 0:00:57 lr: 0.005426 min_lr: 0.005426 loss: 3.2919 (3.1640) weight_decay: 0.0500 (0.0500) time: 0.7312 data: 0.0004 max mem: 57114 Epoch: [127] [ 90/156] eta: 0:00:49 lr: 0.005424 min_lr: 0.005424 loss: 3.2601 (3.1852) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0004 max mem: 57114 Epoch: [127] [100/156] eta: 0:00:41 lr: 0.005421 min_lr: 0.005421 loss: 3.0685 (3.1703) weight_decay: 0.0500 (0.0500) time: 0.6963 data: 0.0004 max mem: 57114 Epoch: [127] [110/156] eta: 0:00:34 lr: 0.005418 min_lr: 0.005418 loss: 3.2083 (3.1865) weight_decay: 0.0500 (0.0500) time: 0.7023 data: 0.0004 max mem: 57114 Epoch: [127] [120/156] eta: 0:00:26 lr: 0.005416 min_lr: 0.005416 loss: 3.5054 (3.1970) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [127] [130/156] eta: 0:00:19 lr: 0.005413 min_lr: 0.005413 loss: 3.3679 (3.2009) weight_decay: 0.0500 (0.0500) time: 0.6995 data: 0.0009 max mem: 57114 Epoch: [127] [140/156] eta: 0:00:11 lr: 0.005410 min_lr: 0.005410 loss: 3.2107 (3.2006) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0007 max mem: 57114 Epoch: [127] [150/156] eta: 0:00:04 lr: 0.005407 min_lr: 0.005407 loss: 3.2107 (3.1920) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0001 max mem: 57114 Epoch: [127] [155/156] eta: 0:00:00 lr: 0.005406 min_lr: 0.005406 loss: 3.3517 (3.2007) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0001 max mem: 57114 Epoch: [127] Total time: 0:01:53 (0.7303 s / it) Averaged stats: lr: 0.005406 min_lr: 0.005406 loss: 3.3517 (3.1962) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8597 (0.8597) acc1: 83.3333 (83.3333) acc5: 98.9583 (98.9583) time: 2.0389 data: 1.7834 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0050 (0.9792) acc1: 81.7708 (79.0281) acc5: 95.8333 (94.6292) time: 0.5585 data: 0.3567 max mem: 57114 Test: Total time: 0:00:02 (0.5788 s / it) * Acc@1 78.379 Acc@5 94.479 loss 1.088 Accuracy of the model on the 50000 test images: 78.4% Max accuracy: 78.38% Test: [0/5] eta: 0:00:11 loss: 2.2632 (2.2632) acc1: 50.5208 (50.5208) acc5: 78.1250 (78.1250) time: 2.2054 data: 1.9619 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.6951 (2.6783) acc1: 42.1875 (42.0716) acc5: 71.3542 (67.0077) time: 0.5917 data: 0.3925 max mem: 57114 Test: Total time: 0:00:03 (0.6061 s / it) * Acc@1 37.990 Acc@5 61.813 loss 3.034 Accuracy of the model EMA on 50000 test images: 38.0% Max EMA accuracy: 37.99% Epoch: [128] [ 0/156] eta: 0:07:33 lr: 0.005406 min_lr: 0.005406 loss: 3.4155 (3.4155) weight_decay: 0.0500 (0.0500) time: 2.9102 data: 2.2600 max mem: 57114 Epoch: [128] [ 10/156] eta: 0:02:16 lr: 0.005403 min_lr: 0.005403 loss: 2.9810 (3.1389) weight_decay: 0.0500 (0.0500) time: 0.9350 data: 0.2057 max mem: 57114 Epoch: [128] [ 20/156] eta: 0:01:54 lr: 0.005400 min_lr: 0.005400 loss: 3.0561 (3.1474) weight_decay: 0.0500 (0.0500) time: 0.7408 data: 0.0003 max mem: 57114 Epoch: [128] [ 30/156] eta: 0:01:41 lr: 0.005398 min_lr: 0.005398 loss: 3.0944 (3.1231) weight_decay: 0.0500 (0.0500) time: 0.7322 data: 0.0003 max mem: 57114 Epoch: [128] [ 40/156] eta: 0:01:31 lr: 0.005395 min_lr: 0.005395 loss: 3.0944 (3.1388) weight_decay: 0.0500 (0.0500) time: 0.7372 data: 0.0004 max mem: 57114 Epoch: [128] [ 50/156] eta: 0:01:22 lr: 0.005392 min_lr: 0.005392 loss: 3.2982 (3.1738) weight_decay: 0.0500 (0.0500) time: 0.7427 data: 0.0004 max mem: 57114 Epoch: [128] [ 60/156] eta: 0:01:14 lr: 0.005390 min_lr: 0.005390 loss: 3.3593 (3.1693) weight_decay: 0.0500 (0.0500) time: 0.7372 data: 0.0004 max mem: 57114 Epoch: [128] [ 70/156] eta: 0:01:05 lr: 0.005387 min_lr: 0.005387 loss: 3.3170 (3.1987) weight_decay: 0.0500 (0.0500) time: 0.7334 data: 0.0004 max mem: 57114 Epoch: [128] [ 80/156] eta: 0:00:57 lr: 0.005384 min_lr: 0.005384 loss: 3.3460 (3.2176) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0004 max mem: 57114 Epoch: [128] [ 90/156] eta: 0:00:49 lr: 0.005382 min_lr: 0.005382 loss: 3.3033 (3.2097) weight_decay: 0.0500 (0.0500) time: 0.7105 data: 0.0004 max mem: 57114 Epoch: [128] [100/156] eta: 0:00:42 lr: 0.005379 min_lr: 0.005379 loss: 3.3596 (3.2222) weight_decay: 0.0500 (0.0500) time: 0.7061 data: 0.0004 max mem: 57114 Epoch: [128] [110/156] eta: 0:00:34 lr: 0.005376 min_lr: 0.005376 loss: 3.5529 (3.2437) weight_decay: 0.0500 (0.0500) time: 0.7132 data: 0.0004 max mem: 57114 Epoch: [128] [120/156] eta: 0:00:26 lr: 0.005373 min_lr: 0.005373 loss: 3.2476 (3.2343) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [128] [130/156] eta: 0:00:19 lr: 0.005371 min_lr: 0.005371 loss: 3.3170 (3.2531) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0008 max mem: 57114 Epoch: [128] [140/156] eta: 0:00:11 lr: 0.005368 min_lr: 0.005368 loss: 3.3911 (3.2449) weight_decay: 0.0500 (0.0500) time: 0.6877 data: 0.0007 max mem: 57114 Epoch: [128] [150/156] eta: 0:00:04 lr: 0.005365 min_lr: 0.005365 loss: 3.1131 (3.2274) weight_decay: 0.0500 (0.0500) time: 0.6854 data: 0.0001 max mem: 57114 Epoch: [128] [155/156] eta: 0:00:00 lr: 0.005364 min_lr: 0.005364 loss: 3.1131 (3.2194) weight_decay: 0.0500 (0.0500) time: 0.6879 data: 0.0001 max mem: 57114 Epoch: [128] Total time: 0:01:54 (0.7342 s / it) Averaged stats: lr: 0.005364 min_lr: 0.005364 loss: 3.1131 (3.1960) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8612 (0.8612) acc1: 83.8542 (83.8542) acc5: 96.8750 (96.8750) time: 2.0788 data: 1.8225 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0472 (0.9972) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.8849) time: 0.5665 data: 0.3646 max mem: 57114 Test: Total time: 0:00:02 (0.5909 s / it) * Acc@1 77.899 Acc@5 94.246 loss 1.072 Accuracy of the model on the 50000 test images: 77.9% Max accuracy: 78.38% Test: [0/5] eta: 0:00:11 loss: 2.1291 (2.1291) acc1: 54.6875 (54.6875) acc5: 79.1667 (79.1667) time: 2.3852 data: 2.1417 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.5642 (2.5435) acc1: 45.8333 (45.1407) acc5: 73.4375 (68.6701) time: 0.6277 data: 0.4284 max mem: 57114 Test: Total time: 0:00:03 (0.6399 s / it) * Acc@1 40.185 Acc@5 64.146 loss 2.892 Accuracy of the model EMA on 50000 test images: 40.2% Max EMA accuracy: 40.19% Epoch: [129] [ 0/156] eta: 0:07:07 lr: 0.005364 min_lr: 0.005364 loss: 3.8080 (3.8080) weight_decay: 0.0500 (0.0500) time: 2.7385 data: 2.0862 max mem: 57114 Epoch: [129] [ 10/156] eta: 0:02:14 lr: 0.005361 min_lr: 0.005361 loss: 3.2138 (3.1780) weight_decay: 0.0500 (0.0500) time: 0.9202 data: 0.1899 max mem: 57114 Epoch: [129] [ 20/156] eta: 0:01:51 lr: 0.005358 min_lr: 0.005358 loss: 3.1525 (3.1559) weight_decay: 0.0500 (0.0500) time: 0.7235 data: 0.0003 max mem: 57114 Epoch: [129] [ 30/156] eta: 0:01:39 lr: 0.005356 min_lr: 0.005356 loss: 3.1772 (3.1789) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0003 max mem: 57114 Epoch: [129] [ 40/156] eta: 0:01:29 lr: 0.005353 min_lr: 0.005353 loss: 3.2351 (3.1768) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0003 max mem: 57114 Epoch: [129] [ 50/156] eta: 0:01:20 lr: 0.005350 min_lr: 0.005350 loss: 3.3275 (3.1978) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0004 max mem: 57114 Epoch: [129] [ 60/156] eta: 0:01:12 lr: 0.005347 min_lr: 0.005347 loss: 3.0467 (3.1208) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0004 max mem: 57114 Epoch: [129] [ 70/156] eta: 0:01:04 lr: 0.005345 min_lr: 0.005345 loss: 2.8959 (3.1326) weight_decay: 0.0500 (0.0500) time: 0.7159 data: 0.0004 max mem: 57114 Epoch: [129] [ 80/156] eta: 0:00:56 lr: 0.005342 min_lr: 0.005342 loss: 3.1724 (3.1410) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [129] [ 90/156] eta: 0:00:48 lr: 0.005339 min_lr: 0.005339 loss: 3.2980 (3.1418) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0004 max mem: 57114 Epoch: [129] [100/156] eta: 0:00:41 lr: 0.005337 min_lr: 0.005337 loss: 3.2193 (3.1415) weight_decay: 0.0500 (0.0500) time: 0.7045 data: 0.0004 max mem: 57114 Epoch: [129] [110/156] eta: 0:00:33 lr: 0.005334 min_lr: 0.005334 loss: 3.2340 (3.1422) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0004 max mem: 57114 Epoch: [129] [120/156] eta: 0:00:26 lr: 0.005331 min_lr: 0.005331 loss: 3.4618 (3.1592) weight_decay: 0.0500 (0.0500) time: 0.7148 data: 0.0004 max mem: 57114 Epoch: [129] [130/156] eta: 0:00:18 lr: 0.005329 min_lr: 0.005329 loss: 3.2008 (3.1390) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0009 max mem: 57114 Epoch: [129] [140/156] eta: 0:00:11 lr: 0.005326 min_lr: 0.005326 loss: 3.0633 (3.1498) weight_decay: 0.0500 (0.0500) time: 0.6904 data: 0.0007 max mem: 57114 Epoch: [129] [150/156] eta: 0:00:04 lr: 0.005323 min_lr: 0.005323 loss: 3.2466 (3.1509) weight_decay: 0.0500 (0.0500) time: 0.6883 data: 0.0001 max mem: 57114 Epoch: [129] [155/156] eta: 0:00:00 lr: 0.005322 min_lr: 0.005322 loss: 3.2466 (3.1561) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.0001 max mem: 57114 Epoch: [129] Total time: 0:01:53 (0.7246 s / it) Averaged stats: lr: 0.005322 min_lr: 0.005322 loss: 3.2466 (3.1951) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.9103 (0.9103) acc1: 87.5000 (87.5000) acc5: 97.9167 (97.9167) time: 2.2171 data: 1.9576 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0562 (1.0549) acc1: 81.7708 (80.4348) acc5: 96.3542 (94.8849) time: 0.5944 data: 0.3917 max mem: 57114 Test: Total time: 0:00:03 (0.6183 s / it) * Acc@1 78.249 Acc@5 94.230 loss 1.146 Accuracy of the model on the 50000 test images: 78.2% Max accuracy: 78.38% Test: [0/5] eta: 0:00:11 loss: 2.0128 (2.0128) acc1: 57.8125 (57.8125) acc5: 80.2083 (80.2083) time: 2.3180 data: 2.0745 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.4495 (2.4234) acc1: 47.3958 (47.0588) acc5: 74.4792 (70.3325) time: 0.6143 data: 0.4150 max mem: 57114 Test: Total time: 0:00:03 (0.6287 s / it) * Acc@1 42.074 Acc@5 66.312 loss 2.763 Accuracy of the model EMA on 50000 test images: 42.1% Max EMA accuracy: 42.07% Epoch: [130] [ 0/156] eta: 0:06:10 lr: 0.005321 min_lr: 0.005321 loss: 3.6640 (3.6640) weight_decay: 0.0500 (0.0500) time: 2.3742 data: 1.7192 max mem: 57114 Epoch: [130] [ 10/156] eta: 0:02:05 lr: 0.005319 min_lr: 0.005319 loss: 3.5940 (3.3331) weight_decay: 0.0500 (0.0500) time: 0.8599 data: 0.1565 max mem: 57114 Epoch: [130] [ 20/156] eta: 0:01:49 lr: 0.005316 min_lr: 0.005316 loss: 3.4676 (3.3104) weight_decay: 0.0500 (0.0500) time: 0.7298 data: 0.0003 max mem: 57114 Epoch: [130] [ 30/156] eta: 0:01:38 lr: 0.005313 min_lr: 0.005313 loss: 3.3589 (3.2398) weight_decay: 0.0500 (0.0500) time: 0.7389 data: 0.0003 max mem: 57114 Epoch: [130] [ 40/156] eta: 0:01:29 lr: 0.005311 min_lr: 0.005311 loss: 3.3071 (3.2324) weight_decay: 0.0500 (0.0500) time: 0.7258 data: 0.0003 max mem: 57114 Epoch: [130] [ 50/156] eta: 0:01:20 lr: 0.005308 min_lr: 0.005308 loss: 3.1167 (3.2077) weight_decay: 0.0500 (0.0500) time: 0.7314 data: 0.0004 max mem: 57114 Epoch: [130] [ 60/156] eta: 0:01:12 lr: 0.005305 min_lr: 0.005305 loss: 3.1600 (3.2302) weight_decay: 0.0500 (0.0500) time: 0.7247 data: 0.0004 max mem: 57114 Epoch: [130] [ 70/156] eta: 0:01:04 lr: 0.005302 min_lr: 0.005302 loss: 3.0815 (3.1812) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0004 max mem: 57114 Epoch: [130] [ 80/156] eta: 0:00:56 lr: 0.005300 min_lr: 0.005300 loss: 2.8395 (3.1487) weight_decay: 0.0500 (0.0500) time: 0.6972 data: 0.0004 max mem: 57114 Epoch: [130] [ 90/156] eta: 0:00:48 lr: 0.005297 min_lr: 0.005297 loss: 3.1462 (3.1782) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0003 max mem: 57114 Epoch: [130] [100/156] eta: 0:00:41 lr: 0.005294 min_lr: 0.005294 loss: 3.3612 (3.1846) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0004 max mem: 57114 Epoch: [130] [110/156] eta: 0:00:33 lr: 0.005292 min_lr: 0.005292 loss: 3.3511 (3.1963) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [130] [120/156] eta: 0:00:26 lr: 0.005289 min_lr: 0.005289 loss: 3.3471 (3.1848) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [130] [130/156] eta: 0:00:18 lr: 0.005286 min_lr: 0.005286 loss: 3.1380 (3.1795) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0008 max mem: 57114 Epoch: [130] [140/156] eta: 0:00:11 lr: 0.005283 min_lr: 0.005283 loss: 3.4206 (3.1939) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.0007 max mem: 57114 Epoch: [130] [150/156] eta: 0:00:04 lr: 0.005281 min_lr: 0.005281 loss: 3.4505 (3.2040) weight_decay: 0.0500 (0.0500) time: 0.6785 data: 0.0001 max mem: 57114 Epoch: [130] [155/156] eta: 0:00:00 lr: 0.005279 min_lr: 0.005279 loss: 3.4262 (3.2019) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [130] Total time: 0:01:52 (0.7238 s / it) Averaged stats: lr: 0.005279 min_lr: 0.005279 loss: 3.4262 (3.1854) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8130 (0.8130) acc1: 84.3750 (84.3750) acc5: 97.9167 (97.9167) time: 2.0427 data: 1.7868 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9661 (0.9490) acc1: 79.6875 (78.5166) acc5: 96.3542 (94.3734) time: 0.5592 data: 0.3574 max mem: 57114 Test: Total time: 0:00:02 (0.5834 s / it) * Acc@1 77.709 Acc@5 93.978 loss 1.038 Accuracy of the model on the 50000 test images: 77.7% Max accuracy: 78.38% Test: [0/5] eta: 0:00:11 loss: 1.9109 (1.9109) acc1: 58.8542 (58.8542) acc5: 80.2083 (80.2083) time: 2.3074 data: 2.0638 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.3469 (2.3149) acc1: 48.4375 (47.9540) acc5: 76.5625 (71.9949) time: 0.6122 data: 0.4128 max mem: 57114 Test: Total time: 0:00:03 (0.6239 s / it) * Acc@1 43.914 Acc@5 68.264 loss 2.647 Accuracy of the model EMA on 50000 test images: 43.9% Max EMA accuracy: 43.91% Epoch: [131] [ 0/156] eta: 0:06:48 lr: 0.005279 min_lr: 0.005279 loss: 3.0472 (3.0472) weight_decay: 0.0500 (0.0500) time: 2.6203 data: 1.8317 max mem: 57114 Epoch: [131] [ 10/156] eta: 0:02:09 lr: 0.005276 min_lr: 0.005276 loss: 3.0303 (2.9676) weight_decay: 0.0500 (0.0500) time: 0.8869 data: 0.1668 max mem: 57114 Epoch: [131] [ 20/156] eta: 0:01:50 lr: 0.005274 min_lr: 0.005274 loss: 3.2569 (3.1148) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0003 max mem: 57114 Epoch: [131] [ 30/156] eta: 0:01:39 lr: 0.005271 min_lr: 0.005271 loss: 3.3658 (3.1377) weight_decay: 0.0500 (0.0500) time: 0.7315 data: 0.0003 max mem: 57114 Epoch: [131] [ 40/156] eta: 0:01:29 lr: 0.005268 min_lr: 0.005268 loss: 3.2489 (3.1813) weight_decay: 0.0500 (0.0500) time: 0.7240 data: 0.0004 max mem: 57114 Epoch: [131] [ 50/156] eta: 0:01:20 lr: 0.005265 min_lr: 0.005265 loss: 3.2577 (3.2089) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0004 max mem: 57114 Epoch: [131] [ 60/156] eta: 0:01:12 lr: 0.005263 min_lr: 0.005263 loss: 3.2432 (3.2003) weight_decay: 0.0500 (0.0500) time: 0.7153 data: 0.0004 max mem: 57114 Epoch: [131] [ 70/156] eta: 0:01:04 lr: 0.005260 min_lr: 0.005260 loss: 3.2523 (3.1842) weight_decay: 0.0500 (0.0500) time: 0.7210 data: 0.0004 max mem: 57114 Epoch: [131] [ 80/156] eta: 0:00:56 lr: 0.005257 min_lr: 0.005257 loss: 3.2767 (3.1947) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0003 max mem: 57114 Epoch: [131] [ 90/156] eta: 0:00:48 lr: 0.005254 min_lr: 0.005254 loss: 3.3351 (3.2033) weight_decay: 0.0500 (0.0500) time: 0.7144 data: 0.0003 max mem: 57114 Epoch: [131] [100/156] eta: 0:00:41 lr: 0.005252 min_lr: 0.005252 loss: 3.4480 (3.2002) weight_decay: 0.0500 (0.0500) time: 0.7132 data: 0.0003 max mem: 57114 Epoch: [131] [110/156] eta: 0:00:33 lr: 0.005249 min_lr: 0.005249 loss: 3.3529 (3.2138) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [131] [120/156] eta: 0:00:26 lr: 0.005246 min_lr: 0.005246 loss: 3.1567 (3.1965) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0004 max mem: 57114 Epoch: [131] [130/156] eta: 0:00:18 lr: 0.005244 min_lr: 0.005244 loss: 3.1326 (3.2034) weight_decay: 0.0500 (0.0500) time: 0.6952 data: 0.0009 max mem: 57114 Epoch: [131] [140/156] eta: 0:00:11 lr: 0.005241 min_lr: 0.005241 loss: 3.3101 (3.2088) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0008 max mem: 57114 Epoch: [131] [150/156] eta: 0:00:04 lr: 0.005238 min_lr: 0.005238 loss: 3.4211 (3.2213) weight_decay: 0.0500 (0.0500) time: 0.6862 data: 0.0001 max mem: 57114 Epoch: [131] [155/156] eta: 0:00:00 lr: 0.005237 min_lr: 0.005237 loss: 3.4076 (3.2197) weight_decay: 0.0500 (0.0500) time: 0.6886 data: 0.0001 max mem: 57114 Epoch: [131] Total time: 0:01:52 (0.7240 s / it) Averaged stats: lr: 0.005237 min_lr: 0.005237 loss: 3.4076 (3.1837) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8272 (0.8272) acc1: 84.8958 (84.8958) acc5: 96.3542 (96.3542) time: 2.1725 data: 1.9170 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8848 (0.9353) acc1: 80.7292 (77.2379) acc5: 96.3542 (94.7570) time: 0.5852 data: 0.3835 max mem: 57114 Test: Total time: 0:00:03 (0.6060 s / it) * Acc@1 77.939 Acc@5 94.122 loss 1.020 Accuracy of the model on the 50000 test images: 77.9% Max accuracy: 78.38% Test: [0/5] eta: 0:00:11 loss: 1.8143 (1.8143) acc1: 60.9375 (60.9375) acc5: 81.2500 (81.2500) time: 2.2638 data: 2.0202 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.2482 (2.2103) acc1: 52.6042 (49.8721) acc5: 78.1250 (73.6573) time: 0.6034 data: 0.4041 max mem: 57114 Test: Total time: 0:00:03 (0.6154 s / it) * Acc@1 45.776 Acc@5 70.097 loss 2.535 Accuracy of the model EMA on 50000 test images: 45.8% Max EMA accuracy: 45.78% Epoch: [132] [ 0/156] eta: 0:06:38 lr: 0.005236 min_lr: 0.005236 loss: 3.5424 (3.5424) weight_decay: 0.0500 (0.0500) time: 2.5521 data: 1.9048 max mem: 57114 Epoch: [132] [ 10/156] eta: 0:02:09 lr: 0.005234 min_lr: 0.005234 loss: 3.5279 (3.3280) weight_decay: 0.0500 (0.0500) time: 0.8853 data: 0.1734 max mem: 57114 Epoch: [132] [ 20/156] eta: 0:01:51 lr: 0.005231 min_lr: 0.005231 loss: 3.3321 (3.3592) weight_decay: 0.0500 (0.0500) time: 0.7308 data: 0.0003 max mem: 57114 Epoch: [132] [ 30/156] eta: 0:01:38 lr: 0.005228 min_lr: 0.005228 loss: 3.3321 (3.3268) weight_decay: 0.0500 (0.0500) time: 0.7205 data: 0.0004 max mem: 57114 Epoch: [132] [ 40/156] eta: 0:01:28 lr: 0.005225 min_lr: 0.005225 loss: 3.2188 (3.2401) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [132] [ 50/156] eta: 0:01:19 lr: 0.005223 min_lr: 0.005223 loss: 3.1271 (3.2002) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0004 max mem: 57114 Epoch: [132] [ 60/156] eta: 0:01:12 lr: 0.005220 min_lr: 0.005220 loss: 3.2274 (3.2229) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0004 max mem: 57114 Epoch: [132] [ 70/156] eta: 0:01:04 lr: 0.005217 min_lr: 0.005217 loss: 3.3493 (3.2256) weight_decay: 0.0500 (0.0500) time: 0.7385 data: 0.0004 max mem: 57114 Epoch: [132] [ 80/156] eta: 0:00:56 lr: 0.005215 min_lr: 0.005215 loss: 3.1087 (3.2035) weight_decay: 0.0500 (0.0500) time: 0.7255 data: 0.0004 max mem: 57114 Epoch: [132] [ 90/156] eta: 0:00:48 lr: 0.005212 min_lr: 0.005212 loss: 3.2247 (3.2181) weight_decay: 0.0500 (0.0500) time: 0.7050 data: 0.0004 max mem: 57114 Epoch: [132] [100/156] eta: 0:00:41 lr: 0.005209 min_lr: 0.005209 loss: 3.2340 (3.2143) weight_decay: 0.0500 (0.0500) time: 0.7030 data: 0.0004 max mem: 57114 Epoch: [132] [110/156] eta: 0:00:33 lr: 0.005206 min_lr: 0.005206 loss: 3.2340 (3.2197) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [132] [120/156] eta: 0:00:26 lr: 0.005204 min_lr: 0.005204 loss: 3.3699 (3.2154) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0004 max mem: 57114 Epoch: [132] [130/156] eta: 0:00:18 lr: 0.005201 min_lr: 0.005201 loss: 3.2870 (3.2125) weight_decay: 0.0500 (0.0500) time: 0.7040 data: 0.0009 max mem: 57114 Epoch: [132] [140/156] eta: 0:00:11 lr: 0.005198 min_lr: 0.005198 loss: 3.2779 (3.2123) weight_decay: 0.0500 (0.0500) time: 0.6957 data: 0.0007 max mem: 57114 Epoch: [132] [150/156] eta: 0:00:04 lr: 0.005195 min_lr: 0.005195 loss: 3.1535 (3.2006) weight_decay: 0.0500 (0.0500) time: 0.6811 data: 0.0001 max mem: 57114 Epoch: [132] [155/156] eta: 0:00:00 lr: 0.005194 min_lr: 0.005194 loss: 3.1535 (3.2043) weight_decay: 0.0500 (0.0500) time: 0.6795 data: 0.0001 max mem: 57114 Epoch: [132] Total time: 0:01:52 (0.7232 s / it) Averaged stats: lr: 0.005194 min_lr: 0.005194 loss: 3.1535 (3.1843) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9324 (0.9324) acc1: 87.5000 (87.5000) acc5: 96.3542 (96.3542) time: 2.0951 data: 1.8394 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1560 (1.0929) acc1: 78.6458 (78.2609) acc5: 96.3542 (94.7570) time: 0.5697 data: 0.3680 max mem: 57114 Test: Total time: 0:00:02 (0.5923 s / it) * Acc@1 77.815 Acc@5 94.088 loss 1.167 Accuracy of the model on the 50000 test images: 77.8% Max accuracy: 78.38% Test: [0/5] eta: 0:00:11 loss: 1.7272 (1.7272) acc1: 61.4583 (61.4583) acc5: 81.7708 (81.7708) time: 2.2680 data: 2.0246 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.1580 (2.1143) acc1: 52.6042 (51.1509) acc5: 78.6458 (74.6803) time: 0.6043 data: 0.4050 max mem: 57114 Test: Total time: 0:00:03 (0.6151 s / it) * Acc@1 47.398 Acc@5 71.757 loss 2.433 Accuracy of the model EMA on 50000 test images: 47.4% Max EMA accuracy: 47.40% Epoch: [133] [ 0/156] eta: 0:06:35 lr: 0.005194 min_lr: 0.005194 loss: 3.4347 (3.4347) weight_decay: 0.0500 (0.0500) time: 2.5343 data: 1.8855 max mem: 57114 Epoch: [133] [ 10/156] eta: 0:02:07 lr: 0.005191 min_lr: 0.005191 loss: 3.3050 (3.0611) weight_decay: 0.0500 (0.0500) time: 0.8755 data: 0.1717 max mem: 57114 Epoch: [133] [ 20/156] eta: 0:01:49 lr: 0.005188 min_lr: 0.005188 loss: 3.3050 (3.1513) weight_decay: 0.0500 (0.0500) time: 0.7180 data: 0.0003 max mem: 57114 Epoch: [133] [ 30/156] eta: 0:01:36 lr: 0.005185 min_lr: 0.005185 loss: 3.3669 (3.1413) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0003 max mem: 57114 Epoch: [133] [ 40/156] eta: 0:01:28 lr: 0.005183 min_lr: 0.005183 loss: 3.4067 (3.2154) weight_decay: 0.0500 (0.0500) time: 0.7264 data: 0.0004 max mem: 57114 Epoch: [133] [ 50/156] eta: 0:01:20 lr: 0.005180 min_lr: 0.005180 loss: 3.4067 (3.1493) weight_decay: 0.0500 (0.0500) time: 0.7517 data: 0.0004 max mem: 57114 Epoch: [133] [ 60/156] eta: 0:01:12 lr: 0.005177 min_lr: 0.005177 loss: 3.1446 (3.1364) weight_decay: 0.0500 (0.0500) time: 0.7313 data: 0.0004 max mem: 57114 Epoch: [133] [ 70/156] eta: 0:01:04 lr: 0.005174 min_lr: 0.005174 loss: 3.3702 (3.1616) weight_decay: 0.0500 (0.0500) time: 0.7150 data: 0.0004 max mem: 57114 Epoch: [133] [ 80/156] eta: 0:00:56 lr: 0.005172 min_lr: 0.005172 loss: 3.3702 (3.1627) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [133] [ 90/156] eta: 0:00:48 lr: 0.005169 min_lr: 0.005169 loss: 3.3586 (3.1796) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [133] [100/156] eta: 0:00:41 lr: 0.005166 min_lr: 0.005166 loss: 3.3371 (3.1813) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0004 max mem: 57114 Epoch: [133] [110/156] eta: 0:00:33 lr: 0.005163 min_lr: 0.005163 loss: 3.1380 (3.1697) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [133] [120/156] eta: 0:00:26 lr: 0.005161 min_lr: 0.005161 loss: 2.9533 (3.1463) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0004 max mem: 57114 Epoch: [133] [130/156] eta: 0:00:18 lr: 0.005158 min_lr: 0.005158 loss: 3.2009 (3.1602) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0009 max mem: 57114 Epoch: [133] [140/156] eta: 0:00:11 lr: 0.005155 min_lr: 0.005155 loss: 3.2728 (3.1429) weight_decay: 0.0500 (0.0500) time: 0.6962 data: 0.0007 max mem: 57114 Epoch: [133] [150/156] eta: 0:00:04 lr: 0.005152 min_lr: 0.005152 loss: 3.2141 (3.1499) weight_decay: 0.0500 (0.0500) time: 0.6840 data: 0.0001 max mem: 57114 Epoch: [133] [155/156] eta: 0:00:00 lr: 0.005151 min_lr: 0.005151 loss: 3.2445 (3.1508) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0001 max mem: 57114 Epoch: [133] Total time: 0:01:53 (0.7244 s / it) Averaged stats: lr: 0.005151 min_lr: 0.005151 loss: 3.2445 (3.1725) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8149 (0.8149) acc1: 85.9375 (85.9375) acc5: 97.9167 (97.9167) time: 2.0730 data: 1.8157 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9726 (0.9487) acc1: 77.6042 (77.7494) acc5: 95.8333 (94.8849) time: 0.5655 data: 0.3632 max mem: 57114 Test: Total time: 0:00:02 (0.5870 s / it) * Acc@1 78.395 Acc@5 94.523 loss 1.050 Accuracy of the model on the 50000 test images: 78.4% Max accuracy: 78.39% Test: [0/5] eta: 0:00:10 loss: 1.6493 (1.6493) acc1: 63.5417 (63.5417) acc5: 83.3333 (83.3333) time: 2.0521 data: 1.8086 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 2.0742 (2.0252) acc1: 54.1667 (53.0691) acc5: 79.6875 (75.7033) time: 0.5610 data: 0.3618 max mem: 57114 Test: Total time: 0:00:02 (0.5730 s / it) * Acc@1 48.921 Acc@5 73.274 loss 2.338 Accuracy of the model EMA on 50000 test images: 48.9% Max EMA accuracy: 48.92% Epoch: [134] [ 0/156] eta: 0:06:24 lr: 0.005151 min_lr: 0.005151 loss: 3.1865 (3.1865) weight_decay: 0.0500 (0.0500) time: 2.4627 data: 1.8104 max mem: 57114 Epoch: [134] [ 10/156] eta: 0:02:09 lr: 0.005148 min_lr: 0.005148 loss: 3.0441 (3.0695) weight_decay: 0.0500 (0.0500) time: 0.8875 data: 0.1648 max mem: 57114 Epoch: [134] [ 20/156] eta: 0:01:51 lr: 0.005145 min_lr: 0.005145 loss: 3.2615 (3.1731) weight_decay: 0.0500 (0.0500) time: 0.7416 data: 0.0003 max mem: 57114 Epoch: [134] [ 30/156] eta: 0:01:40 lr: 0.005142 min_lr: 0.005142 loss: 3.3083 (3.1196) weight_decay: 0.0500 (0.0500) time: 0.7492 data: 0.0003 max mem: 57114 Epoch: [134] [ 40/156] eta: 0:01:31 lr: 0.005140 min_lr: 0.005140 loss: 2.9147 (3.0601) weight_decay: 0.0500 (0.0500) time: 0.7500 data: 0.0003 max mem: 57114 Epoch: [134] [ 50/156] eta: 0:01:22 lr: 0.005137 min_lr: 0.005137 loss: 3.2010 (3.1193) weight_decay: 0.0500 (0.0500) time: 0.7428 data: 0.0004 max mem: 57114 Epoch: [134] [ 60/156] eta: 0:01:13 lr: 0.005134 min_lr: 0.005134 loss: 3.3568 (3.1076) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0004 max mem: 57114 Epoch: [134] [ 70/156] eta: 0:01:05 lr: 0.005131 min_lr: 0.005131 loss: 3.1972 (3.1041) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0004 max mem: 57114 Epoch: [134] [ 80/156] eta: 0:00:57 lr: 0.005129 min_lr: 0.005129 loss: 3.1893 (3.1111) weight_decay: 0.0500 (0.0500) time: 0.7188 data: 0.0004 max mem: 57114 Epoch: [134] [ 90/156] eta: 0:00:49 lr: 0.005126 min_lr: 0.005126 loss: 3.0992 (3.1167) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [134] [100/156] eta: 0:00:41 lr: 0.005123 min_lr: 0.005123 loss: 3.2361 (3.1335) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [134] [110/156] eta: 0:00:34 lr: 0.005120 min_lr: 0.005120 loss: 3.2361 (3.1238) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0003 max mem: 57114 Epoch: [134] [120/156] eta: 0:00:26 lr: 0.005118 min_lr: 0.005118 loss: 2.9414 (3.1058) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [134] [130/156] eta: 0:00:19 lr: 0.005115 min_lr: 0.005115 loss: 3.1897 (3.1040) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0009 max mem: 57114 Epoch: [134] [140/156] eta: 0:00:11 lr: 0.005112 min_lr: 0.005112 loss: 3.1191 (3.1093) weight_decay: 0.0500 (0.0500) time: 0.6888 data: 0.0007 max mem: 57114 Epoch: [134] [150/156] eta: 0:00:04 lr: 0.005109 min_lr: 0.005109 loss: 3.1191 (3.1106) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0001 max mem: 57114 Epoch: [134] [155/156] eta: 0:00:00 lr: 0.005108 min_lr: 0.005108 loss: 3.2053 (3.1129) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0001 max mem: 57114 Epoch: [134] Total time: 0:01:54 (0.7308 s / it) Averaged stats: lr: 0.005108 min_lr: 0.005108 loss: 3.2053 (3.1683) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9446 (0.9446) acc1: 86.9792 (86.9792) acc5: 96.8750 (96.8750) time: 2.0994 data: 1.8435 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0467 (1.0848) acc1: 82.8125 (78.5166) acc5: 96.8750 (94.8849) time: 0.5707 data: 0.3688 max mem: 57114 Test: Total time: 0:00:02 (0.5971 s / it) * Acc@1 77.490 Acc@5 94.034 loss 1.196 Accuracy of the model on the 50000 test images: 77.5% Max accuracy: 78.39% Test: [0/5] eta: 0:00:11 loss: 1.5758 (1.5758) acc1: 64.0625 (64.0625) acc5: 84.3750 (84.3750) time: 2.2990 data: 2.0555 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.9957 (1.9416) acc1: 54.6875 (54.0921) acc5: 80.7292 (77.1100) time: 0.6104 data: 0.4112 max mem: 57114 Test: Total time: 0:00:03 (0.6227 s / it) * Acc@1 50.508 Acc@5 74.756 loss 2.249 Accuracy of the model EMA on 50000 test images: 50.5% Max EMA accuracy: 50.51% Epoch: [135] [ 0/156] eta: 0:06:58 lr: 0.005108 min_lr: 0.005108 loss: 3.3182 (3.3182) weight_decay: 0.0500 (0.0500) time: 2.6812 data: 2.0323 max mem: 57114 Epoch: [135] [ 10/156] eta: 0:02:09 lr: 0.005105 min_lr: 0.005105 loss: 3.2166 (3.0713) weight_decay: 0.0500 (0.0500) time: 0.8874 data: 0.1850 max mem: 57114 Epoch: [135] [ 20/156] eta: 0:01:49 lr: 0.005102 min_lr: 0.005102 loss: 2.9882 (2.9870) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0003 max mem: 57114 Epoch: [135] [ 30/156] eta: 0:01:38 lr: 0.005099 min_lr: 0.005099 loss: 3.2408 (3.0788) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [135] [ 40/156] eta: 0:01:29 lr: 0.005097 min_lr: 0.005097 loss: 3.2507 (3.0663) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0003 max mem: 57114 Epoch: [135] [ 50/156] eta: 0:01:20 lr: 0.005094 min_lr: 0.005094 loss: 3.0938 (3.0462) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0004 max mem: 57114 Epoch: [135] [ 60/156] eta: 0:01:12 lr: 0.005091 min_lr: 0.005091 loss: 3.0938 (3.0538) weight_decay: 0.0500 (0.0500) time: 0.7156 data: 0.0004 max mem: 57114 Epoch: [135] [ 70/156] eta: 0:01:03 lr: 0.005088 min_lr: 0.005088 loss: 3.0834 (3.0575) weight_decay: 0.0500 (0.0500) time: 0.7073 data: 0.0004 max mem: 57114 Epoch: [135] [ 80/156] eta: 0:00:56 lr: 0.005086 min_lr: 0.005086 loss: 3.0233 (3.0537) weight_decay: 0.0500 (0.0500) time: 0.7013 data: 0.0003 max mem: 57114 Epoch: [135] [ 90/156] eta: 0:00:48 lr: 0.005083 min_lr: 0.005083 loss: 3.3446 (3.0953) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0004 max mem: 57114 Epoch: [135] [100/156] eta: 0:00:41 lr: 0.005080 min_lr: 0.005080 loss: 3.3446 (3.1041) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [135] [110/156] eta: 0:00:33 lr: 0.005077 min_lr: 0.005077 loss: 3.1880 (3.0984) weight_decay: 0.0500 (0.0500) time: 0.7032 data: 0.0004 max mem: 57114 Epoch: [135] [120/156] eta: 0:00:26 lr: 0.005074 min_lr: 0.005074 loss: 3.1001 (3.0935) weight_decay: 0.0500 (0.0500) time: 0.7030 data: 0.0004 max mem: 57114 Epoch: [135] [130/156] eta: 0:00:18 lr: 0.005072 min_lr: 0.005072 loss: 3.2592 (3.1087) weight_decay: 0.0500 (0.0500) time: 0.6996 data: 0.0008 max mem: 57114 Epoch: [135] [140/156] eta: 0:00:11 lr: 0.005069 min_lr: 0.005069 loss: 3.4050 (3.1248) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0007 max mem: 57114 Epoch: [135] [150/156] eta: 0:00:04 lr: 0.005066 min_lr: 0.005066 loss: 3.5652 (3.1456) weight_decay: 0.0500 (0.0500) time: 0.6837 data: 0.0001 max mem: 57114 Epoch: [135] [155/156] eta: 0:00:00 lr: 0.005065 min_lr: 0.005065 loss: 3.4144 (3.1484) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0001 max mem: 57114 Epoch: [135] Total time: 0:01:52 (0.7211 s / it) Averaged stats: lr: 0.005065 min_lr: 0.005065 loss: 3.4144 (3.1724) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0967 (1.0967) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.0579 data: 1.8018 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2342 (1.1735) acc1: 80.2083 (78.9003) acc5: 96.3542 (94.7570) time: 0.5623 data: 0.3604 max mem: 57114 Test: Total time: 0:00:02 (0.5807 s / it) * Acc@1 77.857 Acc@5 94.160 loss 1.289 Accuracy of the model on the 50000 test images: 77.9% Max accuracy: 78.39% Test: [0/5] eta: 0:00:11 loss: 1.5087 (1.5087) acc1: 65.1042 (65.1042) acc5: 84.8958 (84.8958) time: 2.2647 data: 2.0213 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.9223 (1.8654) acc1: 56.2500 (55.1151) acc5: 82.2917 (78.3888) time: 0.6036 data: 0.4044 max mem: 57114 Test: Total time: 0:00:03 (0.6164 s / it) * Acc@1 51.964 Acc@5 76.069 loss 2.167 Accuracy of the model EMA on 50000 test images: 52.0% Max EMA accuracy: 51.96% Epoch: [136] [ 0/156] eta: 0:07:09 lr: 0.005065 min_lr: 0.005065 loss: 3.2038 (3.2038) weight_decay: 0.0500 (0.0500) time: 2.7547 data: 2.1046 max mem: 57114 Epoch: [136] [ 10/156] eta: 0:02:11 lr: 0.005062 min_lr: 0.005062 loss: 3.2900 (3.1577) weight_decay: 0.0500 (0.0500) time: 0.8987 data: 0.1917 max mem: 57114 Epoch: [136] [ 20/156] eta: 0:01:52 lr: 0.005059 min_lr: 0.005059 loss: 3.1134 (3.1357) weight_decay: 0.0500 (0.0500) time: 0.7301 data: 0.0004 max mem: 57114 Epoch: [136] [ 30/156] eta: 0:01:40 lr: 0.005056 min_lr: 0.005056 loss: 3.1834 (3.1805) weight_decay: 0.0500 (0.0500) time: 0.7367 data: 0.0004 max mem: 57114 Epoch: [136] [ 40/156] eta: 0:01:29 lr: 0.005053 min_lr: 0.005053 loss: 3.2660 (3.1754) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0004 max mem: 57114 Epoch: [136] [ 50/156] eta: 0:01:20 lr: 0.005051 min_lr: 0.005051 loss: 3.1685 (3.1730) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [136] [ 60/156] eta: 0:01:12 lr: 0.005048 min_lr: 0.005048 loss: 3.2181 (3.1785) weight_decay: 0.0500 (0.0500) time: 0.7218 data: 0.0004 max mem: 57114 Epoch: [136] [ 70/156] eta: 0:01:04 lr: 0.005045 min_lr: 0.005045 loss: 3.2752 (3.2067) weight_decay: 0.0500 (0.0500) time: 0.7275 data: 0.0004 max mem: 57114 Epoch: [136] [ 80/156] eta: 0:00:56 lr: 0.005042 min_lr: 0.005042 loss: 3.2589 (3.1725) weight_decay: 0.0500 (0.0500) time: 0.7145 data: 0.0004 max mem: 57114 Epoch: [136] [ 90/156] eta: 0:00:49 lr: 0.005040 min_lr: 0.005040 loss: 2.9832 (3.1449) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [136] [100/156] eta: 0:00:41 lr: 0.005037 min_lr: 0.005037 loss: 2.7559 (3.0960) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [136] [110/156] eta: 0:00:33 lr: 0.005034 min_lr: 0.005034 loss: 2.8356 (3.1075) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [136] [120/156] eta: 0:00:26 lr: 0.005031 min_lr: 0.005031 loss: 3.2979 (3.1146) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [136] [130/156] eta: 0:00:18 lr: 0.005028 min_lr: 0.005028 loss: 3.2806 (3.1241) weight_decay: 0.0500 (0.0500) time: 0.6956 data: 0.0009 max mem: 57114 Epoch: [136] [140/156] eta: 0:00:11 lr: 0.005026 min_lr: 0.005026 loss: 3.2535 (3.1378) weight_decay: 0.0500 (0.0500) time: 0.6938 data: 0.0007 max mem: 57114 Epoch: [136] [150/156] eta: 0:00:04 lr: 0.005023 min_lr: 0.005023 loss: 3.3308 (3.1525) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0001 max mem: 57114 Epoch: [136] [155/156] eta: 0:00:00 lr: 0.005021 min_lr: 0.005021 loss: 3.3948 (3.1553) weight_decay: 0.0500 (0.0500) time: 0.6854 data: 0.0001 max mem: 57114 Epoch: [136] Total time: 0:01:53 (0.7260 s / it) Averaged stats: lr: 0.005021 min_lr: 0.005021 loss: 3.3948 (3.1667) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9760 (0.9760) acc1: 85.4167 (85.4167) acc5: 96.3542 (96.3542) time: 2.0904 data: 1.8352 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0846 (1.1006) acc1: 83.3333 (79.1560) acc5: 96.3542 (94.1177) time: 0.5687 data: 0.3671 max mem: 57114 Test: Total time: 0:00:02 (0.5914 s / it) * Acc@1 77.396 Acc@5 93.934 loss 1.147 Accuracy of the model on the 50000 test images: 77.4% Max accuracy: 78.39% Test: [0/5] eta: 0:00:11 loss: 1.4490 (1.4490) acc1: 66.1458 (66.1458) acc5: 85.4167 (85.4167) time: 2.2829 data: 2.0395 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.8539 (1.7962) acc1: 58.3333 (56.7775) acc5: 83.3333 (79.5396) time: 0.6072 data: 0.4080 max mem: 57114 Test: Total time: 0:00:03 (0.6219 s / it) * Acc@1 53.275 Acc@5 77.170 loss 2.093 Accuracy of the model EMA on 50000 test images: 53.3% Max EMA accuracy: 53.27% Epoch: [137] [ 0/156] eta: 0:07:04 lr: 0.005021 min_lr: 0.005021 loss: 3.7286 (3.7286) weight_decay: 0.0500 (0.0500) time: 2.7194 data: 2.0609 max mem: 57114 Epoch: [137] [ 10/156] eta: 0:02:14 lr: 0.005018 min_lr: 0.005018 loss: 2.9422 (3.0029) weight_decay: 0.0500 (0.0500) time: 0.9226 data: 0.1876 max mem: 57114 Epoch: [137] [ 20/156] eta: 0:01:51 lr: 0.005016 min_lr: 0.005016 loss: 3.1090 (3.1318) weight_decay: 0.0500 (0.0500) time: 0.7279 data: 0.0003 max mem: 57114 Epoch: [137] [ 30/156] eta: 0:01:39 lr: 0.005013 min_lr: 0.005013 loss: 3.2235 (3.1746) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0003 max mem: 57114 Epoch: [137] [ 40/156] eta: 0:01:30 lr: 0.005010 min_lr: 0.005010 loss: 2.9876 (3.0975) weight_decay: 0.0500 (0.0500) time: 0.7369 data: 0.0004 max mem: 57114 Epoch: [137] [ 50/156] eta: 0:01:21 lr: 0.005007 min_lr: 0.005007 loss: 3.1264 (3.1148) weight_decay: 0.0500 (0.0500) time: 0.7391 data: 0.0004 max mem: 57114 Epoch: [137] [ 60/156] eta: 0:01:13 lr: 0.005004 min_lr: 0.005004 loss: 3.3410 (3.1101) weight_decay: 0.0500 (0.0500) time: 0.7331 data: 0.0004 max mem: 57114 Epoch: [137] [ 70/156] eta: 0:01:05 lr: 0.005002 min_lr: 0.005002 loss: 2.9608 (3.0936) weight_decay: 0.0500 (0.0500) time: 0.7211 data: 0.0004 max mem: 57114 Epoch: [137] [ 80/156] eta: 0:00:56 lr: 0.004999 min_lr: 0.004999 loss: 3.1010 (3.1183) weight_decay: 0.0500 (0.0500) time: 0.7028 data: 0.0004 max mem: 57114 Epoch: [137] [ 90/156] eta: 0:00:49 lr: 0.004996 min_lr: 0.004996 loss: 3.1936 (3.0943) weight_decay: 0.0500 (0.0500) time: 0.7080 data: 0.0004 max mem: 57114 Epoch: [137] [100/156] eta: 0:00:41 lr: 0.004993 min_lr: 0.004993 loss: 3.2825 (3.1074) weight_decay: 0.0500 (0.0500) time: 0.7200 data: 0.0004 max mem: 57114 Epoch: [137] [110/156] eta: 0:00:33 lr: 0.004991 min_lr: 0.004991 loss: 3.3362 (3.1155) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0004 max mem: 57114 Epoch: [137] [120/156] eta: 0:00:26 lr: 0.004988 min_lr: 0.004988 loss: 3.1131 (3.1084) weight_decay: 0.0500 (0.0500) time: 0.7006 data: 0.0004 max mem: 57114 Epoch: [137] [130/156] eta: 0:00:19 lr: 0.004985 min_lr: 0.004985 loss: 2.9640 (3.0987) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0008 max mem: 57114 Epoch: [137] [140/156] eta: 0:00:11 lr: 0.004982 min_lr: 0.004982 loss: 3.0072 (3.1002) weight_decay: 0.0500 (0.0500) time: 0.6825 data: 0.0007 max mem: 57114 Epoch: [137] [150/156] eta: 0:00:04 lr: 0.004979 min_lr: 0.004979 loss: 3.3923 (3.1097) weight_decay: 0.0500 (0.0500) time: 0.6792 data: 0.0001 max mem: 57114 Epoch: [137] [155/156] eta: 0:00:00 lr: 0.004978 min_lr: 0.004978 loss: 3.4201 (3.1192) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [137] Total time: 0:01:53 (0.7272 s / it) Averaged stats: lr: 0.004978 min_lr: 0.004978 loss: 3.4201 (3.1603) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9355 (0.9355) acc1: 86.4583 (86.4583) acc5: 98.9583 (98.9583) time: 2.0631 data: 1.8061 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1010 (1.0717) acc1: 82.2917 (80.0512) acc5: 96.3542 (95.6522) time: 0.5633 data: 0.3613 max mem: 57114 Test: Total time: 0:00:02 (0.5833 s / it) * Acc@1 77.713 Acc@5 94.222 loss 1.189 Accuracy of the model on the 50000 test images: 77.7% Max accuracy: 78.39% Test: [0/5] eta: 0:00:11 loss: 1.3943 (1.3943) acc1: 68.2292 (68.2292) acc5: 85.9375 (85.9375) time: 2.2397 data: 1.9961 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7893 (1.7314) acc1: 58.8542 (58.0563) acc5: 84.3750 (80.5627) time: 0.5987 data: 0.3993 max mem: 57114 Test: Total time: 0:00:03 (0.6147 s / it) * Acc@1 54.516 Acc@5 78.227 loss 2.023 Accuracy of the model EMA on 50000 test images: 54.5% Max EMA accuracy: 54.52% Epoch: [138] [ 0/156] eta: 0:07:42 lr: 0.004978 min_lr: 0.004978 loss: 2.3290 (2.3290) weight_decay: 0.0500 (0.0500) time: 2.9620 data: 2.3059 max mem: 57114 Epoch: [138] [ 10/156] eta: 0:02:18 lr: 0.004975 min_lr: 0.004975 loss: 2.7826 (2.8718) weight_decay: 0.0500 (0.0500) time: 0.9456 data: 0.2099 max mem: 57114 Epoch: [138] [ 20/156] eta: 0:01:54 lr: 0.004972 min_lr: 0.004972 loss: 2.8015 (2.9164) weight_decay: 0.0500 (0.0500) time: 0.7340 data: 0.0003 max mem: 57114 Epoch: [138] [ 30/156] eta: 0:01:41 lr: 0.004969 min_lr: 0.004969 loss: 3.0182 (2.9241) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0003 max mem: 57114 Epoch: [138] [ 40/156] eta: 0:01:31 lr: 0.004967 min_lr: 0.004967 loss: 3.2356 (3.0282) weight_decay: 0.0500 (0.0500) time: 0.7387 data: 0.0004 max mem: 57114 Epoch: [138] [ 50/156] eta: 0:01:22 lr: 0.004964 min_lr: 0.004964 loss: 3.2831 (3.0616) weight_decay: 0.0500 (0.0500) time: 0.7261 data: 0.0004 max mem: 57114 Epoch: [138] [ 60/156] eta: 0:01:13 lr: 0.004961 min_lr: 0.004961 loss: 3.3583 (3.0875) weight_decay: 0.0500 (0.0500) time: 0.7153 data: 0.0004 max mem: 57114 Epoch: [138] [ 70/156] eta: 0:01:05 lr: 0.004958 min_lr: 0.004958 loss: 3.0434 (3.0392) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0004 max mem: 57114 Epoch: [138] [ 80/156] eta: 0:00:57 lr: 0.004955 min_lr: 0.004955 loss: 2.7563 (3.0290) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [138] [ 90/156] eta: 0:00:49 lr: 0.004953 min_lr: 0.004953 loss: 2.8366 (3.0109) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0004 max mem: 57114 Epoch: [138] [100/156] eta: 0:00:41 lr: 0.004950 min_lr: 0.004950 loss: 3.1501 (3.0295) weight_decay: 0.0500 (0.0500) time: 0.7151 data: 0.0004 max mem: 57114 Epoch: [138] [110/156] eta: 0:00:34 lr: 0.004947 min_lr: 0.004947 loss: 3.3999 (3.0574) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0003 max mem: 57114 Epoch: [138] [120/156] eta: 0:00:26 lr: 0.004944 min_lr: 0.004944 loss: 3.5187 (3.0723) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0003 max mem: 57114 Epoch: [138] [130/156] eta: 0:00:19 lr: 0.004941 min_lr: 0.004941 loss: 3.2502 (3.0713) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0008 max mem: 57114 Epoch: [138] [140/156] eta: 0:00:11 lr: 0.004939 min_lr: 0.004939 loss: 2.9372 (3.0735) weight_decay: 0.0500 (0.0500) time: 0.6890 data: 0.0007 max mem: 57114 Epoch: [138] [150/156] eta: 0:00:04 lr: 0.004936 min_lr: 0.004936 loss: 3.2187 (3.0869) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [138] [155/156] eta: 0:00:00 lr: 0.004934 min_lr: 0.004934 loss: 3.2187 (3.0875) weight_decay: 0.0500 (0.0500) time: 0.6888 data: 0.0001 max mem: 57114 Epoch: [138] Total time: 0:01:53 (0.7300 s / it) Averaged stats: lr: 0.004934 min_lr: 0.004934 loss: 3.2187 (3.1590) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9046 (0.9046) acc1: 82.8125 (82.8125) acc5: 96.8750 (96.8750) time: 2.1454 data: 1.8884 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9882 (0.9683) acc1: 78.6458 (76.7263) acc5: 96.8750 (95.0128) time: 0.5798 data: 0.3778 max mem: 57114 Test: Total time: 0:00:03 (0.6038 s / it) * Acc@1 78.543 Acc@5 94.507 loss 1.066 Accuracy of the model on the 50000 test images: 78.5% Max accuracy: 78.54% Test: [0/5] eta: 0:00:10 loss: 1.3428 (1.3428) acc1: 68.2292 (68.2292) acc5: 87.5000 (87.5000) time: 2.0925 data: 1.8489 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.7270 (1.6709) acc1: 60.4167 (58.8235) acc5: 85.4167 (81.4578) time: 0.5692 data: 0.3699 max mem: 57114 Test: Total time: 0:00:02 (0.5824 s / it) * Acc@1 55.645 Acc@5 79.196 loss 1.958 Accuracy of the model EMA on 50000 test images: 55.6% Max EMA accuracy: 55.64% Epoch: [139] [ 0/156] eta: 0:08:53 lr: 0.004934 min_lr: 0.004934 loss: 2.4697 (2.4697) weight_decay: 0.0500 (0.0500) time: 3.4174 data: 2.7700 max mem: 57114 Epoch: [139] [ 10/156] eta: 0:02:24 lr: 0.004931 min_lr: 0.004931 loss: 3.3311 (3.2610) weight_decay: 0.0500 (0.0500) time: 0.9865 data: 0.2521 max mem: 57114 Epoch: [139] [ 20/156] eta: 0:01:59 lr: 0.004929 min_lr: 0.004929 loss: 3.2867 (3.1863) weight_decay: 0.0500 (0.0500) time: 0.7496 data: 0.0003 max mem: 57114 Epoch: [139] [ 30/156] eta: 0:01:44 lr: 0.004926 min_lr: 0.004926 loss: 3.2867 (3.2092) weight_decay: 0.0500 (0.0500) time: 0.7394 data: 0.0003 max mem: 57114 Epoch: [139] [ 40/156] eta: 0:01:33 lr: 0.004923 min_lr: 0.004923 loss: 3.2994 (3.2080) weight_decay: 0.0500 (0.0500) time: 0.7346 data: 0.0004 max mem: 57114 Epoch: [139] [ 50/156] eta: 0:01:24 lr: 0.004920 min_lr: 0.004920 loss: 3.1002 (3.1920) weight_decay: 0.0500 (0.0500) time: 0.7454 data: 0.0004 max mem: 57114 Epoch: [139] [ 60/156] eta: 0:01:15 lr: 0.004917 min_lr: 0.004917 loss: 3.0099 (3.1572) weight_decay: 0.0500 (0.0500) time: 0.7287 data: 0.0004 max mem: 57114 Epoch: [139] [ 70/156] eta: 0:01:06 lr: 0.004915 min_lr: 0.004915 loss: 3.0867 (3.1491) weight_decay: 0.0500 (0.0500) time: 0.7254 data: 0.0004 max mem: 57114 Epoch: [139] [ 80/156] eta: 0:00:58 lr: 0.004912 min_lr: 0.004912 loss: 3.3375 (3.1873) weight_decay: 0.0500 (0.0500) time: 0.7381 data: 0.0004 max mem: 57114 Epoch: [139] [ 90/156] eta: 0:00:50 lr: 0.004909 min_lr: 0.004909 loss: 3.3375 (3.1716) weight_decay: 0.0500 (0.0500) time: 0.7255 data: 0.0004 max mem: 57114 Epoch: [139] [100/156] eta: 0:00:42 lr: 0.004906 min_lr: 0.004906 loss: 2.9355 (3.1582) weight_decay: 0.0500 (0.0500) time: 0.7096 data: 0.0003 max mem: 57114 Epoch: [139] [110/156] eta: 0:00:34 lr: 0.004903 min_lr: 0.004903 loss: 2.9355 (3.1399) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0003 max mem: 57114 Epoch: [139] [120/156] eta: 0:00:27 lr: 0.004901 min_lr: 0.004901 loss: 3.1300 (3.1413) weight_decay: 0.0500 (0.0500) time: 0.7084 data: 0.0003 max mem: 57114 Epoch: [139] [130/156] eta: 0:00:19 lr: 0.004898 min_lr: 0.004898 loss: 3.3269 (3.1539) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0008 max mem: 57114 Epoch: [139] [140/156] eta: 0:00:11 lr: 0.004895 min_lr: 0.004895 loss: 3.3807 (3.1585) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0007 max mem: 57114 Epoch: [139] [150/156] eta: 0:00:04 lr: 0.004892 min_lr: 0.004892 loss: 3.4227 (3.1704) weight_decay: 0.0500 (0.0500) time: 0.6863 data: 0.0001 max mem: 57114 Epoch: [139] [155/156] eta: 0:00:00 lr: 0.004891 min_lr: 0.004891 loss: 3.1934 (3.1622) weight_decay: 0.0500 (0.0500) time: 0.6901 data: 0.0001 max mem: 57114 Epoch: [139] Total time: 0:01:55 (0.7389 s / it) Averaged stats: lr: 0.004891 min_lr: 0.004891 loss: 3.1934 (3.1544) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8747 (0.8747) acc1: 85.9375 (85.9375) acc5: 98.4375 (98.4375) time: 2.0106 data: 1.7553 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0756 (1.0140) acc1: 80.2083 (79.1560) acc5: 96.3542 (95.2685) time: 0.5529 data: 0.3511 max mem: 57114 Test: Total time: 0:00:02 (0.5749 s / it) * Acc@1 78.666 Acc@5 94.639 loss 1.090 Accuracy of the model on the 50000 test images: 78.7% Max accuracy: 78.67% Test: [0/5] eta: 0:00:09 loss: 1.2929 (1.2929) acc1: 68.2292 (68.2292) acc5: 88.5417 (88.5417) time: 1.9833 data: 1.7398 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.6681 (1.6135) acc1: 61.4583 (59.7187) acc5: 86.9792 (82.4808) time: 0.5473 data: 0.3480 max mem: 57114 Test: Total time: 0:00:02 (0.5585 s / it) * Acc@1 56.728 Acc@5 80.141 loss 1.896 Accuracy of the model EMA on 50000 test images: 56.7% Max EMA accuracy: 56.73% Epoch: [140] [ 0/156] eta: 0:07:34 lr: 0.004890 min_lr: 0.004890 loss: 3.7223 (3.7223) weight_decay: 0.0500 (0.0500) time: 2.9154 data: 2.2645 max mem: 57114 Epoch: [140] [ 10/156] eta: 0:02:15 lr: 0.004888 min_lr: 0.004888 loss: 3.3313 (3.1502) weight_decay: 0.0500 (0.0500) time: 0.9306 data: 0.2062 max mem: 57114 Epoch: [140] [ 20/156] eta: 0:01:54 lr: 0.004885 min_lr: 0.004885 loss: 2.9456 (3.1035) weight_decay: 0.0500 (0.0500) time: 0.7390 data: 0.0003 max mem: 57114 Epoch: [140] [ 30/156] eta: 0:01:40 lr: 0.004882 min_lr: 0.004882 loss: 3.1327 (3.1762) weight_decay: 0.0500 (0.0500) time: 0.7206 data: 0.0003 max mem: 57114 Epoch: [140] [ 40/156] eta: 0:01:31 lr: 0.004879 min_lr: 0.004879 loss: 3.3149 (3.1830) weight_decay: 0.0500 (0.0500) time: 0.7272 data: 0.0004 max mem: 57114 Epoch: [140] [ 50/156] eta: 0:01:22 lr: 0.004876 min_lr: 0.004876 loss: 3.1823 (3.1677) weight_decay: 0.0500 (0.0500) time: 0.7608 data: 0.0004 max mem: 57114 Epoch: [140] [ 60/156] eta: 0:01:14 lr: 0.004874 min_lr: 0.004874 loss: 3.3131 (3.1898) weight_decay: 0.0500 (0.0500) time: 0.7470 data: 0.0004 max mem: 57114 Epoch: [140] [ 70/156] eta: 0:01:05 lr: 0.004871 min_lr: 0.004871 loss: 3.1252 (3.1370) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0004 max mem: 57114 Epoch: [140] [ 80/156] eta: 0:00:57 lr: 0.004868 min_lr: 0.004868 loss: 3.0799 (3.1572) weight_decay: 0.0500 (0.0500) time: 0.7203 data: 0.0004 max mem: 57114 Epoch: [140] [ 90/156] eta: 0:00:49 lr: 0.004865 min_lr: 0.004865 loss: 3.4558 (3.1785) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0004 max mem: 57114 Epoch: [140] [100/156] eta: 0:00:41 lr: 0.004862 min_lr: 0.004862 loss: 3.3252 (3.1732) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0004 max mem: 57114 Epoch: [140] [110/156] eta: 0:00:34 lr: 0.004860 min_lr: 0.004860 loss: 3.2178 (3.1611) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0004 max mem: 57114 Epoch: [140] [120/156] eta: 0:00:26 lr: 0.004857 min_lr: 0.004857 loss: 3.1725 (3.1553) weight_decay: 0.0500 (0.0500) time: 0.7144 data: 0.0003 max mem: 57114 Epoch: [140] [130/156] eta: 0:00:19 lr: 0.004854 min_lr: 0.004854 loss: 3.0720 (3.1461) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0004 max mem: 57114 Epoch: [140] [140/156] eta: 0:00:11 lr: 0.004851 min_lr: 0.004851 loss: 3.0945 (3.1602) weight_decay: 0.0500 (0.0500) time: 0.6922 data: 0.0003 max mem: 57114 Epoch: [140] [150/156] eta: 0:00:04 lr: 0.004848 min_lr: 0.004848 loss: 3.4423 (3.1728) weight_decay: 0.0500 (0.0500) time: 0.6802 data: 0.0001 max mem: 57114 Epoch: [140] [155/156] eta: 0:00:00 lr: 0.004847 min_lr: 0.004847 loss: 3.3788 (3.1722) weight_decay: 0.0500 (0.0500) time: 0.6814 data: 0.0001 max mem: 57114 Epoch: [140] Total time: 0:01:54 (0.7329 s / it) Averaged stats: lr: 0.004847 min_lr: 0.004847 loss: 3.3788 (3.1537) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8120 (0.8120) acc1: 88.5417 (88.5417) acc5: 97.9167 (97.9167) time: 2.0761 data: 1.8205 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0669 (1.0525) acc1: 79.6875 (79.5396) acc5: 93.2292 (94.7570) time: 0.5659 data: 0.3642 max mem: 57114 Test: Total time: 0:00:02 (0.5906 s / it) * Acc@1 78.461 Acc@5 94.549 loss 1.129 Accuracy of the model on the 50000 test images: 78.5% Max accuracy: 78.67% Test: [0/5] eta: 0:00:11 loss: 1.2477 (1.2477) acc1: 69.7917 (69.7917) acc5: 89.5833 (89.5833) time: 2.2868 data: 2.0434 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.6131 (1.5599) acc1: 61.9792 (60.9974) acc5: 87.5000 (83.2481) time: 0.6080 data: 0.4088 max mem: 57114 Test: Total time: 0:00:03 (0.6185 s / it) * Acc@1 57.737 Acc@5 80.978 loss 1.837 Accuracy of the model EMA on 50000 test images: 57.7% Max EMA accuracy: 57.74% Epoch: [141] [ 0/156] eta: 0:07:44 lr: 0.004847 min_lr: 0.004847 loss: 3.3103 (3.3103) weight_decay: 0.0500 (0.0500) time: 2.9744 data: 2.3176 max mem: 57114 Epoch: [141] [ 10/156] eta: 0:02:17 lr: 0.004844 min_lr: 0.004844 loss: 3.4335 (3.3233) weight_decay: 0.0500 (0.0500) time: 0.9423 data: 0.2110 max mem: 57114 Epoch: [141] [ 20/156] eta: 0:01:54 lr: 0.004841 min_lr: 0.004841 loss: 3.3538 (3.1937) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0003 max mem: 57114 Epoch: [141] [ 30/156] eta: 0:01:41 lr: 0.004838 min_lr: 0.004838 loss: 2.9343 (3.1084) weight_decay: 0.0500 (0.0500) time: 0.7282 data: 0.0003 max mem: 57114 Epoch: [141] [ 40/156] eta: 0:01:31 lr: 0.004835 min_lr: 0.004835 loss: 2.8492 (3.0489) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0004 max mem: 57114 Epoch: [141] [ 50/156] eta: 0:01:22 lr: 0.004833 min_lr: 0.004833 loss: 3.2254 (3.0771) weight_decay: 0.0500 (0.0500) time: 0.7306 data: 0.0004 max mem: 57114 Epoch: [141] [ 60/156] eta: 0:01:13 lr: 0.004830 min_lr: 0.004830 loss: 3.2284 (3.0878) weight_decay: 0.0500 (0.0500) time: 0.7204 data: 0.0004 max mem: 57114 Epoch: [141] [ 70/156] eta: 0:01:05 lr: 0.004827 min_lr: 0.004827 loss: 3.2607 (3.1167) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [141] [ 80/156] eta: 0:00:57 lr: 0.004824 min_lr: 0.004824 loss: 3.4117 (3.1176) weight_decay: 0.0500 (0.0500) time: 0.7127 data: 0.0004 max mem: 57114 Epoch: [141] [ 90/156] eta: 0:00:49 lr: 0.004821 min_lr: 0.004821 loss: 3.3906 (3.1194) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [141] [100/156] eta: 0:00:41 lr: 0.004819 min_lr: 0.004819 loss: 3.0630 (3.1161) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0004 max mem: 57114 Epoch: [141] [110/156] eta: 0:00:34 lr: 0.004816 min_lr: 0.004816 loss: 3.2115 (3.1344) weight_decay: 0.0500 (0.0500) time: 0.7128 data: 0.0004 max mem: 57114 Epoch: [141] [120/156] eta: 0:00:26 lr: 0.004813 min_lr: 0.004813 loss: 3.1591 (3.1081) weight_decay: 0.0500 (0.0500) time: 0.6960 data: 0.0004 max mem: 57114 Epoch: [141] [130/156] eta: 0:00:19 lr: 0.004810 min_lr: 0.004810 loss: 2.9633 (3.1118) weight_decay: 0.0500 (0.0500) time: 0.6925 data: 0.0008 max mem: 57114 Epoch: [141] [140/156] eta: 0:00:11 lr: 0.004807 min_lr: 0.004807 loss: 3.2846 (3.1191) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0007 max mem: 57114 Epoch: [141] [150/156] eta: 0:00:04 lr: 0.004804 min_lr: 0.004804 loss: 3.2084 (3.1124) weight_decay: 0.0500 (0.0500) time: 0.6891 data: 0.0001 max mem: 57114 Epoch: [141] [155/156] eta: 0:00:00 lr: 0.004803 min_lr: 0.004803 loss: 3.2604 (3.1227) weight_decay: 0.0500 (0.0500) time: 0.6912 data: 0.0001 max mem: 57114 Epoch: [141] Total time: 0:01:53 (0.7288 s / it) Averaged stats: lr: 0.004803 min_lr: 0.004803 loss: 3.2604 (3.1480) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9489 (0.9489) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.1362 data: 1.8807 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0704 (1.0603) acc1: 80.2083 (78.7724) acc5: 96.8750 (94.7570) time: 0.5779 data: 0.3762 max mem: 57114 Test: Total time: 0:00:03 (0.6020 s / it) * Acc@1 78.077 Acc@5 94.060 loss 1.140 Accuracy of the model on the 50000 test images: 78.1% Max accuracy: 78.67% Test: [0/5] eta: 0:00:11 loss: 1.2054 (1.2054) acc1: 69.7917 (69.7917) acc5: 89.5833 (89.5833) time: 2.2810 data: 2.0374 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5613 (1.5106) acc1: 63.5417 (61.8926) acc5: 87.5000 (83.6317) time: 0.6068 data: 0.4076 max mem: 57114 Test: Total time: 0:00:03 (0.6189 s / it) * Acc@1 58.742 Acc@5 81.817 loss 1.783 Accuracy of the model EMA on 50000 test images: 58.7% Max EMA accuracy: 58.74% Epoch: [142] [ 0/156] eta: 0:07:35 lr: 0.004803 min_lr: 0.004803 loss: 2.4096 (2.4096) weight_decay: 0.0500 (0.0500) time: 2.9199 data: 2.2644 max mem: 57114 Epoch: [142] [ 10/156] eta: 0:02:14 lr: 0.004800 min_lr: 0.004800 loss: 2.8853 (2.8987) weight_decay: 0.0500 (0.0500) time: 0.9231 data: 0.2061 max mem: 57114 Epoch: [142] [ 20/156] eta: 0:01:53 lr: 0.004797 min_lr: 0.004797 loss: 3.2543 (3.0376) weight_decay: 0.0500 (0.0500) time: 0.7290 data: 0.0003 max mem: 57114 Epoch: [142] [ 30/156] eta: 0:01:40 lr: 0.004794 min_lr: 0.004794 loss: 3.3267 (3.1245) weight_decay: 0.0500 (0.0500) time: 0.7287 data: 0.0003 max mem: 57114 Epoch: [142] [ 40/156] eta: 0:01:30 lr: 0.004791 min_lr: 0.004791 loss: 3.4527 (3.0967) weight_decay: 0.0500 (0.0500) time: 0.7237 data: 0.0004 max mem: 57114 Epoch: [142] [ 50/156] eta: 0:01:21 lr: 0.004789 min_lr: 0.004789 loss: 3.4360 (3.1047) weight_decay: 0.0500 (0.0500) time: 0.7218 data: 0.0004 max mem: 57114 Epoch: [142] [ 60/156] eta: 0:01:12 lr: 0.004786 min_lr: 0.004786 loss: 3.2130 (3.0980) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0004 max mem: 57114 Epoch: [142] [ 70/156] eta: 0:01:04 lr: 0.004783 min_lr: 0.004783 loss: 3.2481 (3.1143) weight_decay: 0.0500 (0.0500) time: 0.7181 data: 0.0004 max mem: 57114 Epoch: [142] [ 80/156] eta: 0:00:56 lr: 0.004780 min_lr: 0.004780 loss: 3.1901 (3.1053) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0004 max mem: 57114 Epoch: [142] [ 90/156] eta: 0:00:49 lr: 0.004777 min_lr: 0.004777 loss: 3.1394 (3.1074) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0004 max mem: 57114 Epoch: [142] [100/156] eta: 0:00:41 lr: 0.004775 min_lr: 0.004775 loss: 3.2408 (3.1300) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [142] [110/156] eta: 0:00:33 lr: 0.004772 min_lr: 0.004772 loss: 3.3858 (3.1594) weight_decay: 0.0500 (0.0500) time: 0.7073 data: 0.0004 max mem: 57114 Epoch: [142] [120/156] eta: 0:00:26 lr: 0.004769 min_lr: 0.004769 loss: 3.3773 (3.1713) weight_decay: 0.0500 (0.0500) time: 0.7006 data: 0.0004 max mem: 57114 Epoch: [142] [130/156] eta: 0:00:19 lr: 0.004766 min_lr: 0.004766 loss: 3.3773 (3.1907) weight_decay: 0.0500 (0.0500) time: 0.6955 data: 0.0008 max mem: 57114 Epoch: [142] [140/156] eta: 0:00:11 lr: 0.004763 min_lr: 0.004763 loss: 3.1679 (3.1750) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0007 max mem: 57114 Epoch: [142] [150/156] eta: 0:00:04 lr: 0.004760 min_lr: 0.004760 loss: 2.9572 (3.1634) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [142] [155/156] eta: 0:00:00 lr: 0.004759 min_lr: 0.004759 loss: 3.0725 (3.1638) weight_decay: 0.0500 (0.0500) time: 0.6892 data: 0.0001 max mem: 57114 Epoch: [142] Total time: 0:01:53 (0.7266 s / it) Averaged stats: lr: 0.004759 min_lr: 0.004759 loss: 3.0725 (3.1386) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8627 (0.8627) acc1: 83.3333 (83.3333) acc5: 97.3958 (97.3958) time: 2.0905 data: 1.8343 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9578 (0.9627) acc1: 80.7292 (79.2839) acc5: 95.8333 (94.2455) time: 0.5688 data: 0.3669 max mem: 57114 Test: Total time: 0:00:02 (0.5927 s / it) * Acc@1 78.605 Acc@5 94.557 loss 1.042 Accuracy of the model on the 50000 test images: 78.6% Max accuracy: 78.67% Test: [0/5] eta: 0:00:11 loss: 1.1662 (1.1662) acc1: 71.8750 (71.8750) acc5: 90.1042 (90.1042) time: 2.2931 data: 2.0498 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.5130 (1.4648) acc1: 64.0625 (63.1714) acc5: 88.0208 (84.0153) time: 0.6092 data: 0.4100 max mem: 57114 Test: Total time: 0:00:03 (0.6209 s / it) * Acc@1 59.649 Acc@5 82.651 loss 1.733 Accuracy of the model EMA on 50000 test images: 59.6% Max EMA accuracy: 59.65% Epoch: [143] [ 0/156] eta: 0:07:00 lr: 0.004759 min_lr: 0.004759 loss: 3.0555 (3.0555) weight_decay: 0.0500 (0.0500) time: 2.6951 data: 2.0476 max mem: 57114 Epoch: [143] [ 10/156] eta: 0:02:12 lr: 0.004756 min_lr: 0.004756 loss: 3.4129 (3.3331) weight_decay: 0.0500 (0.0500) time: 0.9102 data: 0.1864 max mem: 57114 Epoch: [143] [ 20/156] eta: 0:01:52 lr: 0.004753 min_lr: 0.004753 loss: 3.4129 (3.2863) weight_decay: 0.0500 (0.0500) time: 0.7300 data: 0.0004 max mem: 57114 Epoch: [143] [ 30/156] eta: 0:01:39 lr: 0.004750 min_lr: 0.004750 loss: 3.4384 (3.3101) weight_decay: 0.0500 (0.0500) time: 0.7255 data: 0.0005 max mem: 57114 Epoch: [143] [ 40/156] eta: 0:01:29 lr: 0.004747 min_lr: 0.004747 loss: 3.3786 (3.2737) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [143] [ 50/156] eta: 0:01:20 lr: 0.004745 min_lr: 0.004745 loss: 3.1940 (3.2560) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0004 max mem: 57114 Epoch: [143] [ 60/156] eta: 0:01:12 lr: 0.004742 min_lr: 0.004742 loss: 3.2380 (3.2449) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0004 max mem: 57114 Epoch: [143] [ 70/156] eta: 0:01:04 lr: 0.004739 min_lr: 0.004739 loss: 3.2380 (3.2421) weight_decay: 0.0500 (0.0500) time: 0.7312 data: 0.0004 max mem: 57114 Epoch: [143] [ 80/156] eta: 0:00:56 lr: 0.004736 min_lr: 0.004736 loss: 3.3776 (3.2533) weight_decay: 0.0500 (0.0500) time: 0.7145 data: 0.0004 max mem: 57114 Epoch: [143] [ 90/156] eta: 0:00:48 lr: 0.004733 min_lr: 0.004733 loss: 3.3476 (3.2541) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.0003 max mem: 57114 Epoch: [143] [100/156] eta: 0:00:41 lr: 0.004730 min_lr: 0.004730 loss: 3.3336 (3.2371) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [143] [110/156] eta: 0:00:33 lr: 0.004728 min_lr: 0.004728 loss: 3.3226 (3.2423) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0004 max mem: 57114 Epoch: [143] [120/156] eta: 0:00:26 lr: 0.004725 min_lr: 0.004725 loss: 3.1446 (3.2189) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [143] [130/156] eta: 0:00:18 lr: 0.004722 min_lr: 0.004722 loss: 3.1446 (3.2249) weight_decay: 0.0500 (0.0500) time: 0.7015 data: 0.0008 max mem: 57114 Epoch: [143] [140/156] eta: 0:00:11 lr: 0.004719 min_lr: 0.004719 loss: 3.4025 (3.2246) weight_decay: 0.0500 (0.0500) time: 0.6951 data: 0.0007 max mem: 57114 Epoch: [143] [150/156] eta: 0:00:04 lr: 0.004716 min_lr: 0.004716 loss: 3.3373 (3.2261) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0001 max mem: 57114 Epoch: [143] [155/156] eta: 0:00:00 lr: 0.004715 min_lr: 0.004715 loss: 3.3773 (3.2348) weight_decay: 0.0500 (0.0500) time: 0.6871 data: 0.0001 max mem: 57114 Epoch: [143] Total time: 0:01:53 (0.7261 s / it) Averaged stats: lr: 0.004715 min_lr: 0.004715 loss: 3.3773 (3.1380) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0398 (1.0398) acc1: 84.8958 (84.8958) acc5: 97.9167 (97.9167) time: 2.0836 data: 1.8276 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0927 (1.1011) acc1: 82.2917 (79.7954) acc5: 96.8750 (95.5243) time: 0.5674 data: 0.3656 max mem: 57114 Test: Total time: 0:00:02 (0.5909 s / it) * Acc@1 78.878 Acc@5 94.569 loss 1.196 Accuracy of the model on the 50000 test images: 78.9% Max accuracy: 78.88% Test: [0/5] eta: 0:00:10 loss: 1.1296 (1.1296) acc1: 72.3958 (72.3958) acc5: 90.6250 (90.6250) time: 2.0217 data: 1.7783 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.4672 (1.4218) acc1: 66.1458 (64.3223) acc5: 88.0208 (84.1432) time: 0.5551 data: 0.3558 max mem: 57114 Test: Total time: 0:00:02 (0.5674 s / it) * Acc@1 60.542 Acc@5 83.312 loss 1.686 Accuracy of the model EMA on 50000 test images: 60.5% Max EMA accuracy: 60.54% Epoch: [144] [ 0/156] eta: 0:08:51 lr: 0.004715 min_lr: 0.004715 loss: 2.6195 (2.6195) weight_decay: 0.0500 (0.0500) time: 3.4081 data: 2.7578 max mem: 57114 Epoch: [144] [ 10/156] eta: 0:02:25 lr: 0.004712 min_lr: 0.004712 loss: 3.1916 (3.0126) weight_decay: 0.0500 (0.0500) time: 0.9963 data: 0.2509 max mem: 57114 Epoch: [144] [ 20/156] eta: 0:01:59 lr: 0.004709 min_lr: 0.004709 loss: 3.3182 (3.1020) weight_decay: 0.0500 (0.0500) time: 0.7487 data: 0.0003 max mem: 57114 Epoch: [144] [ 30/156] eta: 0:01:44 lr: 0.004706 min_lr: 0.004706 loss: 3.2857 (3.0535) weight_decay: 0.0500 (0.0500) time: 0.7384 data: 0.0003 max mem: 57114 Epoch: [144] [ 40/156] eta: 0:01:33 lr: 0.004703 min_lr: 0.004703 loss: 3.2332 (3.0788) weight_decay: 0.0500 (0.0500) time: 0.7369 data: 0.0004 max mem: 57114 Epoch: [144] [ 50/156] eta: 0:01:24 lr: 0.004700 min_lr: 0.004700 loss: 3.3658 (3.1595) weight_decay: 0.0500 (0.0500) time: 0.7364 data: 0.0004 max mem: 57114 Epoch: [144] [ 60/156] eta: 0:01:14 lr: 0.004698 min_lr: 0.004698 loss: 3.3593 (3.1639) weight_decay: 0.0500 (0.0500) time: 0.7223 data: 0.0004 max mem: 57114 Epoch: [144] [ 70/156] eta: 0:01:06 lr: 0.004695 min_lr: 0.004695 loss: 3.1473 (3.1565) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0004 max mem: 57114 Epoch: [144] [ 80/156] eta: 0:00:58 lr: 0.004692 min_lr: 0.004692 loss: 2.9571 (3.1281) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [144] [ 90/156] eta: 0:00:50 lr: 0.004689 min_lr: 0.004689 loss: 3.1817 (3.1319) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0004 max mem: 57114 Epoch: [144] [100/156] eta: 0:00:42 lr: 0.004686 min_lr: 0.004686 loss: 3.1817 (3.1244) weight_decay: 0.0500 (0.0500) time: 0.6981 data: 0.0003 max mem: 57114 Epoch: [144] [110/156] eta: 0:00:34 lr: 0.004683 min_lr: 0.004683 loss: 3.2904 (3.1330) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0003 max mem: 57114 Epoch: [144] [120/156] eta: 0:00:26 lr: 0.004681 min_lr: 0.004681 loss: 3.3853 (3.1293) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [144] [130/156] eta: 0:00:19 lr: 0.004678 min_lr: 0.004678 loss: 2.8310 (3.0982) weight_decay: 0.0500 (0.0500) time: 0.6999 data: 0.0008 max mem: 57114 Epoch: [144] [140/156] eta: 0:00:11 lr: 0.004675 min_lr: 0.004675 loss: 2.9535 (3.1090) weight_decay: 0.0500 (0.0500) time: 0.6885 data: 0.0007 max mem: 57114 Epoch: [144] [150/156] eta: 0:00:04 lr: 0.004672 min_lr: 0.004672 loss: 3.2450 (3.1111) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0001 max mem: 57114 Epoch: [144] [155/156] eta: 0:00:00 lr: 0.004671 min_lr: 0.004671 loss: 3.2223 (3.1076) weight_decay: 0.0500 (0.0500) time: 0.6880 data: 0.0001 max mem: 57114 Epoch: [144] Total time: 0:01:54 (0.7342 s / it) Averaged stats: lr: 0.004671 min_lr: 0.004671 loss: 3.2223 (3.1302) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7568 (0.7568) acc1: 88.5417 (88.5417) acc5: 97.3958 (97.3958) time: 2.1194 data: 1.8636 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9657 (0.9194) acc1: 79.6875 (80.0512) acc5: 96.3542 (94.8849) time: 0.5746 data: 0.3728 max mem: 57114 Test: Total time: 0:00:02 (0.5974 s / it) * Acc@1 79.124 Acc@5 94.829 loss 1.019 Accuracy of the model on the 50000 test images: 79.1% Max accuracy: 79.12% Test: [0/5] eta: 0:00:09 loss: 1.0968 (1.0968) acc1: 73.4375 (73.4375) acc5: 90.6250 (90.6250) time: 1.9434 data: 1.7001 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.4250 (1.3829) acc1: 66.6667 (64.5780) acc5: 89.0625 (84.9105) time: 0.5394 data: 0.3401 max mem: 57114 Test: Total time: 0:00:02 (0.5500 s / it) * Acc@1 61.345 Acc@5 83.951 loss 1.643 Accuracy of the model EMA on 50000 test images: 61.3% Max EMA accuracy: 61.35% Epoch: [145] [ 0/156] eta: 0:09:08 lr: 0.004670 min_lr: 0.004670 loss: 3.4100 (3.4100) weight_decay: 0.0500 (0.0500) time: 3.5160 data: 2.8687 max mem: 57114 Epoch: [145] [ 10/156] eta: 0:02:22 lr: 0.004668 min_lr: 0.004668 loss: 3.4100 (3.3075) weight_decay: 0.0500 (0.0500) time: 0.9773 data: 0.2611 max mem: 57114 Epoch: [145] [ 20/156] eta: 0:01:56 lr: 0.004665 min_lr: 0.004665 loss: 3.3183 (3.3485) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0003 max mem: 57114 Epoch: [145] [ 30/156] eta: 0:01:42 lr: 0.004662 min_lr: 0.004662 loss: 3.1847 (3.2489) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0004 max mem: 57114 Epoch: [145] [ 40/156] eta: 0:01:32 lr: 0.004659 min_lr: 0.004659 loss: 3.3433 (3.2610) weight_decay: 0.0500 (0.0500) time: 0.7272 data: 0.0004 max mem: 57114 Epoch: [145] [ 50/156] eta: 0:01:23 lr: 0.004656 min_lr: 0.004656 loss: 3.3471 (3.2187) weight_decay: 0.0500 (0.0500) time: 0.7476 data: 0.0004 max mem: 57114 Epoch: [145] [ 60/156] eta: 0:01:14 lr: 0.004653 min_lr: 0.004653 loss: 3.0658 (3.1809) weight_decay: 0.0500 (0.0500) time: 0.7375 data: 0.0004 max mem: 57114 Epoch: [145] [ 70/156] eta: 0:01:06 lr: 0.004651 min_lr: 0.004651 loss: 2.6775 (3.0947) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0004 max mem: 57114 Epoch: [145] [ 80/156] eta: 0:00:57 lr: 0.004648 min_lr: 0.004648 loss: 2.7215 (3.0874) weight_decay: 0.0500 (0.0500) time: 0.7155 data: 0.0004 max mem: 57114 Epoch: [145] [ 90/156] eta: 0:00:49 lr: 0.004645 min_lr: 0.004645 loss: 3.1032 (3.0770) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0003 max mem: 57114 Epoch: [145] [100/156] eta: 0:00:41 lr: 0.004642 min_lr: 0.004642 loss: 3.2003 (3.0851) weight_decay: 0.0500 (0.0500) time: 0.7001 data: 0.0004 max mem: 57114 Epoch: [145] [110/156] eta: 0:00:34 lr: 0.004639 min_lr: 0.004639 loss: 3.2414 (3.0863) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0004 max mem: 57114 Epoch: [145] [120/156] eta: 0:00:26 lr: 0.004636 min_lr: 0.004636 loss: 3.1414 (3.0812) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [145] [130/156] eta: 0:00:19 lr: 0.004634 min_lr: 0.004634 loss: 3.2502 (3.0955) weight_decay: 0.0500 (0.0500) time: 0.6985 data: 0.0006 max mem: 57114 Epoch: [145] [140/156] eta: 0:00:11 lr: 0.004631 min_lr: 0.004631 loss: 3.3870 (3.1077) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0005 max mem: 57114 Epoch: [145] [150/156] eta: 0:00:04 lr: 0.004628 min_lr: 0.004628 loss: 3.2476 (3.1024) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [145] [155/156] eta: 0:00:00 lr: 0.004626 min_lr: 0.004626 loss: 3.0972 (3.0957) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [145] Total time: 0:01:54 (0.7319 s / it) Averaged stats: lr: 0.004626 min_lr: 0.004626 loss: 3.0972 (3.1261) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8940 (0.8940) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.1028 data: 1.8472 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9510 (1.0006) acc1: 82.2917 (79.7954) acc5: 96.3542 (94.6292) time: 0.5712 data: 0.3695 max mem: 57114 Test: Total time: 0:00:02 (0.5917 s / it) * Acc@1 78.359 Acc@5 94.369 loss 1.093 Accuracy of the model on the 50000 test images: 78.4% Max accuracy: 79.12% Test: [0/5] eta: 0:00:11 loss: 1.0667 (1.0667) acc1: 74.4792 (74.4792) acc5: 91.6667 (91.6667) time: 2.3294 data: 2.0857 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3850 (1.3464) acc1: 67.1875 (65.7289) acc5: 89.5833 (85.5499) time: 0.6165 data: 0.4172 max mem: 57114 Test: Total time: 0:00:03 (0.6290 s / it) * Acc@1 62.156 Acc@5 84.561 loss 1.602 Accuracy of the model EMA on 50000 test images: 62.2% Max EMA accuracy: 62.16% Epoch: [146] [ 0/156] eta: 0:07:12 lr: 0.004626 min_lr: 0.004626 loss: 3.4905 (3.4905) weight_decay: 0.0500 (0.0500) time: 2.7696 data: 2.1191 max mem: 57114 Epoch: [146] [ 10/156] eta: 0:02:13 lr: 0.004623 min_lr: 0.004623 loss: 3.4399 (3.3732) weight_decay: 0.0500 (0.0500) time: 0.9132 data: 0.1929 max mem: 57114 Epoch: [146] [ 20/156] eta: 0:01:51 lr: 0.004620 min_lr: 0.004620 loss: 3.2715 (3.1972) weight_decay: 0.0500 (0.0500) time: 0.7249 data: 0.0003 max mem: 57114 Epoch: [146] [ 30/156] eta: 0:01:38 lr: 0.004618 min_lr: 0.004618 loss: 3.3243 (3.2619) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0003 max mem: 57114 Epoch: [146] [ 40/156] eta: 0:01:29 lr: 0.004615 min_lr: 0.004615 loss: 3.3996 (3.2326) weight_decay: 0.0500 (0.0500) time: 0.7130 data: 0.0004 max mem: 57114 Epoch: [146] [ 50/156] eta: 0:01:20 lr: 0.004612 min_lr: 0.004612 loss: 3.0898 (3.1627) weight_decay: 0.0500 (0.0500) time: 0.7223 data: 0.0003 max mem: 57114 Epoch: [146] [ 60/156] eta: 0:01:12 lr: 0.004609 min_lr: 0.004609 loss: 2.9559 (3.1771) weight_decay: 0.0500 (0.0500) time: 0.7235 data: 0.0004 max mem: 57114 Epoch: [146] [ 70/156] eta: 0:01:04 lr: 0.004606 min_lr: 0.004606 loss: 3.2465 (3.1714) weight_decay: 0.0500 (0.0500) time: 0.7132 data: 0.0004 max mem: 57114 Epoch: [146] [ 80/156] eta: 0:00:56 lr: 0.004603 min_lr: 0.004603 loss: 3.2710 (3.1693) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0004 max mem: 57114 Epoch: [146] [ 90/156] eta: 0:00:48 lr: 0.004601 min_lr: 0.004601 loss: 3.1778 (3.1454) weight_decay: 0.0500 (0.0500) time: 0.7080 data: 0.0004 max mem: 57114 Epoch: [146] [100/156] eta: 0:00:41 lr: 0.004598 min_lr: 0.004598 loss: 2.7320 (3.1193) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0004 max mem: 57114 Epoch: [146] [110/156] eta: 0:00:33 lr: 0.004595 min_lr: 0.004595 loss: 3.0096 (3.1134) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0003 max mem: 57114 Epoch: [146] [120/156] eta: 0:00:26 lr: 0.004592 min_lr: 0.004592 loss: 3.2725 (3.1217) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0003 max mem: 57114 Epoch: [146] [130/156] eta: 0:00:18 lr: 0.004589 min_lr: 0.004589 loss: 3.4049 (3.1182) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0008 max mem: 57114 Epoch: [146] [140/156] eta: 0:00:11 lr: 0.004586 min_lr: 0.004586 loss: 3.2805 (3.1281) weight_decay: 0.0500 (0.0500) time: 0.6924 data: 0.0007 max mem: 57114 Epoch: [146] [150/156] eta: 0:00:04 lr: 0.004584 min_lr: 0.004584 loss: 3.1616 (3.1187) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0001 max mem: 57114 Epoch: [146] [155/156] eta: 0:00:00 lr: 0.004582 min_lr: 0.004582 loss: 3.0374 (3.1197) weight_decay: 0.0500 (0.0500) time: 0.6802 data: 0.0001 max mem: 57114 Epoch: [146] Total time: 0:01:52 (0.7242 s / it) Averaged stats: lr: 0.004582 min_lr: 0.004582 loss: 3.0374 (3.1205) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9021 (0.9021) acc1: 81.7708 (81.7708) acc5: 96.3542 (96.3542) time: 2.1534 data: 1.8977 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9230 (0.9576) acc1: 81.2500 (79.0281) acc5: 96.3542 (94.6292) time: 0.5814 data: 0.3796 max mem: 57114 Test: Total time: 0:00:03 (0.6039 s / it) * Acc@1 78.804 Acc@5 94.693 loss 1.025 Accuracy of the model on the 50000 test images: 78.8% Max accuracy: 79.12% Test: [0/5] eta: 0:00:11 loss: 1.0388 (1.0388) acc1: 75.5208 (75.5208) acc5: 92.1875 (92.1875) time: 2.3060 data: 2.0625 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3454 (1.3108) acc1: 67.7083 (66.3683) acc5: 89.5833 (85.9335) time: 0.6119 data: 0.4126 max mem: 57114 Test: Total time: 0:00:03 (0.6236 s / it) * Acc@1 62.902 Acc@5 85.126 loss 1.563 Accuracy of the model EMA on 50000 test images: 62.9% Max EMA accuracy: 62.90% Epoch: [147] [ 0/156] eta: 0:05:57 lr: 0.004582 min_lr: 0.004582 loss: 3.3232 (3.3232) weight_decay: 0.0500 (0.0500) time: 2.2899 data: 1.6370 max mem: 57114 Epoch: [147] [ 10/156] eta: 0:02:04 lr: 0.004579 min_lr: 0.004579 loss: 3.2422 (3.0401) weight_decay: 0.0500 (0.0500) time: 0.8527 data: 0.1491 max mem: 57114 Epoch: [147] [ 20/156] eta: 0:01:47 lr: 0.004576 min_lr: 0.004576 loss: 3.2422 (3.1094) weight_decay: 0.0500 (0.0500) time: 0.7171 data: 0.0003 max mem: 57114 Epoch: [147] [ 30/156] eta: 0:01:37 lr: 0.004573 min_lr: 0.004573 loss: 3.3630 (3.1208) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0003 max mem: 57114 Epoch: [147] [ 40/156] eta: 0:01:28 lr: 0.004570 min_lr: 0.004570 loss: 3.1622 (3.0687) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0003 max mem: 57114 Epoch: [147] [ 50/156] eta: 0:01:19 lr: 0.004568 min_lr: 0.004568 loss: 3.2055 (3.1095) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0003 max mem: 57114 Epoch: [147] [ 60/156] eta: 0:01:11 lr: 0.004565 min_lr: 0.004565 loss: 3.1824 (3.0794) weight_decay: 0.0500 (0.0500) time: 0.7207 data: 0.0003 max mem: 57114 Epoch: [147] [ 70/156] eta: 0:01:03 lr: 0.004562 min_lr: 0.004562 loss: 3.1918 (3.0906) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0003 max mem: 57114 Epoch: [147] [ 80/156] eta: 0:00:56 lr: 0.004559 min_lr: 0.004559 loss: 3.3017 (3.1217) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [147] [ 90/156] eta: 0:00:48 lr: 0.004556 min_lr: 0.004556 loss: 3.2772 (3.1143) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [147] [100/156] eta: 0:00:40 lr: 0.004553 min_lr: 0.004553 loss: 3.0074 (3.0982) weight_decay: 0.0500 (0.0500) time: 0.7036 data: 0.0004 max mem: 57114 Epoch: [147] [110/156] eta: 0:00:33 lr: 0.004550 min_lr: 0.004550 loss: 3.0258 (3.0864) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0004 max mem: 57114 Epoch: [147] [120/156] eta: 0:00:26 lr: 0.004548 min_lr: 0.004548 loss: 2.9980 (3.0755) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0004 max mem: 57114 Epoch: [147] [130/156] eta: 0:00:18 lr: 0.004545 min_lr: 0.004545 loss: 2.7147 (3.0520) weight_decay: 0.0500 (0.0500) time: 0.7001 data: 0.0008 max mem: 57114 Epoch: [147] [140/156] eta: 0:00:11 lr: 0.004542 min_lr: 0.004542 loss: 2.8897 (3.0544) weight_decay: 0.0500 (0.0500) time: 0.6928 data: 0.0006 max mem: 57114 Epoch: [147] [150/156] eta: 0:00:04 lr: 0.004539 min_lr: 0.004539 loss: 3.2841 (3.0603) weight_decay: 0.0500 (0.0500) time: 0.6788 data: 0.0001 max mem: 57114 Epoch: [147] [155/156] eta: 0:00:00 lr: 0.004538 min_lr: 0.004538 loss: 3.2841 (3.0676) weight_decay: 0.0500 (0.0500) time: 0.6753 data: 0.0001 max mem: 57114 Epoch: [147] Total time: 0:01:52 (0.7191 s / it) Averaged stats: lr: 0.004538 min_lr: 0.004538 loss: 3.2841 (3.1258) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8712 (0.8712) acc1: 83.3333 (83.3333) acc5: 96.8750 (96.8750) time: 2.0166 data: 1.7610 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0081 (0.9692) acc1: 81.2500 (79.6675) acc5: 95.8333 (94.7570) time: 0.5539 data: 0.3523 max mem: 57114 Test: Total time: 0:00:02 (0.5759 s / it) * Acc@1 78.896 Acc@5 94.679 loss 1.079 Accuracy of the model on the 50000 test images: 78.9% Max accuracy: 79.12% Test: [0/5] eta: 0:00:11 loss: 1.0134 (1.0134) acc1: 76.5625 (76.5625) acc5: 92.7083 (92.7083) time: 2.2864 data: 2.0428 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.3080 (1.2778) acc1: 69.2708 (67.3913) acc5: 90.1042 (86.3171) time: 0.6080 data: 0.4087 max mem: 57114 Test: Total time: 0:00:03 (0.6185 s / it) * Acc@1 63.639 Acc@5 85.632 loss 1.526 Accuracy of the model EMA on 50000 test images: 63.6% Max EMA accuracy: 63.64% Epoch: [148] [ 0/156] eta: 0:06:59 lr: 0.004537 min_lr: 0.004537 loss: 3.0239 (3.0239) weight_decay: 0.0500 (0.0500) time: 2.6876 data: 2.0393 max mem: 57114 Epoch: [148] [ 10/156] eta: 0:02:13 lr: 0.004535 min_lr: 0.004535 loss: 3.2050 (3.0766) weight_decay: 0.0500 (0.0500) time: 0.9165 data: 0.1857 max mem: 57114 Epoch: [148] [ 20/156] eta: 0:01:50 lr: 0.004532 min_lr: 0.004532 loss: 3.2915 (3.1386) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0003 max mem: 57114 Epoch: [148] [ 30/156] eta: 0:01:39 lr: 0.004529 min_lr: 0.004529 loss: 3.2281 (3.0362) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0004 max mem: 57114 Epoch: [148] [ 40/156] eta: 0:01:29 lr: 0.004526 min_lr: 0.004526 loss: 2.9398 (3.0590) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0004 max mem: 57114 Epoch: [148] [ 50/156] eta: 0:01:20 lr: 0.004523 min_lr: 0.004523 loss: 3.1205 (3.0550) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0004 max mem: 57114 Epoch: [148] [ 60/156] eta: 0:01:12 lr: 0.004520 min_lr: 0.004520 loss: 3.3162 (3.0769) weight_decay: 0.0500 (0.0500) time: 0.7247 data: 0.0004 max mem: 57114 Epoch: [148] [ 70/156] eta: 0:01:04 lr: 0.004517 min_lr: 0.004517 loss: 3.2444 (3.0910) weight_decay: 0.0500 (0.0500) time: 0.7154 data: 0.0003 max mem: 57114 Epoch: [148] [ 80/156] eta: 0:00:56 lr: 0.004515 min_lr: 0.004515 loss: 3.1591 (3.0763) weight_decay: 0.0500 (0.0500) time: 0.7023 data: 0.0003 max mem: 57114 Epoch: [148] [ 90/156] eta: 0:00:48 lr: 0.004512 min_lr: 0.004512 loss: 3.1591 (3.0844) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0003 max mem: 57114 Epoch: [148] [100/156] eta: 0:00:41 lr: 0.004509 min_lr: 0.004509 loss: 3.1704 (3.0623) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [148] [110/156] eta: 0:00:33 lr: 0.004506 min_lr: 0.004506 loss: 3.2904 (3.0832) weight_decay: 0.0500 (0.0500) time: 0.7127 data: 0.0003 max mem: 57114 Epoch: [148] [120/156] eta: 0:00:26 lr: 0.004503 min_lr: 0.004503 loss: 3.3698 (3.0822) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0004 max mem: 57114 Epoch: [148] [130/156] eta: 0:00:18 lr: 0.004500 min_lr: 0.004500 loss: 3.2531 (3.0860) weight_decay: 0.0500 (0.0500) time: 0.6994 data: 0.0009 max mem: 57114 Epoch: [148] [140/156] eta: 0:00:11 lr: 0.004497 min_lr: 0.004497 loss: 3.2754 (3.0984) weight_decay: 0.0500 (0.0500) time: 0.6879 data: 0.0007 max mem: 57114 Epoch: [148] [150/156] eta: 0:00:04 lr: 0.004495 min_lr: 0.004495 loss: 3.2202 (3.1093) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0001 max mem: 57114 Epoch: [148] [155/156] eta: 0:00:00 lr: 0.004493 min_lr: 0.004493 loss: 3.1817 (3.1064) weight_decay: 0.0500 (0.0500) time: 0.6859 data: 0.0001 max mem: 57114 Epoch: [148] Total time: 0:01:53 (0.7261 s / it) Averaged stats: lr: 0.004493 min_lr: 0.004493 loss: 3.1817 (3.1279) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8114 (0.8114) acc1: 85.9375 (85.9375) acc5: 99.4792 (99.4792) time: 2.0752 data: 1.8192 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0277 (0.9525) acc1: 82.8125 (81.2020) acc5: 97.3958 (96.5473) time: 0.5657 data: 0.3639 max mem: 57114 Test: Total time: 0:00:02 (0.5864 s / it) * Acc@1 79.384 Acc@5 94.867 loss 1.067 Accuracy of the model on the 50000 test images: 79.4% Max accuracy: 79.38% Test: [0/5] eta: 0:00:10 loss: 0.9907 (0.9907) acc1: 77.0833 (77.0833) acc5: 92.7083 (92.7083) time: 2.0184 data: 1.7749 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2729 (1.2470) acc1: 69.2708 (67.7749) acc5: 90.6250 (86.8286) time: 0.5543 data: 0.3551 max mem: 57114 Test: Total time: 0:00:02 (0.5686 s / it) * Acc@1 64.286 Acc@5 86.125 loss 1.491 Accuracy of the model EMA on 50000 test images: 64.3% Max EMA accuracy: 64.29% Epoch: [149] [ 0/156] eta: 0:07:15 lr: 0.004493 min_lr: 0.004493 loss: 2.7677 (2.7677) weight_decay: 0.0500 (0.0500) time: 2.7896 data: 2.1387 max mem: 57114 Epoch: [149] [ 10/156] eta: 0:02:14 lr: 0.004490 min_lr: 0.004490 loss: 3.1821 (3.0584) weight_decay: 0.0500 (0.0500) time: 0.9192 data: 0.1947 max mem: 57114 Epoch: [149] [ 20/156] eta: 0:01:52 lr: 0.004487 min_lr: 0.004487 loss: 3.1821 (3.1679) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0003 max mem: 57114 Epoch: [149] [ 30/156] eta: 0:01:41 lr: 0.004484 min_lr: 0.004484 loss: 3.1439 (3.1599) weight_decay: 0.0500 (0.0500) time: 0.7396 data: 0.0003 max mem: 57114 Epoch: [149] [ 40/156] eta: 0:01:30 lr: 0.004481 min_lr: 0.004481 loss: 3.0870 (3.1089) weight_decay: 0.0500 (0.0500) time: 0.7309 data: 0.0003 max mem: 57114 Epoch: [149] [ 50/156] eta: 0:01:21 lr: 0.004479 min_lr: 0.004479 loss: 3.1570 (3.0855) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0003 max mem: 57114 Epoch: [149] [ 60/156] eta: 0:01:13 lr: 0.004476 min_lr: 0.004476 loss: 3.2388 (3.0814) weight_decay: 0.0500 (0.0500) time: 0.7351 data: 0.0005 max mem: 57114 Epoch: [149] [ 70/156] eta: 0:01:05 lr: 0.004473 min_lr: 0.004473 loss: 3.3638 (3.1187) weight_decay: 0.0500 (0.0500) time: 0.7251 data: 0.0006 max mem: 57114 Epoch: [149] [ 80/156] eta: 0:00:57 lr: 0.004470 min_lr: 0.004470 loss: 3.3755 (3.1272) weight_decay: 0.0500 (0.0500) time: 0.7131 data: 0.0004 max mem: 57114 Epoch: [149] [ 90/156] eta: 0:00:49 lr: 0.004467 min_lr: 0.004467 loss: 3.1422 (3.1140) weight_decay: 0.0500 (0.0500) time: 0.7215 data: 0.0004 max mem: 57114 Epoch: [149] [100/156] eta: 0:00:41 lr: 0.004464 min_lr: 0.004464 loss: 3.1422 (3.1046) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [149] [110/156] eta: 0:00:34 lr: 0.004461 min_lr: 0.004461 loss: 3.1780 (3.1101) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0004 max mem: 57114 Epoch: [149] [120/156] eta: 0:00:26 lr: 0.004459 min_lr: 0.004459 loss: 3.3198 (3.1094) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0003 max mem: 57114 Epoch: [149] [130/156] eta: 0:00:19 lr: 0.004456 min_lr: 0.004456 loss: 3.0770 (3.0907) weight_decay: 0.0500 (0.0500) time: 0.6976 data: 0.0007 max mem: 57114 Epoch: [149] [140/156] eta: 0:00:11 lr: 0.004453 min_lr: 0.004453 loss: 3.0770 (3.0824) weight_decay: 0.0500 (0.0500) time: 0.6840 data: 0.0006 max mem: 57114 Epoch: [149] [150/156] eta: 0:00:04 lr: 0.004450 min_lr: 0.004450 loss: 3.2259 (3.0889) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0001 max mem: 57114 Epoch: [149] [155/156] eta: 0:00:00 lr: 0.004449 min_lr: 0.004449 loss: 3.1307 (3.0773) weight_decay: 0.0500 (0.0500) time: 0.6835 data: 0.0001 max mem: 57114 Epoch: [149] Total time: 0:01:53 (0.7289 s / it) Averaged stats: lr: 0.004449 min_lr: 0.004449 loss: 3.1307 (3.1106) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6222 (0.6222) acc1: 88.5417 (88.5417) acc5: 99.4792 (99.4792) time: 2.0484 data: 1.7929 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7895 (0.7724) acc1: 84.3750 (80.9463) acc5: 96.8750 (96.1637) time: 0.5603 data: 0.3586 max mem: 57114 Test: Total time: 0:00:02 (0.5813 s / it) * Acc@1 79.638 Acc@5 95.117 loss 0.891 Accuracy of the model on the 50000 test images: 79.6% Max accuracy: 79.64% Test: [0/5] eta: 0:00:10 loss: 0.9690 (0.9690) acc1: 77.0833 (77.0833) acc5: 92.7083 (92.7083) time: 2.1488 data: 1.9052 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2388 (1.2177) acc1: 69.2708 (68.2864) acc5: 91.6667 (87.4680) time: 0.5804 data: 0.3811 max mem: 57114 Test: Total time: 0:00:02 (0.5928 s / it) * Acc@1 64.918 Acc@5 86.583 loss 1.458 Accuracy of the model EMA on 50000 test images: 64.9% Max EMA accuracy: 64.92% Epoch: [150] [ 0/156] eta: 0:06:49 lr: 0.004448 min_lr: 0.004448 loss: 3.4153 (3.4153) weight_decay: 0.0500 (0.0500) time: 2.6261 data: 1.9763 max mem: 57114 Epoch: [150] [ 10/156] eta: 0:02:09 lr: 0.004445 min_lr: 0.004445 loss: 3.1161 (2.8818) weight_decay: 0.0500 (0.0500) time: 0.8877 data: 0.1799 max mem: 57114 Epoch: [150] [ 20/156] eta: 0:01:51 lr: 0.004443 min_lr: 0.004443 loss: 2.9162 (2.8640) weight_decay: 0.0500 (0.0500) time: 0.7272 data: 0.0003 max mem: 57114 Epoch: [150] [ 30/156] eta: 0:01:38 lr: 0.004440 min_lr: 0.004440 loss: 3.1409 (2.9070) weight_decay: 0.0500 (0.0500) time: 0.7238 data: 0.0004 max mem: 57114 Epoch: [150] [ 40/156] eta: 0:01:29 lr: 0.004437 min_lr: 0.004437 loss: 3.1918 (2.9421) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [150] [ 50/156] eta: 0:01:20 lr: 0.004434 min_lr: 0.004434 loss: 3.1918 (2.9982) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0004 max mem: 57114 Epoch: [150] [ 60/156] eta: 0:01:12 lr: 0.004431 min_lr: 0.004431 loss: 3.3879 (3.0544) weight_decay: 0.0500 (0.0500) time: 0.7373 data: 0.0004 max mem: 57114 Epoch: [150] [ 70/156] eta: 0:01:05 lr: 0.004428 min_lr: 0.004428 loss: 3.3706 (3.0738) weight_decay: 0.0500 (0.0500) time: 0.7488 data: 0.0004 max mem: 57114 Epoch: [150] [ 80/156] eta: 0:00:57 lr: 0.004425 min_lr: 0.004425 loss: 3.2811 (3.0903) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0004 max mem: 57114 Epoch: [150] [ 90/156] eta: 0:00:49 lr: 0.004423 min_lr: 0.004423 loss: 3.2811 (3.0836) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0004 max mem: 57114 Epoch: [150] [100/156] eta: 0:00:41 lr: 0.004420 min_lr: 0.004420 loss: 3.0087 (3.0481) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0003 max mem: 57114 Epoch: [150] [110/156] eta: 0:00:34 lr: 0.004417 min_lr: 0.004417 loss: 2.8160 (3.0454) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0003 max mem: 57114 Epoch: [150] [120/156] eta: 0:00:26 lr: 0.004414 min_lr: 0.004414 loss: 3.1493 (3.0502) weight_decay: 0.0500 (0.0500) time: 0.6993 data: 0.0003 max mem: 57114 Epoch: [150] [130/156] eta: 0:00:19 lr: 0.004411 min_lr: 0.004411 loss: 3.1574 (3.0599) weight_decay: 0.0500 (0.0500) time: 0.6967 data: 0.0007 max mem: 57114 Epoch: [150] [140/156] eta: 0:00:11 lr: 0.004408 min_lr: 0.004408 loss: 3.2648 (3.0569) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0006 max mem: 57114 Epoch: [150] [150/156] eta: 0:00:04 lr: 0.004405 min_lr: 0.004405 loss: 3.2648 (3.0741) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [150] [155/156] eta: 0:00:00 lr: 0.004404 min_lr: 0.004404 loss: 3.1900 (3.0796) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0001 max mem: 57114 Epoch: [150] Total time: 0:01:53 (0.7273 s / it) Averaged stats: lr: 0.004404 min_lr: 0.004404 loss: 3.1900 (3.1069) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7879 (0.7879) acc1: 84.3750 (84.3750) acc5: 98.4375 (98.4375) time: 2.0447 data: 1.7891 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9576 (0.9562) acc1: 82.8125 (80.4348) acc5: 95.8333 (94.7570) time: 0.5596 data: 0.3579 max mem: 57114 Test: Total time: 0:00:02 (0.5835 s / it) * Acc@1 79.170 Acc@5 94.691 loss 1.052 Accuracy of the model on the 50000 test images: 79.2% Max accuracy: 79.64% Test: [0/5] eta: 0:00:11 loss: 0.9507 (0.9507) acc1: 77.6042 (77.6042) acc5: 92.7083 (92.7083) time: 2.3851 data: 2.1416 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.2080 (1.1915) acc1: 70.3125 (69.1816) acc5: 91.6667 (87.5959) time: 0.6280 data: 0.4285 max mem: 57114 Test: Total time: 0:00:03 (0.6432 s / it) * Acc@1 65.525 Acc@5 86.959 loss 1.429 Accuracy of the model EMA on 50000 test images: 65.5% Max EMA accuracy: 65.53% Epoch: [151] [ 0/156] eta: 0:07:53 lr: 0.004404 min_lr: 0.004404 loss: 3.0349 (3.0349) weight_decay: 0.0500 (0.0500) time: 3.0326 data: 2.3851 max mem: 57114 Epoch: [151] [ 10/156] eta: 0:02:15 lr: 0.004401 min_lr: 0.004401 loss: 3.2505 (3.1618) weight_decay: 0.0500 (0.0500) time: 0.9299 data: 0.2171 max mem: 57114 Epoch: [151] [ 20/156] eta: 0:01:52 lr: 0.004398 min_lr: 0.004398 loss: 3.2226 (3.1840) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0003 max mem: 57114 Epoch: [151] [ 30/156] eta: 0:01:40 lr: 0.004395 min_lr: 0.004395 loss: 3.1761 (3.1598) weight_decay: 0.0500 (0.0500) time: 0.7233 data: 0.0003 max mem: 57114 Epoch: [151] [ 40/156] eta: 0:01:30 lr: 0.004392 min_lr: 0.004392 loss: 3.2095 (3.1906) weight_decay: 0.0500 (0.0500) time: 0.7232 data: 0.0004 max mem: 57114 Epoch: [151] [ 50/156] eta: 0:01:21 lr: 0.004389 min_lr: 0.004389 loss: 3.4740 (3.2007) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0004 max mem: 57114 Epoch: [151] [ 60/156] eta: 0:01:13 lr: 0.004387 min_lr: 0.004387 loss: 3.4466 (3.2161) weight_decay: 0.0500 (0.0500) time: 0.7274 data: 0.0004 max mem: 57114 Epoch: [151] [ 70/156] eta: 0:01:04 lr: 0.004384 min_lr: 0.004384 loss: 3.3334 (3.2168) weight_decay: 0.0500 (0.0500) time: 0.7211 data: 0.0004 max mem: 57114 Epoch: [151] [ 80/156] eta: 0:00:57 lr: 0.004381 min_lr: 0.004381 loss: 3.0838 (3.1976) weight_decay: 0.0500 (0.0500) time: 0.7176 data: 0.0004 max mem: 57114 Epoch: [151] [ 90/156] eta: 0:00:49 lr: 0.004378 min_lr: 0.004378 loss: 3.1199 (3.1833) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [151] [100/156] eta: 0:00:41 lr: 0.004375 min_lr: 0.004375 loss: 2.9111 (3.1478) weight_decay: 0.0500 (0.0500) time: 0.7040 data: 0.0004 max mem: 57114 Epoch: [151] [110/156] eta: 0:00:33 lr: 0.004372 min_lr: 0.004372 loss: 3.2341 (3.1616) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0003 max mem: 57114 Epoch: [151] [120/156] eta: 0:00:26 lr: 0.004369 min_lr: 0.004369 loss: 3.3910 (3.1681) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0003 max mem: 57114 Epoch: [151] [130/156] eta: 0:00:19 lr: 0.004366 min_lr: 0.004366 loss: 3.3630 (3.1619) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0009 max mem: 57114 Epoch: [151] [140/156] eta: 0:00:11 lr: 0.004364 min_lr: 0.004364 loss: 3.1003 (3.1583) weight_decay: 0.0500 (0.0500) time: 0.6929 data: 0.0007 max mem: 57114 Epoch: [151] [150/156] eta: 0:00:04 lr: 0.004361 min_lr: 0.004361 loss: 3.0285 (3.1433) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0001 max mem: 57114 Epoch: [151] [155/156] eta: 0:00:00 lr: 0.004359 min_lr: 0.004359 loss: 3.1003 (3.1477) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0001 max mem: 57114 Epoch: [151] Total time: 0:01:53 (0.7274 s / it) Averaged stats: lr: 0.004359 min_lr: 0.004359 loss: 3.1003 (3.0998) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7673 (0.7673) acc1: 86.9792 (86.9792) acc5: 97.9167 (97.9167) time: 2.1844 data: 1.9286 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0297 (0.9245) acc1: 80.2083 (80.0512) acc5: 96.3542 (95.3964) time: 0.5889 data: 0.3860 max mem: 57114 Test: Total time: 0:00:03 (0.6125 s / it) * Acc@1 79.034 Acc@5 94.849 loss 1.022 Accuracy of the model on the 50000 test images: 79.0% Max accuracy: 79.64% Test: [0/5] eta: 0:00:11 loss: 0.9323 (0.9323) acc1: 77.6042 (77.6042) acc5: 93.2292 (93.2292) time: 2.3782 data: 2.1348 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1777 (1.1651) acc1: 70.8333 (69.4373) acc5: 91.6667 (87.7238) time: 0.6263 data: 0.4270 max mem: 57114 Test: Total time: 0:00:03 (0.6360 s / it) * Acc@1 66.095 Acc@5 87.388 loss 1.399 Accuracy of the model EMA on 50000 test images: 66.1% Max EMA accuracy: 66.09% Epoch: [152] [ 0/156] eta: 0:07:01 lr: 0.004359 min_lr: 0.004359 loss: 3.2770 (3.2770) weight_decay: 0.0500 (0.0500) time: 2.7037 data: 2.0464 max mem: 57114 Epoch: [152] [ 10/156] eta: 0:02:09 lr: 0.004356 min_lr: 0.004356 loss: 3.2770 (3.1858) weight_decay: 0.0500 (0.0500) time: 0.8849 data: 0.1863 max mem: 57114 Epoch: [152] [ 20/156] eta: 0:01:50 lr: 0.004353 min_lr: 0.004353 loss: 3.1422 (3.0597) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0004 max mem: 57114 Epoch: [152] [ 30/156] eta: 0:01:38 lr: 0.004350 min_lr: 0.004350 loss: 3.0356 (3.0692) weight_decay: 0.0500 (0.0500) time: 0.7278 data: 0.0004 max mem: 57114 Epoch: [152] [ 40/156] eta: 0:01:28 lr: 0.004348 min_lr: 0.004348 loss: 3.0181 (3.0611) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0004 max mem: 57114 Epoch: [152] [ 50/156] eta: 0:01:20 lr: 0.004345 min_lr: 0.004345 loss: 3.0181 (3.0527) weight_decay: 0.0500 (0.0500) time: 0.7292 data: 0.0004 max mem: 57114 Epoch: [152] [ 60/156] eta: 0:01:12 lr: 0.004342 min_lr: 0.004342 loss: 3.0464 (3.0615) weight_decay: 0.0500 (0.0500) time: 0.7344 data: 0.0004 max mem: 57114 Epoch: [152] [ 70/156] eta: 0:01:04 lr: 0.004339 min_lr: 0.004339 loss: 2.9665 (3.0345) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0004 max mem: 57114 Epoch: [152] [ 80/156] eta: 0:00:56 lr: 0.004336 min_lr: 0.004336 loss: 2.8148 (3.0220) weight_decay: 0.0500 (0.0500) time: 0.7017 data: 0.0004 max mem: 57114 Epoch: [152] [ 90/156] eta: 0:00:48 lr: 0.004333 min_lr: 0.004333 loss: 3.0657 (3.0196) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0003 max mem: 57114 Epoch: [152] [100/156] eta: 0:00:41 lr: 0.004330 min_lr: 0.004330 loss: 3.1491 (3.0176) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0003 max mem: 57114 Epoch: [152] [110/156] eta: 0:00:33 lr: 0.004327 min_lr: 0.004327 loss: 3.0320 (3.0094) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0003 max mem: 57114 Epoch: [152] [120/156] eta: 0:00:26 lr: 0.004325 min_lr: 0.004325 loss: 3.2070 (3.0261) weight_decay: 0.0500 (0.0500) time: 0.7118 data: 0.0004 max mem: 57114 Epoch: [152] [130/156] eta: 0:00:18 lr: 0.004322 min_lr: 0.004322 loss: 3.1923 (3.0253) weight_decay: 0.0500 (0.0500) time: 0.7009 data: 0.0008 max mem: 57114 Epoch: [152] [140/156] eta: 0:00:11 lr: 0.004319 min_lr: 0.004319 loss: 3.0490 (3.0154) weight_decay: 0.0500 (0.0500) time: 0.6909 data: 0.0007 max mem: 57114 Epoch: [152] [150/156] eta: 0:00:04 lr: 0.004316 min_lr: 0.004316 loss: 3.0490 (3.0335) weight_decay: 0.0500 (0.0500) time: 0.6810 data: 0.0001 max mem: 57114 Epoch: [152] [155/156] eta: 0:00:00 lr: 0.004315 min_lr: 0.004315 loss: 3.4591 (3.0506) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0001 max mem: 57114 Epoch: [152] Total time: 0:01:52 (0.7237 s / it) Averaged stats: lr: 0.004315 min_lr: 0.004315 loss: 3.4591 (3.0984) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9663 (0.9663) acc1: 85.4167 (85.4167) acc5: 97.3958 (97.3958) time: 2.1707 data: 1.9149 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0904 (1.0302) acc1: 80.2083 (80.6905) acc5: 96.3542 (95.6522) time: 0.5851 data: 0.3831 max mem: 57114 Test: Total time: 0:00:03 (0.6072 s / it) * Acc@1 78.844 Acc@5 94.731 loss 1.156 Accuracy of the model on the 50000 test images: 78.8% Max accuracy: 79.64% Test: [0/5] eta: 0:00:11 loss: 0.9156 (0.9156) acc1: 78.1250 (78.1250) acc5: 93.2292 (93.2292) time: 2.3028 data: 2.0592 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1502 (1.1409) acc1: 71.8750 (69.9489) acc5: 91.6667 (88.2353) time: 0.6112 data: 0.4119 max mem: 57114 Test: Total time: 0:00:03 (0.6224 s / it) * Acc@1 66.600 Acc@5 87.728 loss 1.372 Accuracy of the model EMA on 50000 test images: 66.6% Max EMA accuracy: 66.60% Epoch: [153] [ 0/156] eta: 0:07:16 lr: 0.004314 min_lr: 0.004314 loss: 2.9710 (2.9710) weight_decay: 0.0500 (0.0500) time: 2.7977 data: 2.1318 max mem: 57114 Epoch: [153] [ 10/156] eta: 0:02:13 lr: 0.004311 min_lr: 0.004311 loss: 3.1404 (2.9449) weight_decay: 0.0500 (0.0500) time: 0.9177 data: 0.1941 max mem: 57114 Epoch: [153] [ 20/156] eta: 0:01:52 lr: 0.004309 min_lr: 0.004309 loss: 3.2341 (3.0885) weight_decay: 0.0500 (0.0500) time: 0.7276 data: 0.0003 max mem: 57114 Epoch: [153] [ 30/156] eta: 0:01:39 lr: 0.004306 min_lr: 0.004306 loss: 3.2607 (3.1011) weight_decay: 0.0500 (0.0500) time: 0.7210 data: 0.0004 max mem: 57114 Epoch: [153] [ 40/156] eta: 0:01:30 lr: 0.004303 min_lr: 0.004303 loss: 3.1171 (3.0996) weight_decay: 0.0500 (0.0500) time: 0.7273 data: 0.0004 max mem: 57114 Epoch: [153] [ 50/156] eta: 0:01:20 lr: 0.004300 min_lr: 0.004300 loss: 3.1171 (3.0975) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0004 max mem: 57114 Epoch: [153] [ 60/156] eta: 0:01:12 lr: 0.004297 min_lr: 0.004297 loss: 3.1782 (3.1016) weight_decay: 0.0500 (0.0500) time: 0.7146 data: 0.0004 max mem: 57114 Epoch: [153] [ 70/156] eta: 0:01:04 lr: 0.004294 min_lr: 0.004294 loss: 3.2102 (3.1211) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0004 max mem: 57114 Epoch: [153] [ 80/156] eta: 0:00:56 lr: 0.004291 min_lr: 0.004291 loss: 3.2102 (3.1154) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0004 max mem: 57114 Epoch: [153] [ 90/156] eta: 0:00:48 lr: 0.004288 min_lr: 0.004288 loss: 3.1995 (3.1313) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0004 max mem: 57114 Epoch: [153] [100/156] eta: 0:00:41 lr: 0.004286 min_lr: 0.004286 loss: 3.2563 (3.1444) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [153] [110/156] eta: 0:00:33 lr: 0.004283 min_lr: 0.004283 loss: 3.2032 (3.1316) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0004 max mem: 57114 Epoch: [153] [120/156] eta: 0:00:26 lr: 0.004280 min_lr: 0.004280 loss: 3.2032 (3.1356) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0004 max mem: 57114 Epoch: [153] [130/156] eta: 0:00:18 lr: 0.004277 min_lr: 0.004277 loss: 3.1366 (3.1288) weight_decay: 0.0500 (0.0500) time: 0.6982 data: 0.0010 max mem: 57114 Epoch: [153] [140/156] eta: 0:00:11 lr: 0.004274 min_lr: 0.004274 loss: 3.2972 (3.1467) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0008 max mem: 57114 Epoch: [153] [150/156] eta: 0:00:04 lr: 0.004271 min_lr: 0.004271 loss: 3.3365 (3.1511) weight_decay: 0.0500 (0.0500) time: 0.6857 data: 0.0001 max mem: 57114 Epoch: [153] [155/156] eta: 0:00:00 lr: 0.004270 min_lr: 0.004270 loss: 3.3018 (3.1457) weight_decay: 0.0500 (0.0500) time: 0.6853 data: 0.0001 max mem: 57114 Epoch: [153] Total time: 0:01:52 (0.7238 s / it) Averaged stats: lr: 0.004270 min_lr: 0.004270 loss: 3.3018 (3.0928) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.8714 (0.8714) acc1: 87.5000 (87.5000) acc5: 97.3958 (97.3958) time: 2.2330 data: 1.9767 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9757 (0.9677) acc1: 80.2083 (80.4348) acc5: 97.3958 (95.5243) time: 0.5973 data: 0.3954 max mem: 57114 Test: Total time: 0:00:03 (0.6187 s / it) * Acc@1 79.232 Acc@5 95.009 loss 1.083 Accuracy of the model on the 50000 test images: 79.2% Max accuracy: 79.64% Test: [0/5] eta: 0:00:11 loss: 0.8993 (0.8993) acc1: 78.1250 (78.1250) acc5: 93.2292 (93.2292) time: 2.2856 data: 2.0421 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1235 (1.1171) acc1: 71.8750 (70.2046) acc5: 91.6667 (88.3632) time: 0.6078 data: 0.4085 max mem: 57114 Test: Total time: 0:00:03 (0.6241 s / it) * Acc@1 67.056 Acc@5 88.045 loss 1.345 Accuracy of the model EMA on 50000 test images: 67.1% Max EMA accuracy: 67.06% Epoch: [154] [ 0/156] eta: 0:06:38 lr: 0.004270 min_lr: 0.004270 loss: 3.5936 (3.5936) weight_decay: 0.0500 (0.0500) time: 2.5569 data: 1.9073 max mem: 57114 Epoch: [154] [ 10/156] eta: 0:02:08 lr: 0.004267 min_lr: 0.004267 loss: 2.6284 (2.8113) weight_decay: 0.0500 (0.0500) time: 0.8773 data: 0.1737 max mem: 57114 Epoch: [154] [ 20/156] eta: 0:01:50 lr: 0.004264 min_lr: 0.004264 loss: 2.9721 (2.9647) weight_decay: 0.0500 (0.0500) time: 0.7266 data: 0.0003 max mem: 57114 Epoch: [154] [ 30/156] eta: 0:01:39 lr: 0.004261 min_lr: 0.004261 loss: 3.2273 (3.0530) weight_decay: 0.0500 (0.0500) time: 0.7444 data: 0.0003 max mem: 57114 Epoch: [154] [ 40/156] eta: 0:01:29 lr: 0.004258 min_lr: 0.004258 loss: 3.2273 (3.0192) weight_decay: 0.0500 (0.0500) time: 0.7327 data: 0.0004 max mem: 57114 Epoch: [154] [ 50/156] eta: 0:01:20 lr: 0.004255 min_lr: 0.004255 loss: 2.9240 (3.0215) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [154] [ 60/156] eta: 0:01:12 lr: 0.004252 min_lr: 0.004252 loss: 2.9736 (3.0166) weight_decay: 0.0500 (0.0500) time: 0.7233 data: 0.0004 max mem: 57114 Epoch: [154] [ 70/156] eta: 0:01:04 lr: 0.004249 min_lr: 0.004249 loss: 3.2635 (3.0681) weight_decay: 0.0500 (0.0500) time: 0.7203 data: 0.0004 max mem: 57114 Epoch: [154] [ 80/156] eta: 0:00:56 lr: 0.004247 min_lr: 0.004247 loss: 3.2402 (3.0760) weight_decay: 0.0500 (0.0500) time: 0.7139 data: 0.0004 max mem: 57114 Epoch: [154] [ 90/156] eta: 0:00:49 lr: 0.004244 min_lr: 0.004244 loss: 3.1331 (3.0772) weight_decay: 0.0500 (0.0500) time: 0.7211 data: 0.0004 max mem: 57114 Epoch: [154] [100/156] eta: 0:00:41 lr: 0.004241 min_lr: 0.004241 loss: 3.1242 (3.0573) weight_decay: 0.0500 (0.0500) time: 0.7242 data: 0.0003 max mem: 57114 Epoch: [154] [110/156] eta: 0:00:33 lr: 0.004238 min_lr: 0.004238 loss: 3.1749 (3.0674) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [154] [120/156] eta: 0:00:26 lr: 0.004235 min_lr: 0.004235 loss: 3.1842 (3.0806) weight_decay: 0.0500 (0.0500) time: 0.7035 data: 0.0004 max mem: 57114 Epoch: [154] [130/156] eta: 0:00:19 lr: 0.004232 min_lr: 0.004232 loss: 3.2143 (3.0834) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0009 max mem: 57114 Epoch: [154] [140/156] eta: 0:00:11 lr: 0.004229 min_lr: 0.004229 loss: 3.3321 (3.1072) weight_decay: 0.0500 (0.0500) time: 0.6968 data: 0.0008 max mem: 57114 Epoch: [154] [150/156] eta: 0:00:04 lr: 0.004226 min_lr: 0.004226 loss: 3.2878 (3.0969) weight_decay: 0.0500 (0.0500) time: 0.6877 data: 0.0001 max mem: 57114 Epoch: [154] [155/156] eta: 0:00:00 lr: 0.004225 min_lr: 0.004225 loss: 3.2372 (3.1024) weight_decay: 0.0500 (0.0500) time: 0.6884 data: 0.0001 max mem: 57114 Epoch: [154] Total time: 0:01:53 (0.7286 s / it) Averaged stats: lr: 0.004225 min_lr: 0.004225 loss: 3.2372 (3.0908) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8690 (0.8690) acc1: 85.9375 (85.9375) acc5: 97.9167 (97.9167) time: 2.0550 data: 1.7991 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9770 (0.9445) acc1: 81.7708 (80.0512) acc5: 96.8750 (95.2685) time: 0.5617 data: 0.3599 max mem: 57114 Test: Total time: 0:00:02 (0.5835 s / it) * Acc@1 79.168 Acc@5 94.905 loss 1.069 Accuracy of the model on the 50000 test images: 79.2% Max accuracy: 79.64% Test: [0/5] eta: 0:00:11 loss: 0.8838 (0.8838) acc1: 78.6458 (78.6458) acc5: 93.2292 (93.2292) time: 2.2843 data: 2.0408 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0992 (1.0954) acc1: 71.8750 (70.7161) acc5: 91.6667 (88.6189) time: 0.6075 data: 0.4082 max mem: 57114 Test: Total time: 0:00:03 (0.6222 s / it) * Acc@1 67.539 Acc@5 88.405 loss 1.321 Accuracy of the model EMA on 50000 test images: 67.5% Max EMA accuracy: 67.54% Epoch: [155] [ 0/156] eta: 0:08:35 lr: 0.004225 min_lr: 0.004225 loss: 3.3469 (3.3469) weight_decay: 0.0500 (0.0500) time: 3.3022 data: 2.6465 max mem: 57114 Epoch: [155] [ 10/156] eta: 0:02:20 lr: 0.004222 min_lr: 0.004222 loss: 3.1917 (3.1237) weight_decay: 0.0500 (0.0500) time: 0.9634 data: 0.2409 max mem: 57114 Epoch: [155] [ 20/156] eta: 0:01:55 lr: 0.004219 min_lr: 0.004219 loss: 3.2364 (3.1850) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0003 max mem: 57114 Epoch: [155] [ 30/156] eta: 0:01:41 lr: 0.004216 min_lr: 0.004216 loss: 3.2364 (3.1401) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0003 max mem: 57114 Epoch: [155] [ 40/156] eta: 0:01:31 lr: 0.004213 min_lr: 0.004213 loss: 2.9351 (3.0764) weight_decay: 0.0500 (0.0500) time: 0.7250 data: 0.0003 max mem: 57114 Epoch: [155] [ 50/156] eta: 0:01:22 lr: 0.004210 min_lr: 0.004210 loss: 3.0090 (3.0571) weight_decay: 0.0500 (0.0500) time: 0.7271 data: 0.0004 max mem: 57114 Epoch: [155] [ 60/156] eta: 0:01:13 lr: 0.004208 min_lr: 0.004208 loss: 3.0489 (3.0349) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0004 max mem: 57114 Epoch: [155] [ 70/156] eta: 0:01:05 lr: 0.004205 min_lr: 0.004205 loss: 3.1368 (3.0409) weight_decay: 0.0500 (0.0500) time: 0.7254 data: 0.0004 max mem: 57114 Epoch: [155] [ 80/156] eta: 0:00:57 lr: 0.004202 min_lr: 0.004202 loss: 3.1368 (3.0455) weight_decay: 0.0500 (0.0500) time: 0.7194 data: 0.0003 max mem: 57114 Epoch: [155] [ 90/156] eta: 0:00:49 lr: 0.004199 min_lr: 0.004199 loss: 3.1672 (3.0663) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0004 max mem: 57114 Epoch: [155] [100/156] eta: 0:00:41 lr: 0.004196 min_lr: 0.004196 loss: 3.2385 (3.0869) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [155] [110/156] eta: 0:00:34 lr: 0.004193 min_lr: 0.004193 loss: 3.1082 (3.0725) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [155] [120/156] eta: 0:00:26 lr: 0.004190 min_lr: 0.004190 loss: 3.0384 (3.0810) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [155] [130/156] eta: 0:00:19 lr: 0.004187 min_lr: 0.004187 loss: 3.3302 (3.0979) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0009 max mem: 57114 Epoch: [155] [140/156] eta: 0:00:11 lr: 0.004185 min_lr: 0.004185 loss: 3.1854 (3.1021) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0007 max mem: 57114 Epoch: [155] [150/156] eta: 0:00:04 lr: 0.004182 min_lr: 0.004182 loss: 3.1536 (3.1081) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [155] [155/156] eta: 0:00:00 lr: 0.004180 min_lr: 0.004180 loss: 3.1536 (3.1032) weight_decay: 0.0500 (0.0500) time: 0.6769 data: 0.0001 max mem: 57114 Epoch: [155] Total time: 0:01:53 (0.7303 s / it) Averaged stats: lr: 0.004180 min_lr: 0.004180 loss: 3.1536 (3.0958) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8031 (0.8031) acc1: 85.4167 (85.4167) acc5: 98.4375 (98.4375) time: 2.0900 data: 1.8335 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9218 (0.9377) acc1: 83.3333 (80.8184) acc5: 96.3542 (95.3964) time: 0.5688 data: 0.3668 max mem: 57114 Test: Total time: 0:00:02 (0.5906 s / it) * Acc@1 79.206 Acc@5 94.903 loss 1.038 Accuracy of the model on the 50000 test images: 79.2% Max accuracy: 79.64% Test: [0/5] eta: 0:00:11 loss: 0.8689 (0.8689) acc1: 78.6458 (78.6458) acc5: 93.2292 (93.2292) time: 2.2752 data: 2.0315 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0765 (1.0746) acc1: 71.8750 (70.9719) acc5: 91.6667 (88.6189) time: 0.6062 data: 0.4065 max mem: 57114 Test: Total time: 0:00:03 (0.6187 s / it) * Acc@1 68.051 Acc@5 88.673 loss 1.297 Accuracy of the model EMA on 50000 test images: 68.1% Max EMA accuracy: 68.05% Epoch: [156] [ 0/156] eta: 0:07:27 lr: 0.004180 min_lr: 0.004180 loss: 1.9047 (1.9047) weight_decay: 0.0500 (0.0500) time: 2.8707 data: 2.2237 max mem: 57114 Epoch: [156] [ 10/156] eta: 0:02:11 lr: 0.004177 min_lr: 0.004177 loss: 3.3082 (3.1220) weight_decay: 0.0500 (0.0500) time: 0.9025 data: 0.2024 max mem: 57114 Epoch: [156] [ 20/156] eta: 0:01:51 lr: 0.004174 min_lr: 0.004174 loss: 3.2832 (3.0555) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0003 max mem: 57114 Epoch: [156] [ 30/156] eta: 0:01:39 lr: 0.004171 min_lr: 0.004171 loss: 3.0317 (3.0717) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0003 max mem: 57114 Epoch: [156] [ 40/156] eta: 0:01:29 lr: 0.004168 min_lr: 0.004168 loss: 3.0317 (3.0753) weight_decay: 0.0500 (0.0500) time: 0.7285 data: 0.0003 max mem: 57114 Epoch: [156] [ 50/156] eta: 0:01:21 lr: 0.004166 min_lr: 0.004166 loss: 2.8185 (2.9944) weight_decay: 0.0500 (0.0500) time: 0.7284 data: 0.0004 max mem: 57114 Epoch: [156] [ 60/156] eta: 0:01:12 lr: 0.004163 min_lr: 0.004163 loss: 2.9229 (3.0319) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0005 max mem: 57114 Epoch: [156] [ 70/156] eta: 0:01:04 lr: 0.004160 min_lr: 0.004160 loss: 3.3315 (3.0402) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0005 max mem: 57114 Epoch: [156] [ 80/156] eta: 0:00:56 lr: 0.004157 min_lr: 0.004157 loss: 3.3122 (3.0505) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [156] [ 90/156] eta: 0:00:49 lr: 0.004154 min_lr: 0.004154 loss: 3.1431 (3.0283) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0003 max mem: 57114 Epoch: [156] [100/156] eta: 0:00:41 lr: 0.004151 min_lr: 0.004151 loss: 3.0823 (3.0294) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0003 max mem: 57114 Epoch: [156] [110/156] eta: 0:00:34 lr: 0.004148 min_lr: 0.004148 loss: 2.9590 (3.0173) weight_decay: 0.0500 (0.0500) time: 0.7207 data: 0.0003 max mem: 57114 Epoch: [156] [120/156] eta: 0:00:26 lr: 0.004145 min_lr: 0.004145 loss: 2.8348 (3.0034) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0004 max mem: 57114 Epoch: [156] [130/156] eta: 0:00:19 lr: 0.004143 min_lr: 0.004143 loss: 3.0060 (3.0135) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0008 max mem: 57114 Epoch: [156] [140/156] eta: 0:00:11 lr: 0.004140 min_lr: 0.004140 loss: 3.1855 (3.0336) weight_decay: 0.0500 (0.0500) time: 0.6955 data: 0.0007 max mem: 57114 Epoch: [156] [150/156] eta: 0:00:04 lr: 0.004137 min_lr: 0.004137 loss: 3.0988 (3.0272) weight_decay: 0.0500 (0.0500) time: 0.6896 data: 0.0001 max mem: 57114 Epoch: [156] [155/156] eta: 0:00:00 lr: 0.004135 min_lr: 0.004135 loss: 3.0869 (3.0319) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0001 max mem: 57114 Epoch: [156] Total time: 0:01:53 (0.7278 s / it) Averaged stats: lr: 0.004135 min_lr: 0.004135 loss: 3.0869 (3.0856) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8460 (0.8460) acc1: 86.9792 (86.9792) acc5: 98.4375 (98.4375) time: 2.0217 data: 1.7659 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9228 (0.9407) acc1: 81.7708 (81.5857) acc5: 96.3542 (95.2685) time: 0.5550 data: 0.3532 max mem: 57114 Test: Total time: 0:00:02 (0.5853 s / it) * Acc@1 79.715 Acc@5 94.935 loss 1.016 Accuracy of the model on the 50000 test images: 79.7% Max accuracy: 79.72% Test: [0/5] eta: 0:00:09 loss: 0.8545 (0.8545) acc1: 78.6458 (78.6458) acc5: 93.2292 (93.2292) time: 1.9949 data: 1.7514 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0555 (1.0547) acc1: 71.8750 (71.2276) acc5: 91.6667 (88.8747) time: 0.5496 data: 0.3504 max mem: 57114 Test: Total time: 0:00:02 (0.5611 s / it) * Acc@1 68.452 Acc@5 88.935 loss 1.275 Accuracy of the model EMA on 50000 test images: 68.5% Max EMA accuracy: 68.45% Epoch: [157] [ 0/156] eta: 0:06:43 lr: 0.004135 min_lr: 0.004135 loss: 3.2011 (3.2011) weight_decay: 0.0500 (0.0500) time: 2.5853 data: 1.9332 max mem: 57114 Epoch: [157] [ 10/156] eta: 0:02:09 lr: 0.004132 min_lr: 0.004132 loss: 3.1985 (2.9729) weight_decay: 0.0500 (0.0500) time: 0.8882 data: 0.1760 max mem: 57114 Epoch: [157] [ 20/156] eta: 0:01:52 lr: 0.004129 min_lr: 0.004129 loss: 3.1985 (3.0148) weight_decay: 0.0500 (0.0500) time: 0.7403 data: 0.0004 max mem: 57114 Epoch: [157] [ 30/156] eta: 0:01:40 lr: 0.004126 min_lr: 0.004126 loss: 3.2139 (3.0333) weight_decay: 0.0500 (0.0500) time: 0.7474 data: 0.0004 max mem: 57114 Epoch: [157] [ 40/156] eta: 0:01:31 lr: 0.004124 min_lr: 0.004124 loss: 3.2469 (3.0764) weight_decay: 0.0500 (0.0500) time: 0.7521 data: 0.0004 max mem: 57114 Epoch: [157] [ 50/156] eta: 0:01:22 lr: 0.004121 min_lr: 0.004121 loss: 3.3261 (3.1154) weight_decay: 0.0500 (0.0500) time: 0.7604 data: 0.0004 max mem: 57114 Epoch: [157] [ 60/156] eta: 0:01:14 lr: 0.004118 min_lr: 0.004118 loss: 3.3151 (3.1304) weight_decay: 0.0500 (0.0500) time: 0.7364 data: 0.0004 max mem: 57114 Epoch: [157] [ 70/156] eta: 0:01:05 lr: 0.004115 min_lr: 0.004115 loss: 3.2128 (3.0953) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0004 max mem: 57114 Epoch: [157] [ 80/156] eta: 0:00:57 lr: 0.004112 min_lr: 0.004112 loss: 3.2128 (3.1112) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0004 max mem: 57114 Epoch: [157] [ 90/156] eta: 0:00:49 lr: 0.004109 min_lr: 0.004109 loss: 3.2555 (3.1122) weight_decay: 0.0500 (0.0500) time: 0.7105 data: 0.0004 max mem: 57114 Epoch: [157] [100/156] eta: 0:00:41 lr: 0.004106 min_lr: 0.004106 loss: 3.2320 (3.1149) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0004 max mem: 57114 Epoch: [157] [110/156] eta: 0:00:34 lr: 0.004103 min_lr: 0.004103 loss: 3.1840 (3.1123) weight_decay: 0.0500 (0.0500) time: 0.7165 data: 0.0004 max mem: 57114 Epoch: [157] [120/156] eta: 0:00:26 lr: 0.004101 min_lr: 0.004101 loss: 3.0093 (3.0939) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0004 max mem: 57114 Epoch: [157] [130/156] eta: 0:00:19 lr: 0.004098 min_lr: 0.004098 loss: 2.9843 (3.1014) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0009 max mem: 57114 Epoch: [157] [140/156] eta: 0:00:11 lr: 0.004095 min_lr: 0.004095 loss: 3.1430 (3.1058) weight_decay: 0.0500 (0.0500) time: 0.6886 data: 0.0007 max mem: 57114 Epoch: [157] [150/156] eta: 0:00:04 lr: 0.004092 min_lr: 0.004092 loss: 3.1430 (3.1043) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0001 max mem: 57114 Epoch: [157] [155/156] eta: 0:00:00 lr: 0.004091 min_lr: 0.004091 loss: 3.2528 (3.1163) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0001 max mem: 57114 Epoch: [157] Total time: 0:01:54 (0.7328 s / it) Averaged stats: lr: 0.004091 min_lr: 0.004091 loss: 3.2528 (3.0809) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9783 (0.9783) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.0465 data: 1.7906 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0878 (1.0879) acc1: 83.3333 (79.4118) acc5: 96.3542 (95.0128) time: 0.5600 data: 0.3582 max mem: 57114 Test: Total time: 0:00:02 (0.5827 s / it) * Acc@1 79.044 Acc@5 94.871 loss 1.169 Accuracy of the model on the 50000 test images: 79.0% Max accuracy: 79.72% Test: [0/5] eta: 0:00:11 loss: 0.8422 (0.8422) acc1: 78.6458 (78.6458) acc5: 93.2292 (93.2292) time: 2.2824 data: 2.0389 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0368 (1.0373) acc1: 73.4375 (71.8670) acc5: 91.6667 (89.1304) time: 0.6071 data: 0.4079 max mem: 57114 Test: Total time: 0:00:03 (0.6216 s / it) * Acc@1 68.882 Acc@5 89.166 loss 1.254 Accuracy of the model EMA on 50000 test images: 68.9% Max EMA accuracy: 68.88% Epoch: [158] [ 0/156] eta: 0:07:56 lr: 0.004090 min_lr: 0.004090 loss: 2.7943 (2.7943) weight_decay: 0.0500 (0.0500) time: 3.0563 data: 2.4023 max mem: 57114 Epoch: [158] [ 10/156] eta: 0:02:20 lr: 0.004087 min_lr: 0.004087 loss: 3.5150 (3.2083) weight_decay: 0.0500 (0.0500) time: 0.9652 data: 0.2186 max mem: 57114 Epoch: [158] [ 20/156] eta: 0:01:55 lr: 0.004084 min_lr: 0.004084 loss: 3.3945 (3.1415) weight_decay: 0.0500 (0.0500) time: 0.7378 data: 0.0003 max mem: 57114 Epoch: [158] [ 30/156] eta: 0:01:41 lr: 0.004082 min_lr: 0.004082 loss: 3.0699 (3.1278) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0004 max mem: 57114 Epoch: [158] [ 40/156] eta: 0:01:30 lr: 0.004079 min_lr: 0.004079 loss: 3.0420 (3.1099) weight_decay: 0.0500 (0.0500) time: 0.7171 data: 0.0004 max mem: 57114 Epoch: [158] [ 50/156] eta: 0:01:21 lr: 0.004076 min_lr: 0.004076 loss: 3.1128 (3.1209) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0004 max mem: 57114 Epoch: [158] [ 60/156] eta: 0:01:13 lr: 0.004073 min_lr: 0.004073 loss: 3.1128 (3.1059) weight_decay: 0.0500 (0.0500) time: 0.7304 data: 0.0004 max mem: 57114 Epoch: [158] [ 70/156] eta: 0:01:05 lr: 0.004070 min_lr: 0.004070 loss: 3.1702 (3.1242) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0004 max mem: 57114 Epoch: [158] [ 80/156] eta: 0:00:57 lr: 0.004067 min_lr: 0.004067 loss: 3.1702 (3.1300) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [158] [ 90/156] eta: 0:00:49 lr: 0.004064 min_lr: 0.004064 loss: 3.3005 (3.1668) weight_decay: 0.0500 (0.0500) time: 0.7164 data: 0.0004 max mem: 57114 Epoch: [158] [100/156] eta: 0:00:41 lr: 0.004061 min_lr: 0.004061 loss: 3.4865 (3.1823) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0003 max mem: 57114 Epoch: [158] [110/156] eta: 0:00:34 lr: 0.004059 min_lr: 0.004059 loss: 3.2573 (3.1837) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [158] [120/156] eta: 0:00:26 lr: 0.004056 min_lr: 0.004056 loss: 3.1345 (3.1813) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0004 max mem: 57114 Epoch: [158] [130/156] eta: 0:00:19 lr: 0.004053 min_lr: 0.004053 loss: 3.2044 (3.1910) weight_decay: 0.0500 (0.0500) time: 0.6999 data: 0.0008 max mem: 57114 Epoch: [158] [140/156] eta: 0:00:11 lr: 0.004050 min_lr: 0.004050 loss: 2.9071 (3.1428) weight_decay: 0.0500 (0.0500) time: 0.6912 data: 0.0007 max mem: 57114 Epoch: [158] [150/156] eta: 0:00:04 lr: 0.004047 min_lr: 0.004047 loss: 2.7957 (3.1356) weight_decay: 0.0500 (0.0500) time: 0.6883 data: 0.0001 max mem: 57114 Epoch: [158] [155/156] eta: 0:00:00 lr: 0.004046 min_lr: 0.004046 loss: 2.9930 (3.1321) weight_decay: 0.0500 (0.0500) time: 0.6894 data: 0.0001 max mem: 57114 Epoch: [158] Total time: 0:01:53 (0.7293 s / it) Averaged stats: lr: 0.004046 min_lr: 0.004046 loss: 2.9930 (3.0811) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8046 (0.8046) acc1: 81.7708 (81.7708) acc5: 98.4375 (98.4375) time: 2.0403 data: 1.7844 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8999 (0.8796) acc1: 81.7708 (79.6675) acc5: 96.3542 (95.1407) time: 0.5587 data: 0.3569 max mem: 57114 Test: Total time: 0:00:02 (0.5812 s / it) * Acc@1 79.574 Acc@5 95.079 loss 0.963 Accuracy of the model on the 50000 test images: 79.6% Max accuracy: 79.72% Test: [0/5] eta: 0:00:11 loss: 0.8303 (0.8303) acc1: 78.6458 (78.6458) acc5: 93.7500 (93.7500) time: 2.3338 data: 2.0903 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0191 (1.0200) acc1: 74.4792 (72.2506) acc5: 91.6667 (89.5141) time: 0.6174 data: 0.4181 max mem: 57114 Test: Total time: 0:00:03 (0.6312 s / it) * Acc@1 69.238 Acc@5 89.362 loss 1.235 Accuracy of the model EMA on 50000 test images: 69.2% Max EMA accuracy: 69.24% Epoch: [159] [ 0/156] eta: 0:05:37 lr: 0.004045 min_lr: 0.004045 loss: 3.0553 (3.0553) weight_decay: 0.0500 (0.0500) time: 2.1643 data: 1.5121 max mem: 57114 Epoch: [159] [ 10/156] eta: 0:02:06 lr: 0.004042 min_lr: 0.004042 loss: 3.1676 (3.0739) weight_decay: 0.0500 (0.0500) time: 0.8684 data: 0.1377 max mem: 57114 Epoch: [159] [ 20/156] eta: 0:01:48 lr: 0.004040 min_lr: 0.004040 loss: 3.2425 (3.1086) weight_decay: 0.0500 (0.0500) time: 0.7317 data: 0.0003 max mem: 57114 Epoch: [159] [ 30/156] eta: 0:01:37 lr: 0.004037 min_lr: 0.004037 loss: 2.9713 (2.9827) weight_decay: 0.0500 (0.0500) time: 0.7231 data: 0.0003 max mem: 57114 Epoch: [159] [ 40/156] eta: 0:01:28 lr: 0.004034 min_lr: 0.004034 loss: 2.8845 (2.9800) weight_decay: 0.0500 (0.0500) time: 0.7313 data: 0.0003 max mem: 57114 Epoch: [159] [ 50/156] eta: 0:01:20 lr: 0.004031 min_lr: 0.004031 loss: 3.1839 (3.0212) weight_decay: 0.0500 (0.0500) time: 0.7361 data: 0.0004 max mem: 57114 Epoch: [159] [ 60/156] eta: 0:01:12 lr: 0.004028 min_lr: 0.004028 loss: 3.2642 (3.0782) weight_decay: 0.0500 (0.0500) time: 0.7264 data: 0.0004 max mem: 57114 Epoch: [159] [ 70/156] eta: 0:01:04 lr: 0.004025 min_lr: 0.004025 loss: 3.3791 (3.0769) weight_decay: 0.0500 (0.0500) time: 0.7215 data: 0.0004 max mem: 57114 Epoch: [159] [ 80/156] eta: 0:00:56 lr: 0.004022 min_lr: 0.004022 loss: 2.9613 (3.0602) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [159] [ 90/156] eta: 0:00:48 lr: 0.004019 min_lr: 0.004019 loss: 3.1606 (3.0528) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0003 max mem: 57114 Epoch: [159] [100/156] eta: 0:00:41 lr: 0.004017 min_lr: 0.004017 loss: 3.1762 (3.0345) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0003 max mem: 57114 Epoch: [159] [110/156] eta: 0:00:33 lr: 0.004014 min_lr: 0.004014 loss: 3.0089 (3.0347) weight_decay: 0.0500 (0.0500) time: 0.7025 data: 0.0004 max mem: 57114 Epoch: [159] [120/156] eta: 0:00:26 lr: 0.004011 min_lr: 0.004011 loss: 3.0297 (3.0401) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0003 max mem: 57114 Epoch: [159] [130/156] eta: 0:00:18 lr: 0.004008 min_lr: 0.004008 loss: 3.0680 (3.0297) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0008 max mem: 57114 Epoch: [159] [140/156] eta: 0:00:11 lr: 0.004005 min_lr: 0.004005 loss: 3.2989 (3.0513) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0007 max mem: 57114 Epoch: [159] [150/156] eta: 0:00:04 lr: 0.004002 min_lr: 0.004002 loss: 3.3260 (3.0600) weight_decay: 0.0500 (0.0500) time: 0.6846 data: 0.0001 max mem: 57114 Epoch: [159] [155/156] eta: 0:00:00 lr: 0.004001 min_lr: 0.004001 loss: 3.3260 (3.0732) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0001 max mem: 57114 Epoch: [159] Total time: 0:01:53 (0.7252 s / it) Averaged stats: lr: 0.004001 min_lr: 0.004001 loss: 3.3260 (3.0633) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 1.0064 (1.0064) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0919 data: 1.8362 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1210 (1.0812) acc1: 80.7292 (79.4118) acc5: 97.3958 (96.0358) time: 0.5691 data: 0.3673 max mem: 57114 Test: Total time: 0:00:02 (0.5895 s / it) * Acc@1 79.328 Acc@5 95.031 loss 1.162 Accuracy of the model on the 50000 test images: 79.3% Max accuracy: 79.72% Test: [0/5] eta: 0:00:11 loss: 0.8191 (0.8191) acc1: 79.1667 (79.1667) acc5: 93.7500 (93.7500) time: 2.3521 data: 2.1085 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0028 (1.0040) acc1: 74.4792 (73.0179) acc5: 91.6667 (89.6419) time: 0.6210 data: 0.4218 max mem: 57114 Test: Total time: 0:00:03 (0.6342 s / it) * Acc@1 69.561 Acc@5 89.558 loss 1.216 Accuracy of the model EMA on 50000 test images: 69.6% Max EMA accuracy: 69.56% Epoch: [160] [ 0/156] eta: 0:06:38 lr: 0.004001 min_lr: 0.004001 loss: 2.9841 (2.9841) weight_decay: 0.0500 (0.0500) time: 2.5524 data: 1.9036 max mem: 57114 Epoch: [160] [ 10/156] eta: 0:02:10 lr: 0.003998 min_lr: 0.003998 loss: 3.2237 (3.0936) weight_decay: 0.0500 (0.0500) time: 0.8924 data: 0.1733 max mem: 57114 Epoch: [160] [ 20/156] eta: 0:01:49 lr: 0.003995 min_lr: 0.003995 loss: 3.2899 (3.2345) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0003 max mem: 57114 Epoch: [160] [ 30/156] eta: 0:01:38 lr: 0.003992 min_lr: 0.003992 loss: 3.1426 (3.0974) weight_decay: 0.0500 (0.0500) time: 0.7264 data: 0.0003 max mem: 57114 Epoch: [160] [ 40/156] eta: 0:01:29 lr: 0.003989 min_lr: 0.003989 loss: 3.1591 (3.1179) weight_decay: 0.0500 (0.0500) time: 0.7344 data: 0.0004 max mem: 57114 Epoch: [160] [ 50/156] eta: 0:01:20 lr: 0.003986 min_lr: 0.003986 loss: 3.2737 (3.1112) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0004 max mem: 57114 Epoch: [160] [ 60/156] eta: 0:01:12 lr: 0.003983 min_lr: 0.003983 loss: 3.1823 (3.0739) weight_decay: 0.0500 (0.0500) time: 0.7279 data: 0.0004 max mem: 57114 Epoch: [160] [ 70/156] eta: 0:01:04 lr: 0.003980 min_lr: 0.003980 loss: 3.2181 (3.0853) weight_decay: 0.0500 (0.0500) time: 0.7185 data: 0.0004 max mem: 57114 Epoch: [160] [ 80/156] eta: 0:00:56 lr: 0.003977 min_lr: 0.003977 loss: 3.3113 (3.1009) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [160] [ 90/156] eta: 0:00:48 lr: 0.003975 min_lr: 0.003975 loss: 3.1714 (3.0908) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0004 max mem: 57114 Epoch: [160] [100/156] eta: 0:00:41 lr: 0.003972 min_lr: 0.003972 loss: 2.9496 (3.0544) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [160] [110/156] eta: 0:00:33 lr: 0.003969 min_lr: 0.003969 loss: 2.9082 (3.0492) weight_decay: 0.0500 (0.0500) time: 0.7045 data: 0.0004 max mem: 57114 Epoch: [160] [120/156] eta: 0:00:26 lr: 0.003966 min_lr: 0.003966 loss: 3.0475 (3.0536) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [160] [130/156] eta: 0:00:18 lr: 0.003963 min_lr: 0.003963 loss: 3.1143 (3.0613) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0008 max mem: 57114 Epoch: [160] [140/156] eta: 0:00:11 lr: 0.003960 min_lr: 0.003960 loss: 3.1759 (3.0621) weight_decay: 0.0500 (0.0500) time: 0.6941 data: 0.0007 max mem: 57114 Epoch: [160] [150/156] eta: 0:00:04 lr: 0.003957 min_lr: 0.003957 loss: 3.0588 (3.0486) weight_decay: 0.0500 (0.0500) time: 0.6929 data: 0.0001 max mem: 57114 Epoch: [160] [155/156] eta: 0:00:00 lr: 0.003956 min_lr: 0.003956 loss: 2.8063 (3.0370) weight_decay: 0.0500 (0.0500) time: 0.6908 data: 0.0001 max mem: 57114 Epoch: [160] Total time: 0:01:53 (0.7258 s / it) Averaged stats: lr: 0.003956 min_lr: 0.003956 loss: 2.8063 (3.0712) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6980 (0.6980) acc1: 86.9792 (86.9792) acc5: 97.9167 (97.9167) time: 2.0455 data: 1.7899 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8502 (0.8395) acc1: 82.2917 (80.1790) acc5: 96.3542 (95.2685) time: 0.5597 data: 0.3580 max mem: 57114 Test: Total time: 0:00:02 (0.5836 s / it) * Acc@1 79.200 Acc@5 94.891 loss 0.947 Accuracy of the model on the 50000 test images: 79.2% Max accuracy: 79.72% Test: [0/5] eta: 0:00:11 loss: 0.8080 (0.8080) acc1: 79.6875 (79.6875) acc5: 94.2708 (94.2708) time: 2.3083 data: 2.0647 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9878 (0.9885) acc1: 74.4792 (73.2737) acc5: 91.6667 (89.8977) time: 0.6123 data: 0.4130 max mem: 57114 Test: Total time: 0:00:03 (0.6235 s / it) * Acc@1 69.935 Acc@5 89.808 loss 1.199 Accuracy of the model EMA on 50000 test images: 69.9% Max EMA accuracy: 69.93% Epoch: [161] [ 0/156] eta: 0:07:38 lr: 0.003956 min_lr: 0.003956 loss: 2.3420 (2.3420) weight_decay: 0.0500 (0.0500) time: 2.9361 data: 2.2914 max mem: 57114 Epoch: [161] [ 10/156] eta: 0:02:16 lr: 0.003953 min_lr: 0.003953 loss: 2.9392 (2.8553) weight_decay: 0.0500 (0.0500) time: 0.9316 data: 0.2086 max mem: 57114 Epoch: [161] [ 20/156] eta: 0:01:53 lr: 0.003950 min_lr: 0.003950 loss: 2.9507 (2.9074) weight_decay: 0.0500 (0.0500) time: 0.7307 data: 0.0003 max mem: 57114 Epoch: [161] [ 30/156] eta: 0:01:41 lr: 0.003947 min_lr: 0.003947 loss: 2.9768 (2.9454) weight_decay: 0.0500 (0.0500) time: 0.7349 data: 0.0003 max mem: 57114 Epoch: [161] [ 40/156] eta: 0:01:31 lr: 0.003944 min_lr: 0.003944 loss: 3.0421 (2.9443) weight_decay: 0.0500 (0.0500) time: 0.7356 data: 0.0004 max mem: 57114 Epoch: [161] [ 50/156] eta: 0:01:21 lr: 0.003941 min_lr: 0.003941 loss: 3.1201 (2.9938) weight_decay: 0.0500 (0.0500) time: 0.7171 data: 0.0004 max mem: 57114 Epoch: [161] [ 60/156] eta: 0:01:13 lr: 0.003938 min_lr: 0.003938 loss: 3.1969 (3.0111) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [161] [ 70/156] eta: 0:01:05 lr: 0.003935 min_lr: 0.003935 loss: 3.0572 (2.9842) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0004 max mem: 57114 Epoch: [161] [ 80/156] eta: 0:00:57 lr: 0.003933 min_lr: 0.003933 loss: 2.8922 (3.0018) weight_decay: 0.0500 (0.0500) time: 0.7119 data: 0.0004 max mem: 57114 Epoch: [161] [ 90/156] eta: 0:00:49 lr: 0.003930 min_lr: 0.003930 loss: 3.2160 (3.0339) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [161] [100/156] eta: 0:00:41 lr: 0.003927 min_lr: 0.003927 loss: 3.2566 (3.0597) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0004 max mem: 57114 Epoch: [161] [110/156] eta: 0:00:34 lr: 0.003924 min_lr: 0.003924 loss: 3.3022 (3.0808) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0003 max mem: 57114 Epoch: [161] [120/156] eta: 0:00:26 lr: 0.003921 min_lr: 0.003921 loss: 3.2310 (3.0907) weight_decay: 0.0500 (0.0500) time: 0.7105 data: 0.0004 max mem: 57114 Epoch: [161] [130/156] eta: 0:00:19 lr: 0.003918 min_lr: 0.003918 loss: 3.1533 (3.0819) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0009 max mem: 57114 Epoch: [161] [140/156] eta: 0:00:11 lr: 0.003915 min_lr: 0.003915 loss: 3.1168 (3.0959) weight_decay: 0.0500 (0.0500) time: 0.6931 data: 0.0007 max mem: 57114 Epoch: [161] [150/156] eta: 0:00:04 lr: 0.003912 min_lr: 0.003912 loss: 3.0798 (3.0838) weight_decay: 0.0500 (0.0500) time: 0.6894 data: 0.0001 max mem: 57114 Epoch: [161] [155/156] eta: 0:00:00 lr: 0.003911 min_lr: 0.003911 loss: 3.0451 (3.0795) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0001 max mem: 57114 Epoch: [161] Total time: 0:01:53 (0.7288 s / it) Averaged stats: lr: 0.003911 min_lr: 0.003911 loss: 3.0451 (3.0722) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9175 (0.9175) acc1: 84.3750 (84.3750) acc5: 96.3542 (96.3542) time: 2.1013 data: 1.8452 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0433 (0.9812) acc1: 78.1250 (78.6445) acc5: 96.3542 (95.5243) time: 0.5709 data: 0.3691 max mem: 57114 Test: Total time: 0:00:02 (0.5938 s / it) * Acc@1 78.872 Acc@5 94.901 loss 1.086 Accuracy of the model on the 50000 test images: 78.9% Max accuracy: 79.72% Test: [0/5] eta: 0:00:11 loss: 0.7979 (0.7979) acc1: 80.7292 (80.7292) acc5: 94.2708 (94.2708) time: 2.3052 data: 2.0618 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9743 (0.9743) acc1: 75.5208 (73.6573) acc5: 92.1875 (90.1535) time: 0.6117 data: 0.4124 max mem: 57114 Test: Total time: 0:00:03 (0.6256 s / it) * Acc@1 70.257 Acc@5 89.992 loss 1.182 Accuracy of the model EMA on 50000 test images: 70.3% Max EMA accuracy: 70.26% Epoch: [162] [ 0/156] eta: 0:08:41 lr: 0.003911 min_lr: 0.003911 loss: 3.4539 (3.4539) weight_decay: 0.0500 (0.0500) time: 3.3422 data: 2.6958 max mem: 57114 Epoch: [162] [ 10/156] eta: 0:02:17 lr: 0.003908 min_lr: 0.003908 loss: 3.3084 (3.1515) weight_decay: 0.0500 (0.0500) time: 0.9411 data: 0.2453 max mem: 57114 Epoch: [162] [ 20/156] eta: 0:01:53 lr: 0.003905 min_lr: 0.003905 loss: 3.0992 (3.0946) weight_decay: 0.0500 (0.0500) time: 0.7097 data: 0.0003 max mem: 57114 Epoch: [162] [ 30/156] eta: 0:01:40 lr: 0.003902 min_lr: 0.003902 loss: 3.1726 (3.1283) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0003 max mem: 57114 Epoch: [162] [ 40/156] eta: 0:01:31 lr: 0.003899 min_lr: 0.003899 loss: 3.1726 (3.1002) weight_decay: 0.0500 (0.0500) time: 0.7323 data: 0.0004 max mem: 57114 Epoch: [162] [ 50/156] eta: 0:01:21 lr: 0.003896 min_lr: 0.003896 loss: 2.9695 (3.0808) weight_decay: 0.0500 (0.0500) time: 0.7308 data: 0.0004 max mem: 57114 Epoch: [162] [ 60/156] eta: 0:01:13 lr: 0.003894 min_lr: 0.003894 loss: 3.0992 (3.0728) weight_decay: 0.0500 (0.0500) time: 0.7286 data: 0.0004 max mem: 57114 Epoch: [162] [ 70/156] eta: 0:01:05 lr: 0.003891 min_lr: 0.003891 loss: 3.0992 (3.0630) weight_decay: 0.0500 (0.0500) time: 0.7200 data: 0.0004 max mem: 57114 Epoch: [162] [ 80/156] eta: 0:00:57 lr: 0.003888 min_lr: 0.003888 loss: 3.2277 (3.0844) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0004 max mem: 57114 Epoch: [162] [ 90/156] eta: 0:00:49 lr: 0.003885 min_lr: 0.003885 loss: 3.2538 (3.0916) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.0004 max mem: 57114 Epoch: [162] [100/156] eta: 0:00:41 lr: 0.003882 min_lr: 0.003882 loss: 3.1687 (3.0847) weight_decay: 0.0500 (0.0500) time: 0.7035 data: 0.0004 max mem: 57114 Epoch: [162] [110/156] eta: 0:00:33 lr: 0.003879 min_lr: 0.003879 loss: 3.0422 (3.0653) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0003 max mem: 57114 Epoch: [162] [120/156] eta: 0:00:26 lr: 0.003876 min_lr: 0.003876 loss: 2.9877 (3.0577) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0003 max mem: 57114 Epoch: [162] [130/156] eta: 0:00:19 lr: 0.003873 min_lr: 0.003873 loss: 3.0708 (3.0620) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0008 max mem: 57114 Epoch: [162] [140/156] eta: 0:00:11 lr: 0.003871 min_lr: 0.003871 loss: 3.1583 (3.0542) weight_decay: 0.0500 (0.0500) time: 0.6925 data: 0.0007 max mem: 57114 Epoch: [162] [150/156] eta: 0:00:04 lr: 0.003868 min_lr: 0.003868 loss: 2.8641 (3.0417) weight_decay: 0.0500 (0.0500) time: 0.6864 data: 0.0001 max mem: 57114 Epoch: [162] [155/156] eta: 0:00:00 lr: 0.003866 min_lr: 0.003866 loss: 2.8803 (3.0484) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [162] Total time: 0:01:53 (0.7275 s / it) Averaged stats: lr: 0.003866 min_lr: 0.003866 loss: 2.8803 (3.0571) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8350 (0.8350) acc1: 85.4167 (85.4167) acc5: 97.3958 (97.3958) time: 2.0725 data: 1.8151 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9400 (0.9239) acc1: 82.2917 (80.6905) acc5: 95.8333 (94.5013) time: 0.5652 data: 0.3631 max mem: 57114 Test: Total time: 0:00:02 (0.5877 s / it) * Acc@1 79.186 Acc@5 94.731 loss 1.007 Accuracy of the model on the 50000 test images: 79.2% Max accuracy: 79.72% Test: [0/5] eta: 0:00:11 loss: 0.7894 (0.7894) acc1: 80.7292 (80.7292) acc5: 94.2708 (94.2708) time: 2.2642 data: 2.0205 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9612 (0.9610) acc1: 76.5625 (73.9130) acc5: 92.1875 (90.4092) time: 0.6035 data: 0.4042 max mem: 57114 Test: Total time: 0:00:03 (0.6153 s / it) * Acc@1 70.552 Acc@5 90.173 loss 1.166 Accuracy of the model EMA on 50000 test images: 70.6% Max EMA accuracy: 70.55% Epoch: [163] [ 0/156] eta: 0:07:42 lr: 0.003866 min_lr: 0.003866 loss: 1.7212 (1.7212) weight_decay: 0.0500 (0.0500) time: 2.9658 data: 2.3140 max mem: 57114 Epoch: [163] [ 10/156] eta: 0:02:15 lr: 0.003863 min_lr: 0.003863 loss: 3.4656 (3.3029) weight_decay: 0.0500 (0.0500) time: 0.9289 data: 0.2106 max mem: 57114 Epoch: [163] [ 20/156] eta: 0:01:52 lr: 0.003860 min_lr: 0.003860 loss: 3.4251 (3.3004) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0003 max mem: 57114 Epoch: [163] [ 30/156] eta: 0:01:40 lr: 0.003857 min_lr: 0.003857 loss: 3.2014 (3.1407) weight_decay: 0.0500 (0.0500) time: 0.7286 data: 0.0004 max mem: 57114 Epoch: [163] [ 40/156] eta: 0:01:30 lr: 0.003854 min_lr: 0.003854 loss: 2.7340 (3.0487) weight_decay: 0.0500 (0.0500) time: 0.7345 data: 0.0004 max mem: 57114 Epoch: [163] [ 50/156] eta: 0:01:21 lr: 0.003852 min_lr: 0.003852 loss: 2.8867 (3.0531) weight_decay: 0.0500 (0.0500) time: 0.7200 data: 0.0003 max mem: 57114 Epoch: [163] [ 60/156] eta: 0:01:13 lr: 0.003849 min_lr: 0.003849 loss: 3.1401 (3.0445) weight_decay: 0.0500 (0.0500) time: 0.7237 data: 0.0003 max mem: 57114 Epoch: [163] [ 70/156] eta: 0:01:04 lr: 0.003846 min_lr: 0.003846 loss: 3.0643 (3.0159) weight_decay: 0.0500 (0.0500) time: 0.7180 data: 0.0004 max mem: 57114 Epoch: [163] [ 80/156] eta: 0:00:56 lr: 0.003843 min_lr: 0.003843 loss: 2.6927 (3.0102) weight_decay: 0.0500 (0.0500) time: 0.7063 data: 0.0004 max mem: 57114 Epoch: [163] [ 90/156] eta: 0:00:49 lr: 0.003840 min_lr: 0.003840 loss: 2.9872 (2.9883) weight_decay: 0.0500 (0.0500) time: 0.7084 data: 0.0004 max mem: 57114 Epoch: [163] [100/156] eta: 0:00:41 lr: 0.003837 min_lr: 0.003837 loss: 2.9752 (2.9607) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0004 max mem: 57114 Epoch: [163] [110/156] eta: 0:00:33 lr: 0.003834 min_lr: 0.003834 loss: 2.7401 (2.9437) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0003 max mem: 57114 Epoch: [163] [120/156] eta: 0:00:26 lr: 0.003831 min_lr: 0.003831 loss: 3.0302 (2.9585) weight_decay: 0.0500 (0.0500) time: 0.7015 data: 0.0004 max mem: 57114 Epoch: [163] [130/156] eta: 0:00:19 lr: 0.003829 min_lr: 0.003829 loss: 3.1653 (2.9558) weight_decay: 0.0500 (0.0500) time: 0.6994 data: 0.0008 max mem: 57114 Epoch: [163] [140/156] eta: 0:00:11 lr: 0.003826 min_lr: 0.003826 loss: 3.1653 (2.9690) weight_decay: 0.0500 (0.0500) time: 0.6865 data: 0.0006 max mem: 57114 Epoch: [163] [150/156] eta: 0:00:04 lr: 0.003823 min_lr: 0.003823 loss: 2.9717 (2.9717) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0001 max mem: 57114 Epoch: [163] [155/156] eta: 0:00:00 lr: 0.003821 min_lr: 0.003821 loss: 2.9717 (2.9805) weight_decay: 0.0500 (0.0500) time: 0.6875 data: 0.0001 max mem: 57114 Epoch: [163] Total time: 0:01:53 (0.7260 s / it) Averaged stats: lr: 0.003821 min_lr: 0.003821 loss: 2.9717 (3.0468) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8472 (0.8472) acc1: 84.3750 (84.3750) acc5: 98.4375 (98.4375) time: 2.1605 data: 1.9047 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0390 (1.0074) acc1: 81.7708 (79.9233) acc5: 95.8333 (95.1407) time: 0.5828 data: 0.3810 max mem: 57114 Test: Total time: 0:00:03 (0.6059 s / it) * Acc@1 79.907 Acc@5 95.137 loss 1.083 Accuracy of the model on the 50000 test images: 79.9% Max accuracy: 79.91% Test: [0/5] eta: 0:00:09 loss: 0.7807 (0.7807) acc1: 81.2500 (81.2500) acc5: 94.2708 (94.2708) time: 1.9569 data: 1.7134 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9487 (0.9476) acc1: 76.5625 (74.5524) acc5: 92.1875 (90.4092) time: 0.5420 data: 0.3428 max mem: 57114 Test: Total time: 0:00:02 (0.5527 s / it) * Acc@1 70.866 Acc@5 90.353 loss 1.151 Accuracy of the model EMA on 50000 test images: 70.9% Max EMA accuracy: 70.87% Epoch: [164] [ 0/156] eta: 0:08:32 lr: 0.003821 min_lr: 0.003821 loss: 2.8352 (2.8352) weight_decay: 0.0500 (0.0500) time: 3.2850 data: 2.6355 max mem: 57114 Epoch: [164] [ 10/156] eta: 0:02:20 lr: 0.003818 min_lr: 0.003818 loss: 3.2044 (3.1646) weight_decay: 0.0500 (0.0500) time: 0.9612 data: 0.2399 max mem: 57114 Epoch: [164] [ 20/156] eta: 0:01:55 lr: 0.003815 min_lr: 0.003815 loss: 3.1511 (3.0449) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0004 max mem: 57114 Epoch: [164] [ 30/156] eta: 0:01:42 lr: 0.003812 min_lr: 0.003812 loss: 3.2901 (3.1692) weight_decay: 0.0500 (0.0500) time: 0.7277 data: 0.0004 max mem: 57114 Epoch: [164] [ 40/156] eta: 0:01:32 lr: 0.003810 min_lr: 0.003810 loss: 3.2901 (3.1366) weight_decay: 0.0500 (0.0500) time: 0.7395 data: 0.0004 max mem: 57114 Epoch: [164] [ 50/156] eta: 0:01:23 lr: 0.003807 min_lr: 0.003807 loss: 3.0422 (3.1157) weight_decay: 0.0500 (0.0500) time: 0.7476 data: 0.0004 max mem: 57114 Epoch: [164] [ 60/156] eta: 0:01:14 lr: 0.003804 min_lr: 0.003804 loss: 2.9696 (3.0734) weight_decay: 0.0500 (0.0500) time: 0.7363 data: 0.0004 max mem: 57114 Epoch: [164] [ 70/156] eta: 0:01:06 lr: 0.003801 min_lr: 0.003801 loss: 2.9696 (3.0782) weight_decay: 0.0500 (0.0500) time: 0.7273 data: 0.0004 max mem: 57114 Epoch: [164] [ 80/156] eta: 0:00:57 lr: 0.003798 min_lr: 0.003798 loss: 3.1425 (3.0755) weight_decay: 0.0500 (0.0500) time: 0.7213 data: 0.0004 max mem: 57114 Epoch: [164] [ 90/156] eta: 0:00:49 lr: 0.003795 min_lr: 0.003795 loss: 3.1759 (3.0622) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [164] [100/156] eta: 0:00:42 lr: 0.003792 min_lr: 0.003792 loss: 2.9022 (3.0442) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.0003 max mem: 57114 Epoch: [164] [110/156] eta: 0:00:34 lr: 0.003789 min_lr: 0.003789 loss: 2.9338 (3.0425) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0003 max mem: 57114 Epoch: [164] [120/156] eta: 0:00:26 lr: 0.003787 min_lr: 0.003787 loss: 3.2043 (3.0502) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0004 max mem: 57114 Epoch: [164] [130/156] eta: 0:00:19 lr: 0.003784 min_lr: 0.003784 loss: 3.2191 (3.0592) weight_decay: 0.0500 (0.0500) time: 0.7032 data: 0.0008 max mem: 57114 Epoch: [164] [140/156] eta: 0:00:11 lr: 0.003781 min_lr: 0.003781 loss: 3.1717 (3.0649) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0007 max mem: 57114 Epoch: [164] [150/156] eta: 0:00:04 lr: 0.003778 min_lr: 0.003778 loss: 3.2965 (3.0709) weight_decay: 0.0500 (0.0500) time: 0.6859 data: 0.0001 max mem: 57114 Epoch: [164] [155/156] eta: 0:00:00 lr: 0.003777 min_lr: 0.003777 loss: 3.1985 (3.0736) weight_decay: 0.0500 (0.0500) time: 0.6900 data: 0.0001 max mem: 57114 Epoch: [164] Total time: 0:01:54 (0.7335 s / it) Averaged stats: lr: 0.003777 min_lr: 0.003777 loss: 3.1985 (3.0588) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9332 (0.9332) acc1: 85.9375 (85.9375) acc5: 97.9167 (97.9167) time: 2.1869 data: 1.9313 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.1245 (1.0692) acc1: 80.7292 (79.6675) acc5: 96.8750 (95.5243) time: 0.5880 data: 0.3863 max mem: 57114 Test: Total time: 0:00:03 (0.6114 s / it) * Acc@1 79.384 Acc@5 95.019 loss 1.138 Accuracy of the model on the 50000 test images: 79.4% Max accuracy: 79.91% Test: [0/5] eta: 0:00:11 loss: 0.7721 (0.7721) acc1: 81.7708 (81.7708) acc5: 94.2708 (94.2708) time: 2.3116 data: 2.0638 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9368 (0.9349) acc1: 76.5625 (74.8082) acc5: 92.7083 (90.5371) time: 0.6130 data: 0.4128 max mem: 57114 Test: Total time: 0:00:03 (0.6228 s / it) * Acc@1 71.140 Acc@5 90.525 loss 1.136 Accuracy of the model EMA on 50000 test images: 71.1% Max EMA accuracy: 71.14% Epoch: [165] [ 0/156] eta: 0:06:57 lr: 0.003776 min_lr: 0.003776 loss: 3.1592 (3.1592) weight_decay: 0.0500 (0.0500) time: 2.6785 data: 2.0282 max mem: 57114 Epoch: [165] [ 10/156] eta: 0:02:10 lr: 0.003773 min_lr: 0.003773 loss: 2.9079 (2.9033) weight_decay: 0.0500 (0.0500) time: 0.8906 data: 0.1846 max mem: 57114 Epoch: [165] [ 20/156] eta: 0:01:49 lr: 0.003771 min_lr: 0.003771 loss: 3.2626 (3.0439) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0003 max mem: 57114 Epoch: [165] [ 30/156] eta: 0:01:38 lr: 0.003768 min_lr: 0.003768 loss: 2.9879 (2.9332) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0003 max mem: 57114 Epoch: [165] [ 40/156] eta: 0:01:29 lr: 0.003765 min_lr: 0.003765 loss: 2.7951 (2.9349) weight_decay: 0.0500 (0.0500) time: 0.7318 data: 0.0003 max mem: 57114 Epoch: [165] [ 50/156] eta: 0:01:20 lr: 0.003762 min_lr: 0.003762 loss: 2.8498 (2.9527) weight_decay: 0.0500 (0.0500) time: 0.7306 data: 0.0003 max mem: 57114 Epoch: [165] [ 60/156] eta: 0:01:12 lr: 0.003759 min_lr: 0.003759 loss: 2.9895 (2.9703) weight_decay: 0.0500 (0.0500) time: 0.7305 data: 0.0003 max mem: 57114 Epoch: [165] [ 70/156] eta: 0:01:04 lr: 0.003756 min_lr: 0.003756 loss: 3.0166 (2.9862) weight_decay: 0.0500 (0.0500) time: 0.7224 data: 0.0004 max mem: 57114 Epoch: [165] [ 80/156] eta: 0:00:56 lr: 0.003753 min_lr: 0.003753 loss: 3.1074 (2.9891) weight_decay: 0.0500 (0.0500) time: 0.7035 data: 0.0004 max mem: 57114 Epoch: [165] [ 90/156] eta: 0:00:48 lr: 0.003750 min_lr: 0.003750 loss: 3.1074 (2.9920) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0004 max mem: 57114 Epoch: [165] [100/156] eta: 0:00:41 lr: 0.003748 min_lr: 0.003748 loss: 3.1400 (2.9945) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0003 max mem: 57114 Epoch: [165] [110/156] eta: 0:00:33 lr: 0.003745 min_lr: 0.003745 loss: 3.1472 (3.0152) weight_decay: 0.0500 (0.0500) time: 0.7040 data: 0.0003 max mem: 57114 Epoch: [165] [120/156] eta: 0:00:26 lr: 0.003742 min_lr: 0.003742 loss: 3.3019 (3.0252) weight_decay: 0.0500 (0.0500) time: 0.7159 data: 0.0004 max mem: 57114 Epoch: [165] [130/156] eta: 0:00:18 lr: 0.003739 min_lr: 0.003739 loss: 3.2815 (3.0455) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0010 max mem: 57114 Epoch: [165] [140/156] eta: 0:00:11 lr: 0.003736 min_lr: 0.003736 loss: 3.1316 (3.0275) weight_decay: 0.0500 (0.0500) time: 0.6889 data: 0.0009 max mem: 57114 Epoch: [165] [150/156] eta: 0:00:04 lr: 0.003733 min_lr: 0.003733 loss: 2.8092 (3.0048) weight_decay: 0.0500 (0.0500) time: 0.6823 data: 0.0001 max mem: 57114 Epoch: [165] [155/156] eta: 0:00:00 lr: 0.003732 min_lr: 0.003732 loss: 2.8092 (3.0107) weight_decay: 0.0500 (0.0500) time: 0.6798 data: 0.0001 max mem: 57114 Epoch: [165] Total time: 0:01:52 (0.7243 s / it) Averaged stats: lr: 0.003732 min_lr: 0.003732 loss: 2.8092 (3.0415) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7816 (0.7816) acc1: 85.4167 (85.4167) acc5: 97.9167 (97.9167) time: 2.0402 data: 1.7842 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9415 (0.8814) acc1: 80.2083 (81.2020) acc5: 95.8333 (95.5243) time: 0.5588 data: 0.3569 max mem: 57114 Test: Total time: 0:00:02 (0.5820 s / it) * Acc@1 80.065 Acc@5 95.434 loss 0.971 Accuracy of the model on the 50000 test images: 80.1% Max accuracy: 80.07% Test: [0/5] eta: 0:00:09 loss: 0.7632 (0.7632) acc1: 81.7708 (81.7708) acc5: 94.7917 (94.7917) time: 1.9782 data: 1.7347 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9258 (0.9226) acc1: 76.5625 (75.1918) acc5: 93.2292 (90.7928) time: 0.5463 data: 0.3470 max mem: 57114 Test: Total time: 0:00:02 (0.5573 s / it) * Acc@1 71.473 Acc@5 90.709 loss 1.122 Accuracy of the model EMA on 50000 test images: 71.5% Max EMA accuracy: 71.47% Epoch: [166] [ 0/156] eta: 0:07:29 lr: 0.003731 min_lr: 0.003731 loss: 3.4418 (3.4418) weight_decay: 0.0500 (0.0500) time: 2.8802 data: 2.2135 max mem: 57114 Epoch: [166] [ 10/156] eta: 0:02:16 lr: 0.003729 min_lr: 0.003729 loss: 3.0028 (2.8489) weight_decay: 0.0500 (0.0500) time: 0.9357 data: 0.2017 max mem: 57114 Epoch: [166] [ 20/156] eta: 0:01:53 lr: 0.003726 min_lr: 0.003726 loss: 3.0028 (2.9377) weight_decay: 0.0500 (0.0500) time: 0.7311 data: 0.0004 max mem: 57114 Epoch: [166] [ 30/156] eta: 0:01:42 lr: 0.003723 min_lr: 0.003723 loss: 3.0941 (2.9578) weight_decay: 0.0500 (0.0500) time: 0.7403 data: 0.0004 max mem: 57114 Epoch: [166] [ 40/156] eta: 0:01:31 lr: 0.003720 min_lr: 0.003720 loss: 3.0047 (2.9520) weight_decay: 0.0500 (0.0500) time: 0.7453 data: 0.0004 max mem: 57114 Epoch: [166] [ 50/156] eta: 0:01:22 lr: 0.003717 min_lr: 0.003717 loss: 3.0941 (2.9664) weight_decay: 0.0500 (0.0500) time: 0.7411 data: 0.0004 max mem: 57114 Epoch: [166] [ 60/156] eta: 0:01:14 lr: 0.003714 min_lr: 0.003714 loss: 2.7891 (2.9288) weight_decay: 0.0500 (0.0500) time: 0.7354 data: 0.0004 max mem: 57114 Epoch: [166] [ 70/156] eta: 0:01:05 lr: 0.003711 min_lr: 0.003711 loss: 3.0287 (2.9490) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0004 max mem: 57114 Epoch: [166] [ 80/156] eta: 0:00:57 lr: 0.003709 min_lr: 0.003709 loss: 3.2241 (2.9443) weight_decay: 0.0500 (0.0500) time: 0.7063 data: 0.0004 max mem: 57114 Epoch: [166] [ 90/156] eta: 0:00:49 lr: 0.003706 min_lr: 0.003706 loss: 2.9120 (2.9370) weight_decay: 0.0500 (0.0500) time: 0.7061 data: 0.0004 max mem: 57114 Epoch: [166] [100/156] eta: 0:00:41 lr: 0.003703 min_lr: 0.003703 loss: 2.8628 (2.9348) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [166] [110/156] eta: 0:00:34 lr: 0.003700 min_lr: 0.003700 loss: 3.0879 (2.9463) weight_decay: 0.0500 (0.0500) time: 0.7032 data: 0.0004 max mem: 57114 Epoch: [166] [120/156] eta: 0:00:26 lr: 0.003697 min_lr: 0.003697 loss: 3.1018 (2.9476) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0004 max mem: 57114 Epoch: [166] [130/156] eta: 0:00:19 lr: 0.003694 min_lr: 0.003694 loss: 3.1447 (2.9648) weight_decay: 0.0500 (0.0500) time: 0.7050 data: 0.0009 max mem: 57114 Epoch: [166] [140/156] eta: 0:00:11 lr: 0.003691 min_lr: 0.003691 loss: 3.1801 (2.9733) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0007 max mem: 57114 Epoch: [166] [150/156] eta: 0:00:04 lr: 0.003688 min_lr: 0.003688 loss: 3.1348 (2.9704) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0001 max mem: 57114 Epoch: [166] [155/156] eta: 0:00:00 lr: 0.003687 min_lr: 0.003687 loss: 2.8741 (2.9676) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [166] Total time: 0:01:54 (0.7317 s / it) Averaged stats: lr: 0.003687 min_lr: 0.003687 loss: 2.8741 (3.0458) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7129 (0.7129) acc1: 88.0208 (88.0208) acc5: 98.4375 (98.4375) time: 2.1475 data: 1.8916 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9037 (0.8391) acc1: 79.6875 (80.6905) acc5: 95.8333 (95.9079) time: 0.5804 data: 0.3785 max mem: 57114 Test: Total time: 0:00:03 (0.6027 s / it) * Acc@1 80.155 Acc@5 95.277 loss 0.915 Accuracy of the model on the 50000 test images: 80.2% Max accuracy: 80.16% Test: [0/5] eta: 0:00:10 loss: 0.7542 (0.7542) acc1: 82.2917 (82.2917) acc5: 94.7917 (94.7917) time: 2.0547 data: 1.8113 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9146 (0.9106) acc1: 76.5625 (75.3197) acc5: 93.2292 (91.0486) time: 0.5616 data: 0.3623 max mem: 57114 Test: Total time: 0:00:02 (0.5715 s / it) * Acc@1 71.783 Acc@5 90.881 loss 1.109 Accuracy of the model EMA on 50000 test images: 71.8% Max EMA accuracy: 71.78% Epoch: [167] [ 0/156] eta: 0:09:17 lr: 0.003687 min_lr: 0.003687 loss: 3.1833 (3.1833) weight_decay: 0.0500 (0.0500) time: 3.5737 data: 2.9212 max mem: 57114 Epoch: [167] [ 10/156] eta: 0:02:22 lr: 0.003684 min_lr: 0.003684 loss: 3.1833 (2.9719) weight_decay: 0.0500 (0.0500) time: 0.9732 data: 0.2658 max mem: 57114 Epoch: [167] [ 20/156] eta: 0:01:58 lr: 0.003681 min_lr: 0.003681 loss: 3.1186 (2.9757) weight_decay: 0.0500 (0.0500) time: 0.7331 data: 0.0003 max mem: 57114 Epoch: [167] [ 30/156] eta: 0:01:43 lr: 0.003678 min_lr: 0.003678 loss: 3.1084 (2.9598) weight_decay: 0.0500 (0.0500) time: 0.7373 data: 0.0003 max mem: 57114 Epoch: [167] [ 40/156] eta: 0:01:32 lr: 0.003675 min_lr: 0.003675 loss: 3.0861 (2.9794) weight_decay: 0.0500 (0.0500) time: 0.7298 data: 0.0003 max mem: 57114 Epoch: [167] [ 50/156] eta: 0:01:23 lr: 0.003672 min_lr: 0.003672 loss: 3.0666 (2.9735) weight_decay: 0.0500 (0.0500) time: 0.7368 data: 0.0004 max mem: 57114 Epoch: [167] [ 60/156] eta: 0:01:14 lr: 0.003669 min_lr: 0.003669 loss: 3.1841 (2.9980) weight_decay: 0.0500 (0.0500) time: 0.7408 data: 0.0004 max mem: 57114 Epoch: [167] [ 70/156] eta: 0:01:06 lr: 0.003667 min_lr: 0.003667 loss: 3.0528 (2.9654) weight_decay: 0.0500 (0.0500) time: 0.7311 data: 0.0004 max mem: 57114 Epoch: [167] [ 80/156] eta: 0:00:57 lr: 0.003664 min_lr: 0.003664 loss: 2.8583 (2.9734) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0004 max mem: 57114 Epoch: [167] [ 90/156] eta: 0:00:49 lr: 0.003661 min_lr: 0.003661 loss: 2.9390 (2.9734) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0004 max mem: 57114 Epoch: [167] [100/156] eta: 0:00:42 lr: 0.003658 min_lr: 0.003658 loss: 2.9390 (2.9742) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0004 max mem: 57114 Epoch: [167] [110/156] eta: 0:00:34 lr: 0.003655 min_lr: 0.003655 loss: 2.8563 (2.9570) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.0004 max mem: 57114 Epoch: [167] [120/156] eta: 0:00:26 lr: 0.003652 min_lr: 0.003652 loss: 3.2142 (2.9776) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0003 max mem: 57114 Epoch: [167] [130/156] eta: 0:00:19 lr: 0.003649 min_lr: 0.003649 loss: 3.2547 (2.9891) weight_decay: 0.0500 (0.0500) time: 0.7036 data: 0.0005 max mem: 57114 Epoch: [167] [140/156] eta: 0:00:11 lr: 0.003647 min_lr: 0.003647 loss: 3.2331 (2.9912) weight_decay: 0.0500 (0.0500) time: 0.6932 data: 0.0004 max mem: 57114 Epoch: [167] [150/156] eta: 0:00:04 lr: 0.003644 min_lr: 0.003644 loss: 3.1454 (2.9925) weight_decay: 0.0500 (0.0500) time: 0.6886 data: 0.0001 max mem: 57114 Epoch: [167] [155/156] eta: 0:00:00 lr: 0.003642 min_lr: 0.003642 loss: 2.9480 (2.9887) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0001 max mem: 57114 Epoch: [167] Total time: 0:01:54 (0.7340 s / it) Averaged stats: lr: 0.003642 min_lr: 0.003642 loss: 2.9480 (3.0403) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8098 (0.8098) acc1: 86.4583 (86.4583) acc5: 97.9167 (97.9167) time: 2.1645 data: 1.9090 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9189 (0.9172) acc1: 79.6875 (79.7954) acc5: 96.3542 (96.5473) time: 0.5836 data: 0.3819 max mem: 57114 Test: Total time: 0:00:03 (0.6068 s / it) * Acc@1 80.133 Acc@5 95.402 loss 1.007 Accuracy of the model on the 50000 test images: 80.1% Max accuracy: 80.16% Test: [0/5] eta: 0:00:11 loss: 0.7463 (0.7463) acc1: 82.2917 (82.2917) acc5: 94.7917 (94.7917) time: 2.2719 data: 2.0283 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9048 (0.8998) acc1: 76.5625 (75.5755) acc5: 93.7500 (91.1765) time: 0.6051 data: 0.4058 max mem: 57114 Test: Total time: 0:00:03 (0.6150 s / it) * Acc@1 72.049 Acc@5 91.009 loss 1.095 Accuracy of the model EMA on 50000 test images: 72.0% Max EMA accuracy: 72.05% Epoch: [168] [ 0/156] eta: 0:06:49 lr: 0.003642 min_lr: 0.003642 loss: 2.8310 (2.8310) weight_decay: 0.0500 (0.0500) time: 2.6254 data: 1.9742 max mem: 57114 Epoch: [168] [ 10/156] eta: 0:02:09 lr: 0.003639 min_lr: 0.003639 loss: 3.2125 (3.0821) weight_decay: 0.0500 (0.0500) time: 0.8898 data: 0.1798 max mem: 57114 Epoch: [168] [ 20/156] eta: 0:01:49 lr: 0.003636 min_lr: 0.003636 loss: 3.1940 (2.9825) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0003 max mem: 57114 Epoch: [168] [ 30/156] eta: 0:01:39 lr: 0.003633 min_lr: 0.003633 loss: 3.0485 (2.9553) weight_decay: 0.0500 (0.0500) time: 0.7309 data: 0.0004 max mem: 57114 Epoch: [168] [ 40/156] eta: 0:01:29 lr: 0.003631 min_lr: 0.003631 loss: 3.0469 (2.9856) weight_decay: 0.0500 (0.0500) time: 0.7305 data: 0.0004 max mem: 57114 Epoch: [168] [ 50/156] eta: 0:01:20 lr: 0.003628 min_lr: 0.003628 loss: 3.2583 (3.0262) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0004 max mem: 57114 Epoch: [168] [ 60/156] eta: 0:01:12 lr: 0.003625 min_lr: 0.003625 loss: 3.2886 (3.0602) weight_decay: 0.0500 (0.0500) time: 0.7283 data: 0.0004 max mem: 57114 Epoch: [168] [ 70/156] eta: 0:01:04 lr: 0.003622 min_lr: 0.003622 loss: 3.2797 (3.0668) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0003 max mem: 57114 Epoch: [168] [ 80/156] eta: 0:00:56 lr: 0.003619 min_lr: 0.003619 loss: 3.2854 (3.0994) weight_decay: 0.0500 (0.0500) time: 0.6994 data: 0.0004 max mem: 57114 Epoch: [168] [ 90/156] eta: 0:00:48 lr: 0.003616 min_lr: 0.003616 loss: 3.0818 (3.0846) weight_decay: 0.0500 (0.0500) time: 0.6993 data: 0.0004 max mem: 57114 Epoch: [168] [100/156] eta: 0:00:41 lr: 0.003613 min_lr: 0.003613 loss: 3.0544 (3.0891) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0004 max mem: 57114 Epoch: [168] [110/156] eta: 0:00:33 lr: 0.003610 min_lr: 0.003610 loss: 3.0220 (3.0690) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [168] [120/156] eta: 0:00:26 lr: 0.003608 min_lr: 0.003608 loss: 3.0240 (3.0709) weight_decay: 0.0500 (0.0500) time: 0.7095 data: 0.0004 max mem: 57114 Epoch: [168] [130/156] eta: 0:00:18 lr: 0.003605 min_lr: 0.003605 loss: 3.1233 (3.0685) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [168] [140/156] eta: 0:00:11 lr: 0.003602 min_lr: 0.003602 loss: 3.1529 (3.0764) weight_decay: 0.0500 (0.0500) time: 0.6894 data: 0.0003 max mem: 57114 Epoch: [168] [150/156] eta: 0:00:04 lr: 0.003599 min_lr: 0.003599 loss: 3.1554 (3.0674) weight_decay: 0.0500 (0.0500) time: 0.6774 data: 0.0001 max mem: 57114 Epoch: [168] [155/156] eta: 0:00:00 lr: 0.003598 min_lr: 0.003598 loss: 3.1554 (3.0697) weight_decay: 0.0500 (0.0500) time: 0.6784 data: 0.0001 max mem: 57114 Epoch: [168] Total time: 0:01:52 (0.7226 s / it) Averaged stats: lr: 0.003598 min_lr: 0.003598 loss: 3.1554 (3.0364) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7796 (0.7796) acc1: 86.9792 (86.9792) acc5: 98.4375 (98.4375) time: 2.0250 data: 1.7690 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9344 (0.9405) acc1: 79.6875 (79.1560) acc5: 96.8750 (95.3964) time: 0.5557 data: 0.3539 max mem: 57114 Test: Total time: 0:00:02 (0.5787 s / it) * Acc@1 79.915 Acc@5 95.093 loss 1.003 Accuracy of the model on the 50000 test images: 79.9% Max accuracy: 80.16% Test: [0/5] eta: 0:00:11 loss: 0.7389 (0.7389) acc1: 83.3333 (83.3333) acc5: 94.7917 (94.7917) time: 2.3254 data: 2.0820 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8952 (0.8890) acc1: 77.0833 (75.8312) acc5: 93.7500 (91.1765) time: 0.6157 data: 0.4165 max mem: 57114 Test: Total time: 0:00:03 (0.6257 s / it) * Acc@1 72.309 Acc@5 91.145 loss 1.082 Accuracy of the model EMA on 50000 test images: 72.3% Max EMA accuracy: 72.31% Epoch: [169] [ 0/156] eta: 0:06:33 lr: 0.003597 min_lr: 0.003597 loss: 3.3579 (3.3579) weight_decay: 0.0500 (0.0500) time: 2.5225 data: 1.8704 max mem: 57114 Epoch: [169] [ 10/156] eta: 0:02:10 lr: 0.003594 min_lr: 0.003594 loss: 3.3579 (3.1020) weight_decay: 0.0500 (0.0500) time: 0.8909 data: 0.1703 max mem: 57114 Epoch: [169] [ 20/156] eta: 0:01:50 lr: 0.003592 min_lr: 0.003592 loss: 3.2373 (3.0860) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0003 max mem: 57114 Epoch: [169] [ 30/156] eta: 0:01:39 lr: 0.003589 min_lr: 0.003589 loss: 3.0271 (3.0986) weight_decay: 0.0500 (0.0500) time: 0.7304 data: 0.0004 max mem: 57114 Epoch: [169] [ 40/156] eta: 0:01:29 lr: 0.003586 min_lr: 0.003586 loss: 2.9873 (3.0371) weight_decay: 0.0500 (0.0500) time: 0.7304 data: 0.0004 max mem: 57114 Epoch: [169] [ 50/156] eta: 0:01:20 lr: 0.003583 min_lr: 0.003583 loss: 2.8853 (3.0225) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0004 max mem: 57114 Epoch: [169] [ 60/156] eta: 0:01:12 lr: 0.003580 min_lr: 0.003580 loss: 3.1682 (3.0559) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0003 max mem: 57114 Epoch: [169] [ 70/156] eta: 0:01:04 lr: 0.003577 min_lr: 0.003577 loss: 3.1682 (3.0414) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0003 max mem: 57114 Epoch: [169] [ 80/156] eta: 0:00:56 lr: 0.003574 min_lr: 0.003574 loss: 3.0584 (3.0476) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0003 max mem: 57114 Epoch: [169] [ 90/156] eta: 0:00:48 lr: 0.003572 min_lr: 0.003572 loss: 3.0584 (3.0496) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0003 max mem: 57114 Epoch: [169] [100/156] eta: 0:00:41 lr: 0.003569 min_lr: 0.003569 loss: 3.1250 (3.0632) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0004 max mem: 57114 Epoch: [169] [110/156] eta: 0:00:33 lr: 0.003566 min_lr: 0.003566 loss: 3.1250 (3.0492) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [169] [120/156] eta: 0:00:26 lr: 0.003563 min_lr: 0.003563 loss: 2.9896 (3.0295) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0004 max mem: 57114 Epoch: [169] [130/156] eta: 0:00:18 lr: 0.003560 min_lr: 0.003560 loss: 2.8497 (3.0184) weight_decay: 0.0500 (0.0500) time: 0.6973 data: 0.0010 max mem: 57114 Epoch: [169] [140/156] eta: 0:00:11 lr: 0.003557 min_lr: 0.003557 loss: 3.0073 (3.0166) weight_decay: 0.0500 (0.0500) time: 0.6893 data: 0.0008 max mem: 57114 Epoch: [169] [150/156] eta: 0:00:04 lr: 0.003554 min_lr: 0.003554 loss: 3.2283 (3.0319) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0001 max mem: 57114 Epoch: [169] [155/156] eta: 0:00:00 lr: 0.003553 min_lr: 0.003553 loss: 3.2395 (3.0288) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0001 max mem: 57114 Epoch: [169] Total time: 0:01:52 (0.7230 s / it) Averaged stats: lr: 0.003553 min_lr: 0.003553 loss: 3.2395 (3.0304) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8110 (0.8110) acc1: 85.9375 (85.9375) acc5: 97.9167 (97.9167) time: 2.1649 data: 1.9087 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9468 (0.8662) acc1: 82.2917 (80.8184) acc5: 97.9167 (96.0358) time: 0.5838 data: 0.3818 max mem: 57114 Test: Total time: 0:00:03 (0.6096 s / it) * Acc@1 80.135 Acc@5 95.279 loss 0.969 Accuracy of the model on the 50000 test images: 80.1% Max accuracy: 80.16% Test: [0/5] eta: 0:00:11 loss: 0.7318 (0.7318) acc1: 83.3333 (83.3333) acc5: 94.7917 (94.7917) time: 2.3413 data: 2.0978 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8862 (0.8784) acc1: 77.6042 (76.0870) acc5: 93.7500 (91.4322) time: 0.6190 data: 0.4197 max mem: 57114 Test: Total time: 0:00:03 (0.6305 s / it) * Acc@1 72.530 Acc@5 91.284 loss 1.070 Accuracy of the model EMA on 50000 test images: 72.5% Max EMA accuracy: 72.53% Epoch: [170] [ 0/156] eta: 0:07:44 lr: 0.003553 min_lr: 0.003553 loss: 2.8674 (2.8674) weight_decay: 0.0500 (0.0500) time: 2.9745 data: 2.3251 max mem: 57114 Epoch: [170] [ 10/156] eta: 0:02:16 lr: 0.003550 min_lr: 0.003550 loss: 3.1418 (3.0585) weight_decay: 0.0500 (0.0500) time: 0.9355 data: 0.2116 max mem: 57114 Epoch: [170] [ 20/156] eta: 0:01:53 lr: 0.003547 min_lr: 0.003547 loss: 3.1588 (3.1487) weight_decay: 0.0500 (0.0500) time: 0.7279 data: 0.0003 max mem: 57114 Epoch: [170] [ 30/156] eta: 0:01:41 lr: 0.003544 min_lr: 0.003544 loss: 3.1869 (3.1433) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0003 max mem: 57114 Epoch: [170] [ 40/156] eta: 0:01:31 lr: 0.003541 min_lr: 0.003541 loss: 3.1250 (3.1078) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0004 max mem: 57114 Epoch: [170] [ 50/156] eta: 0:01:21 lr: 0.003538 min_lr: 0.003538 loss: 3.2284 (3.1151) weight_decay: 0.0500 (0.0500) time: 0.7237 data: 0.0004 max mem: 57114 Epoch: [170] [ 60/156] eta: 0:01:13 lr: 0.003536 min_lr: 0.003536 loss: 3.1536 (3.0970) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [170] [ 70/156] eta: 0:01:04 lr: 0.003533 min_lr: 0.003533 loss: 3.1536 (3.0804) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0004 max mem: 57114 Epoch: [170] [ 80/156] eta: 0:00:56 lr: 0.003530 min_lr: 0.003530 loss: 3.2029 (3.0741) weight_decay: 0.0500 (0.0500) time: 0.7124 data: 0.0003 max mem: 57114 Epoch: [170] [ 90/156] eta: 0:00:49 lr: 0.003527 min_lr: 0.003527 loss: 2.9983 (3.0514) weight_decay: 0.0500 (0.0500) time: 0.7118 data: 0.0003 max mem: 57114 Epoch: [170] [100/156] eta: 0:00:41 lr: 0.003524 min_lr: 0.003524 loss: 2.9395 (3.0440) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0004 max mem: 57114 Epoch: [170] [110/156] eta: 0:00:33 lr: 0.003521 min_lr: 0.003521 loss: 2.9395 (3.0303) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0004 max mem: 57114 Epoch: [170] [120/156] eta: 0:00:26 lr: 0.003518 min_lr: 0.003518 loss: 2.9354 (3.0102) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [170] [130/156] eta: 0:00:19 lr: 0.003516 min_lr: 0.003516 loss: 3.0578 (3.0158) weight_decay: 0.0500 (0.0500) time: 0.6963 data: 0.0008 max mem: 57114 Epoch: [170] [140/156] eta: 0:00:11 lr: 0.003513 min_lr: 0.003513 loss: 3.1586 (3.0365) weight_decay: 0.0500 (0.0500) time: 0.6884 data: 0.0007 max mem: 57114 Epoch: [170] [150/156] eta: 0:00:04 lr: 0.003510 min_lr: 0.003510 loss: 3.3746 (3.0483) weight_decay: 0.0500 (0.0500) time: 0.6859 data: 0.0001 max mem: 57114 Epoch: [170] [155/156] eta: 0:00:00 lr: 0.003508 min_lr: 0.003508 loss: 3.3194 (3.0544) weight_decay: 0.0500 (0.0500) time: 0.6935 data: 0.0001 max mem: 57114 Epoch: [170] Total time: 0:01:53 (0.7276 s / it) Averaged stats: lr: 0.003508 min_lr: 0.003508 loss: 3.3194 (3.0197) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9841 (0.9841) acc1: 85.4167 (85.4167) acc5: 97.3958 (97.3958) time: 2.0593 data: 1.8030 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0706 (1.0858) acc1: 79.1667 (80.4348) acc5: 96.3542 (95.1407) time: 0.5625 data: 0.3607 max mem: 57114 Test: Total time: 0:00:02 (0.5860 s / it) * Acc@1 79.574 Acc@5 95.005 loss 1.177 Accuracy of the model on the 50000 test images: 79.6% Max accuracy: 80.16% Test: [0/5] eta: 0:00:12 loss: 0.7257 (0.7257) acc1: 82.8125 (82.8125) acc5: 94.7917 (94.7917) time: 2.4192 data: 2.1757 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8777 (0.8688) acc1: 78.1250 (76.0870) acc5: 93.7500 (91.4322) time: 0.6346 data: 0.4352 max mem: 57114 Test: Total time: 0:00:03 (0.6461 s / it) * Acc@1 72.766 Acc@5 91.384 loss 1.059 Accuracy of the model EMA on 50000 test images: 72.8% Max EMA accuracy: 72.77% Epoch: [171] [ 0/156] eta: 0:07:04 lr: 0.003508 min_lr: 0.003508 loss: 2.8334 (2.8334) weight_decay: 0.0500 (0.0500) time: 2.7195 data: 2.0635 max mem: 57114 Epoch: [171] [ 10/156] eta: 0:02:14 lr: 0.003505 min_lr: 0.003505 loss: 3.0729 (3.0551) weight_decay: 0.0500 (0.0500) time: 0.9217 data: 0.1879 max mem: 57114 Epoch: [171] [ 20/156] eta: 0:01:52 lr: 0.003502 min_lr: 0.003502 loss: 3.1145 (3.0793) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0003 max mem: 57114 Epoch: [171] [ 30/156] eta: 0:01:39 lr: 0.003500 min_lr: 0.003500 loss: 3.3326 (3.1664) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [171] [ 40/156] eta: 0:01:29 lr: 0.003497 min_lr: 0.003497 loss: 3.0507 (3.1129) weight_decay: 0.0500 (0.0500) time: 0.7128 data: 0.0004 max mem: 57114 Epoch: [171] [ 50/156] eta: 0:01:20 lr: 0.003494 min_lr: 0.003494 loss: 3.0507 (3.1197) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [171] [ 60/156] eta: 0:01:12 lr: 0.003491 min_lr: 0.003491 loss: 3.1882 (3.0917) weight_decay: 0.0500 (0.0500) time: 0.7215 data: 0.0004 max mem: 57114 Epoch: [171] [ 70/156] eta: 0:01:04 lr: 0.003488 min_lr: 0.003488 loss: 3.1882 (3.0817) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0004 max mem: 57114 Epoch: [171] [ 80/156] eta: 0:00:56 lr: 0.003485 min_lr: 0.003485 loss: 3.0997 (3.0580) weight_decay: 0.0500 (0.0500) time: 0.7119 data: 0.0003 max mem: 57114 Epoch: [171] [ 90/156] eta: 0:00:48 lr: 0.003482 min_lr: 0.003482 loss: 2.8502 (3.0406) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0003 max mem: 57114 Epoch: [171] [100/156] eta: 0:00:41 lr: 0.003480 min_lr: 0.003480 loss: 3.0861 (3.0364) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [171] [110/156] eta: 0:00:33 lr: 0.003477 min_lr: 0.003477 loss: 3.0861 (3.0513) weight_decay: 0.0500 (0.0500) time: 0.6972 data: 0.0003 max mem: 57114 Epoch: [171] [120/156] eta: 0:00:26 lr: 0.003474 min_lr: 0.003474 loss: 3.1578 (3.0543) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0004 max mem: 57114 Epoch: [171] [130/156] eta: 0:00:18 lr: 0.003471 min_lr: 0.003471 loss: 3.1578 (3.0645) weight_decay: 0.0500 (0.0500) time: 0.7073 data: 0.0009 max mem: 57114 Epoch: [171] [140/156] eta: 0:00:11 lr: 0.003468 min_lr: 0.003468 loss: 3.0237 (3.0429) weight_decay: 0.0500 (0.0500) time: 0.6942 data: 0.0007 max mem: 57114 Epoch: [171] [150/156] eta: 0:00:04 lr: 0.003465 min_lr: 0.003465 loss: 3.0651 (3.0588) weight_decay: 0.0500 (0.0500) time: 0.6860 data: 0.0001 max mem: 57114 Epoch: [171] [155/156] eta: 0:00:00 lr: 0.003464 min_lr: 0.003464 loss: 3.0651 (3.0519) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0001 max mem: 57114 Epoch: [171] Total time: 0:01:52 (0.7240 s / it) Averaged stats: lr: 0.003464 min_lr: 0.003464 loss: 3.0651 (3.0197) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8448 (0.8448) acc1: 84.8958 (84.8958) acc5: 96.8750 (96.8750) time: 2.0502 data: 1.7948 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9320 (0.9043) acc1: 81.2500 (79.7954) acc5: 96.8750 (95.3964) time: 0.5607 data: 0.3590 max mem: 57114 Test: Total time: 0:00:02 (0.5832 s / it) * Acc@1 80.439 Acc@5 95.412 loss 0.989 Accuracy of the model on the 50000 test images: 80.4% Max accuracy: 80.44% Test: [0/5] eta: 0:00:10 loss: 0.7189 (0.7189) acc1: 82.8125 (82.8125) acc5: 95.3125 (95.3125) time: 2.0255 data: 1.7820 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8693 (0.8592) acc1: 78.1250 (76.0870) acc5: 94.2708 (91.5601) time: 0.5557 data: 0.3565 max mem: 57114 Test: Total time: 0:00:02 (0.5673 s / it) * Acc@1 72.996 Acc@5 91.508 loss 1.047 Accuracy of the model EMA on 50000 test images: 73.0% Max EMA accuracy: 73.00% Epoch: [172] [ 0/156] eta: 0:08:51 lr: 0.003464 min_lr: 0.003464 loss: 1.8039 (1.8039) weight_decay: 0.0500 (0.0500) time: 3.4098 data: 2.7574 max mem: 57114 Epoch: [172] [ 10/156] eta: 0:02:22 lr: 0.003461 min_lr: 0.003461 loss: 2.8699 (2.7984) weight_decay: 0.0500 (0.0500) time: 0.9788 data: 0.2510 max mem: 57114 Epoch: [172] [ 20/156] eta: 0:01:56 lr: 0.003458 min_lr: 0.003458 loss: 3.0886 (2.9682) weight_decay: 0.0500 (0.0500) time: 0.7287 data: 0.0004 max mem: 57114 Epoch: [172] [ 30/156] eta: 0:01:41 lr: 0.003455 min_lr: 0.003455 loss: 3.1971 (3.0368) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [172] [ 40/156] eta: 0:01:32 lr: 0.003452 min_lr: 0.003452 loss: 3.1777 (3.0405) weight_decay: 0.0500 (0.0500) time: 0.7267 data: 0.0004 max mem: 57114 Epoch: [172] [ 50/156] eta: 0:01:22 lr: 0.003449 min_lr: 0.003449 loss: 3.1777 (3.0591) weight_decay: 0.0500 (0.0500) time: 0.7343 data: 0.0005 max mem: 57114 Epoch: [172] [ 60/156] eta: 0:01:14 lr: 0.003447 min_lr: 0.003447 loss: 2.9796 (3.0205) weight_decay: 0.0500 (0.0500) time: 0.7301 data: 0.0004 max mem: 57114 Epoch: [172] [ 70/156] eta: 0:01:05 lr: 0.003444 min_lr: 0.003444 loss: 2.9945 (3.0388) weight_decay: 0.0500 (0.0500) time: 0.7414 data: 0.0004 max mem: 57114 Epoch: [172] [ 80/156] eta: 0:00:57 lr: 0.003441 min_lr: 0.003441 loss: 3.0990 (3.0288) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0004 max mem: 57114 Epoch: [172] [ 90/156] eta: 0:00:49 lr: 0.003438 min_lr: 0.003438 loss: 3.1634 (3.0392) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0004 max mem: 57114 Epoch: [172] [100/156] eta: 0:00:42 lr: 0.003435 min_lr: 0.003435 loss: 3.1634 (3.0293) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0005 max mem: 57114 Epoch: [172] [110/156] eta: 0:00:34 lr: 0.003432 min_lr: 0.003432 loss: 2.9890 (3.0298) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0005 max mem: 57114 Epoch: [172] [120/156] eta: 0:00:26 lr: 0.003429 min_lr: 0.003429 loss: 3.2012 (3.0400) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0004 max mem: 57114 Epoch: [172] [130/156] eta: 0:00:19 lr: 0.003427 min_lr: 0.003427 loss: 3.2615 (3.0380) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0008 max mem: 57114 Epoch: [172] [140/156] eta: 0:00:11 lr: 0.003424 min_lr: 0.003424 loss: 3.1555 (3.0416) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0007 max mem: 57114 Epoch: [172] [150/156] eta: 0:00:04 lr: 0.003421 min_lr: 0.003421 loss: 3.0872 (3.0325) weight_decay: 0.0500 (0.0500) time: 0.6786 data: 0.0001 max mem: 57114 Epoch: [172] [155/156] eta: 0:00:00 lr: 0.003419 min_lr: 0.003419 loss: 3.0014 (3.0351) weight_decay: 0.0500 (0.0500) time: 0.6853 data: 0.0001 max mem: 57114 Epoch: [172] Total time: 0:01:54 (0.7326 s / it) Averaged stats: lr: 0.003419 min_lr: 0.003419 loss: 3.0014 (3.0173) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6568 (0.6568) acc1: 88.0208 (88.0208) acc5: 98.9583 (98.9583) time: 2.1217 data: 1.8656 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9728 (0.8621) acc1: 83.3333 (80.6905) acc5: 95.8333 (95.5243) time: 0.5750 data: 0.3732 max mem: 57114 Test: Total time: 0:00:02 (0.5973 s / it) * Acc@1 79.570 Acc@5 94.959 loss 0.981 Accuracy of the model on the 50000 test images: 79.6% Max accuracy: 80.44% Test: [0/5] eta: 0:00:11 loss: 0.7127 (0.7127) acc1: 82.8125 (82.8125) acc5: 95.3125 (95.3125) time: 2.2863 data: 2.0429 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8617 (0.8502) acc1: 78.1250 (76.3427) acc5: 94.2708 (91.5601) time: 0.6082 data: 0.4087 max mem: 57114 Test: Total time: 0:00:03 (0.6195 s / it) * Acc@1 73.272 Acc@5 91.628 loss 1.036 Accuracy of the model EMA on 50000 test images: 73.3% Max EMA accuracy: 73.27% Epoch: [173] [ 0/156] eta: 0:09:11 lr: 0.003419 min_lr: 0.003419 loss: 2.8071 (2.8071) weight_decay: 0.0500 (0.0500) time: 3.5348 data: 2.8857 max mem: 57114 Epoch: [173] [ 10/156] eta: 0:02:24 lr: 0.003416 min_lr: 0.003416 loss: 3.1011 (3.0468) weight_decay: 0.0500 (0.0500) time: 0.9901 data: 0.2626 max mem: 57114 Epoch: [173] [ 20/156] eta: 0:01:57 lr: 0.003414 min_lr: 0.003414 loss: 3.0553 (2.9491) weight_decay: 0.0500 (0.0500) time: 0.7308 data: 0.0003 max mem: 57114 Epoch: [173] [ 30/156] eta: 0:01:42 lr: 0.003411 min_lr: 0.003411 loss: 2.8406 (2.9548) weight_decay: 0.0500 (0.0500) time: 0.7215 data: 0.0003 max mem: 57114 Epoch: [173] [ 40/156] eta: 0:01:31 lr: 0.003408 min_lr: 0.003408 loss: 3.0470 (3.0003) weight_decay: 0.0500 (0.0500) time: 0.7165 data: 0.0003 max mem: 57114 Epoch: [173] [ 50/156] eta: 0:01:22 lr: 0.003405 min_lr: 0.003405 loss: 2.9736 (2.9585) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0004 max mem: 57114 Epoch: [173] [ 60/156] eta: 0:01:14 lr: 0.003402 min_lr: 0.003402 loss: 3.1537 (3.0043) weight_decay: 0.0500 (0.0500) time: 0.7309 data: 0.0004 max mem: 57114 Epoch: [173] [ 70/156] eta: 0:01:05 lr: 0.003399 min_lr: 0.003399 loss: 3.1537 (2.9893) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0004 max mem: 57114 Epoch: [173] [ 80/156] eta: 0:00:57 lr: 0.003396 min_lr: 0.003396 loss: 3.0834 (2.9929) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0004 max mem: 57114 Epoch: [173] [ 90/156] eta: 0:00:49 lr: 0.003394 min_lr: 0.003394 loss: 3.0834 (2.9889) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0004 max mem: 57114 Epoch: [173] [100/156] eta: 0:00:41 lr: 0.003391 min_lr: 0.003391 loss: 3.1455 (3.0073) weight_decay: 0.0500 (0.0500) time: 0.7016 data: 0.0004 max mem: 57114 Epoch: [173] [110/156] eta: 0:00:34 lr: 0.003388 min_lr: 0.003388 loss: 3.2436 (3.0183) weight_decay: 0.0500 (0.0500) time: 0.7030 data: 0.0005 max mem: 57114 Epoch: [173] [120/156] eta: 0:00:26 lr: 0.003385 min_lr: 0.003385 loss: 3.0754 (3.0251) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [173] [130/156] eta: 0:00:19 lr: 0.003382 min_lr: 0.003382 loss: 3.1302 (3.0402) weight_decay: 0.0500 (0.0500) time: 0.6953 data: 0.0008 max mem: 57114 Epoch: [173] [140/156] eta: 0:00:11 lr: 0.003379 min_lr: 0.003379 loss: 3.3607 (3.0554) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0007 max mem: 57114 Epoch: [173] [150/156] eta: 0:00:04 lr: 0.003377 min_lr: 0.003377 loss: 3.2492 (3.0472) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [173] [155/156] eta: 0:00:00 lr: 0.003375 min_lr: 0.003375 loss: 3.0598 (3.0392) weight_decay: 0.0500 (0.0500) time: 0.6837 data: 0.0001 max mem: 57114 Epoch: [173] Total time: 0:01:54 (0.7309 s / it) Averaged stats: lr: 0.003375 min_lr: 0.003375 loss: 3.0598 (3.0152) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7167 (0.7167) acc1: 88.0208 (88.0208) acc5: 98.4375 (98.4375) time: 2.0970 data: 1.8407 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9425 (0.9099) acc1: 82.2917 (79.7954) acc5: 96.3542 (95.5243) time: 0.5701 data: 0.3682 max mem: 57114 Test: Total time: 0:00:02 (0.5926 s / it) * Acc@1 80.055 Acc@5 95.231 loss 0.975 Accuracy of the model on the 50000 test images: 80.1% Max accuracy: 80.44% Test: [0/5] eta: 0:00:11 loss: 0.7070 (0.7070) acc1: 83.3333 (83.3333) acc5: 95.3125 (95.3125) time: 2.3390 data: 2.0954 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8546 (0.8419) acc1: 78.1250 (76.5985) acc5: 94.7917 (91.6880) time: 0.6184 data: 0.4192 max mem: 57114 Test: Total time: 0:00:03 (0.6313 s / it) * Acc@1 73.430 Acc@5 91.722 loss 1.026 Accuracy of the model EMA on 50000 test images: 73.4% Max EMA accuracy: 73.43% Epoch: [174] [ 0/156] eta: 0:06:29 lr: 0.003375 min_lr: 0.003375 loss: 2.8825 (2.8825) weight_decay: 0.0500 (0.0500) time: 2.4936 data: 1.8433 max mem: 57114 Epoch: [174] [ 10/156] eta: 0:02:06 lr: 0.003372 min_lr: 0.003372 loss: 3.2770 (3.1858) weight_decay: 0.0500 (0.0500) time: 0.8679 data: 0.1678 max mem: 57114 Epoch: [174] [ 20/156] eta: 0:01:48 lr: 0.003369 min_lr: 0.003369 loss: 3.2796 (3.1915) weight_decay: 0.0500 (0.0500) time: 0.7148 data: 0.0003 max mem: 57114 Epoch: [174] [ 30/156] eta: 0:01:37 lr: 0.003366 min_lr: 0.003366 loss: 3.0063 (3.1082) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0003 max mem: 57114 Epoch: [174] [ 40/156] eta: 0:01:28 lr: 0.003363 min_lr: 0.003363 loss: 2.9361 (3.0619) weight_decay: 0.0500 (0.0500) time: 0.7264 data: 0.0004 max mem: 57114 Epoch: [174] [ 50/156] eta: 0:01:20 lr: 0.003361 min_lr: 0.003361 loss: 3.1434 (3.0688) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0004 max mem: 57114 Epoch: [174] [ 60/156] eta: 0:01:12 lr: 0.003358 min_lr: 0.003358 loss: 3.2407 (3.0773) weight_decay: 0.0500 (0.0500) time: 0.7365 data: 0.0004 max mem: 57114 Epoch: [174] [ 70/156] eta: 0:01:04 lr: 0.003355 min_lr: 0.003355 loss: 2.9681 (3.0330) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0003 max mem: 57114 Epoch: [174] [ 80/156] eta: 0:00:56 lr: 0.003352 min_lr: 0.003352 loss: 2.8364 (3.0147) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [174] [ 90/156] eta: 0:00:48 lr: 0.003349 min_lr: 0.003349 loss: 2.8364 (2.9915) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0004 max mem: 57114 Epoch: [174] [100/156] eta: 0:00:41 lr: 0.003346 min_lr: 0.003346 loss: 2.6797 (2.9736) weight_decay: 0.0500 (0.0500) time: 0.7152 data: 0.0003 max mem: 57114 Epoch: [174] [110/156] eta: 0:00:33 lr: 0.003344 min_lr: 0.003344 loss: 3.1858 (2.9964) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0004 max mem: 57114 Epoch: [174] [120/156] eta: 0:00:26 lr: 0.003341 min_lr: 0.003341 loss: 3.2130 (3.0202) weight_decay: 0.0500 (0.0500) time: 0.7080 data: 0.0004 max mem: 57114 Epoch: [174] [130/156] eta: 0:00:18 lr: 0.003338 min_lr: 0.003338 loss: 3.1855 (3.0230) weight_decay: 0.0500 (0.0500) time: 0.7050 data: 0.0009 max mem: 57114 Epoch: [174] [140/156] eta: 0:00:11 lr: 0.003335 min_lr: 0.003335 loss: 3.0712 (3.0224) weight_decay: 0.0500 (0.0500) time: 0.6890 data: 0.0007 max mem: 57114 Epoch: [174] [150/156] eta: 0:00:04 lr: 0.003332 min_lr: 0.003332 loss: 3.1293 (3.0166) weight_decay: 0.0500 (0.0500) time: 0.6847 data: 0.0001 max mem: 57114 Epoch: [174] [155/156] eta: 0:00:00 lr: 0.003331 min_lr: 0.003331 loss: 3.1293 (3.0157) weight_decay: 0.0500 (0.0500) time: 0.6804 data: 0.0001 max mem: 57114 Epoch: [174] Total time: 0:01:52 (0.7236 s / it) Averaged stats: lr: 0.003331 min_lr: 0.003331 loss: 3.1293 (3.0042) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7401 (0.7401) acc1: 88.5417 (88.5417) acc5: 98.9583 (98.9583) time: 2.0382 data: 1.7820 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9828 (0.8961) acc1: 79.6875 (80.9463) acc5: 96.3542 (96.0358) time: 0.5584 data: 0.3565 max mem: 57114 Test: Total time: 0:00:02 (0.5792 s / it) * Acc@1 80.393 Acc@5 95.402 loss 0.967 Accuracy of the model on the 50000 test images: 80.4% Max accuracy: 80.44% Test: [0/5] eta: 0:00:11 loss: 0.7018 (0.7018) acc1: 83.3333 (83.3333) acc5: 95.3125 (95.3125) time: 2.3414 data: 2.0977 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8479 (0.8342) acc1: 78.1250 (76.7263) acc5: 94.7917 (91.6880) time: 0.6190 data: 0.4196 max mem: 57114 Test: Total time: 0:00:03 (0.6322 s / it) * Acc@1 73.599 Acc@5 91.824 loss 1.016 Accuracy of the model EMA on 50000 test images: 73.6% Max EMA accuracy: 73.60% Epoch: [175] [ 0/156] eta: 0:07:16 lr: 0.003331 min_lr: 0.003331 loss: 3.0630 (3.0630) weight_decay: 0.0500 (0.0500) time: 2.7989 data: 2.1511 max mem: 57114 Epoch: [175] [ 10/156] eta: 0:02:14 lr: 0.003328 min_lr: 0.003328 loss: 3.0134 (2.8622) weight_decay: 0.0500 (0.0500) time: 0.9213 data: 0.1958 max mem: 57114 Epoch: [175] [ 20/156] eta: 0:01:52 lr: 0.003325 min_lr: 0.003325 loss: 3.0597 (2.9750) weight_decay: 0.0500 (0.0500) time: 0.7264 data: 0.0003 max mem: 57114 Epoch: [175] [ 30/156] eta: 0:01:40 lr: 0.003322 min_lr: 0.003322 loss: 3.2069 (3.0256) weight_decay: 0.0500 (0.0500) time: 0.7279 data: 0.0003 max mem: 57114 Epoch: [175] [ 40/156] eta: 0:01:29 lr: 0.003319 min_lr: 0.003319 loss: 3.1317 (2.9857) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [175] [ 50/156] eta: 0:01:20 lr: 0.003316 min_lr: 0.003316 loss: 2.7596 (3.0012) weight_decay: 0.0500 (0.0500) time: 0.7095 data: 0.0004 max mem: 57114 Epoch: [175] [ 60/156] eta: 0:01:12 lr: 0.003314 min_lr: 0.003314 loss: 3.2091 (2.9930) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [175] [ 70/156] eta: 0:01:04 lr: 0.003311 min_lr: 0.003311 loss: 3.0359 (2.9748) weight_decay: 0.0500 (0.0500) time: 0.7148 data: 0.0004 max mem: 57114 Epoch: [175] [ 80/156] eta: 0:00:56 lr: 0.003308 min_lr: 0.003308 loss: 3.0359 (2.9951) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [175] [ 90/156] eta: 0:00:48 lr: 0.003305 min_lr: 0.003305 loss: 3.1828 (3.0047) weight_decay: 0.0500 (0.0500) time: 0.7096 data: 0.0003 max mem: 57114 Epoch: [175] [100/156] eta: 0:00:41 lr: 0.003302 min_lr: 0.003302 loss: 3.1464 (3.0142) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [175] [110/156] eta: 0:00:33 lr: 0.003299 min_lr: 0.003299 loss: 3.0667 (2.9980) weight_decay: 0.0500 (0.0500) time: 0.7061 data: 0.0005 max mem: 57114 Epoch: [175] [120/156] eta: 0:00:26 lr: 0.003297 min_lr: 0.003297 loss: 3.0882 (2.9935) weight_decay: 0.0500 (0.0500) time: 0.7159 data: 0.0004 max mem: 57114 Epoch: [175] [130/156] eta: 0:00:18 lr: 0.003294 min_lr: 0.003294 loss: 3.0451 (2.9869) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0008 max mem: 57114 Epoch: [175] [140/156] eta: 0:00:11 lr: 0.003291 min_lr: 0.003291 loss: 3.0451 (2.9921) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0007 max mem: 57114 Epoch: [175] [150/156] eta: 0:00:04 lr: 0.003288 min_lr: 0.003288 loss: 2.9817 (2.9846) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [175] [155/156] eta: 0:00:00 lr: 0.003287 min_lr: 0.003287 loss: 2.9992 (2.9873) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0001 max mem: 57114 Epoch: [175] Total time: 0:01:53 (0.7250 s / it) Averaged stats: lr: 0.003287 min_lr: 0.003287 loss: 2.9992 (2.9992) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7791 (0.7791) acc1: 86.9792 (86.9792) acc5: 97.3958 (97.3958) time: 2.0934 data: 1.8379 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9032 (0.8556) acc1: 82.2917 (81.8414) acc5: 97.3958 (96.2916) time: 0.5693 data: 0.3676 max mem: 57114 Test: Total time: 0:00:02 (0.5913 s / it) * Acc@1 80.305 Acc@5 95.322 loss 0.952 Accuracy of the model on the 50000 test images: 80.3% Max accuracy: 80.44% Test: [0/5] eta: 0:00:11 loss: 0.6962 (0.6962) acc1: 83.3333 (83.3333) acc5: 95.3125 (95.3125) time: 2.2630 data: 2.0195 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8412 (0.8265) acc1: 78.1250 (76.5985) acc5: 94.7917 (91.6880) time: 0.6032 data: 0.4040 max mem: 57114 Test: Total time: 0:00:03 (0.6155 s / it) * Acc@1 73.781 Acc@5 91.922 loss 1.006 Accuracy of the model EMA on 50000 test images: 73.8% Max EMA accuracy: 73.78% Epoch: [176] [ 0/156] eta: 0:06:55 lr: 0.003286 min_lr: 0.003286 loss: 1.9077 (1.9077) weight_decay: 0.0500 (0.0500) time: 2.6619 data: 2.0110 max mem: 57114 Epoch: [176] [ 10/156] eta: 0:02:14 lr: 0.003284 min_lr: 0.003284 loss: 2.7810 (2.7117) weight_decay: 0.0500 (0.0500) time: 0.9180 data: 0.1831 max mem: 57114 Epoch: [176] [ 20/156] eta: 0:01:51 lr: 0.003281 min_lr: 0.003281 loss: 2.8669 (2.8443) weight_decay: 0.0500 (0.0500) time: 0.7311 data: 0.0003 max mem: 57114 Epoch: [176] [ 30/156] eta: 0:01:40 lr: 0.003278 min_lr: 0.003278 loss: 3.0628 (2.9230) weight_decay: 0.0500 (0.0500) time: 0.7280 data: 0.0003 max mem: 57114 Epoch: [176] [ 40/156] eta: 0:01:29 lr: 0.003275 min_lr: 0.003275 loss: 3.2039 (2.9500) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0003 max mem: 57114 Epoch: [176] [ 50/156] eta: 0:01:20 lr: 0.003272 min_lr: 0.003272 loss: 2.9913 (2.9456) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0004 max mem: 57114 Epoch: [176] [ 60/156] eta: 0:01:12 lr: 0.003269 min_lr: 0.003269 loss: 2.9160 (2.9281) weight_decay: 0.0500 (0.0500) time: 0.7167 data: 0.0004 max mem: 57114 Epoch: [176] [ 70/156] eta: 0:01:04 lr: 0.003267 min_lr: 0.003267 loss: 2.9784 (2.9417) weight_decay: 0.0500 (0.0500) time: 0.7205 data: 0.0004 max mem: 57114 Epoch: [176] [ 80/156] eta: 0:00:56 lr: 0.003264 min_lr: 0.003264 loss: 3.1099 (2.9669) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0004 max mem: 57114 Epoch: [176] [ 90/156] eta: 0:00:48 lr: 0.003261 min_lr: 0.003261 loss: 3.2209 (2.9719) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [176] [100/156] eta: 0:00:41 lr: 0.003258 min_lr: 0.003258 loss: 3.2394 (2.9915) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [176] [110/156] eta: 0:00:33 lr: 0.003255 min_lr: 0.003255 loss: 3.0928 (2.9760) weight_decay: 0.0500 (0.0500) time: 0.7064 data: 0.0004 max mem: 57114 Epoch: [176] [120/156] eta: 0:00:26 lr: 0.003252 min_lr: 0.003252 loss: 2.9059 (2.9630) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0004 max mem: 57114 Epoch: [176] [130/156] eta: 0:00:18 lr: 0.003250 min_lr: 0.003250 loss: 3.0220 (2.9778) weight_decay: 0.0500 (0.0500) time: 0.6939 data: 0.0009 max mem: 57114 Epoch: [176] [140/156] eta: 0:00:11 lr: 0.003247 min_lr: 0.003247 loss: 3.0051 (2.9784) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0007 max mem: 57114 Epoch: [176] [150/156] eta: 0:00:04 lr: 0.003244 min_lr: 0.003244 loss: 2.9380 (2.9785) weight_decay: 0.0500 (0.0500) time: 0.6891 data: 0.0001 max mem: 57114 Epoch: [176] [155/156] eta: 0:00:00 lr: 0.003243 min_lr: 0.003243 loss: 3.0051 (2.9821) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0001 max mem: 57114 Epoch: [176] Total time: 0:01:52 (0.7243 s / it) Averaged stats: lr: 0.003243 min_lr: 0.003243 loss: 3.0051 (2.9956) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8918 (0.8918) acc1: 83.3333 (83.3333) acc5: 97.9167 (97.9167) time: 2.1975 data: 1.9413 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0214 (0.9949) acc1: 79.1667 (79.4118) acc5: 96.8750 (95.2685) time: 0.5902 data: 0.3883 max mem: 57114 Test: Total time: 0:00:03 (0.6155 s / it) * Acc@1 80.483 Acc@5 95.406 loss 1.057 Accuracy of the model on the 50000 test images: 80.5% Max accuracy: 80.48% Test: [0/5] eta: 0:00:10 loss: 0.6906 (0.6906) acc1: 82.8125 (82.8125) acc5: 95.3125 (95.3125) time: 2.0001 data: 1.7567 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8345 (0.8189) acc1: 78.1250 (76.5985) acc5: 94.7917 (91.6880) time: 0.5507 data: 0.3514 max mem: 57114 Test: Total time: 0:00:02 (0.5607 s / it) * Acc@1 73.979 Acc@5 92.002 loss 0.997 Accuracy of the model EMA on 50000 test images: 74.0% Max EMA accuracy: 73.98% Epoch: [177] [ 0/156] eta: 0:07:21 lr: 0.003242 min_lr: 0.003242 loss: 3.1074 (3.1074) weight_decay: 0.0500 (0.0500) time: 2.8287 data: 2.1781 max mem: 57114 Epoch: [177] [ 10/156] eta: 0:02:14 lr: 0.003239 min_lr: 0.003239 loss: 2.5245 (2.7406) weight_decay: 0.0500 (0.0500) time: 0.9182 data: 0.1983 max mem: 57114 Epoch: [177] [ 20/156] eta: 0:01:52 lr: 0.003237 min_lr: 0.003237 loss: 3.0626 (2.9111) weight_decay: 0.0500 (0.0500) time: 0.7249 data: 0.0004 max mem: 57114 Epoch: [177] [ 30/156] eta: 0:01:40 lr: 0.003234 min_lr: 0.003234 loss: 3.0713 (2.9133) weight_decay: 0.0500 (0.0500) time: 0.7269 data: 0.0005 max mem: 57114 Epoch: [177] [ 40/156] eta: 0:01:30 lr: 0.003231 min_lr: 0.003231 loss: 2.8920 (2.9180) weight_decay: 0.0500 (0.0500) time: 0.7363 data: 0.0006 max mem: 57114 Epoch: [177] [ 50/156] eta: 0:01:21 lr: 0.003228 min_lr: 0.003228 loss: 3.0970 (2.9737) weight_decay: 0.0500 (0.0500) time: 0.7360 data: 0.0007 max mem: 57114 Epoch: [177] [ 60/156] eta: 0:01:14 lr: 0.003225 min_lr: 0.003225 loss: 3.1615 (2.9770) weight_decay: 0.0500 (0.0500) time: 0.7532 data: 0.0008 max mem: 57114 Epoch: [177] [ 70/156] eta: 0:01:05 lr: 0.003222 min_lr: 0.003222 loss: 3.0303 (2.9472) weight_decay: 0.0500 (0.0500) time: 0.7489 data: 0.0008 max mem: 57114 Epoch: [177] [ 80/156] eta: 0:00:57 lr: 0.003220 min_lr: 0.003220 loss: 3.1106 (2.9560) weight_decay: 0.0500 (0.0500) time: 0.7164 data: 0.0007 max mem: 57114 Epoch: [177] [ 90/156] eta: 0:00:49 lr: 0.003217 min_lr: 0.003217 loss: 3.2106 (2.9735) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0007 max mem: 57114 Epoch: [177] [100/156] eta: 0:00:41 lr: 0.003214 min_lr: 0.003214 loss: 3.0666 (2.9550) weight_decay: 0.0500 (0.0500) time: 0.6938 data: 0.0006 max mem: 57114 Epoch: [177] [110/156] eta: 0:00:34 lr: 0.003211 min_lr: 0.003211 loss: 2.9995 (2.9691) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0004 max mem: 57114 Epoch: [177] [120/156] eta: 0:00:26 lr: 0.003208 min_lr: 0.003208 loss: 3.0119 (2.9715) weight_decay: 0.0500 (0.0500) time: 0.7137 data: 0.0004 max mem: 57114 Epoch: [177] [130/156] eta: 0:00:19 lr: 0.003206 min_lr: 0.003206 loss: 3.0014 (2.9557) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0010 max mem: 57114 Epoch: [177] [140/156] eta: 0:00:11 lr: 0.003203 min_lr: 0.003203 loss: 3.0014 (2.9568) weight_decay: 0.0500 (0.0500) time: 0.6958 data: 0.0008 max mem: 57114 Epoch: [177] [150/156] eta: 0:00:04 lr: 0.003200 min_lr: 0.003200 loss: 3.1209 (2.9587) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0001 max mem: 57114 Epoch: [177] [155/156] eta: 0:00:00 lr: 0.003199 min_lr: 0.003199 loss: 2.9150 (2.9548) weight_decay: 0.0500 (0.0500) time: 0.6774 data: 0.0001 max mem: 57114 Epoch: [177] Total time: 0:01:53 (0.7306 s / it) Averaged stats: lr: 0.003199 min_lr: 0.003199 loss: 2.9150 (2.9926) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6839 (0.6839) acc1: 88.5417 (88.5417) acc5: 97.9167 (97.9167) time: 2.1390 data: 1.8831 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9398 (0.8365) acc1: 80.2083 (81.3299) acc5: 97.3958 (96.0358) time: 0.5786 data: 0.3767 max mem: 57114 Test: Total time: 0:00:03 (0.6022 s / it) * Acc@1 80.353 Acc@5 95.508 loss 0.927 Accuracy of the model on the 50000 test images: 80.4% Max accuracy: 80.48% Test: [0/5] eta: 0:00:11 loss: 0.6858 (0.6858) acc1: 82.8125 (82.8125) acc5: 95.3125 (95.3125) time: 2.2828 data: 2.0393 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8280 (0.8120) acc1: 78.6458 (76.7263) acc5: 94.7917 (91.6880) time: 0.6072 data: 0.4079 max mem: 57114 Test: Total time: 0:00:03 (0.6192 s / it) * Acc@1 74.153 Acc@5 92.080 loss 0.988 Accuracy of the model EMA on 50000 test images: 74.2% Max EMA accuracy: 74.15% Epoch: [178] [ 0/156] eta: 0:08:22 lr: 0.003198 min_lr: 0.003198 loss: 3.6086 (3.6086) weight_decay: 0.0500 (0.0500) time: 3.2194 data: 2.5636 max mem: 57114 Epoch: [178] [ 10/156] eta: 0:02:18 lr: 0.003195 min_lr: 0.003195 loss: 3.0635 (3.0821) weight_decay: 0.0500 (0.0500) time: 0.9503 data: 0.2334 max mem: 57114 Epoch: [178] [ 20/156] eta: 0:01:54 lr: 0.003193 min_lr: 0.003193 loss: 3.0278 (3.0377) weight_decay: 0.0500 (0.0500) time: 0.7240 data: 0.0004 max mem: 57114 Epoch: [178] [ 30/156] eta: 0:01:41 lr: 0.003190 min_lr: 0.003190 loss: 3.0253 (3.0065) weight_decay: 0.0500 (0.0500) time: 0.7296 data: 0.0004 max mem: 57114 Epoch: [178] [ 40/156] eta: 0:01:32 lr: 0.003187 min_lr: 0.003187 loss: 3.1269 (3.0501) weight_decay: 0.0500 (0.0500) time: 0.7437 data: 0.0003 max mem: 57114 Epoch: [178] [ 50/156] eta: 0:01:22 lr: 0.003184 min_lr: 0.003184 loss: 3.2104 (3.0450) weight_decay: 0.0500 (0.0500) time: 0.7385 data: 0.0004 max mem: 57114 Epoch: [178] [ 60/156] eta: 0:01:13 lr: 0.003181 min_lr: 0.003181 loss: 3.1909 (3.0538) weight_decay: 0.0500 (0.0500) time: 0.7129 data: 0.0004 max mem: 57114 Epoch: [178] [ 70/156] eta: 0:01:05 lr: 0.003179 min_lr: 0.003179 loss: 3.1838 (3.0480) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0004 max mem: 57114 Epoch: [178] [ 80/156] eta: 0:00:57 lr: 0.003176 min_lr: 0.003176 loss: 3.2744 (3.0703) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0004 max mem: 57114 Epoch: [178] [ 90/156] eta: 0:00:49 lr: 0.003173 min_lr: 0.003173 loss: 3.2847 (3.0855) weight_decay: 0.0500 (0.0500) time: 0.6994 data: 0.0004 max mem: 57114 Epoch: [178] [100/156] eta: 0:00:41 lr: 0.003170 min_lr: 0.003170 loss: 3.2565 (3.0821) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [178] [110/156] eta: 0:00:34 lr: 0.003167 min_lr: 0.003167 loss: 3.1840 (3.0849) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [178] [120/156] eta: 0:00:26 lr: 0.003164 min_lr: 0.003164 loss: 3.0880 (3.0683) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [178] [130/156] eta: 0:00:19 lr: 0.003162 min_lr: 0.003162 loss: 2.9327 (3.0486) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.0008 max mem: 57114 Epoch: [178] [140/156] eta: 0:00:11 lr: 0.003159 min_lr: 0.003159 loss: 2.8341 (3.0312) weight_decay: 0.0500 (0.0500) time: 0.6917 data: 0.0007 max mem: 57114 Epoch: [178] [150/156] eta: 0:00:04 lr: 0.003156 min_lr: 0.003156 loss: 2.8482 (3.0294) weight_decay: 0.0500 (0.0500) time: 0.6794 data: 0.0001 max mem: 57114 Epoch: [178] [155/156] eta: 0:00:00 lr: 0.003155 min_lr: 0.003155 loss: 2.9926 (3.0319) weight_decay: 0.0500 (0.0500) time: 0.6808 data: 0.0001 max mem: 57114 Epoch: [178] Total time: 0:01:53 (0.7288 s / it) Averaged stats: lr: 0.003155 min_lr: 0.003155 loss: 2.9926 (2.9841) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7293 (0.7293) acc1: 88.0208 (88.0208) acc5: 98.9583 (98.9583) time: 2.0703 data: 1.8142 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0313 (0.8931) acc1: 81.2500 (81.4578) acc5: 95.3125 (95.5243) time: 0.5648 data: 0.3629 max mem: 57114 Test: Total time: 0:00:02 (0.5868 s / it) * Acc@1 80.209 Acc@5 95.432 loss 0.980 Accuracy of the model on the 50000 test images: 80.2% Max accuracy: 80.48% Test: [0/5] eta: 0:00:11 loss: 0.6806 (0.6806) acc1: 82.8125 (82.8125) acc5: 95.3125 (95.3125) time: 2.2940 data: 2.0504 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8217 (0.8049) acc1: 78.6458 (76.9821) acc5: 94.7917 (91.6880) time: 0.6095 data: 0.4102 max mem: 57114 Test: Total time: 0:00:03 (0.6214 s / it) * Acc@1 74.299 Acc@5 92.162 loss 0.980 Accuracy of the model EMA on 50000 test images: 74.3% Max EMA accuracy: 74.30% Epoch: [179] [ 0/156] eta: 0:06:21 lr: 0.003154 min_lr: 0.003154 loss: 3.5874 (3.5874) weight_decay: 0.0500 (0.0500) time: 2.4483 data: 1.7991 max mem: 57114 Epoch: [179] [ 10/156] eta: 0:02:08 lr: 0.003152 min_lr: 0.003152 loss: 3.1798 (3.0596) weight_decay: 0.0500 (0.0500) time: 0.8799 data: 0.1639 max mem: 57114 Epoch: [179] [ 20/156] eta: 0:01:49 lr: 0.003149 min_lr: 0.003149 loss: 2.9158 (2.9264) weight_decay: 0.0500 (0.0500) time: 0.7205 data: 0.0003 max mem: 57114 Epoch: [179] [ 30/156] eta: 0:01:37 lr: 0.003146 min_lr: 0.003146 loss: 2.9158 (2.9310) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0004 max mem: 57114 Epoch: [179] [ 40/156] eta: 0:01:28 lr: 0.003143 min_lr: 0.003143 loss: 3.0826 (2.9890) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0004 max mem: 57114 Epoch: [179] [ 50/156] eta: 0:01:20 lr: 0.003140 min_lr: 0.003140 loss: 3.2097 (3.0217) weight_decay: 0.0500 (0.0500) time: 0.7346 data: 0.0004 max mem: 57114 Epoch: [179] [ 60/156] eta: 0:01:12 lr: 0.003137 min_lr: 0.003137 loss: 3.2097 (2.9888) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0004 max mem: 57114 Epoch: [179] [ 70/156] eta: 0:01:04 lr: 0.003135 min_lr: 0.003135 loss: 2.7664 (2.9844) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [179] [ 80/156] eta: 0:00:56 lr: 0.003132 min_lr: 0.003132 loss: 3.0625 (2.9974) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [179] [ 90/156] eta: 0:00:48 lr: 0.003129 min_lr: 0.003129 loss: 3.1535 (2.9863) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [179] [100/156] eta: 0:00:41 lr: 0.003126 min_lr: 0.003126 loss: 3.1340 (2.9784) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0003 max mem: 57114 Epoch: [179] [110/156] eta: 0:00:33 lr: 0.003123 min_lr: 0.003123 loss: 3.1340 (2.9759) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0003 max mem: 57114 Epoch: [179] [120/156] eta: 0:00:26 lr: 0.003121 min_lr: 0.003121 loss: 2.9915 (2.9712) weight_decay: 0.0500 (0.0500) time: 0.7124 data: 0.0003 max mem: 57114 Epoch: [179] [130/156] eta: 0:00:18 lr: 0.003118 min_lr: 0.003118 loss: 2.9075 (2.9651) weight_decay: 0.0500 (0.0500) time: 0.6960 data: 0.0008 max mem: 57114 Epoch: [179] [140/156] eta: 0:00:11 lr: 0.003115 min_lr: 0.003115 loss: 2.9075 (2.9639) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0007 max mem: 57114 Epoch: [179] [150/156] eta: 0:00:04 lr: 0.003112 min_lr: 0.003112 loss: 3.0726 (2.9617) weight_decay: 0.0500 (0.0500) time: 0.6818 data: 0.0001 max mem: 57114 Epoch: [179] [155/156] eta: 0:00:00 lr: 0.003111 min_lr: 0.003111 loss: 3.1045 (2.9730) weight_decay: 0.0500 (0.0500) time: 0.6827 data: 0.0001 max mem: 57114 Epoch: [179] Total time: 0:01:52 (0.7228 s / it) Averaged stats: lr: 0.003111 min_lr: 0.003111 loss: 3.1045 (2.9811) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7158 (0.7158) acc1: 88.0208 (88.0208) acc5: 97.3958 (97.3958) time: 2.0945 data: 1.8391 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8807 (0.8380) acc1: 82.2917 (81.7136) acc5: 97.3958 (96.4194) time: 0.5696 data: 0.3679 max mem: 57114 Test: Total time: 0:00:02 (0.5919 s / it) * Acc@1 80.507 Acc@5 95.448 loss 0.941 Accuracy of the model on the 50000 test images: 80.5% Max accuracy: 80.51% Test: [0/5] eta: 0:00:10 loss: 0.6757 (0.6757) acc1: 82.2917 (82.2917) acc5: 95.3125 (95.3125) time: 2.0932 data: 1.8498 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8161 (0.7981) acc1: 78.6458 (76.9821) acc5: 94.7917 (91.8159) time: 0.5696 data: 0.3702 max mem: 57114 Test: Total time: 0:00:02 (0.5843 s / it) * Acc@1 74.500 Acc@5 92.259 loss 0.971 Accuracy of the model EMA on 50000 test images: 74.5% Max EMA accuracy: 74.50% Epoch: [180] [ 0/156] eta: 0:08:25 lr: 0.003111 min_lr: 0.003111 loss: 2.5864 (2.5864) weight_decay: 0.0500 (0.0500) time: 3.2373 data: 2.5907 max mem: 57114 Epoch: [180] [ 10/156] eta: 0:02:22 lr: 0.003108 min_lr: 0.003108 loss: 3.2952 (3.2368) weight_decay: 0.0500 (0.0500) time: 0.9737 data: 0.2358 max mem: 57114 Epoch: [180] [ 20/156] eta: 0:01:55 lr: 0.003105 min_lr: 0.003105 loss: 3.1887 (2.9850) weight_decay: 0.0500 (0.0500) time: 0.7316 data: 0.0003 max mem: 57114 Epoch: [180] [ 30/156] eta: 0:01:42 lr: 0.003102 min_lr: 0.003102 loss: 3.0194 (2.9862) weight_decay: 0.0500 (0.0500) time: 0.7285 data: 0.0003 max mem: 57114 Epoch: [180] [ 40/156] eta: 0:01:31 lr: 0.003099 min_lr: 0.003099 loss: 3.0194 (2.9656) weight_decay: 0.0500 (0.0500) time: 0.7290 data: 0.0004 max mem: 57114 Epoch: [180] [ 50/156] eta: 0:01:22 lr: 0.003097 min_lr: 0.003097 loss: 2.9987 (2.9606) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0004 max mem: 57114 Epoch: [180] [ 60/156] eta: 0:01:14 lr: 0.003094 min_lr: 0.003094 loss: 2.9987 (2.9151) weight_decay: 0.0500 (0.0500) time: 0.7495 data: 0.0004 max mem: 57114 Epoch: [180] [ 70/156] eta: 0:01:06 lr: 0.003091 min_lr: 0.003091 loss: 3.1776 (2.9666) weight_decay: 0.0500 (0.0500) time: 0.7385 data: 0.0004 max mem: 57114 Epoch: [180] [ 80/156] eta: 0:00:57 lr: 0.003088 min_lr: 0.003088 loss: 3.2019 (2.9545) weight_decay: 0.0500 (0.0500) time: 0.7115 data: 0.0004 max mem: 57114 Epoch: [180] [ 90/156] eta: 0:00:49 lr: 0.003085 min_lr: 0.003085 loss: 3.0777 (2.9635) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0004 max mem: 57114 Epoch: [180] [100/156] eta: 0:00:42 lr: 0.003083 min_lr: 0.003083 loss: 3.0777 (2.9786) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [180] [110/156] eta: 0:00:34 lr: 0.003080 min_lr: 0.003080 loss: 3.0675 (2.9774) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [180] [120/156] eta: 0:00:26 lr: 0.003077 min_lr: 0.003077 loss: 3.0675 (2.9662) weight_decay: 0.0500 (0.0500) time: 0.7155 data: 0.0003 max mem: 57114 Epoch: [180] [130/156] eta: 0:00:19 lr: 0.003074 min_lr: 0.003074 loss: 2.9994 (2.9651) weight_decay: 0.0500 (0.0500) time: 0.7035 data: 0.0008 max mem: 57114 Epoch: [180] [140/156] eta: 0:00:11 lr: 0.003071 min_lr: 0.003071 loss: 2.8936 (2.9586) weight_decay: 0.0500 (0.0500) time: 0.6894 data: 0.0007 max mem: 57114 Epoch: [180] [150/156] eta: 0:00:04 lr: 0.003069 min_lr: 0.003069 loss: 3.0313 (2.9777) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0001 max mem: 57114 Epoch: [180] [155/156] eta: 0:00:00 lr: 0.003067 min_lr: 0.003067 loss: 3.0313 (2.9719) weight_decay: 0.0500 (0.0500) time: 0.6834 data: 0.0001 max mem: 57114 Epoch: [180] Total time: 0:01:54 (0.7338 s / it) Averaged stats: lr: 0.003067 min_lr: 0.003067 loss: 3.0313 (2.9862) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7484 (0.7484) acc1: 86.9792 (86.9792) acc5: 98.9583 (98.9583) time: 2.1210 data: 1.8648 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9837 (0.8850) acc1: 79.1667 (80.6905) acc5: 95.8333 (96.2916) time: 0.5750 data: 0.3731 max mem: 57114 Test: Total time: 0:00:02 (0.5951 s / it) * Acc@1 80.850 Acc@5 95.608 loss 0.965 Accuracy of the model on the 50000 test images: 80.9% Max accuracy: 80.85% Test: [0/5] eta: 0:00:10 loss: 0.6708 (0.6708) acc1: 82.2917 (82.2917) acc5: 95.3125 (95.3125) time: 2.1817 data: 1.9381 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8106 (0.7915) acc1: 78.6458 (76.9821) acc5: 94.7917 (91.9437) time: 0.5870 data: 0.3877 max mem: 57114 Test: Total time: 0:00:02 (0.5998 s / it) * Acc@1 74.656 Acc@5 92.357 loss 0.963 Accuracy of the model EMA on 50000 test images: 74.7% Max EMA accuracy: 74.66% Epoch: [181] [ 0/156] eta: 0:07:15 lr: 0.003067 min_lr: 0.003067 loss: 3.3196 (3.3196) weight_decay: 0.0500 (0.0500) time: 2.7892 data: 2.1345 max mem: 57114 Epoch: [181] [ 10/156] eta: 0:02:10 lr: 0.003064 min_lr: 0.003064 loss: 2.8718 (2.7324) weight_decay: 0.0500 (0.0500) time: 0.8962 data: 0.1945 max mem: 57114 Epoch: [181] [ 20/156] eta: 0:01:52 lr: 0.003061 min_lr: 0.003061 loss: 2.9916 (2.9211) weight_decay: 0.0500 (0.0500) time: 0.7300 data: 0.0004 max mem: 57114 Epoch: [181] [ 30/156] eta: 0:01:41 lr: 0.003058 min_lr: 0.003058 loss: 3.1307 (3.0140) weight_decay: 0.0500 (0.0500) time: 0.7593 data: 0.0004 max mem: 57114 Epoch: [181] [ 40/156] eta: 0:01:31 lr: 0.003056 min_lr: 0.003056 loss: 3.1796 (3.0272) weight_decay: 0.0500 (0.0500) time: 0.7446 data: 0.0005 max mem: 57114 Epoch: [181] [ 50/156] eta: 0:01:21 lr: 0.003053 min_lr: 0.003053 loss: 2.9838 (2.9732) weight_decay: 0.0500 (0.0500) time: 0.7200 data: 0.0006 max mem: 57114 Epoch: [181] [ 60/156] eta: 0:01:13 lr: 0.003050 min_lr: 0.003050 loss: 2.8833 (2.9446) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0005 max mem: 57114 Epoch: [181] [ 70/156] eta: 0:01:05 lr: 0.003047 min_lr: 0.003047 loss: 2.9407 (2.9488) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0005 max mem: 57114 Epoch: [181] [ 80/156] eta: 0:00:57 lr: 0.003044 min_lr: 0.003044 loss: 3.0188 (2.9426) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0004 max mem: 57114 Epoch: [181] [ 90/156] eta: 0:00:49 lr: 0.003042 min_lr: 0.003042 loss: 3.0204 (2.9422) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0004 max mem: 57114 Epoch: [181] [100/156] eta: 0:00:41 lr: 0.003039 min_lr: 0.003039 loss: 2.9131 (2.9329) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [181] [110/156] eta: 0:00:34 lr: 0.003036 min_lr: 0.003036 loss: 2.9091 (2.9327) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0005 max mem: 57114 Epoch: [181] [120/156] eta: 0:00:26 lr: 0.003033 min_lr: 0.003033 loss: 2.9965 (2.9338) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0007 max mem: 57114 Epoch: [181] [130/156] eta: 0:00:19 lr: 0.003031 min_lr: 0.003031 loss: 3.1758 (2.9489) weight_decay: 0.0500 (0.0500) time: 0.6969 data: 0.0016 max mem: 57114 Epoch: [181] [140/156] eta: 0:00:11 lr: 0.003028 min_lr: 0.003028 loss: 3.1949 (2.9536) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0013 max mem: 57114 Epoch: [181] [150/156] eta: 0:00:04 lr: 0.003025 min_lr: 0.003025 loss: 2.9459 (2.9480) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.0001 max mem: 57114 Epoch: [181] [155/156] eta: 0:00:00 lr: 0.003024 min_lr: 0.003024 loss: 2.9459 (2.9443) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0001 max mem: 57114 Epoch: [181] Total time: 0:01:53 (0.7297 s / it) Averaged stats: lr: 0.003024 min_lr: 0.003024 loss: 2.9459 (2.9686) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6820 (0.6820) acc1: 89.5833 (89.5833) acc5: 98.4375 (98.4375) time: 2.0738 data: 1.8180 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8860 (0.8081) acc1: 82.8125 (83.6317) acc5: 97.9167 (97.1867) time: 0.5654 data: 0.3637 max mem: 57114 Test: Total time: 0:00:02 (0.5858 s / it) * Acc@1 80.900 Acc@5 95.670 loss 0.906 Accuracy of the model on the 50000 test images: 80.9% Max accuracy: 80.90% Test: [0/5] eta: 0:00:09 loss: 0.6665 (0.6665) acc1: 82.2917 (82.2917) acc5: 95.8333 (95.8333) time: 1.9918 data: 1.7482 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8054 (0.7854) acc1: 78.6458 (76.9821) acc5: 94.7917 (92.0716) time: 0.5490 data: 0.3497 max mem: 57114 Test: Total time: 0:00:02 (0.5607 s / it) * Acc@1 74.836 Acc@5 92.447 loss 0.955 Accuracy of the model EMA on 50000 test images: 74.8% Max EMA accuracy: 74.84% Epoch: [182] [ 0/156] eta: 0:06:27 lr: 0.003023 min_lr: 0.003023 loss: 2.7551 (2.7551) weight_decay: 0.0500 (0.0500) time: 2.4840 data: 1.8318 max mem: 57114 Epoch: [182] [ 10/156] eta: 0:02:11 lr: 0.003020 min_lr: 0.003020 loss: 3.1409 (2.9480) weight_decay: 0.0500 (0.0500) time: 0.9000 data: 0.1668 max mem: 57114 Epoch: [182] [ 20/156] eta: 0:01:50 lr: 0.003018 min_lr: 0.003018 loss: 3.0922 (2.9746) weight_decay: 0.0500 (0.0500) time: 0.7326 data: 0.0003 max mem: 57114 Epoch: [182] [ 30/156] eta: 0:01:38 lr: 0.003015 min_lr: 0.003015 loss: 3.0688 (2.9652) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0003 max mem: 57114 Epoch: [182] [ 40/156] eta: 0:01:29 lr: 0.003012 min_lr: 0.003012 loss: 3.0014 (2.9555) weight_decay: 0.0500 (0.0500) time: 0.7228 data: 0.0003 max mem: 57114 Epoch: [182] [ 50/156] eta: 0:01:21 lr: 0.003009 min_lr: 0.003009 loss: 2.8850 (2.9304) weight_decay: 0.0500 (0.0500) time: 0.7465 data: 0.0004 max mem: 57114 Epoch: [182] [ 60/156] eta: 0:01:13 lr: 0.003007 min_lr: 0.003007 loss: 2.8850 (2.9335) weight_decay: 0.0500 (0.0500) time: 0.7437 data: 0.0004 max mem: 57114 Epoch: [182] [ 70/156] eta: 0:01:05 lr: 0.003004 min_lr: 0.003004 loss: 3.0219 (2.9516) weight_decay: 0.0500 (0.0500) time: 0.7279 data: 0.0004 max mem: 57114 Epoch: [182] [ 80/156] eta: 0:00:57 lr: 0.003001 min_lr: 0.003001 loss: 3.1244 (2.9601) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [182] [ 90/156] eta: 0:00:49 lr: 0.002998 min_lr: 0.002998 loss: 3.0554 (2.9464) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0003 max mem: 57114 Epoch: [182] [100/156] eta: 0:00:41 lr: 0.002995 min_lr: 0.002995 loss: 2.9983 (2.9525) weight_decay: 0.0500 (0.0500) time: 0.6960 data: 0.0003 max mem: 57114 Epoch: [182] [110/156] eta: 0:00:33 lr: 0.002993 min_lr: 0.002993 loss: 3.0444 (2.9497) weight_decay: 0.0500 (0.0500) time: 0.6968 data: 0.0003 max mem: 57114 Epoch: [182] [120/156] eta: 0:00:26 lr: 0.002990 min_lr: 0.002990 loss: 3.0490 (2.9657) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [182] [130/156] eta: 0:00:19 lr: 0.002987 min_lr: 0.002987 loss: 3.0128 (2.9591) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0009 max mem: 57114 Epoch: [182] [140/156] eta: 0:00:11 lr: 0.002984 min_lr: 0.002984 loss: 2.9560 (2.9577) weight_decay: 0.0500 (0.0500) time: 0.6957 data: 0.0007 max mem: 57114 Epoch: [182] [150/156] eta: 0:00:04 lr: 0.002981 min_lr: 0.002981 loss: 2.9647 (2.9650) weight_decay: 0.0500 (0.0500) time: 0.6840 data: 0.0001 max mem: 57114 Epoch: [182] [155/156] eta: 0:00:00 lr: 0.002980 min_lr: 0.002980 loss: 3.0970 (2.9728) weight_decay: 0.0500 (0.0500) time: 0.6846 data: 0.0001 max mem: 57114 Epoch: [182] Total time: 0:01:53 (0.7264 s / it) Averaged stats: lr: 0.002980 min_lr: 0.002980 loss: 3.0970 (2.9636) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.9241 (0.9241) acc1: 84.3750 (84.3750) acc5: 97.9167 (97.9167) time: 2.0798 data: 1.8238 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0312 (1.0499) acc1: 82.8125 (79.7954) acc5: 95.8333 (95.0128) time: 0.5667 data: 0.3648 max mem: 57114 Test: Total time: 0:00:02 (0.5884 s / it) * Acc@1 79.933 Acc@5 95.368 loss 1.100 Accuracy of the model on the 50000 test images: 79.9% Max accuracy: 80.90% Test: [0/5] eta: 0:00:11 loss: 0.6624 (0.6624) acc1: 82.2917 (82.2917) acc5: 95.8333 (95.8333) time: 2.3208 data: 2.0772 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8001 (0.7796) acc1: 78.6458 (77.1100) acc5: 94.7917 (92.3274) time: 0.6148 data: 0.4155 max mem: 57114 Test: Total time: 0:00:03 (0.6272 s / it) * Acc@1 74.922 Acc@5 92.543 loss 0.948 Accuracy of the model EMA on 50000 test images: 74.9% Max EMA accuracy: 74.92% Epoch: [183] [ 0/156] eta: 0:07:50 lr: 0.002980 min_lr: 0.002980 loss: 1.9212 (1.9212) weight_decay: 0.0500 (0.0500) time: 3.0177 data: 2.3690 max mem: 57114 Epoch: [183] [ 10/156] eta: 0:02:14 lr: 0.002977 min_lr: 0.002977 loss: 3.1364 (2.9980) weight_decay: 0.0500 (0.0500) time: 0.9210 data: 0.2156 max mem: 57114 Epoch: [183] [ 20/156] eta: 0:01:52 lr: 0.002974 min_lr: 0.002974 loss: 3.0747 (2.9987) weight_decay: 0.0500 (0.0500) time: 0.7143 data: 0.0003 max mem: 57114 Epoch: [183] [ 30/156] eta: 0:01:41 lr: 0.002971 min_lr: 0.002971 loss: 3.0747 (2.9673) weight_decay: 0.0500 (0.0500) time: 0.7363 data: 0.0003 max mem: 57114 Epoch: [183] [ 40/156] eta: 0:01:31 lr: 0.002969 min_lr: 0.002969 loss: 3.1236 (2.9883) weight_decay: 0.0500 (0.0500) time: 0.7452 data: 0.0003 max mem: 57114 Epoch: [183] [ 50/156] eta: 0:01:22 lr: 0.002966 min_lr: 0.002966 loss: 3.1277 (2.9944) weight_decay: 0.0500 (0.0500) time: 0.7313 data: 0.0004 max mem: 57114 Epoch: [183] [ 60/156] eta: 0:01:13 lr: 0.002963 min_lr: 0.002963 loss: 3.0446 (2.9849) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0004 max mem: 57114 Epoch: [183] [ 70/156] eta: 0:01:04 lr: 0.002960 min_lr: 0.002960 loss: 3.0446 (3.0119) weight_decay: 0.0500 (0.0500) time: 0.6999 data: 0.0004 max mem: 57114 Epoch: [183] [ 80/156] eta: 0:00:56 lr: 0.002958 min_lr: 0.002958 loss: 3.0168 (3.0106) weight_decay: 0.0500 (0.0500) time: 0.7067 data: 0.0003 max mem: 57114 Epoch: [183] [ 90/156] eta: 0:00:48 lr: 0.002955 min_lr: 0.002955 loss: 3.0919 (3.0206) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0004 max mem: 57114 Epoch: [183] [100/156] eta: 0:00:41 lr: 0.002952 min_lr: 0.002952 loss: 3.1501 (3.0087) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [183] [110/156] eta: 0:00:33 lr: 0.002949 min_lr: 0.002949 loss: 3.1487 (3.0180) weight_decay: 0.0500 (0.0500) time: 0.7152 data: 0.0003 max mem: 57114 Epoch: [183] [120/156] eta: 0:00:26 lr: 0.002946 min_lr: 0.002946 loss: 2.9576 (3.0099) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [183] [130/156] eta: 0:00:19 lr: 0.002944 min_lr: 0.002944 loss: 2.9448 (3.0047) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0009 max mem: 57114 Epoch: [183] [140/156] eta: 0:00:11 lr: 0.002941 min_lr: 0.002941 loss: 2.9432 (3.0027) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0007 max mem: 57114 Epoch: [183] [150/156] eta: 0:00:04 lr: 0.002938 min_lr: 0.002938 loss: 3.0750 (3.0169) weight_decay: 0.0500 (0.0500) time: 0.6810 data: 0.0001 max mem: 57114 Epoch: [183] [155/156] eta: 0:00:00 lr: 0.002937 min_lr: 0.002937 loss: 3.1759 (3.0200) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [183] Total time: 0:01:53 (0.7257 s / it) Averaged stats: lr: 0.002937 min_lr: 0.002937 loss: 3.1759 (2.9645) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8423 (0.8423) acc1: 86.4583 (86.4583) acc5: 98.4375 (98.4375) time: 2.0730 data: 1.8173 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0144 (0.9956) acc1: 79.6875 (79.4118) acc5: 96.3542 (95.5243) time: 0.5655 data: 0.3635 max mem: 57114 Test: Total time: 0:00:02 (0.5880 s / it) * Acc@1 80.327 Acc@5 95.336 loss 1.071 Accuracy of the model on the 50000 test images: 80.3% Max accuracy: 80.90% Test: [0/5] eta: 0:00:11 loss: 0.6581 (0.6581) acc1: 82.2917 (82.2917) acc5: 95.8333 (95.8333) time: 2.2826 data: 2.0392 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7949 (0.7738) acc1: 78.6458 (77.1100) acc5: 94.7917 (92.4552) time: 0.6072 data: 0.4079 max mem: 57114 Test: Total time: 0:00:03 (0.6193 s / it) * Acc@1 75.048 Acc@5 92.615 loss 0.941 Accuracy of the model EMA on 50000 test images: 75.0% Max EMA accuracy: 75.05% Epoch: [184] [ 0/156] eta: 0:06:28 lr: 0.002936 min_lr: 0.002936 loss: 3.3497 (3.3497) weight_decay: 0.0500 (0.0500) time: 2.4913 data: 1.8387 max mem: 57114 Epoch: [184] [ 10/156] eta: 0:02:09 lr: 0.002934 min_lr: 0.002934 loss: 3.1756 (2.8088) weight_decay: 0.0500 (0.0500) time: 0.8901 data: 0.1674 max mem: 57114 Epoch: [184] [ 20/156] eta: 0:01:49 lr: 0.002931 min_lr: 0.002931 loss: 3.1602 (2.9423) weight_decay: 0.0500 (0.0500) time: 0.7193 data: 0.0003 max mem: 57114 Epoch: [184] [ 30/156] eta: 0:01:38 lr: 0.002928 min_lr: 0.002928 loss: 3.1724 (3.0211) weight_decay: 0.0500 (0.0500) time: 0.7160 data: 0.0003 max mem: 57114 Epoch: [184] [ 40/156] eta: 0:01:28 lr: 0.002925 min_lr: 0.002925 loss: 3.1473 (2.9647) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0004 max mem: 57114 Epoch: [184] [ 50/156] eta: 0:01:20 lr: 0.002923 min_lr: 0.002923 loss: 3.0215 (2.9685) weight_decay: 0.0500 (0.0500) time: 0.7237 data: 0.0004 max mem: 57114 Epoch: [184] [ 60/156] eta: 0:01:11 lr: 0.002920 min_lr: 0.002920 loss: 2.9488 (2.9278) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0004 max mem: 57114 Epoch: [184] [ 70/156] eta: 0:01:04 lr: 0.002917 min_lr: 0.002917 loss: 2.9187 (2.8974) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [184] [ 80/156] eta: 0:00:56 lr: 0.002914 min_lr: 0.002914 loss: 2.8860 (2.8991) weight_decay: 0.0500 (0.0500) time: 0.7241 data: 0.0004 max mem: 57114 Epoch: [184] [ 90/156] eta: 0:00:48 lr: 0.002912 min_lr: 0.002912 loss: 3.0111 (2.9130) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0004 max mem: 57114 Epoch: [184] [100/156] eta: 0:00:41 lr: 0.002909 min_lr: 0.002909 loss: 3.1108 (2.9262) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [184] [110/156] eta: 0:00:33 lr: 0.002906 min_lr: 0.002906 loss: 3.0966 (2.9199) weight_decay: 0.0500 (0.0500) time: 0.6979 data: 0.0004 max mem: 57114 Epoch: [184] [120/156] eta: 0:00:26 lr: 0.002903 min_lr: 0.002903 loss: 3.0201 (2.9196) weight_decay: 0.0500 (0.0500) time: 0.7025 data: 0.0004 max mem: 57114 Epoch: [184] [130/156] eta: 0:00:18 lr: 0.002900 min_lr: 0.002900 loss: 3.0978 (2.9268) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0009 max mem: 57114 Epoch: [184] [140/156] eta: 0:00:11 lr: 0.002898 min_lr: 0.002898 loss: 3.0710 (2.9223) weight_decay: 0.0500 (0.0500) time: 0.6903 data: 0.0007 max mem: 57114 Epoch: [184] [150/156] eta: 0:00:04 lr: 0.002895 min_lr: 0.002895 loss: 3.0710 (2.9339) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0001 max mem: 57114 Epoch: [184] [155/156] eta: 0:00:00 lr: 0.002894 min_lr: 0.002894 loss: 3.0348 (2.9386) weight_decay: 0.0500 (0.0500) time: 0.6904 data: 0.0001 max mem: 57114 Epoch: [184] Total time: 0:01:52 (0.7235 s / it) Averaged stats: lr: 0.002894 min_lr: 0.002894 loss: 3.0348 (2.9601) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7235 (0.7235) acc1: 89.0625 (89.0625) acc5: 98.4375 (98.4375) time: 2.1639 data: 1.9084 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9237 (0.8652) acc1: 81.2500 (82.6087) acc5: 96.8750 (96.1637) time: 0.5835 data: 0.3817 max mem: 57114 Test: Total time: 0:00:03 (0.6056 s / it) * Acc@1 80.760 Acc@5 95.532 loss 0.969 Accuracy of the model on the 50000 test images: 80.8% Max accuracy: 80.90% Test: [0/5] eta: 0:00:11 loss: 0.6543 (0.6543) acc1: 81.7708 (81.7708) acc5: 95.8333 (95.8333) time: 2.2883 data: 2.0448 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7901 (0.7683) acc1: 79.1667 (77.2379) acc5: 94.7917 (92.4552) time: 0.6084 data: 0.4090 max mem: 57114 Test: Total time: 0:00:03 (0.6249 s / it) * Acc@1 75.170 Acc@5 92.679 loss 0.934 Accuracy of the model EMA on 50000 test images: 75.2% Max EMA accuracy: 75.17% Epoch: [185] [ 0/156] eta: 0:08:52 lr: 0.002893 min_lr: 0.002893 loss: 3.1643 (3.1643) weight_decay: 0.0500 (0.0500) time: 3.4139 data: 2.7642 max mem: 57114 Epoch: [185] [ 10/156] eta: 0:02:22 lr: 0.002891 min_lr: 0.002891 loss: 3.3065 (3.2360) weight_decay: 0.0500 (0.0500) time: 0.9787 data: 0.2515 max mem: 57114 Epoch: [185] [ 20/156] eta: 0:01:54 lr: 0.002888 min_lr: 0.002888 loss: 3.1169 (3.0844) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0003 max mem: 57114 Epoch: [185] [ 30/156] eta: 0:01:40 lr: 0.002885 min_lr: 0.002885 loss: 2.9225 (3.0114) weight_decay: 0.0500 (0.0500) time: 0.7041 data: 0.0004 max mem: 57114 Epoch: [185] [ 40/156] eta: 0:01:30 lr: 0.002882 min_lr: 0.002882 loss: 2.8873 (2.9600) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0004 max mem: 57114 Epoch: [185] [ 50/156] eta: 0:01:21 lr: 0.002879 min_lr: 0.002879 loss: 2.7358 (2.9250) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0004 max mem: 57114 Epoch: [185] [ 60/156] eta: 0:01:13 lr: 0.002877 min_lr: 0.002877 loss: 3.0355 (2.9441) weight_decay: 0.0500 (0.0500) time: 0.7312 data: 0.0004 max mem: 57114 Epoch: [185] [ 70/156] eta: 0:01:05 lr: 0.002874 min_lr: 0.002874 loss: 2.8922 (2.9048) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0004 max mem: 57114 Epoch: [185] [ 80/156] eta: 0:00:57 lr: 0.002871 min_lr: 0.002871 loss: 2.8110 (2.9019) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0003 max mem: 57114 Epoch: [185] [ 90/156] eta: 0:00:49 lr: 0.002868 min_lr: 0.002868 loss: 2.8098 (2.8816) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0003 max mem: 57114 Epoch: [185] [100/156] eta: 0:00:41 lr: 0.002866 min_lr: 0.002866 loss: 2.9726 (2.8975) weight_decay: 0.0500 (0.0500) time: 0.6967 data: 0.0004 max mem: 57114 Epoch: [185] [110/156] eta: 0:00:34 lr: 0.002863 min_lr: 0.002863 loss: 3.0754 (2.9125) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0004 max mem: 57114 Epoch: [185] [120/156] eta: 0:00:26 lr: 0.002860 min_lr: 0.002860 loss: 3.0801 (2.9258) weight_decay: 0.0500 (0.0500) time: 0.7276 data: 0.0004 max mem: 57114 Epoch: [185] [130/156] eta: 0:00:19 lr: 0.002857 min_lr: 0.002857 loss: 3.1468 (2.9339) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0009 max mem: 57114 Epoch: [185] [140/156] eta: 0:00:11 lr: 0.002855 min_lr: 0.002855 loss: 3.1109 (2.9268) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0007 max mem: 57114 Epoch: [185] [150/156] eta: 0:00:04 lr: 0.002852 min_lr: 0.002852 loss: 3.0620 (2.9254) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [185] [155/156] eta: 0:00:00 lr: 0.002851 min_lr: 0.002851 loss: 2.8423 (2.9243) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0001 max mem: 57114 Epoch: [185] Total time: 0:01:53 (0.7295 s / it) Averaged stats: lr: 0.002851 min_lr: 0.002851 loss: 2.8423 (2.9536) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6468 (0.6468) acc1: 86.4583 (86.4583) acc5: 98.4375 (98.4375) time: 2.0142 data: 1.7585 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8048 (0.7636) acc1: 80.7292 (80.6905) acc5: 97.3958 (96.9309) time: 0.5536 data: 0.3518 max mem: 57114 Test: Total time: 0:00:02 (0.5766 s / it) * Acc@1 81.270 Acc@5 95.898 loss 0.843 Accuracy of the model on the 50000 test images: 81.3% Max accuracy: 81.27% Test: [0/5] eta: 0:00:10 loss: 0.6507 (0.6507) acc1: 82.2917 (82.2917) acc5: 95.8333 (95.8333) time: 2.0234 data: 1.7800 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7855 (0.7631) acc1: 79.1667 (77.3657) acc5: 94.7917 (92.5831) time: 0.5553 data: 0.3561 max mem: 57114 Test: Total time: 0:00:02 (0.5682 s / it) * Acc@1 75.294 Acc@5 92.767 loss 0.928 Accuracy of the model EMA on 50000 test images: 75.3% Max EMA accuracy: 75.29% Epoch: [186] [ 0/156] eta: 0:07:33 lr: 0.002850 min_lr: 0.002850 loss: 3.3841 (3.3841) weight_decay: 0.0500 (0.0500) time: 2.9091 data: 2.2553 max mem: 57114 Epoch: [186] [ 10/156] eta: 0:02:12 lr: 0.002847 min_lr: 0.002847 loss: 3.2019 (3.0407) weight_decay: 0.0500 (0.0500) time: 0.9084 data: 0.2053 max mem: 57114 Epoch: [186] [ 20/156] eta: 0:01:53 lr: 0.002845 min_lr: 0.002845 loss: 3.1101 (3.0325) weight_decay: 0.0500 (0.0500) time: 0.7330 data: 0.0003 max mem: 57114 Epoch: [186] [ 30/156] eta: 0:01:42 lr: 0.002842 min_lr: 0.002842 loss: 2.9927 (2.9983) weight_decay: 0.0500 (0.0500) time: 0.7605 data: 0.0004 max mem: 57114 Epoch: [186] [ 40/156] eta: 0:01:31 lr: 0.002839 min_lr: 0.002839 loss: 2.9927 (2.9794) weight_decay: 0.0500 (0.0500) time: 0.7440 data: 0.0004 max mem: 57114 Epoch: [186] [ 50/156] eta: 0:01:22 lr: 0.002836 min_lr: 0.002836 loss: 3.1183 (2.9975) weight_decay: 0.0500 (0.0500) time: 0.7278 data: 0.0004 max mem: 57114 Epoch: [186] [ 60/156] eta: 0:01:14 lr: 0.002834 min_lr: 0.002834 loss: 2.9654 (2.9601) weight_decay: 0.0500 (0.0500) time: 0.7294 data: 0.0004 max mem: 57114 Epoch: [186] [ 70/156] eta: 0:01:05 lr: 0.002831 min_lr: 0.002831 loss: 2.9654 (2.9889) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [186] [ 80/156] eta: 0:00:57 lr: 0.002828 min_lr: 0.002828 loss: 3.0724 (2.9752) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0004 max mem: 57114 Epoch: [186] [ 90/156] eta: 0:00:49 lr: 0.002825 min_lr: 0.002825 loss: 2.9349 (2.9595) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [186] [100/156] eta: 0:00:41 lr: 0.002823 min_lr: 0.002823 loss: 3.0237 (2.9556) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [186] [110/156] eta: 0:00:34 lr: 0.002820 min_lr: 0.002820 loss: 3.0237 (2.9519) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [186] [120/156] eta: 0:00:26 lr: 0.002817 min_lr: 0.002817 loss: 2.9905 (2.9452) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0004 max mem: 57114 Epoch: [186] [130/156] eta: 0:00:19 lr: 0.002814 min_lr: 0.002814 loss: 2.9905 (2.9576) weight_decay: 0.0500 (0.0500) time: 0.6977 data: 0.0009 max mem: 57114 Epoch: [186] [140/156] eta: 0:00:11 lr: 0.002812 min_lr: 0.002812 loss: 3.1214 (2.9591) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0007 max mem: 57114 Epoch: [186] [150/156] eta: 0:00:04 lr: 0.002809 min_lr: 0.002809 loss: 3.0430 (2.9593) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0001 max mem: 57114 Epoch: [186] [155/156] eta: 0:00:00 lr: 0.002808 min_lr: 0.002808 loss: 3.0430 (2.9633) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [186] Total time: 0:01:53 (0.7294 s / it) Averaged stats: lr: 0.002808 min_lr: 0.002808 loss: 3.0430 (2.9614) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8306 (0.8306) acc1: 88.0208 (88.0208) acc5: 98.4375 (98.4375) time: 2.0146 data: 1.7590 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9203 (0.9401) acc1: 84.8958 (81.8414) acc5: 96.8750 (95.9079) time: 0.5537 data: 0.3519 max mem: 57114 Test: Total time: 0:00:02 (0.5755 s / it) * Acc@1 80.369 Acc@5 95.484 loss 1.046 Accuracy of the model on the 50000 test images: 80.4% Max accuracy: 81.27% Test: [0/5] eta: 0:00:11 loss: 0.6473 (0.6473) acc1: 82.2917 (82.2917) acc5: 95.8333 (95.8333) time: 2.2502 data: 2.0067 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7807 (0.7581) acc1: 79.6875 (77.4936) acc5: 94.7917 (92.5831) time: 0.6007 data: 0.4014 max mem: 57114 Test: Total time: 0:00:03 (0.6150 s / it) * Acc@1 75.456 Acc@5 92.809 loss 0.922 Accuracy of the model EMA on 50000 test images: 75.5% Max EMA accuracy: 75.46% Epoch: [187] [ 0/156] eta: 0:07:37 lr: 0.002807 min_lr: 0.002807 loss: 1.8706 (1.8706) weight_decay: 0.0500 (0.0500) time: 2.9348 data: 2.2825 max mem: 57114 Epoch: [187] [ 10/156] eta: 0:02:12 lr: 0.002805 min_lr: 0.002805 loss: 2.7937 (2.7398) weight_decay: 0.0500 (0.0500) time: 0.9083 data: 0.2078 max mem: 57114 Epoch: [187] [ 20/156] eta: 0:01:52 lr: 0.002802 min_lr: 0.002802 loss: 2.7937 (2.7745) weight_decay: 0.0500 (0.0500) time: 0.7200 data: 0.0004 max mem: 57114 Epoch: [187] [ 30/156] eta: 0:01:39 lr: 0.002799 min_lr: 0.002799 loss: 3.0181 (2.8198) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0003 max mem: 57114 Epoch: [187] [ 40/156] eta: 0:01:30 lr: 0.002796 min_lr: 0.002796 loss: 3.0543 (2.8490) weight_decay: 0.0500 (0.0500) time: 0.7249 data: 0.0004 max mem: 57114 Epoch: [187] [ 50/156] eta: 0:01:21 lr: 0.002794 min_lr: 0.002794 loss: 3.0464 (2.8771) weight_decay: 0.0500 (0.0500) time: 0.7234 data: 0.0004 max mem: 57114 Epoch: [187] [ 60/156] eta: 0:01:12 lr: 0.002791 min_lr: 0.002791 loss: 3.0379 (2.8769) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0003 max mem: 57114 Epoch: [187] [ 70/156] eta: 0:01:04 lr: 0.002788 min_lr: 0.002788 loss: 3.0655 (2.8900) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0004 max mem: 57114 Epoch: [187] [ 80/156] eta: 0:00:56 lr: 0.002785 min_lr: 0.002785 loss: 3.1737 (2.9076) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [187] [ 90/156] eta: 0:00:48 lr: 0.002783 min_lr: 0.002783 loss: 3.1293 (2.9045) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0004 max mem: 57114 Epoch: [187] [100/156] eta: 0:00:41 lr: 0.002780 min_lr: 0.002780 loss: 3.0176 (2.9104) weight_decay: 0.0500 (0.0500) time: 0.7071 data: 0.0004 max mem: 57114 Epoch: [187] [110/156] eta: 0:00:33 lr: 0.002777 min_lr: 0.002777 loss: 3.0556 (2.9395) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.0004 max mem: 57114 Epoch: [187] [120/156] eta: 0:00:26 lr: 0.002774 min_lr: 0.002774 loss: 3.2161 (2.9618) weight_decay: 0.0500 (0.0500) time: 0.6981 data: 0.0004 max mem: 57114 Epoch: [187] [130/156] eta: 0:00:18 lr: 0.002772 min_lr: 0.002772 loss: 3.1946 (2.9597) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0005 max mem: 57114 Epoch: [187] [140/156] eta: 0:00:11 lr: 0.002769 min_lr: 0.002769 loss: 2.9646 (2.9620) weight_decay: 0.0500 (0.0500) time: 0.6964 data: 0.0003 max mem: 57114 Epoch: [187] [150/156] eta: 0:00:04 lr: 0.002766 min_lr: 0.002766 loss: 2.8989 (2.9584) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [187] [155/156] eta: 0:00:00 lr: 0.002765 min_lr: 0.002765 loss: 2.9089 (2.9631) weight_decay: 0.0500 (0.0500) time: 0.6834 data: 0.0001 max mem: 57114 Epoch: [187] Total time: 0:01:52 (0.7243 s / it) Averaged stats: lr: 0.002765 min_lr: 0.002765 loss: 2.9089 (2.9455) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8471 (0.8471) acc1: 87.5000 (87.5000) acc5: 98.9583 (98.9583) time: 2.0512 data: 1.7957 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9747 (0.9420) acc1: 82.8125 (81.7136) acc5: 97.3958 (96.1637) time: 0.5609 data: 0.3592 max mem: 57114 Test: Total time: 0:00:02 (0.5834 s / it) * Acc@1 81.328 Acc@5 95.872 loss 1.037 Accuracy of the model on the 50000 test images: 81.3% Max accuracy: 81.33% Test: [0/5] eta: 0:00:10 loss: 0.6441 (0.6441) acc1: 82.8125 (82.8125) acc5: 96.3542 (96.3542) time: 2.0349 data: 1.7915 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7761 (0.7533) acc1: 79.6875 (77.6215) acc5: 94.7917 (92.7110) time: 0.5576 data: 0.3584 max mem: 57114 Test: Total time: 0:00:02 (0.5693 s / it) * Acc@1 75.553 Acc@5 92.879 loss 0.916 Accuracy of the model EMA on 50000 test images: 75.6% Max EMA accuracy: 75.55% Epoch: [188] [ 0/156] eta: 0:07:27 lr: 0.002765 min_lr: 0.002765 loss: 3.4149 (3.4149) weight_decay: 0.0500 (0.0500) time: 2.8686 data: 2.2085 max mem: 57114 Epoch: [188] [ 10/156] eta: 0:02:16 lr: 0.002762 min_lr: 0.002762 loss: 3.1533 (3.0511) weight_decay: 0.0500 (0.0500) time: 0.9367 data: 0.2011 max mem: 57114 Epoch: [188] [ 20/156] eta: 0:01:52 lr: 0.002759 min_lr: 0.002759 loss: 3.1285 (3.0155) weight_decay: 0.0500 (0.0500) time: 0.7246 data: 0.0003 max mem: 57114 Epoch: [188] [ 30/156] eta: 0:01:41 lr: 0.002756 min_lr: 0.002756 loss: 3.1248 (2.9652) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0003 max mem: 57114 Epoch: [188] [ 40/156] eta: 0:01:30 lr: 0.002754 min_lr: 0.002754 loss: 2.9775 (2.9518) weight_decay: 0.0500 (0.0500) time: 0.7382 data: 0.0003 max mem: 57114 Epoch: [188] [ 50/156] eta: 0:01:22 lr: 0.002751 min_lr: 0.002751 loss: 2.9259 (2.9609) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0004 max mem: 57114 Epoch: [188] [ 60/156] eta: 0:01:13 lr: 0.002748 min_lr: 0.002748 loss: 3.0002 (2.9289) weight_decay: 0.0500 (0.0500) time: 0.7354 data: 0.0004 max mem: 57114 Epoch: [188] [ 70/156] eta: 0:01:05 lr: 0.002745 min_lr: 0.002745 loss: 3.0678 (2.9543) weight_decay: 0.0500 (0.0500) time: 0.7363 data: 0.0004 max mem: 57114 Epoch: [188] [ 80/156] eta: 0:00:57 lr: 0.002743 min_lr: 0.002743 loss: 3.0868 (2.9360) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0004 max mem: 57114 Epoch: [188] [ 90/156] eta: 0:00:49 lr: 0.002740 min_lr: 0.002740 loss: 2.9887 (2.9403) weight_decay: 0.0500 (0.0500) time: 0.7260 data: 0.0004 max mem: 57114 Epoch: [188] [100/156] eta: 0:00:41 lr: 0.002737 min_lr: 0.002737 loss: 3.1671 (2.9481) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0003 max mem: 57114 Epoch: [188] [110/156] eta: 0:00:34 lr: 0.002735 min_lr: 0.002735 loss: 3.1340 (2.9461) weight_decay: 0.0500 (0.0500) time: 0.7007 data: 0.0003 max mem: 57114 Epoch: [188] [120/156] eta: 0:00:26 lr: 0.002732 min_lr: 0.002732 loss: 2.9396 (2.9198) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [188] [130/156] eta: 0:00:19 lr: 0.002729 min_lr: 0.002729 loss: 2.9485 (2.9233) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0008 max mem: 57114 Epoch: [188] [140/156] eta: 0:00:11 lr: 0.002726 min_lr: 0.002726 loss: 3.1210 (2.9260) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0007 max mem: 57114 Epoch: [188] [150/156] eta: 0:00:04 lr: 0.002724 min_lr: 0.002724 loss: 3.1210 (2.9356) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [188] [155/156] eta: 0:00:00 lr: 0.002722 min_lr: 0.002722 loss: 3.1252 (2.9425) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0001 max mem: 57114 Epoch: [188] Total time: 0:01:54 (0.7317 s / it) Averaged stats: lr: 0.002722 min_lr: 0.002722 loss: 3.1252 (2.9316) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8089 (0.8089) acc1: 88.5417 (88.5417) acc5: 98.9583 (98.9583) time: 2.0810 data: 1.8251 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9888 (0.9579) acc1: 83.3333 (82.8645) acc5: 97.3958 (96.5473) time: 0.5669 data: 0.3651 max mem: 57114 Test: Total time: 0:00:02 (0.5909 s / it) * Acc@1 80.890 Acc@5 95.548 loss 1.050 Accuracy of the model on the 50000 test images: 80.9% Max accuracy: 81.33% Test: [0/5] eta: 0:00:11 loss: 0.6408 (0.6408) acc1: 82.8125 (82.8125) acc5: 96.3542 (96.3542) time: 2.3211 data: 2.0776 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7718 (0.7486) acc1: 79.6875 (77.6215) acc5: 94.7917 (92.8389) time: 0.6149 data: 0.4156 max mem: 57114 Test: Total time: 0:00:03 (0.6275 s / it) * Acc@1 75.647 Acc@5 92.945 loss 0.910 Accuracy of the model EMA on 50000 test images: 75.6% Max EMA accuracy: 75.65% Epoch: [189] [ 0/156] eta: 0:06:04 lr: 0.002722 min_lr: 0.002722 loss: 1.6168 (1.6168) weight_decay: 0.0500 (0.0500) time: 2.3356 data: 1.6895 max mem: 57114 Epoch: [189] [ 10/156] eta: 0:02:04 lr: 0.002719 min_lr: 0.002719 loss: 3.3567 (2.9256) weight_decay: 0.0500 (0.0500) time: 0.8560 data: 0.1539 max mem: 57114 Epoch: [189] [ 20/156] eta: 0:01:47 lr: 0.002717 min_lr: 0.002717 loss: 3.1696 (2.9788) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0003 max mem: 57114 Epoch: [189] [ 30/156] eta: 0:01:36 lr: 0.002714 min_lr: 0.002714 loss: 3.0126 (3.0033) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0003 max mem: 57114 Epoch: [189] [ 40/156] eta: 0:01:27 lr: 0.002711 min_lr: 0.002711 loss: 2.8315 (2.9150) weight_decay: 0.0500 (0.0500) time: 0.7272 data: 0.0004 max mem: 57114 Epoch: [189] [ 50/156] eta: 0:01:19 lr: 0.002708 min_lr: 0.002708 loss: 2.6264 (2.9079) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0004 max mem: 57114 Epoch: [189] [ 60/156] eta: 0:01:11 lr: 0.002706 min_lr: 0.002706 loss: 2.9806 (2.9261) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0004 max mem: 57114 Epoch: [189] [ 70/156] eta: 0:01:03 lr: 0.002703 min_lr: 0.002703 loss: 3.0577 (2.9156) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0004 max mem: 57114 Epoch: [189] [ 80/156] eta: 0:00:56 lr: 0.002700 min_lr: 0.002700 loss: 2.8834 (2.9305) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [189] [ 90/156] eta: 0:00:48 lr: 0.002697 min_lr: 0.002697 loss: 3.0158 (2.9350) weight_decay: 0.0500 (0.0500) time: 0.7061 data: 0.0003 max mem: 57114 Epoch: [189] [100/156] eta: 0:00:40 lr: 0.002695 min_lr: 0.002695 loss: 2.9662 (2.9223) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0004 max mem: 57114 Epoch: [189] [110/156] eta: 0:00:33 lr: 0.002692 min_lr: 0.002692 loss: 2.8646 (2.9141) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [189] [120/156] eta: 0:00:26 lr: 0.002689 min_lr: 0.002689 loss: 2.8164 (2.9062) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0004 max mem: 57114 Epoch: [189] [130/156] eta: 0:00:18 lr: 0.002687 min_lr: 0.002687 loss: 2.8608 (2.8951) weight_decay: 0.0500 (0.0500) time: 0.6993 data: 0.0009 max mem: 57114 Epoch: [189] [140/156] eta: 0:00:11 lr: 0.002684 min_lr: 0.002684 loss: 2.7419 (2.8843) weight_decay: 0.0500 (0.0500) time: 0.6859 data: 0.0007 max mem: 57114 Epoch: [189] [150/156] eta: 0:00:04 lr: 0.002681 min_lr: 0.002681 loss: 2.8198 (2.8930) weight_decay: 0.0500 (0.0500) time: 0.6859 data: 0.0001 max mem: 57114 Epoch: [189] [155/156] eta: 0:00:00 lr: 0.002680 min_lr: 0.002680 loss: 3.1009 (2.9056) weight_decay: 0.0500 (0.0500) time: 0.6804 data: 0.0001 max mem: 57114 Epoch: [189] Total time: 0:01:52 (0.7195 s / it) Averaged stats: lr: 0.002680 min_lr: 0.002680 loss: 3.1009 (2.9340) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8119 (0.8119) acc1: 88.5417 (88.5417) acc5: 98.9583 (98.9583) time: 2.1671 data: 1.9109 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0093 (0.9483) acc1: 80.7292 (81.0742) acc5: 96.8750 (96.0358) time: 0.5842 data: 0.3823 max mem: 57114 Test: Total time: 0:00:03 (0.6082 s / it) * Acc@1 80.896 Acc@5 95.824 loss 1.027 Accuracy of the model on the 50000 test images: 80.9% Max accuracy: 81.33% Test: [0/5] eta: 0:00:11 loss: 0.6376 (0.6376) acc1: 83.3333 (83.3333) acc5: 96.3542 (96.3542) time: 2.3419 data: 2.0984 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7678 (0.7442) acc1: 80.2083 (77.8772) acc5: 94.7917 (92.9668) time: 0.6190 data: 0.4198 max mem: 57114 Test: Total time: 0:00:03 (0.6288 s / it) * Acc@1 75.751 Acc@5 93.001 loss 0.905 Accuracy of the model EMA on 50000 test images: 75.8% Max EMA accuracy: 75.75% Epoch: [190] [ 0/156] eta: 0:07:48 lr: 0.002680 min_lr: 0.002680 loss: 2.9301 (2.9301) weight_decay: 0.0500 (0.0500) time: 3.0052 data: 2.3503 max mem: 57114 Epoch: [190] [ 10/156] eta: 0:02:15 lr: 0.002677 min_lr: 0.002677 loss: 3.0585 (2.9608) weight_decay: 0.0500 (0.0500) time: 0.9251 data: 0.2139 max mem: 57114 Epoch: [190] [ 20/156] eta: 0:01:53 lr: 0.002674 min_lr: 0.002674 loss: 2.9337 (2.8705) weight_decay: 0.0500 (0.0500) time: 0.7259 data: 0.0003 max mem: 57114 Epoch: [190] [ 30/156] eta: 0:01:40 lr: 0.002671 min_lr: 0.002671 loss: 2.9337 (2.8998) weight_decay: 0.0500 (0.0500) time: 0.7304 data: 0.0003 max mem: 57114 Epoch: [190] [ 40/156] eta: 0:01:30 lr: 0.002669 min_lr: 0.002669 loss: 3.0810 (2.9468) weight_decay: 0.0500 (0.0500) time: 0.7206 data: 0.0005 max mem: 57114 Epoch: [190] [ 50/156] eta: 0:01:21 lr: 0.002666 min_lr: 0.002666 loss: 3.0810 (2.9582) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0005 max mem: 57114 Epoch: [190] [ 60/156] eta: 0:01:12 lr: 0.002663 min_lr: 0.002663 loss: 3.1615 (2.9807) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [190] [ 70/156] eta: 0:01:04 lr: 0.002661 min_lr: 0.002661 loss: 2.9762 (2.9652) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0004 max mem: 57114 Epoch: [190] [ 80/156] eta: 0:00:56 lr: 0.002658 min_lr: 0.002658 loss: 2.9762 (2.9767) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [190] [ 90/156] eta: 0:00:48 lr: 0.002655 min_lr: 0.002655 loss: 2.9255 (2.9711) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0004 max mem: 57114 Epoch: [190] [100/156] eta: 0:00:41 lr: 0.002652 min_lr: 0.002652 loss: 2.8812 (2.9519) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [190] [110/156] eta: 0:00:33 lr: 0.002650 min_lr: 0.002650 loss: 2.8812 (2.9487) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0003 max mem: 57114 Epoch: [190] [120/156] eta: 0:00:26 lr: 0.002647 min_lr: 0.002647 loss: 2.8791 (2.9469) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0004 max mem: 57114 Epoch: [190] [130/156] eta: 0:00:18 lr: 0.002644 min_lr: 0.002644 loss: 2.9948 (2.9470) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0009 max mem: 57114 Epoch: [190] [140/156] eta: 0:00:11 lr: 0.002642 min_lr: 0.002642 loss: 2.9572 (2.9230) weight_decay: 0.0500 (0.0500) time: 0.6853 data: 0.0007 max mem: 57114 Epoch: [190] [150/156] eta: 0:00:04 lr: 0.002639 min_lr: 0.002639 loss: 2.8177 (2.9315) weight_decay: 0.0500 (0.0500) time: 0.6813 data: 0.0001 max mem: 57114 Epoch: [190] [155/156] eta: 0:00:00 lr: 0.002638 min_lr: 0.002638 loss: 3.0295 (2.9385) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [190] Total time: 0:01:52 (0.7232 s / it) Averaged stats: lr: 0.002638 min_lr: 0.002638 loss: 3.0295 (2.9323) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5760 (0.5760) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 2.0705 data: 1.8149 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8151 (0.7596) acc1: 80.7292 (81.8414) acc5: 96.8750 (96.1637) time: 0.5648 data: 0.3630 max mem: 57114 Test: Total time: 0:00:02 (0.5871 s / it) * Acc@1 81.212 Acc@5 95.638 loss 0.860 Accuracy of the model on the 50000 test images: 81.2% Max accuracy: 81.33% Test: [0/5] eta: 0:00:11 loss: 0.6346 (0.6346) acc1: 83.3333 (83.3333) acc5: 96.3542 (96.3542) time: 2.3527 data: 2.1092 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7636 (0.7399) acc1: 80.2083 (77.8772) acc5: 94.7917 (93.0946) time: 0.6212 data: 0.4219 max mem: 57114 Test: Total time: 0:00:03 (0.6372 s / it) * Acc@1 75.841 Acc@5 93.065 loss 0.899 Accuracy of the model EMA on 50000 test images: 75.8% Max EMA accuracy: 75.84% Epoch: [191] [ 0/156] eta: 0:07:51 lr: 0.002637 min_lr: 0.002637 loss: 3.2129 (3.2129) weight_decay: 0.0500 (0.0500) time: 3.0211 data: 2.3730 max mem: 57114 Epoch: [191] [ 10/156] eta: 0:02:18 lr: 0.002635 min_lr: 0.002635 loss: 3.1058 (3.0430) weight_decay: 0.0500 (0.0500) time: 0.9486 data: 0.2161 max mem: 57114 Epoch: [191] [ 20/156] eta: 0:01:55 lr: 0.002632 min_lr: 0.002632 loss: 3.1058 (2.9967) weight_decay: 0.0500 (0.0500) time: 0.7387 data: 0.0004 max mem: 57114 Epoch: [191] [ 30/156] eta: 0:01:41 lr: 0.002629 min_lr: 0.002629 loss: 3.1270 (3.0117) weight_decay: 0.0500 (0.0500) time: 0.7224 data: 0.0004 max mem: 57114 Epoch: [191] [ 40/156] eta: 0:01:30 lr: 0.002626 min_lr: 0.002626 loss: 3.1244 (3.0103) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0003 max mem: 57114 Epoch: [191] [ 50/156] eta: 0:01:21 lr: 0.002624 min_lr: 0.002624 loss: 3.0968 (2.9928) weight_decay: 0.0500 (0.0500) time: 0.7154 data: 0.0004 max mem: 57114 Epoch: [191] [ 60/156] eta: 0:01:13 lr: 0.002621 min_lr: 0.002621 loss: 3.0598 (2.9826) weight_decay: 0.0500 (0.0500) time: 0.7301 data: 0.0004 max mem: 57114 Epoch: [191] [ 70/156] eta: 0:01:04 lr: 0.002618 min_lr: 0.002618 loss: 3.0581 (2.9779) weight_decay: 0.0500 (0.0500) time: 0.7213 data: 0.0004 max mem: 57114 Epoch: [191] [ 80/156] eta: 0:00:57 lr: 0.002616 min_lr: 0.002616 loss: 3.0581 (2.9677) weight_decay: 0.0500 (0.0500) time: 0.7154 data: 0.0004 max mem: 57114 Epoch: [191] [ 90/156] eta: 0:00:49 lr: 0.002613 min_lr: 0.002613 loss: 2.7591 (2.9416) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0004 max mem: 57114 Epoch: [191] [100/156] eta: 0:00:41 lr: 0.002610 min_lr: 0.002610 loss: 2.7591 (2.9313) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0003 max mem: 57114 Epoch: [191] [110/156] eta: 0:00:34 lr: 0.002608 min_lr: 0.002608 loss: 3.0424 (2.9393) weight_decay: 0.0500 (0.0500) time: 0.7049 data: 0.0004 max mem: 57114 Epoch: [191] [120/156] eta: 0:00:26 lr: 0.002605 min_lr: 0.002605 loss: 3.0523 (2.9363) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [191] [130/156] eta: 0:00:19 lr: 0.002602 min_lr: 0.002602 loss: 3.0098 (2.9270) weight_decay: 0.0500 (0.0500) time: 0.7030 data: 0.0009 max mem: 57114 Epoch: [191] [140/156] eta: 0:00:11 lr: 0.002599 min_lr: 0.002599 loss: 2.8368 (2.9265) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0007 max mem: 57114 Epoch: [191] [150/156] eta: 0:00:04 lr: 0.002597 min_lr: 0.002597 loss: 3.1057 (2.9330) weight_decay: 0.0500 (0.0500) time: 0.6876 data: 0.0001 max mem: 57114 Epoch: [191] [155/156] eta: 0:00:00 lr: 0.002595 min_lr: 0.002595 loss: 3.1177 (2.9450) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.0001 max mem: 57114 Epoch: [191] Total time: 0:01:53 (0.7279 s / it) Averaged stats: lr: 0.002595 min_lr: 0.002595 loss: 3.1177 (2.9275) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8171 (0.8171) acc1: 87.5000 (87.5000) acc5: 98.4375 (98.4375) time: 2.0279 data: 1.7715 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9928 (0.9386) acc1: 82.2917 (82.2251) acc5: 98.4375 (96.4194) time: 0.5563 data: 0.3543 max mem: 57114 Test: Total time: 0:00:02 (0.5786 s / it) * Acc@1 81.234 Acc@5 95.746 loss 1.022 Accuracy of the model on the 50000 test images: 81.2% Max accuracy: 81.33% Test: [0/5] eta: 0:00:11 loss: 0.6319 (0.6319) acc1: 83.3333 (83.3333) acc5: 96.3542 (96.3542) time: 2.2611 data: 2.0175 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7601 (0.7360) acc1: 80.7292 (78.1330) acc5: 94.7917 (93.0946) time: 0.6029 data: 0.4036 max mem: 57114 Test: Total time: 0:00:03 (0.6145 s / it) * Acc@1 75.949 Acc@5 93.135 loss 0.894 Accuracy of the model EMA on 50000 test images: 75.9% Max EMA accuracy: 75.95% Epoch: [192] [ 0/156] eta: 0:07:22 lr: 0.002595 min_lr: 0.002595 loss: 3.5172 (3.5172) weight_decay: 0.0500 (0.0500) time: 2.8347 data: 2.1864 max mem: 57114 Epoch: [192] [ 10/156] eta: 0:02:13 lr: 0.002592 min_lr: 0.002592 loss: 3.1484 (3.0348) weight_decay: 0.0500 (0.0500) time: 0.9175 data: 0.1990 max mem: 57114 Epoch: [192] [ 20/156] eta: 0:01:52 lr: 0.002590 min_lr: 0.002590 loss: 3.1484 (3.0754) weight_decay: 0.0500 (0.0500) time: 0.7282 data: 0.0003 max mem: 57114 Epoch: [192] [ 30/156] eta: 0:01:40 lr: 0.002587 min_lr: 0.002587 loss: 3.1216 (2.9568) weight_decay: 0.0500 (0.0500) time: 0.7315 data: 0.0004 max mem: 57114 Epoch: [192] [ 40/156] eta: 0:01:30 lr: 0.002584 min_lr: 0.002584 loss: 3.1216 (2.9642) weight_decay: 0.0500 (0.0500) time: 0.7320 data: 0.0003 max mem: 57114 Epoch: [192] [ 50/156] eta: 0:01:21 lr: 0.002582 min_lr: 0.002582 loss: 3.1801 (2.9703) weight_decay: 0.0500 (0.0500) time: 0.7343 data: 0.0004 max mem: 57114 Epoch: [192] [ 60/156] eta: 0:01:13 lr: 0.002579 min_lr: 0.002579 loss: 3.0599 (2.9729) weight_decay: 0.0500 (0.0500) time: 0.7258 data: 0.0004 max mem: 57114 Epoch: [192] [ 70/156] eta: 0:01:05 lr: 0.002576 min_lr: 0.002576 loss: 3.0599 (2.9610) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [192] [ 80/156] eta: 0:00:57 lr: 0.002574 min_lr: 0.002574 loss: 3.0084 (2.9523) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0004 max mem: 57114 Epoch: [192] [ 90/156] eta: 0:00:49 lr: 0.002571 min_lr: 0.002571 loss: 2.8182 (2.9372) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [192] [100/156] eta: 0:00:41 lr: 0.002568 min_lr: 0.002568 loss: 2.9116 (2.9322) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0003 max mem: 57114 Epoch: [192] [110/156] eta: 0:00:33 lr: 0.002566 min_lr: 0.002566 loss: 3.0430 (2.9499) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [192] [120/156] eta: 0:00:26 lr: 0.002563 min_lr: 0.002563 loss: 2.9443 (2.9358) weight_decay: 0.0500 (0.0500) time: 0.7127 data: 0.0004 max mem: 57114 Epoch: [192] [130/156] eta: 0:00:19 lr: 0.002560 min_lr: 0.002560 loss: 2.8208 (2.9299) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0009 max mem: 57114 Epoch: [192] [140/156] eta: 0:00:11 lr: 0.002558 min_lr: 0.002558 loss: 2.9404 (2.9268) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0007 max mem: 57114 Epoch: [192] [150/156] eta: 0:00:04 lr: 0.002555 min_lr: 0.002555 loss: 3.1219 (2.9449) weight_decay: 0.0500 (0.0500) time: 0.6823 data: 0.0001 max mem: 57114 Epoch: [192] [155/156] eta: 0:00:00 lr: 0.002554 min_lr: 0.002554 loss: 3.0483 (2.9398) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [192] Total time: 0:01:53 (0.7277 s / it) Averaged stats: lr: 0.002554 min_lr: 0.002554 loss: 3.0483 (2.9368) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6746 (0.6746) acc1: 86.4583 (86.4583) acc5: 98.4375 (98.4375) time: 2.1485 data: 1.8923 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9711 (0.8065) acc1: 80.2083 (81.2020) acc5: 96.8750 (96.5473) time: 0.5804 data: 0.3785 max mem: 57114 Test: Total time: 0:00:03 (0.6042 s / it) * Acc@1 81.522 Acc@5 96.006 loss 0.888 Accuracy of the model on the 50000 test images: 81.5% Max accuracy: 81.52% Test: [0/5] eta: 0:00:09 loss: 0.6292 (0.6292) acc1: 83.3333 (83.3333) acc5: 96.3542 (96.3542) time: 1.9958 data: 1.7524 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7566 (0.7321) acc1: 80.7292 (78.0051) acc5: 94.2708 (93.0946) time: 0.5498 data: 0.3506 max mem: 57114 Test: Total time: 0:00:02 (0.5615 s / it) * Acc@1 76.025 Acc@5 93.193 loss 0.889 Accuracy of the model EMA on 50000 test images: 76.0% Max EMA accuracy: 76.03% Epoch: [193] [ 0/156] eta: 0:07:05 lr: 0.002553 min_lr: 0.002553 loss: 2.9424 (2.9424) weight_decay: 0.0500 (0.0500) time: 2.7300 data: 2.0727 max mem: 57114 Epoch: [193] [ 10/156] eta: 0:02:12 lr: 0.002551 min_lr: 0.002551 loss: 3.0541 (3.0533) weight_decay: 0.0500 (0.0500) time: 0.9103 data: 0.1887 max mem: 57114 Epoch: [193] [ 20/156] eta: 0:01:54 lr: 0.002548 min_lr: 0.002548 loss: 3.1877 (3.0662) weight_decay: 0.0500 (0.0500) time: 0.7446 data: 0.0003 max mem: 57114 Epoch: [193] [ 30/156] eta: 0:01:41 lr: 0.002545 min_lr: 0.002545 loss: 3.1827 (3.0333) weight_decay: 0.0500 (0.0500) time: 0.7500 data: 0.0004 max mem: 57114 Epoch: [193] [ 40/156] eta: 0:01:31 lr: 0.002543 min_lr: 0.002543 loss: 3.0496 (2.9775) weight_decay: 0.0500 (0.0500) time: 0.7420 data: 0.0003 max mem: 57114 Epoch: [193] [ 50/156] eta: 0:01:22 lr: 0.002540 min_lr: 0.002540 loss: 3.0496 (2.9814) weight_decay: 0.0500 (0.0500) time: 0.7317 data: 0.0004 max mem: 57114 Epoch: [193] [ 60/156] eta: 0:01:13 lr: 0.002537 min_lr: 0.002537 loss: 3.0594 (2.9651) weight_decay: 0.0500 (0.0500) time: 0.7160 data: 0.0004 max mem: 57114 Epoch: [193] [ 70/156] eta: 0:01:05 lr: 0.002534 min_lr: 0.002534 loss: 3.0594 (2.9816) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [193] [ 80/156] eta: 0:00:57 lr: 0.002532 min_lr: 0.002532 loss: 3.0897 (2.9815) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0003 max mem: 57114 Epoch: [193] [ 90/156] eta: 0:00:49 lr: 0.002529 min_lr: 0.002529 loss: 2.9258 (2.9811) weight_decay: 0.0500 (0.0500) time: 0.7228 data: 0.0003 max mem: 57114 Epoch: [193] [100/156] eta: 0:00:41 lr: 0.002526 min_lr: 0.002526 loss: 3.1482 (2.9946) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [193] [110/156] eta: 0:00:34 lr: 0.002524 min_lr: 0.002524 loss: 3.1410 (2.9861) weight_decay: 0.0500 (0.0500) time: 0.7114 data: 0.0004 max mem: 57114 Epoch: [193] [120/156] eta: 0:00:26 lr: 0.002521 min_lr: 0.002521 loss: 2.7180 (2.9464) weight_decay: 0.0500 (0.0500) time: 0.7009 data: 0.0004 max mem: 57114 Epoch: [193] [130/156] eta: 0:00:19 lr: 0.002518 min_lr: 0.002518 loss: 2.7349 (2.9461) weight_decay: 0.0500 (0.0500) time: 0.6903 data: 0.0008 max mem: 57114 Epoch: [193] [140/156] eta: 0:00:11 lr: 0.002516 min_lr: 0.002516 loss: 2.9160 (2.9448) weight_decay: 0.0500 (0.0500) time: 0.6890 data: 0.0007 max mem: 57114 Epoch: [193] [150/156] eta: 0:00:04 lr: 0.002513 min_lr: 0.002513 loss: 2.9906 (2.9573) weight_decay: 0.0500 (0.0500) time: 0.6846 data: 0.0001 max mem: 57114 Epoch: [193] [155/156] eta: 0:00:00 lr: 0.002512 min_lr: 0.002512 loss: 3.1041 (2.9614) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0001 max mem: 57114 Epoch: [193] Total time: 0:01:53 (0.7299 s / it) Averaged stats: lr: 0.002512 min_lr: 0.002512 loss: 3.1041 (2.9178) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7891 (0.7891) acc1: 88.0208 (88.0208) acc5: 98.9583 (98.9583) time: 2.1399 data: 1.8833 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9602 (0.8992) acc1: 81.2500 (81.3299) acc5: 96.8750 (96.5473) time: 0.5787 data: 0.3767 max mem: 57114 Test: Total time: 0:00:02 (0.5982 s / it) * Acc@1 81.420 Acc@5 95.784 loss 0.966 Accuracy of the model on the 50000 test images: 81.4% Max accuracy: 81.52% Test: [0/5] eta: 0:00:11 loss: 0.6264 (0.6264) acc1: 83.3333 (83.3333) acc5: 96.8750 (96.8750) time: 2.3493 data: 2.1057 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7531 (0.7283) acc1: 80.7292 (78.1330) acc5: 94.2708 (93.2225) time: 0.6205 data: 0.4212 max mem: 57114 Test: Total time: 0:00:03 (0.6321 s / it) * Acc@1 76.113 Acc@5 93.252 loss 0.885 Accuracy of the model EMA on 50000 test images: 76.1% Max EMA accuracy: 76.11% Epoch: [194] [ 0/156] eta: 0:07:05 lr: 0.002512 min_lr: 0.002512 loss: 2.7745 (2.7745) weight_decay: 0.0500 (0.0500) time: 2.7295 data: 2.0796 max mem: 57114 Epoch: [194] [ 10/156] eta: 0:02:10 lr: 0.002509 min_lr: 0.002509 loss: 2.9269 (2.8751) weight_decay: 0.0500 (0.0500) time: 0.8956 data: 0.1893 max mem: 57114 Epoch: [194] [ 20/156] eta: 0:01:50 lr: 0.002506 min_lr: 0.002506 loss: 2.8822 (2.8190) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0003 max mem: 57114 Epoch: [194] [ 30/156] eta: 0:01:39 lr: 0.002504 min_lr: 0.002504 loss: 2.8246 (2.8060) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0003 max mem: 57114 Epoch: [194] [ 40/156] eta: 0:01:29 lr: 0.002501 min_lr: 0.002501 loss: 3.0370 (2.8592) weight_decay: 0.0500 (0.0500) time: 0.7324 data: 0.0003 max mem: 57114 Epoch: [194] [ 50/156] eta: 0:01:20 lr: 0.002498 min_lr: 0.002498 loss: 3.0739 (2.8707) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0004 max mem: 57114 Epoch: [194] [ 60/156] eta: 0:01:12 lr: 0.002496 min_lr: 0.002496 loss: 3.0984 (2.8994) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0004 max mem: 57114 Epoch: [194] [ 70/156] eta: 0:01:04 lr: 0.002493 min_lr: 0.002493 loss: 3.0984 (2.8972) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [194] [ 80/156] eta: 0:00:56 lr: 0.002490 min_lr: 0.002490 loss: 3.0049 (2.9028) weight_decay: 0.0500 (0.0500) time: 0.7084 data: 0.0004 max mem: 57114 Epoch: [194] [ 90/156] eta: 0:00:48 lr: 0.002488 min_lr: 0.002488 loss: 2.9636 (2.8937) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [194] [100/156] eta: 0:00:41 lr: 0.002485 min_lr: 0.002485 loss: 2.9389 (2.9067) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [194] [110/156] eta: 0:00:33 lr: 0.002482 min_lr: 0.002482 loss: 3.0241 (2.8973) weight_decay: 0.0500 (0.0500) time: 0.7006 data: 0.0004 max mem: 57114 Epoch: [194] [120/156] eta: 0:00:26 lr: 0.002480 min_lr: 0.002480 loss: 3.0241 (2.9009) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0004 max mem: 57114 Epoch: [194] [130/156] eta: 0:00:18 lr: 0.002477 min_lr: 0.002477 loss: 3.0424 (2.8988) weight_decay: 0.0500 (0.0500) time: 0.6947 data: 0.0008 max mem: 57114 Epoch: [194] [140/156] eta: 0:00:11 lr: 0.002474 min_lr: 0.002474 loss: 2.9280 (2.8848) weight_decay: 0.0500 (0.0500) time: 0.6883 data: 0.0007 max mem: 57114 Epoch: [194] [150/156] eta: 0:00:04 lr: 0.002472 min_lr: 0.002472 loss: 2.5248 (2.8734) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0001 max mem: 57114 Epoch: [194] [155/156] eta: 0:00:00 lr: 0.002470 min_lr: 0.002470 loss: 2.8091 (2.8860) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0001 max mem: 57114 Epoch: [194] Total time: 0:01:52 (0.7238 s / it) Averaged stats: lr: 0.002470 min_lr: 0.002470 loss: 2.8091 (2.9121) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7127 (0.7127) acc1: 90.1042 (90.1042) acc5: 98.9583 (98.9583) time: 2.0438 data: 1.7877 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9491 (0.8650) acc1: 83.3333 (82.8645) acc5: 97.3958 (96.2916) time: 0.5595 data: 0.3576 max mem: 57114 Test: Total time: 0:00:02 (0.5845 s / it) * Acc@1 81.432 Acc@5 95.806 loss 0.958 Accuracy of the model on the 50000 test images: 81.4% Max accuracy: 81.52% Test: [0/5] eta: 0:00:11 loss: 0.6235 (0.6235) acc1: 83.3333 (83.3333) acc5: 96.8750 (96.8750) time: 2.2930 data: 2.0494 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7497 (0.7246) acc1: 80.7292 (78.1330) acc5: 94.2708 (93.2225) time: 0.6093 data: 0.4100 max mem: 57114 Test: Total time: 0:00:03 (0.6238 s / it) * Acc@1 76.171 Acc@5 93.310 loss 0.880 Accuracy of the model EMA on 50000 test images: 76.2% Max EMA accuracy: 76.17% Epoch: [195] [ 0/156] eta: 0:09:00 lr: 0.002470 min_lr: 0.002470 loss: 3.2102 (3.2102) weight_decay: 0.0500 (0.0500) time: 3.4640 data: 2.8160 max mem: 57114 Epoch: [195] [ 10/156] eta: 0:02:21 lr: 0.002467 min_lr: 0.002467 loss: 3.0470 (3.0290) weight_decay: 0.0500 (0.0500) time: 0.9674 data: 0.2562 max mem: 57114 Epoch: [195] [ 20/156] eta: 0:01:55 lr: 0.002465 min_lr: 0.002465 loss: 3.0690 (3.0217) weight_decay: 0.0500 (0.0500) time: 0.7157 data: 0.0003 max mem: 57114 Epoch: [195] [ 30/156] eta: 0:01:41 lr: 0.002462 min_lr: 0.002462 loss: 2.8441 (2.9027) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0003 max mem: 57114 Epoch: [195] [ 40/156] eta: 0:01:30 lr: 0.002459 min_lr: 0.002459 loss: 2.8441 (2.9184) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0004 max mem: 57114 Epoch: [195] [ 50/156] eta: 0:01:21 lr: 0.002457 min_lr: 0.002457 loss: 3.0509 (2.9395) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [195] [ 60/156] eta: 0:01:13 lr: 0.002454 min_lr: 0.002454 loss: 3.0346 (2.9486) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0004 max mem: 57114 Epoch: [195] [ 70/156] eta: 0:01:05 lr: 0.002451 min_lr: 0.002451 loss: 3.1718 (2.9790) weight_decay: 0.0500 (0.0500) time: 0.7237 data: 0.0004 max mem: 57114 Epoch: [195] [ 80/156] eta: 0:00:57 lr: 0.002449 min_lr: 0.002449 loss: 3.0486 (2.9664) weight_decay: 0.0500 (0.0500) time: 0.7206 data: 0.0003 max mem: 57114 Epoch: [195] [ 90/156] eta: 0:00:49 lr: 0.002446 min_lr: 0.002446 loss: 2.7741 (2.9410) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0004 max mem: 57114 Epoch: [195] [100/156] eta: 0:00:41 lr: 0.002443 min_lr: 0.002443 loss: 2.6596 (2.9198) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0004 max mem: 57114 Epoch: [195] [110/156] eta: 0:00:33 lr: 0.002441 min_lr: 0.002441 loss: 2.5992 (2.8909) weight_decay: 0.0500 (0.0500) time: 0.7000 data: 0.0004 max mem: 57114 Epoch: [195] [120/156] eta: 0:00:26 lr: 0.002438 min_lr: 0.002438 loss: 2.6265 (2.8919) weight_decay: 0.0500 (0.0500) time: 0.7047 data: 0.0004 max mem: 57114 Epoch: [195] [130/156] eta: 0:00:19 lr: 0.002435 min_lr: 0.002435 loss: 3.0298 (2.8959) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0008 max mem: 57114 Epoch: [195] [140/156] eta: 0:00:11 lr: 0.002433 min_lr: 0.002433 loss: 2.8706 (2.8844) weight_decay: 0.0500 (0.0500) time: 0.6943 data: 0.0007 max mem: 57114 Epoch: [195] [150/156] eta: 0:00:04 lr: 0.002430 min_lr: 0.002430 loss: 2.8632 (2.8782) weight_decay: 0.0500 (0.0500) time: 0.6887 data: 0.0001 max mem: 57114 Epoch: [195] [155/156] eta: 0:00:00 lr: 0.002429 min_lr: 0.002429 loss: 2.7916 (2.8740) weight_decay: 0.0500 (0.0500) time: 0.6852 data: 0.0001 max mem: 57114 Epoch: [195] Total time: 0:01:53 (0.7276 s / it) Averaged stats: lr: 0.002429 min_lr: 0.002429 loss: 2.7916 (2.9055) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5383 (0.5383) acc1: 88.0208 (88.0208) acc5: 98.9583 (98.9583) time: 2.0225 data: 1.7667 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7504 (0.6948) acc1: 83.8542 (82.7366) acc5: 96.3542 (96.2916) time: 0.5554 data: 0.3534 max mem: 57114 Test: Total time: 0:00:02 (0.5757 s / it) * Acc@1 81.877 Acc@5 95.962 loss 0.782 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.88% Test: [0/5] eta: 0:00:10 loss: 0.6208 (0.6208) acc1: 83.8542 (83.8542) acc5: 96.8750 (96.8750) time: 2.0773 data: 1.8338 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7463 (0.7209) acc1: 80.2083 (78.2609) acc5: 94.2708 (93.2225) time: 0.5662 data: 0.3669 max mem: 57114 Test: Total time: 0:00:02 (0.5771 s / it) * Acc@1 76.287 Acc@5 93.356 loss 0.876 Accuracy of the model EMA on 50000 test images: 76.3% Max EMA accuracy: 76.29% Epoch: [196] [ 0/156] eta: 0:06:28 lr: 0.002429 min_lr: 0.002429 loss: 3.0527 (3.0527) weight_decay: 0.0500 (0.0500) time: 2.4901 data: 1.8423 max mem: 57114 Epoch: [196] [ 10/156] eta: 0:02:05 lr: 0.002426 min_lr: 0.002426 loss: 3.0527 (2.9372) weight_decay: 0.0500 (0.0500) time: 0.8624 data: 0.1678 max mem: 57114 Epoch: [196] [ 20/156] eta: 0:01:48 lr: 0.002423 min_lr: 0.002423 loss: 3.1208 (2.9783) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0003 max mem: 57114 Epoch: [196] [ 30/156] eta: 0:01:37 lr: 0.002421 min_lr: 0.002421 loss: 3.1034 (2.9392) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0003 max mem: 57114 Epoch: [196] [ 40/156] eta: 0:01:28 lr: 0.002418 min_lr: 0.002418 loss: 3.0120 (2.9238) weight_decay: 0.0500 (0.0500) time: 0.7299 data: 0.0004 max mem: 57114 Epoch: [196] [ 50/156] eta: 0:01:20 lr: 0.002415 min_lr: 0.002415 loss: 3.0814 (2.9454) weight_decay: 0.0500 (0.0500) time: 0.7455 data: 0.0004 max mem: 57114 Epoch: [196] [ 60/156] eta: 0:01:12 lr: 0.002413 min_lr: 0.002413 loss: 3.1084 (2.9261) weight_decay: 0.0500 (0.0500) time: 0.7360 data: 0.0004 max mem: 57114 Epoch: [196] [ 70/156] eta: 0:01:04 lr: 0.002410 min_lr: 0.002410 loss: 2.9302 (2.9142) weight_decay: 0.0500 (0.0500) time: 0.7227 data: 0.0005 max mem: 57114 Epoch: [196] [ 80/156] eta: 0:00:56 lr: 0.002407 min_lr: 0.002407 loss: 3.1020 (2.9528) weight_decay: 0.0500 (0.0500) time: 0.7265 data: 0.0004 max mem: 57114 Epoch: [196] [ 90/156] eta: 0:00:49 lr: 0.002405 min_lr: 0.002405 loss: 3.1294 (2.9565) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0004 max mem: 57114 Epoch: [196] [100/156] eta: 0:00:41 lr: 0.002402 min_lr: 0.002402 loss: 3.0553 (2.9403) weight_decay: 0.0500 (0.0500) time: 0.7013 data: 0.0003 max mem: 57114 Epoch: [196] [110/156] eta: 0:00:33 lr: 0.002400 min_lr: 0.002400 loss: 2.4005 (2.8952) weight_decay: 0.0500 (0.0500) time: 0.6945 data: 0.0004 max mem: 57114 Epoch: [196] [120/156] eta: 0:00:26 lr: 0.002397 min_lr: 0.002397 loss: 2.4685 (2.8938) weight_decay: 0.0500 (0.0500) time: 0.6993 data: 0.0004 max mem: 57114 Epoch: [196] [130/156] eta: 0:00:18 lr: 0.002394 min_lr: 0.002394 loss: 2.8681 (2.8905) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0009 max mem: 57114 Epoch: [196] [140/156] eta: 0:00:11 lr: 0.002392 min_lr: 0.002392 loss: 2.9842 (2.8991) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0007 max mem: 57114 Epoch: [196] [150/156] eta: 0:00:04 lr: 0.002389 min_lr: 0.002389 loss: 3.0327 (2.9047) weight_decay: 0.0500 (0.0500) time: 0.6769 data: 0.0001 max mem: 57114 Epoch: [196] [155/156] eta: 0:00:00 lr: 0.002388 min_lr: 0.002388 loss: 2.9874 (2.9053) weight_decay: 0.0500 (0.0500) time: 0.6785 data: 0.0001 max mem: 57114 Epoch: [196] Total time: 0:01:52 (0.7242 s / it) Averaged stats: lr: 0.002388 min_lr: 0.002388 loss: 2.9874 (2.9020) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7510 (0.7510) acc1: 86.9792 (86.9792) acc5: 98.4375 (98.4375) time: 2.0660 data: 1.8104 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9748 (0.8688) acc1: 81.2500 (81.9693) acc5: 96.8750 (96.2916) time: 0.5639 data: 0.3622 max mem: 57114 Test: Total time: 0:00:02 (0.5831 s / it) * Acc@1 81.566 Acc@5 95.950 loss 0.935 Accuracy of the model on the 50000 test images: 81.6% Max accuracy: 81.88% Test: [0/5] eta: 0:00:11 loss: 0.6180 (0.6180) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.2461 data: 2.0026 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7433 (0.7174) acc1: 79.6875 (78.2609) acc5: 94.7917 (93.3504) time: 0.5999 data: 0.4006 max mem: 57114 Test: Total time: 0:00:03 (0.6098 s / it) * Acc@1 76.357 Acc@5 93.400 loss 0.871 Accuracy of the model EMA on 50000 test images: 76.4% Max EMA accuracy: 76.36% Epoch: [197] [ 0/156] eta: 0:05:54 lr: 0.002387 min_lr: 0.002387 loss: 2.7355 (2.7355) weight_decay: 0.0500 (0.0500) time: 2.2705 data: 1.5962 max mem: 57114 Epoch: [197] [ 10/156] eta: 0:02:05 lr: 0.002385 min_lr: 0.002385 loss: 2.8965 (2.8591) weight_decay: 0.0500 (0.0500) time: 0.8584 data: 0.1454 max mem: 57114 Epoch: [197] [ 20/156] eta: 0:01:47 lr: 0.002382 min_lr: 0.002382 loss: 3.0176 (2.8965) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0003 max mem: 57114 Epoch: [197] [ 30/156] eta: 0:01:36 lr: 0.002380 min_lr: 0.002380 loss: 3.0778 (2.9096) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0003 max mem: 57114 Epoch: [197] [ 40/156] eta: 0:01:27 lr: 0.002377 min_lr: 0.002377 loss: 3.1921 (2.9584) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [197] [ 50/156] eta: 0:01:19 lr: 0.002374 min_lr: 0.002374 loss: 3.1876 (2.9541) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0004 max mem: 57114 Epoch: [197] [ 60/156] eta: 0:01:11 lr: 0.002372 min_lr: 0.002372 loss: 3.1111 (2.9693) weight_decay: 0.0500 (0.0500) time: 0.7159 data: 0.0004 max mem: 57114 Epoch: [197] [ 70/156] eta: 0:01:03 lr: 0.002369 min_lr: 0.002369 loss: 3.1111 (2.9778) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0004 max mem: 57114 Epoch: [197] [ 80/156] eta: 0:00:55 lr: 0.002366 min_lr: 0.002366 loss: 3.0948 (2.9910) weight_decay: 0.0500 (0.0500) time: 0.7064 data: 0.0004 max mem: 57114 Epoch: [197] [ 90/156] eta: 0:00:48 lr: 0.002364 min_lr: 0.002364 loss: 3.1648 (3.0092) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [197] [100/156] eta: 0:00:40 lr: 0.002361 min_lr: 0.002361 loss: 3.1648 (3.0144) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0004 max mem: 57114 Epoch: [197] [110/156] eta: 0:00:33 lr: 0.002359 min_lr: 0.002359 loss: 3.0876 (3.0001) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0004 max mem: 57114 Epoch: [197] [120/156] eta: 0:00:26 lr: 0.002356 min_lr: 0.002356 loss: 3.0353 (2.9988) weight_decay: 0.0500 (0.0500) time: 0.7130 data: 0.0004 max mem: 57114 Epoch: [197] [130/156] eta: 0:00:18 lr: 0.002353 min_lr: 0.002353 loss: 3.0530 (3.0081) weight_decay: 0.0500 (0.0500) time: 0.7035 data: 0.0009 max mem: 57114 Epoch: [197] [140/156] eta: 0:00:11 lr: 0.002351 min_lr: 0.002351 loss: 3.1119 (3.0153) weight_decay: 0.0500 (0.0500) time: 0.6901 data: 0.0007 max mem: 57114 Epoch: [197] [150/156] eta: 0:00:04 lr: 0.002348 min_lr: 0.002348 loss: 3.0556 (3.0019) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.0001 max mem: 57114 Epoch: [197] [155/156] eta: 0:00:00 lr: 0.002347 min_lr: 0.002347 loss: 3.0879 (3.0037) weight_decay: 0.0500 (0.0500) time: 0.6802 data: 0.0001 max mem: 57114 Epoch: [197] Total time: 0:01:52 (0.7190 s / it) Averaged stats: lr: 0.002347 min_lr: 0.002347 loss: 3.0879 (2.8980) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7143 (0.7143) acc1: 86.9792 (86.9792) acc5: 98.9583 (98.9583) time: 2.0536 data: 1.7976 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9401 (0.8636) acc1: 82.8125 (81.4578) acc5: 96.8750 (96.5473) time: 0.5613 data: 0.3596 max mem: 57114 Test: Total time: 0:00:02 (0.5852 s / it) * Acc@1 81.849 Acc@5 95.974 loss 0.926 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.88% Test: [0/5] eta: 0:00:11 loss: 0.6155 (0.6155) acc1: 84.3750 (84.3750) acc5: 96.3542 (96.3542) time: 2.2523 data: 2.0087 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7405 (0.7141) acc1: 79.6875 (78.2609) acc5: 94.7917 (93.0946) time: 0.6011 data: 0.4018 max mem: 57114 Test: Total time: 0:00:03 (0.6128 s / it) * Acc@1 76.415 Acc@5 93.428 loss 0.867 Accuracy of the model EMA on 50000 test images: 76.4% Max EMA accuracy: 76.41% Epoch: [198] [ 0/156] eta: 0:06:06 lr: 0.002346 min_lr: 0.002346 loss: 2.9177 (2.9177) weight_decay: 0.0500 (0.0500) time: 2.3526 data: 1.7030 max mem: 57114 Epoch: [198] [ 10/156] eta: 0:02:10 lr: 0.002344 min_lr: 0.002344 loss: 2.8825 (2.8014) weight_decay: 0.0500 (0.0500) time: 0.8927 data: 0.1551 max mem: 57114 Epoch: [198] [ 20/156] eta: 0:01:50 lr: 0.002341 min_lr: 0.002341 loss: 2.8825 (2.8690) weight_decay: 0.0500 (0.0500) time: 0.7344 data: 0.0003 max mem: 57114 Epoch: [198] [ 30/156] eta: 0:01:38 lr: 0.002339 min_lr: 0.002339 loss: 2.9950 (2.9114) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0004 max mem: 57114 Epoch: [198] [ 40/156] eta: 0:01:28 lr: 0.002336 min_lr: 0.002336 loss: 3.0232 (2.9233) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0004 max mem: 57114 Epoch: [198] [ 50/156] eta: 0:01:19 lr: 0.002333 min_lr: 0.002333 loss: 3.1520 (2.9292) weight_decay: 0.0500 (0.0500) time: 0.7129 data: 0.0004 max mem: 57114 Epoch: [198] [ 60/156] eta: 0:01:11 lr: 0.002331 min_lr: 0.002331 loss: 3.1388 (2.9044) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [198] [ 70/156] eta: 0:01:04 lr: 0.002328 min_lr: 0.002328 loss: 2.9498 (2.8804) weight_decay: 0.0500 (0.0500) time: 0.7205 data: 0.0004 max mem: 57114 Epoch: [198] [ 80/156] eta: 0:00:56 lr: 0.002326 min_lr: 0.002326 loss: 2.9498 (2.8705) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0004 max mem: 57114 Epoch: [198] [ 90/156] eta: 0:00:48 lr: 0.002323 min_lr: 0.002323 loss: 2.9194 (2.8733) weight_decay: 0.0500 (0.0500) time: 0.7143 data: 0.0004 max mem: 57114 Epoch: [198] [100/156] eta: 0:00:41 lr: 0.002320 min_lr: 0.002320 loss: 2.9231 (2.8735) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0004 max mem: 57114 Epoch: [198] [110/156] eta: 0:00:33 lr: 0.002318 min_lr: 0.002318 loss: 2.9609 (2.8746) weight_decay: 0.0500 (0.0500) time: 0.7035 data: 0.0004 max mem: 57114 Epoch: [198] [120/156] eta: 0:00:26 lr: 0.002315 min_lr: 0.002315 loss: 2.8412 (2.8652) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0004 max mem: 57114 Epoch: [198] [130/156] eta: 0:00:18 lr: 0.002312 min_lr: 0.002312 loss: 3.0300 (2.8705) weight_decay: 0.0500 (0.0500) time: 0.6972 data: 0.0005 max mem: 57114 Epoch: [198] [140/156] eta: 0:00:11 lr: 0.002310 min_lr: 0.002310 loss: 3.0766 (2.8682) weight_decay: 0.0500 (0.0500) time: 0.6932 data: 0.0003 max mem: 57114 Epoch: [198] [150/156] eta: 0:00:04 lr: 0.002307 min_lr: 0.002307 loss: 2.8819 (2.8754) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0001 max mem: 57114 Epoch: [198] [155/156] eta: 0:00:00 lr: 0.002306 min_lr: 0.002306 loss: 3.1004 (2.8887) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [198] Total time: 0:01:52 (0.7222 s / it) Averaged stats: lr: 0.002306 min_lr: 0.002306 loss: 3.1004 (2.8911) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.8073 (0.8073) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.1079 data: 1.8521 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 1.0319 (0.9654) acc1: 83.8542 (82.9923) acc5: 96.8750 (96.1637) time: 0.5722 data: 0.3705 max mem: 57114 Test: Total time: 0:00:02 (0.5953 s / it) * Acc@1 81.839 Acc@5 95.868 loss 1.035 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.88% Test: [0/5] eta: 0:00:11 loss: 0.6130 (0.6130) acc1: 84.3750 (84.3750) acc5: 96.3542 (96.3542) time: 2.3075 data: 2.0639 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7379 (0.7112) acc1: 79.6875 (78.2609) acc5: 94.7917 (93.0946) time: 0.6122 data: 0.4129 max mem: 57114 Test: Total time: 0:00:03 (0.6255 s / it) * Acc@1 76.507 Acc@5 93.460 loss 0.863 Accuracy of the model EMA on 50000 test images: 76.5% Max EMA accuracy: 76.51% Epoch: [199] [ 0/156] eta: 0:08:13 lr: 0.002306 min_lr: 0.002306 loss: 3.0608 (3.0608) weight_decay: 0.0500 (0.0500) time: 3.1616 data: 2.5098 max mem: 57114 Epoch: [199] [ 10/156] eta: 0:02:18 lr: 0.002303 min_lr: 0.002303 loss: 3.3678 (3.0794) weight_decay: 0.0500 (0.0500) time: 0.9468 data: 0.2284 max mem: 57114 Epoch: [199] [ 20/156] eta: 0:01:54 lr: 0.002301 min_lr: 0.002301 loss: 3.1734 (3.0585) weight_decay: 0.0500 (0.0500) time: 0.7282 data: 0.0003 max mem: 57114 Epoch: [199] [ 30/156] eta: 0:01:40 lr: 0.002298 min_lr: 0.002298 loss: 3.0988 (3.0199) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0003 max mem: 57114 Epoch: [199] [ 40/156] eta: 0:01:30 lr: 0.002295 min_lr: 0.002295 loss: 3.0067 (3.0226) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0003 max mem: 57114 Epoch: [199] [ 50/156] eta: 0:01:21 lr: 0.002293 min_lr: 0.002293 loss: 3.0067 (3.0037) weight_decay: 0.0500 (0.0500) time: 0.7251 data: 0.0004 max mem: 57114 Epoch: [199] [ 60/156] eta: 0:01:13 lr: 0.002290 min_lr: 0.002290 loss: 3.0141 (3.0114) weight_decay: 0.0500 (0.0500) time: 0.7307 data: 0.0004 max mem: 57114 Epoch: [199] [ 70/156] eta: 0:01:05 lr: 0.002288 min_lr: 0.002288 loss: 3.0639 (3.0066) weight_decay: 0.0500 (0.0500) time: 0.7216 data: 0.0004 max mem: 57114 Epoch: [199] [ 80/156] eta: 0:00:57 lr: 0.002285 min_lr: 0.002285 loss: 2.9728 (2.9919) weight_decay: 0.0500 (0.0500) time: 0.7036 data: 0.0004 max mem: 57114 Epoch: [199] [ 90/156] eta: 0:00:49 lr: 0.002282 min_lr: 0.002282 loss: 3.0590 (2.9990) weight_decay: 0.0500 (0.0500) time: 0.7080 data: 0.0004 max mem: 57114 Epoch: [199] [100/156] eta: 0:00:41 lr: 0.002280 min_lr: 0.002280 loss: 3.1377 (3.0062) weight_decay: 0.0500 (0.0500) time: 0.7105 data: 0.0004 max mem: 57114 Epoch: [199] [110/156] eta: 0:00:33 lr: 0.002277 min_lr: 0.002277 loss: 3.1296 (3.0014) weight_decay: 0.0500 (0.0500) time: 0.6996 data: 0.0004 max mem: 57114 Epoch: [199] [120/156] eta: 0:00:26 lr: 0.002275 min_lr: 0.002275 loss: 3.0393 (2.9828) weight_decay: 0.0500 (0.0500) time: 0.6979 data: 0.0004 max mem: 57114 Epoch: [199] [130/156] eta: 0:00:19 lr: 0.002272 min_lr: 0.002272 loss: 2.9767 (2.9802) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0009 max mem: 57114 Epoch: [199] [140/156] eta: 0:00:11 lr: 0.002269 min_lr: 0.002269 loss: 2.9767 (2.9675) weight_decay: 0.0500 (0.0500) time: 0.6928 data: 0.0007 max mem: 57114 Epoch: [199] [150/156] eta: 0:00:04 lr: 0.002267 min_lr: 0.002267 loss: 2.7153 (2.9498) weight_decay: 0.0500 (0.0500) time: 0.6854 data: 0.0001 max mem: 57114 Epoch: [199] [155/156] eta: 0:00:00 lr: 0.002265 min_lr: 0.002265 loss: 2.8253 (2.9476) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0001 max mem: 57114 Epoch: [199] Total time: 0:01:53 (0.7277 s / it) Averaged stats: lr: 0.002265 min_lr: 0.002265 loss: 2.8253 (2.8888) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5975 (0.5975) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 2.0919 data: 1.8359 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8204 (0.7653) acc1: 84.3750 (83.6317) acc5: 98.4375 (96.8031) time: 0.5691 data: 0.3672 max mem: 57114 Test: Total time: 0:00:02 (0.5908 s / it) * Acc@1 81.616 Acc@5 95.984 loss 0.864 Accuracy of the model on the 50000 test images: 81.6% Max accuracy: 81.88% Test: [0/5] eta: 0:00:11 loss: 0.6106 (0.6106) acc1: 83.8542 (83.8542) acc5: 96.8750 (96.8750) time: 2.2918 data: 2.0483 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7354 (0.7082) acc1: 79.6875 (78.1330) acc5: 95.3125 (93.4783) time: 0.6090 data: 0.4097 max mem: 57114 Test: Total time: 0:00:03 (0.6190 s / it) * Acc@1 76.576 Acc@5 93.506 loss 0.860 Accuracy of the model EMA on 50000 test images: 76.6% Max EMA accuracy: 76.58% Epoch: [200] [ 0/156] eta: 0:07:06 lr: 0.002265 min_lr: 0.002265 loss: 2.2504 (2.2504) weight_decay: 0.0500 (0.0500) time: 2.7314 data: 2.0789 max mem: 57114 Epoch: [200] [ 10/156] eta: 0:02:09 lr: 0.002263 min_lr: 0.002263 loss: 2.8075 (2.7156) weight_decay: 0.0500 (0.0500) time: 0.8889 data: 0.1893 max mem: 57114 Epoch: [200] [ 20/156] eta: 0:01:49 lr: 0.002260 min_lr: 0.002260 loss: 2.8606 (2.7959) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0003 max mem: 57114 Epoch: [200] [ 30/156] eta: 0:01:38 lr: 0.002257 min_lr: 0.002257 loss: 2.9564 (2.8376) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0003 max mem: 57114 Epoch: [200] [ 40/156] eta: 0:01:29 lr: 0.002255 min_lr: 0.002255 loss: 2.9796 (2.8750) weight_decay: 0.0500 (0.0500) time: 0.7272 data: 0.0003 max mem: 57114 Epoch: [200] [ 50/156] eta: 0:01:20 lr: 0.002252 min_lr: 0.002252 loss: 3.0076 (2.8945) weight_decay: 0.0500 (0.0500) time: 0.7334 data: 0.0003 max mem: 57114 Epoch: [200] [ 60/156] eta: 0:01:12 lr: 0.002250 min_lr: 0.002250 loss: 3.0519 (2.8776) weight_decay: 0.0500 (0.0500) time: 0.7282 data: 0.0004 max mem: 57114 Epoch: [200] [ 70/156] eta: 0:01:04 lr: 0.002247 min_lr: 0.002247 loss: 3.0162 (2.8736) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [200] [ 80/156] eta: 0:00:56 lr: 0.002244 min_lr: 0.002244 loss: 3.0293 (2.8897) weight_decay: 0.0500 (0.0500) time: 0.7006 data: 0.0004 max mem: 57114 Epoch: [200] [ 90/156] eta: 0:00:48 lr: 0.002242 min_lr: 0.002242 loss: 3.0600 (2.8910) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0004 max mem: 57114 Epoch: [200] [100/156] eta: 0:00:41 lr: 0.002239 min_lr: 0.002239 loss: 2.6617 (2.8588) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0004 max mem: 57114 Epoch: [200] [110/156] eta: 0:00:33 lr: 0.002237 min_lr: 0.002237 loss: 2.8961 (2.8605) weight_decay: 0.0500 (0.0500) time: 0.6998 data: 0.0003 max mem: 57114 Epoch: [200] [120/156] eta: 0:00:26 lr: 0.002234 min_lr: 0.002234 loss: 2.8925 (2.8471) weight_decay: 0.0500 (0.0500) time: 0.7150 data: 0.0003 max mem: 57114 Epoch: [200] [130/156] eta: 0:00:18 lr: 0.002232 min_lr: 0.002232 loss: 2.8475 (2.8606) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0005 max mem: 57114 Epoch: [200] [140/156] eta: 0:00:11 lr: 0.002229 min_lr: 0.002229 loss: 3.1995 (2.8892) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0004 max mem: 57114 Epoch: [200] [150/156] eta: 0:00:04 lr: 0.002226 min_lr: 0.002226 loss: 3.0945 (2.8797) weight_decay: 0.0500 (0.0500) time: 0.6813 data: 0.0001 max mem: 57114 Epoch: [200] [155/156] eta: 0:00:00 lr: 0.002225 min_lr: 0.002225 loss: 2.9625 (2.8811) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0001 max mem: 57114 Epoch: [200] Total time: 0:01:52 (0.7229 s / it) Averaged stats: lr: 0.002225 min_lr: 0.002225 loss: 2.9625 (2.8840) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6576 (0.6576) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 2.1290 data: 1.8728 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8788 (0.7900) acc1: 84.3750 (82.9923) acc5: 98.9583 (97.5703) time: 0.5767 data: 0.3746 max mem: 57114 Test: Total time: 0:00:02 (0.5987 s / it) * Acc@1 81.977 Acc@5 96.000 loss 0.893 Accuracy of the model on the 50000 test images: 82.0% Max accuracy: 81.98% Test: [0/5] eta: 0:00:10 loss: 0.6085 (0.6085) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.0602 data: 1.8169 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7336 (0.7055) acc1: 79.6875 (78.2609) acc5: 95.3125 (93.6061) time: 0.5626 data: 0.3635 max mem: 57114 Test: Total time: 0:00:02 (0.5762 s / it) * Acc@1 76.624 Acc@5 93.544 loss 0.856 Accuracy of the model EMA on 50000 test images: 76.6% Max EMA accuracy: 76.62% Epoch: [201] [ 0/156] eta: 0:07:32 lr: 0.002225 min_lr: 0.002225 loss: 2.8912 (2.8912) weight_decay: 0.0500 (0.0500) time: 2.9033 data: 2.2354 max mem: 57114 Epoch: [201] [ 10/156] eta: 0:02:12 lr: 0.002222 min_lr: 0.002222 loss: 3.1337 (2.9810) weight_decay: 0.0500 (0.0500) time: 0.9059 data: 0.2035 max mem: 57114 Epoch: [201] [ 20/156] eta: 0:01:51 lr: 0.002220 min_lr: 0.002220 loss: 3.0247 (2.9404) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0003 max mem: 57114 Epoch: [201] [ 30/156] eta: 0:01:40 lr: 0.002217 min_lr: 0.002217 loss: 3.0003 (2.9756) weight_decay: 0.0500 (0.0500) time: 0.7385 data: 0.0003 max mem: 57114 Epoch: [201] [ 40/156] eta: 0:01:31 lr: 0.002215 min_lr: 0.002215 loss: 3.0165 (2.9291) weight_decay: 0.0500 (0.0500) time: 0.7477 data: 0.0003 max mem: 57114 Epoch: [201] [ 50/156] eta: 0:01:21 lr: 0.002212 min_lr: 0.002212 loss: 3.0435 (2.9196) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0003 max mem: 57114 Epoch: [201] [ 60/156] eta: 0:01:13 lr: 0.002209 min_lr: 0.002209 loss: 2.9208 (2.9063) weight_decay: 0.0500 (0.0500) time: 0.7267 data: 0.0004 max mem: 57114 Epoch: [201] [ 70/156] eta: 0:01:05 lr: 0.002207 min_lr: 0.002207 loss: 2.9598 (2.9391) weight_decay: 0.0500 (0.0500) time: 0.7327 data: 0.0004 max mem: 57114 Epoch: [201] [ 80/156] eta: 0:00:57 lr: 0.002204 min_lr: 0.002204 loss: 3.1282 (2.9394) weight_decay: 0.0500 (0.0500) time: 0.7131 data: 0.0004 max mem: 57114 Epoch: [201] [ 90/156] eta: 0:00:49 lr: 0.002202 min_lr: 0.002202 loss: 2.9318 (2.9186) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [201] [100/156] eta: 0:00:41 lr: 0.002199 min_lr: 0.002199 loss: 2.8449 (2.9142) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0004 max mem: 57114 Epoch: [201] [110/156] eta: 0:00:34 lr: 0.002197 min_lr: 0.002197 loss: 3.0445 (2.9244) weight_decay: 0.0500 (0.0500) time: 0.7218 data: 0.0003 max mem: 57114 Epoch: [201] [120/156] eta: 0:00:26 lr: 0.002194 min_lr: 0.002194 loss: 3.0445 (2.9306) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0003 max mem: 57114 Epoch: [201] [130/156] eta: 0:00:19 lr: 0.002191 min_lr: 0.002191 loss: 3.1183 (2.9280) weight_decay: 0.0500 (0.0500) time: 0.6997 data: 0.0008 max mem: 57114 Epoch: [201] [140/156] eta: 0:00:11 lr: 0.002189 min_lr: 0.002189 loss: 2.9625 (2.9180) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0007 max mem: 57114 Epoch: [201] [150/156] eta: 0:00:04 lr: 0.002186 min_lr: 0.002186 loss: 2.9533 (2.9182) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0001 max mem: 57114 Epoch: [201] [155/156] eta: 0:00:00 lr: 0.002185 min_lr: 0.002185 loss: 2.8550 (2.9094) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [201] Total time: 0:01:53 (0.7299 s / it) Averaged stats: lr: 0.002185 min_lr: 0.002185 loss: 2.8550 (2.8841) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7064 (0.7064) acc1: 88.5417 (88.5417) acc5: 98.9583 (98.9583) time: 2.0233 data: 1.7647 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9368 (0.8301) acc1: 80.7292 (81.9693) acc5: 97.3958 (96.5473) time: 0.5553 data: 0.3530 max mem: 57114 Test: Total time: 0:00:02 (0.5783 s / it) * Acc@1 81.951 Acc@5 96.092 loss 0.907 Accuracy of the model on the 50000 test images: 82.0% Max accuracy: 81.98% Test: [0/5] eta: 0:00:11 loss: 0.6067 (0.6067) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.2651 data: 2.0216 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7316 (0.7030) acc1: 79.6875 (78.2609) acc5: 95.3125 (93.6061) time: 0.6037 data: 0.4044 max mem: 57114 Test: Total time: 0:00:03 (0.6143 s / it) * Acc@1 76.680 Acc@5 93.580 loss 0.853 Accuracy of the model EMA on 50000 test images: 76.7% Max EMA accuracy: 76.68% Epoch: [202] [ 0/156] eta: 0:08:16 lr: 0.002185 min_lr: 0.002185 loss: 2.6517 (2.6517) weight_decay: 0.0500 (0.0500) time: 3.1844 data: 2.5384 max mem: 57114 Epoch: [202] [ 10/156] eta: 0:02:20 lr: 0.002182 min_lr: 0.002182 loss: 3.1549 (3.0522) weight_decay: 0.0500 (0.0500) time: 0.9596 data: 0.2311 max mem: 57114 Epoch: [202] [ 20/156] eta: 0:01:55 lr: 0.002180 min_lr: 0.002180 loss: 3.0674 (2.9805) weight_decay: 0.0500 (0.0500) time: 0.7291 data: 0.0004 max mem: 57114 Epoch: [202] [ 30/156] eta: 0:01:42 lr: 0.002177 min_lr: 0.002177 loss: 3.0674 (2.9238) weight_decay: 0.0500 (0.0500) time: 0.7277 data: 0.0004 max mem: 57114 Epoch: [202] [ 40/156] eta: 0:01:31 lr: 0.002175 min_lr: 0.002175 loss: 2.5730 (2.8619) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0003 max mem: 57114 Epoch: [202] [ 50/156] eta: 0:01:22 lr: 0.002172 min_lr: 0.002172 loss: 2.9037 (2.8618) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0004 max mem: 57114 Epoch: [202] [ 60/156] eta: 0:01:13 lr: 0.002169 min_lr: 0.002169 loss: 3.0188 (2.8902) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [202] [ 70/156] eta: 0:01:05 lr: 0.002167 min_lr: 0.002167 loss: 3.1123 (2.9212) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0003 max mem: 57114 Epoch: [202] [ 80/156] eta: 0:00:56 lr: 0.002164 min_lr: 0.002164 loss: 3.1864 (2.9333) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [202] [ 90/156] eta: 0:00:49 lr: 0.002162 min_lr: 0.002162 loss: 2.9442 (2.9070) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0004 max mem: 57114 Epoch: [202] [100/156] eta: 0:00:41 lr: 0.002159 min_lr: 0.002159 loss: 2.6525 (2.8851) weight_decay: 0.0500 (0.0500) time: 0.7197 data: 0.0004 max mem: 57114 Epoch: [202] [110/156] eta: 0:00:34 lr: 0.002157 min_lr: 0.002157 loss: 2.5961 (2.8804) weight_decay: 0.0500 (0.0500) time: 0.7148 data: 0.0003 max mem: 57114 Epoch: [202] [120/156] eta: 0:00:26 lr: 0.002154 min_lr: 0.002154 loss: 2.7600 (2.8744) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0003 max mem: 57114 Epoch: [202] [130/156] eta: 0:00:19 lr: 0.002152 min_lr: 0.002152 loss: 2.9744 (2.8843) weight_decay: 0.0500 (0.0500) time: 0.6901 data: 0.0004 max mem: 57114 Epoch: [202] [140/156] eta: 0:00:11 lr: 0.002149 min_lr: 0.002149 loss: 3.0788 (2.8942) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0003 max mem: 57114 Epoch: [202] [150/156] eta: 0:00:04 lr: 0.002146 min_lr: 0.002146 loss: 3.0705 (2.8849) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [202] [155/156] eta: 0:00:00 lr: 0.002145 min_lr: 0.002145 loss: 3.0556 (2.8901) weight_decay: 0.0500 (0.0500) time: 0.6885 data: 0.0001 max mem: 57114 Epoch: [202] Total time: 0:01:53 (0.7267 s / it) Averaged stats: lr: 0.002145 min_lr: 0.002145 loss: 3.0556 (2.8757) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6629 (0.6629) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.1883 data: 1.9318 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8884 (0.8298) acc1: 81.7708 (82.6087) acc5: 97.3958 (96.6752) time: 0.5884 data: 0.3864 max mem: 57114 Test: Total time: 0:00:03 (0.6095 s / it) * Acc@1 81.781 Acc@5 96.010 loss 0.917 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.98% Test: [0/5] eta: 0:00:12 loss: 0.6048 (0.6048) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.4120 data: 2.1684 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7295 (0.7006) acc1: 79.6875 (78.2609) acc5: 95.3125 (93.6061) time: 0.6331 data: 0.4338 max mem: 57114 Test: Total time: 0:00:03 (0.6446 s / it) * Acc@1 76.720 Acc@5 93.602 loss 0.850 Accuracy of the model EMA on 50000 test images: 76.7% Max EMA accuracy: 76.72% Epoch: [203] [ 0/156] eta: 0:08:07 lr: 0.002145 min_lr: 0.002145 loss: 3.2289 (3.2289) weight_decay: 0.0500 (0.0500) time: 3.1255 data: 2.4649 max mem: 57114 Epoch: [203] [ 10/156] eta: 0:02:19 lr: 0.002142 min_lr: 0.002142 loss: 3.0699 (2.9277) weight_decay: 0.0500 (0.0500) time: 0.9534 data: 0.2243 max mem: 57114 Epoch: [203] [ 20/156] eta: 0:01:54 lr: 0.002140 min_lr: 0.002140 loss: 3.0299 (2.9520) weight_decay: 0.0500 (0.0500) time: 0.7311 data: 0.0003 max mem: 57114 Epoch: [203] [ 30/156] eta: 0:01:41 lr: 0.002137 min_lr: 0.002137 loss: 2.9963 (2.9025) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0003 max mem: 57114 Epoch: [203] [ 40/156] eta: 0:01:30 lr: 0.002135 min_lr: 0.002135 loss: 2.9963 (2.9342) weight_decay: 0.0500 (0.0500) time: 0.7150 data: 0.0003 max mem: 57114 Epoch: [203] [ 50/156] eta: 0:01:21 lr: 0.002132 min_lr: 0.002132 loss: 3.0563 (2.9230) weight_decay: 0.0500 (0.0500) time: 0.7153 data: 0.0003 max mem: 57114 Epoch: [203] [ 60/156] eta: 0:01:13 lr: 0.002130 min_lr: 0.002130 loss: 3.0869 (2.9365) weight_decay: 0.0500 (0.0500) time: 0.7288 data: 0.0003 max mem: 57114 Epoch: [203] [ 70/156] eta: 0:01:04 lr: 0.002127 min_lr: 0.002127 loss: 2.8190 (2.8928) weight_decay: 0.0500 (0.0500) time: 0.7189 data: 0.0004 max mem: 57114 Epoch: [203] [ 80/156] eta: 0:00:57 lr: 0.002125 min_lr: 0.002125 loss: 2.8091 (2.8912) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0003 max mem: 57114 Epoch: [203] [ 90/156] eta: 0:00:49 lr: 0.002122 min_lr: 0.002122 loss: 2.8991 (2.8988) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [203] [100/156] eta: 0:00:41 lr: 0.002119 min_lr: 0.002119 loss: 3.0220 (2.9134) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0003 max mem: 57114 Epoch: [203] [110/156] eta: 0:00:34 lr: 0.002117 min_lr: 0.002117 loss: 3.0428 (2.9069) weight_decay: 0.0500 (0.0500) time: 0.7086 data: 0.0003 max mem: 57114 Epoch: [203] [120/156] eta: 0:00:26 lr: 0.002114 min_lr: 0.002114 loss: 2.9522 (2.9094) weight_decay: 0.0500 (0.0500) time: 0.7095 data: 0.0003 max mem: 57114 Epoch: [203] [130/156] eta: 0:00:19 lr: 0.002112 min_lr: 0.002112 loss: 2.9522 (2.9258) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0008 max mem: 57114 Epoch: [203] [140/156] eta: 0:00:11 lr: 0.002109 min_lr: 0.002109 loss: 3.0045 (2.9147) weight_decay: 0.0500 (0.0500) time: 0.6887 data: 0.0007 max mem: 57114 Epoch: [203] [150/156] eta: 0:00:04 lr: 0.002107 min_lr: 0.002107 loss: 3.0045 (2.9123) weight_decay: 0.0500 (0.0500) time: 0.6806 data: 0.0001 max mem: 57114 Epoch: [203] [155/156] eta: 0:00:00 lr: 0.002106 min_lr: 0.002106 loss: 2.8478 (2.9105) weight_decay: 0.0500 (0.0500) time: 0.6816 data: 0.0001 max mem: 57114 Epoch: [203] Total time: 0:01:53 (0.7277 s / it) Averaged stats: lr: 0.002106 min_lr: 0.002106 loss: 2.8478 (2.8670) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6582 (0.6582) acc1: 89.0625 (89.0625) acc5: 100.0000 (100.0000) time: 2.1707 data: 1.9150 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8506 (0.8268) acc1: 84.3750 (82.6087) acc5: 97.3958 (96.6752) time: 0.5848 data: 0.3831 max mem: 57114 Test: Total time: 0:00:03 (0.6094 s / it) * Acc@1 82.045 Acc@5 95.968 loss 0.916 Accuracy of the model on the 50000 test images: 82.0% Max accuracy: 82.05% Test: [0/5] eta: 0:00:10 loss: 0.6028 (0.6028) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.0350 data: 1.7915 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7275 (0.6982) acc1: 79.6875 (78.2609) acc5: 95.3125 (93.7340) time: 0.5576 data: 0.3584 max mem: 57114 Test: Total time: 0:00:02 (0.5673 s / it) * Acc@1 76.784 Acc@5 93.630 loss 0.847 Accuracy of the model EMA on 50000 test images: 76.8% Max EMA accuracy: 76.78% Epoch: [204] [ 0/156] eta: 0:08:34 lr: 0.002105 min_lr: 0.002105 loss: 3.3828 (3.3828) weight_decay: 0.0500 (0.0500) time: 3.2959 data: 2.6469 max mem: 57114 Epoch: [204] [ 10/156] eta: 0:02:21 lr: 0.002103 min_lr: 0.002103 loss: 3.1751 (2.9801) weight_decay: 0.0500 (0.0500) time: 0.9710 data: 0.2409 max mem: 57114 Epoch: [204] [ 20/156] eta: 0:01:57 lr: 0.002100 min_lr: 0.002100 loss: 2.9862 (2.9111) weight_decay: 0.0500 (0.0500) time: 0.7418 data: 0.0003 max mem: 57114 Epoch: [204] [ 30/156] eta: 0:01:42 lr: 0.002098 min_lr: 0.002098 loss: 2.7160 (2.8332) weight_decay: 0.0500 (0.0500) time: 0.7301 data: 0.0003 max mem: 57114 Epoch: [204] [ 40/156] eta: 0:01:32 lr: 0.002095 min_lr: 0.002095 loss: 2.9142 (2.8493) weight_decay: 0.0500 (0.0500) time: 0.7308 data: 0.0003 max mem: 57114 Epoch: [204] [ 50/156] eta: 0:01:22 lr: 0.002093 min_lr: 0.002093 loss: 3.0503 (2.8599) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0004 max mem: 57114 Epoch: [204] [ 60/156] eta: 0:01:14 lr: 0.002090 min_lr: 0.002090 loss: 2.8788 (2.8463) weight_decay: 0.0500 (0.0500) time: 0.7167 data: 0.0003 max mem: 57114 Epoch: [204] [ 70/156] eta: 0:01:05 lr: 0.002088 min_lr: 0.002088 loss: 2.8124 (2.8430) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0004 max mem: 57114 Epoch: [204] [ 80/156] eta: 0:00:57 lr: 0.002085 min_lr: 0.002085 loss: 2.7394 (2.8466) weight_decay: 0.0500 (0.0500) time: 0.7218 data: 0.0004 max mem: 57114 Epoch: [204] [ 90/156] eta: 0:00:49 lr: 0.002083 min_lr: 0.002083 loss: 2.9333 (2.8600) weight_decay: 0.0500 (0.0500) time: 0.7124 data: 0.0004 max mem: 57114 Epoch: [204] [100/156] eta: 0:00:41 lr: 0.002080 min_lr: 0.002080 loss: 2.9810 (2.8702) weight_decay: 0.0500 (0.0500) time: 0.6977 data: 0.0004 max mem: 57114 Epoch: [204] [110/156] eta: 0:00:34 lr: 0.002077 min_lr: 0.002077 loss: 2.9876 (2.8785) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0004 max mem: 57114 Epoch: [204] [120/156] eta: 0:00:26 lr: 0.002075 min_lr: 0.002075 loss: 3.0552 (2.8857) weight_decay: 0.0500 (0.0500) time: 0.6995 data: 0.0004 max mem: 57114 Epoch: [204] [130/156] eta: 0:00:19 lr: 0.002072 min_lr: 0.002072 loss: 3.1503 (2.8891) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0009 max mem: 57114 Epoch: [204] [140/156] eta: 0:00:11 lr: 0.002070 min_lr: 0.002070 loss: 2.9157 (2.8769) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0008 max mem: 57114 Epoch: [204] [150/156] eta: 0:00:04 lr: 0.002067 min_lr: 0.002067 loss: 2.9157 (2.8713) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.0001 max mem: 57114 Epoch: [204] [155/156] eta: 0:00:00 lr: 0.002066 min_lr: 0.002066 loss: 2.9157 (2.8613) weight_decay: 0.0500 (0.0500) time: 0.6860 data: 0.0001 max mem: 57114 Epoch: [204] Total time: 0:01:53 (0.7297 s / it) Averaged stats: lr: 0.002066 min_lr: 0.002066 loss: 2.9157 (2.8555) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6539 (0.6539) acc1: 88.5417 (88.5417) acc5: 98.9583 (98.9583) time: 2.0715 data: 1.8155 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8525 (0.7874) acc1: 82.8125 (82.4808) acc5: 97.3958 (96.8031) time: 0.5650 data: 0.3632 max mem: 57114 Test: Total time: 0:00:02 (0.5885 s / it) * Acc@1 82.415 Acc@5 96.124 loss 0.841 Accuracy of the model on the 50000 test images: 82.4% Max accuracy: 82.41% Test: [0/5] eta: 0:00:10 loss: 0.6008 (0.6008) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.0420 data: 1.7987 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7253 (0.6960) acc1: 79.6875 (78.2609) acc5: 95.3125 (93.7340) time: 0.5589 data: 0.3598 max mem: 57114 Test: Total time: 0:00:02 (0.5717 s / it) * Acc@1 76.848 Acc@5 93.674 loss 0.844 Accuracy of the model EMA on 50000 test images: 76.8% Max EMA accuracy: 76.85% Epoch: [205] [ 0/156] eta: 0:09:01 lr: 0.002066 min_lr: 0.002066 loss: 3.0317 (3.0317) weight_decay: 0.0500 (0.0500) time: 3.4706 data: 2.8180 max mem: 57114 Epoch: [205] [ 10/156] eta: 0:02:23 lr: 0.002063 min_lr: 0.002063 loss: 2.7307 (2.7127) weight_decay: 0.0500 (0.0500) time: 0.9816 data: 0.2564 max mem: 57114 Epoch: [205] [ 20/156] eta: 0:01:57 lr: 0.002061 min_lr: 0.002061 loss: 2.6662 (2.7246) weight_decay: 0.0500 (0.0500) time: 0.7316 data: 0.0003 max mem: 57114 Epoch: [205] [ 30/156] eta: 0:01:42 lr: 0.002058 min_lr: 0.002058 loss: 2.8829 (2.7725) weight_decay: 0.0500 (0.0500) time: 0.7242 data: 0.0003 max mem: 57114 Epoch: [205] [ 40/156] eta: 0:01:32 lr: 0.002056 min_lr: 0.002056 loss: 2.8829 (2.7704) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0004 max mem: 57114 Epoch: [205] [ 50/156] eta: 0:01:23 lr: 0.002053 min_lr: 0.002053 loss: 2.8606 (2.7623) weight_decay: 0.0500 (0.0500) time: 0.7408 data: 0.0004 max mem: 57114 Epoch: [205] [ 60/156] eta: 0:01:14 lr: 0.002051 min_lr: 0.002051 loss: 2.6102 (2.7556) weight_decay: 0.0500 (0.0500) time: 0.7146 data: 0.0003 max mem: 57114 Epoch: [205] [ 70/156] eta: 0:01:05 lr: 0.002048 min_lr: 0.002048 loss: 2.6113 (2.7602) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0003 max mem: 57114 Epoch: [205] [ 80/156] eta: 0:00:57 lr: 0.002046 min_lr: 0.002046 loss: 2.9097 (2.7888) weight_decay: 0.0500 (0.0500) time: 0.7188 data: 0.0004 max mem: 57114 Epoch: [205] [ 90/156] eta: 0:00:49 lr: 0.002043 min_lr: 0.002043 loss: 2.9515 (2.8096) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0004 max mem: 57114 Epoch: [205] [100/156] eta: 0:00:41 lr: 0.002041 min_lr: 0.002041 loss: 3.0389 (2.8234) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0004 max mem: 57114 Epoch: [205] [110/156] eta: 0:00:34 lr: 0.002038 min_lr: 0.002038 loss: 2.8946 (2.8172) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [205] [120/156] eta: 0:00:26 lr: 0.002036 min_lr: 0.002036 loss: 2.9026 (2.8285) weight_decay: 0.0500 (0.0500) time: 0.7020 data: 0.0003 max mem: 57114 Epoch: [205] [130/156] eta: 0:00:19 lr: 0.002033 min_lr: 0.002033 loss: 3.0006 (2.8440) weight_decay: 0.0500 (0.0500) time: 0.6902 data: 0.0008 max mem: 57114 Epoch: [205] [140/156] eta: 0:00:11 lr: 0.002031 min_lr: 0.002031 loss: 3.0377 (2.8579) weight_decay: 0.0500 (0.0500) time: 0.6854 data: 0.0008 max mem: 57114 Epoch: [205] [150/156] eta: 0:00:04 lr: 0.002028 min_lr: 0.002028 loss: 3.0946 (2.8685) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [205] [155/156] eta: 0:00:00 lr: 0.002027 min_lr: 0.002027 loss: 2.9352 (2.8619) weight_decay: 0.0500 (0.0500) time: 0.6865 data: 0.0001 max mem: 57114 Epoch: [205] Total time: 0:01:53 (0.7301 s / it) Averaged stats: lr: 0.002027 min_lr: 0.002027 loss: 2.9352 (2.8583) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6436 (0.6436) acc1: 89.5833 (89.5833) acc5: 98.9583 (98.9583) time: 2.1166 data: 1.8605 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9419 (0.8389) acc1: 82.2917 (82.7366) acc5: 96.8750 (96.8031) time: 0.5741 data: 0.3722 max mem: 57114 Test: Total time: 0:00:02 (0.5967 s / it) * Acc@1 82.049 Acc@5 96.110 loss 0.926 Accuracy of the model on the 50000 test images: 82.0% Max accuracy: 82.41% Test: [0/5] eta: 0:00:12 loss: 0.5991 (0.5991) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.4856 data: 2.2421 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7233 (0.6940) acc1: 79.6875 (78.2609) acc5: 95.8333 (93.8619) time: 0.6485 data: 0.4487 max mem: 57114 Test: Total time: 0:00:03 (0.6622 s / it) * Acc@1 76.914 Acc@5 93.694 loss 0.841 Accuracy of the model EMA on 50000 test images: 76.9% Max EMA accuracy: 76.91% Epoch: [206] [ 0/156] eta: 0:08:26 lr: 0.002027 min_lr: 0.002027 loss: 3.0778 (3.0778) weight_decay: 0.0500 (0.0500) time: 3.2439 data: 2.5953 max mem: 57114 Epoch: [206] [ 10/156] eta: 0:02:18 lr: 0.002024 min_lr: 0.002024 loss: 3.0778 (2.9486) weight_decay: 0.0500 (0.0500) time: 0.9468 data: 0.2364 max mem: 57114 Epoch: [206] [ 20/156] eta: 0:01:55 lr: 0.002022 min_lr: 0.002022 loss: 3.0514 (2.9435) weight_decay: 0.0500 (0.0500) time: 0.7292 data: 0.0004 max mem: 57114 Epoch: [206] [ 30/156] eta: 0:01:41 lr: 0.002019 min_lr: 0.002019 loss: 3.0537 (2.9865) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0004 max mem: 57114 Epoch: [206] [ 40/156] eta: 0:01:30 lr: 0.002017 min_lr: 0.002017 loss: 3.0949 (2.9787) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0004 max mem: 57114 Epoch: [206] [ 50/156] eta: 0:01:21 lr: 0.002014 min_lr: 0.002014 loss: 3.0979 (2.9714) weight_decay: 0.0500 (0.0500) time: 0.7129 data: 0.0004 max mem: 57114 Epoch: [206] [ 60/156] eta: 0:01:13 lr: 0.002012 min_lr: 0.002012 loss: 3.0845 (2.9682) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [206] [ 70/156] eta: 0:01:04 lr: 0.002009 min_lr: 0.002009 loss: 2.9499 (2.9503) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0004 max mem: 57114 Epoch: [206] [ 80/156] eta: 0:00:56 lr: 0.002007 min_lr: 0.002007 loss: 2.8573 (2.9522) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0004 max mem: 57114 Epoch: [206] [ 90/156] eta: 0:00:48 lr: 0.002004 min_lr: 0.002004 loss: 2.9706 (2.9434) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0004 max mem: 57114 Epoch: [206] [100/156] eta: 0:00:41 lr: 0.002002 min_lr: 0.002002 loss: 2.8239 (2.9117) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0003 max mem: 57114 Epoch: [206] [110/156] eta: 0:00:33 lr: 0.001999 min_lr: 0.001999 loss: 2.8878 (2.9139) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0003 max mem: 57114 Epoch: [206] [120/156] eta: 0:00:26 lr: 0.001997 min_lr: 0.001997 loss: 2.9648 (2.9099) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0003 max mem: 57114 Epoch: [206] [130/156] eta: 0:00:19 lr: 0.001994 min_lr: 0.001994 loss: 3.0898 (2.9113) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0008 max mem: 57114 Epoch: [206] [140/156] eta: 0:00:11 lr: 0.001992 min_lr: 0.001992 loss: 3.0875 (2.9075) weight_decay: 0.0500 (0.0500) time: 0.6919 data: 0.0007 max mem: 57114 Epoch: [206] [150/156] eta: 0:00:04 lr: 0.001989 min_lr: 0.001989 loss: 2.9916 (2.8966) weight_decay: 0.0500 (0.0500) time: 0.6889 data: 0.0001 max mem: 57114 Epoch: [206] [155/156] eta: 0:00:00 lr: 0.001988 min_lr: 0.001988 loss: 2.7660 (2.8930) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0001 max mem: 57114 Epoch: [206] Total time: 0:01:53 (0.7263 s / it) Averaged stats: lr: 0.001988 min_lr: 0.001988 loss: 2.7660 (2.8540) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6593 (0.6593) acc1: 86.9792 (86.9792) acc5: 99.4792 (99.4792) time: 2.0840 data: 1.8280 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9029 (0.7996) acc1: 83.8542 (82.7366) acc5: 97.9167 (97.3146) time: 0.5676 data: 0.3657 max mem: 57114 Test: Total time: 0:00:02 (0.5901 s / it) * Acc@1 82.447 Acc@5 96.350 loss 0.893 Accuracy of the model on the 50000 test images: 82.4% Max accuracy: 82.45% Test: [0/5] eta: 0:00:10 loss: 0.5978 (0.5978) acc1: 84.3750 (84.3750) acc5: 96.8750 (96.8750) time: 2.1061 data: 1.8626 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7210 (0.6919) acc1: 79.6875 (78.3887) acc5: 95.8333 (93.8619) time: 0.5718 data: 0.3726 max mem: 57114 Test: Total time: 0:00:02 (0.5814 s / it) * Acc@1 76.956 Acc@5 93.708 loss 0.838 Accuracy of the model EMA on 50000 test images: 77.0% Max EMA accuracy: 76.96% Epoch: [207] [ 0/156] eta: 0:08:25 lr: 0.001988 min_lr: 0.001988 loss: 3.2162 (3.2162) weight_decay: 0.0500 (0.0500) time: 3.2414 data: 2.5870 max mem: 57114 Epoch: [207] [ 10/156] eta: 0:02:17 lr: 0.001985 min_lr: 0.001985 loss: 3.0816 (3.0095) weight_decay: 0.0500 (0.0500) time: 0.9401 data: 0.2354 max mem: 57114 Epoch: [207] [ 20/156] eta: 0:01:54 lr: 0.001983 min_lr: 0.001983 loss: 3.0271 (2.9609) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0003 max mem: 57114 Epoch: [207] [ 30/156] eta: 0:01:42 lr: 0.001980 min_lr: 0.001980 loss: 3.0578 (3.0083) weight_decay: 0.0500 (0.0500) time: 0.7425 data: 0.0004 max mem: 57114 Epoch: [207] [ 40/156] eta: 0:01:32 lr: 0.001978 min_lr: 0.001978 loss: 3.1191 (2.9845) weight_decay: 0.0500 (0.0500) time: 0.7453 data: 0.0004 max mem: 57114 Epoch: [207] [ 50/156] eta: 0:01:22 lr: 0.001975 min_lr: 0.001975 loss: 3.1153 (2.9966) weight_decay: 0.0500 (0.0500) time: 0.7361 data: 0.0004 max mem: 57114 Epoch: [207] [ 60/156] eta: 0:01:14 lr: 0.001973 min_lr: 0.001973 loss: 3.0331 (2.9686) weight_decay: 0.0500 (0.0500) time: 0.7281 data: 0.0004 max mem: 57114 Epoch: [207] [ 70/156] eta: 0:01:06 lr: 0.001970 min_lr: 0.001970 loss: 2.9175 (2.9576) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0004 max mem: 57114 Epoch: [207] [ 80/156] eta: 0:00:57 lr: 0.001968 min_lr: 0.001968 loss: 2.7604 (2.9210) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [207] [ 90/156] eta: 0:00:49 lr: 0.001965 min_lr: 0.001965 loss: 2.8384 (2.9170) weight_decay: 0.0500 (0.0500) time: 0.6956 data: 0.0004 max mem: 57114 Epoch: [207] [100/156] eta: 0:00:41 lr: 0.001963 min_lr: 0.001963 loss: 2.9829 (2.9128) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [207] [110/156] eta: 0:00:34 lr: 0.001961 min_lr: 0.001961 loss: 2.9577 (2.8994) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0004 max mem: 57114 Epoch: [207] [120/156] eta: 0:00:26 lr: 0.001958 min_lr: 0.001958 loss: 2.9442 (2.9067) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0004 max mem: 57114 Epoch: [207] [130/156] eta: 0:00:19 lr: 0.001956 min_lr: 0.001956 loss: 2.9442 (2.9045) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0009 max mem: 57114 Epoch: [207] [140/156] eta: 0:00:11 lr: 0.001953 min_lr: 0.001953 loss: 2.9117 (2.8904) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0007 max mem: 57114 Epoch: [207] [150/156] eta: 0:00:04 lr: 0.001951 min_lr: 0.001951 loss: 3.0935 (2.9118) weight_decay: 0.0500 (0.0500) time: 0.6833 data: 0.0001 max mem: 57114 Epoch: [207] [155/156] eta: 0:00:00 lr: 0.001949 min_lr: 0.001949 loss: 3.0935 (2.9017) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0001 max mem: 57114 Epoch: [207] Total time: 0:01:54 (0.7320 s / it) Averaged stats: lr: 0.001949 min_lr: 0.001949 loss: 3.0935 (2.8436) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.6591 (0.6591) acc1: 90.1042 (90.1042) acc5: 98.9583 (98.9583) time: 2.2670 data: 2.0112 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8585 (0.8118) acc1: 81.2500 (81.7136) acc5: 96.8750 (96.5473) time: 0.6040 data: 0.4023 max mem: 57114 Test: Total time: 0:00:03 (0.6272 s / it) * Acc@1 82.171 Acc@5 96.164 loss 0.896 Accuracy of the model on the 50000 test images: 82.2% Max accuracy: 82.45% Test: [0/5] eta: 0:00:12 loss: 0.5962 (0.5962) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.5014 data: 2.2576 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7190 (0.6899) acc1: 79.6875 (78.3887) acc5: 95.8333 (93.9898) time: 0.6511 data: 0.4517 max mem: 57114 Test: Total time: 0:00:03 (0.6657 s / it) * Acc@1 76.988 Acc@5 93.742 loss 0.836 Accuracy of the model EMA on 50000 test images: 77.0% Max EMA accuracy: 76.99% Epoch: [208] [ 0/156] eta: 0:06:30 lr: 0.001949 min_lr: 0.001949 loss: 2.3653 (2.3653) weight_decay: 0.0500 (0.0500) time: 2.5022 data: 1.8492 max mem: 57114 Epoch: [208] [ 10/156] eta: 0:02:09 lr: 0.001947 min_lr: 0.001947 loss: 2.6498 (2.7259) weight_decay: 0.0500 (0.0500) time: 0.8863 data: 0.1684 max mem: 57114 Epoch: [208] [ 20/156] eta: 0:01:50 lr: 0.001944 min_lr: 0.001944 loss: 2.8921 (2.7735) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0003 max mem: 57114 Epoch: [208] [ 30/156] eta: 0:01:38 lr: 0.001942 min_lr: 0.001942 loss: 2.9192 (2.8188) weight_decay: 0.0500 (0.0500) time: 0.7241 data: 0.0003 max mem: 57114 Epoch: [208] [ 40/156] eta: 0:01:29 lr: 0.001939 min_lr: 0.001939 loss: 2.9192 (2.7795) weight_decay: 0.0500 (0.0500) time: 0.7247 data: 0.0004 max mem: 57114 Epoch: [208] [ 50/156] eta: 0:01:20 lr: 0.001937 min_lr: 0.001937 loss: 2.5062 (2.7563) weight_decay: 0.0500 (0.0500) time: 0.7299 data: 0.0004 max mem: 57114 Epoch: [208] [ 60/156] eta: 0:01:12 lr: 0.001934 min_lr: 0.001934 loss: 2.7591 (2.7825) weight_decay: 0.0500 (0.0500) time: 0.7279 data: 0.0004 max mem: 57114 Epoch: [208] [ 70/156] eta: 0:01:04 lr: 0.001932 min_lr: 0.001932 loss: 2.7995 (2.7872) weight_decay: 0.0500 (0.0500) time: 0.7124 data: 0.0004 max mem: 57114 Epoch: [208] [ 80/156] eta: 0:00:56 lr: 0.001929 min_lr: 0.001929 loss: 2.7038 (2.7863) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [208] [ 90/156] eta: 0:00:48 lr: 0.001927 min_lr: 0.001927 loss: 2.7950 (2.7907) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [208] [100/156] eta: 0:00:41 lr: 0.001925 min_lr: 0.001925 loss: 2.8195 (2.7903) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0004 max mem: 57114 Epoch: [208] [110/156] eta: 0:00:33 lr: 0.001922 min_lr: 0.001922 loss: 2.8155 (2.7735) weight_decay: 0.0500 (0.0500) time: 0.6991 data: 0.0004 max mem: 57114 Epoch: [208] [120/156] eta: 0:00:26 lr: 0.001920 min_lr: 0.001920 loss: 2.9433 (2.7944) weight_decay: 0.0500 (0.0500) time: 0.7022 data: 0.0004 max mem: 57114 Epoch: [208] [130/156] eta: 0:00:18 lr: 0.001917 min_lr: 0.001917 loss: 2.9844 (2.8112) weight_decay: 0.0500 (0.0500) time: 0.6950 data: 0.0009 max mem: 57114 Epoch: [208] [140/156] eta: 0:00:11 lr: 0.001915 min_lr: 0.001915 loss: 2.9073 (2.8151) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0007 max mem: 57114 Epoch: [208] [150/156] eta: 0:00:04 lr: 0.001912 min_lr: 0.001912 loss: 2.7715 (2.8060) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0001 max mem: 57114 Epoch: [208] [155/156] eta: 0:00:00 lr: 0.001911 min_lr: 0.001911 loss: 2.5928 (2.8007) weight_decay: 0.0500 (0.0500) time: 0.6808 data: 0.0001 max mem: 57114 Epoch: [208] Total time: 0:01:52 (0.7214 s / it) Averaged stats: lr: 0.001911 min_lr: 0.001911 loss: 2.5928 (2.8373) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6579 (0.6579) acc1: 86.9792 (86.9792) acc5: 98.9583 (98.9583) time: 2.1693 data: 1.9134 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8435 (0.7695) acc1: 82.8125 (82.8645) acc5: 96.8750 (96.9309) time: 0.5845 data: 0.3827 max mem: 57114 Test: Total time: 0:00:03 (0.6103 s / it) * Acc@1 82.151 Acc@5 96.112 loss 0.859 Accuracy of the model on the 50000 test images: 82.2% Max accuracy: 82.45% Test: [0/5] eta: 0:00:11 loss: 0.5953 (0.5953) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3392 data: 2.0958 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7171 (0.6882) acc1: 80.2083 (78.5166) acc5: 96.3542 (94.1177) time: 0.6185 data: 0.4192 max mem: 57114 Test: Total time: 0:00:03 (0.6324 s / it) * Acc@1 77.040 Acc@5 93.756 loss 0.833 Accuracy of the model EMA on 50000 test images: 77.0% Max EMA accuracy: 77.04% Epoch: [209] [ 0/156] eta: 0:05:55 lr: 0.001911 min_lr: 0.001911 loss: 3.2205 (3.2205) weight_decay: 0.0500 (0.0500) time: 2.2790 data: 1.6312 max mem: 57114 Epoch: [209] [ 10/156] eta: 0:02:06 lr: 0.001908 min_lr: 0.001908 loss: 2.9805 (2.7906) weight_decay: 0.0500 (0.0500) time: 0.8667 data: 0.1485 max mem: 57114 Epoch: [209] [ 20/156] eta: 0:01:49 lr: 0.001906 min_lr: 0.001906 loss: 2.6608 (2.7588) weight_decay: 0.0500 (0.0500) time: 0.7304 data: 0.0003 max mem: 57114 Epoch: [209] [ 30/156] eta: 0:01:38 lr: 0.001903 min_lr: 0.001903 loss: 2.8798 (2.7739) weight_decay: 0.0500 (0.0500) time: 0.7300 data: 0.0004 max mem: 57114 Epoch: [209] [ 40/156] eta: 0:01:28 lr: 0.001901 min_lr: 0.001901 loss: 3.1848 (2.8489) weight_decay: 0.0500 (0.0500) time: 0.7244 data: 0.0004 max mem: 57114 Epoch: [209] [ 50/156] eta: 0:01:20 lr: 0.001899 min_lr: 0.001899 loss: 2.9932 (2.8273) weight_decay: 0.0500 (0.0500) time: 0.7211 data: 0.0004 max mem: 57114 Epoch: [209] [ 60/156] eta: 0:01:12 lr: 0.001896 min_lr: 0.001896 loss: 2.8371 (2.8232) weight_decay: 0.0500 (0.0500) time: 0.7236 data: 0.0004 max mem: 57114 Epoch: [209] [ 70/156] eta: 0:01:04 lr: 0.001894 min_lr: 0.001894 loss: 2.8855 (2.8408) weight_decay: 0.0500 (0.0500) time: 0.7234 data: 0.0004 max mem: 57114 Epoch: [209] [ 80/156] eta: 0:00:56 lr: 0.001891 min_lr: 0.001891 loss: 3.0248 (2.8516) weight_decay: 0.0500 (0.0500) time: 0.7129 data: 0.0004 max mem: 57114 Epoch: [209] [ 90/156] eta: 0:00:48 lr: 0.001889 min_lr: 0.001889 loss: 2.8449 (2.8273) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [209] [100/156] eta: 0:00:41 lr: 0.001886 min_lr: 0.001886 loss: 2.6148 (2.8060) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0004 max mem: 57114 Epoch: [209] [110/156] eta: 0:00:33 lr: 0.001884 min_lr: 0.001884 loss: 2.8853 (2.8265) weight_decay: 0.0500 (0.0500) time: 0.7009 data: 0.0004 max mem: 57114 Epoch: [209] [120/156] eta: 0:00:26 lr: 0.001881 min_lr: 0.001881 loss: 3.0647 (2.8343) weight_decay: 0.0500 (0.0500) time: 0.6978 data: 0.0004 max mem: 57114 Epoch: [209] [130/156] eta: 0:00:18 lr: 0.001879 min_lr: 0.001879 loss: 3.0032 (2.8595) weight_decay: 0.0500 (0.0500) time: 0.6963 data: 0.0010 max mem: 57114 Epoch: [209] [140/156] eta: 0:00:11 lr: 0.001877 min_lr: 0.001877 loss: 2.9019 (2.8498) weight_decay: 0.0500 (0.0500) time: 0.6931 data: 0.0008 max mem: 57114 Epoch: [209] [150/156] eta: 0:00:04 lr: 0.001874 min_lr: 0.001874 loss: 2.7419 (2.8375) weight_decay: 0.0500 (0.0500) time: 0.6877 data: 0.0001 max mem: 57114 Epoch: [209] [155/156] eta: 0:00:00 lr: 0.001873 min_lr: 0.001873 loss: 2.7713 (2.8473) weight_decay: 0.0500 (0.0500) time: 0.6893 data: 0.0001 max mem: 57114 Epoch: [209] Total time: 0:01:52 (0.7225 s / it) Averaged stats: lr: 0.001873 min_lr: 0.001873 loss: 2.7713 (2.8363) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7393 (0.7393) acc1: 89.5833 (89.5833) acc5: 98.9583 (98.9583) time: 2.1062 data: 1.8476 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9426 (0.8733) acc1: 81.7708 (82.0972) acc5: 97.9167 (97.1867) time: 0.5720 data: 0.3696 max mem: 57114 Test: Total time: 0:00:02 (0.5960 s / it) * Acc@1 82.363 Acc@5 96.250 loss 0.954 Accuracy of the model on the 50000 test images: 82.4% Max accuracy: 82.45% Test: [0/5] eta: 0:00:11 loss: 0.5939 (0.5939) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2756 data: 2.0321 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7154 (0.6866) acc1: 80.7292 (78.6445) acc5: 96.3542 (94.1177) time: 0.6058 data: 0.4065 max mem: 57114 Test: Total time: 0:00:03 (0.6192 s / it) * Acc@1 77.108 Acc@5 93.770 loss 0.831 Accuracy of the model EMA on 50000 test images: 77.1% Max EMA accuracy: 77.11% Epoch: [210] [ 0/156] eta: 0:06:49 lr: 0.001873 min_lr: 0.001873 loss: 1.8876 (1.8876) weight_decay: 0.0500 (0.0500) time: 2.6278 data: 1.9814 max mem: 57114 Epoch: [210] [ 10/156] eta: 0:02:07 lr: 0.001870 min_lr: 0.001870 loss: 2.9087 (2.8264) weight_decay: 0.0500 (0.0500) time: 0.8740 data: 0.1804 max mem: 57114 Epoch: [210] [ 20/156] eta: 0:01:48 lr: 0.001868 min_lr: 0.001868 loss: 2.9087 (2.8784) weight_decay: 0.0500 (0.0500) time: 0.7095 data: 0.0004 max mem: 57114 Epoch: [210] [ 30/156] eta: 0:01:36 lr: 0.001865 min_lr: 0.001865 loss: 2.7975 (2.8876) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0004 max mem: 57114 Epoch: [210] [ 40/156] eta: 0:01:28 lr: 0.001863 min_lr: 0.001863 loss: 2.9561 (2.8899) weight_decay: 0.0500 (0.0500) time: 0.7215 data: 0.0004 max mem: 57114 Epoch: [210] [ 50/156] eta: 0:01:19 lr: 0.001860 min_lr: 0.001860 loss: 3.0177 (2.8810) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0004 max mem: 57114 Epoch: [210] [ 60/156] eta: 0:01:11 lr: 0.001858 min_lr: 0.001858 loss: 3.0304 (2.8936) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [210] [ 70/156] eta: 0:01:04 lr: 0.001856 min_lr: 0.001856 loss: 3.0808 (2.9262) weight_decay: 0.0500 (0.0500) time: 0.7275 data: 0.0004 max mem: 57114 Epoch: [210] [ 80/156] eta: 0:00:56 lr: 0.001853 min_lr: 0.001853 loss: 3.0093 (2.8901) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [210] [ 90/156] eta: 0:00:48 lr: 0.001851 min_lr: 0.001851 loss: 2.9949 (2.9149) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0004 max mem: 57114 Epoch: [210] [100/156] eta: 0:00:40 lr: 0.001848 min_lr: 0.001848 loss: 3.1043 (2.9181) weight_decay: 0.0500 (0.0500) time: 0.6958 data: 0.0004 max mem: 57114 Epoch: [210] [110/156] eta: 0:00:33 lr: 0.001846 min_lr: 0.001846 loss: 3.0506 (2.9087) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0004 max mem: 57114 Epoch: [210] [120/156] eta: 0:00:26 lr: 0.001843 min_lr: 0.001843 loss: 2.8883 (2.8875) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0004 max mem: 57114 Epoch: [210] [130/156] eta: 0:00:18 lr: 0.001841 min_lr: 0.001841 loss: 2.8476 (2.8786) weight_decay: 0.0500 (0.0500) time: 0.6991 data: 0.0009 max mem: 57114 Epoch: [210] [140/156] eta: 0:00:11 lr: 0.001839 min_lr: 0.001839 loss: 3.0155 (2.8684) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0007 max mem: 57114 Epoch: [210] [150/156] eta: 0:00:04 lr: 0.001836 min_lr: 0.001836 loss: 2.9874 (2.8691) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0001 max mem: 57114 Epoch: [210] [155/156] eta: 0:00:00 lr: 0.001835 min_lr: 0.001835 loss: 2.9272 (2.8727) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0001 max mem: 57114 Epoch: [210] Total time: 0:01:52 (0.7212 s / it) Averaged stats: lr: 0.001835 min_lr: 0.001835 loss: 2.9272 (2.8311) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7164 (0.7164) acc1: 88.0208 (88.0208) acc5: 98.9583 (98.9583) time: 2.0817 data: 1.8263 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9071 (0.8340) acc1: 82.8125 (83.7596) acc5: 98.4375 (96.9309) time: 0.5670 data: 0.3653 max mem: 57114 Test: Total time: 0:00:02 (0.5909 s / it) * Acc@1 82.451 Acc@5 96.344 loss 0.933 Accuracy of the model on the 50000 test images: 82.5% Max accuracy: 82.45% Test: [0/5] eta: 0:00:10 loss: 0.5924 (0.5924) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0267 data: 1.7832 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7134 (0.6848) acc1: 80.7292 (78.7724) acc5: 96.3542 (94.1177) time: 0.5560 data: 0.3567 max mem: 57114 Test: Total time: 0:00:02 (0.5687 s / it) * Acc@1 77.150 Acc@5 93.788 loss 0.829 Accuracy of the model EMA on 50000 test images: 77.1% Max EMA accuracy: 77.15% Epoch: [211] [ 0/156] eta: 0:06:46 lr: 0.001835 min_lr: 0.001835 loss: 2.2324 (2.2324) weight_decay: 0.0500 (0.0500) time: 2.6040 data: 1.9459 max mem: 57114 Epoch: [211] [ 10/156] eta: 0:02:12 lr: 0.001832 min_lr: 0.001832 loss: 2.8865 (2.7494) weight_decay: 0.0500 (0.0500) time: 0.9070 data: 0.1771 max mem: 57114 Epoch: [211] [ 20/156] eta: 0:01:51 lr: 0.001830 min_lr: 0.001830 loss: 2.9698 (2.8458) weight_decay: 0.0500 (0.0500) time: 0.7317 data: 0.0003 max mem: 57114 Epoch: [211] [ 30/156] eta: 0:01:40 lr: 0.001828 min_lr: 0.001828 loss: 2.9698 (2.8725) weight_decay: 0.0500 (0.0500) time: 0.7318 data: 0.0004 max mem: 57114 Epoch: [211] [ 40/156] eta: 0:01:30 lr: 0.001825 min_lr: 0.001825 loss: 2.7753 (2.8481) weight_decay: 0.0500 (0.0500) time: 0.7320 data: 0.0004 max mem: 57114 Epoch: [211] [ 50/156] eta: 0:01:21 lr: 0.001823 min_lr: 0.001823 loss: 2.7357 (2.8277) weight_decay: 0.0500 (0.0500) time: 0.7367 data: 0.0004 max mem: 57114 Epoch: [211] [ 60/156] eta: 0:01:13 lr: 0.001820 min_lr: 0.001820 loss: 2.9037 (2.8149) weight_decay: 0.0500 (0.0500) time: 0.7411 data: 0.0004 max mem: 57114 Epoch: [211] [ 70/156] eta: 0:01:05 lr: 0.001818 min_lr: 0.001818 loss: 2.6284 (2.7782) weight_decay: 0.0500 (0.0500) time: 0.7327 data: 0.0004 max mem: 57114 Epoch: [211] [ 80/156] eta: 0:00:57 lr: 0.001815 min_lr: 0.001815 loss: 2.7352 (2.7930) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0004 max mem: 57114 Epoch: [211] [ 90/156] eta: 0:00:49 lr: 0.001813 min_lr: 0.001813 loss: 2.8236 (2.7991) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0004 max mem: 57114 Epoch: [211] [100/156] eta: 0:00:41 lr: 0.001811 min_lr: 0.001811 loss: 2.8873 (2.8024) weight_decay: 0.0500 (0.0500) time: 0.7038 data: 0.0004 max mem: 57114 Epoch: [211] [110/156] eta: 0:00:34 lr: 0.001808 min_lr: 0.001808 loss: 3.1638 (2.8285) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [211] [120/156] eta: 0:00:26 lr: 0.001806 min_lr: 0.001806 loss: 3.0965 (2.8288) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [211] [130/156] eta: 0:00:19 lr: 0.001803 min_lr: 0.001803 loss: 2.9411 (2.8417) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0009 max mem: 57114 Epoch: [211] [140/156] eta: 0:00:11 lr: 0.001801 min_lr: 0.001801 loss: 2.9726 (2.8514) weight_decay: 0.0500 (0.0500) time: 0.6949 data: 0.0007 max mem: 57114 Epoch: [211] [150/156] eta: 0:00:04 lr: 0.001799 min_lr: 0.001799 loss: 2.9880 (2.8600) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0001 max mem: 57114 Epoch: [211] [155/156] eta: 0:00:00 lr: 0.001797 min_lr: 0.001797 loss: 2.9726 (2.8473) weight_decay: 0.0500 (0.0500) time: 0.6833 data: 0.0001 max mem: 57114 Epoch: [211] Total time: 0:01:53 (0.7295 s / it) Averaged stats: lr: 0.001797 min_lr: 0.001797 loss: 2.9726 (2.8316) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6444 (0.6444) acc1: 87.5000 (87.5000) acc5: 98.4375 (98.4375) time: 2.0453 data: 1.7895 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8285 (0.7840) acc1: 83.8542 (82.7366) acc5: 98.4375 (96.6752) time: 0.5597 data: 0.3580 max mem: 57114 Test: Total time: 0:00:02 (0.5842 s / it) * Acc@1 82.235 Acc@5 96.120 loss 0.872 Accuracy of the model on the 50000 test images: 82.2% Max accuracy: 82.45% Test: [0/5] eta: 0:00:11 loss: 0.5910 (0.5910) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2922 data: 2.0487 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7116 (0.6832) acc1: 80.7292 (78.7724) acc5: 96.3542 (94.1177) time: 0.6091 data: 0.4098 max mem: 57114 Test: Total time: 0:00:03 (0.6206 s / it) * Acc@1 77.202 Acc@5 93.826 loss 0.827 Accuracy of the model EMA on 50000 test images: 77.2% Max EMA accuracy: 77.20% Epoch: [212] [ 0/156] eta: 0:06:25 lr: 0.001797 min_lr: 0.001797 loss: 2.5425 (2.5425) weight_decay: 0.0500 (0.0500) time: 2.4691 data: 1.8108 max mem: 57114 Epoch: [212] [ 10/156] eta: 0:02:06 lr: 0.001795 min_lr: 0.001795 loss: 2.9265 (2.8999) weight_decay: 0.0500 (0.0500) time: 0.8650 data: 0.1649 max mem: 57114 Epoch: [212] [ 20/156] eta: 0:01:48 lr: 0.001792 min_lr: 0.001792 loss: 2.9320 (2.8968) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0003 max mem: 57114 Epoch: [212] [ 30/156] eta: 0:01:36 lr: 0.001790 min_lr: 0.001790 loss: 2.8940 (2.8384) weight_decay: 0.0500 (0.0500) time: 0.7155 data: 0.0004 max mem: 57114 Epoch: [212] [ 40/156] eta: 0:01:27 lr: 0.001788 min_lr: 0.001788 loss: 2.9729 (2.8943) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [212] [ 50/156] eta: 0:01:19 lr: 0.001785 min_lr: 0.001785 loss: 3.0534 (2.8944) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [212] [ 60/156] eta: 0:01:11 lr: 0.001783 min_lr: 0.001783 loss: 2.9359 (2.8789) weight_decay: 0.0500 (0.0500) time: 0.7256 data: 0.0004 max mem: 57114 Epoch: [212] [ 70/156] eta: 0:01:03 lr: 0.001780 min_lr: 0.001780 loss: 2.7168 (2.8216) weight_decay: 0.0500 (0.0500) time: 0.7286 data: 0.0003 max mem: 57114 Epoch: [212] [ 80/156] eta: 0:00:56 lr: 0.001778 min_lr: 0.001778 loss: 2.6998 (2.8020) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0004 max mem: 57114 Epoch: [212] [ 90/156] eta: 0:00:48 lr: 0.001776 min_lr: 0.001776 loss: 2.8671 (2.8140) weight_decay: 0.0500 (0.0500) time: 0.7067 data: 0.0004 max mem: 57114 Epoch: [212] [100/156] eta: 0:00:40 lr: 0.001773 min_lr: 0.001773 loss: 2.9251 (2.8133) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0004 max mem: 57114 Epoch: [212] [110/156] eta: 0:00:33 lr: 0.001771 min_lr: 0.001771 loss: 2.9506 (2.8076) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [212] [120/156] eta: 0:00:26 lr: 0.001768 min_lr: 0.001768 loss: 3.0144 (2.8126) weight_decay: 0.0500 (0.0500) time: 0.6975 data: 0.0003 max mem: 57114 Epoch: [212] [130/156] eta: 0:00:18 lr: 0.001766 min_lr: 0.001766 loss: 2.9154 (2.8157) weight_decay: 0.0500 (0.0500) time: 0.6976 data: 0.0008 max mem: 57114 Epoch: [212] [140/156] eta: 0:00:11 lr: 0.001764 min_lr: 0.001764 loss: 2.7521 (2.8109) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0007 max mem: 57114 Epoch: [212] [150/156] eta: 0:00:04 lr: 0.001761 min_lr: 0.001761 loss: 2.7204 (2.7982) weight_decay: 0.0500 (0.0500) time: 0.6869 data: 0.0001 max mem: 57114 Epoch: [212] [155/156] eta: 0:00:00 lr: 0.001760 min_lr: 0.001760 loss: 2.6853 (2.8029) weight_decay: 0.0500 (0.0500) time: 0.6863 data: 0.0001 max mem: 57114 Epoch: [212] Total time: 0:01:52 (0.7208 s / it) Averaged stats: lr: 0.001760 min_lr: 0.001760 loss: 2.6853 (2.8191) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5953 (0.5953) acc1: 91.1458 (91.1458) acc5: 98.9583 (98.9583) time: 2.0655 data: 1.8098 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8914 (0.7964) acc1: 81.2500 (82.8645) acc5: 97.3958 (96.6752) time: 0.5638 data: 0.3620 max mem: 57114 Test: Total time: 0:00:02 (0.5850 s / it) * Acc@1 82.491 Acc@5 96.144 loss 0.867 Accuracy of the model on the 50000 test images: 82.5% Max accuracy: 82.49% Test: [0/5] eta: 0:00:10 loss: 0.5903 (0.5903) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0923 data: 1.8488 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7099 (0.6819) acc1: 80.7292 (78.9003) acc5: 96.3542 (94.1177) time: 0.5691 data: 0.3698 max mem: 57114 Test: Total time: 0:00:02 (0.5818 s / it) * Acc@1 77.246 Acc@5 93.858 loss 0.825 Accuracy of the model EMA on 50000 test images: 77.2% Max EMA accuracy: 77.25% Epoch: [213] [ 0/156] eta: 0:08:00 lr: 0.001760 min_lr: 0.001760 loss: 2.7628 (2.7628) weight_decay: 0.0500 (0.0500) time: 3.0796 data: 2.4280 max mem: 57114 Epoch: [213] [ 10/156] eta: 0:02:17 lr: 0.001757 min_lr: 0.001757 loss: 2.8211 (2.8692) weight_decay: 0.0500 (0.0500) time: 0.9402 data: 0.2210 max mem: 57114 Epoch: [213] [ 20/156] eta: 0:01:54 lr: 0.001755 min_lr: 0.001755 loss: 3.0785 (2.9435) weight_decay: 0.0500 (0.0500) time: 0.7320 data: 0.0004 max mem: 57114 Epoch: [213] [ 30/156] eta: 0:01:41 lr: 0.001753 min_lr: 0.001753 loss: 2.9407 (2.8709) weight_decay: 0.0500 (0.0500) time: 0.7351 data: 0.0004 max mem: 57114 Epoch: [213] [ 40/156] eta: 0:01:31 lr: 0.001750 min_lr: 0.001750 loss: 2.8129 (2.8800) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0003 max mem: 57114 Epoch: [213] [ 50/156] eta: 0:01:22 lr: 0.001748 min_lr: 0.001748 loss: 2.8129 (2.8463) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0004 max mem: 57114 Epoch: [213] [ 60/156] eta: 0:01:13 lr: 0.001746 min_lr: 0.001746 loss: 2.8774 (2.8615) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0004 max mem: 57114 Epoch: [213] [ 70/156] eta: 0:01:05 lr: 0.001743 min_lr: 0.001743 loss: 2.9847 (2.8758) weight_decay: 0.0500 (0.0500) time: 0.7127 data: 0.0004 max mem: 57114 Epoch: [213] [ 80/156] eta: 0:00:57 lr: 0.001741 min_lr: 0.001741 loss: 2.8885 (2.8756) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [213] [ 90/156] eta: 0:00:49 lr: 0.001738 min_lr: 0.001738 loss: 2.9550 (2.8852) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0004 max mem: 57114 Epoch: [213] [100/156] eta: 0:00:41 lr: 0.001736 min_lr: 0.001736 loss: 2.9926 (2.8886) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0004 max mem: 57114 Epoch: [213] [110/156] eta: 0:00:34 lr: 0.001734 min_lr: 0.001734 loss: 2.9075 (2.8819) weight_decay: 0.0500 (0.0500) time: 0.6985 data: 0.0003 max mem: 57114 Epoch: [213] [120/156] eta: 0:00:26 lr: 0.001731 min_lr: 0.001731 loss: 2.7935 (2.8783) weight_decay: 0.0500 (0.0500) time: 0.6997 data: 0.0004 max mem: 57114 Epoch: [213] [130/156] eta: 0:00:19 lr: 0.001729 min_lr: 0.001729 loss: 2.8508 (2.8825) weight_decay: 0.0500 (0.0500) time: 0.7009 data: 0.0009 max mem: 57114 Epoch: [213] [140/156] eta: 0:00:11 lr: 0.001727 min_lr: 0.001727 loss: 2.8508 (2.8666) weight_decay: 0.0500 (0.0500) time: 0.6892 data: 0.0007 max mem: 57114 Epoch: [213] [150/156] eta: 0:00:04 lr: 0.001724 min_lr: 0.001724 loss: 2.8140 (2.8604) weight_decay: 0.0500 (0.0500) time: 0.6835 data: 0.0001 max mem: 57114 Epoch: [213] [155/156] eta: 0:00:00 lr: 0.001723 min_lr: 0.001723 loss: 2.8597 (2.8644) weight_decay: 0.0500 (0.0500) time: 0.6809 data: 0.0001 max mem: 57114 Epoch: [213] Total time: 0:01:53 (0.7277 s / it) Averaged stats: lr: 0.001723 min_lr: 0.001723 loss: 2.8597 (2.8138) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6350 (0.6350) acc1: 88.5417 (88.5417) acc5: 98.9583 (98.9583) time: 2.0278 data: 1.7716 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8248 (0.7654) acc1: 84.8958 (83.1202) acc5: 97.3958 (97.3146) time: 0.5563 data: 0.3544 max mem: 57114 Test: Total time: 0:00:02 (0.5779 s / it) * Acc@1 82.587 Acc@5 96.266 loss 0.864 Accuracy of the model on the 50000 test images: 82.6% Max accuracy: 82.59% Test: [0/5] eta: 0:00:09 loss: 0.5895 (0.5895) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 1.9865 data: 1.7430 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7082 (0.6806) acc1: 80.7292 (78.9003) acc5: 96.3542 (94.1177) time: 0.5479 data: 0.3487 max mem: 57114 Test: Total time: 0:00:02 (0.5583 s / it) * Acc@1 77.280 Acc@5 93.858 loss 0.823 Accuracy of the model EMA on 50000 test images: 77.3% Max EMA accuracy: 77.28% Epoch: [214] [ 0/156] eta: 0:07:03 lr: 0.001723 min_lr: 0.001723 loss: 3.1968 (3.1968) weight_decay: 0.0500 (0.0500) time: 2.7135 data: 2.0637 max mem: 57114 Epoch: [214] [ 10/156] eta: 0:02:11 lr: 0.001720 min_lr: 0.001720 loss: 2.8807 (2.7945) weight_decay: 0.0500 (0.0500) time: 0.8992 data: 0.1879 max mem: 57114 Epoch: [214] [ 20/156] eta: 0:01:51 lr: 0.001718 min_lr: 0.001718 loss: 2.8179 (2.7614) weight_decay: 0.0500 (0.0500) time: 0.7240 data: 0.0003 max mem: 57114 Epoch: [214] [ 30/156] eta: 0:01:38 lr: 0.001716 min_lr: 0.001716 loss: 2.5409 (2.6516) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0004 max mem: 57114 Epoch: [214] [ 40/156] eta: 0:01:29 lr: 0.001713 min_lr: 0.001713 loss: 2.7182 (2.6841) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0004 max mem: 57114 Epoch: [214] [ 50/156] eta: 0:01:21 lr: 0.001711 min_lr: 0.001711 loss: 2.7617 (2.6942) weight_decay: 0.0500 (0.0500) time: 0.7407 data: 0.0004 max mem: 57114 Epoch: [214] [ 60/156] eta: 0:01:12 lr: 0.001709 min_lr: 0.001709 loss: 2.7617 (2.7030) weight_decay: 0.0500 (0.0500) time: 0.7360 data: 0.0004 max mem: 57114 Epoch: [214] [ 70/156] eta: 0:01:04 lr: 0.001706 min_lr: 0.001706 loss: 2.8029 (2.7121) weight_decay: 0.0500 (0.0500) time: 0.7240 data: 0.0003 max mem: 57114 Epoch: [214] [ 80/156] eta: 0:00:57 lr: 0.001704 min_lr: 0.001704 loss: 2.8139 (2.7303) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0003 max mem: 57114 Epoch: [214] [ 90/156] eta: 0:00:49 lr: 0.001702 min_lr: 0.001702 loss: 2.9338 (2.7680) weight_decay: 0.0500 (0.0500) time: 0.7218 data: 0.0004 max mem: 57114 Epoch: [214] [100/156] eta: 0:00:41 lr: 0.001699 min_lr: 0.001699 loss: 3.0048 (2.7920) weight_decay: 0.0500 (0.0500) time: 0.7064 data: 0.0004 max mem: 57114 Epoch: [214] [110/156] eta: 0:00:34 lr: 0.001697 min_lr: 0.001697 loss: 2.9215 (2.7781) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [214] [120/156] eta: 0:00:26 lr: 0.001695 min_lr: 0.001695 loss: 2.9665 (2.7995) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0004 max mem: 57114 Epoch: [214] [130/156] eta: 0:00:19 lr: 0.001692 min_lr: 0.001692 loss: 3.0128 (2.8098) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0008 max mem: 57114 Epoch: [214] [140/156] eta: 0:00:11 lr: 0.001690 min_lr: 0.001690 loss: 3.0215 (2.8177) weight_decay: 0.0500 (0.0500) time: 0.6969 data: 0.0007 max mem: 57114 Epoch: [214] [150/156] eta: 0:00:04 lr: 0.001688 min_lr: 0.001688 loss: 3.0253 (2.8284) weight_decay: 0.0500 (0.0500) time: 0.6816 data: 0.0001 max mem: 57114 Epoch: [214] [155/156] eta: 0:00:00 lr: 0.001686 min_lr: 0.001686 loss: 2.9849 (2.8256) weight_decay: 0.0500 (0.0500) time: 0.6769 data: 0.0001 max mem: 57114 Epoch: [214] Total time: 0:01:53 (0.7276 s / it) Averaged stats: lr: 0.001686 min_lr: 0.001686 loss: 2.9849 (2.8047) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6697 (0.6697) acc1: 89.0625 (89.0625) acc5: 98.4375 (98.4375) time: 2.0978 data: 1.8419 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8195 (0.8152) acc1: 82.8125 (81.4578) acc5: 97.9167 (97.0588) time: 0.5703 data: 0.3684 max mem: 57114 Test: Total time: 0:00:02 (0.5916 s / it) * Acc@1 82.121 Acc@5 96.076 loss 0.889 Accuracy of the model on the 50000 test images: 82.1% Max accuracy: 82.59% Test: [0/5] eta: 0:00:11 loss: 0.5888 (0.5888) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3700 data: 2.1264 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7068 (0.6796) acc1: 80.7292 (78.9003) acc5: 96.3542 (94.1177) time: 0.6247 data: 0.4254 max mem: 57114 Test: Total time: 0:00:03 (0.6361 s / it) * Acc@1 77.292 Acc@5 93.874 loss 0.822 Accuracy of the model EMA on 50000 test images: 77.3% Max EMA accuracy: 77.29% Epoch: [215] [ 0/156] eta: 0:06:55 lr: 0.001686 min_lr: 0.001686 loss: 2.9261 (2.9261) weight_decay: 0.0500 (0.0500) time: 2.6639 data: 2.0164 max mem: 57114 Epoch: [215] [ 10/156] eta: 0:02:12 lr: 0.001684 min_lr: 0.001684 loss: 2.9261 (2.8429) weight_decay: 0.0500 (0.0500) time: 0.9053 data: 0.1835 max mem: 57114 Epoch: [215] [ 20/156] eta: 0:01:51 lr: 0.001681 min_lr: 0.001681 loss: 3.0051 (2.9248) weight_decay: 0.0500 (0.0500) time: 0.7306 data: 0.0003 max mem: 57114 Epoch: [215] [ 30/156] eta: 0:01:38 lr: 0.001679 min_lr: 0.001679 loss: 2.9810 (2.8710) weight_decay: 0.0500 (0.0500) time: 0.7172 data: 0.0004 max mem: 57114 Epoch: [215] [ 40/156] eta: 0:01:29 lr: 0.001677 min_lr: 0.001677 loss: 2.9570 (2.8840) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0004 max mem: 57114 Epoch: [215] [ 50/156] eta: 0:01:20 lr: 0.001674 min_lr: 0.001674 loss: 3.0666 (2.8646) weight_decay: 0.0500 (0.0500) time: 0.7241 data: 0.0004 max mem: 57114 Epoch: [215] [ 60/156] eta: 0:01:12 lr: 0.001672 min_lr: 0.001672 loss: 2.9298 (2.8645) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0004 max mem: 57114 Epoch: [215] [ 70/156] eta: 0:01:04 lr: 0.001670 min_lr: 0.001670 loss: 2.9803 (2.8748) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0004 max mem: 57114 Epoch: [215] [ 80/156] eta: 0:00:56 lr: 0.001667 min_lr: 0.001667 loss: 2.9275 (2.8565) weight_decay: 0.0500 (0.0500) time: 0.7131 data: 0.0004 max mem: 57114 Epoch: [215] [ 90/156] eta: 0:00:48 lr: 0.001665 min_lr: 0.001665 loss: 3.0788 (2.8874) weight_decay: 0.0500 (0.0500) time: 0.7045 data: 0.0004 max mem: 57114 Epoch: [215] [100/156] eta: 0:00:41 lr: 0.001663 min_lr: 0.001663 loss: 3.0542 (2.8607) weight_decay: 0.0500 (0.0500) time: 0.6975 data: 0.0004 max mem: 57114 Epoch: [215] [110/156] eta: 0:00:33 lr: 0.001660 min_lr: 0.001660 loss: 2.6133 (2.8330) weight_decay: 0.0500 (0.0500) time: 0.6937 data: 0.0004 max mem: 57114 Epoch: [215] [120/156] eta: 0:00:26 lr: 0.001658 min_lr: 0.001658 loss: 2.7857 (2.8333) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [215] [130/156] eta: 0:00:18 lr: 0.001656 min_lr: 0.001656 loss: 2.8538 (2.8228) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0008 max mem: 57114 Epoch: [215] [140/156] eta: 0:00:11 lr: 0.001653 min_lr: 0.001653 loss: 2.8923 (2.8307) weight_decay: 0.0500 (0.0500) time: 0.6913 data: 0.0007 max mem: 57114 Epoch: [215] [150/156] eta: 0:00:04 lr: 0.001651 min_lr: 0.001651 loss: 2.7870 (2.8100) weight_decay: 0.0500 (0.0500) time: 0.6938 data: 0.0001 max mem: 57114 Epoch: [215] [155/156] eta: 0:00:00 lr: 0.001650 min_lr: 0.001650 loss: 2.7864 (2.8135) weight_decay: 0.0500 (0.0500) time: 0.6919 data: 0.0001 max mem: 57114 Epoch: [215] Total time: 0:01:52 (0.7227 s / it) Averaged stats: lr: 0.001650 min_lr: 0.001650 loss: 2.7864 (2.8011) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6498 (0.6498) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 2.0225 data: 1.7670 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9024 (0.8064) acc1: 82.2917 (82.6087) acc5: 96.3542 (96.5473) time: 0.5551 data: 0.3535 max mem: 57114 Test: Total time: 0:00:02 (0.5749 s / it) * Acc@1 82.379 Acc@5 96.282 loss 0.880 Accuracy of the model on the 50000 test images: 82.4% Max accuracy: 82.59% Test: [0/5] eta: 0:00:11 loss: 0.5880 (0.5880) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2700 data: 2.0264 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7054 (0.6786) acc1: 80.7292 (78.9003) acc5: 96.8750 (94.2455) time: 0.6046 data: 0.4053 max mem: 57114 Test: Total time: 0:00:03 (0.6188 s / it) * Acc@1 77.326 Acc@5 93.892 loss 0.820 Accuracy of the model EMA on 50000 test images: 77.3% Max EMA accuracy: 77.33% Epoch: [216] [ 0/156] eta: 0:06:52 lr: 0.001650 min_lr: 0.001650 loss: 3.2552 (3.2552) weight_decay: 0.0500 (0.0500) time: 2.6449 data: 1.9959 max mem: 57114 Epoch: [216] [ 10/156] eta: 0:02:07 lr: 0.001647 min_lr: 0.001647 loss: 2.9157 (2.8355) weight_decay: 0.0500 (0.0500) time: 0.8757 data: 0.1817 max mem: 57114 Epoch: [216] [ 20/156] eta: 0:01:48 lr: 0.001645 min_lr: 0.001645 loss: 2.8921 (2.8048) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0003 max mem: 57114 Epoch: [216] [ 30/156] eta: 0:01:37 lr: 0.001643 min_lr: 0.001643 loss: 2.7639 (2.6903) weight_decay: 0.0500 (0.0500) time: 0.7204 data: 0.0003 max mem: 57114 Epoch: [216] [ 40/156] eta: 0:01:28 lr: 0.001640 min_lr: 0.001640 loss: 2.6247 (2.6805) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0004 max mem: 57114 Epoch: [216] [ 50/156] eta: 0:01:20 lr: 0.001638 min_lr: 0.001638 loss: 2.8831 (2.7425) weight_decay: 0.0500 (0.0500) time: 0.7258 data: 0.0005 max mem: 57114 Epoch: [216] [ 60/156] eta: 0:01:12 lr: 0.001636 min_lr: 0.001636 loss: 2.8512 (2.7135) weight_decay: 0.0500 (0.0500) time: 0.7344 data: 0.0004 max mem: 57114 Epoch: [216] [ 70/156] eta: 0:01:04 lr: 0.001633 min_lr: 0.001633 loss: 2.5431 (2.7032) weight_decay: 0.0500 (0.0500) time: 0.7321 data: 0.0004 max mem: 57114 Epoch: [216] [ 80/156] eta: 0:00:56 lr: 0.001631 min_lr: 0.001631 loss: 2.8804 (2.7082) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0004 max mem: 57114 Epoch: [216] [ 90/156] eta: 0:00:48 lr: 0.001629 min_lr: 0.001629 loss: 2.7075 (2.6930) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [216] [100/156] eta: 0:00:41 lr: 0.001626 min_lr: 0.001626 loss: 2.7075 (2.6913) weight_decay: 0.0500 (0.0500) time: 0.7049 data: 0.0004 max mem: 57114 Epoch: [216] [110/156] eta: 0:00:33 lr: 0.001624 min_lr: 0.001624 loss: 3.0020 (2.7203) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [216] [120/156] eta: 0:00:26 lr: 0.001622 min_lr: 0.001622 loss: 2.9910 (2.7291) weight_decay: 0.0500 (0.0500) time: 0.7159 data: 0.0004 max mem: 57114 Epoch: [216] [130/156] eta: 0:00:18 lr: 0.001620 min_lr: 0.001620 loss: 2.8891 (2.7343) weight_decay: 0.0500 (0.0500) time: 0.6997 data: 0.0005 max mem: 57114 Epoch: [216] [140/156] eta: 0:00:11 lr: 0.001617 min_lr: 0.001617 loss: 2.9321 (2.7400) weight_decay: 0.0500 (0.0500) time: 0.6834 data: 0.0004 max mem: 57114 Epoch: [216] [150/156] eta: 0:00:04 lr: 0.001615 min_lr: 0.001615 loss: 2.9792 (2.7486) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [216] [155/156] eta: 0:00:00 lr: 0.001614 min_lr: 0.001614 loss: 2.9907 (2.7574) weight_decay: 0.0500 (0.0500) time: 0.6879 data: 0.0001 max mem: 57114 Epoch: [216] Total time: 0:01:53 (0.7247 s / it) Averaged stats: lr: 0.001614 min_lr: 0.001614 loss: 2.9907 (2.7977) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7400 (0.7400) acc1: 90.1042 (90.1042) acc5: 98.9583 (98.9583) time: 2.0434 data: 1.7863 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9928 (0.9050) acc1: 81.7708 (82.8645) acc5: 97.3958 (96.2916) time: 0.5596 data: 0.3574 max mem: 57114 Test: Total time: 0:00:02 (0.5831 s / it) * Acc@1 82.709 Acc@5 96.282 loss 0.974 Accuracy of the model on the 50000 test images: 82.7% Max accuracy: 82.71% Test: [0/5] eta: 0:00:10 loss: 0.5876 (0.5876) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0626 data: 1.8190 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7040 (0.6778) acc1: 81.2500 (79.0281) acc5: 96.8750 (94.2455) time: 0.5631 data: 0.3639 max mem: 57114 Test: Total time: 0:00:02 (0.5732 s / it) * Acc@1 77.316 Acc@5 93.908 loss 0.819 Accuracy of the model EMA on 50000 test images: 77.3% Epoch: [217] [ 0/156] eta: 0:13:12 lr: 0.001613 min_lr: 0.001613 loss: 2.4681 (2.4681) weight_decay: 0.0500 (0.0500) time: 5.0817 data: 3.5839 max mem: 57114 Epoch: [217] [ 10/156] eta: 0:02:45 lr: 0.001611 min_lr: 0.001611 loss: 3.0939 (2.9551) weight_decay: 0.0500 (0.0500) time: 1.1352 data: 0.3262 max mem: 57114 Epoch: [217] [ 20/156] eta: 0:02:07 lr: 0.001609 min_lr: 0.001609 loss: 3.0579 (2.9152) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0004 max mem: 57114 Epoch: [217] [ 30/156] eta: 0:01:50 lr: 0.001607 min_lr: 0.001607 loss: 2.9400 (2.8891) weight_decay: 0.0500 (0.0500) time: 0.7390 data: 0.0003 max mem: 57114 Epoch: [217] [ 40/156] eta: 0:01:37 lr: 0.001604 min_lr: 0.001604 loss: 2.7994 (2.8673) weight_decay: 0.0500 (0.0500) time: 0.7414 data: 0.0003 max mem: 57114 Epoch: [217] [ 50/156] eta: 0:01:26 lr: 0.001602 min_lr: 0.001602 loss: 2.9340 (2.8602) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0003 max mem: 57114 Epoch: [217] [ 60/156] eta: 0:01:17 lr: 0.001600 min_lr: 0.001600 loss: 2.8926 (2.7992) weight_decay: 0.0500 (0.0500) time: 0.7384 data: 0.0004 max mem: 57114 Epoch: [217] [ 70/156] eta: 0:01:08 lr: 0.001597 min_lr: 0.001597 loss: 2.4654 (2.7570) weight_decay: 0.0500 (0.0500) time: 0.7373 data: 0.0004 max mem: 57114 Epoch: [217] [ 80/156] eta: 0:00:59 lr: 0.001595 min_lr: 0.001595 loss: 2.7259 (2.7903) weight_decay: 0.0500 (0.0500) time: 0.7106 data: 0.0004 max mem: 57114 Epoch: [217] [ 90/156] eta: 0:00:51 lr: 0.001593 min_lr: 0.001593 loss: 2.9721 (2.7828) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [217] [100/156] eta: 0:00:43 lr: 0.001590 min_lr: 0.001590 loss: 2.5400 (2.7516) weight_decay: 0.0500 (0.0500) time: 0.7084 data: 0.0004 max mem: 57114 Epoch: [217] [110/156] eta: 0:00:35 lr: 0.001588 min_lr: 0.001588 loss: 2.6261 (2.7546) weight_decay: 0.0500 (0.0500) time: 0.7061 data: 0.0004 max mem: 57114 Epoch: [217] [120/156] eta: 0:00:27 lr: 0.001586 min_lr: 0.001586 loss: 2.7959 (2.7504) weight_decay: 0.0500 (0.0500) time: 0.7007 data: 0.0004 max mem: 57114 Epoch: [217] [130/156] eta: 0:00:19 lr: 0.001584 min_lr: 0.001584 loss: 2.8840 (2.7618) weight_decay: 0.0500 (0.0500) time: 0.6945 data: 0.0008 max mem: 57114 Epoch: [217] [140/156] eta: 0:00:11 lr: 0.001581 min_lr: 0.001581 loss: 2.9407 (2.7556) weight_decay: 0.0500 (0.0500) time: 0.6869 data: 0.0007 max mem: 57114 Epoch: [217] [150/156] eta: 0:00:04 lr: 0.001579 min_lr: 0.001579 loss: 2.9231 (2.7580) weight_decay: 0.0500 (0.0500) time: 0.6863 data: 0.0001 max mem: 57114 Epoch: [217] [155/156] eta: 0:00:00 lr: 0.001578 min_lr: 0.001578 loss: 2.9781 (2.7624) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.0001 max mem: 57114 Epoch: [217] Total time: 0:01:56 (0.7444 s / it) Averaged stats: lr: 0.001578 min_lr: 0.001578 loss: 2.9781 (2.7921) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5860 (0.5860) acc1: 87.5000 (87.5000) acc5: 98.9583 (98.9583) time: 2.0266 data: 1.7709 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8304 (0.7369) acc1: 81.7708 (82.7366) acc5: 97.9167 (97.0588) time: 0.5559 data: 0.3542 max mem: 57114 Test: Total time: 0:00:02 (0.5748 s / it) * Acc@1 82.996 Acc@5 96.365 loss 0.828 Accuracy of the model on the 50000 test images: 83.0% Max accuracy: 83.00% Test: [0/5] eta: 0:00:10 loss: 0.5873 (0.5873) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.0302 data: 1.7866 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7028 (0.6770) acc1: 81.2500 (79.2839) acc5: 96.8750 (94.2455) time: 0.5567 data: 0.3574 max mem: 57114 Test: Total time: 0:00:02 (0.5682 s / it) * Acc@1 77.344 Acc@5 93.912 loss 0.818 Accuracy of the model EMA on 50000 test images: 77.3% Max EMA accuracy: 77.34% Epoch: [218] [ 0/156] eta: 0:07:29 lr: 0.001578 min_lr: 0.001578 loss: 2.3260 (2.3260) weight_decay: 0.0500 (0.0500) time: 2.8837 data: 2.2381 max mem: 57114 Epoch: [218] [ 10/156] eta: 0:02:13 lr: 0.001575 min_lr: 0.001575 loss: 2.8035 (2.7279) weight_decay: 0.0500 (0.0500) time: 0.9132 data: 0.2037 max mem: 57114 Epoch: [218] [ 20/156] eta: 0:01:51 lr: 0.001573 min_lr: 0.001573 loss: 2.8035 (2.7381) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0003 max mem: 57114 Epoch: [218] [ 30/156] eta: 0:01:39 lr: 0.001571 min_lr: 0.001571 loss: 2.7775 (2.7589) weight_decay: 0.0500 (0.0500) time: 0.7236 data: 0.0004 max mem: 57114 Epoch: [218] [ 40/156] eta: 0:01:30 lr: 0.001568 min_lr: 0.001568 loss: 2.8025 (2.7586) weight_decay: 0.0500 (0.0500) time: 0.7385 data: 0.0004 max mem: 57114 Epoch: [218] [ 50/156] eta: 0:01:22 lr: 0.001566 min_lr: 0.001566 loss: 2.8025 (2.7556) weight_decay: 0.0500 (0.0500) time: 0.7607 data: 0.0004 max mem: 57114 Epoch: [218] [ 60/156] eta: 0:01:13 lr: 0.001564 min_lr: 0.001564 loss: 2.8507 (2.7838) weight_decay: 0.0500 (0.0500) time: 0.7446 data: 0.0004 max mem: 57114 Epoch: [218] [ 70/156] eta: 0:01:05 lr: 0.001562 min_lr: 0.001562 loss: 2.8507 (2.7864) weight_decay: 0.0500 (0.0500) time: 0.7329 data: 0.0004 max mem: 57114 Epoch: [218] [ 80/156] eta: 0:00:57 lr: 0.001559 min_lr: 0.001559 loss: 2.6080 (2.7627) weight_decay: 0.0500 (0.0500) time: 0.7427 data: 0.0004 max mem: 57114 Epoch: [218] [ 90/156] eta: 0:00:49 lr: 0.001557 min_lr: 0.001557 loss: 2.8761 (2.7800) weight_decay: 0.0500 (0.0500) time: 0.7211 data: 0.0004 max mem: 57114 Epoch: [218] [100/156] eta: 0:00:42 lr: 0.001555 min_lr: 0.001555 loss: 2.8273 (2.7560) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0004 max mem: 57114 Epoch: [218] [110/156] eta: 0:00:34 lr: 0.001553 min_lr: 0.001553 loss: 2.7250 (2.7534) weight_decay: 0.0500 (0.0500) time: 0.7080 data: 0.0004 max mem: 57114 Epoch: [218] [120/156] eta: 0:00:26 lr: 0.001550 min_lr: 0.001550 loss: 2.7892 (2.7541) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0004 max mem: 57114 Epoch: [218] [130/156] eta: 0:00:19 lr: 0.001548 min_lr: 0.001548 loss: 2.9273 (2.7759) weight_decay: 0.0500 (0.0500) time: 0.6973 data: 0.0004 max mem: 57114 Epoch: [218] [140/156] eta: 0:00:11 lr: 0.001546 min_lr: 0.001546 loss: 2.8515 (2.7598) weight_decay: 0.0500 (0.0500) time: 0.6945 data: 0.0003 max mem: 57114 Epoch: [218] [150/156] eta: 0:00:04 lr: 0.001543 min_lr: 0.001543 loss: 2.6936 (2.7698) weight_decay: 0.0500 (0.0500) time: 0.6896 data: 0.0001 max mem: 57114 Epoch: [218] [155/156] eta: 0:00:00 lr: 0.001542 min_lr: 0.001542 loss: 2.6598 (2.7703) weight_decay: 0.0500 (0.0500) time: 0.6833 data: 0.0001 max mem: 57114 Epoch: [218] Total time: 0:01:54 (0.7333 s / it) Averaged stats: lr: 0.001542 min_lr: 0.001542 loss: 2.6598 (2.7905) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6929 (0.6929) acc1: 88.0208 (88.0208) acc5: 98.9583 (98.9583) time: 2.0599 data: 1.8011 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8601 (0.8188) acc1: 84.8958 (82.7366) acc5: 97.9167 (97.0588) time: 0.5628 data: 0.3603 max mem: 57114 Test: Total time: 0:00:02 (0.5846 s / it) * Acc@1 82.585 Acc@5 96.264 loss 0.910 Accuracy of the model on the 50000 test images: 82.6% Max accuracy: 83.00% Test: [0/5] eta: 0:00:11 loss: 0.5871 (0.5871) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3067 data: 2.0632 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7015 (0.6763) acc1: 81.2500 (79.2839) acc5: 96.8750 (94.2455) time: 0.6126 data: 0.4127 max mem: 57114 Test: Total time: 0:00:03 (0.6258 s / it) * Acc@1 77.378 Acc@5 93.912 loss 0.817 Accuracy of the model EMA on 50000 test images: 77.4% Max EMA accuracy: 77.38% Epoch: [219] [ 0/156] eta: 0:06:36 lr: 0.001542 min_lr: 0.001542 loss: 3.2494 (3.2494) weight_decay: 0.0500 (0.0500) time: 2.5414 data: 1.8894 max mem: 57114 Epoch: [219] [ 10/156] eta: 0:02:09 lr: 0.001540 min_lr: 0.001540 loss: 2.6458 (2.6560) weight_decay: 0.0500 (0.0500) time: 0.8838 data: 0.1720 max mem: 57114 Epoch: [219] [ 20/156] eta: 0:01:49 lr: 0.001538 min_lr: 0.001538 loss: 2.8356 (2.6962) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0003 max mem: 57114 Epoch: [219] [ 30/156] eta: 0:01:37 lr: 0.001535 min_lr: 0.001535 loss: 2.8966 (2.7169) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0004 max mem: 57114 Epoch: [219] [ 40/156] eta: 0:01:28 lr: 0.001533 min_lr: 0.001533 loss: 2.7510 (2.6533) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0004 max mem: 57114 Epoch: [219] [ 50/156] eta: 0:01:20 lr: 0.001531 min_lr: 0.001531 loss: 2.5255 (2.6581) weight_decay: 0.0500 (0.0500) time: 0.7347 data: 0.0004 max mem: 57114 Epoch: [219] [ 60/156] eta: 0:01:12 lr: 0.001529 min_lr: 0.001529 loss: 2.5255 (2.6435) weight_decay: 0.0500 (0.0500) time: 0.7296 data: 0.0004 max mem: 57114 Epoch: [219] [ 70/156] eta: 0:01:04 lr: 0.001526 min_lr: 0.001526 loss: 2.7146 (2.6346) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [219] [ 80/156] eta: 0:00:56 lr: 0.001524 min_lr: 0.001524 loss: 2.8541 (2.6482) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0004 max mem: 57114 Epoch: [219] [ 90/156] eta: 0:00:48 lr: 0.001522 min_lr: 0.001522 loss: 2.9468 (2.6537) weight_decay: 0.0500 (0.0500) time: 0.7038 data: 0.0004 max mem: 57114 Epoch: [219] [100/156] eta: 0:00:41 lr: 0.001519 min_lr: 0.001519 loss: 2.9801 (2.6669) weight_decay: 0.0500 (0.0500) time: 0.7084 data: 0.0004 max mem: 57114 Epoch: [219] [110/156] eta: 0:00:33 lr: 0.001517 min_lr: 0.001517 loss: 2.9191 (2.6858) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0004 max mem: 57114 Epoch: [219] [120/156] eta: 0:00:26 lr: 0.001515 min_lr: 0.001515 loss: 2.9191 (2.6949) weight_decay: 0.0500 (0.0500) time: 0.6959 data: 0.0004 max mem: 57114 Epoch: [219] [130/156] eta: 0:00:18 lr: 0.001513 min_lr: 0.001513 loss: 2.9063 (2.7044) weight_decay: 0.0500 (0.0500) time: 0.6916 data: 0.0009 max mem: 57114 Epoch: [219] [140/156] eta: 0:00:11 lr: 0.001510 min_lr: 0.001510 loss: 2.9063 (2.7229) weight_decay: 0.0500 (0.0500) time: 0.6943 data: 0.0007 max mem: 57114 Epoch: [219] [150/156] eta: 0:00:04 lr: 0.001508 min_lr: 0.001508 loss: 2.8681 (2.7246) weight_decay: 0.0500 (0.0500) time: 0.6886 data: 0.0001 max mem: 57114 Epoch: [219] [155/156] eta: 0:00:00 lr: 0.001507 min_lr: 0.001507 loss: 2.9777 (2.7345) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0001 max mem: 57114 Epoch: [219] Total time: 0:01:52 (0.7217 s / it) Averaged stats: lr: 0.001507 min_lr: 0.001507 loss: 2.9777 (2.7903) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7473 (0.7473) acc1: 88.5417 (88.5417) acc5: 98.9583 (98.9583) time: 2.0559 data: 1.7994 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.9031 (0.8556) acc1: 83.3333 (83.3760) acc5: 96.3542 (96.5473) time: 0.5619 data: 0.3600 max mem: 57114 Test: Total time: 0:00:02 (0.5832 s / it) * Acc@1 82.846 Acc@5 96.367 loss 0.945 Accuracy of the model on the 50000 test images: 82.8% Max accuracy: 83.00% Test: [0/5] eta: 0:00:11 loss: 0.5866 (0.5866) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.3336 data: 2.0900 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7002 (0.6756) acc1: 81.2500 (79.4118) acc5: 96.8750 (94.2455) time: 0.6174 data: 0.4181 max mem: 57114 Test: Total time: 0:00:03 (0.6284 s / it) * Acc@1 77.364 Acc@5 93.926 loss 0.816 Accuracy of the model EMA on 50000 test images: 77.4% Epoch: [220] [ 0/156] eta: 0:13:06 lr: 0.001507 min_lr: 0.001507 loss: 3.0378 (3.0378) weight_decay: 0.0500 (0.0500) time: 5.0448 data: 4.1106 max mem: 57114 Epoch: [220] [ 10/156] eta: 0:02:44 lr: 0.001505 min_lr: 0.001505 loss: 3.0378 (2.8864) weight_decay: 0.0500 (0.0500) time: 1.1265 data: 0.3739 max mem: 57114 Epoch: [220] [ 20/156] eta: 0:02:06 lr: 0.001502 min_lr: 0.001502 loss: 2.8286 (2.7010) weight_decay: 0.0500 (0.0500) time: 0.7227 data: 0.0003 max mem: 57114 Epoch: [220] [ 30/156] eta: 0:01:48 lr: 0.001500 min_lr: 0.001500 loss: 2.6623 (2.7017) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0003 max mem: 57114 Epoch: [220] [ 40/156] eta: 0:01:35 lr: 0.001498 min_lr: 0.001498 loss: 2.8710 (2.7522) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0003 max mem: 57114 Epoch: [220] [ 50/156] eta: 0:01:25 lr: 0.001496 min_lr: 0.001496 loss: 2.9978 (2.7934) weight_decay: 0.0500 (0.0500) time: 0.7350 data: 0.0003 max mem: 57114 Epoch: [220] [ 60/156] eta: 0:01:16 lr: 0.001493 min_lr: 0.001493 loss: 3.0795 (2.8251) weight_decay: 0.0500 (0.0500) time: 0.7390 data: 0.0003 max mem: 57114 Epoch: [220] [ 70/156] eta: 0:01:07 lr: 0.001491 min_lr: 0.001491 loss: 2.8684 (2.8055) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0004 max mem: 57114 Epoch: [220] [ 80/156] eta: 0:00:58 lr: 0.001489 min_lr: 0.001489 loss: 2.7513 (2.8092) weight_decay: 0.0500 (0.0500) time: 0.7080 data: 0.0004 max mem: 57114 Epoch: [220] [ 90/156] eta: 0:00:50 lr: 0.001487 min_lr: 0.001487 loss: 2.8227 (2.8119) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0003 max mem: 57114 Epoch: [220] [100/156] eta: 0:00:42 lr: 0.001484 min_lr: 0.001484 loss: 2.8186 (2.7859) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0003 max mem: 57114 Epoch: [220] [110/156] eta: 0:00:34 lr: 0.001482 min_lr: 0.001482 loss: 2.6561 (2.7753) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0004 max mem: 57114 Epoch: [220] [120/156] eta: 0:00:27 lr: 0.001480 min_lr: 0.001480 loss: 2.7482 (2.7781) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [220] [130/156] eta: 0:00:19 lr: 0.001478 min_lr: 0.001478 loss: 2.9941 (2.7894) weight_decay: 0.0500 (0.0500) time: 0.6991 data: 0.0005 max mem: 57114 Epoch: [220] [140/156] eta: 0:00:11 lr: 0.001475 min_lr: 0.001475 loss: 2.9238 (2.7807) weight_decay: 0.0500 (0.0500) time: 0.6931 data: 0.0003 max mem: 57114 Epoch: [220] [150/156] eta: 0:00:04 lr: 0.001473 min_lr: 0.001473 loss: 2.5924 (2.7722) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0001 max mem: 57114 Epoch: [220] [155/156] eta: 0:00:00 lr: 0.001472 min_lr: 0.001472 loss: 2.8782 (2.7798) weight_decay: 0.0500 (0.0500) time: 0.6887 data: 0.0001 max mem: 57114 Epoch: [220] Total time: 0:01:55 (0.7407 s / it) Averaged stats: lr: 0.001472 min_lr: 0.001472 loss: 2.8782 (2.7795) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6861 (0.6861) acc1: 89.5833 (89.5833) acc5: 98.4375 (98.4375) time: 2.0828 data: 1.8220 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8673 (0.8323) acc1: 84.8958 (83.5038) acc5: 98.4375 (97.3146) time: 0.5675 data: 0.3645 max mem: 57114 Test: Total time: 0:00:02 (0.5856 s / it) * Acc@1 82.751 Acc@5 96.407 loss 0.910 Accuracy of the model on the 50000 test images: 82.8% Max accuracy: 83.00% Test: [0/5] eta: 0:00:11 loss: 0.5864 (0.5864) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.2531 data: 2.0096 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6988 (0.6749) acc1: 81.2500 (79.4118) acc5: 96.8750 (94.2455) time: 0.6013 data: 0.4020 max mem: 57114 Test: Total time: 0:00:03 (0.6166 s / it) * Acc@1 77.378 Acc@5 93.942 loss 0.815 Accuracy of the model EMA on 50000 test images: 77.4% Max EMA accuracy: 77.38% Epoch: [221] [ 0/156] eta: 0:06:21 lr: 0.001472 min_lr: 0.001472 loss: 2.7650 (2.7650) weight_decay: 0.0500 (0.0500) time: 2.4487 data: 1.7726 max mem: 57114 Epoch: [221] [ 10/156] eta: 0:02:08 lr: 0.001470 min_lr: 0.001470 loss: 3.0584 (2.9570) weight_decay: 0.0500 (0.0500) time: 0.8813 data: 0.1614 max mem: 57114 Epoch: [221] [ 20/156] eta: 0:01:50 lr: 0.001467 min_lr: 0.001467 loss: 2.9664 (2.8711) weight_decay: 0.0500 (0.0500) time: 0.7281 data: 0.0003 max mem: 57114 Epoch: [221] [ 30/156] eta: 0:01:37 lr: 0.001465 min_lr: 0.001465 loss: 2.7610 (2.8373) weight_decay: 0.0500 (0.0500) time: 0.7146 data: 0.0004 max mem: 57114 Epoch: [221] [ 40/156] eta: 0:01:28 lr: 0.001463 min_lr: 0.001463 loss: 2.7642 (2.8025) weight_decay: 0.0500 (0.0500) time: 0.7160 data: 0.0004 max mem: 57114 Epoch: [221] [ 50/156] eta: 0:01:20 lr: 0.001461 min_lr: 0.001461 loss: 2.7642 (2.7875) weight_decay: 0.0500 (0.0500) time: 0.7386 data: 0.0004 max mem: 57114 Epoch: [221] [ 60/156] eta: 0:01:12 lr: 0.001459 min_lr: 0.001459 loss: 2.7157 (2.7681) weight_decay: 0.0500 (0.0500) time: 0.7444 data: 0.0004 max mem: 57114 Epoch: [221] [ 70/156] eta: 0:01:04 lr: 0.001456 min_lr: 0.001456 loss: 2.8333 (2.7549) weight_decay: 0.0500 (0.0500) time: 0.7349 data: 0.0004 max mem: 57114 Epoch: [221] [ 80/156] eta: 0:00:56 lr: 0.001454 min_lr: 0.001454 loss: 2.8593 (2.7543) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0003 max mem: 57114 Epoch: [221] [ 90/156] eta: 0:00:48 lr: 0.001452 min_lr: 0.001452 loss: 2.9528 (2.7625) weight_decay: 0.0500 (0.0500) time: 0.6996 data: 0.0004 max mem: 57114 Epoch: [221] [100/156] eta: 0:00:41 lr: 0.001450 min_lr: 0.001450 loss: 2.9030 (2.7653) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [221] [110/156] eta: 0:00:33 lr: 0.001447 min_lr: 0.001447 loss: 2.8823 (2.7729) weight_decay: 0.0500 (0.0500) time: 0.7151 data: 0.0003 max mem: 57114 Epoch: [221] [120/156] eta: 0:00:26 lr: 0.001445 min_lr: 0.001445 loss: 2.7856 (2.7574) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0003 max mem: 57114 Epoch: [221] [130/156] eta: 0:00:19 lr: 0.001443 min_lr: 0.001443 loss: 2.7856 (2.7585) weight_decay: 0.0500 (0.0500) time: 0.7096 data: 0.0010 max mem: 57114 Epoch: [221] [140/156] eta: 0:00:11 lr: 0.001441 min_lr: 0.001441 loss: 2.8090 (2.7424) weight_decay: 0.0500 (0.0500) time: 0.6880 data: 0.0009 max mem: 57114 Epoch: [221] [150/156] eta: 0:00:04 lr: 0.001439 min_lr: 0.001439 loss: 2.7462 (2.7345) weight_decay: 0.0500 (0.0500) time: 0.6795 data: 0.0001 max mem: 57114 Epoch: [221] [155/156] eta: 0:00:00 lr: 0.001438 min_lr: 0.001438 loss: 2.8642 (2.7378) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [221] Total time: 0:01:53 (0.7264 s / it) Averaged stats: lr: 0.001438 min_lr: 0.001438 loss: 2.8642 (2.7692) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5499 (0.5499) acc1: 90.1042 (90.1042) acc5: 99.4792 (99.4792) time: 2.0633 data: 1.8072 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7835 (0.6931) acc1: 83.8542 (83.1202) acc5: 97.9167 (97.1867) time: 0.5633 data: 0.3615 max mem: 57114 Test: Total time: 0:00:02 (0.5865 s / it) * Acc@1 83.232 Acc@5 96.551 loss 0.784 Accuracy of the model on the 50000 test images: 83.2% Max accuracy: 83.23% Test: [0/5] eta: 0:00:10 loss: 0.5863 (0.5863) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.0206 data: 1.7769 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6975 (0.6744) acc1: 81.2500 (79.5396) acc5: 96.8750 (94.2455) time: 0.5548 data: 0.3555 max mem: 57114 Test: Total time: 0:00:02 (0.5688 s / it) * Acc@1 77.402 Acc@5 93.960 loss 0.814 Accuracy of the model EMA on 50000 test images: 77.4% Max EMA accuracy: 77.40% Epoch: [222] [ 0/156] eta: 0:08:35 lr: 0.001437 min_lr: 0.001437 loss: 3.1455 (3.1455) weight_decay: 0.0500 (0.0500) time: 3.3061 data: 2.6575 max mem: 57114 Epoch: [222] [ 10/156] eta: 0:02:20 lr: 0.001435 min_lr: 0.001435 loss: 2.5650 (2.5430) weight_decay: 0.0500 (0.0500) time: 0.9608 data: 0.2419 max mem: 57114 Epoch: [222] [ 20/156] eta: 0:01:58 lr: 0.001433 min_lr: 0.001433 loss: 2.5650 (2.6212) weight_decay: 0.0500 (0.0500) time: 0.7497 data: 0.0003 max mem: 57114 Epoch: [222] [ 30/156] eta: 0:01:44 lr: 0.001431 min_lr: 0.001431 loss: 2.7590 (2.6352) weight_decay: 0.0500 (0.0500) time: 0.7548 data: 0.0004 max mem: 57114 Epoch: [222] [ 40/156] eta: 0:01:32 lr: 0.001428 min_lr: 0.001428 loss: 2.7977 (2.7026) weight_decay: 0.0500 (0.0500) time: 0.7232 data: 0.0005 max mem: 57114 Epoch: [222] [ 50/156] eta: 0:01:23 lr: 0.001426 min_lr: 0.001426 loss: 2.8509 (2.6766) weight_decay: 0.0500 (0.0500) time: 0.7241 data: 0.0004 max mem: 57114 Epoch: [222] [ 60/156] eta: 0:01:14 lr: 0.001424 min_lr: 0.001424 loss: 2.8922 (2.7416) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [222] [ 70/156] eta: 0:01:06 lr: 0.001422 min_lr: 0.001422 loss: 3.0092 (2.7731) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0004 max mem: 57114 Epoch: [222] [ 80/156] eta: 0:00:57 lr: 0.001420 min_lr: 0.001420 loss: 3.0143 (2.8083) weight_decay: 0.0500 (0.0500) time: 0.7267 data: 0.0004 max mem: 57114 Epoch: [222] [ 90/156] eta: 0:00:49 lr: 0.001417 min_lr: 0.001417 loss: 3.0054 (2.8065) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [222] [100/156] eta: 0:00:42 lr: 0.001415 min_lr: 0.001415 loss: 2.7921 (2.8092) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [222] [110/156] eta: 0:00:34 lr: 0.001413 min_lr: 0.001413 loss: 2.8220 (2.7950) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0004 max mem: 57114 Epoch: [222] [120/156] eta: 0:00:26 lr: 0.001411 min_lr: 0.001411 loss: 2.8531 (2.7997) weight_decay: 0.0500 (0.0500) time: 0.7029 data: 0.0004 max mem: 57114 Epoch: [222] [130/156] eta: 0:00:19 lr: 0.001409 min_lr: 0.001409 loss: 2.9417 (2.8027) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0008 max mem: 57114 Epoch: [222] [140/156] eta: 0:00:11 lr: 0.001407 min_lr: 0.001407 loss: 2.9897 (2.8109) weight_decay: 0.0500 (0.0500) time: 0.6911 data: 0.0007 max mem: 57114 Epoch: [222] [150/156] eta: 0:00:04 lr: 0.001404 min_lr: 0.001404 loss: 2.9524 (2.8182) weight_decay: 0.0500 (0.0500) time: 0.6855 data: 0.0001 max mem: 57114 Epoch: [222] [155/156] eta: 0:00:00 lr: 0.001403 min_lr: 0.001403 loss: 2.9445 (2.8048) weight_decay: 0.0500 (0.0500) time: 0.6816 data: 0.0001 max mem: 57114 Epoch: [222] Total time: 0:01:54 (0.7334 s / it) Averaged stats: lr: 0.001403 min_lr: 0.001403 loss: 2.9445 (2.7710) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:09 loss: 0.6109 (0.6109) acc1: 90.1042 (90.1042) acc5: 98.9583 (98.9583) time: 1.9969 data: 1.7410 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7764 (0.7485) acc1: 82.8125 (82.9923) acc5: 97.9167 (96.8031) time: 0.5500 data: 0.3483 max mem: 57114 Test: Total time: 0:00:02 (0.5700 s / it) * Acc@1 82.890 Acc@5 96.447 loss 0.847 Accuracy of the model on the 50000 test images: 82.9% Max accuracy: 83.23% Test: [0/5] eta: 0:00:11 loss: 0.5861 (0.5861) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3804 data: 2.1370 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6962 (0.6738) acc1: 81.2500 (79.4118) acc5: 96.8750 (94.3734) time: 0.6268 data: 0.4275 max mem: 57114 Test: Total time: 0:00:03 (0.6391 s / it) * Acc@1 77.398 Acc@5 93.962 loss 0.813 Accuracy of the model EMA on 50000 test images: 77.4% Epoch: [223] [ 0/156] eta: 0:13:27 lr: 0.001403 min_lr: 0.001403 loss: 2.9177 (2.9177) weight_decay: 0.0500 (0.0500) time: 5.1792 data: 3.0889 max mem: 57114 Epoch: [223] [ 10/156] eta: 0:02:46 lr: 0.001401 min_lr: 0.001401 loss: 2.8550 (2.6600) weight_decay: 0.0500 (0.0500) time: 1.1419 data: 0.2812 max mem: 57114 Epoch: [223] [ 20/156] eta: 0:02:07 lr: 0.001399 min_lr: 0.001399 loss: 2.8550 (2.6889) weight_decay: 0.0500 (0.0500) time: 0.7246 data: 0.0003 max mem: 57114 Epoch: [223] [ 30/156] eta: 0:01:49 lr: 0.001396 min_lr: 0.001396 loss: 2.9430 (2.7384) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0003 max mem: 57114 Epoch: [223] [ 40/156] eta: 0:01:36 lr: 0.001394 min_lr: 0.001394 loss: 2.7898 (2.7126) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0004 max mem: 57114 Epoch: [223] [ 50/156] eta: 0:01:26 lr: 0.001392 min_lr: 0.001392 loss: 2.8652 (2.7390) weight_decay: 0.0500 (0.0500) time: 0.7271 data: 0.0004 max mem: 57114 Epoch: [223] [ 60/156] eta: 0:01:16 lr: 0.001390 min_lr: 0.001390 loss: 2.8343 (2.7210) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0004 max mem: 57114 Epoch: [223] [ 70/156] eta: 0:01:07 lr: 0.001388 min_lr: 0.001388 loss: 2.7987 (2.7349) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0004 max mem: 57114 Epoch: [223] [ 80/156] eta: 0:00:59 lr: 0.001386 min_lr: 0.001386 loss: 2.8518 (2.7512) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [223] [ 90/156] eta: 0:00:50 lr: 0.001383 min_lr: 0.001383 loss: 3.0145 (2.7788) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0004 max mem: 57114 Epoch: [223] [100/156] eta: 0:00:42 lr: 0.001381 min_lr: 0.001381 loss: 2.8864 (2.7438) weight_decay: 0.0500 (0.0500) time: 0.6977 data: 0.0004 max mem: 57114 Epoch: [223] [110/156] eta: 0:00:34 lr: 0.001379 min_lr: 0.001379 loss: 2.5627 (2.7399) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0004 max mem: 57114 Epoch: [223] [120/156] eta: 0:00:27 lr: 0.001377 min_lr: 0.001377 loss: 2.9093 (2.7577) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [223] [130/156] eta: 0:00:19 lr: 0.001375 min_lr: 0.001375 loss: 2.9557 (2.7636) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0008 max mem: 57114 Epoch: [223] [140/156] eta: 0:00:11 lr: 0.001373 min_lr: 0.001373 loss: 2.8656 (2.7692) weight_decay: 0.0500 (0.0500) time: 0.6945 data: 0.0007 max mem: 57114 Epoch: [223] [150/156] eta: 0:00:04 lr: 0.001370 min_lr: 0.001370 loss: 2.8656 (2.7651) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [223] [155/156] eta: 0:00:00 lr: 0.001369 min_lr: 0.001369 loss: 2.6775 (2.7667) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0001 max mem: 57114 Epoch: [223] Total time: 0:01:55 (0.7417 s / it) Averaged stats: lr: 0.001369 min_lr: 0.001369 loss: 2.6775 (2.7576) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6218 (0.6218) acc1: 89.5833 (89.5833) acc5: 99.4792 (99.4792) time: 2.0444 data: 1.7884 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8097 (0.7626) acc1: 83.3333 (83.3760) acc5: 97.9167 (97.5703) time: 0.5595 data: 0.3577 max mem: 57114 Test: Total time: 0:00:02 (0.5820 s / it) * Acc@1 83.200 Acc@5 96.375 loss 0.849 Accuracy of the model on the 50000 test images: 83.2% Max accuracy: 83.23% Test: [0/5] eta: 0:00:11 loss: 0.5862 (0.5862) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.3517 data: 2.1081 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6950 (0.6734) acc1: 81.2500 (79.2839) acc5: 96.8750 (94.3734) time: 0.6211 data: 0.4217 max mem: 57114 Test: Total time: 0:00:03 (0.6338 s / it) * Acc@1 77.438 Acc@5 93.958 loss 0.813 Accuracy of the model EMA on 50000 test images: 77.4% Max EMA accuracy: 77.44% Epoch: [224] [ 0/156] eta: 0:06:04 lr: 0.001369 min_lr: 0.001369 loss: 3.0654 (3.0654) weight_decay: 0.0500 (0.0500) time: 2.3344 data: 1.6857 max mem: 57114 Epoch: [224] [ 10/156] eta: 0:02:07 lr: 0.001367 min_lr: 0.001367 loss: 3.0654 (3.0235) weight_decay: 0.0500 (0.0500) time: 0.8701 data: 0.1535 max mem: 57114 Epoch: [224] [ 20/156] eta: 0:01:48 lr: 0.001365 min_lr: 0.001365 loss: 3.0065 (2.9860) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0003 max mem: 57114 Epoch: [224] [ 30/156] eta: 0:01:37 lr: 0.001363 min_lr: 0.001363 loss: 3.0065 (2.9359) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [224] [ 40/156] eta: 0:01:28 lr: 0.001360 min_lr: 0.001360 loss: 2.6563 (2.8035) weight_decay: 0.0500 (0.0500) time: 0.7327 data: 0.0004 max mem: 57114 Epoch: [224] [ 50/156] eta: 0:01:20 lr: 0.001358 min_lr: 0.001358 loss: 2.5123 (2.7748) weight_decay: 0.0500 (0.0500) time: 0.7382 data: 0.0004 max mem: 57114 Epoch: [224] [ 60/156] eta: 0:01:12 lr: 0.001356 min_lr: 0.001356 loss: 2.9556 (2.7767) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0004 max mem: 57114 Epoch: [224] [ 70/156] eta: 0:01:04 lr: 0.001354 min_lr: 0.001354 loss: 2.8081 (2.7555) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [224] [ 80/156] eta: 0:00:56 lr: 0.001352 min_lr: 0.001352 loss: 2.5825 (2.7514) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0004 max mem: 57114 Epoch: [224] [ 90/156] eta: 0:00:48 lr: 0.001350 min_lr: 0.001350 loss: 2.8448 (2.7533) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0003 max mem: 57114 Epoch: [224] [100/156] eta: 0:00:41 lr: 0.001347 min_lr: 0.001347 loss: 2.9100 (2.7427) weight_decay: 0.0500 (0.0500) time: 0.7235 data: 0.0003 max mem: 57114 Epoch: [224] [110/156] eta: 0:00:33 lr: 0.001345 min_lr: 0.001345 loss: 2.8074 (2.7418) weight_decay: 0.0500 (0.0500) time: 0.7181 data: 0.0004 max mem: 57114 Epoch: [224] [120/156] eta: 0:00:26 lr: 0.001343 min_lr: 0.001343 loss: 2.8074 (2.7323) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [224] [130/156] eta: 0:00:18 lr: 0.001341 min_lr: 0.001341 loss: 2.8850 (2.7437) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0010 max mem: 57114 Epoch: [224] [140/156] eta: 0:00:11 lr: 0.001339 min_lr: 0.001339 loss: 2.8806 (2.7348) weight_decay: 0.0500 (0.0500) time: 0.6871 data: 0.0009 max mem: 57114 Epoch: [224] [150/156] eta: 0:00:04 lr: 0.001337 min_lr: 0.001337 loss: 2.8376 (2.7498) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0001 max mem: 57114 Epoch: [224] [155/156] eta: 0:00:00 lr: 0.001336 min_lr: 0.001336 loss: 2.8806 (2.7433) weight_decay: 0.0500 (0.0500) time: 0.6860 data: 0.0001 max mem: 57114 Epoch: [224] Total time: 0:01:52 (0.7230 s / it) Averaged stats: lr: 0.001336 min_lr: 0.001336 loss: 2.8806 (2.7593) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5708 (0.5708) acc1: 88.0208 (88.0208) acc5: 99.4792 (99.4792) time: 2.0372 data: 1.7815 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7840 (0.7277) acc1: 84.8958 (83.5038) acc5: 97.9167 (97.4425) time: 0.5581 data: 0.3564 max mem: 57114 Test: Total time: 0:00:02 (0.5776 s / it) * Acc@1 82.828 Acc@5 96.379 loss 0.809 Accuracy of the model on the 50000 test images: 82.8% Max accuracy: 83.23% Test: [0/5] eta: 0:00:11 loss: 0.5863 (0.5863) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.3182 data: 2.0748 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6940 (0.6731) acc1: 81.2500 (79.2839) acc5: 96.8750 (94.3734) time: 0.6143 data: 0.4150 max mem: 57114 Test: Total time: 0:00:03 (0.6250 s / it) * Acc@1 77.458 Acc@5 93.964 loss 0.812 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.46% Epoch: [225] [ 0/156] eta: 0:07:22 lr: 0.001335 min_lr: 0.001335 loss: 2.8954 (2.8954) weight_decay: 0.0500 (0.0500) time: 2.8342 data: 2.1751 max mem: 57114 Epoch: [225] [ 10/156] eta: 0:02:14 lr: 0.001333 min_lr: 0.001333 loss: 2.8113 (2.7125) weight_decay: 0.0500 (0.0500) time: 0.9186 data: 0.1980 max mem: 57114 Epoch: [225] [ 20/156] eta: 0:01:53 lr: 0.001331 min_lr: 0.001331 loss: 2.9085 (2.8530) weight_decay: 0.0500 (0.0500) time: 0.7329 data: 0.0004 max mem: 57114 Epoch: [225] [ 30/156] eta: 0:01:40 lr: 0.001329 min_lr: 0.001329 loss: 3.0886 (2.9043) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0004 max mem: 57114 Epoch: [225] [ 40/156] eta: 0:01:29 lr: 0.001327 min_lr: 0.001327 loss: 3.0302 (2.9106) weight_decay: 0.0500 (0.0500) time: 0.7126 data: 0.0004 max mem: 57114 Epoch: [225] [ 50/156] eta: 0:01:21 lr: 0.001325 min_lr: 0.001325 loss: 2.7650 (2.8657) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0004 max mem: 57114 Epoch: [225] [ 60/156] eta: 0:01:12 lr: 0.001323 min_lr: 0.001323 loss: 2.8073 (2.8524) weight_decay: 0.0500 (0.0500) time: 0.7210 data: 0.0003 max mem: 57114 Epoch: [225] [ 70/156] eta: 0:01:04 lr: 0.001320 min_lr: 0.001320 loss: 2.9514 (2.8577) weight_decay: 0.0500 (0.0500) time: 0.7180 data: 0.0004 max mem: 57114 Epoch: [225] [ 80/156] eta: 0:00:56 lr: 0.001318 min_lr: 0.001318 loss: 2.9780 (2.8660) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [225] [ 90/156] eta: 0:00:49 lr: 0.001316 min_lr: 0.001316 loss: 2.8846 (2.8768) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [225] [100/156] eta: 0:00:41 lr: 0.001314 min_lr: 0.001314 loss: 2.9014 (2.8779) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0004 max mem: 57114 Epoch: [225] [110/156] eta: 0:00:33 lr: 0.001312 min_lr: 0.001312 loss: 2.9366 (2.8766) weight_decay: 0.0500 (0.0500) time: 0.7086 data: 0.0004 max mem: 57114 Epoch: [225] [120/156] eta: 0:00:26 lr: 0.001310 min_lr: 0.001310 loss: 2.9515 (2.8648) weight_decay: 0.0500 (0.0500) time: 0.6967 data: 0.0004 max mem: 57114 Epoch: [225] [130/156] eta: 0:00:18 lr: 0.001308 min_lr: 0.001308 loss: 2.7908 (2.8645) weight_decay: 0.0500 (0.0500) time: 0.6924 data: 0.0010 max mem: 57114 Epoch: [225] [140/156] eta: 0:00:11 lr: 0.001306 min_lr: 0.001306 loss: 2.7908 (2.8537) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0008 max mem: 57114 Epoch: [225] [150/156] eta: 0:00:04 lr: 0.001303 min_lr: 0.001303 loss: 2.9919 (2.8686) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [225] [155/156] eta: 0:00:00 lr: 0.001302 min_lr: 0.001302 loss: 2.9838 (2.8677) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.0001 max mem: 57114 Epoch: [225] Total time: 0:01:53 (0.7245 s / it) Averaged stats: lr: 0.001302 min_lr: 0.001302 loss: 2.9838 (2.7523) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7345 (0.7345) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0555 data: 1.7937 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8947 (0.8415) acc1: 81.7708 (83.2481) acc5: 97.9167 (97.3146) time: 0.5618 data: 0.3588 max mem: 57114 Test: Total time: 0:00:02 (0.5854 s / it) * Acc@1 83.134 Acc@5 96.537 loss 0.929 Accuracy of the model on the 50000 test images: 83.1% Max accuracy: 83.23% Test: [0/5] eta: 0:00:12 loss: 0.5866 (0.5866) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.4083 data: 2.1646 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6933 (0.6729) acc1: 81.7708 (79.4118) acc5: 96.8750 (94.3734) time: 0.6324 data: 0.4331 max mem: 57114 Test: Total time: 0:00:03 (0.6448 s / it) * Acc@1 77.470 Acc@5 93.958 loss 0.812 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.47% Epoch: [226] [ 0/156] eta: 0:06:29 lr: 0.001302 min_lr: 0.001302 loss: 3.0514 (3.0514) weight_decay: 0.0500 (0.0500) time: 2.4954 data: 1.8428 max mem: 57114 Epoch: [226] [ 10/156] eta: 0:02:09 lr: 0.001300 min_lr: 0.001300 loss: 2.8489 (2.7058) weight_decay: 0.0500 (0.0500) time: 0.8901 data: 0.1679 max mem: 57114 Epoch: [226] [ 20/156] eta: 0:01:49 lr: 0.001298 min_lr: 0.001298 loss: 2.7731 (2.6387) weight_decay: 0.0500 (0.0500) time: 0.7210 data: 0.0004 max mem: 57114 Epoch: [226] [ 30/156] eta: 0:01:38 lr: 0.001296 min_lr: 0.001296 loss: 2.7007 (2.6587) weight_decay: 0.0500 (0.0500) time: 0.7183 data: 0.0005 max mem: 57114 Epoch: [226] [ 40/156] eta: 0:01:29 lr: 0.001294 min_lr: 0.001294 loss: 2.6164 (2.6408) weight_decay: 0.0500 (0.0500) time: 0.7314 data: 0.0005 max mem: 57114 Epoch: [226] [ 50/156] eta: 0:01:20 lr: 0.001292 min_lr: 0.001292 loss: 2.6164 (2.6455) weight_decay: 0.0500 (0.0500) time: 0.7268 data: 0.0004 max mem: 57114 Epoch: [226] [ 60/156] eta: 0:01:12 lr: 0.001289 min_lr: 0.001289 loss: 2.7531 (2.6716) weight_decay: 0.0500 (0.0500) time: 0.7216 data: 0.0004 max mem: 57114 Epoch: [226] [ 70/156] eta: 0:01:04 lr: 0.001287 min_lr: 0.001287 loss: 2.7531 (2.6712) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0004 max mem: 57114 Epoch: [226] [ 80/156] eta: 0:00:56 lr: 0.001285 min_lr: 0.001285 loss: 2.8228 (2.6750) weight_decay: 0.0500 (0.0500) time: 0.7146 data: 0.0003 max mem: 57114 Epoch: [226] [ 90/156] eta: 0:00:48 lr: 0.001283 min_lr: 0.001283 loss: 2.7391 (2.6736) weight_decay: 0.0500 (0.0500) time: 0.7032 data: 0.0004 max mem: 57114 Epoch: [226] [100/156] eta: 0:00:41 lr: 0.001281 min_lr: 0.001281 loss: 2.6262 (2.6656) weight_decay: 0.0500 (0.0500) time: 0.6990 data: 0.0003 max mem: 57114 Epoch: [226] [110/156] eta: 0:00:33 lr: 0.001279 min_lr: 0.001279 loss: 2.7728 (2.6670) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0003 max mem: 57114 Epoch: [226] [120/156] eta: 0:00:26 lr: 0.001277 min_lr: 0.001277 loss: 2.7430 (2.6669) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0003 max mem: 57114 Epoch: [226] [130/156] eta: 0:00:18 lr: 0.001275 min_lr: 0.001275 loss: 2.7430 (2.6693) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0007 max mem: 57114 Epoch: [226] [140/156] eta: 0:00:11 lr: 0.001273 min_lr: 0.001273 loss: 2.4783 (2.6474) weight_decay: 0.0500 (0.0500) time: 0.6935 data: 0.0006 max mem: 57114 Epoch: [226] [150/156] eta: 0:00:04 lr: 0.001270 min_lr: 0.001270 loss: 2.4783 (2.6509) weight_decay: 0.0500 (0.0500) time: 0.6883 data: 0.0001 max mem: 57114 Epoch: [226] [155/156] eta: 0:00:00 lr: 0.001269 min_lr: 0.001269 loss: 2.7925 (2.6527) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.0001 max mem: 57114 Epoch: [226] Total time: 0:01:52 (0.7236 s / it) Averaged stats: lr: 0.001269 min_lr: 0.001269 loss: 2.7925 (2.7450) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6153 (0.6153) acc1: 89.0625 (89.0625) acc5: 99.4792 (99.4792) time: 2.0558 data: 1.7994 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8323 (0.7271) acc1: 82.2917 (82.6087) acc5: 96.8750 (96.5473) time: 0.5619 data: 0.3600 max mem: 57114 Test: Total time: 0:00:02 (0.5830 s / it) * Acc@1 83.236 Acc@5 96.483 loss 0.789 Accuracy of the model on the 50000 test images: 83.2% Max accuracy: 83.24% Test: [0/5] eta: 0:00:10 loss: 0.5868 (0.5868) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.0700 data: 1.8263 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6929 (0.6728) acc1: 81.7708 (79.4118) acc5: 96.8750 (94.3734) time: 0.5647 data: 0.3654 max mem: 57114 Test: Total time: 0:00:02 (0.5755 s / it) * Acc@1 77.472 Acc@5 93.954 loss 0.812 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.47% Epoch: [227] [ 0/156] eta: 0:07:26 lr: 0.001269 min_lr: 0.001269 loss: 2.5881 (2.5881) weight_decay: 0.0500 (0.0500) time: 2.8606 data: 2.2140 max mem: 57114 Epoch: [227] [ 10/156] eta: 0:02:14 lr: 0.001267 min_lr: 0.001267 loss: 2.7772 (2.7492) weight_decay: 0.0500 (0.0500) time: 0.9199 data: 0.2015 max mem: 57114 Epoch: [227] [ 20/156] eta: 0:01:53 lr: 0.001265 min_lr: 0.001265 loss: 2.8620 (2.8197) weight_decay: 0.0500 (0.0500) time: 0.7299 data: 0.0003 max mem: 57114 Epoch: [227] [ 30/156] eta: 0:01:39 lr: 0.001263 min_lr: 0.001263 loss: 2.8128 (2.7727) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0003 max mem: 57114 Epoch: [227] [ 40/156] eta: 0:01:30 lr: 0.001261 min_lr: 0.001261 loss: 2.6927 (2.7295) weight_decay: 0.0500 (0.0500) time: 0.7287 data: 0.0003 max mem: 57114 Epoch: [227] [ 50/156] eta: 0:01:21 lr: 0.001259 min_lr: 0.001259 loss: 2.7533 (2.7220) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [227] [ 60/156] eta: 0:01:13 lr: 0.001257 min_lr: 0.001257 loss: 2.8414 (2.7330) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0004 max mem: 57114 Epoch: [227] [ 70/156] eta: 0:01:05 lr: 0.001255 min_lr: 0.001255 loss: 2.9050 (2.7672) weight_decay: 0.0500 (0.0500) time: 0.7432 data: 0.0004 max mem: 57114 Epoch: [227] [ 80/156] eta: 0:00:57 lr: 0.001252 min_lr: 0.001252 loss: 2.9535 (2.7538) weight_decay: 0.0500 (0.0500) time: 0.7359 data: 0.0004 max mem: 57114 Epoch: [227] [ 90/156] eta: 0:00:49 lr: 0.001250 min_lr: 0.001250 loss: 2.9103 (2.7530) weight_decay: 0.0500 (0.0500) time: 0.7224 data: 0.0004 max mem: 57114 Epoch: [227] [100/156] eta: 0:00:41 lr: 0.001248 min_lr: 0.001248 loss: 2.9317 (2.7512) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0004 max mem: 57114 Epoch: [227] [110/156] eta: 0:00:34 lr: 0.001246 min_lr: 0.001246 loss: 2.8120 (2.7461) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0004 max mem: 57114 Epoch: [227] [120/156] eta: 0:00:26 lr: 0.001244 min_lr: 0.001244 loss: 2.7173 (2.7274) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0004 max mem: 57114 Epoch: [227] [130/156] eta: 0:00:19 lr: 0.001242 min_lr: 0.001242 loss: 2.4229 (2.7171) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0008 max mem: 57114 Epoch: [227] [140/156] eta: 0:00:11 lr: 0.001240 min_lr: 0.001240 loss: 2.8172 (2.7160) weight_decay: 0.0500 (0.0500) time: 0.6911 data: 0.0007 max mem: 57114 Epoch: [227] [150/156] eta: 0:00:04 lr: 0.001238 min_lr: 0.001238 loss: 2.8604 (2.7243) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [227] [155/156] eta: 0:00:00 lr: 0.001237 min_lr: 0.001237 loss: 2.8295 (2.7315) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [227] Total time: 0:01:53 (0.7299 s / it) Averaged stats: lr: 0.001237 min_lr: 0.001237 loss: 2.8295 (2.7354) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6415 (0.6415) acc1: 90.1042 (90.1042) acc5: 98.9583 (98.9583) time: 2.0464 data: 1.7903 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7984 (0.7832) acc1: 84.8958 (83.6317) acc5: 97.3958 (96.9310) time: 0.5600 data: 0.3581 max mem: 57114 Test: Total time: 0:00:02 (0.5802 s / it) * Acc@1 83.168 Acc@5 96.533 loss 0.873 Accuracy of the model on the 50000 test images: 83.2% Max accuracy: 83.24% Test: [0/5] eta: 0:00:11 loss: 0.5871 (0.5871) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.2728 data: 2.0294 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6923 (0.6725) acc1: 81.7708 (79.5396) acc5: 96.8750 (94.3734) time: 0.6052 data: 0.4060 max mem: 57114 Test: Total time: 0:00:03 (0.6161 s / it) * Acc@1 77.478 Acc@5 93.946 loss 0.811 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.48% Epoch: [228] [ 0/156] eta: 0:08:21 lr: 0.001237 min_lr: 0.001237 loss: 2.4061 (2.4061) weight_decay: 0.0500 (0.0500) time: 3.2153 data: 2.5622 max mem: 57114 Epoch: [228] [ 10/156] eta: 0:02:20 lr: 0.001235 min_lr: 0.001235 loss: 2.9539 (2.7955) weight_decay: 0.0500 (0.0500) time: 0.9606 data: 0.2332 max mem: 57114 Epoch: [228] [ 20/156] eta: 0:01:52 lr: 0.001232 min_lr: 0.001232 loss: 3.0168 (2.7747) weight_decay: 0.0500 (0.0500) time: 0.7100 data: 0.0003 max mem: 57114 Epoch: [228] [ 30/156] eta: 0:01:40 lr: 0.001230 min_lr: 0.001230 loss: 2.9454 (2.7993) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0004 max mem: 57114 Epoch: [228] [ 40/156] eta: 0:01:30 lr: 0.001228 min_lr: 0.001228 loss: 3.0014 (2.8667) weight_decay: 0.0500 (0.0500) time: 0.7324 data: 0.0005 max mem: 57114 Epoch: [228] [ 50/156] eta: 0:01:21 lr: 0.001226 min_lr: 0.001226 loss: 2.9682 (2.8324) weight_decay: 0.0500 (0.0500) time: 0.7376 data: 0.0003 max mem: 57114 Epoch: [228] [ 60/156] eta: 0:01:13 lr: 0.001224 min_lr: 0.001224 loss: 2.9210 (2.8520) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0004 max mem: 57114 Epoch: [228] [ 70/156] eta: 0:01:05 lr: 0.001222 min_lr: 0.001222 loss: 2.8650 (2.8279) weight_decay: 0.0500 (0.0500) time: 0.7187 data: 0.0004 max mem: 57114 Epoch: [228] [ 80/156] eta: 0:00:57 lr: 0.001220 min_lr: 0.001220 loss: 2.4848 (2.7655) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0004 max mem: 57114 Epoch: [228] [ 90/156] eta: 0:00:49 lr: 0.001218 min_lr: 0.001218 loss: 2.7752 (2.7877) weight_decay: 0.0500 (0.0500) time: 0.7004 data: 0.0004 max mem: 57114 Epoch: [228] [100/156] eta: 0:00:41 lr: 0.001216 min_lr: 0.001216 loss: 2.7752 (2.7619) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0004 max mem: 57114 Epoch: [228] [110/156] eta: 0:00:33 lr: 0.001214 min_lr: 0.001214 loss: 2.5209 (2.7535) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0003 max mem: 57114 Epoch: [228] [120/156] eta: 0:00:26 lr: 0.001212 min_lr: 0.001212 loss: 2.8213 (2.7431) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [228] [130/156] eta: 0:00:19 lr: 0.001210 min_lr: 0.001210 loss: 2.8547 (2.7538) weight_decay: 0.0500 (0.0500) time: 0.6994 data: 0.0009 max mem: 57114 Epoch: [228] [140/156] eta: 0:00:11 lr: 0.001208 min_lr: 0.001208 loss: 2.8882 (2.7527) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0008 max mem: 57114 Epoch: [228] [150/156] eta: 0:00:04 lr: 0.001206 min_lr: 0.001206 loss: 2.5569 (2.7290) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0001 max mem: 57114 Epoch: [228] [155/156] eta: 0:00:00 lr: 0.001205 min_lr: 0.001205 loss: 2.7336 (2.7368) weight_decay: 0.0500 (0.0500) time: 0.6782 data: 0.0001 max mem: 57114 Epoch: [228] Total time: 0:01:53 (0.7270 s / it) Averaged stats: lr: 0.001205 min_lr: 0.001205 loss: 2.7336 (2.7309) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5877 (0.5877) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.1104 data: 1.8522 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7884 (0.7434) acc1: 83.3333 (83.3760) acc5: 97.9167 (97.4425) time: 0.5728 data: 0.3705 max mem: 57114 Test: Total time: 0:00:02 (0.5953 s / it) * Acc@1 83.434 Acc@5 96.565 loss 0.824 Accuracy of the model on the 50000 test images: 83.4% Max accuracy: 83.43% Test: [0/5] eta: 0:00:09 loss: 0.5874 (0.5874) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 1.9865 data: 1.7430 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6918 (0.6725) acc1: 81.7708 (79.6675) acc5: 96.8750 (94.3734) time: 0.5479 data: 0.3487 max mem: 57114 Test: Total time: 0:00:02 (0.5598 s / it) * Acc@1 77.490 Acc@5 93.938 loss 0.811 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.49% Epoch: [229] [ 0/156] eta: 0:07:19 lr: 0.001204 min_lr: 0.001204 loss: 2.3187 (2.3187) weight_decay: 0.0500 (0.0500) time: 2.8182 data: 2.0471 max mem: 57114 Epoch: [229] [ 10/156] eta: 0:02:16 lr: 0.001202 min_lr: 0.001202 loss: 2.8693 (2.6634) weight_decay: 0.0500 (0.0500) time: 0.9367 data: 0.1864 max mem: 57114 Epoch: [229] [ 20/156] eta: 0:01:51 lr: 0.001200 min_lr: 0.001200 loss: 2.8612 (2.6681) weight_decay: 0.0500 (0.0500) time: 0.7203 data: 0.0004 max mem: 57114 Epoch: [229] [ 30/156] eta: 0:01:39 lr: 0.001198 min_lr: 0.001198 loss: 2.8835 (2.7570) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0003 max mem: 57114 Epoch: [229] [ 40/156] eta: 0:01:30 lr: 0.001196 min_lr: 0.001196 loss: 2.8746 (2.6925) weight_decay: 0.0500 (0.0500) time: 0.7446 data: 0.0004 max mem: 57114 Epoch: [229] [ 50/156] eta: 0:01:21 lr: 0.001194 min_lr: 0.001194 loss: 2.7178 (2.6852) weight_decay: 0.0500 (0.0500) time: 0.7496 data: 0.0004 max mem: 57114 Epoch: [229] [ 60/156] eta: 0:01:13 lr: 0.001192 min_lr: 0.001192 loss: 2.6739 (2.6834) weight_decay: 0.0500 (0.0500) time: 0.7269 data: 0.0004 max mem: 57114 Epoch: [229] [ 70/156] eta: 0:01:05 lr: 0.001190 min_lr: 0.001190 loss: 2.6663 (2.6825) weight_decay: 0.0500 (0.0500) time: 0.7238 data: 0.0004 max mem: 57114 Epoch: [229] [ 80/156] eta: 0:00:57 lr: 0.001188 min_lr: 0.001188 loss: 2.8275 (2.7060) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [229] [ 90/156] eta: 0:00:49 lr: 0.001186 min_lr: 0.001186 loss: 2.7159 (2.6816) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [229] [100/156] eta: 0:00:41 lr: 0.001184 min_lr: 0.001184 loss: 2.6210 (2.6976) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0005 max mem: 57114 Epoch: [229] [110/156] eta: 0:00:34 lr: 0.001182 min_lr: 0.001182 loss: 2.8553 (2.7011) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0004 max mem: 57114 Epoch: [229] [120/156] eta: 0:00:26 lr: 0.001180 min_lr: 0.001180 loss: 2.8205 (2.7146) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [229] [130/156] eta: 0:00:19 lr: 0.001178 min_lr: 0.001178 loss: 2.7388 (2.7008) weight_decay: 0.0500 (0.0500) time: 0.7025 data: 0.0008 max mem: 57114 Epoch: [229] [140/156] eta: 0:00:11 lr: 0.001176 min_lr: 0.001176 loss: 2.7500 (2.7001) weight_decay: 0.0500 (0.0500) time: 0.6802 data: 0.0007 max mem: 57114 Epoch: [229] [150/156] eta: 0:00:04 lr: 0.001174 min_lr: 0.001174 loss: 2.7500 (2.6953) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0001 max mem: 57114 Epoch: [229] [155/156] eta: 0:00:00 lr: 0.001173 min_lr: 0.001173 loss: 2.7640 (2.6939) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.0001 max mem: 57114 Epoch: [229] Total time: 0:01:53 (0.7296 s / it) Averaged stats: lr: 0.001173 min_lr: 0.001173 loss: 2.7640 (2.7270) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6648 (0.6648) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 2.0688 data: 1.8128 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8269 (0.7945) acc1: 83.3333 (83.6317) acc5: 97.9167 (97.0588) time: 0.5644 data: 0.3626 max mem: 57114 Test: Total time: 0:00:02 (0.5887 s / it) * Acc@1 83.398 Acc@5 96.535 loss 0.871 Accuracy of the model on the 50000 test images: 83.4% Max accuracy: 83.43% Test: [0/5] eta: 0:00:12 loss: 0.5878 (0.5878) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.4001 data: 2.1566 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6915 (0.6726) acc1: 81.7708 (79.6675) acc5: 96.8750 (94.2455) time: 0.6307 data: 0.4314 max mem: 57114 Test: Total time: 0:00:03 (0.6419 s / it) * Acc@1 77.504 Acc@5 93.944 loss 0.811 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.50% Epoch: [230] [ 0/156] eta: 0:07:55 lr: 0.001172 min_lr: 0.001172 loss: 2.0727 (2.0727) weight_decay: 0.0500 (0.0500) time: 3.0483 data: 2.3952 max mem: 57114 Epoch: [230] [ 10/156] eta: 0:02:19 lr: 0.001170 min_lr: 0.001170 loss: 2.3149 (2.4736) weight_decay: 0.0500 (0.0500) time: 0.9589 data: 0.2181 max mem: 57114 Epoch: [230] [ 20/156] eta: 0:01:54 lr: 0.001168 min_lr: 0.001168 loss: 2.7766 (2.5862) weight_decay: 0.0500 (0.0500) time: 0.7341 data: 0.0004 max mem: 57114 Epoch: [230] [ 30/156] eta: 0:01:42 lr: 0.001166 min_lr: 0.001166 loss: 2.7795 (2.6346) weight_decay: 0.0500 (0.0500) time: 0.7290 data: 0.0004 max mem: 57114 Epoch: [230] [ 40/156] eta: 0:01:31 lr: 0.001164 min_lr: 0.001164 loss: 2.7795 (2.6464) weight_decay: 0.0500 (0.0500) time: 0.7295 data: 0.0004 max mem: 57114 Epoch: [230] [ 50/156] eta: 0:01:22 lr: 0.001162 min_lr: 0.001162 loss: 2.5921 (2.6178) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0004 max mem: 57114 Epoch: [230] [ 60/156] eta: 0:01:13 lr: 0.001160 min_lr: 0.001160 loss: 2.7835 (2.6629) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0003 max mem: 57114 Epoch: [230] [ 70/156] eta: 0:01:05 lr: 0.001158 min_lr: 0.001158 loss: 2.8188 (2.6664) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [230] [ 80/156] eta: 0:00:57 lr: 0.001156 min_lr: 0.001156 loss: 2.8600 (2.6948) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0004 max mem: 57114 Epoch: [230] [ 90/156] eta: 0:00:49 lr: 0.001154 min_lr: 0.001154 loss: 2.8600 (2.6768) weight_decay: 0.0500 (0.0500) time: 0.7155 data: 0.0004 max mem: 57114 Epoch: [230] [100/156] eta: 0:00:41 lr: 0.001152 min_lr: 0.001152 loss: 2.2587 (2.6483) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [230] [110/156] eta: 0:00:34 lr: 0.001150 min_lr: 0.001150 loss: 2.5822 (2.6714) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [230] [120/156] eta: 0:00:26 lr: 0.001148 min_lr: 0.001148 loss: 2.8271 (2.6564) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0003 max mem: 57114 Epoch: [230] [130/156] eta: 0:00:19 lr: 0.001146 min_lr: 0.001146 loss: 2.5110 (2.6569) weight_decay: 0.0500 (0.0500) time: 0.7009 data: 0.0004 max mem: 57114 Epoch: [230] [140/156] eta: 0:00:11 lr: 0.001144 min_lr: 0.001144 loss: 2.7200 (2.6523) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0003 max mem: 57114 Epoch: [230] [150/156] eta: 0:00:04 lr: 0.001142 min_lr: 0.001142 loss: 2.7369 (2.6686) weight_decay: 0.0500 (0.0500) time: 0.6782 data: 0.0001 max mem: 57114 Epoch: [230] [155/156] eta: 0:00:00 lr: 0.001141 min_lr: 0.001141 loss: 2.8375 (2.6777) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [230] Total time: 0:01:53 (0.7295 s / it) Averaged stats: lr: 0.001141 min_lr: 0.001141 loss: 2.8375 (2.7189) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6953 (0.6953) acc1: 92.1875 (92.1875) acc5: 98.9583 (98.9583) time: 2.0208 data: 1.7647 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8849 (0.8505) acc1: 82.2917 (84.0153) acc5: 97.9167 (97.3146) time: 0.5549 data: 0.3530 max mem: 57114 Test: Total time: 0:00:02 (0.5770 s / it) * Acc@1 83.542 Acc@5 96.535 loss 0.931 Accuracy of the model on the 50000 test images: 83.5% Max accuracy: 83.54% Test: [0/5] eta: 0:00:10 loss: 0.5882 (0.5882) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0368 data: 1.7931 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6911 (0.6728) acc1: 81.7708 (79.6675) acc5: 96.8750 (94.2455) time: 0.5581 data: 0.3587 max mem: 57114 Test: Total time: 0:00:02 (0.5679 s / it) * Acc@1 77.512 Acc@5 93.962 loss 0.811 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.51% Epoch: [231] [ 0/156] eta: 0:07:10 lr: 0.001141 min_lr: 0.001141 loss: 1.9720 (1.9720) weight_decay: 0.0500 (0.0500) time: 2.7624 data: 1.9905 max mem: 57114 Epoch: [231] [ 10/156] eta: 0:02:13 lr: 0.001139 min_lr: 0.001139 loss: 2.6790 (2.7157) weight_decay: 0.0500 (0.0500) time: 0.9149 data: 0.1813 max mem: 57114 Epoch: [231] [ 20/156] eta: 0:01:54 lr: 0.001137 min_lr: 0.001137 loss: 2.6790 (2.6868) weight_decay: 0.0500 (0.0500) time: 0.7423 data: 0.0004 max mem: 57114 Epoch: [231] [ 30/156] eta: 0:01:42 lr: 0.001135 min_lr: 0.001135 loss: 2.7280 (2.7249) weight_decay: 0.0500 (0.0500) time: 0.7533 data: 0.0004 max mem: 57114 Epoch: [231] [ 40/156] eta: 0:01:32 lr: 0.001133 min_lr: 0.001133 loss: 2.7280 (2.7016) weight_decay: 0.0500 (0.0500) time: 0.7471 data: 0.0004 max mem: 57114 Epoch: [231] [ 50/156] eta: 0:01:22 lr: 0.001131 min_lr: 0.001131 loss: 2.6007 (2.6756) weight_decay: 0.0500 (0.0500) time: 0.7282 data: 0.0004 max mem: 57114 Epoch: [231] [ 60/156] eta: 0:01:13 lr: 0.001129 min_lr: 0.001129 loss: 2.6512 (2.6906) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [231] [ 70/156] eta: 0:01:05 lr: 0.001127 min_lr: 0.001127 loss: 2.6402 (2.6623) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [231] [ 80/156] eta: 0:00:57 lr: 0.001125 min_lr: 0.001125 loss: 2.5838 (2.6602) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0003 max mem: 57114 Epoch: [231] [ 90/156] eta: 0:00:49 lr: 0.001123 min_lr: 0.001123 loss: 2.7115 (2.6649) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [231] [100/156] eta: 0:00:41 lr: 0.001121 min_lr: 0.001121 loss: 2.7751 (2.6562) weight_decay: 0.0500 (0.0500) time: 0.7118 data: 0.0004 max mem: 57114 Epoch: [231] [110/156] eta: 0:00:34 lr: 0.001119 min_lr: 0.001119 loss: 2.7658 (2.6609) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0004 max mem: 57114 Epoch: [231] [120/156] eta: 0:00:26 lr: 0.001117 min_lr: 0.001117 loss: 2.8429 (2.6775) weight_decay: 0.0500 (0.0500) time: 0.6991 data: 0.0004 max mem: 57114 Epoch: [231] [130/156] eta: 0:00:19 lr: 0.001115 min_lr: 0.001115 loss: 2.8429 (2.6892) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0008 max mem: 57114 Epoch: [231] [140/156] eta: 0:00:11 lr: 0.001113 min_lr: 0.001113 loss: 2.8105 (2.6900) weight_decay: 0.0500 (0.0500) time: 0.6798 data: 0.0007 max mem: 57114 Epoch: [231] [150/156] eta: 0:00:04 lr: 0.001111 min_lr: 0.001111 loss: 2.6972 (2.6862) weight_decay: 0.0500 (0.0500) time: 0.6794 data: 0.0001 max mem: 57114 Epoch: [231] [155/156] eta: 0:00:00 lr: 0.001110 min_lr: 0.001110 loss: 2.7322 (2.6935) weight_decay: 0.0500 (0.0500) time: 0.6804 data: 0.0001 max mem: 57114 Epoch: [231] Total time: 0:01:53 (0.7278 s / it) Averaged stats: lr: 0.001110 min_lr: 0.001110 loss: 2.7322 (2.7261) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6132 (0.6132) acc1: 89.5833 (89.5833) acc5: 98.9583 (98.9583) time: 2.0770 data: 1.8208 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7791 (0.7553) acc1: 84.8958 (84.2711) acc5: 97.9167 (97.6982) time: 0.5661 data: 0.3642 max mem: 57114 Test: Total time: 0:00:02 (0.5896 s / it) * Acc@1 83.410 Acc@5 96.559 loss 0.841 Accuracy of the model on the 50000 test images: 83.4% Max accuracy: 83.54% Test: [0/5] eta: 0:00:11 loss: 0.5885 (0.5885) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2478 data: 2.0042 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6907 (0.6729) acc1: 81.7708 (79.6675) acc5: 96.8750 (94.2455) time: 0.6002 data: 0.4009 max mem: 57114 Test: Total time: 0:00:03 (0.6121 s / it) * Acc@1 77.516 Acc@5 93.954 loss 0.812 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.52% Epoch: [232] [ 0/156] eta: 0:07:07 lr: 0.001110 min_lr: 0.001110 loss: 2.7848 (2.7848) weight_decay: 0.0500 (0.0500) time: 2.7427 data: 2.0883 max mem: 57114 Epoch: [232] [ 10/156] eta: 0:02:15 lr: 0.001108 min_lr: 0.001108 loss: 2.6979 (2.5724) weight_decay: 0.0500 (0.0500) time: 0.9280 data: 0.1901 max mem: 57114 Epoch: [232] [ 20/156] eta: 0:01:51 lr: 0.001106 min_lr: 0.001106 loss: 2.6979 (2.6688) weight_decay: 0.0500 (0.0500) time: 0.7259 data: 0.0002 max mem: 57114 Epoch: [232] [ 30/156] eta: 0:01:39 lr: 0.001104 min_lr: 0.001104 loss: 2.7527 (2.6980) weight_decay: 0.0500 (0.0500) time: 0.7197 data: 0.0003 max mem: 57114 Epoch: [232] [ 40/156] eta: 0:01:29 lr: 0.001102 min_lr: 0.001102 loss: 2.7527 (2.6874) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0003 max mem: 57114 Epoch: [232] [ 50/156] eta: 0:01:20 lr: 0.001100 min_lr: 0.001100 loss: 2.8026 (2.7023) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0004 max mem: 57114 Epoch: [232] [ 60/156] eta: 0:01:12 lr: 0.001098 min_lr: 0.001098 loss: 2.7688 (2.7096) weight_decay: 0.0500 (0.0500) time: 0.7273 data: 0.0004 max mem: 57114 Epoch: [232] [ 70/156] eta: 0:01:04 lr: 0.001096 min_lr: 0.001096 loss: 2.8857 (2.7321) weight_decay: 0.0500 (0.0500) time: 0.7322 data: 0.0003 max mem: 57114 Epoch: [232] [ 80/156] eta: 0:00:57 lr: 0.001094 min_lr: 0.001094 loss: 2.9388 (2.7560) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0003 max mem: 57114 Epoch: [232] [ 90/156] eta: 0:00:49 lr: 0.001092 min_lr: 0.001092 loss: 3.0140 (2.7661) weight_decay: 0.0500 (0.0500) time: 0.7129 data: 0.0004 max mem: 57114 Epoch: [232] [100/156] eta: 0:00:41 lr: 0.001090 min_lr: 0.001090 loss: 2.7291 (2.7606) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0004 max mem: 57114 Epoch: [232] [110/156] eta: 0:00:33 lr: 0.001088 min_lr: 0.001088 loss: 2.8275 (2.7569) weight_decay: 0.0500 (0.0500) time: 0.6990 data: 0.0004 max mem: 57114 Epoch: [232] [120/156] eta: 0:00:26 lr: 0.001086 min_lr: 0.001086 loss: 2.5624 (2.7381) weight_decay: 0.0500 (0.0500) time: 0.7064 data: 0.0003 max mem: 57114 Epoch: [232] [130/156] eta: 0:00:19 lr: 0.001084 min_lr: 0.001084 loss: 2.3740 (2.7228) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0004 max mem: 57114 Epoch: [232] [140/156] eta: 0:00:11 lr: 0.001082 min_lr: 0.001082 loss: 2.5278 (2.7116) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0003 max mem: 57114 Epoch: [232] [150/156] eta: 0:00:04 lr: 0.001080 min_lr: 0.001080 loss: 2.4782 (2.6949) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [232] [155/156] eta: 0:00:00 lr: 0.001079 min_lr: 0.001079 loss: 2.4766 (2.6924) weight_decay: 0.0500 (0.0500) time: 0.6837 data: 0.0001 max mem: 57114 Epoch: [232] Total time: 0:01:53 (0.7268 s / it) Averaged stats: lr: 0.001079 min_lr: 0.001079 loss: 2.4766 (2.7124) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5006 (0.5006) acc1: 91.1458 (91.1458) acc5: 98.9583 (98.9583) time: 2.0837 data: 1.8278 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6938 (0.6524) acc1: 84.3750 (84.1432) acc5: 97.9167 (97.5703) time: 0.5675 data: 0.3656 max mem: 57114 Test: Total time: 0:00:02 (0.5901 s / it) * Acc@1 83.630 Acc@5 96.701 loss 0.744 Accuracy of the model on the 50000 test images: 83.6% Max accuracy: 83.63% Test: [0/5] eta: 0:00:10 loss: 0.5888 (0.5888) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0852 data: 1.8416 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6901 (0.6730) acc1: 81.7708 (79.6675) acc5: 96.8750 (94.2455) time: 0.5677 data: 0.3684 max mem: 57114 Test: Total time: 0:00:02 (0.5815 s / it) * Acc@1 77.524 Acc@5 93.944 loss 0.812 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.52% Epoch: [233] [ 0/156] eta: 0:07:10 lr: 0.001079 min_lr: 0.001079 loss: 3.1283 (3.1283) weight_decay: 0.0500 (0.0500) time: 2.7607 data: 2.1086 max mem: 57114 Epoch: [233] [ 10/156] eta: 0:02:11 lr: 0.001077 min_lr: 0.001077 loss: 2.9530 (2.7918) weight_decay: 0.0500 (0.0500) time: 0.9037 data: 0.1920 max mem: 57114 Epoch: [233] [ 20/156] eta: 0:01:50 lr: 0.001075 min_lr: 0.001075 loss: 2.8865 (2.7796) weight_decay: 0.0500 (0.0500) time: 0.7171 data: 0.0004 max mem: 57114 Epoch: [233] [ 30/156] eta: 0:01:38 lr: 0.001073 min_lr: 0.001073 loss: 2.8865 (2.7604) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0003 max mem: 57114 Epoch: [233] [ 40/156] eta: 0:01:29 lr: 0.001071 min_lr: 0.001071 loss: 2.7755 (2.7089) weight_decay: 0.0500 (0.0500) time: 0.7277 data: 0.0004 max mem: 57114 Epoch: [233] [ 50/156] eta: 0:01:21 lr: 0.001069 min_lr: 0.001069 loss: 2.7313 (2.7290) weight_decay: 0.0500 (0.0500) time: 0.7340 data: 0.0004 max mem: 57114 Epoch: [233] [ 60/156] eta: 0:01:13 lr: 0.001067 min_lr: 0.001067 loss: 2.9206 (2.7380) weight_decay: 0.0500 (0.0500) time: 0.7478 data: 0.0004 max mem: 57114 Epoch: [233] [ 70/156] eta: 0:01:05 lr: 0.001065 min_lr: 0.001065 loss: 2.9206 (2.7008) weight_decay: 0.0500 (0.0500) time: 0.7570 data: 0.0004 max mem: 57114 Epoch: [233] [ 80/156] eta: 0:00:57 lr: 0.001063 min_lr: 0.001063 loss: 2.6315 (2.6958) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0004 max mem: 57114 Epoch: [233] [ 90/156] eta: 0:00:49 lr: 0.001061 min_lr: 0.001061 loss: 2.9097 (2.7264) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0004 max mem: 57114 Epoch: [233] [100/156] eta: 0:00:41 lr: 0.001059 min_lr: 0.001059 loss: 2.9194 (2.7327) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0004 max mem: 57114 Epoch: [233] [110/156] eta: 0:00:34 lr: 0.001057 min_lr: 0.001057 loss: 2.8128 (2.7249) weight_decay: 0.0500 (0.0500) time: 0.7107 data: 0.0004 max mem: 57114 Epoch: [233] [120/156] eta: 0:00:26 lr: 0.001055 min_lr: 0.001055 loss: 2.5488 (2.7035) weight_decay: 0.0500 (0.0500) time: 0.7124 data: 0.0004 max mem: 57114 Epoch: [233] [130/156] eta: 0:00:19 lr: 0.001053 min_lr: 0.001053 loss: 2.4893 (2.6886) weight_decay: 0.0500 (0.0500) time: 0.7017 data: 0.0009 max mem: 57114 Epoch: [233] [140/156] eta: 0:00:11 lr: 0.001052 min_lr: 0.001052 loss: 2.7769 (2.6969) weight_decay: 0.0500 (0.0500) time: 0.6883 data: 0.0007 max mem: 57114 Epoch: [233] [150/156] eta: 0:00:04 lr: 0.001050 min_lr: 0.001050 loss: 2.8902 (2.7018) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [233] [155/156] eta: 0:00:00 lr: 0.001049 min_lr: 0.001049 loss: 2.7144 (2.6955) weight_decay: 0.0500 (0.0500) time: 0.6828 data: 0.0001 max mem: 57114 Epoch: [233] Total time: 0:01:53 (0.7297 s / it) Averaged stats: lr: 0.001049 min_lr: 0.001049 loss: 2.7144 (2.7089) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5757 (0.5757) acc1: 90.1042 (90.1042) acc5: 98.9583 (98.9583) time: 2.0601 data: 1.8044 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7382 (0.7088) acc1: 83.8542 (84.3990) acc5: 98.4375 (97.6982) time: 0.5628 data: 0.3609 max mem: 57114 Test: Total time: 0:00:02 (0.5835 s / it) * Acc@1 83.558 Acc@5 96.659 loss 0.800 Accuracy of the model on the 50000 test images: 83.6% Max accuracy: 83.63% Test: [0/5] eta: 0:00:11 loss: 0.5893 (0.5893) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2964 data: 2.0530 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6898 (0.6733) acc1: 81.2500 (79.5396) acc5: 96.3542 (94.2455) time: 0.6099 data: 0.4107 max mem: 57114 Test: Total time: 0:00:03 (0.6195 s / it) * Acc@1 77.520 Acc@5 93.934 loss 0.812 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [234] [ 0/156] eta: 0:12:47 lr: 0.001048 min_lr: 0.001048 loss: 2.9810 (2.9810) weight_decay: 0.0500 (0.0500) time: 4.9178 data: 3.8859 max mem: 57114 Epoch: [234] [ 10/156] eta: 0:02:42 lr: 0.001046 min_lr: 0.001046 loss: 2.8095 (2.6829) weight_decay: 0.0500 (0.0500) time: 1.1156 data: 0.3535 max mem: 57114 Epoch: [234] [ 20/156] eta: 0:02:07 lr: 0.001045 min_lr: 0.001045 loss: 2.8095 (2.7556) weight_decay: 0.0500 (0.0500) time: 0.7406 data: 0.0003 max mem: 57114 Epoch: [234] [ 30/156] eta: 0:01:49 lr: 0.001043 min_lr: 0.001043 loss: 2.7805 (2.6971) weight_decay: 0.0500 (0.0500) time: 0.7328 data: 0.0003 max mem: 57114 Epoch: [234] [ 40/156] eta: 0:01:36 lr: 0.001041 min_lr: 0.001041 loss: 2.7622 (2.7169) weight_decay: 0.0500 (0.0500) time: 0.7213 data: 0.0003 max mem: 57114 Epoch: [234] [ 50/156] eta: 0:01:26 lr: 0.001039 min_lr: 0.001039 loss: 2.6819 (2.7063) weight_decay: 0.0500 (0.0500) time: 0.7333 data: 0.0004 max mem: 57114 Epoch: [234] [ 60/156] eta: 0:01:16 lr: 0.001037 min_lr: 0.001037 loss: 2.6419 (2.6786) weight_decay: 0.0500 (0.0500) time: 0.7335 data: 0.0004 max mem: 57114 Epoch: [234] [ 70/156] eta: 0:01:07 lr: 0.001035 min_lr: 0.001035 loss: 2.7928 (2.7033) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0003 max mem: 57114 Epoch: [234] [ 80/156] eta: 0:00:58 lr: 0.001033 min_lr: 0.001033 loss: 2.8357 (2.6891) weight_decay: 0.0500 (0.0500) time: 0.6996 data: 0.0003 max mem: 57114 Epoch: [234] [ 90/156] eta: 0:00:50 lr: 0.001031 min_lr: 0.001031 loss: 2.9105 (2.7078) weight_decay: 0.0500 (0.0500) time: 0.6999 data: 0.0004 max mem: 57114 Epoch: [234] [100/156] eta: 0:00:42 lr: 0.001029 min_lr: 0.001029 loss: 2.9750 (2.7264) weight_decay: 0.0500 (0.0500) time: 0.7137 data: 0.0004 max mem: 57114 Epoch: [234] [110/156] eta: 0:00:34 lr: 0.001027 min_lr: 0.001027 loss: 2.8983 (2.7267) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [234] [120/156] eta: 0:00:27 lr: 0.001025 min_lr: 0.001025 loss: 2.7532 (2.7157) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0003 max mem: 57114 Epoch: [234] [130/156] eta: 0:00:19 lr: 0.001023 min_lr: 0.001023 loss: 2.5581 (2.7047) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0008 max mem: 57114 Epoch: [234] [140/156] eta: 0:00:11 lr: 0.001021 min_lr: 0.001021 loss: 2.7478 (2.7040) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0007 max mem: 57114 Epoch: [234] [150/156] eta: 0:00:04 lr: 0.001019 min_lr: 0.001019 loss: 2.8099 (2.7073) weight_decay: 0.0500 (0.0500) time: 0.6795 data: 0.0001 max mem: 57114 Epoch: [234] [155/156] eta: 0:00:00 lr: 0.001018 min_lr: 0.001018 loss: 2.8814 (2.7183) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.0001 max mem: 57114 Epoch: [234] Total time: 0:01:55 (0.7408 s / it) Averaged stats: lr: 0.001018 min_lr: 0.001018 loss: 2.8814 (2.7028) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.7171 (0.7171) acc1: 91.1458 (91.1458) acc5: 99.4792 (99.4792) time: 2.0317 data: 1.7757 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8750 (0.8755) acc1: 84.3750 (84.1432) acc5: 97.9167 (97.0588) time: 0.5570 data: 0.3552 max mem: 57114 Test: Total time: 0:00:02 (0.5770 s / it) * Acc@1 83.642 Acc@5 96.727 loss 0.953 Accuracy of the model on the 50000 test images: 83.6% Max accuracy: 83.64% Test: [0/5] eta: 0:00:10 loss: 0.5899 (0.5899) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0158 data: 1.7722 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6897 (0.6738) acc1: 81.2500 (79.5396) acc5: 96.3542 (94.2455) time: 0.5538 data: 0.3545 max mem: 57114 Test: Total time: 0:00:02 (0.5646 s / it) * Acc@1 77.508 Acc@5 93.926 loss 0.813 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [235] [ 0/156] eta: 0:13:53 lr: 0.001018 min_lr: 0.001018 loss: 3.0371 (3.0371) weight_decay: 0.0500 (0.0500) time: 5.3441 data: 4.0995 max mem: 57114 Epoch: [235] [ 10/156] eta: 0:02:48 lr: 0.001016 min_lr: 0.001016 loss: 2.8588 (2.7936) weight_decay: 0.0500 (0.0500) time: 1.1520 data: 0.3730 max mem: 57114 Epoch: [235] [ 20/156] eta: 0:02:10 lr: 0.001014 min_lr: 0.001014 loss: 2.7924 (2.6915) weight_decay: 0.0500 (0.0500) time: 0.7423 data: 0.0003 max mem: 57114 Epoch: [235] [ 30/156] eta: 0:01:52 lr: 0.001013 min_lr: 0.001013 loss: 2.7536 (2.6816) weight_decay: 0.0500 (0.0500) time: 0.7533 data: 0.0004 max mem: 57114 Epoch: [235] [ 40/156] eta: 0:01:38 lr: 0.001011 min_lr: 0.001011 loss: 2.6150 (2.6426) weight_decay: 0.0500 (0.0500) time: 0.7398 data: 0.0004 max mem: 57114 Epoch: [235] [ 50/156] eta: 0:01:28 lr: 0.001009 min_lr: 0.001009 loss: 2.6190 (2.6509) weight_decay: 0.0500 (0.0500) time: 0.7332 data: 0.0004 max mem: 57114 Epoch: [235] [ 60/156] eta: 0:01:18 lr: 0.001007 min_lr: 0.001007 loss: 2.7548 (2.6325) weight_decay: 0.0500 (0.0500) time: 0.7334 data: 0.0004 max mem: 57114 Epoch: [235] [ 70/156] eta: 0:01:08 lr: 0.001005 min_lr: 0.001005 loss: 2.7395 (2.6261) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0004 max mem: 57114 Epoch: [235] [ 80/156] eta: 0:00:59 lr: 0.001003 min_lr: 0.001003 loss: 2.7395 (2.6220) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0004 max mem: 57114 Epoch: [235] [ 90/156] eta: 0:00:51 lr: 0.001001 min_lr: 0.001001 loss: 2.7703 (2.6388) weight_decay: 0.0500 (0.0500) time: 0.7167 data: 0.0004 max mem: 57114 Epoch: [235] [100/156] eta: 0:00:43 lr: 0.000999 min_lr: 0.000999 loss: 2.8958 (2.6718) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [235] [110/156] eta: 0:00:35 lr: 0.000997 min_lr: 0.000997 loss: 2.8958 (2.6765) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0004 max mem: 57114 Epoch: [235] [120/156] eta: 0:00:27 lr: 0.000995 min_lr: 0.000995 loss: 2.7869 (2.6778) weight_decay: 0.0500 (0.0500) time: 0.7129 data: 0.0003 max mem: 57114 Epoch: [235] [130/156] eta: 0:00:19 lr: 0.000994 min_lr: 0.000994 loss: 2.7869 (2.6787) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0009 max mem: 57114 Epoch: [235] [140/156] eta: 0:00:12 lr: 0.000992 min_lr: 0.000992 loss: 2.6535 (2.6669) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.0008 max mem: 57114 Epoch: [235] [150/156] eta: 0:00:04 lr: 0.000990 min_lr: 0.000990 loss: 2.6120 (2.6634) weight_decay: 0.0500 (0.0500) time: 0.6865 data: 0.0001 max mem: 57114 Epoch: [235] [155/156] eta: 0:00:00 lr: 0.000989 min_lr: 0.000989 loss: 2.6541 (2.6524) weight_decay: 0.0500 (0.0500) time: 0.6893 data: 0.0001 max mem: 57114 Epoch: [235] Total time: 0:01:56 (0.7488 s / it) Averaged stats: lr: 0.000989 min_lr: 0.000989 loss: 2.6541 (2.6983) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5209 (0.5209) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0411 data: 1.7846 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6838 (0.6481) acc1: 84.8958 (84.3990) acc5: 98.4375 (97.1867) time: 0.5589 data: 0.3570 max mem: 57114 Test: Total time: 0:00:02 (0.5801 s / it) * Acc@1 83.560 Acc@5 96.635 loss 0.750 Accuracy of the model on the 50000 test images: 83.6% Max accuracy: 83.64% Test: [0/5] eta: 0:00:11 loss: 0.5903 (0.5903) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2640 data: 2.0205 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6896 (0.6742) acc1: 81.2500 (79.4118) acc5: 96.3542 (94.2455) time: 0.6035 data: 0.4042 max mem: 57114 Test: Total time: 0:00:03 (0.6163 s / it) * Acc@1 77.518 Acc@5 93.942 loss 0.813 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [236] [ 0/156] eta: 0:12:29 lr: 0.000989 min_lr: 0.000989 loss: 2.9109 (2.9109) weight_decay: 0.0500 (0.0500) time: 4.8028 data: 3.6036 max mem: 57114 Epoch: [236] [ 10/156] eta: 0:02:40 lr: 0.000987 min_lr: 0.000987 loss: 2.9536 (2.7662) weight_decay: 0.0500 (0.0500) time: 1.0999 data: 0.3279 max mem: 57114 Epoch: [236] [ 20/156] eta: 0:02:06 lr: 0.000985 min_lr: 0.000985 loss: 2.6551 (2.6318) weight_decay: 0.0500 (0.0500) time: 0.7388 data: 0.0004 max mem: 57114 Epoch: [236] [ 30/156] eta: 0:01:49 lr: 0.000983 min_lr: 0.000983 loss: 2.6551 (2.6821) weight_decay: 0.0500 (0.0500) time: 0.7430 data: 0.0004 max mem: 57114 Epoch: [236] [ 40/156] eta: 0:01:36 lr: 0.000981 min_lr: 0.000981 loss: 2.7871 (2.6441) weight_decay: 0.0500 (0.0500) time: 0.7272 data: 0.0004 max mem: 57114 Epoch: [236] [ 50/156] eta: 0:01:25 lr: 0.000979 min_lr: 0.000979 loss: 2.7619 (2.6791) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [236] [ 60/156] eta: 0:01:16 lr: 0.000977 min_lr: 0.000977 loss: 2.7658 (2.6806) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [236] [ 70/156] eta: 0:01:07 lr: 0.000975 min_lr: 0.000975 loss: 2.7658 (2.6914) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [236] [ 80/156] eta: 0:00:58 lr: 0.000973 min_lr: 0.000973 loss: 2.8656 (2.6987) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0004 max mem: 57114 Epoch: [236] [ 90/156] eta: 0:00:50 lr: 0.000972 min_lr: 0.000972 loss: 2.7085 (2.6887) weight_decay: 0.0500 (0.0500) time: 0.7118 data: 0.0004 max mem: 57114 Epoch: [236] [100/156] eta: 0:00:42 lr: 0.000970 min_lr: 0.000970 loss: 2.8138 (2.6971) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0004 max mem: 57114 Epoch: [236] [110/156] eta: 0:00:34 lr: 0.000968 min_lr: 0.000968 loss: 2.7719 (2.6797) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0004 max mem: 57114 Epoch: [236] [120/156] eta: 0:00:27 lr: 0.000966 min_lr: 0.000966 loss: 2.8754 (2.7094) weight_decay: 0.0500 (0.0500) time: 0.7022 data: 0.0004 max mem: 57114 Epoch: [236] [130/156] eta: 0:00:19 lr: 0.000964 min_lr: 0.000964 loss: 2.8420 (2.6946) weight_decay: 0.0500 (0.0500) time: 0.6991 data: 0.0009 max mem: 57114 Epoch: [236] [140/156] eta: 0:00:11 lr: 0.000962 min_lr: 0.000962 loss: 2.5744 (2.6932) weight_decay: 0.0500 (0.0500) time: 0.6900 data: 0.0007 max mem: 57114 Epoch: [236] [150/156] eta: 0:00:04 lr: 0.000960 min_lr: 0.000960 loss: 2.7102 (2.7066) weight_decay: 0.0500 (0.0500) time: 0.6838 data: 0.0001 max mem: 57114 Epoch: [236] [155/156] eta: 0:00:00 lr: 0.000959 min_lr: 0.000959 loss: 2.7984 (2.7030) weight_decay: 0.0500 (0.0500) time: 0.6846 data: 0.0001 max mem: 57114 Epoch: [236] Total time: 0:01:55 (0.7383 s / it) Averaged stats: lr: 0.000959 min_lr: 0.000959 loss: 2.7984 (2.6875) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6525 (0.6525) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.0993 data: 1.8431 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8468 (0.7648) acc1: 82.8125 (84.6547) acc5: 98.4375 (97.3146) time: 0.5705 data: 0.3687 max mem: 57114 Test: Total time: 0:00:02 (0.5962 s / it) * Acc@1 83.422 Acc@5 96.531 loss 0.862 Accuracy of the model on the 50000 test images: 83.4% Max accuracy: 83.64% Test: [0/5] eta: 0:00:11 loss: 0.5910 (0.5910) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3595 data: 2.1160 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6895 (0.6746) acc1: 81.2500 (79.2839) acc5: 96.3542 (94.2455) time: 0.6226 data: 0.4233 max mem: 57114 Test: Total time: 0:00:03 (0.6347 s / it) * Acc@1 77.506 Acc@5 93.946 loss 0.814 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [237] [ 0/156] eta: 0:11:57 lr: 0.000959 min_lr: 0.000959 loss: 2.1521 (2.1521) weight_decay: 0.0500 (0.0500) time: 4.5995 data: 3.5763 max mem: 57114 Epoch: [237] [ 10/156] eta: 0:02:41 lr: 0.000957 min_lr: 0.000957 loss: 2.8411 (2.7515) weight_decay: 0.0500 (0.0500) time: 1.1041 data: 0.3254 max mem: 57114 Epoch: [237] [ 20/156] eta: 0:02:03 lr: 0.000956 min_lr: 0.000956 loss: 2.8443 (2.8129) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0003 max mem: 57114 Epoch: [237] [ 30/156] eta: 0:01:48 lr: 0.000954 min_lr: 0.000954 loss: 2.8378 (2.8264) weight_decay: 0.0500 (0.0500) time: 0.7250 data: 0.0003 max mem: 57114 Epoch: [237] [ 40/156] eta: 0:01:35 lr: 0.000952 min_lr: 0.000952 loss: 2.8003 (2.7736) weight_decay: 0.0500 (0.0500) time: 0.7362 data: 0.0004 max mem: 57114 Epoch: [237] [ 50/156] eta: 0:01:25 lr: 0.000950 min_lr: 0.000950 loss: 2.6161 (2.7377) weight_decay: 0.0500 (0.0500) time: 0.7300 data: 0.0004 max mem: 57114 Epoch: [237] [ 60/156] eta: 0:01:16 lr: 0.000948 min_lr: 0.000948 loss: 2.6554 (2.7201) weight_decay: 0.0500 (0.0500) time: 0.7258 data: 0.0004 max mem: 57114 Epoch: [237] [ 70/156] eta: 0:01:07 lr: 0.000946 min_lr: 0.000946 loss: 2.6192 (2.7035) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0003 max mem: 57114 Epoch: [237] [ 80/156] eta: 0:00:58 lr: 0.000944 min_lr: 0.000944 loss: 2.4737 (2.6761) weight_decay: 0.0500 (0.0500) time: 0.7014 data: 0.0004 max mem: 57114 Epoch: [237] [ 90/156] eta: 0:00:50 lr: 0.000943 min_lr: 0.000943 loss: 2.7668 (2.6908) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0004 max mem: 57114 Epoch: [237] [100/156] eta: 0:00:42 lr: 0.000941 min_lr: 0.000941 loss: 2.8812 (2.6912) weight_decay: 0.0500 (0.0500) time: 0.7160 data: 0.0004 max mem: 57114 Epoch: [237] [110/156] eta: 0:00:34 lr: 0.000939 min_lr: 0.000939 loss: 2.7770 (2.6893) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [237] [120/156] eta: 0:00:27 lr: 0.000937 min_lr: 0.000937 loss: 2.7173 (2.6735) weight_decay: 0.0500 (0.0500) time: 0.7041 data: 0.0004 max mem: 57114 Epoch: [237] [130/156] eta: 0:00:19 lr: 0.000935 min_lr: 0.000935 loss: 2.5927 (2.6787) weight_decay: 0.0500 (0.0500) time: 0.6950 data: 0.0009 max mem: 57114 Epoch: [237] [140/156] eta: 0:00:11 lr: 0.000933 min_lr: 0.000933 loss: 2.7390 (2.6829) weight_decay: 0.0500 (0.0500) time: 0.6860 data: 0.0007 max mem: 57114 Epoch: [237] [150/156] eta: 0:00:04 lr: 0.000931 min_lr: 0.000931 loss: 2.7286 (2.6772) weight_decay: 0.0500 (0.0500) time: 0.6811 data: 0.0001 max mem: 57114 Epoch: [237] [155/156] eta: 0:00:00 lr: 0.000930 min_lr: 0.000930 loss: 2.7286 (2.6728) weight_decay: 0.0500 (0.0500) time: 0.6834 data: 0.0001 max mem: 57114 Epoch: [237] Total time: 0:01:55 (0.7380 s / it) Averaged stats: lr: 0.000930 min_lr: 0.000930 loss: 2.7286 (2.6932) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5504 (0.5504) acc1: 91.6667 (91.6667) acc5: 98.4375 (98.4375) time: 2.0819 data: 1.8260 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7495 (0.7045) acc1: 83.3333 (84.1432) acc5: 97.9167 (96.6752) time: 0.5670 data: 0.3653 max mem: 57114 Test: Total time: 0:00:02 (0.5878 s / it) * Acc@1 83.670 Acc@5 96.643 loss 0.789 Accuracy of the model on the 50000 test images: 83.7% Max accuracy: 83.67% Test: [0/5] eta: 0:00:10 loss: 0.5917 (0.5917) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0174 data: 1.7739 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6897 (0.6753) acc1: 81.2500 (79.2839) acc5: 95.8333 (94.1177) time: 0.5541 data: 0.3548 max mem: 57114 Test: Total time: 0:00:02 (0.5648 s / it) * Acc@1 77.504 Acc@5 93.944 loss 0.814 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [238] [ 0/156] eta: 0:12:55 lr: 0.000930 min_lr: 0.000930 loss: 2.8718 (2.8718) weight_decay: 0.0500 (0.0500) time: 4.9737 data: 3.8017 max mem: 57114 Epoch: [238] [ 10/156] eta: 0:02:44 lr: 0.000928 min_lr: 0.000928 loss: 3.0933 (2.8711) weight_decay: 0.0500 (0.0500) time: 1.1266 data: 0.3459 max mem: 57114 Epoch: [238] [ 20/156] eta: 0:02:10 lr: 0.000927 min_lr: 0.000927 loss: 2.7523 (2.7640) weight_decay: 0.0500 (0.0500) time: 0.7578 data: 0.0003 max mem: 57114 Epoch: [238] [ 30/156] eta: 0:01:51 lr: 0.000925 min_lr: 0.000925 loss: 2.7523 (2.7909) weight_decay: 0.0500 (0.0500) time: 0.7552 data: 0.0003 max mem: 57114 Epoch: [238] [ 40/156] eta: 0:01:38 lr: 0.000923 min_lr: 0.000923 loss: 2.7491 (2.7246) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0004 max mem: 57114 Epoch: [238] [ 50/156] eta: 0:01:27 lr: 0.000921 min_lr: 0.000921 loss: 2.6577 (2.7565) weight_decay: 0.0500 (0.0500) time: 0.7188 data: 0.0004 max mem: 57114 Epoch: [238] [ 60/156] eta: 0:01:17 lr: 0.000919 min_lr: 0.000919 loss: 2.8916 (2.7426) weight_decay: 0.0500 (0.0500) time: 0.7314 data: 0.0004 max mem: 57114 Epoch: [238] [ 70/156] eta: 0:01:08 lr: 0.000917 min_lr: 0.000917 loss: 2.6711 (2.7029) weight_decay: 0.0500 (0.0500) time: 0.7378 data: 0.0004 max mem: 57114 Epoch: [238] [ 80/156] eta: 0:01:00 lr: 0.000916 min_lr: 0.000916 loss: 2.5190 (2.6737) weight_decay: 0.0500 (0.0500) time: 0.7380 data: 0.0004 max mem: 57114 Epoch: [238] [ 90/156] eta: 0:00:51 lr: 0.000914 min_lr: 0.000914 loss: 2.6794 (2.6905) weight_decay: 0.0500 (0.0500) time: 0.7257 data: 0.0003 max mem: 57114 Epoch: [238] [100/156] eta: 0:00:43 lr: 0.000912 min_lr: 0.000912 loss: 2.8271 (2.6906) weight_decay: 0.0500 (0.0500) time: 0.7063 data: 0.0003 max mem: 57114 Epoch: [238] [110/156] eta: 0:00:35 lr: 0.000910 min_lr: 0.000910 loss: 2.7779 (2.6929) weight_decay: 0.0500 (0.0500) time: 0.6984 data: 0.0004 max mem: 57114 Epoch: [238] [120/156] eta: 0:00:27 lr: 0.000908 min_lr: 0.000908 loss: 2.6617 (2.6812) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0004 max mem: 57114 Epoch: [238] [130/156] eta: 0:00:19 lr: 0.000906 min_lr: 0.000906 loss: 2.4958 (2.6770) weight_decay: 0.0500 (0.0500) time: 0.6996 data: 0.0008 max mem: 57114 Epoch: [238] [140/156] eta: 0:00:12 lr: 0.000905 min_lr: 0.000905 loss: 2.5970 (2.6778) weight_decay: 0.0500 (0.0500) time: 0.6917 data: 0.0007 max mem: 57114 Epoch: [238] [150/156] eta: 0:00:04 lr: 0.000903 min_lr: 0.000903 loss: 2.8178 (2.6765) weight_decay: 0.0500 (0.0500) time: 0.6879 data: 0.0001 max mem: 57114 Epoch: [238] [155/156] eta: 0:00:00 lr: 0.000902 min_lr: 0.000902 loss: 2.8296 (2.6867) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0001 max mem: 57114 Epoch: [238] Total time: 0:01:56 (0.7479 s / it) Averaged stats: lr: 0.000902 min_lr: 0.000902 loss: 2.8296 (2.6815) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:12 loss: 0.6663 (0.6663) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 2.4615 data: 2.2056 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8463 (0.7948) acc1: 83.3333 (83.7596) acc5: 98.9583 (97.4425) time: 0.6430 data: 0.4412 max mem: 57114 Test: Total time: 0:00:03 (0.6640 s / it) * Acc@1 83.492 Acc@5 96.743 loss 0.880 Accuracy of the model on the 50000 test images: 83.5% Max accuracy: 83.67% Test: [0/5] eta: 0:00:12 loss: 0.5924 (0.5924) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.4486 data: 2.2050 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6898 (0.6758) acc1: 81.2500 (79.2839) acc5: 95.8333 (94.1177) time: 0.6404 data: 0.4411 max mem: 57114 Test: Total time: 0:00:03 (0.6530 s / it) * Acc@1 77.516 Acc@5 93.934 loss 0.815 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [239] [ 0/156] eta: 0:16:03 lr: 0.000902 min_lr: 0.000902 loss: 2.9742 (2.9742) weight_decay: 0.0500 (0.0500) time: 6.1786 data: 4.7116 max mem: 57114 Epoch: [239] [ 10/156] eta: 0:02:59 lr: 0.000900 min_lr: 0.000900 loss: 2.8584 (2.6847) weight_decay: 0.0500 (0.0500) time: 1.2262 data: 0.4286 max mem: 57114 Epoch: [239] [ 20/156] eta: 0:02:13 lr: 0.000898 min_lr: 0.000898 loss: 2.6876 (2.6873) weight_decay: 0.0500 (0.0500) time: 0.7229 data: 0.0003 max mem: 57114 Epoch: [239] [ 30/156] eta: 0:01:53 lr: 0.000896 min_lr: 0.000896 loss: 2.6901 (2.7242) weight_decay: 0.0500 (0.0500) time: 0.7257 data: 0.0003 max mem: 57114 Epoch: [239] [ 40/156] eta: 0:01:39 lr: 0.000894 min_lr: 0.000894 loss: 2.8200 (2.7339) weight_decay: 0.0500 (0.0500) time: 0.7264 data: 0.0003 max mem: 57114 Epoch: [239] [ 50/156] eta: 0:01:28 lr: 0.000893 min_lr: 0.000893 loss: 2.8261 (2.7429) weight_decay: 0.0500 (0.0500) time: 0.7367 data: 0.0003 max mem: 57114 Epoch: [239] [ 60/156] eta: 0:01:18 lr: 0.000891 min_lr: 0.000891 loss: 2.6290 (2.7117) weight_decay: 0.0500 (0.0500) time: 0.7312 data: 0.0004 max mem: 57114 Epoch: [239] [ 70/156] eta: 0:01:09 lr: 0.000889 min_lr: 0.000889 loss: 2.7255 (2.7087) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0004 max mem: 57114 Epoch: [239] [ 80/156] eta: 0:01:00 lr: 0.000887 min_lr: 0.000887 loss: 2.8262 (2.7257) weight_decay: 0.0500 (0.0500) time: 0.7143 data: 0.0004 max mem: 57114 Epoch: [239] [ 90/156] eta: 0:00:51 lr: 0.000885 min_lr: 0.000885 loss: 2.6860 (2.7076) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.0004 max mem: 57114 Epoch: [239] [100/156] eta: 0:00:43 lr: 0.000884 min_lr: 0.000884 loss: 2.6088 (2.7023) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0003 max mem: 57114 Epoch: [239] [110/156] eta: 0:00:35 lr: 0.000882 min_lr: 0.000882 loss: 2.8541 (2.7119) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0003 max mem: 57114 Epoch: [239] [120/156] eta: 0:00:27 lr: 0.000880 min_lr: 0.000880 loss: 2.8541 (2.6964) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [239] [130/156] eta: 0:00:19 lr: 0.000878 min_lr: 0.000878 loss: 2.5710 (2.6989) weight_decay: 0.0500 (0.0500) time: 0.7045 data: 0.0008 max mem: 57114 Epoch: [239] [140/156] eta: 0:00:12 lr: 0.000876 min_lr: 0.000876 loss: 2.8241 (2.7010) weight_decay: 0.0500 (0.0500) time: 0.6852 data: 0.0007 max mem: 57114 Epoch: [239] [150/156] eta: 0:00:04 lr: 0.000875 min_lr: 0.000875 loss: 2.8241 (2.6962) weight_decay: 0.0500 (0.0500) time: 0.6884 data: 0.0001 max mem: 57114 Epoch: [239] [155/156] eta: 0:00:00 lr: 0.000874 min_lr: 0.000874 loss: 2.7771 (2.6949) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [239] Total time: 0:01:56 (0.7481 s / it) Averaged stats: lr: 0.000874 min_lr: 0.000874 loss: 2.7771 (2.6750) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6058 (0.6058) acc1: 89.5833 (89.5833) acc5: 98.4375 (98.4375) time: 2.0994 data: 1.8383 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7907 (0.7284) acc1: 82.2917 (83.6317) acc5: 97.9167 (97.0588) time: 0.5706 data: 0.3677 max mem: 57114 Test: Total time: 0:00:02 (0.5941 s / it) * Acc@1 83.624 Acc@5 96.649 loss 0.820 Accuracy of the model on the 50000 test images: 83.6% Max accuracy: 83.67% Test: [0/5] eta: 0:00:12 loss: 0.5931 (0.5931) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.4062 data: 2.1624 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6898 (0.6765) acc1: 81.2500 (79.2839) acc5: 95.8333 (94.2455) time: 0.6320 data: 0.4326 max mem: 57114 Test: Total time: 0:00:03 (0.6435 s / it) * Acc@1 77.528 Acc@5 93.938 loss 0.815 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.53% Epoch: [240] [ 0/156] eta: 0:08:40 lr: 0.000874 min_lr: 0.000874 loss: 2.9621 (2.9621) weight_decay: 0.0500 (0.0500) time: 3.3350 data: 2.6808 max mem: 57114 Epoch: [240] [ 10/156] eta: 0:02:19 lr: 0.000872 min_lr: 0.000872 loss: 2.5732 (2.5811) weight_decay: 0.0500 (0.0500) time: 0.9564 data: 0.2440 max mem: 57114 Epoch: [240] [ 20/156] eta: 0:01:55 lr: 0.000870 min_lr: 0.000870 loss: 2.7109 (2.6075) weight_decay: 0.0500 (0.0500) time: 0.7244 data: 0.0003 max mem: 57114 Epoch: [240] [ 30/156] eta: 0:01:42 lr: 0.000868 min_lr: 0.000868 loss: 2.7219 (2.5809) weight_decay: 0.0500 (0.0500) time: 0.7355 data: 0.0004 max mem: 57114 Epoch: [240] [ 40/156] eta: 0:01:31 lr: 0.000866 min_lr: 0.000866 loss: 2.6848 (2.6164) weight_decay: 0.0500 (0.0500) time: 0.7331 data: 0.0004 max mem: 57114 Epoch: [240] [ 50/156] eta: 0:01:22 lr: 0.000865 min_lr: 0.000865 loss: 2.6848 (2.6271) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0004 max mem: 57114 Epoch: [240] [ 60/156] eta: 0:01:13 lr: 0.000863 min_lr: 0.000863 loss: 2.6349 (2.6040) weight_decay: 0.0500 (0.0500) time: 0.7137 data: 0.0004 max mem: 57114 Epoch: [240] [ 70/156] eta: 0:01:05 lr: 0.000861 min_lr: 0.000861 loss: 2.7275 (2.6113) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [240] [ 80/156] eta: 0:00:57 lr: 0.000859 min_lr: 0.000859 loss: 2.5979 (2.6016) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0004 max mem: 57114 Epoch: [240] [ 90/156] eta: 0:00:49 lr: 0.000857 min_lr: 0.000857 loss: 2.5394 (2.6114) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0003 max mem: 57114 Epoch: [240] [100/156] eta: 0:00:41 lr: 0.000856 min_lr: 0.000856 loss: 2.7878 (2.6176) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0003 max mem: 57114 Epoch: [240] [110/156] eta: 0:00:34 lr: 0.000854 min_lr: 0.000854 loss: 2.7733 (2.6195) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0003 max mem: 57114 Epoch: [240] [120/156] eta: 0:00:26 lr: 0.000852 min_lr: 0.000852 loss: 2.7176 (2.6247) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0003 max mem: 57114 Epoch: [240] [130/156] eta: 0:00:19 lr: 0.000850 min_lr: 0.000850 loss: 2.7176 (2.6231) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0008 max mem: 57114 Epoch: [240] [140/156] eta: 0:00:11 lr: 0.000849 min_lr: 0.000849 loss: 2.8247 (2.6348) weight_decay: 0.0500 (0.0500) time: 0.6912 data: 0.0007 max mem: 57114 Epoch: [240] [150/156] eta: 0:00:04 lr: 0.000847 min_lr: 0.000847 loss: 2.8384 (2.6451) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0001 max mem: 57114 Epoch: [240] [155/156] eta: 0:00:00 lr: 0.000846 min_lr: 0.000846 loss: 2.8529 (2.6375) weight_decay: 0.0500 (0.0500) time: 0.6894 data: 0.0001 max mem: 57114 Epoch: [240] Total time: 0:01:54 (0.7315 s / it) Averaged stats: lr: 0.000846 min_lr: 0.000846 loss: 2.8529 (2.6700) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5677 (0.5677) acc1: 91.6667 (91.6667) acc5: 98.9583 (98.9583) time: 2.0775 data: 1.8215 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7812 (0.7199) acc1: 82.8125 (83.3760) acc5: 98.4375 (97.3146) time: 0.5662 data: 0.3644 max mem: 57114 Test: Total time: 0:00:02 (0.5853 s / it) * Acc@1 83.698 Acc@5 96.729 loss 0.793 Accuracy of the model on the 50000 test images: 83.7% Max accuracy: 83.70% Test: [0/5] eta: 0:00:10 loss: 0.5938 (0.5938) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0339 data: 1.7904 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6900 (0.6772) acc1: 81.2500 (79.2839) acc5: 95.8333 (94.2455) time: 0.5574 data: 0.3582 max mem: 57114 Test: Total time: 0:00:02 (0.5708 s / it) * Acc@1 77.508 Acc@5 93.928 loss 0.816 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [241] [ 0/156] eta: 0:12:27 lr: 0.000846 min_lr: 0.000846 loss: 2.8060 (2.8060) weight_decay: 0.0500 (0.0500) time: 4.7924 data: 3.8471 max mem: 57114 Epoch: [241] [ 10/156] eta: 0:02:40 lr: 0.000844 min_lr: 0.000844 loss: 2.6657 (2.5579) weight_decay: 0.0500 (0.0500) time: 1.1007 data: 0.3501 max mem: 57114 Epoch: [241] [ 20/156] eta: 0:02:06 lr: 0.000842 min_lr: 0.000842 loss: 2.6460 (2.5315) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0004 max mem: 57114 Epoch: [241] [ 30/156] eta: 0:01:49 lr: 0.000840 min_lr: 0.000840 loss: 2.6737 (2.5384) weight_decay: 0.0500 (0.0500) time: 0.7434 data: 0.0004 max mem: 57114 Epoch: [241] [ 40/156] eta: 0:01:37 lr: 0.000839 min_lr: 0.000839 loss: 2.8474 (2.6238) weight_decay: 0.0500 (0.0500) time: 0.7537 data: 0.0004 max mem: 57114 Epoch: [241] [ 50/156] eta: 0:01:26 lr: 0.000837 min_lr: 0.000837 loss: 2.9437 (2.6691) weight_decay: 0.0500 (0.0500) time: 0.7427 data: 0.0004 max mem: 57114 Epoch: [241] [ 60/156] eta: 0:01:17 lr: 0.000835 min_lr: 0.000835 loss: 2.9515 (2.6886) weight_decay: 0.0500 (0.0500) time: 0.7240 data: 0.0003 max mem: 57114 Epoch: [241] [ 70/156] eta: 0:01:08 lr: 0.000833 min_lr: 0.000833 loss: 2.9310 (2.6967) weight_decay: 0.0500 (0.0500) time: 0.7334 data: 0.0003 max mem: 57114 Epoch: [241] [ 80/156] eta: 0:00:59 lr: 0.000832 min_lr: 0.000832 loss: 2.6387 (2.6795) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0003 max mem: 57114 Epoch: [241] [ 90/156] eta: 0:00:51 lr: 0.000830 min_lr: 0.000830 loss: 2.6281 (2.6807) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0004 max mem: 57114 Epoch: [241] [100/156] eta: 0:00:43 lr: 0.000828 min_lr: 0.000828 loss: 2.8104 (2.6856) weight_decay: 0.0500 (0.0500) time: 0.7131 data: 0.0004 max mem: 57114 Epoch: [241] [110/156] eta: 0:00:35 lr: 0.000826 min_lr: 0.000826 loss: 2.8928 (2.6955) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0003 max mem: 57114 Epoch: [241] [120/156] eta: 0:00:27 lr: 0.000825 min_lr: 0.000825 loss: 2.8187 (2.6998) weight_decay: 0.0500 (0.0500) time: 0.7009 data: 0.0004 max mem: 57114 Epoch: [241] [130/156] eta: 0:00:19 lr: 0.000823 min_lr: 0.000823 loss: 2.7643 (2.6955) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0005 max mem: 57114 Epoch: [241] [140/156] eta: 0:00:12 lr: 0.000821 min_lr: 0.000821 loss: 2.8489 (2.7003) weight_decay: 0.0500 (0.0500) time: 0.7217 data: 0.0003 max mem: 57114 Epoch: [241] [150/156] eta: 0:00:04 lr: 0.000819 min_lr: 0.000819 loss: 2.6469 (2.6679) weight_decay: 0.0500 (0.0500) time: 0.7067 data: 0.0001 max mem: 57114 Epoch: [241] [155/156] eta: 0:00:00 lr: 0.000819 min_lr: 0.000819 loss: 2.2003 (2.6544) weight_decay: 0.0500 (0.0500) time: 0.6946 data: 0.0001 max mem: 57114 Epoch: [241] Total time: 0:01:56 (0.7492 s / it) Averaged stats: lr: 0.000819 min_lr: 0.000819 loss: 2.2003 (2.6624) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.4867 (0.4867) acc1: 90.6250 (90.6250) acc5: 99.4792 (99.4792) time: 2.0909 data: 1.8352 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6725 (0.6267) acc1: 84.3750 (84.2711) acc5: 98.4375 (97.6982) time: 0.5688 data: 0.3671 max mem: 57114 Test: Total time: 0:00:02 (0.5926 s / it) * Acc@1 83.951 Acc@5 96.839 loss 0.720 Accuracy of the model on the 50000 test images: 84.0% Max accuracy: 83.95% Test: [0/5] eta: 0:00:10 loss: 0.5946 (0.5946) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0281 data: 1.7846 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6901 (0.6779) acc1: 81.2500 (79.2839) acc5: 95.8333 (94.2455) time: 0.5562 data: 0.3570 max mem: 57114 Test: Total time: 0:00:02 (0.5714 s / it) * Acc@1 77.512 Acc@5 93.926 loss 0.817 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [242] [ 0/156] eta: 0:13:05 lr: 0.000818 min_lr: 0.000818 loss: 2.3768 (2.3768) weight_decay: 0.0500 (0.0500) time: 5.0335 data: 3.5966 max mem: 57114 Epoch: [242] [ 10/156] eta: 0:02:46 lr: 0.000817 min_lr: 0.000817 loss: 2.7988 (2.7441) weight_decay: 0.0500 (0.0500) time: 1.1400 data: 0.3273 max mem: 57114 Epoch: [242] [ 20/156] eta: 0:02:09 lr: 0.000815 min_lr: 0.000815 loss: 2.7988 (2.7200) weight_decay: 0.0500 (0.0500) time: 0.7499 data: 0.0003 max mem: 57114 Epoch: [242] [ 30/156] eta: 0:01:51 lr: 0.000813 min_lr: 0.000813 loss: 2.5689 (2.7071) weight_decay: 0.0500 (0.0500) time: 0.7484 data: 0.0003 max mem: 57114 Epoch: [242] [ 40/156] eta: 0:01:39 lr: 0.000811 min_lr: 0.000811 loss: 2.5988 (2.6884) weight_decay: 0.0500 (0.0500) time: 0.7593 data: 0.0004 max mem: 57114 Epoch: [242] [ 50/156] eta: 0:01:28 lr: 0.000810 min_lr: 0.000810 loss: 2.6698 (2.7037) weight_decay: 0.0500 (0.0500) time: 0.7569 data: 0.0004 max mem: 57114 Epoch: [242] [ 60/156] eta: 0:01:18 lr: 0.000808 min_lr: 0.000808 loss: 2.5607 (2.6634) weight_decay: 0.0500 (0.0500) time: 0.7441 data: 0.0004 max mem: 57114 Epoch: [242] [ 70/156] eta: 0:01:09 lr: 0.000806 min_lr: 0.000806 loss: 2.5607 (2.6645) weight_decay: 0.0500 (0.0500) time: 0.7390 data: 0.0004 max mem: 57114 Epoch: [242] [ 80/156] eta: 0:01:00 lr: 0.000805 min_lr: 0.000805 loss: 2.6089 (2.6434) weight_decay: 0.0500 (0.0500) time: 0.7294 data: 0.0004 max mem: 57114 Epoch: [242] [ 90/156] eta: 0:00:52 lr: 0.000803 min_lr: 0.000803 loss: 2.5645 (2.6337) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0004 max mem: 57114 Epoch: [242] [100/156] eta: 0:00:43 lr: 0.000801 min_lr: 0.000801 loss: 2.7806 (2.6592) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0004 max mem: 57114 Epoch: [242] [110/156] eta: 0:00:35 lr: 0.000799 min_lr: 0.000799 loss: 2.8728 (2.6600) weight_decay: 0.0500 (0.0500) time: 0.7360 data: 0.0004 max mem: 57114 Epoch: [242] [120/156] eta: 0:00:28 lr: 0.000798 min_lr: 0.000798 loss: 2.7782 (2.6619) weight_decay: 0.0500 (0.0500) time: 0.7551 data: 0.0004 max mem: 57114 Epoch: [242] [130/156] eta: 0:00:20 lr: 0.000796 min_lr: 0.000796 loss: 2.8597 (2.6769) weight_decay: 0.0500 (0.0500) time: 0.7429 data: 0.0009 max mem: 57114 Epoch: [242] [140/156] eta: 0:00:12 lr: 0.000794 min_lr: 0.000794 loss: 2.8860 (2.6749) weight_decay: 0.0500 (0.0500) time: 0.7255 data: 0.0008 max mem: 57114 Epoch: [242] [150/156] eta: 0:00:04 lr: 0.000792 min_lr: 0.000792 loss: 2.7185 (2.6648) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0001 max mem: 57114 Epoch: [242] [155/156] eta: 0:00:00 lr: 0.000792 min_lr: 0.000792 loss: 2.7185 (2.6673) weight_decay: 0.0500 (0.0500) time: 0.6949 data: 0.0001 max mem: 57114 Epoch: [242] Total time: 0:01:59 (0.7654 s / it) Averaged stats: lr: 0.000792 min_lr: 0.000792 loss: 2.7185 (2.6574) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6182 (0.6182) acc1: 91.1458 (91.1458) acc5: 98.9583 (98.9583) time: 2.0254 data: 1.7699 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7742 (0.7532) acc1: 84.8958 (84.3990) acc5: 98.9583 (97.5703) time: 0.5558 data: 0.3541 max mem: 57114 Test: Total time: 0:00:02 (0.5756 s / it) * Acc@1 83.766 Acc@5 96.805 loss 0.835 Accuracy of the model on the 50000 test images: 83.8% Max accuracy: 83.95% Test: [0/5] eta: 0:00:11 loss: 0.5952 (0.5952) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2551 data: 2.0116 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6903 (0.6785) acc1: 81.2500 (79.2839) acc5: 95.8333 (94.1177) time: 0.6017 data: 0.4024 max mem: 57114 Test: Total time: 0:00:03 (0.6126 s / it) * Acc@1 77.498 Acc@5 93.916 loss 0.818 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [243] [ 0/156] eta: 0:13:00 lr: 0.000791 min_lr: 0.000791 loss: 2.5057 (2.5057) weight_decay: 0.0500 (0.0500) time: 5.0028 data: 3.5833 max mem: 57114 Epoch: [243] [ 10/156] eta: 0:02:44 lr: 0.000790 min_lr: 0.000790 loss: 2.8692 (2.6019) weight_decay: 0.0500 (0.0500) time: 1.1271 data: 0.3260 max mem: 57114 Epoch: [243] [ 20/156] eta: 0:02:08 lr: 0.000788 min_lr: 0.000788 loss: 2.8816 (2.6357) weight_decay: 0.0500 (0.0500) time: 0.7396 data: 0.0003 max mem: 57114 Epoch: [243] [ 30/156] eta: 0:01:50 lr: 0.000786 min_lr: 0.000786 loss: 2.7979 (2.6052) weight_decay: 0.0500 (0.0500) time: 0.7368 data: 0.0003 max mem: 57114 Epoch: [243] [ 40/156] eta: 0:01:37 lr: 0.000785 min_lr: 0.000785 loss: 2.5276 (2.5870) weight_decay: 0.0500 (0.0500) time: 0.7367 data: 0.0003 max mem: 57114 Epoch: [243] [ 50/156] eta: 0:01:26 lr: 0.000783 min_lr: 0.000783 loss: 2.6567 (2.5875) weight_decay: 0.0500 (0.0500) time: 0.7325 data: 0.0004 max mem: 57114 Epoch: [243] [ 60/156] eta: 0:01:17 lr: 0.000781 min_lr: 0.000781 loss: 2.6791 (2.5993) weight_decay: 0.0500 (0.0500) time: 0.7308 data: 0.0004 max mem: 57114 Epoch: [243] [ 70/156] eta: 0:01:08 lr: 0.000779 min_lr: 0.000779 loss: 2.6791 (2.6142) weight_decay: 0.0500 (0.0500) time: 0.7197 data: 0.0004 max mem: 57114 Epoch: [243] [ 80/156] eta: 0:00:59 lr: 0.000778 min_lr: 0.000778 loss: 2.6636 (2.6028) weight_decay: 0.0500 (0.0500) time: 0.6968 data: 0.0004 max mem: 57114 Epoch: [243] [ 90/156] eta: 0:00:50 lr: 0.000776 min_lr: 0.000776 loss: 2.7099 (2.6269) weight_decay: 0.0500 (0.0500) time: 0.6943 data: 0.0004 max mem: 57114 Epoch: [243] [100/156] eta: 0:00:43 lr: 0.000774 min_lr: 0.000774 loss: 2.8396 (2.6112) weight_decay: 0.0500 (0.0500) time: 0.7264 data: 0.0003 max mem: 57114 Epoch: [243] [110/156] eta: 0:00:35 lr: 0.000773 min_lr: 0.000773 loss: 2.7292 (2.6140) weight_decay: 0.0500 (0.0500) time: 0.7338 data: 0.0003 max mem: 57114 Epoch: [243] [120/156] eta: 0:00:27 lr: 0.000771 min_lr: 0.000771 loss: 2.7292 (2.6199) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [243] [130/156] eta: 0:00:19 lr: 0.000769 min_lr: 0.000769 loss: 2.6704 (2.6120) weight_decay: 0.0500 (0.0500) time: 0.7294 data: 0.0009 max mem: 57114 Epoch: [243] [140/156] eta: 0:00:12 lr: 0.000768 min_lr: 0.000768 loss: 2.7085 (2.6142) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0007 max mem: 57114 Epoch: [243] [150/156] eta: 0:00:04 lr: 0.000766 min_lr: 0.000766 loss: 2.7268 (2.6171) weight_decay: 0.0500 (0.0500) time: 0.7205 data: 0.0001 max mem: 57114 Epoch: [243] [155/156] eta: 0:00:00 lr: 0.000765 min_lr: 0.000765 loss: 2.7140 (2.6163) weight_decay: 0.0500 (0.0500) time: 0.7253 data: 0.0001 max mem: 57114 Epoch: [243] Total time: 0:01:57 (0.7551 s / it) Averaged stats: lr: 0.000765 min_lr: 0.000765 loss: 2.7140 (2.6527) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:17 loss: 0.6121 (0.6121) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 3.4770 data: 3.1987 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7548 (0.7176) acc1: 84.8958 (84.0153) acc5: 98.9583 (97.9540) time: 0.8509 data: 0.6398 max mem: 57114 Test: Total time: 0:00:04 (0.8880 s / it) * Acc@1 83.875 Acc@5 96.847 loss 0.803 Accuracy of the model on the 50000 test images: 83.9% Max accuracy: 83.95% Test: [0/5] eta: 0:00:18 loss: 0.5958 (0.5958) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 3.7638 data: 3.5197 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6905 (0.6791) acc1: 81.2500 (79.2839) acc5: 95.8333 (94.1177) time: 0.9060 data: 0.7040 max mem: 57114 Test: Total time: 0:00:04 (0.9214 s / it) * Acc@1 77.490 Acc@5 93.916 loss 0.818 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [244] [ 0/156] eta: 0:13:26 lr: 0.000765 min_lr: 0.000765 loss: 3.0913 (3.0913) weight_decay: 0.0500 (0.0500) time: 5.1727 data: 4.0515 max mem: 57114 Epoch: [244] [ 10/156] eta: 0:02:45 lr: 0.000763 min_lr: 0.000763 loss: 2.7114 (2.5481) weight_decay: 0.0500 (0.0500) time: 1.1342 data: 0.3686 max mem: 57114 Epoch: [244] [ 20/156] eta: 0:02:08 lr: 0.000761 min_lr: 0.000761 loss: 2.5909 (2.5043) weight_decay: 0.0500 (0.0500) time: 0.7342 data: 0.0003 max mem: 57114 Epoch: [244] [ 30/156] eta: 0:01:50 lr: 0.000760 min_lr: 0.000760 loss: 2.6295 (2.5620) weight_decay: 0.0500 (0.0500) time: 0.7349 data: 0.0004 max mem: 57114 Epoch: [244] [ 40/156] eta: 0:01:37 lr: 0.000758 min_lr: 0.000758 loss: 2.9121 (2.6571) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0004 max mem: 57114 Epoch: [244] [ 50/156] eta: 0:01:26 lr: 0.000756 min_lr: 0.000756 loss: 2.9121 (2.6724) weight_decay: 0.0500 (0.0500) time: 0.7250 data: 0.0004 max mem: 57114 Epoch: [244] [ 60/156] eta: 0:01:17 lr: 0.000755 min_lr: 0.000755 loss: 2.8832 (2.6736) weight_decay: 0.0500 (0.0500) time: 0.7249 data: 0.0004 max mem: 57114 Epoch: [244] [ 70/156] eta: 0:01:07 lr: 0.000753 min_lr: 0.000753 loss: 2.8578 (2.6698) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [244] [ 80/156] eta: 0:00:59 lr: 0.000751 min_lr: 0.000751 loss: 2.7743 (2.6751) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0004 max mem: 57114 Epoch: [244] [ 90/156] eta: 0:00:51 lr: 0.000750 min_lr: 0.000750 loss: 2.8261 (2.6991) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0004 max mem: 57114 Epoch: [244] [100/156] eta: 0:00:43 lr: 0.000748 min_lr: 0.000748 loss: 2.8484 (2.6978) weight_decay: 0.0500 (0.0500) time: 0.7137 data: 0.0004 max mem: 57114 Epoch: [244] [110/156] eta: 0:00:35 lr: 0.000746 min_lr: 0.000746 loss: 2.6802 (2.6951) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0004 max mem: 57114 Epoch: [244] [120/156] eta: 0:00:27 lr: 0.000745 min_lr: 0.000745 loss: 2.6092 (2.6797) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0004 max mem: 57114 Epoch: [244] [130/156] eta: 0:00:19 lr: 0.000743 min_lr: 0.000743 loss: 2.6092 (2.6811) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0009 max mem: 57114 Epoch: [244] [140/156] eta: 0:00:11 lr: 0.000741 min_lr: 0.000741 loss: 2.8109 (2.6842) weight_decay: 0.0500 (0.0500) time: 0.6976 data: 0.0007 max mem: 57114 Epoch: [244] [150/156] eta: 0:00:04 lr: 0.000740 min_lr: 0.000740 loss: 2.8982 (2.6973) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0001 max mem: 57114 Epoch: [244] [155/156] eta: 0:00:00 lr: 0.000739 min_lr: 0.000739 loss: 2.8019 (2.6859) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0001 max mem: 57114 Epoch: [244] Total time: 0:01:56 (0.7456 s / it) Averaged stats: lr: 0.000739 min_lr: 0.000739 loss: 2.8019 (2.6483) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5932 (0.5932) acc1: 92.1875 (92.1875) acc5: 98.9583 (98.9583) time: 2.1024 data: 1.8466 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7528 (0.7026) acc1: 86.4583 (85.4220) acc5: 98.9583 (97.4425) time: 0.5712 data: 0.3694 max mem: 57114 Test: Total time: 0:00:02 (0.5938 s / it) * Acc@1 83.871 Acc@5 96.831 loss 0.795 Accuracy of the model on the 50000 test images: 83.9% Max accuracy: 83.95% Test: [0/5] eta: 0:00:11 loss: 0.5966 (0.5966) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2536 data: 2.0101 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6908 (0.6799) acc1: 81.2500 (79.4118) acc5: 95.8333 (94.1177) time: 0.6013 data: 0.4021 max mem: 57114 Test: Total time: 0:00:03 (0.6138 s / it) * Acc@1 77.492 Acc@5 93.906 loss 0.819 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [245] [ 0/156] eta: 0:12:52 lr: 0.000739 min_lr: 0.000739 loss: 2.8720 (2.8720) weight_decay: 0.0500 (0.0500) time: 4.9510 data: 3.6757 max mem: 57114 Epoch: [245] [ 10/156] eta: 0:02:40 lr: 0.000737 min_lr: 0.000737 loss: 2.6145 (2.6116) weight_decay: 0.0500 (0.0500) time: 1.1021 data: 0.3344 max mem: 57114 Epoch: [245] [ 20/156] eta: 0:02:04 lr: 0.000735 min_lr: 0.000735 loss: 2.6808 (2.6834) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0003 max mem: 57114 Epoch: [245] [ 30/156] eta: 0:01:47 lr: 0.000734 min_lr: 0.000734 loss: 2.7031 (2.6464) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0002 max mem: 57114 Epoch: [245] [ 40/156] eta: 0:01:35 lr: 0.000732 min_lr: 0.000732 loss: 2.7331 (2.6911) weight_decay: 0.0500 (0.0500) time: 0.7202 data: 0.0003 max mem: 57114 Epoch: [245] [ 50/156] eta: 0:01:25 lr: 0.000730 min_lr: 0.000730 loss: 2.8630 (2.7042) weight_decay: 0.0500 (0.0500) time: 0.7330 data: 0.0004 max mem: 57114 Epoch: [245] [ 60/156] eta: 0:01:16 lr: 0.000729 min_lr: 0.000729 loss: 2.8623 (2.7095) weight_decay: 0.0500 (0.0500) time: 0.7333 data: 0.0003 max mem: 57114 Epoch: [245] [ 70/156] eta: 0:01:07 lr: 0.000727 min_lr: 0.000727 loss: 2.7005 (2.7033) weight_decay: 0.0500 (0.0500) time: 0.7189 data: 0.0003 max mem: 57114 Epoch: [245] [ 80/156] eta: 0:00:58 lr: 0.000725 min_lr: 0.000725 loss: 2.5755 (2.6697) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0003 max mem: 57114 Epoch: [245] [ 90/156] eta: 0:00:50 lr: 0.000724 min_lr: 0.000724 loss: 2.5310 (2.6335) weight_decay: 0.0500 (0.0500) time: 0.7131 data: 0.0003 max mem: 57114 Epoch: [245] [100/156] eta: 0:00:42 lr: 0.000722 min_lr: 0.000722 loss: 2.5122 (2.6275) weight_decay: 0.0500 (0.0500) time: 0.7097 data: 0.0004 max mem: 57114 Epoch: [245] [110/156] eta: 0:00:34 lr: 0.000720 min_lr: 0.000720 loss: 2.5436 (2.6225) weight_decay: 0.0500 (0.0500) time: 0.7053 data: 0.0004 max mem: 57114 Epoch: [245] [120/156] eta: 0:00:27 lr: 0.000719 min_lr: 0.000719 loss: 2.6193 (2.6157) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0003 max mem: 57114 Epoch: [245] [130/156] eta: 0:00:19 lr: 0.000717 min_lr: 0.000717 loss: 2.6752 (2.6067) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0007 max mem: 57114 Epoch: [245] [140/156] eta: 0:00:11 lr: 0.000716 min_lr: 0.000716 loss: 2.6286 (2.6124) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0006 max mem: 57114 Epoch: [245] [150/156] eta: 0:00:04 lr: 0.000714 min_lr: 0.000714 loss: 2.6179 (2.6138) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0001 max mem: 57114 Epoch: [245] [155/156] eta: 0:00:00 lr: 0.000713 min_lr: 0.000713 loss: 2.6179 (2.6105) weight_decay: 0.0500 (0.0500) time: 0.6893 data: 0.0001 max mem: 57114 Epoch: [245] Total time: 0:01:55 (0.7402 s / it) Averaged stats: lr: 0.000713 min_lr: 0.000713 loss: 2.6179 (2.6433) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5833 (0.5833) acc1: 90.1042 (90.1042) acc5: 98.9583 (98.9583) time: 2.0839 data: 1.8285 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7926 (0.6977) acc1: 82.8125 (84.6547) acc5: 97.9167 (97.3146) time: 0.5676 data: 0.3658 max mem: 57114 Test: Total time: 0:00:02 (0.5865 s / it) * Acc@1 83.905 Acc@5 96.751 loss 0.784 Accuracy of the model on the 50000 test images: 83.9% Max accuracy: 83.95% Test: [0/5] eta: 0:00:11 loss: 0.5973 (0.5973) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3988 data: 2.1554 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6912 (0.6808) acc1: 80.7292 (79.2839) acc5: 95.8333 (94.1177) time: 0.6305 data: 0.4312 max mem: 57114 Test: Total time: 0:00:03 (0.6445 s / it) * Acc@1 77.510 Acc@5 93.906 loss 0.820 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [246] [ 0/156] eta: 0:13:00 lr: 0.000713 min_lr: 0.000713 loss: 2.3376 (2.3376) weight_decay: 0.0500 (0.0500) time: 5.0018 data: 4.0003 max mem: 57114 Epoch: [246] [ 10/156] eta: 0:02:46 lr: 0.000711 min_lr: 0.000711 loss: 2.7793 (2.5905) weight_decay: 0.0500 (0.0500) time: 1.1380 data: 0.3640 max mem: 57114 Epoch: [246] [ 20/156] eta: 0:02:07 lr: 0.000710 min_lr: 0.000710 loss: 2.8654 (2.6670) weight_decay: 0.0500 (0.0500) time: 0.7318 data: 0.0004 max mem: 57114 Epoch: [246] [ 30/156] eta: 0:01:49 lr: 0.000708 min_lr: 0.000708 loss: 2.8654 (2.5866) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0003 max mem: 57114 Epoch: [246] [ 40/156] eta: 0:01:36 lr: 0.000706 min_lr: 0.000706 loss: 2.5279 (2.5766) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0003 max mem: 57114 Epoch: [246] [ 50/156] eta: 0:01:26 lr: 0.000705 min_lr: 0.000705 loss: 2.6798 (2.6198) weight_decay: 0.0500 (0.0500) time: 0.7291 data: 0.0004 max mem: 57114 Epoch: [246] [ 60/156] eta: 0:01:16 lr: 0.000703 min_lr: 0.000703 loss: 2.8773 (2.6314) weight_decay: 0.0500 (0.0500) time: 0.7348 data: 0.0004 max mem: 57114 Epoch: [246] [ 70/156] eta: 0:01:07 lr: 0.000701 min_lr: 0.000701 loss: 2.7511 (2.6085) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0004 max mem: 57114 Epoch: [246] [ 80/156] eta: 0:00:59 lr: 0.000700 min_lr: 0.000700 loss: 2.7114 (2.6127) weight_decay: 0.0500 (0.0500) time: 0.7156 data: 0.0004 max mem: 57114 Epoch: [246] [ 90/156] eta: 0:00:51 lr: 0.000698 min_lr: 0.000698 loss: 2.7114 (2.6131) weight_decay: 0.0500 (0.0500) time: 0.7174 data: 0.0004 max mem: 57114 Epoch: [246] [100/156] eta: 0:00:42 lr: 0.000697 min_lr: 0.000697 loss: 2.6416 (2.6156) weight_decay: 0.0500 (0.0500) time: 0.7023 data: 0.0004 max mem: 57114 Epoch: [246] [110/156] eta: 0:00:34 lr: 0.000695 min_lr: 0.000695 loss: 2.6091 (2.6055) weight_decay: 0.0500 (0.0500) time: 0.6927 data: 0.0004 max mem: 57114 Epoch: [246] [120/156] eta: 0:00:27 lr: 0.000693 min_lr: 0.000693 loss: 2.5283 (2.5992) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [246] [130/156] eta: 0:00:19 lr: 0.000692 min_lr: 0.000692 loss: 2.7953 (2.6131) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0008 max mem: 57114 Epoch: [246] [140/156] eta: 0:00:11 lr: 0.000690 min_lr: 0.000690 loss: 2.8219 (2.6206) weight_decay: 0.0500 (0.0500) time: 0.6912 data: 0.0007 max mem: 57114 Epoch: [246] [150/156] eta: 0:00:04 lr: 0.000689 min_lr: 0.000689 loss: 2.7377 (2.6212) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [246] [155/156] eta: 0:00:00 lr: 0.000688 min_lr: 0.000688 loss: 2.7377 (2.6188) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0001 max mem: 57114 Epoch: [246] Total time: 0:01:55 (0.7422 s / it) Averaged stats: lr: 0.000688 min_lr: 0.000688 loss: 2.7377 (2.6424) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6205 (0.6205) acc1: 89.5833 (89.5833) acc5: 98.9583 (98.9583) time: 2.1437 data: 1.8880 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8362 (0.7453) acc1: 82.8125 (84.3990) acc5: 98.4375 (97.1867) time: 0.5794 data: 0.3777 max mem: 57114 Test: Total time: 0:00:03 (0.6014 s / it) * Acc@1 83.863 Acc@5 96.815 loss 0.825 Accuracy of the model on the 50000 test images: 83.9% Max accuracy: 83.95% Test: [0/5] eta: 0:00:11 loss: 0.5983 (0.5983) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2811 data: 2.0376 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6917 (0.6817) acc1: 80.7292 (79.2839) acc5: 95.8333 (94.1177) time: 0.6072 data: 0.4076 max mem: 57114 Test: Total time: 0:00:03 (0.6215 s / it) * Acc@1 77.512 Acc@5 93.898 loss 0.821 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [247] [ 0/156] eta: 0:13:33 lr: 0.000688 min_lr: 0.000688 loss: 2.5759 (2.5759) weight_decay: 0.0500 (0.0500) time: 5.2172 data: 4.2051 max mem: 57114 Epoch: [247] [ 10/156] eta: 0:02:47 lr: 0.000686 min_lr: 0.000686 loss: 2.7932 (2.6614) weight_decay: 0.0500 (0.0500) time: 1.1457 data: 0.3826 max mem: 57114 Epoch: [247] [ 20/156] eta: 0:02:08 lr: 0.000684 min_lr: 0.000684 loss: 2.5118 (2.5469) weight_decay: 0.0500 (0.0500) time: 0.7317 data: 0.0003 max mem: 57114 Epoch: [247] [ 30/156] eta: 0:01:49 lr: 0.000683 min_lr: 0.000683 loss: 2.5304 (2.5473) weight_decay: 0.0500 (0.0500) time: 0.7204 data: 0.0003 max mem: 57114 Epoch: [247] [ 40/156] eta: 0:01:37 lr: 0.000681 min_lr: 0.000681 loss: 2.5749 (2.5897) weight_decay: 0.0500 (0.0500) time: 0.7310 data: 0.0004 max mem: 57114 Epoch: [247] [ 50/156] eta: 0:01:26 lr: 0.000680 min_lr: 0.000680 loss: 2.7890 (2.6151) weight_decay: 0.0500 (0.0500) time: 0.7339 data: 0.0004 max mem: 57114 Epoch: [247] [ 60/156] eta: 0:01:16 lr: 0.000678 min_lr: 0.000678 loss: 2.6851 (2.5663) weight_decay: 0.0500 (0.0500) time: 0.7125 data: 0.0004 max mem: 57114 Epoch: [247] [ 70/156] eta: 0:01:07 lr: 0.000676 min_lr: 0.000676 loss: 2.6200 (2.5777) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [247] [ 80/156] eta: 0:00:59 lr: 0.000675 min_lr: 0.000675 loss: 2.6481 (2.5791) weight_decay: 0.0500 (0.0500) time: 0.7099 data: 0.0004 max mem: 57114 Epoch: [247] [ 90/156] eta: 0:00:50 lr: 0.000673 min_lr: 0.000673 loss: 2.6339 (2.5838) weight_decay: 0.0500 (0.0500) time: 0.7032 data: 0.0003 max mem: 57114 Epoch: [247] [100/156] eta: 0:00:42 lr: 0.000672 min_lr: 0.000672 loss: 2.6339 (2.5773) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [247] [110/156] eta: 0:00:34 lr: 0.000670 min_lr: 0.000670 loss: 2.6916 (2.5783) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [247] [120/156] eta: 0:00:27 lr: 0.000668 min_lr: 0.000668 loss: 2.6182 (2.5797) weight_decay: 0.0500 (0.0500) time: 0.7153 data: 0.0004 max mem: 57114 Epoch: [247] [130/156] eta: 0:00:19 lr: 0.000667 min_lr: 0.000667 loss: 2.6182 (2.5807) weight_decay: 0.0500 (0.0500) time: 0.6966 data: 0.0009 max mem: 57114 Epoch: [247] [140/156] eta: 0:00:11 lr: 0.000665 min_lr: 0.000665 loss: 2.7976 (2.5839) weight_decay: 0.0500 (0.0500) time: 0.6822 data: 0.0008 max mem: 57114 Epoch: [247] [150/156] eta: 0:00:04 lr: 0.000664 min_lr: 0.000664 loss: 2.7740 (2.5863) weight_decay: 0.0500 (0.0500) time: 0.6806 data: 0.0001 max mem: 57114 Epoch: [247] [155/156] eta: 0:00:00 lr: 0.000663 min_lr: 0.000663 loss: 2.7876 (2.5898) weight_decay: 0.0500 (0.0500) time: 0.6806 data: 0.0001 max mem: 57114 Epoch: [247] Total time: 0:01:55 (0.7413 s / it) Averaged stats: lr: 0.000663 min_lr: 0.000663 loss: 2.7876 (2.6359) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5737 (0.5737) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.0881 data: 1.8259 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7504 (0.7021) acc1: 82.8125 (83.6317) acc5: 97.9167 (97.3146) time: 0.5683 data: 0.3653 max mem: 57114 Test: Total time: 0:00:02 (0.5901 s / it) * Acc@1 84.085 Acc@5 96.855 loss 0.789 Accuracy of the model on the 50000 test images: 84.1% Max accuracy: 84.09% Test: [0/5] eta: 0:00:10 loss: 0.5992 (0.5992) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0147 data: 1.7714 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6922 (0.6826) acc1: 80.7292 (79.2839) acc5: 95.8333 (94.1177) time: 0.5535 data: 0.3544 max mem: 57114 Test: Total time: 0:00:02 (0.5652 s / it) * Acc@1 77.492 Acc@5 93.892 loss 0.822 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [248] [ 0/156] eta: 0:14:26 lr: 0.000663 min_lr: 0.000663 loss: 1.8118 (1.8118) weight_decay: 0.0500 (0.0500) time: 5.5553 data: 3.8552 max mem: 57114 Epoch: [248] [ 10/156] eta: 0:02:53 lr: 0.000661 min_lr: 0.000661 loss: 2.6814 (2.5731) weight_decay: 0.0500 (0.0500) time: 1.1914 data: 0.3507 max mem: 57114 Epoch: [248] [ 20/156] eta: 0:02:12 lr: 0.000659 min_lr: 0.000659 loss: 2.6628 (2.6067) weight_decay: 0.0500 (0.0500) time: 0.7485 data: 0.0002 max mem: 57114 Epoch: [248] [ 30/156] eta: 0:01:53 lr: 0.000658 min_lr: 0.000658 loss: 2.6628 (2.5990) weight_decay: 0.0500 (0.0500) time: 0.7355 data: 0.0003 max mem: 57114 Epoch: [248] [ 40/156] eta: 0:01:39 lr: 0.000656 min_lr: 0.000656 loss: 2.6644 (2.5867) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0003 max mem: 57114 Epoch: [248] [ 50/156] eta: 0:01:28 lr: 0.000655 min_lr: 0.000655 loss: 2.7507 (2.6448) weight_decay: 0.0500 (0.0500) time: 0.7346 data: 0.0004 max mem: 57114 Epoch: [248] [ 60/156] eta: 0:01:18 lr: 0.000653 min_lr: 0.000653 loss: 2.8245 (2.6416) weight_decay: 0.0500 (0.0500) time: 0.7348 data: 0.0004 max mem: 57114 Epoch: [248] [ 70/156] eta: 0:01:08 lr: 0.000652 min_lr: 0.000652 loss: 2.7360 (2.6415) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.0004 max mem: 57114 Epoch: [248] [ 80/156] eta: 0:01:00 lr: 0.000650 min_lr: 0.000650 loss: 2.5921 (2.6063) weight_decay: 0.0500 (0.0500) time: 0.7276 data: 0.0004 max mem: 57114 Epoch: [248] [ 90/156] eta: 0:00:51 lr: 0.000648 min_lr: 0.000648 loss: 2.5932 (2.6159) weight_decay: 0.0500 (0.0500) time: 0.7327 data: 0.0004 max mem: 57114 Epoch: [248] [100/156] eta: 0:00:43 lr: 0.000647 min_lr: 0.000647 loss: 2.6907 (2.6046) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0004 max mem: 57114 Epoch: [248] [110/156] eta: 0:00:35 lr: 0.000645 min_lr: 0.000645 loss: 2.7962 (2.6126) weight_decay: 0.0500 (0.0500) time: 0.6992 data: 0.0004 max mem: 57114 Epoch: [248] [120/156] eta: 0:00:27 lr: 0.000644 min_lr: 0.000644 loss: 2.7962 (2.6146) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0004 max mem: 57114 Epoch: [248] [130/156] eta: 0:00:19 lr: 0.000642 min_lr: 0.000642 loss: 2.5656 (2.5925) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0009 max mem: 57114 Epoch: [248] [140/156] eta: 0:00:12 lr: 0.000641 min_lr: 0.000641 loss: 2.2189 (2.5720) weight_decay: 0.0500 (0.0500) time: 0.6899 data: 0.0008 max mem: 57114 Epoch: [248] [150/156] eta: 0:00:04 lr: 0.000639 min_lr: 0.000639 loss: 2.3940 (2.5696) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [248] [155/156] eta: 0:00:00 lr: 0.000638 min_lr: 0.000638 loss: 2.4921 (2.5722) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0001 max mem: 57114 Epoch: [248] Total time: 0:01:56 (0.7494 s / it) Averaged stats: lr: 0.000638 min_lr: 0.000638 loss: 2.4921 (2.6246) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5484 (0.5484) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 2.0591 data: 1.8034 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7541 (0.6870) acc1: 83.8542 (83.5038) acc5: 98.9583 (97.8261) time: 0.5624 data: 0.3607 max mem: 57114 Test: Total time: 0:00:02 (0.5849 s / it) * Acc@1 83.901 Acc@5 96.847 loss 0.766 Accuracy of the model on the 50000 test images: 83.9% Max accuracy: 84.09% Test: [0/5] eta: 0:00:11 loss: 0.6002 (0.6002) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2413 data: 1.9978 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6929 (0.6837) acc1: 80.7292 (79.2839) acc5: 96.3542 (94.2455) time: 0.5989 data: 0.3996 max mem: 57114 Test: Total time: 0:00:03 (0.6107 s / it) * Acc@1 77.512 Acc@5 93.892 loss 0.823 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [249] [ 0/156] eta: 0:13:23 lr: 0.000638 min_lr: 0.000638 loss: 2.7060 (2.7060) weight_decay: 0.0500 (0.0500) time: 5.1492 data: 4.3648 max mem: 57114 Epoch: [249] [ 10/156] eta: 0:02:45 lr: 0.000637 min_lr: 0.000637 loss: 2.6864 (2.4748) weight_decay: 0.0500 (0.0500) time: 1.1347 data: 0.3972 max mem: 57114 Epoch: [249] [ 20/156] eta: 0:02:07 lr: 0.000635 min_lr: 0.000635 loss: 2.6864 (2.5448) weight_decay: 0.0500 (0.0500) time: 0.7235 data: 0.0004 max mem: 57114 Epoch: [249] [ 30/156] eta: 0:01:49 lr: 0.000633 min_lr: 0.000633 loss: 2.7257 (2.5872) weight_decay: 0.0500 (0.0500) time: 0.7233 data: 0.0004 max mem: 57114 Epoch: [249] [ 40/156] eta: 0:01:36 lr: 0.000632 min_lr: 0.000632 loss: 2.7257 (2.6142) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0004 max mem: 57114 Epoch: [249] [ 50/156] eta: 0:01:25 lr: 0.000630 min_lr: 0.000630 loss: 2.8093 (2.6605) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0004 max mem: 57114 Epoch: [249] [ 60/156] eta: 0:01:16 lr: 0.000629 min_lr: 0.000629 loss: 2.8093 (2.6791) weight_decay: 0.0500 (0.0500) time: 0.7279 data: 0.0004 max mem: 57114 Epoch: [249] [ 70/156] eta: 0:01:07 lr: 0.000627 min_lr: 0.000627 loss: 2.7719 (2.6812) weight_decay: 0.0500 (0.0500) time: 0.7211 data: 0.0004 max mem: 57114 Epoch: [249] [ 80/156] eta: 0:00:58 lr: 0.000626 min_lr: 0.000626 loss: 2.6628 (2.6526) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [249] [ 90/156] eta: 0:00:50 lr: 0.000624 min_lr: 0.000624 loss: 2.6628 (2.6703) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0004 max mem: 57114 Epoch: [249] [100/156] eta: 0:00:42 lr: 0.000623 min_lr: 0.000623 loss: 2.7872 (2.6548) weight_decay: 0.0500 (0.0500) time: 0.7061 data: 0.0004 max mem: 57114 Epoch: [249] [110/156] eta: 0:00:34 lr: 0.000621 min_lr: 0.000621 loss: 2.7385 (2.6629) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [249] [120/156] eta: 0:00:27 lr: 0.000620 min_lr: 0.000620 loss: 2.7913 (2.6594) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [249] [130/156] eta: 0:00:19 lr: 0.000618 min_lr: 0.000618 loss: 2.7562 (2.6524) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0009 max mem: 57114 Epoch: [249] [140/156] eta: 0:00:11 lr: 0.000616 min_lr: 0.000616 loss: 2.7235 (2.6610) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0007 max mem: 57114 Epoch: [249] [150/156] eta: 0:00:04 lr: 0.000615 min_lr: 0.000615 loss: 2.7437 (2.6661) weight_decay: 0.0500 (0.0500) time: 0.6870 data: 0.0001 max mem: 57114 Epoch: [249] [155/156] eta: 0:00:00 lr: 0.000614 min_lr: 0.000614 loss: 2.7235 (2.6585) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0001 max mem: 57114 Epoch: [249] Total time: 0:01:55 (0.7411 s / it) Averaged stats: lr: 0.000614 min_lr: 0.000614 loss: 2.7235 (2.6252) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5820 (0.5820) acc1: 89.5833 (89.5833) acc5: 98.9583 (98.9583) time: 2.0508 data: 1.7949 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7595 (0.6953) acc1: 83.8542 (84.0153) acc5: 98.9583 (97.6982) time: 0.5608 data: 0.3591 max mem: 57114 Test: Total time: 0:00:02 (0.5843 s / it) * Acc@1 84.011 Acc@5 96.963 loss 0.782 Accuracy of the model on the 50000 test images: 84.0% Max accuracy: 84.09% Test: [0/5] eta: 0:00:11 loss: 0.6012 (0.6012) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2537 data: 2.0099 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6935 (0.6847) acc1: 80.7292 (79.2839) acc5: 96.3542 (94.2455) time: 0.6014 data: 0.4021 max mem: 57114 Test: Total time: 0:00:03 (0.6123 s / it) * Acc@1 77.504 Acc@5 93.888 loss 0.824 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [250] [ 0/156] eta: 0:12:46 lr: 0.000614 min_lr: 0.000614 loss: 2.9340 (2.9340) weight_decay: 0.0500 (0.0500) time: 4.9152 data: 4.2479 max mem: 57114 Epoch: [250] [ 10/156] eta: 0:02:45 lr: 0.000612 min_lr: 0.000612 loss: 2.4727 (2.5303) weight_decay: 0.0500 (0.0500) time: 1.1337 data: 0.3865 max mem: 57114 Epoch: [250] [ 20/156] eta: 0:02:06 lr: 0.000611 min_lr: 0.000611 loss: 2.3573 (2.4863) weight_decay: 0.0500 (0.0500) time: 0.7326 data: 0.0003 max mem: 57114 Epoch: [250] [ 30/156] eta: 0:01:48 lr: 0.000609 min_lr: 0.000609 loss: 2.5561 (2.5418) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0004 max mem: 57114 Epoch: [250] [ 40/156] eta: 0:01:36 lr: 0.000608 min_lr: 0.000608 loss: 2.6881 (2.5638) weight_decay: 0.0500 (0.0500) time: 0.7217 data: 0.0004 max mem: 57114 Epoch: [250] [ 50/156] eta: 0:01:25 lr: 0.000606 min_lr: 0.000606 loss: 2.8775 (2.6293) weight_decay: 0.0500 (0.0500) time: 0.7186 data: 0.0003 max mem: 57114 Epoch: [250] [ 60/156] eta: 0:01:16 lr: 0.000605 min_lr: 0.000605 loss: 2.8564 (2.6126) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0004 max mem: 57114 Epoch: [250] [ 70/156] eta: 0:01:07 lr: 0.000603 min_lr: 0.000603 loss: 2.6989 (2.6259) weight_decay: 0.0500 (0.0500) time: 0.7153 data: 0.0004 max mem: 57114 Epoch: [250] [ 80/156] eta: 0:00:58 lr: 0.000602 min_lr: 0.000602 loss: 2.7123 (2.6394) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0004 max mem: 57114 Epoch: [250] [ 90/156] eta: 0:00:50 lr: 0.000600 min_lr: 0.000600 loss: 2.7493 (2.6568) weight_decay: 0.0500 (0.0500) time: 0.7188 data: 0.0004 max mem: 57114 Epoch: [250] [100/156] eta: 0:00:42 lr: 0.000599 min_lr: 0.000599 loss: 2.7398 (2.6381) weight_decay: 0.0500 (0.0500) time: 0.7047 data: 0.0004 max mem: 57114 Epoch: [250] [110/156] eta: 0:00:34 lr: 0.000597 min_lr: 0.000597 loss: 2.7846 (2.6593) weight_decay: 0.0500 (0.0500) time: 0.7023 data: 0.0004 max mem: 57114 Epoch: [250] [120/156] eta: 0:00:27 lr: 0.000596 min_lr: 0.000596 loss: 2.7676 (2.6512) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [250] [130/156] eta: 0:00:19 lr: 0.000594 min_lr: 0.000594 loss: 2.7364 (2.6441) weight_decay: 0.0500 (0.0500) time: 0.7330 data: 0.0009 max mem: 57114 Epoch: [250] [140/156] eta: 0:00:11 lr: 0.000593 min_lr: 0.000593 loss: 2.6282 (2.6338) weight_decay: 0.0500 (0.0500) time: 0.7236 data: 0.0008 max mem: 57114 Epoch: [250] [150/156] eta: 0:00:04 lr: 0.000591 min_lr: 0.000591 loss: 2.5085 (2.6269) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0001 max mem: 57114 Epoch: [250] [155/156] eta: 0:00:00 lr: 0.000591 min_lr: 0.000591 loss: 2.8433 (2.6256) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0001 max mem: 57114 Epoch: [250] Total time: 0:01:56 (0.7489 s / it) Averaged stats: lr: 0.000591 min_lr: 0.000591 loss: 2.8433 (2.6255) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6032 (0.6032) acc1: 89.0625 (89.0625) acc5: 98.9583 (98.9583) time: 2.0538 data: 1.7981 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7971 (0.7386) acc1: 83.3333 (83.6317) acc5: 97.9167 (97.3146) time: 0.5614 data: 0.3597 max mem: 57114 Test: Total time: 0:00:02 (0.5823 s / it) * Acc@1 83.991 Acc@5 96.937 loss 0.813 Accuracy of the model on the 50000 test images: 84.0% Max accuracy: 84.09% Test: [0/5] eta: 0:00:11 loss: 0.6020 (0.6020) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2794 data: 2.0358 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6939 (0.6856) acc1: 80.7292 (79.2839) acc5: 96.3542 (94.2455) time: 0.6066 data: 0.4072 max mem: 57114 Test: Total time: 0:00:03 (0.6195 s / it) * Acc@1 77.506 Acc@5 93.880 loss 0.825 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [251] [ 0/156] eta: 0:14:14 lr: 0.000590 min_lr: 0.000590 loss: 2.0616 (2.0616) weight_decay: 0.0500 (0.0500) time: 5.4805 data: 4.6445 max mem: 57114 Epoch: [251] [ 10/156] eta: 0:02:50 lr: 0.000589 min_lr: 0.000589 loss: 2.5969 (2.5012) weight_decay: 0.0500 (0.0500) time: 1.1681 data: 0.4226 max mem: 57114 Epoch: [251] [ 20/156] eta: 0:02:09 lr: 0.000587 min_lr: 0.000587 loss: 2.7396 (2.6225) weight_decay: 0.0500 (0.0500) time: 0.7274 data: 0.0003 max mem: 57114 Epoch: [251] [ 30/156] eta: 0:01:50 lr: 0.000586 min_lr: 0.000586 loss: 2.7590 (2.6398) weight_decay: 0.0500 (0.0500) time: 0.7179 data: 0.0003 max mem: 57114 Epoch: [251] [ 40/156] eta: 0:01:37 lr: 0.000584 min_lr: 0.000584 loss: 2.6591 (2.6412) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0004 max mem: 57114 Epoch: [251] [ 50/156] eta: 0:01:26 lr: 0.000583 min_lr: 0.000583 loss: 2.6369 (2.6260) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0004 max mem: 57114 Epoch: [251] [ 60/156] eta: 0:01:16 lr: 0.000581 min_lr: 0.000581 loss: 2.6369 (2.6356) weight_decay: 0.0500 (0.0500) time: 0.7144 data: 0.0004 max mem: 57114 Epoch: [251] [ 70/156] eta: 0:01:07 lr: 0.000580 min_lr: 0.000580 loss: 2.6655 (2.6213) weight_decay: 0.0500 (0.0500) time: 0.7216 data: 0.0004 max mem: 57114 Epoch: [251] [ 80/156] eta: 0:00:59 lr: 0.000578 min_lr: 0.000578 loss: 2.7111 (2.6320) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0004 max mem: 57114 Epoch: [251] [ 90/156] eta: 0:00:50 lr: 0.000577 min_lr: 0.000577 loss: 2.6536 (2.6128) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.0003 max mem: 57114 Epoch: [251] [100/156] eta: 0:00:42 lr: 0.000575 min_lr: 0.000575 loss: 2.6841 (2.6220) weight_decay: 0.0500 (0.0500) time: 0.7031 data: 0.0003 max mem: 57114 Epoch: [251] [110/156] eta: 0:00:34 lr: 0.000574 min_lr: 0.000574 loss: 2.7757 (2.6319) weight_decay: 0.0500 (0.0500) time: 0.6996 data: 0.0004 max mem: 57114 Epoch: [251] [120/156] eta: 0:00:27 lr: 0.000572 min_lr: 0.000572 loss: 2.6875 (2.6202) weight_decay: 0.0500 (0.0500) time: 0.6982 data: 0.0004 max mem: 57114 Epoch: [251] [130/156] eta: 0:00:19 lr: 0.000571 min_lr: 0.000571 loss: 2.5121 (2.6105) weight_decay: 0.0500 (0.0500) time: 0.7018 data: 0.0008 max mem: 57114 Epoch: [251] [140/156] eta: 0:00:11 lr: 0.000569 min_lr: 0.000569 loss: 2.4401 (2.5938) weight_decay: 0.0500 (0.0500) time: 0.6950 data: 0.0007 max mem: 57114 Epoch: [251] [150/156] eta: 0:00:04 lr: 0.000568 min_lr: 0.000568 loss: 2.4905 (2.6064) weight_decay: 0.0500 (0.0500) time: 0.6835 data: 0.0001 max mem: 57114 Epoch: [251] [155/156] eta: 0:00:00 lr: 0.000567 min_lr: 0.000567 loss: 2.6901 (2.6095) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0001 max mem: 57114 Epoch: [251] Total time: 0:01:56 (0.7452 s / it) Averaged stats: lr: 0.000567 min_lr: 0.000567 loss: 2.6901 (2.6151) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:16 loss: 0.5465 (0.5465) acc1: 91.1458 (91.1458) acc5: 98.9583 (98.9583) time: 3.2219 data: 2.9491 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7764 (0.7103) acc1: 84.8958 (84.7826) acc5: 97.9167 (97.1867) time: 0.7976 data: 0.5899 max mem: 57114 Test: Total time: 0:00:04 (0.8446 s / it) * Acc@1 84.147 Acc@5 96.947 loss 0.792 Accuracy of the model on the 50000 test images: 84.1% Max accuracy: 84.15% Test: [0/5] eta: 0:00:24 loss: 0.6030 (0.6030) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 4.8594 data: 4.6159 max mem: 57114 Test: [4/5] eta: 0:00:01 loss: 0.6943 (0.6865) acc1: 80.7292 (79.2839) acc5: 96.3542 (94.2455) time: 1.1272 data: 0.9233 max mem: 57114 Test: Total time: 0:00:05 (1.1694 s / it) * Acc@1 77.506 Acc@5 93.858 loss 0.826 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [252] [ 0/156] eta: 0:21:09 lr: 0.000567 min_lr: 0.000567 loss: 2.5979 (2.5979) weight_decay: 0.0500 (0.0500) time: 8.1388 data: 6.1163 max mem: 57114 Epoch: [252] [ 10/156] eta: 0:03:34 lr: 0.000566 min_lr: 0.000566 loss: 2.5979 (2.6648) weight_decay: 0.0500 (0.0500) time: 1.4679 data: 0.5564 max mem: 57114 Epoch: [252] [ 20/156] eta: 0:02:35 lr: 0.000564 min_lr: 0.000564 loss: 2.5624 (2.5127) weight_decay: 0.0500 (0.0500) time: 0.7949 data: 0.0004 max mem: 57114 Epoch: [252] [ 30/156] eta: 0:02:09 lr: 0.000563 min_lr: 0.000563 loss: 2.3942 (2.5271) weight_decay: 0.0500 (0.0500) time: 0.7895 data: 0.0004 max mem: 57114 Epoch: [252] [ 40/156] eta: 0:01:55 lr: 0.000561 min_lr: 0.000561 loss: 2.6721 (2.5833) weight_decay: 0.0500 (0.0500) time: 0.8340 data: 0.0006 max mem: 57114 Epoch: [252] [ 50/156] eta: 0:01:41 lr: 0.000560 min_lr: 0.000560 loss: 2.7276 (2.6033) weight_decay: 0.0500 (0.0500) time: 0.8403 data: 0.0006 max mem: 57114 Epoch: [252] [ 60/156] eta: 0:01:28 lr: 0.000558 min_lr: 0.000558 loss: 2.6623 (2.5979) weight_decay: 0.0500 (0.0500) time: 0.7887 data: 0.0017 max mem: 57114 Epoch: [252] [ 70/156] eta: 0:01:17 lr: 0.000557 min_lr: 0.000557 loss: 2.8437 (2.6075) weight_decay: 0.0500 (0.0500) time: 0.7613 data: 0.0017 max mem: 57114 Epoch: [252] [ 80/156] eta: 0:01:06 lr: 0.000555 min_lr: 0.000555 loss: 2.8022 (2.6082) weight_decay: 0.0500 (0.0500) time: 0.7335 data: 0.0004 max mem: 57114 Epoch: [252] [ 90/156] eta: 0:00:56 lr: 0.000554 min_lr: 0.000554 loss: 2.8022 (2.6409) weight_decay: 0.0500 (0.0500) time: 0.7086 data: 0.0004 max mem: 57114 Epoch: [252] [100/156] eta: 0:00:47 lr: 0.000552 min_lr: 0.000552 loss: 2.8128 (2.6322) weight_decay: 0.0500 (0.0500) time: 0.7063 data: 0.0004 max mem: 57114 Epoch: [252] [110/156] eta: 0:00:38 lr: 0.000551 min_lr: 0.000551 loss: 2.8067 (2.6372) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0004 max mem: 57114 Epoch: [252] [120/156] eta: 0:00:29 lr: 0.000550 min_lr: 0.000550 loss: 2.7859 (2.6332) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0004 max mem: 57114 Epoch: [252] [130/156] eta: 0:00:21 lr: 0.000548 min_lr: 0.000548 loss: 2.6981 (2.6288) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0008 max mem: 57114 Epoch: [252] [140/156] eta: 0:00:12 lr: 0.000547 min_lr: 0.000547 loss: 2.7129 (2.6308) weight_decay: 0.0500 (0.0500) time: 0.6913 data: 0.0007 max mem: 57114 Epoch: [252] [150/156] eta: 0:00:04 lr: 0.000545 min_lr: 0.000545 loss: 2.7468 (2.6303) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0001 max mem: 57114 Epoch: [252] [155/156] eta: 0:00:00 lr: 0.000544 min_lr: 0.000544 loss: 2.7266 (2.6289) weight_decay: 0.0500 (0.0500) time: 0.6826 data: 0.0001 max mem: 57114 Epoch: [252] Total time: 0:02:03 (0.7934 s / it) Averaged stats: lr: 0.000544 min_lr: 0.000544 loss: 2.7266 (2.6123) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6096 (0.6096) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.1314 data: 1.8756 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7776 (0.7038) acc1: 84.3750 (85.0384) acc5: 98.9583 (97.8261) time: 0.5775 data: 0.3752 max mem: 57114 Test: Total time: 0:00:02 (0.5988 s / it) * Acc@1 84.143 Acc@5 96.897 loss 0.816 Accuracy of the model on the 50000 test images: 84.1% Max accuracy: 84.15% Test: [0/5] eta: 0:00:11 loss: 0.6039 (0.6039) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.3950 data: 2.1514 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6946 (0.6873) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.2455) time: 0.6297 data: 0.4304 max mem: 57114 Test: Total time: 0:00:03 (0.6415 s / it) * Acc@1 77.494 Acc@5 93.860 loss 0.827 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [253] [ 0/156] eta: 0:13:22 lr: 0.000544 min_lr: 0.000544 loss: 2.7542 (2.7542) weight_decay: 0.0500 (0.0500) time: 5.1430 data: 3.8511 max mem: 57114 Epoch: [253] [ 10/156] eta: 0:02:44 lr: 0.000543 min_lr: 0.000543 loss: 2.7542 (2.6189) weight_decay: 0.0500 (0.0500) time: 1.1247 data: 0.3504 max mem: 57114 Epoch: [253] [ 20/156] eta: 0:02:06 lr: 0.000541 min_lr: 0.000541 loss: 2.6728 (2.6386) weight_decay: 0.0500 (0.0500) time: 0.7232 data: 0.0003 max mem: 57114 Epoch: [253] [ 30/156] eta: 0:01:49 lr: 0.000540 min_lr: 0.000540 loss: 2.6728 (2.6021) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0003 max mem: 57114 Epoch: [253] [ 40/156] eta: 0:01:36 lr: 0.000539 min_lr: 0.000539 loss: 2.7314 (2.6556) weight_decay: 0.0500 (0.0500) time: 0.7334 data: 0.0003 max mem: 57114 Epoch: [253] [ 50/156] eta: 0:01:26 lr: 0.000537 min_lr: 0.000537 loss: 2.7932 (2.6755) weight_decay: 0.0500 (0.0500) time: 0.7337 data: 0.0004 max mem: 57114 Epoch: [253] [ 60/156] eta: 0:01:16 lr: 0.000536 min_lr: 0.000536 loss: 2.8332 (2.6838) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0004 max mem: 57114 Epoch: [253] [ 70/156] eta: 0:01:07 lr: 0.000534 min_lr: 0.000534 loss: 2.8234 (2.6521) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0003 max mem: 57114 Epoch: [253] [ 80/156] eta: 0:00:58 lr: 0.000533 min_lr: 0.000533 loss: 2.5578 (2.6393) weight_decay: 0.0500 (0.0500) time: 0.7041 data: 0.0004 max mem: 57114 Epoch: [253] [ 90/156] eta: 0:00:50 lr: 0.000531 min_lr: 0.000531 loss: 2.5578 (2.6134) weight_decay: 0.0500 (0.0500) time: 0.6959 data: 0.0003 max mem: 57114 Epoch: [253] [100/156] eta: 0:00:42 lr: 0.000530 min_lr: 0.000530 loss: 2.6849 (2.6022) weight_decay: 0.0500 (0.0500) time: 0.7045 data: 0.0003 max mem: 57114 Epoch: [253] [110/156] eta: 0:00:34 lr: 0.000529 min_lr: 0.000529 loss: 2.7135 (2.6246) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0003 max mem: 57114 Epoch: [253] [120/156] eta: 0:00:27 lr: 0.000527 min_lr: 0.000527 loss: 2.7666 (2.6370) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0003 max mem: 57114 Epoch: [253] [130/156] eta: 0:00:19 lr: 0.000526 min_lr: 0.000526 loss: 2.8190 (2.6407) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0008 max mem: 57114 Epoch: [253] [140/156] eta: 0:00:11 lr: 0.000524 min_lr: 0.000524 loss: 2.7481 (2.6378) weight_decay: 0.0500 (0.0500) time: 0.6914 data: 0.0007 max mem: 57114 Epoch: [253] [150/156] eta: 0:00:04 lr: 0.000523 min_lr: 0.000523 loss: 2.6424 (2.6350) weight_decay: 0.0500 (0.0500) time: 0.6816 data: 0.0001 max mem: 57114 Epoch: [253] [155/156] eta: 0:00:00 lr: 0.000522 min_lr: 0.000522 loss: 2.6844 (2.6400) weight_decay: 0.0500 (0.0500) time: 0.6818 data: 0.0001 max mem: 57114 Epoch: [253] Total time: 0:01:55 (0.7406 s / it) Averaged stats: lr: 0.000522 min_lr: 0.000522 loss: 2.6844 (2.6044) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.6225 (0.6225) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.2443 data: 1.9881 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8000 (0.7481) acc1: 84.3750 (84.6547) acc5: 98.9583 (97.9540) time: 0.5995 data: 0.3977 max mem: 57114 Test: Total time: 0:00:03 (0.6244 s / it) * Acc@1 84.195 Acc@5 96.859 loss 0.844 Accuracy of the model on the 50000 test images: 84.2% Max accuracy: 84.20% Test: [0/5] eta: 0:00:09 loss: 0.6049 (0.6049) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 1.9989 data: 1.7554 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6951 (0.6882) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.2455) time: 0.5504 data: 0.3511 max mem: 57114 Test: Total time: 0:00:02 (0.5614 s / it) * Acc@1 77.498 Acc@5 93.852 loss 0.828 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [254] [ 0/156] eta: 0:14:13 lr: 0.000522 min_lr: 0.000522 loss: 2.7188 (2.7188) weight_decay: 0.0500 (0.0500) time: 5.4689 data: 4.3263 max mem: 57114 Epoch: [254] [ 10/156] eta: 0:02:52 lr: 0.000521 min_lr: 0.000521 loss: 2.5659 (2.5981) weight_decay: 0.0500 (0.0500) time: 1.1818 data: 0.3935 max mem: 57114 Epoch: [254] [ 20/156] eta: 0:02:11 lr: 0.000519 min_lr: 0.000519 loss: 2.6097 (2.6361) weight_decay: 0.0500 (0.0500) time: 0.7431 data: 0.0003 max mem: 57114 Epoch: [254] [ 30/156] eta: 0:01:52 lr: 0.000518 min_lr: 0.000518 loss: 2.8088 (2.6316) weight_decay: 0.0500 (0.0500) time: 0.7349 data: 0.0003 max mem: 57114 Epoch: [254] [ 40/156] eta: 0:01:38 lr: 0.000516 min_lr: 0.000516 loss: 2.6722 (2.6035) weight_decay: 0.0500 (0.0500) time: 0.7312 data: 0.0004 max mem: 57114 Epoch: [254] [ 50/156] eta: 0:01:28 lr: 0.000515 min_lr: 0.000515 loss: 2.6782 (2.6177) weight_decay: 0.0500 (0.0500) time: 0.7342 data: 0.0004 max mem: 57114 Epoch: [254] [ 60/156] eta: 0:01:18 lr: 0.000513 min_lr: 0.000513 loss: 2.7681 (2.6256) weight_decay: 0.0500 (0.0500) time: 0.7405 data: 0.0004 max mem: 57114 Epoch: [254] [ 70/156] eta: 0:01:08 lr: 0.000512 min_lr: 0.000512 loss: 2.7681 (2.6251) weight_decay: 0.0500 (0.0500) time: 0.7277 data: 0.0004 max mem: 57114 Epoch: [254] [ 80/156] eta: 0:01:00 lr: 0.000511 min_lr: 0.000511 loss: 2.6802 (2.6245) weight_decay: 0.0500 (0.0500) time: 0.7115 data: 0.0004 max mem: 57114 Epoch: [254] [ 90/156] eta: 0:00:51 lr: 0.000509 min_lr: 0.000509 loss: 2.7436 (2.6271) weight_decay: 0.0500 (0.0500) time: 0.7113 data: 0.0004 max mem: 57114 Epoch: [254] [100/156] eta: 0:00:43 lr: 0.000508 min_lr: 0.000508 loss: 2.7793 (2.6467) weight_decay: 0.0500 (0.0500) time: 0.7139 data: 0.0004 max mem: 57114 Epoch: [254] [110/156] eta: 0:00:35 lr: 0.000506 min_lr: 0.000506 loss: 2.7840 (2.6467) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0004 max mem: 57114 Epoch: [254] [120/156] eta: 0:00:27 lr: 0.000505 min_lr: 0.000505 loss: 2.7784 (2.6429) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0004 max mem: 57114 Epoch: [254] [130/156] eta: 0:00:19 lr: 0.000504 min_lr: 0.000504 loss: 2.6897 (2.6392) weight_decay: 0.0500 (0.0500) time: 0.6973 data: 0.0008 max mem: 57114 Epoch: [254] [140/156] eta: 0:00:12 lr: 0.000502 min_lr: 0.000502 loss: 2.6861 (2.6277) weight_decay: 0.0500 (0.0500) time: 0.6923 data: 0.0007 max mem: 57114 Epoch: [254] [150/156] eta: 0:00:04 lr: 0.000501 min_lr: 0.000501 loss: 2.5829 (2.6215) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0001 max mem: 57114 Epoch: [254] [155/156] eta: 0:00:00 lr: 0.000500 min_lr: 0.000500 loss: 2.5829 (2.6123) weight_decay: 0.0500 (0.0500) time: 0.6943 data: 0.0001 max mem: 57114 Epoch: [254] Total time: 0:01:56 (0.7490 s / it) Averaged stats: lr: 0.000500 min_lr: 0.000500 loss: 2.5829 (2.6016) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5571 (0.5571) acc1: 89.5833 (89.5833) acc5: 98.9583 (98.9583) time: 2.1233 data: 1.8588 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7267 (0.6565) acc1: 85.9375 (84.7826) acc5: 98.9583 (97.9540) time: 0.5753 data: 0.3718 max mem: 57114 Test: Total time: 0:00:02 (0.5999 s / it) * Acc@1 84.275 Acc@5 96.881 loss 0.768 Accuracy of the model on the 50000 test images: 84.3% Max accuracy: 84.28% Test: [0/5] eta: 0:00:10 loss: 0.6058 (0.6058) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.1178 data: 1.8743 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6955 (0.6891) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.2455) time: 0.5742 data: 0.3750 max mem: 57114 Test: Total time: 0:00:02 (0.5879 s / it) * Acc@1 77.494 Acc@5 93.828 loss 0.829 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [255] [ 0/156] eta: 0:13:39 lr: 0.000500 min_lr: 0.000500 loss: 2.4848 (2.4848) weight_decay: 0.0500 (0.0500) time: 5.2532 data: 4.3519 max mem: 57114 Epoch: [255] [ 10/156] eta: 0:02:50 lr: 0.000499 min_lr: 0.000499 loss: 2.5768 (2.6002) weight_decay: 0.0500 (0.0500) time: 1.1712 data: 0.3959 max mem: 57114 Epoch: [255] [ 20/156] eta: 0:02:09 lr: 0.000497 min_lr: 0.000497 loss: 2.6992 (2.5569) weight_decay: 0.0500 (0.0500) time: 0.7367 data: 0.0004 max mem: 57114 Epoch: [255] [ 30/156] eta: 0:01:51 lr: 0.000496 min_lr: 0.000496 loss: 2.7822 (2.6106) weight_decay: 0.0500 (0.0500) time: 0.7319 data: 0.0004 max mem: 57114 Epoch: [255] [ 40/156] eta: 0:01:38 lr: 0.000494 min_lr: 0.000494 loss: 2.8568 (2.6298) weight_decay: 0.0500 (0.0500) time: 0.7463 data: 0.0004 max mem: 57114 Epoch: [255] [ 50/156] eta: 0:01:27 lr: 0.000493 min_lr: 0.000493 loss: 2.7546 (2.6265) weight_decay: 0.0500 (0.0500) time: 0.7311 data: 0.0004 max mem: 57114 Epoch: [255] [ 60/156] eta: 0:01:17 lr: 0.000492 min_lr: 0.000492 loss: 2.6516 (2.6150) weight_decay: 0.0500 (0.0500) time: 0.7310 data: 0.0004 max mem: 57114 Epoch: [255] [ 70/156] eta: 0:01:08 lr: 0.000490 min_lr: 0.000490 loss: 2.6455 (2.5943) weight_decay: 0.0500 (0.0500) time: 0.7377 data: 0.0004 max mem: 57114 Epoch: [255] [ 80/156] eta: 0:00:59 lr: 0.000489 min_lr: 0.000489 loss: 2.6455 (2.6009) weight_decay: 0.0500 (0.0500) time: 0.7200 data: 0.0004 max mem: 57114 Epoch: [255] [ 90/156] eta: 0:00:51 lr: 0.000488 min_lr: 0.000488 loss: 2.7531 (2.6190) weight_decay: 0.0500 (0.0500) time: 0.7047 data: 0.0004 max mem: 57114 Epoch: [255] [100/156] eta: 0:00:43 lr: 0.000486 min_lr: 0.000486 loss: 2.6991 (2.6152) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0004 max mem: 57114 Epoch: [255] [110/156] eta: 0:00:35 lr: 0.000485 min_lr: 0.000485 loss: 2.3781 (2.5875) weight_decay: 0.0500 (0.0500) time: 0.7086 data: 0.0004 max mem: 57114 Epoch: [255] [120/156] eta: 0:00:27 lr: 0.000483 min_lr: 0.000483 loss: 2.4055 (2.5987) weight_decay: 0.0500 (0.0500) time: 0.7132 data: 0.0004 max mem: 57114 Epoch: [255] [130/156] eta: 0:00:19 lr: 0.000482 min_lr: 0.000482 loss: 2.6495 (2.5852) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0008 max mem: 57114 Epoch: [255] [140/156] eta: 0:00:12 lr: 0.000481 min_lr: 0.000481 loss: 2.7044 (2.5959) weight_decay: 0.0500 (0.0500) time: 0.6917 data: 0.0007 max mem: 57114 Epoch: [255] [150/156] eta: 0:00:04 lr: 0.000479 min_lr: 0.000479 loss: 2.6431 (2.5901) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0001 max mem: 57114 Epoch: [255] [155/156] eta: 0:00:00 lr: 0.000479 min_lr: 0.000479 loss: 2.6431 (2.5883) weight_decay: 0.0500 (0.0500) time: 0.6860 data: 0.0001 max mem: 57114 Epoch: [255] Total time: 0:01:56 (0.7483 s / it) Averaged stats: lr: 0.000479 min_lr: 0.000479 loss: 2.6431 (2.5968) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5618 (0.5618) acc1: 90.1042 (90.1042) acc5: 98.9583 (98.9583) time: 2.1034 data: 1.8474 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6854 (0.6592) acc1: 85.4167 (84.3990) acc5: 98.4375 (97.8261) time: 0.5713 data: 0.3695 max mem: 57114 Test: Total time: 0:00:02 (0.5953 s / it) * Acc@1 84.305 Acc@5 96.979 loss 0.754 Accuracy of the model on the 50000 test images: 84.3% Max accuracy: 84.31% Test: [0/5] eta: 0:00:09 loss: 0.6068 (0.6068) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 1.9859 data: 1.7424 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6960 (0.6901) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.2455) time: 0.5478 data: 0.3486 max mem: 57114 Test: Total time: 0:00:02 (0.5576 s / it) * Acc@1 77.486 Acc@5 93.814 loss 0.830 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [256] [ 0/156] eta: 0:13:32 lr: 0.000479 min_lr: 0.000479 loss: 2.8983 (2.8983) weight_decay: 0.0500 (0.0500) time: 5.2058 data: 3.4702 max mem: 57114 Epoch: [256] [ 10/156] eta: 0:02:47 lr: 0.000477 min_lr: 0.000477 loss: 2.7825 (2.7121) weight_decay: 0.0500 (0.0500) time: 1.1485 data: 0.3157 max mem: 57114 Epoch: [256] [ 20/156] eta: 0:02:08 lr: 0.000476 min_lr: 0.000476 loss: 2.7553 (2.6581) weight_decay: 0.0500 (0.0500) time: 0.7356 data: 0.0003 max mem: 57114 Epoch: [256] [ 30/156] eta: 0:01:51 lr: 0.000474 min_lr: 0.000474 loss: 2.5730 (2.5884) weight_decay: 0.0500 (0.0500) time: 0.7452 data: 0.0003 max mem: 57114 Epoch: [256] [ 40/156] eta: 0:01:38 lr: 0.000473 min_lr: 0.000473 loss: 2.5730 (2.5827) weight_decay: 0.0500 (0.0500) time: 0.7516 data: 0.0003 max mem: 57114 Epoch: [256] [ 50/156] eta: 0:01:27 lr: 0.000472 min_lr: 0.000472 loss: 2.7847 (2.6370) weight_decay: 0.0500 (0.0500) time: 0.7376 data: 0.0004 max mem: 57114 Epoch: [256] [ 60/156] eta: 0:01:18 lr: 0.000470 min_lr: 0.000470 loss: 2.8034 (2.6570) weight_decay: 0.0500 (0.0500) time: 0.7376 data: 0.0004 max mem: 57114 Epoch: [256] [ 70/156] eta: 0:01:08 lr: 0.000469 min_lr: 0.000469 loss: 2.8914 (2.6527) weight_decay: 0.0500 (0.0500) time: 0.7323 data: 0.0004 max mem: 57114 Epoch: [256] [ 80/156] eta: 0:01:00 lr: 0.000468 min_lr: 0.000468 loss: 2.4917 (2.6330) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0004 max mem: 57114 Epoch: [256] [ 90/156] eta: 0:00:51 lr: 0.000466 min_lr: 0.000466 loss: 2.4012 (2.6056) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0004 max mem: 57114 Epoch: [256] [100/156] eta: 0:00:43 lr: 0.000465 min_lr: 0.000465 loss: 2.4302 (2.5959) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0003 max mem: 57114 Epoch: [256] [110/156] eta: 0:00:35 lr: 0.000464 min_lr: 0.000464 loss: 2.5487 (2.5869) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [256] [120/156] eta: 0:00:27 lr: 0.000462 min_lr: 0.000462 loss: 2.5146 (2.5828) weight_decay: 0.0500 (0.0500) time: 0.7057 data: 0.0004 max mem: 57114 Epoch: [256] [130/156] eta: 0:00:19 lr: 0.000461 min_lr: 0.000461 loss: 2.7132 (2.5917) weight_decay: 0.0500 (0.0500) time: 0.6989 data: 0.0008 max mem: 57114 Epoch: [256] [140/156] eta: 0:00:12 lr: 0.000460 min_lr: 0.000460 loss: 2.8263 (2.5961) weight_decay: 0.0500 (0.0500) time: 0.6940 data: 0.0007 max mem: 57114 Epoch: [256] [150/156] eta: 0:00:04 lr: 0.000458 min_lr: 0.000458 loss: 2.7189 (2.5916) weight_decay: 0.0500 (0.0500) time: 0.6929 data: 0.0001 max mem: 57114 Epoch: [256] [155/156] eta: 0:00:00 lr: 0.000458 min_lr: 0.000458 loss: 2.7914 (2.6014) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0001 max mem: 57114 Epoch: [256] Total time: 0:01:56 (0.7493 s / it) Averaged stats: lr: 0.000458 min_lr: 0.000458 loss: 2.7914 (2.5953) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6207 (0.6207) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.1262 data: 1.8705 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7989 (0.7471) acc1: 84.3750 (85.0384) acc5: 98.4375 (97.5703) time: 0.5759 data: 0.3742 max mem: 57114 Test: Total time: 0:00:02 (0.5969 s / it) * Acc@1 84.247 Acc@5 96.877 loss 0.835 Accuracy of the model on the 50000 test images: 84.2% Max accuracy: 84.31% Test: [0/5] eta: 0:00:11 loss: 0.6075 (0.6075) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.3359 data: 2.0925 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6965 (0.6910) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.2455) time: 0.6178 data: 0.4186 max mem: 57114 Test: Total time: 0:00:03 (0.6292 s / it) * Acc@1 77.510 Acc@5 93.804 loss 0.831 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [257] [ 0/156] eta: 0:12:48 lr: 0.000458 min_lr: 0.000458 loss: 3.0291 (3.0291) weight_decay: 0.0500 (0.0500) time: 4.9233 data: 3.9156 max mem: 57114 Epoch: [257] [ 10/156] eta: 0:02:43 lr: 0.000456 min_lr: 0.000456 loss: 2.6930 (2.4872) weight_decay: 0.0500 (0.0500) time: 1.1215 data: 0.3562 max mem: 57114 Epoch: [257] [ 20/156] eta: 0:02:06 lr: 0.000455 min_lr: 0.000455 loss: 2.7837 (2.5718) weight_decay: 0.0500 (0.0500) time: 0.7321 data: 0.0003 max mem: 57114 Epoch: [257] [ 30/156] eta: 0:01:49 lr: 0.000454 min_lr: 0.000454 loss: 2.7837 (2.5466) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0003 max mem: 57114 Epoch: [257] [ 40/156] eta: 0:01:36 lr: 0.000452 min_lr: 0.000452 loss: 2.5079 (2.5455) weight_decay: 0.0500 (0.0500) time: 0.7347 data: 0.0003 max mem: 57114 Epoch: [257] [ 50/156] eta: 0:01:26 lr: 0.000451 min_lr: 0.000451 loss: 2.5580 (2.5830) weight_decay: 0.0500 (0.0500) time: 0.7350 data: 0.0003 max mem: 57114 Epoch: [257] [ 60/156] eta: 0:01:17 lr: 0.000450 min_lr: 0.000450 loss: 2.7322 (2.6007) weight_decay: 0.0500 (0.0500) time: 0.7397 data: 0.0004 max mem: 57114 Epoch: [257] [ 70/156] eta: 0:01:07 lr: 0.000448 min_lr: 0.000448 loss: 2.6685 (2.5896) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0004 max mem: 57114 Epoch: [257] [ 80/156] eta: 0:00:59 lr: 0.000447 min_lr: 0.000447 loss: 2.5544 (2.5971) weight_decay: 0.0500 (0.0500) time: 0.6955 data: 0.0003 max mem: 57114 Epoch: [257] [ 90/156] eta: 0:00:50 lr: 0.000446 min_lr: 0.000446 loss: 2.5277 (2.5931) weight_decay: 0.0500 (0.0500) time: 0.7091 data: 0.0004 max mem: 57114 Epoch: [257] [100/156] eta: 0:00:42 lr: 0.000444 min_lr: 0.000444 loss: 2.4014 (2.5776) weight_decay: 0.0500 (0.0500) time: 0.7155 data: 0.0004 max mem: 57114 Epoch: [257] [110/156] eta: 0:00:34 lr: 0.000443 min_lr: 0.000443 loss: 2.5911 (2.5857) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0004 max mem: 57114 Epoch: [257] [120/156] eta: 0:00:27 lr: 0.000442 min_lr: 0.000442 loss: 2.6836 (2.5770) weight_decay: 0.0500 (0.0500) time: 0.7244 data: 0.0004 max mem: 57114 Epoch: [257] [130/156] eta: 0:00:19 lr: 0.000440 min_lr: 0.000440 loss: 2.5418 (2.5803) weight_decay: 0.0500 (0.0500) time: 0.7389 data: 0.0010 max mem: 57114 Epoch: [257] [140/156] eta: 0:00:12 lr: 0.000439 min_lr: 0.000439 loss: 2.6598 (2.5872) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0008 max mem: 57114 Epoch: [257] [150/156] eta: 0:00:04 lr: 0.000438 min_lr: 0.000438 loss: 2.5339 (2.5735) weight_decay: 0.0500 (0.0500) time: 0.6807 data: 0.0001 max mem: 57114 Epoch: [257] [155/156] eta: 0:00:00 lr: 0.000437 min_lr: 0.000437 loss: 2.5339 (2.5717) weight_decay: 0.0500 (0.0500) time: 0.6852 data: 0.0001 max mem: 57114 Epoch: [257] Total time: 0:01:56 (0.7477 s / it) Averaged stats: lr: 0.000437 min_lr: 0.000437 loss: 2.5339 (2.5868) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5512 (0.5512) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.0411 data: 1.7853 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7548 (0.6864) acc1: 84.3750 (84.5269) acc5: 98.4375 (97.6982) time: 0.5588 data: 0.3571 max mem: 57114 Test: Total time: 0:00:02 (0.5822 s / it) * Acc@1 84.337 Acc@5 96.999 loss 0.773 Accuracy of the model on the 50000 test images: 84.3% Max accuracy: 84.34% Test: [0/5] eta: 0:00:10 loss: 0.6084 (0.6084) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.0051 data: 1.7618 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6969 (0.6919) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.2455) time: 0.5517 data: 0.3525 max mem: 57114 Test: Total time: 0:00:02 (0.5650 s / it) * Acc@1 77.496 Acc@5 93.810 loss 0.832 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [258] [ 0/156] eta: 0:13:50 lr: 0.000437 min_lr: 0.000437 loss: 2.9091 (2.9091) weight_decay: 0.0500 (0.0500) time: 5.3248 data: 4.1412 max mem: 57114 Epoch: [258] [ 10/156] eta: 0:02:53 lr: 0.000436 min_lr: 0.000436 loss: 2.7499 (2.6950) weight_decay: 0.0500 (0.0500) time: 1.1906 data: 0.3767 max mem: 57114 Epoch: [258] [ 20/156] eta: 0:02:12 lr: 0.000434 min_lr: 0.000434 loss: 2.7499 (2.7730) weight_decay: 0.0500 (0.0500) time: 0.7554 data: 0.0003 max mem: 57114 Epoch: [258] [ 30/156] eta: 0:01:54 lr: 0.000433 min_lr: 0.000433 loss: 2.7351 (2.7780) weight_decay: 0.0500 (0.0500) time: 0.7504 data: 0.0003 max mem: 57114 Epoch: [258] [ 40/156] eta: 0:01:40 lr: 0.000432 min_lr: 0.000432 loss: 2.7716 (2.7901) weight_decay: 0.0500 (0.0500) time: 0.7628 data: 0.0004 max mem: 57114 Epoch: [258] [ 50/156] eta: 0:01:29 lr: 0.000430 min_lr: 0.000430 loss: 2.8460 (2.7857) weight_decay: 0.0500 (0.0500) time: 0.7513 data: 0.0004 max mem: 57114 Epoch: [258] [ 60/156] eta: 0:01:20 lr: 0.000429 min_lr: 0.000429 loss: 2.8259 (2.7806) weight_decay: 0.0500 (0.0500) time: 0.7670 data: 0.0004 max mem: 57114 Epoch: [258] [ 70/156] eta: 0:01:11 lr: 0.000428 min_lr: 0.000428 loss: 2.7404 (2.7484) weight_decay: 0.0500 (0.0500) time: 0.7890 data: 0.0004 max mem: 57114 Epoch: [258] [ 80/156] eta: 0:01:02 lr: 0.000427 min_lr: 0.000427 loss: 2.5072 (2.7163) weight_decay: 0.0500 (0.0500) time: 0.7993 data: 0.0004 max mem: 57114 Epoch: [258] [ 90/156] eta: 0:00:54 lr: 0.000425 min_lr: 0.000425 loss: 2.5304 (2.6833) weight_decay: 0.0500 (0.0500) time: 0.7993 data: 0.0004 max mem: 57114 Epoch: [258] [100/156] eta: 0:00:45 lr: 0.000424 min_lr: 0.000424 loss: 2.5304 (2.6680) weight_decay: 0.0500 (0.0500) time: 0.7757 data: 0.0004 max mem: 57114 Epoch: [258] [110/156] eta: 0:00:37 lr: 0.000423 min_lr: 0.000423 loss: 2.6236 (2.6604) weight_decay: 0.0500 (0.0500) time: 0.7621 data: 0.0004 max mem: 57114 Epoch: [258] [120/156] eta: 0:00:29 lr: 0.000421 min_lr: 0.000421 loss: 2.6801 (2.6574) weight_decay: 0.0500 (0.0500) time: 0.7597 data: 0.0004 max mem: 57114 Epoch: [258] [130/156] eta: 0:00:20 lr: 0.000420 min_lr: 0.000420 loss: 2.6864 (2.6583) weight_decay: 0.0500 (0.0500) time: 0.7600 data: 0.0009 max mem: 57114 Epoch: [258] [140/156] eta: 0:00:12 lr: 0.000419 min_lr: 0.000419 loss: 2.7849 (2.6644) weight_decay: 0.0500 (0.0500) time: 0.7498 data: 0.0008 max mem: 57114 Epoch: [258] [150/156] eta: 0:00:04 lr: 0.000418 min_lr: 0.000418 loss: 2.7281 (2.6533) weight_decay: 0.0500 (0.0500) time: 0.7220 data: 0.0001 max mem: 57114 Epoch: [258] [155/156] eta: 0:00:00 lr: 0.000417 min_lr: 0.000417 loss: 2.6447 (2.6438) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0001 max mem: 57114 Epoch: [258] Total time: 0:02:03 (0.7920 s / it) Averaged stats: lr: 0.000417 min_lr: 0.000417 loss: 2.6447 (2.5861) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5550 (0.5550) acc1: 91.1458 (91.1458) acc5: 98.4375 (98.4375) time: 2.1352 data: 1.8793 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7019 (0.6704) acc1: 85.4167 (84.9105) acc5: 98.4375 (97.5703) time: 0.5777 data: 0.3759 max mem: 57114 Test: Total time: 0:00:02 (0.5987 s / it) * Acc@1 84.457 Acc@5 96.935 loss 0.761 Accuracy of the model on the 50000 test images: 84.5% Max accuracy: 84.46% Test: [0/5] eta: 0:00:10 loss: 0.6093 (0.6093) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.0808 data: 1.8374 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6976 (0.6928) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.1177) time: 0.5668 data: 0.3676 max mem: 57114 Test: Total time: 0:00:02 (0.5789 s / it) * Acc@1 77.492 Acc@5 93.814 loss 0.833 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [259] [ 0/156] eta: 0:15:49 lr: 0.000417 min_lr: 0.000417 loss: 2.4172 (2.4172) weight_decay: 0.0500 (0.0500) time: 6.0859 data: 5.2331 max mem: 57114 Epoch: [259] [ 10/156] eta: 0:03:00 lr: 0.000415 min_lr: 0.000415 loss: 2.5586 (2.5532) weight_decay: 0.0500 (0.0500) time: 1.2333 data: 0.4760 max mem: 57114 Epoch: [259] [ 20/156] eta: 0:02:15 lr: 0.000414 min_lr: 0.000414 loss: 2.6571 (2.6128) weight_decay: 0.0500 (0.0500) time: 0.7437 data: 0.0003 max mem: 57114 Epoch: [259] [ 30/156] eta: 0:01:54 lr: 0.000413 min_lr: 0.000413 loss: 2.6482 (2.5308) weight_decay: 0.0500 (0.0500) time: 0.7360 data: 0.0004 max mem: 57114 Epoch: [259] [ 40/156] eta: 0:01:40 lr: 0.000412 min_lr: 0.000412 loss: 2.5767 (2.5624) weight_decay: 0.0500 (0.0500) time: 0.7295 data: 0.0004 max mem: 57114 Epoch: [259] [ 50/156] eta: 0:01:28 lr: 0.000410 min_lr: 0.000410 loss: 2.7950 (2.6071) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0004 max mem: 57114 Epoch: [259] [ 60/156] eta: 0:01:18 lr: 0.000409 min_lr: 0.000409 loss: 2.7977 (2.6315) weight_decay: 0.0500 (0.0500) time: 0.7269 data: 0.0004 max mem: 57114 Epoch: [259] [ 70/156] eta: 0:01:09 lr: 0.000408 min_lr: 0.000408 loss: 2.7403 (2.6276) weight_decay: 0.0500 (0.0500) time: 0.7418 data: 0.0004 max mem: 57114 Epoch: [259] [ 80/156] eta: 0:01:00 lr: 0.000407 min_lr: 0.000407 loss: 2.8201 (2.6570) weight_decay: 0.0500 (0.0500) time: 0.7408 data: 0.0004 max mem: 57114 Epoch: [259] [ 90/156] eta: 0:00:52 lr: 0.000405 min_lr: 0.000405 loss: 2.8128 (2.6475) weight_decay: 0.0500 (0.0500) time: 0.7234 data: 0.0004 max mem: 57114 Epoch: [259] [100/156] eta: 0:00:43 lr: 0.000404 min_lr: 0.000404 loss: 2.6223 (2.6420) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [259] [110/156] eta: 0:00:35 lr: 0.000403 min_lr: 0.000403 loss: 2.7185 (2.6556) weight_decay: 0.0500 (0.0500) time: 0.7005 data: 0.0003 max mem: 57114 Epoch: [259] [120/156] eta: 0:00:27 lr: 0.000402 min_lr: 0.000402 loss: 2.8441 (2.6672) weight_decay: 0.0500 (0.0500) time: 0.6928 data: 0.0003 max mem: 57114 Epoch: [259] [130/156] eta: 0:00:19 lr: 0.000400 min_lr: 0.000400 loss: 2.5830 (2.6493) weight_decay: 0.0500 (0.0500) time: 0.6934 data: 0.0007 max mem: 57114 Epoch: [259] [140/156] eta: 0:00:12 lr: 0.000399 min_lr: 0.000399 loss: 2.4169 (2.6304) weight_decay: 0.0500 (0.0500) time: 0.6897 data: 0.0006 max mem: 57114 Epoch: [259] [150/156] eta: 0:00:04 lr: 0.000398 min_lr: 0.000398 loss: 2.4140 (2.6157) weight_decay: 0.0500 (0.0500) time: 0.6925 data: 0.0001 max mem: 57114 Epoch: [259] [155/156] eta: 0:00:00 lr: 0.000397 min_lr: 0.000397 loss: 2.3520 (2.6009) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0001 max mem: 57114 Epoch: [259] Total time: 0:01:57 (0.7532 s / it) Averaged stats: lr: 0.000397 min_lr: 0.000397 loss: 2.3520 (2.5761) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.5068 (0.5068) acc1: 91.6667 (91.6667) acc5: 98.4375 (98.4375) time: 2.2027 data: 1.9471 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6846 (0.6386) acc1: 84.8958 (84.7826) acc5: 97.9167 (97.1867) time: 0.5911 data: 0.3895 max mem: 57114 Test: Total time: 0:00:03 (0.6132 s / it) * Acc@1 84.355 Acc@5 96.963 loss 0.732 Accuracy of the model on the 50000 test images: 84.4% Max accuracy: 84.46% Test: [0/5] eta: 0:00:11 loss: 0.6102 (0.6102) acc1: 83.8542 (83.8542) acc5: 97.3958 (97.3958) time: 2.3941 data: 2.1506 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6983 (0.6938) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.1177) time: 0.6295 data: 0.4302 max mem: 57114 Test: Total time: 0:00:03 (0.6399 s / it) * Acc@1 77.504 Acc@5 93.816 loss 0.834 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [260] [ 0/156] eta: 0:15:14 lr: 0.000397 min_lr: 0.000397 loss: 2.4455 (2.4455) weight_decay: 0.0500 (0.0500) time: 5.8598 data: 4.9529 max mem: 57114 Epoch: [260] [ 10/156] eta: 0:02:53 lr: 0.000396 min_lr: 0.000396 loss: 2.6752 (2.7494) weight_decay: 0.0500 (0.0500) time: 1.1915 data: 0.4506 max mem: 57114 Epoch: [260] [ 20/156] eta: 0:02:10 lr: 0.000395 min_lr: 0.000395 loss: 2.6752 (2.6734) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0005 max mem: 57114 Epoch: [260] [ 30/156] eta: 0:01:50 lr: 0.000393 min_lr: 0.000393 loss: 2.5365 (2.6287) weight_decay: 0.0500 (0.0500) time: 0.7025 data: 0.0006 max mem: 57114 Epoch: [260] [ 40/156] eta: 0:01:37 lr: 0.000392 min_lr: 0.000392 loss: 2.7129 (2.6026) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0005 max mem: 57114 Epoch: [260] [ 50/156] eta: 0:01:26 lr: 0.000391 min_lr: 0.000391 loss: 2.5612 (2.5656) weight_decay: 0.0500 (0.0500) time: 0.7181 data: 0.0004 max mem: 57114 Epoch: [260] [ 60/156] eta: 0:01:17 lr: 0.000390 min_lr: 0.000390 loss: 2.6271 (2.5676) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0004 max mem: 57114 Epoch: [260] [ 70/156] eta: 0:01:08 lr: 0.000388 min_lr: 0.000388 loss: 2.6507 (2.5632) weight_decay: 0.0500 (0.0500) time: 0.7338 data: 0.0004 max mem: 57114 Epoch: [260] [ 80/156] eta: 0:00:59 lr: 0.000387 min_lr: 0.000387 loss: 2.7169 (2.5582) weight_decay: 0.0500 (0.0500) time: 0.7168 data: 0.0004 max mem: 57114 Epoch: [260] [ 90/156] eta: 0:00:50 lr: 0.000386 min_lr: 0.000386 loss: 2.7719 (2.5612) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0004 max mem: 57114 Epoch: [260] [100/156] eta: 0:00:42 lr: 0.000385 min_lr: 0.000385 loss: 2.6595 (2.5584) weight_decay: 0.0500 (0.0500) time: 0.6925 data: 0.0004 max mem: 57114 Epoch: [260] [110/156] eta: 0:00:34 lr: 0.000383 min_lr: 0.000383 loss: 2.6168 (2.5740) weight_decay: 0.0500 (0.0500) time: 0.6972 data: 0.0004 max mem: 57114 Epoch: [260] [120/156] eta: 0:00:27 lr: 0.000382 min_lr: 0.000382 loss: 2.6096 (2.5693) weight_decay: 0.0500 (0.0500) time: 0.6982 data: 0.0004 max mem: 57114 Epoch: [260] [130/156] eta: 0:00:19 lr: 0.000381 min_lr: 0.000381 loss: 2.4402 (2.5531) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0011 max mem: 57114 Epoch: [260] [140/156] eta: 0:00:11 lr: 0.000380 min_lr: 0.000380 loss: 2.4985 (2.5487) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0010 max mem: 57114 Epoch: [260] [150/156] eta: 0:00:04 lr: 0.000379 min_lr: 0.000379 loss: 2.6165 (2.5482) weight_decay: 0.0500 (0.0500) time: 0.6895 data: 0.0001 max mem: 57114 Epoch: [260] [155/156] eta: 0:00:00 lr: 0.000378 min_lr: 0.000378 loss: 2.6599 (2.5540) weight_decay: 0.0500 (0.0500) time: 0.6831 data: 0.0001 max mem: 57114 Epoch: [260] Total time: 0:01:56 (0.7452 s / it) Averaged stats: lr: 0.000378 min_lr: 0.000378 loss: 2.6599 (2.5774) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5918 (0.5918) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.1144 data: 1.8581 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7799 (0.7209) acc1: 85.4167 (84.1432) acc5: 98.4375 (97.4425) time: 0.5736 data: 0.3717 max mem: 57114 Test: Total time: 0:00:02 (0.5965 s / it) * Acc@1 84.309 Acc@5 96.949 loss 0.805 Accuracy of the model on the 50000 test images: 84.3% Max accuracy: 84.46% Test: [0/5] eta: 0:00:11 loss: 0.6110 (0.6110) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3155 data: 2.0719 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6988 (0.6945) acc1: 80.7292 (79.2839) acc5: 96.3542 (94.1177) time: 0.6138 data: 0.4145 max mem: 57114 Test: Total time: 0:00:03 (0.6267 s / it) * Acc@1 77.492 Acc@5 93.802 loss 0.835 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [261] [ 0/156] eta: 0:12:59 lr: 0.000378 min_lr: 0.000378 loss: 2.9627 (2.9627) weight_decay: 0.0500 (0.0500) time: 4.9952 data: 3.5486 max mem: 57114 Epoch: [261] [ 10/156] eta: 0:02:43 lr: 0.000377 min_lr: 0.000377 loss: 2.8482 (2.7978) weight_decay: 0.0500 (0.0500) time: 1.1231 data: 0.3229 max mem: 57114 Epoch: [261] [ 20/156] eta: 0:02:05 lr: 0.000375 min_lr: 0.000375 loss: 2.5630 (2.6267) weight_decay: 0.0500 (0.0500) time: 0.7194 data: 0.0003 max mem: 57114 Epoch: [261] [ 30/156] eta: 0:01:47 lr: 0.000374 min_lr: 0.000374 loss: 2.5630 (2.6030) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0003 max mem: 57114 Epoch: [261] [ 40/156] eta: 0:01:35 lr: 0.000373 min_lr: 0.000373 loss: 2.6856 (2.5716) weight_decay: 0.0500 (0.0500) time: 0.7156 data: 0.0003 max mem: 57114 Epoch: [261] [ 50/156] eta: 0:01:26 lr: 0.000372 min_lr: 0.000372 loss: 2.6392 (2.5770) weight_decay: 0.0500 (0.0500) time: 0.7468 data: 0.0004 max mem: 57114 Epoch: [261] [ 60/156] eta: 0:01:17 lr: 0.000371 min_lr: 0.000371 loss: 2.5051 (2.5496) weight_decay: 0.0500 (0.0500) time: 0.7803 data: 0.0004 max mem: 57114 Epoch: [261] [ 70/156] eta: 0:01:08 lr: 0.000369 min_lr: 0.000369 loss: 2.3328 (2.5428) weight_decay: 0.0500 (0.0500) time: 0.7654 data: 0.0004 max mem: 57114 Epoch: [261] [ 80/156] eta: 0:01:00 lr: 0.000368 min_lr: 0.000368 loss: 2.4943 (2.5409) weight_decay: 0.0500 (0.0500) time: 0.7366 data: 0.0004 max mem: 57114 Epoch: [261] [ 90/156] eta: 0:00:51 lr: 0.000367 min_lr: 0.000367 loss: 2.5539 (2.5350) weight_decay: 0.0500 (0.0500) time: 0.7462 data: 0.0004 max mem: 57114 Epoch: [261] [100/156] eta: 0:00:44 lr: 0.000366 min_lr: 0.000366 loss: 2.6163 (2.5371) weight_decay: 0.0500 (0.0500) time: 0.7796 data: 0.0005 max mem: 57114 Epoch: [261] [110/156] eta: 0:00:36 lr: 0.000365 min_lr: 0.000365 loss: 2.5189 (2.5219) weight_decay: 0.0500 (0.0500) time: 0.7848 data: 0.0010 max mem: 57114 Epoch: [261] [120/156] eta: 0:00:28 lr: 0.000363 min_lr: 0.000363 loss: 2.3393 (2.5118) weight_decay: 0.0500 (0.0500) time: 0.7638 data: 0.0011 max mem: 57114 Epoch: [261] [130/156] eta: 0:00:20 lr: 0.000362 min_lr: 0.000362 loss: 2.4034 (2.5067) weight_decay: 0.0500 (0.0500) time: 0.7554 data: 0.0013 max mem: 57114 Epoch: [261] [140/156] eta: 0:00:12 lr: 0.000361 min_lr: 0.000361 loss: 2.5631 (2.5116) weight_decay: 0.0500 (0.0500) time: 0.7515 data: 0.0009 max mem: 57114 Epoch: [261] [150/156] eta: 0:00:04 lr: 0.000360 min_lr: 0.000360 loss: 2.7038 (2.5184) weight_decay: 0.0500 (0.0500) time: 0.7384 data: 0.0001 max mem: 57114 Epoch: [261] [155/156] eta: 0:00:00 lr: 0.000359 min_lr: 0.000359 loss: 2.8579 (2.5304) weight_decay: 0.0500 (0.0500) time: 0.7244 data: 0.0001 max mem: 57114 Epoch: [261] Total time: 0:02:01 (0.7770 s / it) Averaged stats: lr: 0.000359 min_lr: 0.000359 loss: 2.8579 (2.5716) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:16 loss: 0.6648 (0.6648) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 3.2814 data: 3.0190 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8232 (0.7794) acc1: 83.8542 (84.3990) acc5: 98.4375 (97.6982) time: 0.8072 data: 0.6039 max mem: 57114 Test: Total time: 0:00:04 (0.8284 s / it) * Acc@1 84.293 Acc@5 96.963 loss 0.865 Accuracy of the model on the 50000 test images: 84.3% Max accuracy: 84.46% Test: [0/5] eta: 0:00:11 loss: 0.6118 (0.6118) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2768 data: 2.0331 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6996 (0.6954) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.1177) time: 0.6061 data: 0.4067 max mem: 57114 Test: Total time: 0:00:03 (0.6179 s / it) * Acc@1 77.488 Acc@5 93.798 loss 0.836 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [262] [ 0/156] eta: 0:15:57 lr: 0.000359 min_lr: 0.000359 loss: 2.0623 (2.0623) weight_decay: 0.0500 (0.0500) time: 6.1383 data: 4.7778 max mem: 57114 Epoch: [262] [ 10/156] eta: 0:03:01 lr: 0.000358 min_lr: 0.000358 loss: 2.5234 (2.4944) weight_decay: 0.0500 (0.0500) time: 1.2427 data: 0.4347 max mem: 57114 Epoch: [262] [ 20/156] eta: 0:02:14 lr: 0.000357 min_lr: 0.000357 loss: 2.4687 (2.4161) weight_decay: 0.0500 (0.0500) time: 0.7343 data: 0.0004 max mem: 57114 Epoch: [262] [ 30/156] eta: 0:01:54 lr: 0.000355 min_lr: 0.000355 loss: 2.6503 (2.5195) weight_decay: 0.0500 (0.0500) time: 0.7271 data: 0.0004 max mem: 57114 Epoch: [262] [ 40/156] eta: 0:01:40 lr: 0.000354 min_lr: 0.000354 loss: 2.7258 (2.5015) weight_decay: 0.0500 (0.0500) time: 0.7421 data: 0.0004 max mem: 57114 Epoch: [262] [ 50/156] eta: 0:01:29 lr: 0.000353 min_lr: 0.000353 loss: 2.5803 (2.5415) weight_decay: 0.0500 (0.0500) time: 0.7322 data: 0.0004 max mem: 57114 Epoch: [262] [ 60/156] eta: 0:01:18 lr: 0.000352 min_lr: 0.000352 loss: 2.6079 (2.5396) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [262] [ 70/156] eta: 0:01:09 lr: 0.000351 min_lr: 0.000351 loss: 2.4161 (2.5184) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0004 max mem: 57114 Epoch: [262] [ 80/156] eta: 0:01:00 lr: 0.000350 min_lr: 0.000350 loss: 2.5355 (2.5166) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0004 max mem: 57114 Epoch: [262] [ 90/156] eta: 0:00:51 lr: 0.000348 min_lr: 0.000348 loss: 2.5942 (2.5156) weight_decay: 0.0500 (0.0500) time: 0.7097 data: 0.0003 max mem: 57114 Epoch: [262] [100/156] eta: 0:00:43 lr: 0.000347 min_lr: 0.000347 loss: 2.5942 (2.5244) weight_decay: 0.0500 (0.0500) time: 0.7048 data: 0.0004 max mem: 57114 Epoch: [262] [110/156] eta: 0:00:35 lr: 0.000346 min_lr: 0.000346 loss: 2.6452 (2.5385) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0004 max mem: 57114 Epoch: [262] [120/156] eta: 0:00:27 lr: 0.000345 min_lr: 0.000345 loss: 2.6452 (2.5405) weight_decay: 0.0500 (0.0500) time: 0.7082 data: 0.0004 max mem: 57114 Epoch: [262] [130/156] eta: 0:00:19 lr: 0.000344 min_lr: 0.000344 loss: 2.6340 (2.5419) weight_decay: 0.0500 (0.0500) time: 0.7000 data: 0.0008 max mem: 57114 Epoch: [262] [140/156] eta: 0:00:12 lr: 0.000343 min_lr: 0.000343 loss: 2.5951 (2.5403) weight_decay: 0.0500 (0.0500) time: 0.6931 data: 0.0007 max mem: 57114 Epoch: [262] [150/156] eta: 0:00:04 lr: 0.000341 min_lr: 0.000341 loss: 2.5923 (2.5476) weight_decay: 0.0500 (0.0500) time: 0.6922 data: 0.0001 max mem: 57114 Epoch: [262] [155/156] eta: 0:00:00 lr: 0.000341 min_lr: 0.000341 loss: 2.6234 (2.5534) weight_decay: 0.0500 (0.0500) time: 0.6908 data: 0.0001 max mem: 57114 Epoch: [262] Total time: 0:01:57 (0.7502 s / it) Averaged stats: lr: 0.000341 min_lr: 0.000341 loss: 2.6234 (2.5675) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6187 (0.6187) acc1: 91.6667 (91.6667) acc5: 98.4375 (98.4375) time: 2.0410 data: 1.7853 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7903 (0.7366) acc1: 83.8542 (84.6547) acc5: 98.4375 (97.1867) time: 0.5589 data: 0.3571 max mem: 57114 Test: Total time: 0:00:02 (0.5792 s / it) * Acc@1 84.437 Acc@5 96.975 loss 0.828 Accuracy of the model on the 50000 test images: 84.4% Max accuracy: 84.46% Test: [0/5] eta: 0:00:11 loss: 0.6126 (0.6126) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2865 data: 2.0429 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7001 (0.6962) acc1: 80.7292 (79.1560) acc5: 96.3542 (93.9898) time: 0.6080 data: 0.4087 max mem: 57114 Test: Total time: 0:00:03 (0.6191 s / it) * Acc@1 77.478 Acc@5 93.788 loss 0.837 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [263] [ 0/156] eta: 0:15:22 lr: 0.000341 min_lr: 0.000341 loss: 2.7037 (2.7037) weight_decay: 0.0500 (0.0500) time: 5.9120 data: 4.7539 max mem: 57114 Epoch: [263] [ 10/156] eta: 0:02:57 lr: 0.000340 min_lr: 0.000340 loss: 2.6838 (2.5865) weight_decay: 0.0500 (0.0500) time: 1.2169 data: 0.4324 max mem: 57114 Epoch: [263] [ 20/156] eta: 0:02:13 lr: 0.000338 min_lr: 0.000338 loss: 2.6838 (2.6382) weight_decay: 0.0500 (0.0500) time: 0.7327 data: 0.0003 max mem: 57114 Epoch: [263] [ 30/156] eta: 0:01:54 lr: 0.000337 min_lr: 0.000337 loss: 2.6412 (2.5630) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0003 max mem: 57114 Epoch: [263] [ 40/156] eta: 0:01:39 lr: 0.000336 min_lr: 0.000336 loss: 2.5524 (2.5922) weight_decay: 0.0500 (0.0500) time: 0.7307 data: 0.0003 max mem: 57114 Epoch: [263] [ 50/156] eta: 0:01:27 lr: 0.000335 min_lr: 0.000335 loss: 2.7656 (2.6153) weight_decay: 0.0500 (0.0500) time: 0.7085 data: 0.0004 max mem: 57114 Epoch: [263] [ 60/156] eta: 0:01:17 lr: 0.000334 min_lr: 0.000334 loss: 2.7514 (2.5989) weight_decay: 0.0500 (0.0500) time: 0.7131 data: 0.0004 max mem: 57114 Epoch: [263] [ 70/156] eta: 0:01:08 lr: 0.000333 min_lr: 0.000333 loss: 2.7514 (2.6181) weight_decay: 0.0500 (0.0500) time: 0.7251 data: 0.0004 max mem: 57114 Epoch: [263] [ 80/156] eta: 0:00:59 lr: 0.000331 min_lr: 0.000331 loss: 2.6816 (2.5917) weight_decay: 0.0500 (0.0500) time: 0.7159 data: 0.0004 max mem: 57114 Epoch: [263] [ 90/156] eta: 0:00:51 lr: 0.000330 min_lr: 0.000330 loss: 2.7875 (2.6101) weight_decay: 0.0500 (0.0500) time: 0.7086 data: 0.0004 max mem: 57114 Epoch: [263] [100/156] eta: 0:00:43 lr: 0.000329 min_lr: 0.000329 loss: 2.8539 (2.6165) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0003 max mem: 57114 Epoch: [263] [110/156] eta: 0:00:35 lr: 0.000328 min_lr: 0.000328 loss: 2.7711 (2.6318) weight_decay: 0.0500 (0.0500) time: 0.7027 data: 0.0003 max mem: 57114 Epoch: [263] [120/156] eta: 0:00:27 lr: 0.000327 min_lr: 0.000327 loss: 2.7443 (2.6390) weight_decay: 0.0500 (0.0500) time: 0.6998 data: 0.0004 max mem: 57114 Epoch: [263] [130/156] eta: 0:00:19 lr: 0.000326 min_lr: 0.000326 loss: 2.7440 (2.6380) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0009 max mem: 57114 Epoch: [263] [140/156] eta: 0:00:12 lr: 0.000325 min_lr: 0.000325 loss: 2.6894 (2.6319) weight_decay: 0.0500 (0.0500) time: 0.6898 data: 0.0007 max mem: 57114 Epoch: [263] [150/156] eta: 0:00:04 lr: 0.000324 min_lr: 0.000324 loss: 2.5892 (2.6252) weight_decay: 0.0500 (0.0500) time: 0.6823 data: 0.0001 max mem: 57114 Epoch: [263] [155/156] eta: 0:00:00 lr: 0.000323 min_lr: 0.000323 loss: 2.5383 (2.6269) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0001 max mem: 57114 Epoch: [263] Total time: 0:01:56 (0.7467 s / it) Averaged stats: lr: 0.000323 min_lr: 0.000323 loss: 2.5383 (2.5639) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5826 (0.5826) acc1: 91.1458 (91.1458) acc5: 98.4375 (98.4375) time: 2.0218 data: 1.7661 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7475 (0.7091) acc1: 85.4167 (84.5269) acc5: 97.9167 (97.8261) time: 0.5551 data: 0.3533 max mem: 57114 Test: Total time: 0:00:02 (0.5771 s / it) * Acc@1 84.471 Acc@5 96.999 loss 0.804 Accuracy of the model on the 50000 test images: 84.5% Max accuracy: 84.47% Test: [0/5] eta: 0:00:09 loss: 0.6134 (0.6134) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 1.9982 data: 1.7540 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7005 (0.6970) acc1: 80.7292 (79.1560) acc5: 96.3542 (94.1177) time: 0.5503 data: 0.3509 max mem: 57114 Test: Total time: 0:00:02 (0.5617 s / it) * Acc@1 77.482 Acc@5 93.784 loss 0.838 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [264] [ 0/156] eta: 0:14:08 lr: 0.000323 min_lr: 0.000323 loss: 1.9886 (1.9886) weight_decay: 0.0500 (0.0500) time: 5.4400 data: 4.6566 max mem: 57114 Epoch: [264] [ 10/156] eta: 0:02:50 lr: 0.000322 min_lr: 0.000322 loss: 2.5858 (2.4804) weight_decay: 0.0500 (0.0500) time: 1.1687 data: 0.4237 max mem: 57114 Epoch: [264] [ 20/156] eta: 0:02:09 lr: 0.000321 min_lr: 0.000321 loss: 2.6470 (2.5429) weight_decay: 0.0500 (0.0500) time: 0.7314 data: 0.0003 max mem: 57114 Epoch: [264] [ 30/156] eta: 0:01:50 lr: 0.000319 min_lr: 0.000319 loss: 2.6891 (2.5782) weight_decay: 0.0500 (0.0500) time: 0.7174 data: 0.0003 max mem: 57114 Epoch: [264] [ 40/156] eta: 0:01:37 lr: 0.000318 min_lr: 0.000318 loss: 2.5269 (2.5363) weight_decay: 0.0500 (0.0500) time: 0.7283 data: 0.0004 max mem: 57114 Epoch: [264] [ 50/156] eta: 0:01:27 lr: 0.000317 min_lr: 0.000317 loss: 2.3410 (2.5071) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0004 max mem: 57114 Epoch: [264] [ 60/156] eta: 0:01:17 lr: 0.000316 min_lr: 0.000316 loss: 2.4997 (2.5008) weight_decay: 0.0500 (0.0500) time: 0.7283 data: 0.0004 max mem: 57114 Epoch: [264] [ 70/156] eta: 0:01:08 lr: 0.000315 min_lr: 0.000315 loss: 2.5345 (2.4886) weight_decay: 0.0500 (0.0500) time: 0.7230 data: 0.0004 max mem: 57114 Epoch: [264] [ 80/156] eta: 0:00:59 lr: 0.000314 min_lr: 0.000314 loss: 2.5669 (2.4852) weight_decay: 0.0500 (0.0500) time: 0.7174 data: 0.0004 max mem: 57114 Epoch: [264] [ 90/156] eta: 0:00:51 lr: 0.000313 min_lr: 0.000313 loss: 2.6153 (2.5085) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [264] [100/156] eta: 0:00:43 lr: 0.000312 min_lr: 0.000312 loss: 2.7477 (2.5350) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0004 max mem: 57114 Epoch: [264] [110/156] eta: 0:00:35 lr: 0.000311 min_lr: 0.000311 loss: 2.7912 (2.5385) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0003 max mem: 57114 Epoch: [264] [120/156] eta: 0:00:27 lr: 0.000309 min_lr: 0.000309 loss: 2.6958 (2.5354) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0003 max mem: 57114 Epoch: [264] [130/156] eta: 0:00:19 lr: 0.000308 min_lr: 0.000308 loss: 2.6424 (2.5371) weight_decay: 0.0500 (0.0500) time: 0.6980 data: 0.0008 max mem: 57114 Epoch: [264] [140/156] eta: 0:00:11 lr: 0.000307 min_lr: 0.000307 loss: 2.6258 (2.5347) weight_decay: 0.0500 (0.0500) time: 0.6833 data: 0.0007 max mem: 57114 Epoch: [264] [150/156] eta: 0:00:04 lr: 0.000306 min_lr: 0.000306 loss: 2.6390 (2.5329) weight_decay: 0.0500 (0.0500) time: 0.6813 data: 0.0001 max mem: 57114 Epoch: [264] [155/156] eta: 0:00:00 lr: 0.000306 min_lr: 0.000306 loss: 2.6634 (2.5233) weight_decay: 0.0500 (0.0500) time: 0.6807 data: 0.0001 max mem: 57114 Epoch: [264] Total time: 0:01:56 (0.7438 s / it) Averaged stats: lr: 0.000306 min_lr: 0.000306 loss: 2.6634 (2.5600) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.5162 (0.5162) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.2847 data: 2.0285 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7062 (0.6333) acc1: 84.8958 (85.2941) acc5: 98.4375 (97.4425) time: 0.6076 data: 0.4058 max mem: 57114 Test: Total time: 0:00:03 (0.6290 s / it) * Acc@1 84.489 Acc@5 97.025 loss 0.729 Accuracy of the model on the 50000 test images: 84.5% Max accuracy: 84.49% Test: [0/5] eta: 0:00:10 loss: 0.6141 (0.6141) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.0086 data: 1.7651 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7009 (0.6976) acc1: 80.7292 (79.1560) acc5: 96.3542 (93.9898) time: 0.5523 data: 0.3531 max mem: 57114 Test: Total time: 0:00:02 (0.5652 s / it) * Acc@1 77.490 Acc@5 93.784 loss 0.839 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [265] [ 0/156] eta: 0:15:49 lr: 0.000305 min_lr: 0.000305 loss: 1.7179 (1.7179) weight_decay: 0.0500 (0.0500) time: 6.0865 data: 4.0951 max mem: 57114 Epoch: [265] [ 10/156] eta: 0:02:57 lr: 0.000304 min_lr: 0.000304 loss: 2.5420 (2.4362) weight_decay: 0.0500 (0.0500) time: 1.2184 data: 0.3726 max mem: 57114 Epoch: [265] [ 20/156] eta: 0:02:13 lr: 0.000303 min_lr: 0.000303 loss: 2.6301 (2.4677) weight_decay: 0.0500 (0.0500) time: 0.7296 data: 0.0003 max mem: 57114 Epoch: [265] [ 30/156] eta: 0:01:54 lr: 0.000302 min_lr: 0.000302 loss: 2.6496 (2.4989) weight_decay: 0.0500 (0.0500) time: 0.7410 data: 0.0003 max mem: 57114 Epoch: [265] [ 40/156] eta: 0:01:41 lr: 0.000301 min_lr: 0.000301 loss: 2.7701 (2.5295) weight_decay: 0.0500 (0.0500) time: 0.7553 data: 0.0004 max mem: 57114 Epoch: [265] [ 50/156] eta: 0:01:29 lr: 0.000300 min_lr: 0.000300 loss: 2.6469 (2.5393) weight_decay: 0.0500 (0.0500) time: 0.7404 data: 0.0004 max mem: 57114 Epoch: [265] [ 60/156] eta: 0:01:19 lr: 0.000299 min_lr: 0.000299 loss: 2.5542 (2.5449) weight_decay: 0.0500 (0.0500) time: 0.7293 data: 0.0004 max mem: 57114 Epoch: [265] [ 70/156] eta: 0:01:09 lr: 0.000298 min_lr: 0.000298 loss: 2.5542 (2.5393) weight_decay: 0.0500 (0.0500) time: 0.7233 data: 0.0004 max mem: 57114 Epoch: [265] [ 80/156] eta: 0:01:00 lr: 0.000297 min_lr: 0.000297 loss: 2.5031 (2.5237) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [265] [ 90/156] eta: 0:00:51 lr: 0.000296 min_lr: 0.000296 loss: 2.5418 (2.5377) weight_decay: 0.0500 (0.0500) time: 0.7065 data: 0.0004 max mem: 57114 Epoch: [265] [100/156] eta: 0:00:43 lr: 0.000295 min_lr: 0.000295 loss: 2.4837 (2.5255) weight_decay: 0.0500 (0.0500) time: 0.7079 data: 0.0004 max mem: 57114 Epoch: [265] [110/156] eta: 0:00:35 lr: 0.000293 min_lr: 0.000293 loss: 2.5465 (2.5263) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [265] [120/156] eta: 0:00:27 lr: 0.000292 min_lr: 0.000292 loss: 2.6863 (2.5320) weight_decay: 0.0500 (0.0500) time: 0.7047 data: 0.0004 max mem: 57114 Epoch: [265] [130/156] eta: 0:00:19 lr: 0.000291 min_lr: 0.000291 loss: 2.6635 (2.5229) weight_decay: 0.0500 (0.0500) time: 0.6922 data: 0.0009 max mem: 57114 Epoch: [265] [140/156] eta: 0:00:12 lr: 0.000290 min_lr: 0.000290 loss: 2.6054 (2.5224) weight_decay: 0.0500 (0.0500) time: 0.6863 data: 0.0007 max mem: 57114 Epoch: [265] [150/156] eta: 0:00:04 lr: 0.000289 min_lr: 0.000289 loss: 2.6054 (2.5248) weight_decay: 0.0500 (0.0500) time: 0.6840 data: 0.0001 max mem: 57114 Epoch: [265] [155/156] eta: 0:00:00 lr: 0.000289 min_lr: 0.000289 loss: 2.5729 (2.5265) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0001 max mem: 57114 Epoch: [265] Total time: 0:01:57 (0.7508 s / it) Averaged stats: lr: 0.000289 min_lr: 0.000289 loss: 2.5729 (2.5621) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5742 (0.5742) acc1: 91.6667 (91.6667) acc5: 98.9583 (98.9583) time: 2.0726 data: 1.8168 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7442 (0.6870) acc1: 83.8542 (84.3990) acc5: 98.4375 (97.5703) time: 0.5653 data: 0.3634 max mem: 57114 Test: Total time: 0:00:02 (0.5845 s / it) * Acc@1 84.395 Acc@5 96.993 loss 0.776 Accuracy of the model on the 50000 test images: 84.4% Max accuracy: 84.49% Test: [0/5] eta: 0:00:11 loss: 0.6147 (0.6147) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3226 data: 2.0791 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7013 (0.6982) acc1: 80.7292 (79.1560) acc5: 96.3542 (93.9898) time: 0.6151 data: 0.4159 max mem: 57114 Test: Total time: 0:00:03 (0.6276 s / it) * Acc@1 77.492 Acc@5 93.782 loss 0.839 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [266] [ 0/156] eta: 0:14:10 lr: 0.000289 min_lr: 0.000289 loss: 1.6959 (1.6959) weight_decay: 0.0500 (0.0500) time: 5.4547 data: 4.4488 max mem: 57114 Epoch: [266] [ 10/156] eta: 0:02:50 lr: 0.000287 min_lr: 0.000287 loss: 2.4515 (2.3374) weight_decay: 0.0500 (0.0500) time: 1.1674 data: 0.4048 max mem: 57114 Epoch: [266] [ 20/156] eta: 0:02:11 lr: 0.000286 min_lr: 0.000286 loss: 2.6115 (2.4954) weight_decay: 0.0500 (0.0500) time: 0.7429 data: 0.0004 max mem: 57114 Epoch: [266] [ 30/156] eta: 0:01:51 lr: 0.000285 min_lr: 0.000285 loss: 2.6115 (2.4464) weight_decay: 0.0500 (0.0500) time: 0.7356 data: 0.0003 max mem: 57114 Epoch: [266] [ 40/156] eta: 0:01:38 lr: 0.000284 min_lr: 0.000284 loss: 2.6038 (2.4662) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0003 max mem: 57114 Epoch: [266] [ 50/156] eta: 0:01:26 lr: 0.000283 min_lr: 0.000283 loss: 2.6140 (2.4679) weight_decay: 0.0500 (0.0500) time: 0.7090 data: 0.0004 max mem: 57114 Epoch: [266] [ 60/156] eta: 0:01:17 lr: 0.000282 min_lr: 0.000282 loss: 2.5531 (2.4658) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0004 max mem: 57114 Epoch: [266] [ 70/156] eta: 0:01:08 lr: 0.000281 min_lr: 0.000281 loss: 2.6665 (2.5133) weight_decay: 0.0500 (0.0500) time: 0.7239 data: 0.0004 max mem: 57114 Epoch: [266] [ 80/156] eta: 0:00:59 lr: 0.000280 min_lr: 0.000280 loss: 2.7105 (2.5264) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0004 max mem: 57114 Epoch: [266] [ 90/156] eta: 0:00:51 lr: 0.000279 min_lr: 0.000279 loss: 2.7105 (2.5532) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0004 max mem: 57114 Epoch: [266] [100/156] eta: 0:00:43 lr: 0.000278 min_lr: 0.000278 loss: 2.7296 (2.5593) weight_decay: 0.0500 (0.0500) time: 0.7188 data: 0.0004 max mem: 57114 Epoch: [266] [110/156] eta: 0:00:35 lr: 0.000277 min_lr: 0.000277 loss: 2.4902 (2.5419) weight_decay: 0.0500 (0.0500) time: 0.6982 data: 0.0004 max mem: 57114 Epoch: [266] [120/156] eta: 0:00:27 lr: 0.000276 min_lr: 0.000276 loss: 2.4368 (2.5467) weight_decay: 0.0500 (0.0500) time: 0.6948 data: 0.0004 max mem: 57114 Epoch: [266] [130/156] eta: 0:00:19 lr: 0.000275 min_lr: 0.000275 loss: 2.6208 (2.5521) weight_decay: 0.0500 (0.0500) time: 0.6945 data: 0.0009 max mem: 57114 Epoch: [266] [140/156] eta: 0:00:11 lr: 0.000274 min_lr: 0.000274 loss: 2.5477 (2.5380) weight_decay: 0.0500 (0.0500) time: 0.6815 data: 0.0007 max mem: 57114 Epoch: [266] [150/156] eta: 0:00:04 lr: 0.000273 min_lr: 0.000273 loss: 2.3826 (2.5234) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0001 max mem: 57114 Epoch: [266] [155/156] eta: 0:00:00 lr: 0.000272 min_lr: 0.000272 loss: 2.4860 (2.5281) weight_decay: 0.0500 (0.0500) time: 0.6861 data: 0.0001 max mem: 57114 Epoch: [266] Total time: 0:01:55 (0.7425 s / it) Averaged stats: lr: 0.000272 min_lr: 0.000272 loss: 2.4860 (2.5544) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5489 (0.5489) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.0815 data: 1.8242 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7506 (0.6820) acc1: 84.8958 (84.7826) acc5: 98.4375 (97.3146) time: 0.5669 data: 0.3649 max mem: 57114 Test: Total time: 0:00:02 (0.5868 s / it) * Acc@1 84.451 Acc@5 96.981 loss 0.768 Accuracy of the model on the 50000 test images: 84.5% Max accuracy: 84.49% Test: [0/5] eta: 0:00:11 loss: 0.6154 (0.6154) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3162 data: 2.0727 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7017 (0.6988) acc1: 80.7292 (79.1560) acc5: 96.3542 (93.9898) time: 0.6139 data: 0.4146 max mem: 57114 Test: Total time: 0:00:03 (0.6277 s / it) * Acc@1 77.524 Acc@5 93.776 loss 0.840 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [267] [ 0/156] eta: 0:14:14 lr: 0.000272 min_lr: 0.000272 loss: 2.5445 (2.5445) weight_decay: 0.0500 (0.0500) time: 5.4786 data: 3.7973 max mem: 57114 Epoch: [267] [ 10/156] eta: 0:02:50 lr: 0.000271 min_lr: 0.000271 loss: 2.5445 (2.4688) weight_decay: 0.0500 (0.0500) time: 1.1655 data: 0.3455 max mem: 57114 Epoch: [267] [ 20/156] eta: 0:02:10 lr: 0.000270 min_lr: 0.000270 loss: 2.6471 (2.4878) weight_decay: 0.0500 (0.0500) time: 0.7347 data: 0.0004 max mem: 57114 Epoch: [267] [ 30/156] eta: 0:01:51 lr: 0.000269 min_lr: 0.000269 loss: 2.7584 (2.4949) weight_decay: 0.0500 (0.0500) time: 0.7285 data: 0.0004 max mem: 57114 Epoch: [267] [ 40/156] eta: 0:01:37 lr: 0.000268 min_lr: 0.000268 loss: 2.5784 (2.5036) weight_decay: 0.0500 (0.0500) time: 0.7152 data: 0.0003 max mem: 57114 Epoch: [267] [ 50/156] eta: 0:01:26 lr: 0.000267 min_lr: 0.000267 loss: 2.5328 (2.4796) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0003 max mem: 57114 Epoch: [267] [ 60/156] eta: 0:01:16 lr: 0.000266 min_lr: 0.000266 loss: 2.6151 (2.5005) weight_decay: 0.0500 (0.0500) time: 0.7216 data: 0.0004 max mem: 57114 Epoch: [267] [ 70/156] eta: 0:01:07 lr: 0.000265 min_lr: 0.000265 loss: 2.6151 (2.4976) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0004 max mem: 57114 Epoch: [267] [ 80/156] eta: 0:00:59 lr: 0.000264 min_lr: 0.000264 loss: 2.6178 (2.4936) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0004 max mem: 57114 Epoch: [267] [ 90/156] eta: 0:00:50 lr: 0.000263 min_lr: 0.000263 loss: 2.6230 (2.5062) weight_decay: 0.0500 (0.0500) time: 0.7129 data: 0.0004 max mem: 57114 Epoch: [267] [100/156] eta: 0:00:42 lr: 0.000262 min_lr: 0.000262 loss: 2.6104 (2.5015) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0004 max mem: 57114 Epoch: [267] [110/156] eta: 0:00:34 lr: 0.000261 min_lr: 0.000261 loss: 2.6104 (2.5101) weight_decay: 0.0500 (0.0500) time: 0.6991 data: 0.0004 max mem: 57114 Epoch: [267] [120/156] eta: 0:00:27 lr: 0.000260 min_lr: 0.000260 loss: 2.5249 (2.4986) weight_decay: 0.0500 (0.0500) time: 0.7066 data: 0.0004 max mem: 57114 Epoch: [267] [130/156] eta: 0:00:19 lr: 0.000259 min_lr: 0.000259 loss: 2.5424 (2.5018) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0008 max mem: 57114 Epoch: [267] [140/156] eta: 0:00:11 lr: 0.000258 min_lr: 0.000258 loss: 2.6770 (2.5083) weight_decay: 0.0500 (0.0500) time: 0.6938 data: 0.0007 max mem: 57114 Epoch: [267] [150/156] eta: 0:00:04 lr: 0.000257 min_lr: 0.000257 loss: 2.6493 (2.5093) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0001 max mem: 57114 Epoch: [267] [155/156] eta: 0:00:00 lr: 0.000256 min_lr: 0.000256 loss: 2.5402 (2.5006) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0001 max mem: 57114 Epoch: [267] Total time: 0:01:55 (0.7426 s / it) Averaged stats: lr: 0.000256 min_lr: 0.000256 loss: 2.5402 (2.5527) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5416 (0.5416) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.1738 data: 1.9179 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7355 (0.6741) acc1: 85.4167 (84.6547) acc5: 98.4375 (97.8261) time: 0.5854 data: 0.3836 max mem: 57114 Test: Total time: 0:00:03 (0.6105 s / it) * Acc@1 84.335 Acc@5 97.041 loss 0.762 Accuracy of the model on the 50000 test images: 84.3% Max accuracy: 84.49% Test: [0/5] eta: 0:00:11 loss: 0.6162 (0.6162) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2851 data: 2.0416 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7021 (0.6995) acc1: 81.2500 (79.1560) acc5: 96.3542 (94.1177) time: 0.6077 data: 0.4084 max mem: 57114 Test: Total time: 0:00:03 (0.6202 s / it) * Acc@1 77.518 Acc@5 93.776 loss 0.841 Accuracy of the model EMA on 50000 test images: 77.5% Epoch: [268] [ 0/156] eta: 0:15:53 lr: 0.000256 min_lr: 0.000256 loss: 2.9535 (2.9535) weight_decay: 0.0500 (0.0500) time: 6.1126 data: 5.1222 max mem: 57114 Epoch: [268] [ 10/156] eta: 0:02:57 lr: 0.000255 min_lr: 0.000255 loss: 2.5231 (2.4558) weight_decay: 0.0500 (0.0500) time: 1.2170 data: 0.4660 max mem: 57114 Epoch: [268] [ 20/156] eta: 0:02:13 lr: 0.000254 min_lr: 0.000254 loss: 2.4281 (2.3977) weight_decay: 0.0500 (0.0500) time: 0.7227 data: 0.0004 max mem: 57114 Epoch: [268] [ 30/156] eta: 0:01:52 lr: 0.000253 min_lr: 0.000253 loss: 2.1784 (2.3573) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.0003 max mem: 57114 Epoch: [268] [ 40/156] eta: 0:01:39 lr: 0.000252 min_lr: 0.000252 loss: 2.5282 (2.4053) weight_decay: 0.0500 (0.0500) time: 0.7216 data: 0.0004 max mem: 57114 Epoch: [268] [ 50/156] eta: 0:01:27 lr: 0.000251 min_lr: 0.000251 loss: 2.7161 (2.4475) weight_decay: 0.0500 (0.0500) time: 0.7252 data: 0.0004 max mem: 57114 Epoch: [268] [ 60/156] eta: 0:01:17 lr: 0.000250 min_lr: 0.000250 loss: 2.7136 (2.4500) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0004 max mem: 57114 Epoch: [268] [ 70/156] eta: 0:01:08 lr: 0.000249 min_lr: 0.000249 loss: 2.6740 (2.4865) weight_decay: 0.0500 (0.0500) time: 0.7155 data: 0.0004 max mem: 57114 Epoch: [268] [ 80/156] eta: 0:00:59 lr: 0.000248 min_lr: 0.000248 loss: 2.6981 (2.4944) weight_decay: 0.0500 (0.0500) time: 0.7153 data: 0.0004 max mem: 57114 Epoch: [268] [ 90/156] eta: 0:00:51 lr: 0.000247 min_lr: 0.000247 loss: 2.6981 (2.5062) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [268] [100/156] eta: 0:00:43 lr: 0.000246 min_lr: 0.000246 loss: 2.7829 (2.5344) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0004 max mem: 57114 Epoch: [268] [110/156] eta: 0:00:35 lr: 0.000245 min_lr: 0.000245 loss: 2.7285 (2.5242) weight_decay: 0.0500 (0.0500) time: 0.7142 data: 0.0004 max mem: 57114 Epoch: [268] [120/156] eta: 0:00:27 lr: 0.000244 min_lr: 0.000244 loss: 2.5016 (2.5128) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [268] [130/156] eta: 0:00:19 lr: 0.000243 min_lr: 0.000243 loss: 2.5016 (2.5051) weight_decay: 0.0500 (0.0500) time: 0.7015 data: 0.0008 max mem: 57114 Epoch: [268] [140/156] eta: 0:00:12 lr: 0.000242 min_lr: 0.000242 loss: 2.5331 (2.5099) weight_decay: 0.0500 (0.0500) time: 0.6934 data: 0.0007 max mem: 57114 Epoch: [268] [150/156] eta: 0:00:04 lr: 0.000241 min_lr: 0.000241 loss: 2.5961 (2.5253) weight_decay: 0.0500 (0.0500) time: 0.6859 data: 0.0001 max mem: 57114 Epoch: [268] [155/156] eta: 0:00:00 lr: 0.000241 min_lr: 0.000241 loss: 2.7041 (2.5353) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [268] Total time: 0:01:56 (0.7470 s / it) Averaged stats: lr: 0.000241 min_lr: 0.000241 loss: 2.7041 (2.5517) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6582 (0.6582) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.0312 data: 1.7747 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8220 (0.7710) acc1: 83.8542 (84.5269) acc5: 97.9167 (97.1867) time: 0.5569 data: 0.3550 max mem: 57114 Test: Total time: 0:00:02 (0.5770 s / it) * Acc@1 84.413 Acc@5 96.999 loss 0.850 Accuracy of the model on the 50000 test images: 84.4% Max accuracy: 84.49% Test: [0/5] eta: 0:00:11 loss: 0.6167 (0.6167) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.2867 data: 2.0432 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7024 (0.6999) acc1: 81.2500 (79.1560) acc5: 96.3542 (94.1177) time: 0.6081 data: 0.4087 max mem: 57114 Test: Total time: 0:00:03 (0.6597 s / it) * Acc@1 77.530 Acc@5 93.774 loss 0.841 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.53% Epoch: [269] [ 0/156] eta: 0:12:25 lr: 0.000240 min_lr: 0.000240 loss: 2.8958 (2.8958) weight_decay: 0.0500 (0.0500) time: 4.7788 data: 4.1289 max mem: 57114 Epoch: [269] [ 10/156] eta: 0:02:40 lr: 0.000240 min_lr: 0.000240 loss: 2.5879 (2.3688) weight_decay: 0.0500 (0.0500) time: 1.1008 data: 0.3756 max mem: 57114 Epoch: [269] [ 20/156] eta: 0:02:05 lr: 0.000239 min_lr: 0.000239 loss: 2.6957 (2.5579) weight_decay: 0.0500 (0.0500) time: 0.7317 data: 0.0003 max mem: 57114 Epoch: [269] [ 30/156] eta: 0:01:48 lr: 0.000238 min_lr: 0.000238 loss: 2.7764 (2.5716) weight_decay: 0.0500 (0.0500) time: 0.7250 data: 0.0003 max mem: 57114 Epoch: [269] [ 40/156] eta: 0:01:36 lr: 0.000237 min_lr: 0.000237 loss: 2.7060 (2.5826) weight_decay: 0.0500 (0.0500) time: 0.7283 data: 0.0004 max mem: 57114 Epoch: [269] [ 50/156] eta: 0:01:25 lr: 0.000236 min_lr: 0.000236 loss: 2.6324 (2.5705) weight_decay: 0.0500 (0.0500) time: 0.7237 data: 0.0004 max mem: 57114 Epoch: [269] [ 60/156] eta: 0:01:16 lr: 0.000235 min_lr: 0.000235 loss: 2.5319 (2.5490) weight_decay: 0.0500 (0.0500) time: 0.7225 data: 0.0004 max mem: 57114 Epoch: [269] [ 70/156] eta: 0:01:07 lr: 0.000234 min_lr: 0.000234 loss: 2.6588 (2.5508) weight_decay: 0.0500 (0.0500) time: 0.7291 data: 0.0004 max mem: 57114 Epoch: [269] [ 80/156] eta: 0:00:59 lr: 0.000233 min_lr: 0.000233 loss: 2.6588 (2.5599) weight_decay: 0.0500 (0.0500) time: 0.7245 data: 0.0004 max mem: 57114 Epoch: [269] [ 90/156] eta: 0:00:50 lr: 0.000232 min_lr: 0.000232 loss: 2.5973 (2.5638) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0004 max mem: 57114 Epoch: [269] [100/156] eta: 0:00:42 lr: 0.000231 min_lr: 0.000231 loss: 2.5973 (2.5540) weight_decay: 0.0500 (0.0500) time: 0.7159 data: 0.0004 max mem: 57114 Epoch: [269] [110/156] eta: 0:00:34 lr: 0.000230 min_lr: 0.000230 loss: 2.5489 (2.5535) weight_decay: 0.0500 (0.0500) time: 0.7148 data: 0.0004 max mem: 57114 Epoch: [269] [120/156] eta: 0:00:27 lr: 0.000229 min_lr: 0.000229 loss: 2.6330 (2.5479) weight_decay: 0.0500 (0.0500) time: 0.7000 data: 0.0004 max mem: 57114 Epoch: [269] [130/156] eta: 0:00:19 lr: 0.000228 min_lr: 0.000228 loss: 2.6330 (2.5282) weight_decay: 0.0500 (0.0500) time: 0.6931 data: 0.0008 max mem: 57114 Epoch: [269] [140/156] eta: 0:00:11 lr: 0.000227 min_lr: 0.000227 loss: 2.5789 (2.5348) weight_decay: 0.0500 (0.0500) time: 0.6880 data: 0.0007 max mem: 57114 Epoch: [269] [150/156] eta: 0:00:04 lr: 0.000226 min_lr: 0.000226 loss: 2.5555 (2.5404) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0001 max mem: 57114 Epoch: [269] [155/156] eta: 0:00:00 lr: 0.000226 min_lr: 0.000226 loss: 2.5553 (2.5414) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0001 max mem: 57114 Epoch: [269] Total time: 0:01:55 (0.7409 s / it) Averaged stats: lr: 0.000226 min_lr: 0.000226 loss: 2.5553 (2.5483) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6151 (0.6151) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0906 data: 1.8306 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8042 (0.7405) acc1: 82.8125 (83.8875) acc5: 98.4375 (97.1867) time: 0.5689 data: 0.3662 max mem: 57114 Test: Total time: 0:00:02 (0.5893 s / it) * Acc@1 84.429 Acc@5 96.947 loss 0.824 Accuracy of the model on the 50000 test images: 84.4% Max accuracy: 84.49% Test: [0/5] eta: 0:00:11 loss: 0.6171 (0.6171) acc1: 84.3750 (84.3750) acc5: 97.3958 (97.3958) time: 2.3655 data: 2.1221 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7027 (0.7004) acc1: 81.2500 (79.2839) acc5: 96.3542 (94.1177) time: 0.6238 data: 0.4245 max mem: 57114 Test: Total time: 0:00:03 (0.6342 s / it) * Acc@1 77.534 Acc@5 93.770 loss 0.842 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.53% Epoch: [270] [ 0/156] eta: 0:10:36 lr: 0.000225 min_lr: 0.000225 loss: 2.8529 (2.8529) weight_decay: 0.0500 (0.0500) time: 4.0828 data: 3.4315 max mem: 57114 Epoch: [270] [ 10/156] eta: 0:02:31 lr: 0.000224 min_lr: 0.000224 loss: 2.4895 (2.4408) weight_decay: 0.0500 (0.0500) time: 1.0374 data: 0.3122 max mem: 57114 Epoch: [270] [ 20/156] eta: 0:02:01 lr: 0.000224 min_lr: 0.000224 loss: 2.4908 (2.5522) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0003 max mem: 57114 Epoch: [270] [ 30/156] eta: 0:01:45 lr: 0.000223 min_lr: 0.000223 loss: 2.6121 (2.5352) weight_decay: 0.0500 (0.0500) time: 0.7259 data: 0.0003 max mem: 57114 Epoch: [270] [ 40/156] eta: 0:01:33 lr: 0.000222 min_lr: 0.000222 loss: 2.5526 (2.5108) weight_decay: 0.0500 (0.0500) time: 0.7175 data: 0.0003 max mem: 57114 Epoch: [270] [ 50/156] eta: 0:01:23 lr: 0.000221 min_lr: 0.000221 loss: 2.5526 (2.5116) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0003 max mem: 57114 Epoch: [270] [ 60/156] eta: 0:01:14 lr: 0.000220 min_lr: 0.000220 loss: 2.6718 (2.5352) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0003 max mem: 57114 Epoch: [270] [ 70/156] eta: 0:01:06 lr: 0.000219 min_lr: 0.000219 loss: 2.6718 (2.5359) weight_decay: 0.0500 (0.0500) time: 0.7158 data: 0.0003 max mem: 57114 Epoch: [270] [ 80/156] eta: 0:00:58 lr: 0.000218 min_lr: 0.000218 loss: 2.6827 (2.5555) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [270] [ 90/156] eta: 0:00:50 lr: 0.000217 min_lr: 0.000217 loss: 2.5931 (2.5359) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0004 max mem: 57114 Epoch: [270] [100/156] eta: 0:00:42 lr: 0.000216 min_lr: 0.000216 loss: 2.4775 (2.5383) weight_decay: 0.0500 (0.0500) time: 0.7145 data: 0.0004 max mem: 57114 Epoch: [270] [110/156] eta: 0:00:34 lr: 0.000215 min_lr: 0.000215 loss: 2.5762 (2.5413) weight_decay: 0.0500 (0.0500) time: 0.7151 data: 0.0004 max mem: 57114 Epoch: [270] [120/156] eta: 0:00:26 lr: 0.000214 min_lr: 0.000214 loss: 2.5884 (2.5522) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [270] [130/156] eta: 0:00:19 lr: 0.000213 min_lr: 0.000213 loss: 2.5787 (2.5460) weight_decay: 0.0500 (0.0500) time: 0.7001 data: 0.0008 max mem: 57114 Epoch: [270] [140/156] eta: 0:00:11 lr: 0.000212 min_lr: 0.000212 loss: 2.6945 (2.5636) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0007 max mem: 57114 Epoch: [270] [150/156] eta: 0:00:04 lr: 0.000211 min_lr: 0.000211 loss: 2.6766 (2.5411) weight_decay: 0.0500 (0.0500) time: 0.6805 data: 0.0001 max mem: 57114 Epoch: [270] [155/156] eta: 0:00:00 lr: 0.000211 min_lr: 0.000211 loss: 2.4261 (2.5354) weight_decay: 0.0500 (0.0500) time: 0.6823 data: 0.0001 max mem: 57114 Epoch: [270] Total time: 0:01:54 (0.7353 s / it) Averaged stats: lr: 0.000211 min_lr: 0.000211 loss: 2.4261 (2.5402) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5246 (0.5246) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.0758 data: 1.8201 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6984 (0.6575) acc1: 84.3750 (84.6547) acc5: 98.9583 (97.3146) time: 0.5658 data: 0.3641 max mem: 57114 Test: Total time: 0:00:02 (0.5919 s / it) * Acc@1 84.513 Acc@5 96.997 loss 0.741 Accuracy of the model on the 50000 test images: 84.5% Max accuracy: 84.51% Test: [0/5] eta: 0:00:10 loss: 0.6175 (0.6175) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.0514 data: 1.8078 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7030 (0.7008) acc1: 81.2500 (79.4118) acc5: 96.3542 (94.1177) time: 0.5610 data: 0.3617 max mem: 57114 Test: Total time: 0:00:02 (0.5741 s / it) * Acc@1 77.540 Acc@5 93.792 loss 0.842 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.54% Epoch: [271] [ 0/156] eta: 0:11:44 lr: 0.000211 min_lr: 0.000211 loss: 2.7026 (2.7026) weight_decay: 0.0500 (0.0500) time: 4.5175 data: 3.8375 max mem: 57114 Epoch: [271] [ 10/156] eta: 0:02:40 lr: 0.000210 min_lr: 0.000210 loss: 2.7026 (2.5202) weight_decay: 0.0500 (0.0500) time: 1.0989 data: 0.3630 max mem: 57114 Epoch: [271] [ 20/156] eta: 0:02:05 lr: 0.000209 min_lr: 0.000209 loss: 2.6149 (2.4751) weight_decay: 0.0500 (0.0500) time: 0.7457 data: 0.0080 max mem: 57114 Epoch: [271] [ 30/156] eta: 0:01:48 lr: 0.000208 min_lr: 0.000208 loss: 2.6149 (2.5288) weight_decay: 0.0500 (0.0500) time: 0.7283 data: 0.0003 max mem: 57114 Epoch: [271] [ 40/156] eta: 0:01:36 lr: 0.000207 min_lr: 0.000207 loss: 2.5938 (2.5358) weight_decay: 0.0500 (0.0500) time: 0.7265 data: 0.0003 max mem: 57114 Epoch: [271] [ 50/156] eta: 0:01:25 lr: 0.000206 min_lr: 0.000206 loss: 2.6873 (2.5843) weight_decay: 0.0500 (0.0500) time: 0.7273 data: 0.0004 max mem: 57114 Epoch: [271] [ 60/156] eta: 0:01:16 lr: 0.000205 min_lr: 0.000205 loss: 2.7115 (2.5955) weight_decay: 0.0500 (0.0500) time: 0.7357 data: 0.0004 max mem: 57114 Epoch: [271] [ 70/156] eta: 0:01:07 lr: 0.000204 min_lr: 0.000204 loss: 2.7031 (2.5930) weight_decay: 0.0500 (0.0500) time: 0.7354 data: 0.0003 max mem: 57114 Epoch: [271] [ 80/156] eta: 0:00:59 lr: 0.000204 min_lr: 0.000204 loss: 2.5770 (2.5798) weight_decay: 0.0500 (0.0500) time: 0.7238 data: 0.0004 max mem: 57114 Epoch: [271] [ 90/156] eta: 0:00:50 lr: 0.000203 min_lr: 0.000203 loss: 2.5257 (2.5652) weight_decay: 0.0500 (0.0500) time: 0.7108 data: 0.0005 max mem: 57114 Epoch: [271] [100/156] eta: 0:00:42 lr: 0.000202 min_lr: 0.000202 loss: 2.6019 (2.5694) weight_decay: 0.0500 (0.0500) time: 0.6990 data: 0.0005 max mem: 57114 Epoch: [271] [110/156] eta: 0:00:34 lr: 0.000201 min_lr: 0.000201 loss: 2.5869 (2.5473) weight_decay: 0.0500 (0.0500) time: 0.6991 data: 0.0003 max mem: 57114 Epoch: [271] [120/156] eta: 0:00:27 lr: 0.000200 min_lr: 0.000200 loss: 2.7086 (2.5668) weight_decay: 0.0500 (0.0500) time: 0.7041 data: 0.0003 max mem: 57114 Epoch: [271] [130/156] eta: 0:00:19 lr: 0.000199 min_lr: 0.000199 loss: 2.7420 (2.5671) weight_decay: 0.0500 (0.0500) time: 0.7003 data: 0.0008 max mem: 57114 Epoch: [271] [140/156] eta: 0:00:11 lr: 0.000198 min_lr: 0.000198 loss: 2.4818 (2.5545) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0007 max mem: 57114 Epoch: [271] [150/156] eta: 0:00:04 lr: 0.000197 min_lr: 0.000197 loss: 2.5461 (2.5615) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0001 max mem: 57114 Epoch: [271] [155/156] eta: 0:00:00 lr: 0.000197 min_lr: 0.000197 loss: 2.5843 (2.5621) weight_decay: 0.0500 (0.0500) time: 0.6788 data: 0.0001 max mem: 57114 Epoch: [271] Total time: 0:01:55 (0.7400 s / it) Averaged stats: lr: 0.000197 min_lr: 0.000197 loss: 2.5843 (2.5376) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5870 (0.5870) acc1: 91.1458 (91.1458) acc5: 98.9583 (98.9583) time: 2.0634 data: 1.8077 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7596 (0.7156) acc1: 85.4167 (85.0384) acc5: 98.4375 (97.4425) time: 0.5633 data: 0.3616 max mem: 57114 Test: Total time: 0:00:02 (0.5890 s / it) * Acc@1 84.567 Acc@5 96.987 loss 0.800 Accuracy of the model on the 50000 test images: 84.6% Max accuracy: 84.57% Test: [0/5] eta: 0:00:10 loss: 0.6179 (0.6179) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.1079 data: 1.8643 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7032 (0.7013) acc1: 81.2500 (79.2839) acc5: 96.3542 (94.1177) time: 0.5723 data: 0.3730 max mem: 57114 Test: Total time: 0:00:02 (0.5830 s / it) * Acc@1 77.550 Acc@5 93.796 loss 0.842 Accuracy of the model EMA on 50000 test images: 77.5% Max EMA accuracy: 77.55% Epoch: [272] [ 0/156] eta: 0:09:08 lr: 0.000197 min_lr: 0.000197 loss: 2.7494 (2.7494) weight_decay: 0.0500 (0.0500) time: 3.5144 data: 2.8588 max mem: 57114 Epoch: [272] [ 10/156] eta: 0:02:24 lr: 0.000196 min_lr: 0.000196 loss: 2.6175 (2.4093) weight_decay: 0.0500 (0.0500) time: 0.9911 data: 0.2601 max mem: 57114 Epoch: [272] [ 20/156] eta: 0:02:00 lr: 0.000195 min_lr: 0.000195 loss: 2.5696 (2.4989) weight_decay: 0.0500 (0.0500) time: 0.7524 data: 0.0003 max mem: 57114 Epoch: [272] [ 30/156] eta: 0:01:44 lr: 0.000194 min_lr: 0.000194 loss: 2.5696 (2.4863) weight_decay: 0.0500 (0.0500) time: 0.7405 data: 0.0004 max mem: 57114 Epoch: [272] [ 40/156] eta: 0:01:33 lr: 0.000193 min_lr: 0.000193 loss: 2.6080 (2.4721) weight_decay: 0.0500 (0.0500) time: 0.7301 data: 0.0004 max mem: 57114 Epoch: [272] [ 50/156] eta: 0:01:24 lr: 0.000192 min_lr: 0.000192 loss: 2.5856 (2.4898) weight_decay: 0.0500 (0.0500) time: 0.7359 data: 0.0004 max mem: 57114 Epoch: [272] [ 60/156] eta: 0:01:14 lr: 0.000191 min_lr: 0.000191 loss: 2.5631 (2.5033) weight_decay: 0.0500 (0.0500) time: 0.7146 data: 0.0004 max mem: 57114 Epoch: [272] [ 70/156] eta: 0:01:06 lr: 0.000191 min_lr: 0.000191 loss: 2.7561 (2.5318) weight_decay: 0.0500 (0.0500) time: 0.7139 data: 0.0004 max mem: 57114 Epoch: [272] [ 80/156] eta: 0:00:57 lr: 0.000190 min_lr: 0.000190 loss: 2.7142 (2.5347) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [272] [ 90/156] eta: 0:00:49 lr: 0.000189 min_lr: 0.000189 loss: 2.6476 (2.5400) weight_decay: 0.0500 (0.0500) time: 0.7080 data: 0.0004 max mem: 57114 Epoch: [272] [100/156] eta: 0:00:42 lr: 0.000188 min_lr: 0.000188 loss: 2.4965 (2.5283) weight_decay: 0.0500 (0.0500) time: 0.7067 data: 0.0004 max mem: 57114 Epoch: [272] [110/156] eta: 0:00:34 lr: 0.000187 min_lr: 0.000187 loss: 2.6037 (2.5466) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0004 max mem: 57114 Epoch: [272] [120/156] eta: 0:00:26 lr: 0.000186 min_lr: 0.000186 loss: 2.6725 (2.5450) weight_decay: 0.0500 (0.0500) time: 0.7114 data: 0.0004 max mem: 57114 Epoch: [272] [130/156] eta: 0:00:19 lr: 0.000185 min_lr: 0.000185 loss: 2.2468 (2.5098) weight_decay: 0.0500 (0.0500) time: 0.6941 data: 0.0005 max mem: 57114 Epoch: [272] [140/156] eta: 0:00:11 lr: 0.000184 min_lr: 0.000184 loss: 2.6032 (2.5241) weight_decay: 0.0500 (0.0500) time: 0.6920 data: 0.0004 max mem: 57114 Epoch: [272] [150/156] eta: 0:00:04 lr: 0.000184 min_lr: 0.000184 loss: 2.6547 (2.5209) weight_decay: 0.0500 (0.0500) time: 0.6897 data: 0.0001 max mem: 57114 Epoch: [272] [155/156] eta: 0:00:00 lr: 0.000183 min_lr: 0.000183 loss: 2.6575 (2.5239) weight_decay: 0.0500 (0.0500) time: 0.6878 data: 0.0001 max mem: 57114 Epoch: [272] Total time: 0:01:54 (0.7341 s / it) Averaged stats: lr: 0.000183 min_lr: 0.000183 loss: 2.6575 (2.5398) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.5561 (0.5561) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.2320 data: 1.9714 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7319 (0.6844) acc1: 83.8542 (84.6547) acc5: 98.4375 (97.5703) time: 0.5972 data: 0.3944 max mem: 57114 Test: Total time: 0:00:03 (0.6225 s / it) * Acc@1 84.581 Acc@5 97.067 loss 0.770 Accuracy of the model on the 50000 test images: 84.6% Max accuracy: 84.58% Test: [0/5] eta: 0:00:09 loss: 0.6181 (0.6181) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 1.9948 data: 1.7514 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7034 (0.7016) acc1: 81.2500 (79.2839) acc5: 96.3542 (94.2455) time: 0.5496 data: 0.3504 max mem: 57114 Test: Total time: 0:00:02 (0.5609 s / it) * Acc@1 77.554 Acc@5 93.804 loss 0.843 Accuracy of the model EMA on 50000 test images: 77.6% Max EMA accuracy: 77.55% Epoch: [273] [ 0/156] eta: 0:08:25 lr: 0.000183 min_lr: 0.000183 loss: 2.9475 (2.9475) weight_decay: 0.0500 (0.0500) time: 3.2431 data: 2.5925 max mem: 57114 Epoch: [273] [ 10/156] eta: 0:02:21 lr: 0.000182 min_lr: 0.000182 loss: 2.5582 (2.3839) weight_decay: 0.0500 (0.0500) time: 0.9699 data: 0.2359 max mem: 57114 Epoch: [273] [ 20/156] eta: 0:01:55 lr: 0.000181 min_lr: 0.000181 loss: 2.5025 (2.4199) weight_decay: 0.0500 (0.0500) time: 0.7332 data: 0.0004 max mem: 57114 Epoch: [273] [ 30/156] eta: 0:01:43 lr: 0.000181 min_lr: 0.000181 loss: 2.4808 (2.4127) weight_decay: 0.0500 (0.0500) time: 0.7447 data: 0.0004 max mem: 57114 Epoch: [273] [ 40/156] eta: 0:01:33 lr: 0.000180 min_lr: 0.000180 loss: 2.5352 (2.4185) weight_decay: 0.0500 (0.0500) time: 0.7502 data: 0.0003 max mem: 57114 Epoch: [273] [ 50/156] eta: 0:01:23 lr: 0.000179 min_lr: 0.000179 loss: 2.6604 (2.4609) weight_decay: 0.0500 (0.0500) time: 0.7373 data: 0.0004 max mem: 57114 Epoch: [273] [ 60/156] eta: 0:01:15 lr: 0.000178 min_lr: 0.000178 loss: 2.7362 (2.4880) weight_decay: 0.0500 (0.0500) time: 0.7378 data: 0.0004 max mem: 57114 Epoch: [273] [ 70/156] eta: 0:01:06 lr: 0.000177 min_lr: 0.000177 loss: 2.6977 (2.4733) weight_decay: 0.0500 (0.0500) time: 0.7217 data: 0.0004 max mem: 57114 Epoch: [273] [ 80/156] eta: 0:00:57 lr: 0.000176 min_lr: 0.000176 loss: 2.6380 (2.4789) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0004 max mem: 57114 Epoch: [273] [ 90/156] eta: 0:00:50 lr: 0.000175 min_lr: 0.000175 loss: 2.6580 (2.4890) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0003 max mem: 57114 Epoch: [273] [100/156] eta: 0:00:42 lr: 0.000175 min_lr: 0.000175 loss: 2.6140 (2.4768) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0003 max mem: 57114 Epoch: [273] [110/156] eta: 0:00:34 lr: 0.000174 min_lr: 0.000174 loss: 2.3191 (2.4680) weight_decay: 0.0500 (0.0500) time: 0.7059 data: 0.0004 max mem: 57114 Epoch: [273] [120/156] eta: 0:00:26 lr: 0.000173 min_lr: 0.000173 loss: 2.2966 (2.4608) weight_decay: 0.0500 (0.0500) time: 0.6999 data: 0.0004 max mem: 57114 Epoch: [273] [130/156] eta: 0:00:19 lr: 0.000172 min_lr: 0.000172 loss: 2.4524 (2.4711) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0008 max mem: 57114 Epoch: [273] [140/156] eta: 0:00:11 lr: 0.000171 min_lr: 0.000171 loss: 2.5177 (2.4658) weight_decay: 0.0500 (0.0500) time: 0.6828 data: 0.0007 max mem: 57114 Epoch: [273] [150/156] eta: 0:00:04 lr: 0.000170 min_lr: 0.000170 loss: 2.6965 (2.4839) weight_decay: 0.0500 (0.0500) time: 0.6844 data: 0.0001 max mem: 57114 Epoch: [273] [155/156] eta: 0:00:00 lr: 0.000170 min_lr: 0.000170 loss: 2.6965 (2.4854) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.0001 max mem: 57114 Epoch: [273] Total time: 0:01:54 (0.7328 s / it) Averaged stats: lr: 0.000170 min_lr: 0.000170 loss: 2.6965 (2.5301) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6365 (0.6365) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.1875 data: 1.9314 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8155 (0.7596) acc1: 84.8958 (84.9105) acc5: 98.4375 (97.4425) time: 0.5881 data: 0.3863 max mem: 57114 Test: Total time: 0:00:03 (0.6134 s / it) * Acc@1 84.569 Acc@5 96.983 loss 0.838 Accuracy of the model on the 50000 test images: 84.6% Max accuracy: 84.58% Test: [0/5] eta: 0:00:11 loss: 0.6182 (0.6182) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.2924 data: 2.0489 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7035 (0.7018) acc1: 81.2500 (79.2839) acc5: 96.3542 (94.2455) time: 0.6091 data: 0.4099 max mem: 57114 Test: Total time: 0:00:03 (0.6190 s / it) * Acc@1 77.566 Acc@5 93.814 loss 0.843 Accuracy of the model EMA on 50000 test images: 77.6% Max EMA accuracy: 77.57% Epoch: [274] [ 0/156] eta: 0:08:29 lr: 0.000170 min_lr: 0.000170 loss: 2.8898 (2.8898) weight_decay: 0.0500 (0.0500) time: 3.2648 data: 2.6097 max mem: 57114 Epoch: [274] [ 10/156] eta: 0:02:21 lr: 0.000169 min_lr: 0.000169 loss: 2.5481 (2.3952) weight_decay: 0.0500 (0.0500) time: 0.9680 data: 0.2375 max mem: 57114 Epoch: [274] [ 20/156] eta: 0:01:54 lr: 0.000168 min_lr: 0.000168 loss: 2.5481 (2.5074) weight_decay: 0.0500 (0.0500) time: 0.7209 data: 0.0003 max mem: 57114 Epoch: [274] [ 30/156] eta: 0:01:40 lr: 0.000168 min_lr: 0.000168 loss: 2.6699 (2.4988) weight_decay: 0.0500 (0.0500) time: 0.7071 data: 0.0003 max mem: 57114 Epoch: [274] [ 40/156] eta: 0:01:30 lr: 0.000167 min_lr: 0.000167 loss: 2.3832 (2.4834) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.0003 max mem: 57114 Epoch: [274] [ 50/156] eta: 0:01:21 lr: 0.000166 min_lr: 0.000166 loss: 2.4530 (2.5025) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0003 max mem: 57114 Epoch: [274] [ 60/156] eta: 0:01:12 lr: 0.000165 min_lr: 0.000165 loss: 2.6667 (2.5302) weight_decay: 0.0500 (0.0500) time: 0.7217 data: 0.0003 max mem: 57114 Epoch: [274] [ 70/156] eta: 0:01:04 lr: 0.000164 min_lr: 0.000164 loss: 2.5449 (2.5016) weight_decay: 0.0500 (0.0500) time: 0.7177 data: 0.0004 max mem: 57114 Epoch: [274] [ 80/156] eta: 0:00:56 lr: 0.000163 min_lr: 0.000163 loss: 2.6305 (2.5248) weight_decay: 0.0500 (0.0500) time: 0.7224 data: 0.0004 max mem: 57114 Epoch: [274] [ 90/156] eta: 0:00:49 lr: 0.000163 min_lr: 0.000163 loss: 2.6803 (2.5226) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.0003 max mem: 57114 Epoch: [274] [100/156] eta: 0:00:41 lr: 0.000162 min_lr: 0.000162 loss: 2.5795 (2.5240) weight_decay: 0.0500 (0.0500) time: 0.7016 data: 0.0004 max mem: 57114 Epoch: [274] [110/156] eta: 0:00:33 lr: 0.000161 min_lr: 0.000161 loss: 2.6473 (2.5205) weight_decay: 0.0500 (0.0500) time: 0.6998 data: 0.0004 max mem: 57114 Epoch: [274] [120/156] eta: 0:00:26 lr: 0.000160 min_lr: 0.000160 loss: 2.7350 (2.5282) weight_decay: 0.0500 (0.0500) time: 0.6923 data: 0.0004 max mem: 57114 Epoch: [274] [130/156] eta: 0:00:18 lr: 0.000159 min_lr: 0.000159 loss: 2.6973 (2.5326) weight_decay: 0.0500 (0.0500) time: 0.6996 data: 0.0009 max mem: 57114 Epoch: [274] [140/156] eta: 0:00:11 lr: 0.000159 min_lr: 0.000159 loss: 2.6101 (2.5244) weight_decay: 0.0500 (0.0500) time: 0.6958 data: 0.0007 max mem: 57114 Epoch: [274] [150/156] eta: 0:00:04 lr: 0.000158 min_lr: 0.000158 loss: 2.1762 (2.5086) weight_decay: 0.0500 (0.0500) time: 0.6886 data: 0.0001 max mem: 57114 Epoch: [274] [155/156] eta: 0:00:00 lr: 0.000157 min_lr: 0.000157 loss: 2.5338 (2.5110) weight_decay: 0.0500 (0.0500) time: 0.6914 data: 0.0001 max mem: 57114 Epoch: [274] Total time: 0:01:53 (0.7258 s / it) Averaged stats: lr: 0.000157 min_lr: 0.000157 loss: 2.5338 (2.5283) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5361 (0.5361) acc1: 91.1458 (91.1458) acc5: 98.9583 (98.9583) time: 2.0419 data: 1.7859 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7270 (0.6629) acc1: 83.3333 (84.3990) acc5: 98.4375 (97.4425) time: 0.5590 data: 0.3572 max mem: 57114 Test: Total time: 0:00:02 (0.5836 s / it) * Acc@1 84.511 Acc@5 97.027 loss 0.750 Accuracy of the model on the 50000 test images: 84.5% Max accuracy: 84.58% Test: [0/5] eta: 0:00:11 loss: 0.6185 (0.6185) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.3213 data: 2.0778 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7034 (0.7020) acc1: 81.2500 (79.4118) acc5: 97.3958 (94.5013) time: 0.6149 data: 0.4156 max mem: 57114 Test: Total time: 0:00:03 (0.6273 s / it) * Acc@1 77.584 Acc@5 93.822 loss 0.843 Accuracy of the model EMA on 50000 test images: 77.6% Max EMA accuracy: 77.58% Epoch: [275] [ 0/156] eta: 0:08:39 lr: 0.000157 min_lr: 0.000157 loss: 2.0686 (2.0686) weight_decay: 0.0500 (0.0500) time: 3.3271 data: 2.6696 max mem: 57114 Epoch: [275] [ 10/156] eta: 0:02:20 lr: 0.000157 min_lr: 0.000157 loss: 2.5409 (2.4712) weight_decay: 0.0500 (0.0500) time: 0.9597 data: 0.2430 max mem: 57114 Epoch: [275] [ 20/156] eta: 0:01:54 lr: 0.000156 min_lr: 0.000156 loss: 2.5567 (2.4595) weight_decay: 0.0500 (0.0500) time: 0.7176 data: 0.0003 max mem: 57114 Epoch: [275] [ 30/156] eta: 0:01:40 lr: 0.000155 min_lr: 0.000155 loss: 2.5640 (2.4804) weight_decay: 0.0500 (0.0500) time: 0.7098 data: 0.0003 max mem: 57114 Epoch: [275] [ 40/156] eta: 0:01:30 lr: 0.000154 min_lr: 0.000154 loss: 2.3592 (2.4389) weight_decay: 0.0500 (0.0500) time: 0.7213 data: 0.0003 max mem: 57114 Epoch: [275] [ 50/156] eta: 0:01:21 lr: 0.000153 min_lr: 0.000153 loss: 2.3257 (2.4385) weight_decay: 0.0500 (0.0500) time: 0.7309 data: 0.0004 max mem: 57114 Epoch: [275] [ 60/156] eta: 0:01:13 lr: 0.000153 min_lr: 0.000153 loss: 2.4714 (2.4329) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0004 max mem: 57114 Epoch: [275] [ 70/156] eta: 0:01:05 lr: 0.000152 min_lr: 0.000152 loss: 2.5979 (2.4388) weight_decay: 0.0500 (0.0500) time: 0.7148 data: 0.0004 max mem: 57114 Epoch: [275] [ 80/156] eta: 0:00:57 lr: 0.000151 min_lr: 0.000151 loss: 2.6292 (2.4564) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0004 max mem: 57114 Epoch: [275] [ 90/156] eta: 0:00:49 lr: 0.000150 min_lr: 0.000150 loss: 2.6414 (2.4609) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [275] [100/156] eta: 0:00:41 lr: 0.000149 min_lr: 0.000149 loss: 2.6185 (2.4537) weight_decay: 0.0500 (0.0500) time: 0.7007 data: 0.0004 max mem: 57114 Epoch: [275] [110/156] eta: 0:00:33 lr: 0.000149 min_lr: 0.000149 loss: 2.4728 (2.4653) weight_decay: 0.0500 (0.0500) time: 0.7017 data: 0.0003 max mem: 57114 Epoch: [275] [120/156] eta: 0:00:26 lr: 0.000148 min_lr: 0.000148 loss: 2.6112 (2.4674) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0004 max mem: 57114 Epoch: [275] [130/156] eta: 0:00:19 lr: 0.000147 min_lr: 0.000147 loss: 2.6779 (2.4774) weight_decay: 0.0500 (0.0500) time: 0.7020 data: 0.0008 max mem: 57114 Epoch: [275] [140/156] eta: 0:00:11 lr: 0.000146 min_lr: 0.000146 loss: 2.7205 (2.4847) weight_decay: 0.0500 (0.0500) time: 0.6948 data: 0.0007 max mem: 57114 Epoch: [275] [150/156] eta: 0:00:04 lr: 0.000146 min_lr: 0.000146 loss: 2.6341 (2.4981) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.0001 max mem: 57114 Epoch: [275] [155/156] eta: 0:00:00 lr: 0.000145 min_lr: 0.000145 loss: 2.6288 (2.4965) weight_decay: 0.0500 (0.0500) time: 0.6907 data: 0.0001 max mem: 57114 Epoch: [275] Total time: 0:01:53 (0.7277 s / it) Averaged stats: lr: 0.000145 min_lr: 0.000145 loss: 2.6288 (2.5257) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5884 (0.5884) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.1439 data: 1.8876 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7677 (0.7135) acc1: 84.8958 (84.5269) acc5: 98.4375 (97.6982) time: 0.5796 data: 0.3776 max mem: 57114 Test: Total time: 0:00:03 (0.6057 s / it) * Acc@1 84.627 Acc@5 97.027 loss 0.797 Accuracy of the model on the 50000 test images: 84.6% Max accuracy: 84.63% Test: [0/5] eta: 0:00:10 loss: 0.6186 (0.6186) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.0292 data: 1.7858 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7033 (0.7022) acc1: 81.2500 (79.2839) acc5: 97.3958 (94.5013) time: 0.5566 data: 0.3572 max mem: 57114 Test: Total time: 0:00:02 (0.5683 s / it) * Acc@1 77.596 Acc@5 93.828 loss 0.843 Accuracy of the model EMA on 50000 test images: 77.6% Max EMA accuracy: 77.60% Epoch: [276] [ 0/156] eta: 0:07:19 lr: 0.000145 min_lr: 0.000145 loss: 2.6308 (2.6308) weight_decay: 0.0500 (0.0500) time: 2.8204 data: 2.1719 max mem: 57114 Epoch: [276] [ 10/156] eta: 0:02:14 lr: 0.000144 min_lr: 0.000144 loss: 2.8230 (2.6378) weight_decay: 0.0500 (0.0500) time: 0.9205 data: 0.1977 max mem: 57114 Epoch: [276] [ 20/156] eta: 0:01:54 lr: 0.000144 min_lr: 0.000144 loss: 2.8230 (2.7037) weight_decay: 0.0500 (0.0500) time: 0.7466 data: 0.0003 max mem: 57114 Epoch: [276] [ 30/156] eta: 0:01:41 lr: 0.000143 min_lr: 0.000143 loss: 2.7381 (2.6881) weight_decay: 0.0500 (0.0500) time: 0.7366 data: 0.0003 max mem: 57114 Epoch: [276] [ 40/156] eta: 0:01:30 lr: 0.000142 min_lr: 0.000142 loss: 2.6959 (2.6769) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0003 max mem: 57114 Epoch: [276] [ 50/156] eta: 0:01:22 lr: 0.000141 min_lr: 0.000141 loss: 2.6880 (2.6328) weight_decay: 0.0500 (0.0500) time: 0.7304 data: 0.0003 max mem: 57114 Epoch: [276] [ 60/156] eta: 0:01:14 lr: 0.000141 min_lr: 0.000141 loss: 2.6771 (2.6348) weight_decay: 0.0500 (0.0500) time: 0.7471 data: 0.0004 max mem: 57114 Epoch: [276] [ 70/156] eta: 0:01:05 lr: 0.000140 min_lr: 0.000140 loss: 2.5726 (2.6053) weight_decay: 0.0500 (0.0500) time: 0.7435 data: 0.0004 max mem: 57114 Epoch: [276] [ 80/156] eta: 0:00:57 lr: 0.000139 min_lr: 0.000139 loss: 2.3472 (2.5646) weight_decay: 0.0500 (0.0500) time: 0.7109 data: 0.0004 max mem: 57114 Epoch: [276] [ 90/156] eta: 0:00:49 lr: 0.000138 min_lr: 0.000138 loss: 2.6817 (2.5802) weight_decay: 0.0500 (0.0500) time: 0.7013 data: 0.0004 max mem: 57114 Epoch: [276] [100/156] eta: 0:00:41 lr: 0.000138 min_lr: 0.000138 loss: 2.6854 (2.5681) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0004 max mem: 57114 Epoch: [276] [110/156] eta: 0:00:34 lr: 0.000137 min_lr: 0.000137 loss: 2.5407 (2.5535) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0004 max mem: 57114 Epoch: [276] [120/156] eta: 0:00:26 lr: 0.000136 min_lr: 0.000136 loss: 2.5728 (2.5638) weight_decay: 0.0500 (0.0500) time: 0.7062 data: 0.0004 max mem: 57114 Epoch: [276] [130/156] eta: 0:00:19 lr: 0.000135 min_lr: 0.000135 loss: 2.6851 (2.5613) weight_decay: 0.0500 (0.0500) time: 0.7016 data: 0.0006 max mem: 57114 Epoch: [276] [140/156] eta: 0:00:11 lr: 0.000135 min_lr: 0.000135 loss: 2.6543 (2.5601) weight_decay: 0.0500 (0.0500) time: 0.6951 data: 0.0005 max mem: 57114 Epoch: [276] [150/156] eta: 0:00:04 lr: 0.000134 min_lr: 0.000134 loss: 2.6413 (2.5609) weight_decay: 0.0500 (0.0500) time: 0.6902 data: 0.0001 max mem: 57114 Epoch: [276] [155/156] eta: 0:00:00 lr: 0.000134 min_lr: 0.000134 loss: 2.6073 (2.5634) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.0001 max mem: 57114 Epoch: [276] Total time: 0:01:53 (0.7301 s / it) Averaged stats: lr: 0.000134 min_lr: 0.000134 loss: 2.6073 (2.5280) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6183 (0.6183) acc1: 89.5833 (89.5833) acc5: 98.4375 (98.4375) time: 2.1820 data: 1.9263 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8000 (0.7454) acc1: 84.3750 (84.2711) acc5: 98.4375 (97.4425) time: 0.5872 data: 0.3853 max mem: 57114 Test: Total time: 0:00:03 (0.6094 s / it) * Acc@1 84.591 Acc@5 97.011 loss 0.825 Accuracy of the model on the 50000 test images: 84.6% Max accuracy: 84.63% Test: [0/5] eta: 0:00:12 loss: 0.6186 (0.6186) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.4789 data: 2.2354 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7031 (0.7022) acc1: 81.2500 (79.2839) acc5: 97.3958 (94.6292) time: 0.6465 data: 0.4472 max mem: 57114 Test: Total time: 0:00:03 (0.6578 s / it) * Acc@1 77.621 Acc@5 93.838 loss 0.843 Accuracy of the model EMA on 50000 test images: 77.6% Max EMA accuracy: 77.62% Epoch: [277] [ 0/156] eta: 0:08:31 lr: 0.000133 min_lr: 0.000133 loss: 2.9906 (2.9906) weight_decay: 0.0500 (0.0500) time: 3.2793 data: 2.6265 max mem: 57114 Epoch: [277] [ 10/156] eta: 0:02:20 lr: 0.000133 min_lr: 0.000133 loss: 2.6784 (2.5895) weight_decay: 0.0500 (0.0500) time: 0.9637 data: 0.2390 max mem: 57114 Epoch: [277] [ 20/156] eta: 0:01:57 lr: 0.000132 min_lr: 0.000132 loss: 2.6784 (2.5778) weight_decay: 0.0500 (0.0500) time: 0.7417 data: 0.0003 max mem: 57114 Epoch: [277] [ 30/156] eta: 0:01:44 lr: 0.000131 min_lr: 0.000131 loss: 2.7109 (2.6261) weight_decay: 0.0500 (0.0500) time: 0.7562 data: 0.0004 max mem: 57114 Epoch: [277] [ 40/156] eta: 0:01:34 lr: 0.000131 min_lr: 0.000131 loss: 2.6990 (2.6304) weight_decay: 0.0500 (0.0500) time: 0.7624 data: 0.0006 max mem: 57114 Epoch: [277] [ 50/156] eta: 0:01:25 lr: 0.000130 min_lr: 0.000130 loss: 2.6799 (2.6148) weight_decay: 0.0500 (0.0500) time: 0.7625 data: 0.0006 max mem: 57114 Epoch: [277] [ 60/156] eta: 0:01:16 lr: 0.000129 min_lr: 0.000129 loss: 2.6799 (2.6232) weight_decay: 0.0500 (0.0500) time: 0.7659 data: 0.0004 max mem: 57114 Epoch: [277] [ 70/156] eta: 0:01:07 lr: 0.000128 min_lr: 0.000128 loss: 2.6759 (2.6181) weight_decay: 0.0500 (0.0500) time: 0.7430 data: 0.0004 max mem: 57114 Epoch: [277] [ 80/156] eta: 0:00:58 lr: 0.000128 min_lr: 0.000128 loss: 2.7325 (2.6312) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.0004 max mem: 57114 Epoch: [277] [ 90/156] eta: 0:00:50 lr: 0.000127 min_lr: 0.000127 loss: 2.6772 (2.6027) weight_decay: 0.0500 (0.0500) time: 0.6885 data: 0.0004 max mem: 57114 Epoch: [277] [100/156] eta: 0:00:42 lr: 0.000126 min_lr: 0.000126 loss: 2.5353 (2.5864) weight_decay: 0.0500 (0.0500) time: 0.6945 data: 0.0004 max mem: 57114 Epoch: [277] [110/156] eta: 0:00:34 lr: 0.000125 min_lr: 0.000125 loss: 2.5675 (2.5721) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0004 max mem: 57114 Epoch: [277] [120/156] eta: 0:00:27 lr: 0.000125 min_lr: 0.000125 loss: 2.6732 (2.5768) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0004 max mem: 57114 Epoch: [277] [130/156] eta: 0:00:19 lr: 0.000124 min_lr: 0.000124 loss: 2.6282 (2.5778) weight_decay: 0.0500 (0.0500) time: 0.7001 data: 0.0008 max mem: 57114 Epoch: [277] [140/156] eta: 0:00:11 lr: 0.000123 min_lr: 0.000123 loss: 2.6122 (2.5770) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0007 max mem: 57114 Epoch: [277] [150/156] eta: 0:00:04 lr: 0.000123 min_lr: 0.000123 loss: 2.6122 (2.5618) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0001 max mem: 57114 Epoch: [277] [155/156] eta: 0:00:00 lr: 0.000122 min_lr: 0.000122 loss: 2.4647 (2.5651) weight_decay: 0.0500 (0.0500) time: 0.6889 data: 0.0001 max mem: 57114 Epoch: [277] Total time: 0:01:55 (0.7392 s / it) Averaged stats: lr: 0.000122 min_lr: 0.000122 loss: 2.4647 (2.5267) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5786 (0.5786) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0925 data: 1.8365 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7586 (0.6979) acc1: 83.8542 (84.2711) acc5: 98.4375 (97.4425) time: 0.5692 data: 0.3674 max mem: 57114 Test: Total time: 0:00:02 (0.5915 s / it) * Acc@1 84.585 Acc@5 97.027 loss 0.780 Accuracy of the model on the 50000 test images: 84.6% Max accuracy: 84.63% Test: [0/5] eta: 0:00:11 loss: 0.6185 (0.6185) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.2367 data: 1.9931 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7029 (0.7022) acc1: 81.2500 (79.2839) acc5: 97.3958 (94.5013) time: 0.5980 data: 0.3987 max mem: 57114 Test: Total time: 0:00:03 (0.6126 s / it) * Acc@1 77.639 Acc@5 93.850 loss 0.843 Accuracy of the model EMA on 50000 test images: 77.6% Max EMA accuracy: 77.64% Epoch: [278] [ 0/156] eta: 0:09:30 lr: 0.000122 min_lr: 0.000122 loss: 2.9940 (2.9940) weight_decay: 0.0500 (0.0500) time: 3.6559 data: 2.9993 max mem: 57114 Epoch: [278] [ 10/156] eta: 0:02:25 lr: 0.000122 min_lr: 0.000122 loss: 2.6514 (2.5315) weight_decay: 0.0500 (0.0500) time: 0.9982 data: 0.2730 max mem: 57114 Epoch: [278] [ 20/156] eta: 0:01:59 lr: 0.000121 min_lr: 0.000121 loss: 2.6763 (2.6114) weight_decay: 0.0500 (0.0500) time: 0.7400 data: 0.0004 max mem: 57114 Epoch: [278] [ 30/156] eta: 0:01:44 lr: 0.000120 min_lr: 0.000120 loss: 2.7458 (2.5705) weight_decay: 0.0500 (0.0500) time: 0.7389 data: 0.0004 max mem: 57114 Epoch: [278] [ 40/156] eta: 0:01:33 lr: 0.000119 min_lr: 0.000119 loss: 2.6677 (2.5719) weight_decay: 0.0500 (0.0500) time: 0.7284 data: 0.0004 max mem: 57114 Epoch: [278] [ 50/156] eta: 0:01:23 lr: 0.000119 min_lr: 0.000119 loss: 2.6598 (2.5660) weight_decay: 0.0500 (0.0500) time: 0.7290 data: 0.0004 max mem: 57114 Epoch: [278] [ 60/156] eta: 0:01:14 lr: 0.000118 min_lr: 0.000118 loss: 2.6598 (2.5835) weight_decay: 0.0500 (0.0500) time: 0.7190 data: 0.0004 max mem: 57114 Epoch: [278] [ 70/156] eta: 0:01:05 lr: 0.000117 min_lr: 0.000117 loss: 2.6063 (2.5924) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0004 max mem: 57114 Epoch: [278] [ 80/156] eta: 0:00:57 lr: 0.000117 min_lr: 0.000117 loss: 2.5584 (2.5824) weight_decay: 0.0500 (0.0500) time: 0.7064 data: 0.0004 max mem: 57114 Epoch: [278] [ 90/156] eta: 0:00:49 lr: 0.000116 min_lr: 0.000116 loss: 2.6633 (2.5874) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0004 max mem: 57114 Epoch: [278] [100/156] eta: 0:00:42 lr: 0.000115 min_lr: 0.000115 loss: 2.6428 (2.5818) weight_decay: 0.0500 (0.0500) time: 0.7162 data: 0.0004 max mem: 57114 Epoch: [278] [110/156] eta: 0:00:34 lr: 0.000115 min_lr: 0.000115 loss: 2.6150 (2.5763) weight_decay: 0.0500 (0.0500) time: 0.7069 data: 0.0004 max mem: 57114 Epoch: [278] [120/156] eta: 0:00:26 lr: 0.000114 min_lr: 0.000114 loss: 2.5643 (2.5615) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0004 max mem: 57114 Epoch: [278] [130/156] eta: 0:00:19 lr: 0.000113 min_lr: 0.000113 loss: 2.4336 (2.5476) weight_decay: 0.0500 (0.0500) time: 0.6945 data: 0.0009 max mem: 57114 Epoch: [278] [140/156] eta: 0:00:11 lr: 0.000113 min_lr: 0.000113 loss: 2.4336 (2.5496) weight_decay: 0.0500 (0.0500) time: 0.6830 data: 0.0008 max mem: 57114 Epoch: [278] [150/156] eta: 0:00:04 lr: 0.000112 min_lr: 0.000112 loss: 2.4944 (2.5352) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0002 max mem: 57114 Epoch: [278] [155/156] eta: 0:00:00 lr: 0.000112 min_lr: 0.000112 loss: 2.5171 (2.5440) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0001 max mem: 57114 Epoch: [278] Total time: 0:01:54 (0.7321 s / it) Averaged stats: lr: 0.000112 min_lr: 0.000112 loss: 2.5171 (2.5199) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5961 (0.5961) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0965 data: 1.8407 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7699 (0.7191) acc1: 84.3750 (84.9105) acc5: 98.4375 (97.3146) time: 0.5700 data: 0.3682 max mem: 57114 Test: Total time: 0:00:02 (0.5922 s / it) * Acc@1 84.545 Acc@5 96.991 loss 0.805 Accuracy of the model on the 50000 test images: 84.5% Max accuracy: 84.63% Test: [0/5] eta: 0:00:11 loss: 0.6184 (0.6184) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.3275 data: 2.0839 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7025 (0.7021) acc1: 81.2500 (79.2839) acc5: 97.3958 (94.5013) time: 0.6162 data: 0.4169 max mem: 57114 Test: Total time: 0:00:03 (0.6298 s / it) * Acc@1 77.663 Acc@5 93.870 loss 0.842 Accuracy of the model EMA on 50000 test images: 77.7% Max EMA accuracy: 77.66% Epoch: [279] [ 0/156] eta: 0:08:13 lr: 0.000112 min_lr: 0.000112 loss: 2.7675 (2.7675) weight_decay: 0.0500 (0.0500) time: 3.1649 data: 2.5151 max mem: 57114 Epoch: [279] [ 10/156] eta: 0:02:17 lr: 0.000111 min_lr: 0.000111 loss: 2.4939 (2.4161) weight_decay: 0.0500 (0.0500) time: 0.9424 data: 0.2289 max mem: 57114 Epoch: [279] [ 20/156] eta: 0:01:54 lr: 0.000110 min_lr: 0.000110 loss: 2.4052 (2.4082) weight_decay: 0.0500 (0.0500) time: 0.7243 data: 0.0003 max mem: 57114 Epoch: [279] [ 30/156] eta: 0:01:41 lr: 0.000110 min_lr: 0.000110 loss: 2.5672 (2.4718) weight_decay: 0.0500 (0.0500) time: 0.7246 data: 0.0003 max mem: 57114 Epoch: [279] [ 40/156] eta: 0:01:30 lr: 0.000109 min_lr: 0.000109 loss: 2.5976 (2.4885) weight_decay: 0.0500 (0.0500) time: 0.7185 data: 0.0004 max mem: 57114 Epoch: [279] [ 50/156] eta: 0:01:21 lr: 0.000108 min_lr: 0.000108 loss: 2.5682 (2.4819) weight_decay: 0.0500 (0.0500) time: 0.7199 data: 0.0004 max mem: 57114 Epoch: [279] [ 60/156] eta: 0:01:13 lr: 0.000108 min_lr: 0.000108 loss: 2.5611 (2.4551) weight_decay: 0.0500 (0.0500) time: 0.7183 data: 0.0004 max mem: 57114 Epoch: [279] [ 70/156] eta: 0:01:05 lr: 0.000107 min_lr: 0.000107 loss: 2.5735 (2.4690) weight_decay: 0.0500 (0.0500) time: 0.7222 data: 0.0004 max mem: 57114 Epoch: [279] [ 80/156] eta: 0:00:57 lr: 0.000106 min_lr: 0.000106 loss: 2.6889 (2.4976) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0004 max mem: 57114 Epoch: [279] [ 90/156] eta: 0:00:49 lr: 0.000106 min_lr: 0.000106 loss: 2.5843 (2.4905) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0004 max mem: 57114 Epoch: [279] [100/156] eta: 0:00:41 lr: 0.000105 min_lr: 0.000105 loss: 2.3322 (2.4799) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0004 max mem: 57114 Epoch: [279] [110/156] eta: 0:00:33 lr: 0.000104 min_lr: 0.000104 loss: 2.5467 (2.4869) weight_decay: 0.0500 (0.0500) time: 0.6977 data: 0.0004 max mem: 57114 Epoch: [279] [120/156] eta: 0:00:26 lr: 0.000104 min_lr: 0.000104 loss: 2.6755 (2.4852) weight_decay: 0.0500 (0.0500) time: 0.7048 data: 0.0004 max mem: 57114 Epoch: [279] [130/156] eta: 0:00:19 lr: 0.000103 min_lr: 0.000103 loss: 2.6444 (2.5002) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0010 max mem: 57114 Epoch: [279] [140/156] eta: 0:00:11 lr: 0.000102 min_lr: 0.000102 loss: 2.6444 (2.5042) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0008 max mem: 57114 Epoch: [279] [150/156] eta: 0:00:04 lr: 0.000102 min_lr: 0.000102 loss: 2.6368 (2.5086) weight_decay: 0.0500 (0.0500) time: 0.6888 data: 0.0001 max mem: 57114 Epoch: [279] [155/156] eta: 0:00:00 lr: 0.000101 min_lr: 0.000101 loss: 2.5307 (2.5094) weight_decay: 0.0500 (0.0500) time: 0.6889 data: 0.0001 max mem: 57114 Epoch: [279] Total time: 0:01:53 (0.7278 s / it) Averaged stats: lr: 0.000101 min_lr: 0.000101 loss: 2.5307 (2.5278) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5715 (0.5715) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.0948 data: 1.8364 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7594 (0.6950) acc1: 84.3750 (85.0384) acc5: 98.4375 (97.4425) time: 0.5697 data: 0.3673 max mem: 57114 Test: Total time: 0:00:02 (0.5927 s / it) * Acc@1 84.721 Acc@5 97.091 loss 0.780 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.72% Test: [0/5] eta: 0:00:09 loss: 0.6182 (0.6182) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 1.9789 data: 1.7354 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7021 (0.7020) acc1: 81.2500 (79.4118) acc5: 97.3958 (94.5013) time: 0.5464 data: 0.3471 max mem: 57114 Test: Total time: 0:00:02 (0.5564 s / it) * Acc@1 77.697 Acc@5 93.884 loss 0.842 Accuracy of the model EMA on 50000 test images: 77.7% Max EMA accuracy: 77.70% Epoch: [280] [ 0/156] eta: 0:08:04 lr: 0.000101 min_lr: 0.000101 loss: 2.0101 (2.0101) weight_decay: 0.0500 (0.0500) time: 3.1062 data: 2.4455 max mem: 57114 Epoch: [280] [ 10/156] eta: 0:02:19 lr: 0.000101 min_lr: 0.000101 loss: 2.6123 (2.5334) weight_decay: 0.0500 (0.0500) time: 0.9576 data: 0.2227 max mem: 57114 Epoch: [280] [ 20/156] eta: 0:01:54 lr: 0.000100 min_lr: 0.000100 loss: 2.6640 (2.5595) weight_decay: 0.0500 (0.0500) time: 0.7288 data: 0.0004 max mem: 57114 Epoch: [280] [ 30/156] eta: 0:01:42 lr: 0.000099 min_lr: 0.000099 loss: 2.6287 (2.5270) weight_decay: 0.0500 (0.0500) time: 0.7307 data: 0.0004 max mem: 57114 Epoch: [280] [ 40/156] eta: 0:01:31 lr: 0.000099 min_lr: 0.000099 loss: 2.5919 (2.5726) weight_decay: 0.0500 (0.0500) time: 0.7398 data: 0.0004 max mem: 57114 Epoch: [280] [ 50/156] eta: 0:01:22 lr: 0.000098 min_lr: 0.000098 loss: 2.6243 (2.5325) weight_decay: 0.0500 (0.0500) time: 0.7185 data: 0.0004 max mem: 57114 Epoch: [280] [ 60/156] eta: 0:01:14 lr: 0.000097 min_lr: 0.000097 loss: 2.6243 (2.5189) weight_decay: 0.0500 (0.0500) time: 0.7315 data: 0.0004 max mem: 57114 Epoch: [280] [ 70/156] eta: 0:01:05 lr: 0.000097 min_lr: 0.000097 loss: 2.5152 (2.5166) weight_decay: 0.0500 (0.0500) time: 0.7337 data: 0.0004 max mem: 57114 Epoch: [280] [ 80/156] eta: 0:00:57 lr: 0.000096 min_lr: 0.000096 loss: 2.5083 (2.5138) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0004 max mem: 57114 Epoch: [280] [ 90/156] eta: 0:00:49 lr: 0.000096 min_lr: 0.000096 loss: 2.4619 (2.4801) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0004 max mem: 57114 Epoch: [280] [100/156] eta: 0:00:41 lr: 0.000095 min_lr: 0.000095 loss: 2.4251 (2.4770) weight_decay: 0.0500 (0.0500) time: 0.7038 data: 0.0004 max mem: 57114 Epoch: [280] [110/156] eta: 0:00:34 lr: 0.000094 min_lr: 0.000094 loss: 2.6519 (2.4847) weight_decay: 0.0500 (0.0500) time: 0.7015 data: 0.0004 max mem: 57114 Epoch: [280] [120/156] eta: 0:00:26 lr: 0.000094 min_lr: 0.000094 loss: 2.6109 (2.4612) weight_decay: 0.0500 (0.0500) time: 0.7052 data: 0.0003 max mem: 57114 Epoch: [280] [130/156] eta: 0:00:19 lr: 0.000093 min_lr: 0.000093 loss: 2.3724 (2.4595) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0008 max mem: 57114 Epoch: [280] [140/156] eta: 0:00:11 lr: 0.000093 min_lr: 0.000093 loss: 2.6619 (2.4804) weight_decay: 0.0500 (0.0500) time: 0.6949 data: 0.0007 max mem: 57114 Epoch: [280] [150/156] eta: 0:00:04 lr: 0.000092 min_lr: 0.000092 loss: 2.6904 (2.4923) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.0001 max mem: 57114 Epoch: [280] [155/156] eta: 0:00:00 lr: 0.000092 min_lr: 0.000092 loss: 2.6255 (2.4873) weight_decay: 0.0500 (0.0500) time: 0.6856 data: 0.0001 max mem: 57114 Epoch: [280] Total time: 0:01:54 (0.7310 s / it) Averaged stats: lr: 0.000092 min_lr: 0.000092 loss: 2.6255 (2.5200) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5654 (0.5654) acc1: 90.6250 (90.6250) acc5: 98.9583 (98.9583) time: 2.1104 data: 1.8546 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7381 (0.6843) acc1: 83.8542 (84.7826) acc5: 98.9583 (97.6982) time: 0.5728 data: 0.3710 max mem: 57114 Test: Total time: 0:00:02 (0.5964 s / it) * Acc@1 84.651 Acc@5 97.033 loss 0.765 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.72% Test: [0/5] eta: 0:00:11 loss: 0.6181 (0.6181) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.3193 data: 2.0759 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7018 (0.7018) acc1: 81.7708 (79.6675) acc5: 97.3958 (94.5013) time: 0.6145 data: 0.4153 max mem: 57114 Test: Total time: 0:00:03 (0.6267 s / it) * Acc@1 77.741 Acc@5 93.892 loss 0.841 Accuracy of the model EMA on 50000 test images: 77.7% Max EMA accuracy: 77.74% Epoch: [281] [ 0/156] eta: 0:06:42 lr: 0.000092 min_lr: 0.000092 loss: 2.0319 (2.0319) weight_decay: 0.0500 (0.0500) time: 2.5800 data: 1.9213 max mem: 57114 Epoch: [281] [ 10/156] eta: 0:02:09 lr: 0.000091 min_lr: 0.000091 loss: 2.5829 (2.5059) weight_decay: 0.0500 (0.0500) time: 0.8840 data: 0.1749 max mem: 57114 Epoch: [281] [ 20/156] eta: 0:01:48 lr: 0.000090 min_lr: 0.000090 loss: 2.7030 (2.5965) weight_decay: 0.0500 (0.0500) time: 0.7110 data: 0.0003 max mem: 57114 Epoch: [281] [ 30/156] eta: 0:01:37 lr: 0.000090 min_lr: 0.000090 loss: 2.5181 (2.5277) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0004 max mem: 57114 Epoch: [281] [ 40/156] eta: 0:01:28 lr: 0.000089 min_lr: 0.000089 loss: 2.4610 (2.5294) weight_decay: 0.0500 (0.0500) time: 0.7172 data: 0.0004 max mem: 57114 Epoch: [281] [ 50/156] eta: 0:01:20 lr: 0.000089 min_lr: 0.000089 loss: 2.5784 (2.5171) weight_decay: 0.0500 (0.0500) time: 0.7378 data: 0.0004 max mem: 57114 Epoch: [281] [ 60/156] eta: 0:01:12 lr: 0.000088 min_lr: 0.000088 loss: 2.3786 (2.4867) weight_decay: 0.0500 (0.0500) time: 0.7333 data: 0.0004 max mem: 57114 Epoch: [281] [ 70/156] eta: 0:01:04 lr: 0.000087 min_lr: 0.000087 loss: 2.6111 (2.5108) weight_decay: 0.0500 (0.0500) time: 0.7231 data: 0.0004 max mem: 57114 Epoch: [281] [ 80/156] eta: 0:00:56 lr: 0.000087 min_lr: 0.000087 loss: 2.6111 (2.5168) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [281] [ 90/156] eta: 0:00:48 lr: 0.000086 min_lr: 0.000086 loss: 2.6211 (2.5299) weight_decay: 0.0500 (0.0500) time: 0.6963 data: 0.0004 max mem: 57114 Epoch: [281] [100/156] eta: 0:00:41 lr: 0.000086 min_lr: 0.000086 loss: 2.7173 (2.5435) weight_decay: 0.0500 (0.0500) time: 0.7038 data: 0.0004 max mem: 57114 Epoch: [281] [110/156] eta: 0:00:33 lr: 0.000085 min_lr: 0.000085 loss: 2.6497 (2.5286) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [281] [120/156] eta: 0:00:26 lr: 0.000084 min_lr: 0.000084 loss: 2.4717 (2.5220) weight_decay: 0.0500 (0.0500) time: 0.7068 data: 0.0003 max mem: 57114 Epoch: [281] [130/156] eta: 0:00:18 lr: 0.000084 min_lr: 0.000084 loss: 2.5929 (2.5235) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.0010 max mem: 57114 Epoch: [281] [140/156] eta: 0:00:11 lr: 0.000083 min_lr: 0.000083 loss: 2.6425 (2.5316) weight_decay: 0.0500 (0.0500) time: 0.6930 data: 0.0009 max mem: 57114 Epoch: [281] [150/156] eta: 0:00:04 lr: 0.000083 min_lr: 0.000083 loss: 2.5734 (2.5229) weight_decay: 0.0500 (0.0500) time: 0.6787 data: 0.0001 max mem: 57114 Epoch: [281] [155/156] eta: 0:00:00 lr: 0.000082 min_lr: 0.000082 loss: 2.6197 (2.5322) weight_decay: 0.0500 (0.0500) time: 0.6762 data: 0.0001 max mem: 57114 Epoch: [281] Total time: 0:01:52 (0.7218 s / it) Averaged stats: lr: 0.000082 min_lr: 0.000082 loss: 2.6197 (2.5195) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.5799 (0.5799) acc1: 91.6667 (91.6667) acc5: 98.4375 (98.4375) time: 2.2449 data: 1.9916 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7706 (0.7116) acc1: 83.3333 (84.7826) acc5: 98.4375 (97.6982) time: 0.5996 data: 0.3984 max mem: 57114 Test: Total time: 0:00:03 (0.6244 s / it) * Acc@1 84.709 Acc@5 97.061 loss 0.790 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.72% Test: [0/5] eta: 0:00:11 loss: 0.6175 (0.6175) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.3023 data: 2.0589 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7012 (0.7014) acc1: 81.7708 (79.7954) acc5: 97.3958 (94.5013) time: 0.6111 data: 0.4119 max mem: 57114 Test: Total time: 0:00:03 (0.6268 s / it) * Acc@1 77.757 Acc@5 93.908 loss 0.841 Accuracy of the model EMA on 50000 test images: 77.8% Max EMA accuracy: 77.76% Epoch: [282] [ 0/156] eta: 0:07:52 lr: 0.000082 min_lr: 0.000082 loss: 2.7780 (2.7780) weight_decay: 0.0500 (0.0500) time: 3.0257 data: 2.3717 max mem: 57114 Epoch: [282] [ 10/156] eta: 0:02:12 lr: 0.000082 min_lr: 0.000082 loss: 2.7384 (2.6031) weight_decay: 0.0500 (0.0500) time: 0.9080 data: 0.2159 max mem: 57114 Epoch: [282] [ 20/156] eta: 0:01:52 lr: 0.000081 min_lr: 0.000081 loss: 2.5619 (2.5258) weight_decay: 0.0500 (0.0500) time: 0.7150 data: 0.0003 max mem: 57114 Epoch: [282] [ 30/156] eta: 0:01:39 lr: 0.000081 min_lr: 0.000081 loss: 2.4692 (2.4633) weight_decay: 0.0500 (0.0500) time: 0.7281 data: 0.0004 max mem: 57114 Epoch: [282] [ 40/156] eta: 0:01:29 lr: 0.000080 min_lr: 0.000080 loss: 2.5466 (2.5219) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [282] [ 50/156] eta: 0:01:21 lr: 0.000079 min_lr: 0.000079 loss: 2.6479 (2.5355) weight_decay: 0.0500 (0.0500) time: 0.7323 data: 0.0004 max mem: 57114 Epoch: [282] [ 60/156] eta: 0:01:12 lr: 0.000079 min_lr: 0.000079 loss: 2.5866 (2.5310) weight_decay: 0.0500 (0.0500) time: 0.7188 data: 0.0004 max mem: 57114 Epoch: [282] [ 70/156] eta: 0:01:04 lr: 0.000078 min_lr: 0.000078 loss: 2.5687 (2.5242) weight_decay: 0.0500 (0.0500) time: 0.6923 data: 0.0004 max mem: 57114 Epoch: [282] [ 80/156] eta: 0:00:56 lr: 0.000078 min_lr: 0.000078 loss: 2.5604 (2.5387) weight_decay: 0.0500 (0.0500) time: 0.6986 data: 0.0004 max mem: 57114 Epoch: [282] [ 90/156] eta: 0:00:48 lr: 0.000077 min_lr: 0.000077 loss: 2.6537 (2.5318) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [282] [100/156] eta: 0:00:41 lr: 0.000077 min_lr: 0.000077 loss: 2.6818 (2.5245) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.0004 max mem: 57114 Epoch: [282] [110/156] eta: 0:00:33 lr: 0.000076 min_lr: 0.000076 loss: 2.5424 (2.5240) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0004 max mem: 57114 Epoch: [282] [120/156] eta: 0:00:26 lr: 0.000076 min_lr: 0.000076 loss: 2.6644 (2.5406) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0004 max mem: 57114 Epoch: [282] [130/156] eta: 0:00:18 lr: 0.000075 min_lr: 0.000075 loss: 2.6857 (2.5490) weight_decay: 0.0500 (0.0500) time: 0.7051 data: 0.0008 max mem: 57114 Epoch: [282] [140/156] eta: 0:00:11 lr: 0.000074 min_lr: 0.000074 loss: 2.6800 (2.5483) weight_decay: 0.0500 (0.0500) time: 0.6869 data: 0.0007 max mem: 57114 Epoch: [282] [150/156] eta: 0:00:04 lr: 0.000074 min_lr: 0.000074 loss: 2.6228 (2.5441) weight_decay: 0.0500 (0.0500) time: 0.6796 data: 0.0001 max mem: 57114 Epoch: [282] [155/156] eta: 0:00:00 lr: 0.000074 min_lr: 0.000074 loss: 2.6228 (2.5454) weight_decay: 0.0500 (0.0500) time: 0.6837 data: 0.0001 max mem: 57114 Epoch: [282] Total time: 0:01:53 (0.7244 s / it) Averaged stats: lr: 0.000074 min_lr: 0.000074 loss: 2.6228 (2.5120) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5702 (0.5702) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.0697 data: 1.8142 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7490 (0.6934) acc1: 83.8542 (84.3990) acc5: 98.4375 (97.5703) time: 0.5646 data: 0.3629 max mem: 57114 Test: Total time: 0:00:02 (0.5844 s / it) * Acc@1 84.687 Acc@5 97.029 loss 0.777 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.72% Test: [0/5] eta: 0:00:11 loss: 0.6171 (0.6171) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.3623 data: 2.1187 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7006 (0.7011) acc1: 81.7708 (79.7954) acc5: 97.3958 (94.5013) time: 0.6232 data: 0.4238 max mem: 57114 Test: Total time: 0:00:03 (0.6346 s / it) * Acc@1 77.791 Acc@5 93.918 loss 0.840 Accuracy of the model EMA on 50000 test images: 77.8% Max EMA accuracy: 77.79% Epoch: [283] [ 0/156] eta: 0:07:03 lr: 0.000074 min_lr: 0.000074 loss: 2.7463 (2.7463) weight_decay: 0.0500 (0.0500) time: 2.7157 data: 2.0655 max mem: 57114 Epoch: [283] [ 10/156] eta: 0:02:12 lr: 0.000073 min_lr: 0.000073 loss: 2.7463 (2.6880) weight_decay: 0.0500 (0.0500) time: 0.9048 data: 0.1881 max mem: 57114 Epoch: [283] [ 20/156] eta: 0:01:50 lr: 0.000072 min_lr: 0.000072 loss: 2.7157 (2.6856) weight_decay: 0.0500 (0.0500) time: 0.7208 data: 0.0003 max mem: 57114 Epoch: [283] [ 30/156] eta: 0:01:38 lr: 0.000072 min_lr: 0.000072 loss: 2.7080 (2.6388) weight_decay: 0.0500 (0.0500) time: 0.7172 data: 0.0004 max mem: 57114 Epoch: [283] [ 40/156] eta: 0:01:29 lr: 0.000071 min_lr: 0.000071 loss: 2.7357 (2.6669) weight_decay: 0.0500 (0.0500) time: 0.7189 data: 0.0003 max mem: 57114 Epoch: [283] [ 50/156] eta: 0:01:20 lr: 0.000071 min_lr: 0.000071 loss: 2.7862 (2.6503) weight_decay: 0.0500 (0.0500) time: 0.7138 data: 0.0004 max mem: 57114 Epoch: [283] [ 60/156] eta: 0:01:11 lr: 0.000070 min_lr: 0.000070 loss: 2.5500 (2.6036) weight_decay: 0.0500 (0.0500) time: 0.7095 data: 0.0004 max mem: 57114 Epoch: [283] [ 70/156] eta: 0:01:04 lr: 0.000070 min_lr: 0.000070 loss: 2.4660 (2.5957) weight_decay: 0.0500 (0.0500) time: 0.7147 data: 0.0004 max mem: 57114 Epoch: [283] [ 80/156] eta: 0:00:56 lr: 0.000069 min_lr: 0.000069 loss: 2.6602 (2.6022) weight_decay: 0.0500 (0.0500) time: 0.7238 data: 0.0003 max mem: 57114 Epoch: [283] [ 90/156] eta: 0:00:48 lr: 0.000069 min_lr: 0.000069 loss: 2.6602 (2.5967) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0003 max mem: 57114 Epoch: [283] [100/156] eta: 0:00:41 lr: 0.000068 min_lr: 0.000068 loss: 2.7206 (2.6046) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [283] [110/156] eta: 0:00:33 lr: 0.000068 min_lr: 0.000068 loss: 2.6347 (2.5866) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.0004 max mem: 57114 Epoch: [283] [120/156] eta: 0:00:26 lr: 0.000067 min_lr: 0.000067 loss: 2.5466 (2.5815) weight_decay: 0.0500 (0.0500) time: 0.7073 data: 0.0004 max mem: 57114 Epoch: [283] [130/156] eta: 0:00:18 lr: 0.000067 min_lr: 0.000067 loss: 2.5845 (2.5796) weight_decay: 0.0500 (0.0500) time: 0.7010 data: 0.0008 max mem: 57114 Epoch: [283] [140/156] eta: 0:00:11 lr: 0.000066 min_lr: 0.000066 loss: 2.4853 (2.5553) weight_decay: 0.0500 (0.0500) time: 0.6873 data: 0.0007 max mem: 57114 Epoch: [283] [150/156] eta: 0:00:04 lr: 0.000066 min_lr: 0.000066 loss: 2.5443 (2.5631) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.0001 max mem: 57114 Epoch: [283] [155/156] eta: 0:00:00 lr: 0.000065 min_lr: 0.000065 loss: 2.5885 (2.5652) weight_decay: 0.0500 (0.0500) time: 0.6883 data: 0.0001 max mem: 57114 Epoch: [283] Total time: 0:01:52 (0.7239 s / it) Averaged stats: lr: 0.000065 min_lr: 0.000065 loss: 2.5885 (2.5152) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6114 (0.6114) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.1080 data: 1.8518 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7970 (0.7374) acc1: 83.8542 (84.6547) acc5: 98.4375 (97.5703) time: 0.5724 data: 0.3704 max mem: 57114 Test: Total time: 0:00:02 (0.5942 s / it) * Acc@1 84.657 Acc@5 97.033 loss 0.820 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.72% Test: [0/5] eta: 0:00:11 loss: 0.6163 (0.6163) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 2.3043 data: 2.0609 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7000 (0.7005) acc1: 81.7708 (79.9233) acc5: 97.3958 (94.5013) time: 0.6120 data: 0.4123 max mem: 57114 Test: Total time: 0:00:03 (0.6250 s / it) * Acc@1 77.831 Acc@5 93.940 loss 0.839 Accuracy of the model EMA on 50000 test images: 77.8% Max EMA accuracy: 77.83% Epoch: [284] [ 0/156] eta: 0:06:13 lr: 0.000065 min_lr: 0.000065 loss: 1.8651 (1.8651) weight_decay: 0.0500 (0.0500) time: 2.3912 data: 1.7376 max mem: 57114 Epoch: [284] [ 10/156] eta: 0:02:11 lr: 0.000065 min_lr: 0.000065 loss: 2.5416 (2.4717) weight_decay: 0.0500 (0.0500) time: 0.9001 data: 0.1582 max mem: 57114 Epoch: [284] [ 20/156] eta: 0:01:51 lr: 0.000064 min_lr: 0.000064 loss: 2.4742 (2.4277) weight_decay: 0.0500 (0.0500) time: 0.7391 data: 0.0003 max mem: 57114 Epoch: [284] [ 30/156] eta: 0:01:38 lr: 0.000064 min_lr: 0.000064 loss: 2.4772 (2.4982) weight_decay: 0.0500 (0.0500) time: 0.7165 data: 0.0004 max mem: 57114 Epoch: [284] [ 40/156] eta: 0:01:28 lr: 0.000063 min_lr: 0.000063 loss: 2.7330 (2.4825) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0004 max mem: 57114 Epoch: [284] [ 50/156] eta: 0:01:19 lr: 0.000063 min_lr: 0.000063 loss: 2.7312 (2.5367) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [284] [ 60/156] eta: 0:01:12 lr: 0.000062 min_lr: 0.000062 loss: 2.8083 (2.5514) weight_decay: 0.0500 (0.0500) time: 0.7341 data: 0.0004 max mem: 57114 Epoch: [284] [ 70/156] eta: 0:01:04 lr: 0.000062 min_lr: 0.000062 loss: 2.6008 (2.5296) weight_decay: 0.0500 (0.0500) time: 0.7341 data: 0.0004 max mem: 57114 Epoch: [284] [ 80/156] eta: 0:00:56 lr: 0.000061 min_lr: 0.000061 loss: 2.6066 (2.5470) weight_decay: 0.0500 (0.0500) time: 0.7196 data: 0.0004 max mem: 57114 Epoch: [284] [ 90/156] eta: 0:00:48 lr: 0.000061 min_lr: 0.000061 loss: 2.6556 (2.5513) weight_decay: 0.0500 (0.0500) time: 0.7127 data: 0.0004 max mem: 57114 Epoch: [284] [100/156] eta: 0:00:41 lr: 0.000060 min_lr: 0.000060 loss: 2.5621 (2.5446) weight_decay: 0.0500 (0.0500) time: 0.7055 data: 0.0004 max mem: 57114 Epoch: [284] [110/156] eta: 0:00:33 lr: 0.000060 min_lr: 0.000060 loss: 2.4257 (2.5192) weight_decay: 0.0500 (0.0500) time: 0.6987 data: 0.0004 max mem: 57114 Epoch: [284] [120/156] eta: 0:00:26 lr: 0.000059 min_lr: 0.000059 loss: 2.3880 (2.5055) weight_decay: 0.0500 (0.0500) time: 0.7019 data: 0.0004 max mem: 57114 Epoch: [284] [130/156] eta: 0:00:18 lr: 0.000059 min_lr: 0.000059 loss: 2.5295 (2.5137) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0009 max mem: 57114 Epoch: [284] [140/156] eta: 0:00:11 lr: 0.000058 min_lr: 0.000058 loss: 2.5729 (2.5118) weight_decay: 0.0500 (0.0500) time: 0.6919 data: 0.0007 max mem: 57114 Epoch: [284] [150/156] eta: 0:00:04 lr: 0.000058 min_lr: 0.000058 loss: 2.5729 (2.5025) weight_decay: 0.0500 (0.0500) time: 0.6846 data: 0.0001 max mem: 57114 Epoch: [284] [155/156] eta: 0:00:00 lr: 0.000058 min_lr: 0.000058 loss: 2.5982 (2.5077) weight_decay: 0.0500 (0.0500) time: 0.6843 data: 0.0001 max mem: 57114 Epoch: [284] Total time: 0:01:52 (0.7239 s / it) Averaged stats: lr: 0.000058 min_lr: 0.000058 loss: 2.5982 (2.5137) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5661 (0.5661) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.0450 data: 1.7891 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7463 (0.6932) acc1: 85.4167 (85.2941) acc5: 98.4375 (97.5703) time: 0.5597 data: 0.3579 max mem: 57114 Test: Total time: 0:00:02 (0.5832 s / it) * Acc@1 84.793 Acc@5 97.079 loss 0.773 Accuracy of the model on the 50000 test images: 84.8% Max accuracy: 84.79% Test: [0/5] eta: 0:00:09 loss: 0.6157 (0.6157) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 1.9786 data: 1.7351 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6992 (0.6999) acc1: 81.7708 (79.9233) acc5: 97.3958 (94.5013) time: 0.5464 data: 0.3471 max mem: 57114 Test: Total time: 0:00:02 (0.5573 s / it) * Acc@1 77.869 Acc@5 93.964 loss 0.838 Accuracy of the model EMA on 50000 test images: 77.9% Max EMA accuracy: 77.87% Epoch: [285] [ 0/156] eta: 0:07:19 lr: 0.000058 min_lr: 0.000058 loss: 2.0487 (2.0487) weight_decay: 0.0500 (0.0500) time: 2.8171 data: 2.1672 max mem: 57114 Epoch: [285] [ 10/156] eta: 0:02:13 lr: 0.000057 min_lr: 0.000057 loss: 2.7155 (2.6322) weight_decay: 0.0500 (0.0500) time: 0.9176 data: 0.1973 max mem: 57114 Epoch: [285] [ 20/156] eta: 0:01:52 lr: 0.000057 min_lr: 0.000057 loss: 2.6917 (2.5097) weight_decay: 0.0500 (0.0500) time: 0.7270 data: 0.0003 max mem: 57114 Epoch: [285] [ 30/156] eta: 0:01:41 lr: 0.000056 min_lr: 0.000056 loss: 2.4987 (2.4941) weight_decay: 0.0500 (0.0500) time: 0.7384 data: 0.0003 max mem: 57114 Epoch: [285] [ 40/156] eta: 0:01:31 lr: 0.000056 min_lr: 0.000056 loss: 2.5763 (2.5146) weight_decay: 0.0500 (0.0500) time: 0.7508 data: 0.0003 max mem: 57114 Epoch: [285] [ 50/156] eta: 0:01:22 lr: 0.000055 min_lr: 0.000055 loss: 2.5237 (2.4638) weight_decay: 0.0500 (0.0500) time: 0.7394 data: 0.0003 max mem: 57114 Epoch: [285] [ 60/156] eta: 0:01:13 lr: 0.000055 min_lr: 0.000055 loss: 2.3858 (2.4815) weight_decay: 0.0500 (0.0500) time: 0.7316 data: 0.0004 max mem: 57114 Epoch: [285] [ 70/156] eta: 0:01:05 lr: 0.000054 min_lr: 0.000054 loss: 2.3858 (2.4527) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [285] [ 80/156] eta: 0:00:57 lr: 0.000054 min_lr: 0.000054 loss: 2.4953 (2.4712) weight_decay: 0.0500 (0.0500) time: 0.7166 data: 0.0003 max mem: 57114 Epoch: [285] [ 90/156] eta: 0:00:49 lr: 0.000053 min_lr: 0.000053 loss: 2.7730 (2.4935) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [285] [100/156] eta: 0:00:41 lr: 0.000053 min_lr: 0.000053 loss: 2.4529 (2.4725) weight_decay: 0.0500 (0.0500) time: 0.7072 data: 0.0005 max mem: 57114 Epoch: [285] [110/156] eta: 0:00:34 lr: 0.000052 min_lr: 0.000052 loss: 2.4529 (2.4687) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [285] [120/156] eta: 0:00:26 lr: 0.000052 min_lr: 0.000052 loss: 2.6559 (2.4792) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [285] [130/156] eta: 0:00:19 lr: 0.000051 min_lr: 0.000051 loss: 2.6877 (2.4853) weight_decay: 0.0500 (0.0500) time: 0.6942 data: 0.0008 max mem: 57114 Epoch: [285] [140/156] eta: 0:00:11 lr: 0.000051 min_lr: 0.000051 loss: 2.6935 (2.4910) weight_decay: 0.0500 (0.0500) time: 0.6821 data: 0.0007 max mem: 57114 Epoch: [285] [150/156] eta: 0:00:04 lr: 0.000051 min_lr: 0.000051 loss: 2.6941 (2.5037) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.0001 max mem: 57114 Epoch: [285] [155/156] eta: 0:00:00 lr: 0.000050 min_lr: 0.000050 loss: 2.6977 (2.5044) weight_decay: 0.0500 (0.0500) time: 0.6835 data: 0.0001 max mem: 57114 Epoch: [285] Total time: 0:01:53 (0.7307 s / it) Averaged stats: lr: 0.000050 min_lr: 0.000050 loss: 2.6977 (2.5112) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5980 (0.5980) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0521 data: 1.7959 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7663 (0.7226) acc1: 83.8542 (84.2711) acc5: 98.4375 (97.4425) time: 0.5611 data: 0.3592 max mem: 57114 Test: Total time: 0:00:02 (0.5836 s / it) * Acc@1 84.709 Acc@5 97.055 loss 0.802 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.79% Test: [0/5] eta: 0:00:12 loss: 0.6150 (0.6150) acc1: 84.8958 (84.8958) acc5: 96.8750 (96.8750) time: 2.4194 data: 2.1758 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6985 (0.6993) acc1: 81.7708 (79.9233) acc5: 96.8750 (94.5013) time: 0.6345 data: 0.4352 max mem: 57114 Test: Total time: 0:00:03 (0.6463 s / it) * Acc@1 77.925 Acc@5 93.988 loss 0.837 Accuracy of the model EMA on 50000 test images: 77.9% Max EMA accuracy: 77.93% Epoch: [286] [ 0/156] eta: 0:06:55 lr: 0.000050 min_lr: 0.000050 loss: 2.3051 (2.3051) weight_decay: 0.0500 (0.0500) time: 2.6626 data: 2.0022 max mem: 57114 Epoch: [286] [ 10/156] eta: 0:02:10 lr: 0.000050 min_lr: 0.000050 loss: 2.6897 (2.4922) weight_decay: 0.0500 (0.0500) time: 0.8971 data: 0.1823 max mem: 57114 Epoch: [286] [ 20/156] eta: 0:01:51 lr: 0.000049 min_lr: 0.000049 loss: 2.5623 (2.4376) weight_decay: 0.0500 (0.0500) time: 0.7267 data: 0.0003 max mem: 57114 Epoch: [286] [ 30/156] eta: 0:01:39 lr: 0.000049 min_lr: 0.000049 loss: 2.5736 (2.5136) weight_decay: 0.0500 (0.0500) time: 0.7321 data: 0.0003 max mem: 57114 Epoch: [286] [ 40/156] eta: 0:01:29 lr: 0.000048 min_lr: 0.000048 loss: 2.6281 (2.4966) weight_decay: 0.0500 (0.0500) time: 0.7176 data: 0.0004 max mem: 57114 Epoch: [286] [ 50/156] eta: 0:01:20 lr: 0.000048 min_lr: 0.000048 loss: 2.6205 (2.5222) weight_decay: 0.0500 (0.0500) time: 0.7049 data: 0.0004 max mem: 57114 Epoch: [286] [ 60/156] eta: 0:01:12 lr: 0.000048 min_lr: 0.000048 loss: 2.5620 (2.4906) weight_decay: 0.0500 (0.0500) time: 0.7240 data: 0.0004 max mem: 57114 Epoch: [286] [ 70/156] eta: 0:01:04 lr: 0.000047 min_lr: 0.000047 loss: 2.4241 (2.4898) weight_decay: 0.0500 (0.0500) time: 0.7227 data: 0.0004 max mem: 57114 Epoch: [286] [ 80/156] eta: 0:00:56 lr: 0.000047 min_lr: 0.000047 loss: 2.4241 (2.4708) weight_decay: 0.0500 (0.0500) time: 0.7058 data: 0.0004 max mem: 57114 Epoch: [286] [ 90/156] eta: 0:00:48 lr: 0.000046 min_lr: 0.000046 loss: 2.4434 (2.4855) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [286] [100/156] eta: 0:00:41 lr: 0.000046 min_lr: 0.000046 loss: 2.7451 (2.5051) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0004 max mem: 57114 Epoch: [286] [110/156] eta: 0:00:33 lr: 0.000045 min_lr: 0.000045 loss: 2.7451 (2.5164) weight_decay: 0.0500 (0.0500) time: 0.7181 data: 0.0004 max mem: 57114 Epoch: [286] [120/156] eta: 0:00:26 lr: 0.000045 min_lr: 0.000045 loss: 2.6937 (2.5238) weight_decay: 0.0500 (0.0500) time: 0.7070 data: 0.0004 max mem: 57114 Epoch: [286] [130/156] eta: 0:00:18 lr: 0.000045 min_lr: 0.000045 loss: 2.6251 (2.5322) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0008 max mem: 57114 Epoch: [286] [140/156] eta: 0:00:11 lr: 0.000044 min_lr: 0.000044 loss: 2.5842 (2.5290) weight_decay: 0.0500 (0.0500) time: 0.6860 data: 0.0007 max mem: 57114 Epoch: [286] [150/156] eta: 0:00:04 lr: 0.000044 min_lr: 0.000044 loss: 2.5613 (2.5208) weight_decay: 0.0500 (0.0500) time: 0.6825 data: 0.0001 max mem: 57114 Epoch: [286] [155/156] eta: 0:00:00 lr: 0.000044 min_lr: 0.000044 loss: 2.5842 (2.5185) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0001 max mem: 57114 Epoch: [286] Total time: 0:01:52 (0.7240 s / it) Averaged stats: lr: 0.000044 min_lr: 0.000044 loss: 2.5842 (2.5045) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5670 (0.5670) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.1480 data: 1.8927 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7437 (0.6922) acc1: 83.8542 (84.3990) acc5: 98.4375 (97.3146) time: 0.5802 data: 0.3786 max mem: 57114 Test: Total time: 0:00:03 (0.6051 s / it) * Acc@1 84.685 Acc@5 97.045 loss 0.774 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.79% Test: [0/5] eta: 0:00:11 loss: 0.6142 (0.6142) acc1: 84.8958 (84.8958) acc5: 96.8750 (96.8750) time: 2.2818 data: 2.0383 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6976 (0.6986) acc1: 81.7708 (79.9233) acc5: 96.8750 (94.5013) time: 0.6070 data: 0.4077 max mem: 57114 Test: Total time: 0:00:03 (0.6181 s / it) * Acc@1 77.949 Acc@5 94.002 loss 0.836 Accuracy of the model EMA on 50000 test images: 77.9% Max EMA accuracy: 77.95% Epoch: [287] [ 0/156] eta: 0:07:05 lr: 0.000043 min_lr: 0.000043 loss: 2.5977 (2.5977) weight_decay: 0.0500 (0.0500) time: 2.7259 data: 2.0650 max mem: 57114 Epoch: [287] [ 10/156] eta: 0:02:15 lr: 0.000043 min_lr: 0.000043 loss: 2.5977 (2.5203) weight_decay: 0.0500 (0.0500) time: 0.9263 data: 0.1880 max mem: 57114 Epoch: [287] [ 20/156] eta: 0:01:53 lr: 0.000043 min_lr: 0.000043 loss: 2.5763 (2.5384) weight_decay: 0.0500 (0.0500) time: 0.7401 data: 0.0004 max mem: 57114 Epoch: [287] [ 30/156] eta: 0:01:41 lr: 0.000042 min_lr: 0.000042 loss: 2.6164 (2.5391) weight_decay: 0.0500 (0.0500) time: 0.7369 data: 0.0004 max mem: 57114 Epoch: [287] [ 40/156] eta: 0:01:30 lr: 0.000042 min_lr: 0.000042 loss: 2.5698 (2.5130) weight_decay: 0.0500 (0.0500) time: 0.7281 data: 0.0004 max mem: 57114 Epoch: [287] [ 50/156] eta: 0:01:21 lr: 0.000041 min_lr: 0.000041 loss: 2.5698 (2.5020) weight_decay: 0.0500 (0.0500) time: 0.7186 data: 0.0004 max mem: 57114 Epoch: [287] [ 60/156] eta: 0:01:13 lr: 0.000041 min_lr: 0.000041 loss: 2.6130 (2.5120) weight_decay: 0.0500 (0.0500) time: 0.7167 data: 0.0003 max mem: 57114 Epoch: [287] [ 70/156] eta: 0:01:04 lr: 0.000041 min_lr: 0.000041 loss: 2.4508 (2.4926) weight_decay: 0.0500 (0.0500) time: 0.7153 data: 0.0004 max mem: 57114 Epoch: [287] [ 80/156] eta: 0:00:56 lr: 0.000040 min_lr: 0.000040 loss: 2.2803 (2.4795) weight_decay: 0.0500 (0.0500) time: 0.7078 data: 0.0004 max mem: 57114 Epoch: [287] [ 90/156] eta: 0:00:49 lr: 0.000040 min_lr: 0.000040 loss: 2.4824 (2.4829) weight_decay: 0.0500 (0.0500) time: 0.7054 data: 0.0004 max mem: 57114 Epoch: [287] [100/156] eta: 0:00:41 lr: 0.000039 min_lr: 0.000039 loss: 2.4824 (2.4636) weight_decay: 0.0500 (0.0500) time: 0.7123 data: 0.0004 max mem: 57114 Epoch: [287] [110/156] eta: 0:00:33 lr: 0.000039 min_lr: 0.000039 loss: 2.3190 (2.4653) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [287] [120/156] eta: 0:00:26 lr: 0.000039 min_lr: 0.000039 loss: 2.5500 (2.4781) weight_decay: 0.0500 (0.0500) time: 0.7116 data: 0.0004 max mem: 57114 Epoch: [287] [130/156] eta: 0:00:19 lr: 0.000038 min_lr: 0.000038 loss: 2.5588 (2.4704) weight_decay: 0.0500 (0.0500) time: 0.7050 data: 0.0004 max mem: 57114 Epoch: [287] [140/156] eta: 0:00:11 lr: 0.000038 min_lr: 0.000038 loss: 2.5314 (2.4693) weight_decay: 0.0500 (0.0500) time: 0.6916 data: 0.0003 max mem: 57114 Epoch: [287] [150/156] eta: 0:00:04 lr: 0.000037 min_lr: 0.000037 loss: 2.5206 (2.4703) weight_decay: 0.0500 (0.0500) time: 0.6860 data: 0.0001 max mem: 57114 Epoch: [287] [155/156] eta: 0:00:00 lr: 0.000037 min_lr: 0.000037 loss: 2.5743 (2.4714) weight_decay: 0.0500 (0.0500) time: 0.6867 data: 0.0001 max mem: 57114 Epoch: [287] Total time: 0:01:53 (0.7276 s / it) Averaged stats: lr: 0.000037 min_lr: 0.000037 loss: 2.5743 (2.5020) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5495 (0.5495) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0501 data: 1.7941 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7302 (0.6758) acc1: 85.4167 (84.6547) acc5: 98.4375 (97.4425) time: 0.5607 data: 0.3589 max mem: 57114 Test: Total time: 0:00:02 (0.5837 s / it) * Acc@1 84.685 Acc@5 97.015 loss 0.758 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.79% Test: [0/5] eta: 0:00:11 loss: 0.6133 (0.6133) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.3188 data: 2.0752 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6967 (0.6977) acc1: 81.7708 (80.1790) acc5: 96.8750 (94.5013) time: 0.6144 data: 0.4151 max mem: 57114 Test: Total time: 0:00:03 (0.6297 s / it) * Acc@1 77.985 Acc@5 94.022 loss 0.835 Accuracy of the model EMA on 50000 test images: 78.0% Max EMA accuracy: 77.99% Epoch: [288] [ 0/156] eta: 0:08:29 lr: 0.000037 min_lr: 0.000037 loss: 2.9290 (2.9290) weight_decay: 0.0500 (0.0500) time: 3.2635 data: 2.6115 max mem: 57114 Epoch: [288] [ 10/156] eta: 0:02:18 lr: 0.000037 min_lr: 0.000037 loss: 2.7100 (2.4954) weight_decay: 0.0500 (0.0500) time: 0.9477 data: 0.2377 max mem: 57114 Epoch: [288] [ 20/156] eta: 0:01:54 lr: 0.000036 min_lr: 0.000036 loss: 2.6650 (2.5679) weight_decay: 0.0500 (0.0500) time: 0.7188 data: 0.0003 max mem: 57114 Epoch: [288] [ 30/156] eta: 0:01:41 lr: 0.000036 min_lr: 0.000036 loss: 2.6228 (2.5639) weight_decay: 0.0500 (0.0500) time: 0.7236 data: 0.0003 max mem: 57114 Epoch: [288] [ 40/156] eta: 0:01:31 lr: 0.000036 min_lr: 0.000036 loss: 2.6119 (2.5470) weight_decay: 0.0500 (0.0500) time: 0.7263 data: 0.0004 max mem: 57114 Epoch: [288] [ 50/156] eta: 0:01:21 lr: 0.000035 min_lr: 0.000035 loss: 2.6867 (2.5441) weight_decay: 0.0500 (0.0500) time: 0.7226 data: 0.0004 max mem: 57114 Epoch: [288] [ 60/156] eta: 0:01:13 lr: 0.000035 min_lr: 0.000035 loss: 2.5066 (2.5137) weight_decay: 0.0500 (0.0500) time: 0.7286 data: 0.0003 max mem: 57114 Epoch: [288] [ 70/156] eta: 0:01:05 lr: 0.000035 min_lr: 0.000035 loss: 2.4245 (2.5211) weight_decay: 0.0500 (0.0500) time: 0.7287 data: 0.0004 max mem: 57114 Epoch: [288] [ 80/156] eta: 0:00:57 lr: 0.000034 min_lr: 0.000034 loss: 2.6435 (2.5272) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0003 max mem: 57114 Epoch: [288] [ 90/156] eta: 0:00:49 lr: 0.000034 min_lr: 0.000034 loss: 2.5801 (2.5201) weight_decay: 0.0500 (0.0500) time: 0.6936 data: 0.0004 max mem: 57114 Epoch: [288] [100/156] eta: 0:00:41 lr: 0.000033 min_lr: 0.000033 loss: 2.5690 (2.5252) weight_decay: 0.0500 (0.0500) time: 0.7047 data: 0.0004 max mem: 57114 Epoch: [288] [110/156] eta: 0:00:33 lr: 0.000033 min_lr: 0.000033 loss: 2.6338 (2.5313) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.0004 max mem: 57114 Epoch: [288] [120/156] eta: 0:00:26 lr: 0.000033 min_lr: 0.000033 loss: 2.6341 (2.5350) weight_decay: 0.0500 (0.0500) time: 0.7143 data: 0.0004 max mem: 57114 Epoch: [288] [130/156] eta: 0:00:19 lr: 0.000032 min_lr: 0.000032 loss: 2.4690 (2.5268) weight_decay: 0.0500 (0.0500) time: 0.7129 data: 0.0009 max mem: 57114 Epoch: [288] [140/156] eta: 0:00:11 lr: 0.000032 min_lr: 0.000032 loss: 2.4469 (2.5256) weight_decay: 0.0500 (0.0500) time: 0.6874 data: 0.0007 max mem: 57114 Epoch: [288] [150/156] eta: 0:00:04 lr: 0.000032 min_lr: 0.000032 loss: 2.6307 (2.5376) weight_decay: 0.0500 (0.0500) time: 0.6802 data: 0.0001 max mem: 57114 Epoch: [288] [155/156] eta: 0:00:00 lr: 0.000031 min_lr: 0.000031 loss: 2.7074 (2.5428) weight_decay: 0.0500 (0.0500) time: 0.6818 data: 0.0001 max mem: 57114 Epoch: [288] Total time: 0:01:53 (0.7278 s / it) Averaged stats: lr: 0.000031 min_lr: 0.000031 loss: 2.7074 (2.5047) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.6495 (0.6495) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.2140 data: 1.9581 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8243 (0.7781) acc1: 84.3750 (84.5269) acc5: 98.4375 (97.4425) time: 0.5935 data: 0.3917 max mem: 57114 Test: Total time: 0:00:03 (0.6203 s / it) * Acc@1 84.669 Acc@5 97.029 loss 0.853 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.79% Test: [0/5] eta: 0:00:12 loss: 0.6122 (0.6122) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.4189 data: 2.1753 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6957 (0.6968) acc1: 81.7708 (80.1790) acc5: 96.8750 (94.5013) time: 0.6345 data: 0.4351 max mem: 57114 Test: Total time: 0:00:03 (0.6460 s / it) * Acc@1 78.035 Acc@5 94.050 loss 0.834 Accuracy of the model EMA on 50000 test images: 78.0% Max EMA accuracy: 78.04% Epoch: [289] [ 0/156] eta: 0:07:30 lr: 0.000031 min_lr: 0.000031 loss: 2.9510 (2.9510) weight_decay: 0.0500 (0.0500) time: 2.8904 data: 2.2397 max mem: 57114 Epoch: [289] [ 10/156] eta: 0:02:13 lr: 0.000031 min_lr: 0.000031 loss: 2.5571 (2.6379) weight_decay: 0.0500 (0.0500) time: 0.9128 data: 0.2038 max mem: 57114 Epoch: [289] [ 20/156] eta: 0:01:51 lr: 0.000031 min_lr: 0.000031 loss: 2.5571 (2.6010) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0003 max mem: 57114 Epoch: [289] [ 30/156] eta: 0:01:39 lr: 0.000030 min_lr: 0.000030 loss: 2.6174 (2.5680) weight_decay: 0.0500 (0.0500) time: 0.7187 data: 0.0003 max mem: 57114 Epoch: [289] [ 40/156] eta: 0:01:29 lr: 0.000030 min_lr: 0.000030 loss: 2.6043 (2.5404) weight_decay: 0.0500 (0.0500) time: 0.7292 data: 0.0004 max mem: 57114 Epoch: [289] [ 50/156] eta: 0:01:20 lr: 0.000030 min_lr: 0.000030 loss: 2.6372 (2.5466) weight_decay: 0.0500 (0.0500) time: 0.7262 data: 0.0004 max mem: 57114 Epoch: [289] [ 60/156] eta: 0:01:12 lr: 0.000029 min_lr: 0.000029 loss: 2.6668 (2.5410) weight_decay: 0.0500 (0.0500) time: 0.7152 data: 0.0004 max mem: 57114 Epoch: [289] [ 70/156] eta: 0:01:04 lr: 0.000029 min_lr: 0.000029 loss: 2.6467 (2.5480) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.0005 max mem: 57114 Epoch: [289] [ 80/156] eta: 0:00:56 lr: 0.000029 min_lr: 0.000029 loss: 2.6421 (2.5324) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0005 max mem: 57114 Epoch: [289] [ 90/156] eta: 0:00:48 lr: 0.000028 min_lr: 0.000028 loss: 2.5703 (2.5395) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.0005 max mem: 57114 Epoch: [289] [100/156] eta: 0:00:41 lr: 0.000028 min_lr: 0.000028 loss: 2.5703 (2.5403) weight_decay: 0.0500 (0.0500) time: 0.7095 data: 0.0005 max mem: 57114 Epoch: [289] [110/156] eta: 0:00:33 lr: 0.000028 min_lr: 0.000028 loss: 2.4352 (2.5262) weight_decay: 0.0500 (0.0500) time: 0.6997 data: 0.0005 max mem: 57114 Epoch: [289] [120/156] eta: 0:00:26 lr: 0.000027 min_lr: 0.000027 loss: 2.5858 (2.5244) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0004 max mem: 57114 Epoch: [289] [130/156] eta: 0:00:18 lr: 0.000027 min_lr: 0.000027 loss: 2.5944 (2.5198) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0014 max mem: 57114 Epoch: [289] [140/156] eta: 0:00:11 lr: 0.000027 min_lr: 0.000027 loss: 2.5415 (2.5197) weight_decay: 0.0500 (0.0500) time: 0.6913 data: 0.0012 max mem: 57114 Epoch: [289] [150/156] eta: 0:00:04 lr: 0.000026 min_lr: 0.000026 loss: 2.5415 (2.5198) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.0001 max mem: 57114 Epoch: [289] [155/156] eta: 0:00:00 lr: 0.000026 min_lr: 0.000026 loss: 2.6123 (2.5191) weight_decay: 0.0500 (0.0500) time: 0.6817 data: 0.0001 max mem: 57114 Epoch: [289] Total time: 0:01:52 (0.7243 s / it) Averaged stats: lr: 0.000026 min_lr: 0.000026 loss: 2.6123 (2.5050) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5744 (0.5744) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0417 data: 1.7860 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7468 (0.6986) acc1: 85.4167 (84.7826) acc5: 98.4375 (97.4425) time: 0.5590 data: 0.3573 max mem: 57114 Test: Total time: 0:00:02 (0.5814 s / it) * Acc@1 84.697 Acc@5 97.071 loss 0.778 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.79% Test: [0/5] eta: 0:00:11 loss: 0.6112 (0.6112) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.2393 data: 1.9957 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6947 (0.6959) acc1: 81.7708 (80.1790) acc5: 96.8750 (94.6292) time: 0.5985 data: 0.3992 max mem: 57114 Test: Total time: 0:00:03 (0.6142 s / it) * Acc@1 78.079 Acc@5 94.082 loss 0.832 Accuracy of the model EMA on 50000 test images: 78.1% Max EMA accuracy: 78.08% Epoch: [290] [ 0/156] eta: 0:06:18 lr: 0.000026 min_lr: 0.000026 loss: 1.8939 (1.8939) weight_decay: 0.0500 (0.0500) time: 2.4266 data: 1.7791 max mem: 57114 Epoch: [290] [ 10/156] eta: 0:02:08 lr: 0.000026 min_lr: 0.000026 loss: 2.1026 (2.2630) weight_decay: 0.0500 (0.0500) time: 0.8773 data: 0.1621 max mem: 57114 Epoch: [290] [ 20/156] eta: 0:01:49 lr: 0.000026 min_lr: 0.000026 loss: 2.6515 (2.4758) weight_decay: 0.0500 (0.0500) time: 0.7255 data: 0.0004 max mem: 57114 Epoch: [290] [ 30/156] eta: 0:01:39 lr: 0.000025 min_lr: 0.000025 loss: 2.7228 (2.5068) weight_decay: 0.0500 (0.0500) time: 0.7439 data: 0.0005 max mem: 57114 Epoch: [290] [ 40/156] eta: 0:01:29 lr: 0.000025 min_lr: 0.000025 loss: 2.6402 (2.5195) weight_decay: 0.0500 (0.0500) time: 0.7362 data: 0.0005 max mem: 57114 Epoch: [290] [ 50/156] eta: 0:01:20 lr: 0.000025 min_lr: 0.000025 loss: 2.6174 (2.5321) weight_decay: 0.0500 (0.0500) time: 0.7114 data: 0.0005 max mem: 57114 Epoch: [290] [ 60/156] eta: 0:01:12 lr: 0.000024 min_lr: 0.000024 loss: 2.7401 (2.5570) weight_decay: 0.0500 (0.0500) time: 0.7141 data: 0.0005 max mem: 57114 Epoch: [290] [ 70/156] eta: 0:01:04 lr: 0.000024 min_lr: 0.000024 loss: 2.7443 (2.5822) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0005 max mem: 57114 Epoch: [290] [ 80/156] eta: 0:00:56 lr: 0.000024 min_lr: 0.000024 loss: 2.6812 (2.5756) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0005 max mem: 57114 Epoch: [290] [ 90/156] eta: 0:00:48 lr: 0.000023 min_lr: 0.000023 loss: 2.6033 (2.5458) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0006 max mem: 57114 Epoch: [290] [100/156] eta: 0:00:41 lr: 0.000023 min_lr: 0.000023 loss: 2.5716 (2.5450) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.0007 max mem: 57114 Epoch: [290] [110/156] eta: 0:00:33 lr: 0.000023 min_lr: 0.000023 loss: 2.6301 (2.5443) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0007 max mem: 57114 Epoch: [290] [120/156] eta: 0:00:26 lr: 0.000022 min_lr: 0.000022 loss: 2.5429 (2.5286) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0007 max mem: 57114 Epoch: [290] [130/156] eta: 0:00:18 lr: 0.000022 min_lr: 0.000022 loss: 2.4638 (2.5219) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0012 max mem: 57114 Epoch: [290] [140/156] eta: 0:00:11 lr: 0.000022 min_lr: 0.000022 loss: 2.3159 (2.4977) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.0009 max mem: 57114 Epoch: [290] [150/156] eta: 0:00:04 lr: 0.000022 min_lr: 0.000022 loss: 2.4545 (2.5127) weight_decay: 0.0500 (0.0500) time: 0.6852 data: 0.0001 max mem: 57114 Epoch: [290] [155/156] eta: 0:00:00 lr: 0.000021 min_lr: 0.000021 loss: 2.6882 (2.5168) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0001 max mem: 57114 Epoch: [290] Total time: 0:01:52 (0.7234 s / it) Averaged stats: lr: 0.000021 min_lr: 0.000021 loss: 2.6882 (2.5092) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6281 (0.6281) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.0458 data: 1.7900 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8001 (0.7500) acc1: 84.8958 (84.6547) acc5: 98.4375 (97.6982) time: 0.5599 data: 0.3581 max mem: 57114 Test: Total time: 0:00:02 (0.5830 s / it) * Acc@1 84.727 Acc@5 97.045 loss 0.830 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.79% Test: [0/5] eta: 0:00:11 loss: 0.6101 (0.6101) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.3236 data: 2.0801 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6937 (0.6949) acc1: 81.7708 (80.1790) acc5: 96.8750 (94.7570) time: 0.6154 data: 0.4161 max mem: 57114 Test: Total time: 0:00:03 (0.6271 s / it) * Acc@1 78.153 Acc@5 94.126 loss 0.831 Accuracy of the model EMA on 50000 test images: 78.2% Max EMA accuracy: 78.15% Epoch: [291] [ 0/156] eta: 0:07:57 lr: 0.000021 min_lr: 0.000021 loss: 2.4369 (2.4369) weight_decay: 0.0500 (0.0500) time: 3.0620 data: 2.2816 max mem: 57114 Epoch: [291] [ 10/156] eta: 0:02:17 lr: 0.000021 min_lr: 0.000021 loss: 2.6194 (2.5349) weight_decay: 0.0500 (0.0500) time: 0.9391 data: 0.2077 max mem: 57114 Epoch: [291] [ 20/156] eta: 0:01:52 lr: 0.000021 min_lr: 0.000021 loss: 2.5129 (2.5072) weight_decay: 0.0500 (0.0500) time: 0.7191 data: 0.0003 max mem: 57114 Epoch: [291] [ 30/156] eta: 0:01:40 lr: 0.000021 min_lr: 0.000021 loss: 2.6319 (2.5125) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0003 max mem: 57114 Epoch: [291] [ 40/156] eta: 0:01:29 lr: 0.000020 min_lr: 0.000020 loss: 2.6650 (2.5147) weight_decay: 0.0500 (0.0500) time: 0.7097 data: 0.0004 max mem: 57114 Epoch: [291] [ 50/156] eta: 0:01:20 lr: 0.000020 min_lr: 0.000020 loss: 2.5581 (2.5249) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0003 max mem: 57114 Epoch: [291] [ 60/156] eta: 0:01:12 lr: 0.000020 min_lr: 0.000020 loss: 2.5581 (2.5072) weight_decay: 0.0500 (0.0500) time: 0.7224 data: 0.0004 max mem: 57114 Epoch: [291] [ 70/156] eta: 0:01:04 lr: 0.000019 min_lr: 0.000019 loss: 2.5337 (2.5052) weight_decay: 0.0500 (0.0500) time: 0.7189 data: 0.0004 max mem: 57114 Epoch: [291] [ 80/156] eta: 0:00:56 lr: 0.000019 min_lr: 0.000019 loss: 2.5068 (2.4891) weight_decay: 0.0500 (0.0500) time: 0.7219 data: 0.0004 max mem: 57114 Epoch: [291] [ 90/156] eta: 0:00:49 lr: 0.000019 min_lr: 0.000019 loss: 2.6655 (2.5205) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0004 max mem: 57114 Epoch: [291] [100/156] eta: 0:00:41 lr: 0.000019 min_lr: 0.000019 loss: 2.7381 (2.5045) weight_decay: 0.0500 (0.0500) time: 0.7049 data: 0.0004 max mem: 57114 Epoch: [291] [110/156] eta: 0:00:33 lr: 0.000018 min_lr: 0.000018 loss: 2.6517 (2.5078) weight_decay: 0.0500 (0.0500) time: 0.6979 data: 0.0004 max mem: 57114 Epoch: [291] [120/156] eta: 0:00:26 lr: 0.000018 min_lr: 0.000018 loss: 2.4140 (2.4954) weight_decay: 0.0500 (0.0500) time: 0.7041 data: 0.0004 max mem: 57114 Epoch: [291] [130/156] eta: 0:00:18 lr: 0.000018 min_lr: 0.000018 loss: 2.1917 (2.4872) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0008 max mem: 57114 Epoch: [291] [140/156] eta: 0:00:11 lr: 0.000018 min_lr: 0.000018 loss: 2.0702 (2.4630) weight_decay: 0.0500 (0.0500) time: 0.6968 data: 0.0007 max mem: 57114 Epoch: [291] [150/156] eta: 0:00:04 lr: 0.000017 min_lr: 0.000017 loss: 2.4423 (2.4762) weight_decay: 0.0500 (0.0500) time: 0.6918 data: 0.0001 max mem: 57114 Epoch: [291] [155/156] eta: 0:00:00 lr: 0.000017 min_lr: 0.000017 loss: 2.5906 (2.4753) weight_decay: 0.0500 (0.0500) time: 0.6864 data: 0.0001 max mem: 57114 Epoch: [291] Total time: 0:01:53 (0.7257 s / it) Averaged stats: lr: 0.000017 min_lr: 0.000017 loss: 2.5906 (2.5065) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5628 (0.5628) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.1020 data: 1.8454 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7401 (0.6913) acc1: 83.8542 (84.6547) acc5: 98.4375 (97.4425) time: 0.5712 data: 0.3692 max mem: 57114 Test: Total time: 0:00:02 (0.5964 s / it) * Acc@1 84.777 Acc@5 97.083 loss 0.771 Accuracy of the model on the 50000 test images: 84.8% Max accuracy: 84.79% Test: [0/5] eta: 0:00:11 loss: 0.6089 (0.6089) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.2280 data: 1.9842 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6926 (0.6939) acc1: 81.7708 (80.1790) acc5: 96.8750 (94.7570) time: 0.5963 data: 0.3969 max mem: 57114 Test: Total time: 0:00:03 (0.6071 s / it) * Acc@1 78.227 Acc@5 94.144 loss 0.829 Accuracy of the model EMA on 50000 test images: 78.2% Max EMA accuracy: 78.23% Epoch: [292] [ 0/156] eta: 0:06:39 lr: 0.000017 min_lr: 0.000017 loss: 3.2064 (3.2064) weight_decay: 0.0500 (0.0500) time: 2.5608 data: 1.9092 max mem: 57114 Epoch: [292] [ 10/156] eta: 0:02:09 lr: 0.000017 min_lr: 0.000017 loss: 2.7371 (2.6324) weight_decay: 0.0500 (0.0500) time: 0.8857 data: 0.1739 max mem: 57114 Epoch: [292] [ 20/156] eta: 0:01:49 lr: 0.000017 min_lr: 0.000017 loss: 2.4316 (2.4661) weight_decay: 0.0500 (0.0500) time: 0.7145 data: 0.0003 max mem: 57114 Epoch: [292] [ 30/156] eta: 0:01:37 lr: 0.000016 min_lr: 0.000016 loss: 2.4029 (2.4729) weight_decay: 0.0500 (0.0500) time: 0.7088 data: 0.0003 max mem: 57114 Epoch: [292] [ 40/156] eta: 0:01:27 lr: 0.000016 min_lr: 0.000016 loss: 2.6779 (2.5035) weight_decay: 0.0500 (0.0500) time: 0.7111 data: 0.0004 max mem: 57114 Epoch: [292] [ 50/156] eta: 0:01:20 lr: 0.000016 min_lr: 0.000016 loss: 2.6779 (2.4896) weight_decay: 0.0500 (0.0500) time: 0.7369 data: 0.0004 max mem: 57114 Epoch: [292] [ 60/156] eta: 0:01:12 lr: 0.000016 min_lr: 0.000016 loss: 2.5801 (2.5044) weight_decay: 0.0500 (0.0500) time: 0.7404 data: 0.0004 max mem: 57114 Epoch: [292] [ 70/156] eta: 0:01:04 lr: 0.000015 min_lr: 0.000015 loss: 2.4614 (2.4813) weight_decay: 0.0500 (0.0500) time: 0.7201 data: 0.0004 max mem: 57114 Epoch: [292] [ 80/156] eta: 0:00:56 lr: 0.000015 min_lr: 0.000015 loss: 2.3316 (2.4786) weight_decay: 0.0500 (0.0500) time: 0.7152 data: 0.0003 max mem: 57114 Epoch: [292] [ 90/156] eta: 0:00:48 lr: 0.000015 min_lr: 0.000015 loss: 2.4559 (2.4754) weight_decay: 0.0500 (0.0500) time: 0.7045 data: 0.0003 max mem: 57114 Epoch: [292] [100/156] eta: 0:00:41 lr: 0.000015 min_lr: 0.000015 loss: 2.4261 (2.4548) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0003 max mem: 57114 Epoch: [292] [110/156] eta: 0:00:33 lr: 0.000014 min_lr: 0.000014 loss: 2.4106 (2.4661) weight_decay: 0.0500 (0.0500) time: 0.7001 data: 0.0003 max mem: 57114 Epoch: [292] [120/156] eta: 0:00:26 lr: 0.000014 min_lr: 0.000014 loss: 2.7057 (2.4797) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.0004 max mem: 57114 Epoch: [292] [130/156] eta: 0:00:18 lr: 0.000014 min_lr: 0.000014 loss: 2.6765 (2.4927) weight_decay: 0.0500 (0.0500) time: 0.7081 data: 0.0009 max mem: 57114 Epoch: [292] [140/156] eta: 0:00:11 lr: 0.000014 min_lr: 0.000014 loss: 2.6741 (2.4971) weight_decay: 0.0500 (0.0500) time: 0.6971 data: 0.0007 max mem: 57114 Epoch: [292] [150/156] eta: 0:00:04 lr: 0.000013 min_lr: 0.000013 loss: 2.7464 (2.5149) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.0001 max mem: 57114 Epoch: [292] [155/156] eta: 0:00:00 lr: 0.000013 min_lr: 0.000013 loss: 2.4193 (2.5025) weight_decay: 0.0500 (0.0500) time: 0.6780 data: 0.0001 max mem: 57114 Epoch: [292] Total time: 0:01:52 (0.7223 s / it) Averaged stats: lr: 0.000013 min_lr: 0.000013 loss: 2.4193 (2.5086) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5520 (0.5520) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0270 data: 1.7706 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7297 (0.6767) acc1: 83.8542 (84.3990) acc5: 98.4375 (97.4425) time: 0.5561 data: 0.3542 max mem: 57114 Test: Total time: 0:00:02 (0.5787 s / it) * Acc@1 84.765 Acc@5 97.053 loss 0.758 Accuracy of the model on the 50000 test images: 84.8% Max accuracy: 84.79% Test: [0/5] eta: 0:00:11 loss: 0.6077 (0.6077) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.3173 data: 2.0738 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6915 (0.6928) acc1: 81.7708 (80.1790) acc5: 96.8750 (94.7570) time: 0.6142 data: 0.4149 max mem: 57114 Test: Total time: 0:00:03 (0.6274 s / it) * Acc@1 78.257 Acc@5 94.160 loss 0.828 Accuracy of the model EMA on 50000 test images: 78.3% Max EMA accuracy: 78.26% Epoch: [293] [ 0/156] eta: 0:07:08 lr: 0.000013 min_lr: 0.000013 loss: 2.7907 (2.7907) weight_decay: 0.0500 (0.0500) time: 2.7471 data: 2.0949 max mem: 57114 Epoch: [293] [ 10/156] eta: 0:02:11 lr: 0.000013 min_lr: 0.000013 loss: 2.6522 (2.6252) weight_decay: 0.0500 (0.0500) time: 0.9038 data: 0.1908 max mem: 57114 Epoch: [293] [ 20/156] eta: 0:01:50 lr: 0.000013 min_lr: 0.000013 loss: 2.6381 (2.6263) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0004 max mem: 57114 Epoch: [293] [ 30/156] eta: 0:01:38 lr: 0.000013 min_lr: 0.000013 loss: 2.4811 (2.5331) weight_decay: 0.0500 (0.0500) time: 0.7092 data: 0.0004 max mem: 57114 Epoch: [293] [ 40/156] eta: 0:01:29 lr: 0.000012 min_lr: 0.000012 loss: 2.4811 (2.5411) weight_decay: 0.0500 (0.0500) time: 0.7335 data: 0.0004 max mem: 57114 Epoch: [293] [ 50/156] eta: 0:01:20 lr: 0.000012 min_lr: 0.000012 loss: 2.5444 (2.5122) weight_decay: 0.0500 (0.0500) time: 0.7412 data: 0.0004 max mem: 57114 Epoch: [293] [ 60/156] eta: 0:01:12 lr: 0.000012 min_lr: 0.000012 loss: 2.5444 (2.5222) weight_decay: 0.0500 (0.0500) time: 0.7336 data: 0.0004 max mem: 57114 Epoch: [293] [ 70/156] eta: 0:01:04 lr: 0.000012 min_lr: 0.000012 loss: 2.6275 (2.5305) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.0004 max mem: 57114 Epoch: [293] [ 80/156] eta: 0:00:56 lr: 0.000012 min_lr: 0.000012 loss: 2.5172 (2.5248) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0004 max mem: 57114 Epoch: [293] [ 90/156] eta: 0:00:48 lr: 0.000011 min_lr: 0.000011 loss: 2.4601 (2.4927) weight_decay: 0.0500 (0.0500) time: 0.7033 data: 0.0003 max mem: 57114 Epoch: [293] [100/156] eta: 0:00:41 lr: 0.000011 min_lr: 0.000011 loss: 2.2144 (2.4749) weight_decay: 0.0500 (0.0500) time: 0.7144 data: 0.0003 max mem: 57114 Epoch: [293] [110/156] eta: 0:00:33 lr: 0.000011 min_lr: 0.000011 loss: 2.5694 (2.4791) weight_decay: 0.0500 (0.0500) time: 0.7151 data: 0.0003 max mem: 57114 Epoch: [293] [120/156] eta: 0:00:26 lr: 0.000011 min_lr: 0.000011 loss: 2.6839 (2.4910) weight_decay: 0.0500 (0.0500) time: 0.7171 data: 0.0004 max mem: 57114 Epoch: [293] [130/156] eta: 0:00:19 lr: 0.000011 min_lr: 0.000011 loss: 2.6763 (2.4882) weight_decay: 0.0500 (0.0500) time: 0.7089 data: 0.0008 max mem: 57114 Epoch: [293] [140/156] eta: 0:00:11 lr: 0.000010 min_lr: 0.000010 loss: 2.4970 (2.4854) weight_decay: 0.0500 (0.0500) time: 0.6809 data: 0.0007 max mem: 57114 Epoch: [293] [150/156] eta: 0:00:04 lr: 0.000010 min_lr: 0.000010 loss: 2.3130 (2.4741) weight_decay: 0.0500 (0.0500) time: 0.6807 data: 0.0001 max mem: 57114 Epoch: [293] [155/156] eta: 0:00:00 lr: 0.000010 min_lr: 0.000010 loss: 2.4863 (2.4688) weight_decay: 0.0500 (0.0500) time: 0.6810 data: 0.0001 max mem: 57114 Epoch: [293] Total time: 0:01:53 (0.7258 s / it) Averaged stats: lr: 0.000010 min_lr: 0.000010 loss: 2.4863 (2.5083) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5093 (0.5093) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0590 data: 1.8032 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6869 (0.6353) acc1: 84.3750 (84.7826) acc5: 98.4375 (97.5703) time: 0.5624 data: 0.3607 max mem: 57114 Test: Total time: 0:00:02 (0.5780 s / it) * Acc@1 84.779 Acc@5 97.073 loss 0.719 Accuracy of the model on the 50000 test images: 84.8% Max accuracy: 84.79% Test: [0/5] eta: 0:00:12 loss: 0.6063 (0.6063) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.4231 data: 2.1795 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6903 (0.6915) acc1: 81.7708 (80.1790) acc5: 96.8750 (94.8849) time: 0.6352 data: 0.4360 max mem: 57114 Test: Total time: 0:00:03 (0.6449 s / it) * Acc@1 78.329 Acc@5 94.200 loss 0.826 Accuracy of the model EMA on 50000 test images: 78.3% Max EMA accuracy: 78.33% Epoch: [294] [ 0/156] eta: 0:06:41 lr: 0.000010 min_lr: 0.000010 loss: 2.0365 (2.0365) weight_decay: 0.0500 (0.0500) time: 2.5766 data: 1.9111 max mem: 57114 Epoch: [294] [ 10/156] eta: 0:02:07 lr: 0.000010 min_lr: 0.000010 loss: 2.3410 (2.2773) weight_decay: 0.0500 (0.0500) time: 0.8764 data: 0.1741 max mem: 57114 Epoch: [294] [ 20/156] eta: 0:01:49 lr: 0.000010 min_lr: 0.000010 loss: 2.4458 (2.4052) weight_decay: 0.0500 (0.0500) time: 0.7152 data: 0.0004 max mem: 57114 Epoch: [294] [ 30/156] eta: 0:01:38 lr: 0.000009 min_lr: 0.000009 loss: 2.5658 (2.4263) weight_decay: 0.0500 (0.0500) time: 0.7310 data: 0.0004 max mem: 57114 Epoch: [294] [ 40/156] eta: 0:01:28 lr: 0.000009 min_lr: 0.000009 loss: 2.5658 (2.4307) weight_decay: 0.0500 (0.0500) time: 0.7269 data: 0.0004 max mem: 57114 Epoch: [294] [ 50/156] eta: 0:01:20 lr: 0.000009 min_lr: 0.000009 loss: 2.5516 (2.4317) weight_decay: 0.0500 (0.0500) time: 0.7161 data: 0.0004 max mem: 57114 Epoch: [294] [ 60/156] eta: 0:01:12 lr: 0.000009 min_lr: 0.000009 loss: 2.5999 (2.4596) weight_decay: 0.0500 (0.0500) time: 0.7222 data: 0.0005 max mem: 57114 Epoch: [294] [ 70/156] eta: 0:01:03 lr: 0.000009 min_lr: 0.000009 loss: 2.6115 (2.4771) weight_decay: 0.0500 (0.0500) time: 0.7104 data: 0.0005 max mem: 57114 Epoch: [294] [ 80/156] eta: 0:00:56 lr: 0.000009 min_lr: 0.000009 loss: 2.6246 (2.4919) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.0004 max mem: 57114 Epoch: [294] [ 90/156] eta: 0:00:48 lr: 0.000008 min_lr: 0.000008 loss: 2.6375 (2.4988) weight_decay: 0.0500 (0.0500) time: 0.7121 data: 0.0004 max mem: 57114 Epoch: [294] [100/156] eta: 0:00:41 lr: 0.000008 min_lr: 0.000008 loss: 2.6510 (2.5073) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.0004 max mem: 57114 Epoch: [294] [110/156] eta: 0:00:33 lr: 0.000008 min_lr: 0.000008 loss: 2.7255 (2.5110) weight_decay: 0.0500 (0.0500) time: 0.6999 data: 0.0004 max mem: 57114 Epoch: [294] [120/156] eta: 0:00:26 lr: 0.000008 min_lr: 0.000008 loss: 2.6268 (2.5038) weight_decay: 0.0500 (0.0500) time: 0.7075 data: 0.0004 max mem: 57114 Epoch: [294] [130/156] eta: 0:00:18 lr: 0.000008 min_lr: 0.000008 loss: 2.5542 (2.5035) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.0010 max mem: 57114 Epoch: [294] [140/156] eta: 0:00:11 lr: 0.000008 min_lr: 0.000008 loss: 2.6635 (2.5159) weight_decay: 0.0500 (0.0500) time: 0.6935 data: 0.0008 max mem: 57114 Epoch: [294] [150/156] eta: 0:00:04 lr: 0.000007 min_lr: 0.000007 loss: 2.6635 (2.5126) weight_decay: 0.0500 (0.0500) time: 0.6879 data: 0.0001 max mem: 57114 Epoch: [294] [155/156] eta: 0:00:00 lr: 0.000007 min_lr: 0.000007 loss: 2.6635 (2.5195) weight_decay: 0.0500 (0.0500) time: 0.6839 data: 0.0001 max mem: 57114 Epoch: [294] Total time: 0:01:52 (0.7220 s / it) Averaged stats: lr: 0.000007 min_lr: 0.000007 loss: 2.6635 (2.5020) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5924 (0.5924) acc1: 90.6250 (90.6250) acc5: 98.4375 (98.4375) time: 2.0959 data: 1.8399 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7761 (0.7191) acc1: 84.3750 (84.7826) acc5: 98.4375 (97.4425) time: 0.5699 data: 0.3680 max mem: 57114 Test: Total time: 0:00:02 (0.5927 s / it) * Acc@1 84.833 Acc@5 97.059 loss 0.799 Accuracy of the model on the 50000 test images: 84.8% Max accuracy: 84.83% Test: [0/5] eta: 0:00:10 loss: 0.6050 (0.6050) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.0136 data: 1.7700 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6892 (0.6904) acc1: 81.7708 (80.1790) acc5: 96.8750 (94.8849) time: 0.5534 data: 0.3541 max mem: 57114 Test: Total time: 0:00:02 (0.5639 s / it) * Acc@1 78.399 Acc@5 94.252 loss 0.824 Accuracy of the model EMA on 50000 test images: 78.4% Max EMA accuracy: 78.40% Epoch: [295] [ 0/156] eta: 0:06:52 lr: 0.000007 min_lr: 0.000007 loss: 2.3895 (2.3895) weight_decay: 0.0500 (0.0500) time: 2.6443 data: 1.8387 max mem: 57114 Epoch: [295] [ 10/156] eta: 0:02:10 lr: 0.000007 min_lr: 0.000007 loss: 2.6025 (2.6275) weight_decay: 0.0500 (0.0500) time: 0.8933 data: 0.1674 max mem: 57114 Epoch: [295] [ 20/156] eta: 0:01:51 lr: 0.000007 min_lr: 0.000007 loss: 2.6643 (2.5480) weight_decay: 0.0500 (0.0500) time: 0.7296 data: 0.0003 max mem: 57114 Epoch: [295] [ 30/156] eta: 0:01:39 lr: 0.000007 min_lr: 0.000007 loss: 2.5019 (2.5094) weight_decay: 0.0500 (0.0500) time: 0.7289 data: 0.0004 max mem: 57114 Epoch: [295] [ 40/156] eta: 0:01:30 lr: 0.000007 min_lr: 0.000007 loss: 2.5482 (2.5102) weight_decay: 0.0500 (0.0500) time: 0.7367 data: 0.0004 max mem: 57114 Epoch: [295] [ 50/156] eta: 0:01:21 lr: 0.000007 min_lr: 0.000007 loss: 2.5628 (2.4941) weight_decay: 0.0500 (0.0500) time: 0.7491 data: 0.0004 max mem: 57114 Epoch: [295] [ 60/156] eta: 0:01:13 lr: 0.000006 min_lr: 0.000006 loss: 2.6078 (2.5075) weight_decay: 0.0500 (0.0500) time: 0.7249 data: 0.0004 max mem: 57114 Epoch: [295] [ 70/156] eta: 0:01:05 lr: 0.000006 min_lr: 0.000006 loss: 2.6078 (2.4854) weight_decay: 0.0500 (0.0500) time: 0.7195 data: 0.0004 max mem: 57114 Epoch: [295] [ 80/156] eta: 0:00:57 lr: 0.000006 min_lr: 0.000006 loss: 2.6119 (2.4894) weight_decay: 0.0500 (0.0500) time: 0.7305 data: 0.0003 max mem: 57114 Epoch: [295] [ 90/156] eta: 0:00:49 lr: 0.000006 min_lr: 0.000006 loss: 2.6125 (2.4959) weight_decay: 0.0500 (0.0500) time: 0.7178 data: 0.0004 max mem: 57114 Epoch: [295] [100/156] eta: 0:00:41 lr: 0.000006 min_lr: 0.000006 loss: 2.6881 (2.5095) weight_decay: 0.0500 (0.0500) time: 0.7056 data: 0.0004 max mem: 57114 Epoch: [295] [110/156] eta: 0:00:34 lr: 0.000006 min_lr: 0.000006 loss: 2.6908 (2.4989) weight_decay: 0.0500 (0.0500) time: 0.7148 data: 0.0004 max mem: 57114 Epoch: [295] [120/156] eta: 0:00:26 lr: 0.000006 min_lr: 0.000006 loss: 2.5974 (2.4996) weight_decay: 0.0500 (0.0500) time: 0.7151 data: 0.0004 max mem: 57114 Epoch: [295] [130/156] eta: 0:00:19 lr: 0.000005 min_lr: 0.000005 loss: 2.6427 (2.5104) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.0008 max mem: 57114 Epoch: [295] [140/156] eta: 0:00:11 lr: 0.000005 min_lr: 0.000005 loss: 2.6487 (2.5094) weight_decay: 0.0500 (0.0500) time: 0.6893 data: 0.0007 max mem: 57114 Epoch: [295] [150/156] eta: 0:00:04 lr: 0.000005 min_lr: 0.000005 loss: 2.5977 (2.5141) weight_decay: 0.0500 (0.0500) time: 0.6813 data: 0.0001 max mem: 57114 Epoch: [295] [155/156] eta: 0:00:00 lr: 0.000005 min_lr: 0.000005 loss: 2.7192 (2.5177) weight_decay: 0.0500 (0.0500) time: 0.6828 data: 0.0001 max mem: 57114 Epoch: [295] Total time: 0:01:53 (0.7300 s / it) Averaged stats: lr: 0.000005 min_lr: 0.000005 loss: 2.7192 (2.5041) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:11 loss: 0.6084 (0.6084) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.2014 data: 1.9454 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7823 (0.7332) acc1: 84.8958 (84.6547) acc5: 98.4375 (97.3146) time: 0.5910 data: 0.3892 max mem: 57114 Test: Total time: 0:00:03 (0.6130 s / it) * Acc@1 84.675 Acc@5 97.027 loss 0.813 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.83% Test: [0/5] eta: 0:00:11 loss: 0.6035 (0.6035) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.3486 data: 2.1051 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6878 (0.6891) acc1: 81.7708 (80.3069) acc5: 96.8750 (94.7570) time: 0.6204 data: 0.4211 max mem: 57114 Test: Total time: 0:00:03 (0.6354 s / it) * Acc@1 78.487 Acc@5 94.279 loss 0.822 Accuracy of the model EMA on 50000 test images: 78.5% Max EMA accuracy: 78.49% Epoch: [296] [ 0/156] eta: 0:07:07 lr: 0.000005 min_lr: 0.000005 loss: 2.4646 (2.4646) weight_decay: 0.0500 (0.0500) time: 2.7417 data: 2.0795 max mem: 57114 Epoch: [296] [ 10/156] eta: 0:02:11 lr: 0.000005 min_lr: 0.000005 loss: 2.4646 (2.5019) weight_decay: 0.0500 (0.0500) time: 0.8987 data: 0.1893 max mem: 57114 Epoch: [296] [ 20/156] eta: 0:01:51 lr: 0.000005 min_lr: 0.000005 loss: 2.6728 (2.6178) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0003 max mem: 57114 Epoch: [296] [ 30/156] eta: 0:01:39 lr: 0.000005 min_lr: 0.000005 loss: 2.6437 (2.5451) weight_decay: 0.0500 (0.0500) time: 0.7331 data: 0.0003 max mem: 57114 Epoch: [296] [ 40/156] eta: 0:01:29 lr: 0.000005 min_lr: 0.000005 loss: 2.4283 (2.5375) weight_decay: 0.0500 (0.0500) time: 0.7274 data: 0.0004 max mem: 57114 Epoch: [296] [ 50/156] eta: 0:01:21 lr: 0.000004 min_lr: 0.000004 loss: 2.6016 (2.5368) weight_decay: 0.0500 (0.0500) time: 0.7222 data: 0.0004 max mem: 57114 Epoch: [296] [ 60/156] eta: 0:01:12 lr: 0.000004 min_lr: 0.000004 loss: 2.5970 (2.5297) weight_decay: 0.0500 (0.0500) time: 0.7159 data: 0.0004 max mem: 57114 Epoch: [296] [ 70/156] eta: 0:01:04 lr: 0.000004 min_lr: 0.000004 loss: 2.5120 (2.5103) weight_decay: 0.0500 (0.0500) time: 0.7094 data: 0.0004 max mem: 57114 Epoch: [296] [ 80/156] eta: 0:00:56 lr: 0.000004 min_lr: 0.000004 loss: 2.4565 (2.5058) weight_decay: 0.0500 (0.0500) time: 0.7102 data: 0.0004 max mem: 57114 Epoch: [296] [ 90/156] eta: 0:00:48 lr: 0.000004 min_lr: 0.000004 loss: 2.5497 (2.5064) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [296] [100/156] eta: 0:00:41 lr: 0.000004 min_lr: 0.000004 loss: 2.4261 (2.5003) weight_decay: 0.0500 (0.0500) time: 0.7112 data: 0.0004 max mem: 57114 Epoch: [296] [110/156] eta: 0:00:33 lr: 0.000004 min_lr: 0.000004 loss: 2.6450 (2.5150) weight_decay: 0.0500 (0.0500) time: 0.7086 data: 0.0004 max mem: 57114 Epoch: [296] [120/156] eta: 0:00:26 lr: 0.000004 min_lr: 0.000004 loss: 2.6711 (2.5222) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0004 max mem: 57114 Epoch: [296] [130/156] eta: 0:00:18 lr: 0.000004 min_lr: 0.000004 loss: 2.4670 (2.5039) weight_decay: 0.0500 (0.0500) time: 0.6952 data: 0.0009 max mem: 57114 Epoch: [296] [140/156] eta: 0:00:11 lr: 0.000003 min_lr: 0.000003 loss: 2.5317 (2.5099) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0007 max mem: 57114 Epoch: [296] [150/156] eta: 0:00:04 lr: 0.000003 min_lr: 0.000003 loss: 2.5604 (2.5054) weight_decay: 0.0500 (0.0500) time: 0.6849 data: 0.0001 max mem: 57114 Epoch: [296] [155/156] eta: 0:00:00 lr: 0.000003 min_lr: 0.000003 loss: 2.5956 (2.5122) weight_decay: 0.0500 (0.0500) time: 0.6834 data: 0.0001 max mem: 57114 Epoch: [296] Total time: 0:01:52 (0.7234 s / it) Averaged stats: lr: 0.000003 min_lr: 0.000003 loss: 2.5956 (2.5032) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6213 (0.6213) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0709 data: 1.8149 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8005 (0.7461) acc1: 84.3750 (84.5269) acc5: 98.4375 (97.4425) time: 0.5649 data: 0.3631 max mem: 57114 Test: Total time: 0:00:02 (0.5871 s / it) * Acc@1 84.717 Acc@5 97.043 loss 0.824 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.83% Test: [0/5] eta: 0:00:11 loss: 0.6021 (0.6021) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.3107 data: 2.0672 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6866 (0.6878) acc1: 81.7708 (80.4348) acc5: 96.8750 (94.7570) time: 0.6128 data: 0.4135 max mem: 57114 Test: Total time: 0:00:03 (0.6284 s / it) * Acc@1 78.575 Acc@5 94.313 loss 0.820 Accuracy of the model EMA on 50000 test images: 78.6% Max EMA accuracy: 78.57% Epoch: [297] [ 0/156] eta: 0:07:23 lr: 0.000003 min_lr: 0.000003 loss: 2.9631 (2.9631) weight_decay: 0.0500 (0.0500) time: 2.8439 data: 2.1893 max mem: 57114 Epoch: [297] [ 10/156] eta: 0:02:13 lr: 0.000003 min_lr: 0.000003 loss: 2.6833 (2.6589) weight_decay: 0.0500 (0.0500) time: 0.9155 data: 0.1993 max mem: 57114 Epoch: [297] [ 20/156] eta: 0:01:53 lr: 0.000003 min_lr: 0.000003 loss: 2.5248 (2.5060) weight_decay: 0.0500 (0.0500) time: 0.7354 data: 0.0003 max mem: 57114 Epoch: [297] [ 30/156] eta: 0:01:40 lr: 0.000003 min_lr: 0.000003 loss: 2.4723 (2.5361) weight_decay: 0.0500 (0.0500) time: 0.7321 data: 0.0003 max mem: 57114 Epoch: [297] [ 40/156] eta: 0:01:30 lr: 0.000003 min_lr: 0.000003 loss: 2.7450 (2.5773) weight_decay: 0.0500 (0.0500) time: 0.7173 data: 0.0003 max mem: 57114 Epoch: [297] [ 50/156] eta: 0:01:21 lr: 0.000003 min_lr: 0.000003 loss: 2.6115 (2.5466) weight_decay: 0.0500 (0.0500) time: 0.7134 data: 0.0004 max mem: 57114 Epoch: [297] [ 60/156] eta: 0:01:12 lr: 0.000003 min_lr: 0.000003 loss: 2.5492 (2.5281) weight_decay: 0.0500 (0.0500) time: 0.7117 data: 0.0004 max mem: 57114 Epoch: [297] [ 70/156] eta: 0:01:04 lr: 0.000003 min_lr: 0.000003 loss: 2.5492 (2.5340) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.0004 max mem: 57114 Epoch: [297] [ 80/156] eta: 0:00:56 lr: 0.000003 min_lr: 0.000003 loss: 2.6727 (2.5355) weight_decay: 0.0500 (0.0500) time: 0.7248 data: 0.0004 max mem: 57114 Epoch: [297] [ 90/156] eta: 0:00:49 lr: 0.000002 min_lr: 0.000002 loss: 2.5943 (2.5264) weight_decay: 0.0500 (0.0500) time: 0.7122 data: 0.0004 max mem: 57114 Epoch: [297] [100/156] eta: 0:00:41 lr: 0.000002 min_lr: 0.000002 loss: 2.5025 (2.5207) weight_decay: 0.0500 (0.0500) time: 0.7048 data: 0.0003 max mem: 57114 Epoch: [297] [110/156] eta: 0:00:33 lr: 0.000002 min_lr: 0.000002 loss: 2.5011 (2.5104) weight_decay: 0.0500 (0.0500) time: 0.7133 data: 0.0003 max mem: 57114 Epoch: [297] [120/156] eta: 0:00:26 lr: 0.000002 min_lr: 0.000002 loss: 2.6513 (2.5223) weight_decay: 0.0500 (0.0500) time: 0.7060 data: 0.0004 max mem: 57114 Epoch: [297] [130/156] eta: 0:00:19 lr: 0.000002 min_lr: 0.000002 loss: 2.6333 (2.5245) weight_decay: 0.0500 (0.0500) time: 0.6983 data: 0.0008 max mem: 57114 Epoch: [297] [140/156] eta: 0:00:11 lr: 0.000002 min_lr: 0.000002 loss: 2.5670 (2.5290) weight_decay: 0.0500 (0.0500) time: 0.6947 data: 0.0007 max mem: 57114 Epoch: [297] [150/156] eta: 0:00:04 lr: 0.000002 min_lr: 0.000002 loss: 2.4846 (2.5101) weight_decay: 0.0500 (0.0500) time: 0.6852 data: 0.0001 max mem: 57114 Epoch: [297] [155/156] eta: 0:00:00 lr: 0.000002 min_lr: 0.000002 loss: 2.4130 (2.5068) weight_decay: 0.0500 (0.0500) time: 0.6832 data: 0.0001 max mem: 57114 Epoch: [297] Total time: 0:01:53 (0.7264 s / it) Averaged stats: lr: 0.000002 min_lr: 0.000002 loss: 2.4130 (2.5037) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.5424 (0.5424) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0932 data: 1.8369 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7229 (0.6704) acc1: 83.8542 (84.5269) acc5: 98.4375 (97.4425) time: 0.5695 data: 0.3675 max mem: 57114 Test: Total time: 0:00:02 (0.5927 s / it) * Acc@1 84.785 Acc@5 97.079 loss 0.750 Accuracy of the model on the 50000 test images: 84.8% Max accuracy: 84.83% Test: [0/5] eta: 0:00:11 loss: 0.6005 (0.6005) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.2689 data: 2.0252 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6853 (0.6864) acc1: 81.7708 (80.5627) acc5: 96.8750 (94.7570) time: 0.6045 data: 0.4051 max mem: 57114 Test: Total time: 0:00:03 (0.6171 s / it) * Acc@1 78.645 Acc@5 94.337 loss 0.818 Accuracy of the model EMA on 50000 test images: 78.6% Max EMA accuracy: 78.64% Epoch: [298] [ 0/156] eta: 0:07:43 lr: 0.000002 min_lr: 0.000002 loss: 3.0060 (3.0060) weight_decay: 0.0500 (0.0500) time: 2.9698 data: 2.3232 max mem: 57114 Epoch: [298] [ 10/156] eta: 0:02:14 lr: 0.000002 min_lr: 0.000002 loss: 2.3719 (2.3607) weight_decay: 0.0500 (0.0500) time: 0.9244 data: 0.2115 max mem: 57114 Epoch: [298] [ 20/156] eta: 0:01:53 lr: 0.000002 min_lr: 0.000002 loss: 2.2991 (2.3634) weight_decay: 0.0500 (0.0500) time: 0.7256 data: 0.0003 max mem: 57114 Epoch: [298] [ 30/156] eta: 0:01:39 lr: 0.000002 min_lr: 0.000002 loss: 2.5927 (2.4091) weight_decay: 0.0500 (0.0500) time: 0.7170 data: 0.0003 max mem: 57114 Epoch: [298] [ 40/156] eta: 0:01:29 lr: 0.000002 min_lr: 0.000002 loss: 2.6478 (2.4383) weight_decay: 0.0500 (0.0500) time: 0.7096 data: 0.0004 max mem: 57114 Epoch: [298] [ 50/156] eta: 0:01:21 lr: 0.000002 min_lr: 0.000002 loss: 2.6122 (2.4473) weight_decay: 0.0500 (0.0500) time: 0.7281 data: 0.0004 max mem: 57114 Epoch: [298] [ 60/156] eta: 0:01:12 lr: 0.000002 min_lr: 0.000002 loss: 2.5659 (2.4547) weight_decay: 0.0500 (0.0500) time: 0.7326 data: 0.0004 max mem: 57114 Epoch: [298] [ 70/156] eta: 0:01:04 lr: 0.000002 min_lr: 0.000002 loss: 2.5010 (2.4386) weight_decay: 0.0500 (0.0500) time: 0.7198 data: 0.0004 max mem: 57114 Epoch: [298] [ 80/156] eta: 0:00:56 lr: 0.000002 min_lr: 0.000002 loss: 2.6040 (2.4529) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.0004 max mem: 57114 Epoch: [298] [ 90/156] eta: 0:00:48 lr: 0.000002 min_lr: 0.000002 loss: 2.7497 (2.4752) weight_decay: 0.0500 (0.0500) time: 0.7043 data: 0.0004 max mem: 57114 Epoch: [298] [100/156] eta: 0:00:41 lr: 0.000001 min_lr: 0.000001 loss: 2.6637 (2.4762) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0004 max mem: 57114 Epoch: [298] [110/156] eta: 0:00:33 lr: 0.000001 min_lr: 0.000001 loss: 2.5131 (2.4739) weight_decay: 0.0500 (0.0500) time: 0.7037 data: 0.0004 max mem: 57114 Epoch: [298] [120/156] eta: 0:00:26 lr: 0.000001 min_lr: 0.000001 loss: 2.5660 (2.4840) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.0004 max mem: 57114 Epoch: [298] [130/156] eta: 0:00:18 lr: 0.000001 min_lr: 0.000001 loss: 2.5683 (2.4732) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.0009 max mem: 57114 Epoch: [298] [140/156] eta: 0:00:11 lr: 0.000001 min_lr: 0.000001 loss: 2.4904 (2.4752) weight_decay: 0.0500 (0.0500) time: 0.6933 data: 0.0007 max mem: 57114 Epoch: [298] [150/156] eta: 0:00:04 lr: 0.000001 min_lr: 0.000001 loss: 2.6625 (2.4878) weight_decay: 0.0500 (0.0500) time: 0.6858 data: 0.0001 max mem: 57114 Epoch: [298] [155/156] eta: 0:00:00 lr: 0.000001 min_lr: 0.000001 loss: 2.6963 (2.4966) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.0001 max mem: 57114 Epoch: [298] Total time: 0:01:53 (0.7253 s / it) Averaged stats: lr: 0.000001 min_lr: 0.000001 loss: 2.6963 (2.5021) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6530 (0.6530) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0908 data: 1.8350 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.8257 (0.7780) acc1: 84.3750 (84.5269) acc5: 98.4375 (97.4425) time: 0.5688 data: 0.3671 max mem: 57114 Test: Total time: 0:00:02 (0.5904 s / it) * Acc@1 84.745 Acc@5 97.051 loss 0.854 Accuracy of the model on the 50000 test images: 84.7% Max accuracy: 84.83% Test: [0/5] eta: 0:00:11 loss: 0.5989 (0.5989) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.3525 data: 2.1089 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6839 (0.6850) acc1: 81.7708 (80.4348) acc5: 96.8750 (94.7570) time: 0.6212 data: 0.4219 max mem: 57114 Test: Total time: 0:00:03 (0.6348 s / it) * Acc@1 78.698 Acc@5 94.385 loss 0.816 Accuracy of the model EMA on 50000 test images: 78.7% Max EMA accuracy: 78.70% Epoch: [299] [ 0/156] eta: 0:09:19 lr: 0.000001 min_lr: 0.000001 loss: 2.8604 (2.8604) weight_decay: 0.0500 (0.0500) time: 3.5871 data: 2.9147 max mem: 57114 Epoch: [299] [ 10/156] eta: 0:02:23 lr: 0.000001 min_lr: 0.000001 loss: 2.5645 (2.4544) weight_decay: 0.0500 (0.0500) time: 0.9834 data: 0.2653 max mem: 57114 Epoch: [299] [ 20/156] eta: 0:01:57 lr: 0.000001 min_lr: 0.000001 loss: 2.5580 (2.4386) weight_decay: 0.0500 (0.0500) time: 0.7282 data: 0.0003 max mem: 57114 Epoch: [299] [ 30/156] eta: 0:01:43 lr: 0.000001 min_lr: 0.000001 loss: 2.6423 (2.4805) weight_decay: 0.0500 (0.0500) time: 0.7307 data: 0.0004 max mem: 57114 Epoch: [299] [ 40/156] eta: 0:01:32 lr: 0.000001 min_lr: 0.000001 loss: 2.4540 (2.4560) weight_decay: 0.0500 (0.0500) time: 0.7249 data: 0.0004 max mem: 57114 Epoch: [299] [ 50/156] eta: 0:01:23 lr: 0.000001 min_lr: 0.000001 loss: 2.4540 (2.4715) weight_decay: 0.0500 (0.0500) time: 0.7302 data: 0.0005 max mem: 57114 Epoch: [299] [ 60/156] eta: 0:01:14 lr: 0.000001 min_lr: 0.000001 loss: 2.5515 (2.4447) weight_decay: 0.0500 (0.0500) time: 0.7334 data: 0.0004 max mem: 57114 Epoch: [299] [ 70/156] eta: 0:01:05 lr: 0.000001 min_lr: 0.000001 loss: 2.5515 (2.4604) weight_decay: 0.0500 (0.0500) time: 0.7214 data: 0.0004 max mem: 57114 Epoch: [299] [ 80/156] eta: 0:00:57 lr: 0.000001 min_lr: 0.000001 loss: 2.6335 (2.4607) weight_decay: 0.0500 (0.0500) time: 0.7101 data: 0.0004 max mem: 57114 Epoch: [299] [ 90/156] eta: 0:00:49 lr: 0.000001 min_lr: 0.000001 loss: 2.4350 (2.4426) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0004 max mem: 57114 Epoch: [299] [100/156] eta: 0:00:41 lr: 0.000001 min_lr: 0.000001 loss: 2.3645 (2.4381) weight_decay: 0.0500 (0.0500) time: 0.7074 data: 0.0004 max mem: 57114 Epoch: [299] [110/156] eta: 0:00:34 lr: 0.000001 min_lr: 0.000001 loss: 2.4013 (2.4403) weight_decay: 0.0500 (0.0500) time: 0.7046 data: 0.0004 max mem: 57114 Epoch: [299] [120/156] eta: 0:00:26 lr: 0.000001 min_lr: 0.000001 loss: 2.6522 (2.4513) weight_decay: 0.0500 (0.0500) time: 0.7140 data: 0.0004 max mem: 57114 Epoch: [299] [130/156] eta: 0:00:19 lr: 0.000001 min_lr: 0.000001 loss: 2.6739 (2.4640) weight_decay: 0.0500 (0.0500) time: 0.7120 data: 0.0009 max mem: 57114 Epoch: [299] [140/156] eta: 0:00:11 lr: 0.000001 min_lr: 0.000001 loss: 2.6540 (2.4751) weight_decay: 0.0500 (0.0500) time: 0.6906 data: 0.0007 max mem: 57114 Epoch: [299] [150/156] eta: 0:00:04 lr: 0.000001 min_lr: 0.000001 loss: 2.7069 (2.4901) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.0001 max mem: 57114 Epoch: [299] [155/156] eta: 0:00:00 lr: 0.000001 min_lr: 0.000001 loss: 2.7069 (2.4889) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0001 max mem: 57114 Epoch: [299] Total time: 0:01:54 (0.7335 s / it) Averaged stats: lr: 0.000001 min_lr: 0.000001 loss: 2.7069 (2.5071) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:10 loss: 0.6196 (0.6196) acc1: 90.1042 (90.1042) acc5: 98.4375 (98.4375) time: 2.0909 data: 1.8353 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.7903 (0.7446) acc1: 84.3750 (84.5269) acc5: 98.4375 (97.3146) time: 0.5688 data: 0.3671 max mem: 57114 Test: Total time: 0:00:02 (0.5928 s / it) * Acc@1 84.637 Acc@5 97.023 loss 0.823 Accuracy of the model on the 50000 test images: 84.6% Max accuracy: 84.83% Test: [0/5] eta: 0:00:11 loss: 0.5974 (0.5974) acc1: 85.9375 (85.9375) acc5: 96.8750 (96.8750) time: 2.3034 data: 2.0599 max mem: 57114 Test: [4/5] eta: 0:00:00 loss: 0.6825 (0.6835) acc1: 81.7708 (80.4348) acc5: 96.8750 (94.7570) time: 0.6113 data: 0.4121 max mem: 57114 Test: Total time: 0:00:03 (0.6266 s / it) * Acc@1 78.760 Acc@5 94.427 loss 0.814 Accuracy of the model EMA on 50000 test images: 78.8% Max EMA accuracy: 78.76% Training time 10:47:47