diff --git "a/baseline/8c423e94-c6fb-479d-ae6e-c9a673b14135.txt" "b/baseline/8c423e94-c6fb-479d-ae6e-c9a673b14135.txt" new file mode 100644--- /dev/null +++ "b/baseline/8c423e94-c6fb-479d-ae6e-c9a673b14135.txt" @@ -0,0 +1,3809 @@ +==================================================================================================== +import os +import sys +with open(sys.argv[0]) as f: + code = f.read() # read the code of this file ASAP, for logging +import uuid +import glob +import time +from dataclasses import dataclass + +import numpy as np +import torch +from torch import nn +import torch.nn.functional as F +import torch.distributed as dist +import torch._inductor.config as config +from torch.nn.parallel import DistributedDataParallel as DDP + +# ----------------------------------------------------------------------------- +# Muon optimizer + +def zeropower_via_svd(G, steps=None): + U, S, V = G.svd() + return U @ V.T + +@torch.compile +def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7): + """ + Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a + quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose + of minimizing steps, it turns out to be empirically effective to keep increasing the slope at + zero even beyond the point where the iteration no longer converges all the way to one everywhere + on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T + where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model + performance at all relative to UV^T, where USV^T = G is the SVD. + """ + assert len(G.shape) == 2 + a, b, c = (3.4445, -4.7750, 2.0315) + X = G.bfloat16() + X /= (X.norm() + eps) # ensure top singular value <= 1 + if G.size(0) > G.size(1): + X = X.T + for _ in range(steps): + A = X @ X.T + B = A @ X + X = a * X + b * B + c * A @ B + if G.size(0) > G.size(1): + X = X.T + return X + +zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5) + +class Muon(torch.optim.Optimizer): + """ + Muon - MomentUm Orthogonalized by Newton-schulz + + Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- + processing step, in which each 2D parameter's update is replaced with the nearest orthogonal + matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has + the advantage that it can be stably run in bfloat16 on the GPU. + + Some warnings: + - This optimizer assumes that all parameters passed in are 2D. + - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D + parameters; those should all be optimized by a standard method (e.g., AdamW). + - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions. + - We believe it is unlikely to work well for training with small batch size. + - We believe it may not work well for finetuning pretrained models, but we haven't tested this. + - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M). + + Arguments: + lr: The learning rate used by the internal SGD. + momentum: The momentum used by the internal SGD. + nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended) + backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5') + backend_steps: The number of iteration steps to use in the backend, if it is iterative. + """ + def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, + backend='newtonschulz5', backend_steps=5): + defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps) + super().__init__(params, defaults) + + def step(self): + + for group in self.param_groups: + + lr = group['lr'] + momentum = group['momentum'] + zeropower_backend = zeropower_backends[group['backend']] + + # generate weight updates in distributed fashion + total_params = sum(p.numel() for p in group['params']) + updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16) + curr_idx = 0 + for i, p in enumerate(group['params']): + # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs + if i % int(os.environ['WORLD_SIZE']) == int(os.environ['RANK']): + g = p.grad + assert g is not None + state = self.state[p] + if 'momentum_buffer' not in state: + state['momentum_buffer'] = torch.zeros_like(g) + buf = state['momentum_buffer'] + buf.mul_(momentum).add_(g) + if group['nesterov']: + g = g.add(buf, alpha=momentum) + g = zeropower_backend(g, steps=group['backend_steps']) + g *= max(1, g.size(0)/g.size(1))**0.5 + updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten() + curr_idx += p.numel() + + # sync updates across devices. we are not memory-constrained so can do this simple deserialization + dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM) + + # deserialize and apply updates + curr_idx = 0 + for p in group['params']: + g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data) + p.data.add_(g, alpha=-lr) + curr_idx += p.numel() + +# ----------------------------------------------------------------------------- +# PyTorch nn.Module definitions for the GPT-2 model + +class Rotary(torch.nn.Module): + + def __init__(self, dim, base=10000): + super().__init__() + self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim)) + self.seq_len_cached = None + self.cos_cached = None + self.sin_cached = None + + def forward(self, x): + seq_len = x.shape[1] + if seq_len != self.seq_len_cached: + self.seq_len_cached = seq_len + t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq) + freqs = torch.outer(t, self.inv_freq).to(x.device) + self.cos_cached = freqs.cos().bfloat16() + self.sin_cached = freqs.sin().bfloat16() + return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :] + +def apply_rotary_emb(x, cos, sin): + assert x.ndim == 4 # multihead attention + d = x.shape[3]//2 + x1 = x[..., :d] + x2 = x[..., d:] + y1 = x1 * cos + x2 * sin + y2 = x1 * (-sin) + x2 * cos + return torch.cat([y1, y2], 3).type_as(x) + +class CausalSelfAttention(nn.Module): + + def __init__(self, config): + super().__init__() + self.n_head = config.n_head + self.n_embd = config.n_embd + self.head_dim = self.n_embd // self.n_head + assert self.n_embd % self.n_head == 0 + self.c_q = nn.Linear(self.n_embd, self.n_embd, bias=False) + self.c_k = nn.Linear(self.n_embd, self.n_embd, bias=False) + self.c_v = nn.Linear(self.n_embd, self.n_embd, bias=False) + # output projection + self.c_proj = nn.Linear(self.n_embd, self.n_embd, bias=False) + self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 + self.rotary = Rotary(self.head_dim) + self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977 + + def forward(self, x, v1=None): + B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd) + q = self.c_q(x).view(B, T, self.n_head, self.head_dim) + k = self.c_k(x).view(B, T, self.n_head, self.head_dim) + v = self.c_v(x).view(B, T, self.n_head, self.head_dim) + if v1 is None: + v1 = v # This happens if we are in the first block. v needs to be accessed by subsequent blocks + v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977 + cos, sin = self.rotary(q) + q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977 + q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin) + y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True) + y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side + y = self.c_proj(y) + return y, v1 + +class MLP(nn.Module): + + def __init__(self, config): + super().__init__() + self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=False) + self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=False) + self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 + + def forward(self, x): + x = self.c_fc(x) + x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977 + x = self.c_proj(x) + return x + +class Block(nn.Module): + + def __init__(self, config): + super().__init__() + self.attn = CausalSelfAttention(config) + self.mlp = MLP(config) + self.lambdas = nn.Parameter(torch.tensor([1., 0.])) + + def forward(self, x, v1, x0): + x = self.lambdas[0] * x + self.lambdas[1] * x0 + x1, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1) + x = x + x1 + x = x + self.mlp(F.rms_norm(x, (x.size(-1),))) + return x, v1 + +# ----------------------------------------------------------------------------- +# The main GPT-2 model + +@dataclass +class GPTConfig: + vocab_size : int = 50304 + n_layer : int = 12 + n_head : int = 6 # head dim 128 suggested by @Grad62304977 + n_embd : int = 768 + +class GPT(nn.Module): + + def __init__(self, config): + super().__init__() + self.config = config + + self.transformer = nn.ModuleDict(dict( + wte = nn.Embedding(config.vocab_size, config.n_embd), + h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]), + )) + self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False) + self.lm_head.weight.data.zero_() # @Grad62304977 + + def forward(self, idx, targets=None, return_logits=True): + + # forward the GPT model itself + x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) + x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977 + x0 = x + v1 = None + for block in self.transformer.h: + x, v1 = block(x, v1, x0) + x = F.rms_norm(x, (x.size(-1),)) + + if targets is not None: + # if we are given some desired targets also calculate the loss + logits = self.lm_head(x) + logits = 30 * torch.tanh(logits / 30) # @Grad62304977 + logits = logits.float() # use tf32/fp32 for logits + loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1) + else: + # inference-time mini-optimization: only forward the lm_head on the very last position + logits = self.lm_head(x[:, [-1], :]) # note: using list [-1] to preserve the time dim + logits = 30 * torch.tanh(logits / 30) # @Grad62304977 + logits = logits.float() # use tf32/fp32 for logits + loss = None + + # there are performance reasons why not returning logits is prudent, if not needed + if not return_logits: + logits = None + + return logits, loss + +# ----------------------------------------------------------------------------- +# Our own simple Distributed Data Loader + +def _peek_data_shard(filename): + # only reads the header, returns header data + with open(filename, "rb") as f: + # first read the header, which is 256 int32 integers (4 bytes each) + header = np.frombuffer(f.read(256*4), dtype=np.int32) + if header[0] != 20240520: + print("ERROR: magic number mismatch in the data .bin file!") + print("---> HINT: Are you passing in a correct file with --input_bin?") + print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README") + print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try") + exit(1) + assert header[1] == 1, "unsupported version" + ntok = header[2] # number of tokens (claimed) + return ntok # for now just return the number of tokens + +def _load_data_shard(filename): + with open(filename, "rb") as f: + # first read the header, which is 256 int32 integers (4 bytes each) + header = np.frombuffer(f.read(256*4), dtype=np.int32) + assert header[0] == 20240520, "magic number mismatch in the data .bin file" + assert header[1] == 1, "unsupported version" + ntok = header[2] # number of tokens (claimed) + # the rest of it are tokens, stored as uint16 + tokens = np.frombuffer(f.read(), dtype=np.uint16) + assert len(tokens) == ntok, "number of tokens read does not match header?" + return tokens + +class DistributedDataLoader: + def __init__(self, filename_pattern, B, T, process_rank, num_processes): + self.process_rank = process_rank + self.num_processes = num_processes + self.B = B + self.T = T + + # glob files that match the pattern + self.files = sorted(glob.glob(filename_pattern)) + assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}" + + # load and validate all data shards, count number of tokens in total + ntok_total = 0 + for fname in self.files: + shard_ntok = _peek_data_shard(fname) + assert shard_ntok >= num_processes * B * T + 1 + ntok_total += int(shard_ntok) + self.ntok_total = ntok_total + + # kick things off + self.reset() + + def reset(self): + self.current_shard = 0 + self.current_position = self.process_rank * self.B * self.T + self.tokens = _load_data_shard(self.files[self.current_shard]) + + def advance(self): # advance to next data shard + self.current_shard = (self.current_shard + 1) % len(self.files) + self.current_position = self.process_rank * self.B * self.T + self.tokens = _load_data_shard(self.files[self.current_shard]) + + def next_batch(self): + B = self.B + T = self.T + buf = self.tokens[self.current_position : self.current_position+B*T+1] + buf = torch.tensor(buf.astype(np.int32), dtype=torch.long) + x = (buf[:-1]).view(B, T) # inputs + y = (buf[1:]).view(B, T) # targets + # advance current position and load next shard if necessary + self.current_position += B * T * self.num_processes + if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens): + self.advance() + return x.cuda(), y.cuda() + +# ----------------------------------------------------------------------------- +# int main + +@dataclass +class Hyperparameters: + # data hyperparams + input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on + input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on + # optimization hyperparams + batch_size : int = 8*64 # batch size, in sequences, across all devices + device_batch_size : int = 32 # batch size, in sequences, per device + sequence_length : int = 1024 # sequence length, in tokens + num_iterations : int = 3200 # number of iterations to run + warmup_iters : int = 0 + warmdown_iters : int = 914 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule + weight_decay : float = 0 + # evaluation and logging hyperparams + val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end + val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons + save_every : int = 320 # every how many steps to save the checkpoint? 0 for only at the end +args = Hyperparameters() + +# set up DDP (distributed data parallel). torchrun sets this env variable +assert torch.cuda.is_available() +dist.init_process_group(backend='nccl') +ddp_rank = int(os.environ['RANK']) +ddp_local_rank = int(os.environ['LOCAL_RANK']) +ddp_world_size = int(os.environ['WORLD_SIZE']) +device = f'cuda:{ddp_local_rank}' +torch.cuda.set_device(device) +print(f"using device: {device}") +master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc. + +# convenience variables +B, T = args.device_batch_size, args.sequence_length +# calculate the number of steps to take in the val loop. +assert args.val_tokens % (B * T * ddp_world_size) == 0 +val_steps = args.val_tokens // (B * T * ddp_world_size) +# calculate the steps of gradient accumulation required to attain the desired global batch size. +assert args.batch_size % (B * ddp_world_size) == 0 +train_accumulation_steps = args.batch_size // (B * ddp_world_size) + +# load tokens +train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size) +val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size) +if master_process: + print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files") + print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files") +x, y = train_loader.next_batch() + +# there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977. +# this originates from Karpathy's experiments. +num_vocab = 50304 +model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768)) +model = model.cuda() +if hasattr(config, "coordinate_descent_tuning"): + config.coordinate_descent_tuning = True # suggested by @Chillee +model = torch.compile(model) +# here we wrap model into DDP container +model = DDP(model, device_ids=[ddp_local_rank]) +raw_model = model.module # always contains the "raw" unwrapped model +ctx = torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16) + +# CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1 +from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp +enable_cudnn_sdp(True) +enable_flash_sdp(False) +enable_mem_efficient_sdp(False) +enable_math_sdp(False) + +# init the optimizer(s) +optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3, betas=(0.9, 0.95), fused=True) +optimizer2 = torch.optim.Adam([raw_model.lm_head.weight], lr=0.002, betas=(0.9, 0.95), fused=True) +params = list(raw_model.transformer.h.parameters()) +matrix_params = [p for p in params if p.ndim == 2] +scalar_params = [p for p in params if p.ndim < 2] +optimizer3 = Muon(matrix_params, lr=0.02, momentum=0.95) +optimizer4 = torch.optim.Adam(scalar_params, lr=0.02, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned +optimizers = [optimizer1, optimizer2, optimizer3, optimizer4] +# learning rate decay scheduler (linear warmup and warmdown) +def get_lr(it): + assert it <= args.num_iterations + # 1) linear warmup for warmup_iters steps + if it < args.warmup_iters: + return (it+1) / args.warmup_iters + # 2) constant lr for a while + elif it < args.num_iterations - args.warmdown_iters: + return 1.0 + # 3) linear warmdown + else: + decay_ratio = (args.num_iterations - it) / args.warmdown_iters + return decay_ratio +schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers] + +# begin logging +if master_process: + run_id = str(uuid.uuid4()) + logdir = 'logs/%s/' % run_id + os.makedirs(logdir, exist_ok=True) + logfile = 'logs/%s.txt' % run_id + # create the log file + with open(logfile, "w") as f: + # begin the log by printing this file (the Python code) + f.write('='*100 + '\n') + f.write(code) + f.write('='*100 + '\n') + # log information about the hardware/software environment this is running on + # and print the full `nvidia-smi` to file + f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n") + import subprocess + result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + f.write(f'{result.stdout}\n') + f.write('='*100 + '\n') + +training_time_ms = 0 +# start the clock +torch.cuda.synchronize() +t0 = time.time() +# begin training +train_loader.reset() +for step in range(args.num_iterations + 1): + last_step = (step == args.num_iterations) + # This effectively ignores timing first 10 steps, which are slower for weird reasons. + # Alternately, and slightly more correctly in terms of benchmarking, we could do 10 + # steps with dummy data first, and then re-initialize the model and reset the loader. + if step == 10: + training_time_ms = 0 + t0 = time.time() + timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val + + # once in a while evaluate the validation dataset + if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)): + # stop the clock + torch.cuda.synchronize() + training_time_ms += 1000 * (time.time() - t0) + # run validation batches + model.eval() + val_loader.reset() + val_loss = 0.0 + for _ in range(val_steps): + x_val, y_val = val_loader.next_batch() + with ctx: # of course, we'd like to use no_grad() here too, but that creates a torch.compile error for some reason + _, loss = model(x_val, y_val, return_logits=False) + val_loss += loss.detach() + del loss + dist.all_reduce(val_loss, op=dist.ReduceOp.AVG) + val_loss /= val_steps + # log val loss to console and to logfile + if master_process: + print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms') + with open(logfile, "a") as f: + f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n') + # start the clock again + torch.cuda.synchronize() + t0 = time.time() + + if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)): + # stop the clock + torch.cuda.synchronize() + training_time_ms += 1000 * (time.time() - t0) + # save the state of the training process + log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers]) + torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step)) + # start the clock again + torch.cuda.synchronize() + t0 = time.time() + + # bit confusing: we want to make sure to eval on 0th iteration + # but also after the very last iteration. so we loop for step <= num_iterations + # instead of just < num_iterations (one extra due to <=), only to do + # the validation/sampling one last time, and then we break right here as we're done. + if last_step: + break + + # --------------- TRAINING SECTION BEGIN ----------------- + model.train() + for i in range(1, train_accumulation_steps+1): + # forward pass + with ctx: + _, loss = model(x, y, return_logits=False) + train_loss = loss.detach() + # advance the dataset for the next batch + x, y = train_loader.next_batch() + # backward pass + if i < train_accumulation_steps: + with model.no_sync(): # there's no need to sync gradients every accumulation step + loss.backward() + else: + loss.backward() # just sync on the last step + for p in model.parameters(): + p.grad /= train_accumulation_steps + # momentum warmup for Muon + frac = min(step/500, 1) + optimizer3.param_groups[0]['momentum'] = (1 - frac) * 0.85 + frac * 0.95 + # step the optimizers and schedulers + for opt, sched in zip(optimizers, schedulers): + opt.step() + sched.step() + # null the gradients + model.zero_grad(set_to_none=True) + # --------------- TRAINING SECTION END ------------------- + # everything that follows now is just diagnostics, prints, logging, etc. + + #dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower + if master_process: + approx_time = training_time_ms + 1000 * (time.time() - t0) + print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms") + with open(logfile, "a") as f: + f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n") + +if master_process: + print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB") + +# ------------------------------------------------------------------------- +# clean up nice +dist.destroy_process_group() +==================================================================================================== +Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 +nvidia-smi: +Fri Nov 8 18:04:18 2024 ++---------------------------------------------------------------------------------------+ +| NVIDIA-SMI 535.183.01 Driver Version: 535.183.01 CUDA Version: 12.2 | +|-----------------------------------------+----------------------+----------------------+ +| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|=========================================+======================+======================| +| 0 NVIDIA L40S On | 00000000:34:00.0 Off | 0 | +| N/A 31C P0 72W / 350W | 2724MiB / 46068MiB | 8% Default | +| | | N/A | ++-----------------------------------------+----------------------+----------------------+ + ++---------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=======================================================================================| ++---------------------------------------------------------------------------------------+ + +==================================================================================================== +step:0/3200 val_loss:10.8259 train_time:219ms step_avg:nanms +step:1/3200 train_loss:10.8258 train_time:48353ms step_avg:nanms +step:2/3200 train_loss:10.6001 train_time:52153ms step_avg:nanms +step:3/3200 train_loss:10.0403 train_time:56071ms step_avg:nanms +step:4/3200 train_loss:9.1487 train_time:60007ms step_avg:nanms +step:5/3200 train_loss:8.1693 train_time:63952ms step_avg:nanms +step:6/3200 train_loss:7.6040 train_time:67904ms step_avg:nanms +step:7/3200 train_loss:7.2514 train_time:71863ms step_avg:nanms +step:8/3200 train_loss:7.0003 train_time:75825ms step_avg:nanms +step:9/3200 train_loss:7.2817 train_time:79781ms step_avg:nanms +step:10/3200 train_loss:6.7501 train_time:83747ms step_avg:nanms +step:11/3200 train_loss:6.9062 train_time:3981ms step_avg:nanms +step:12/3200 train_loss:6.5774 train_time:7972ms step_avg:nanms +step:13/3200 train_loss:6.5117 train_time:11951ms step_avg:3983.73ms +step:14/3200 train_loss:6.5683 train_time:15938ms step_avg:3984.55ms +step:15/3200 train_loss:6.3263 train_time:19940ms step_avg:3988.03ms +step:16/3200 train_loss:6.2147 train_time:23952ms step_avg:3992.08ms +step:17/3200 train_loss:6.2890 train_time:27980ms step_avg:3997.12ms +step:18/3200 train_loss:6.3679 train_time:32014ms step_avg:4001.75ms +step:19/3200 train_loss:6.3626 train_time:36070ms step_avg:4007.73ms +step:20/3200 train_loss:6.4105 train_time:40148ms step_avg:4014.79ms +step:21/3200 train_loss:6.3490 train_time:44248ms step_avg:4022.52ms +step:22/3200 train_loss:6.0031 train_time:48373ms step_avg:4031.12ms +step:23/3200 train_loss:5.7725 train_time:52526ms step_avg:4040.46ms +step:24/3200 train_loss:6.3310 train_time:56701ms step_avg:4050.06ms +step:25/3200 train_loss:6.3124 train_time:60879ms step_avg:4058.60ms +step:26/3200 train_loss:6.0229 train_time:65058ms step_avg:4066.09ms +step:27/3200 train_loss:6.1184 train_time:69238ms step_avg:4072.80ms +step:28/3200 train_loss:6.1316 train_time:73415ms step_avg:4078.64ms +step:29/3200 train_loss:6.0346 train_time:77594ms step_avg:4083.92ms +step:30/3200 train_loss:6.1556 train_time:81774ms step_avg:4088.68ms +step:31/3200 train_loss:5.9850 train_time:85953ms step_avg:4092.99ms +step:32/3200 train_loss:6.0302 train_time:90138ms step_avg:4097.20ms +step:33/3200 train_loss:6.0273 train_time:94334ms step_avg:4101.46ms +step:34/3200 train_loss:5.9602 train_time:98553ms step_avg:4106.37ms +step:35/3200 train_loss:5.9009 train_time:102753ms step_avg:4110.12ms +step:36/3200 train_loss:5.9463 train_time:106943ms step_avg:4113.19ms +step:37/3200 train_loss:5.9439 train_time:111093ms step_avg:4114.55ms +step:38/3200 train_loss:6.2273 train_time:115219ms step_avg:4114.95ms +step:39/3200 train_loss:6.3516 train_time:119329ms step_avg:4114.81ms +step:40/3200 train_loss:5.9282 train_time:123423ms step_avg:4114.11ms +step:41/3200 train_loss:5.9061 train_time:127505ms step_avg:4113.06ms +step:42/3200 train_loss:5.8045 train_time:131582ms step_avg:4111.93ms +step:43/3200 train_loss:5.6552 train_time:135658ms step_avg:4110.83ms +step:44/3200 train_loss:5.6186 train_time:139726ms step_avg:4109.59ms +step:45/3200 train_loss:5.5809 train_time:143793ms step_avg:4108.37ms +step:46/3200 train_loss:5.7056 train_time:147856ms step_avg:4107.12ms +step:47/3200 train_loss:5.7148 train_time:151917ms step_avg:4105.87ms +step:48/3200 train_loss:5.4491 train_time:155977ms step_avg:4104.65ms +step:49/3200 train_loss:5.6130 train_time:160036ms step_avg:4103.48ms +step:50/3200 train_loss:5.5373 train_time:164102ms step_avg:4102.55ms +step:51/3200 train_loss:5.4612 train_time:168166ms step_avg:4101.62ms +step:52/3200 train_loss:5.5302 train_time:172239ms step_avg:4100.93ms +step:53/3200 train_loss:5.4676 train_time:176316ms step_avg:4100.37ms +step:54/3200 train_loss:5.5463 train_time:180399ms step_avg:4099.97ms +step:55/3200 train_loss:5.4484 train_time:184485ms step_avg:4099.66ms +step:56/3200 train_loss:5.5138 train_time:188589ms step_avg:4099.76ms +step:57/3200 train_loss:5.5702 train_time:192699ms step_avg:4099.99ms +step:58/3200 train_loss:5.2871 train_time:196824ms step_avg:4100.50ms +step:59/3200 train_loss:5.3233 train_time:200955ms step_avg:4101.13ms +step:60/3200 train_loss:5.1519 train_time:205095ms step_avg:4101.89ms +step:61/3200 train_loss:5.4645 train_time:209272ms step_avg:4103.38ms +step:62/3200 train_loss:5.4996 train_time:213451ms step_avg:4104.83ms +step:63/3200 train_loss:5.4123 train_time:217658ms step_avg:4106.76ms +step:64/3200 train_loss:5.5326 train_time:221895ms step_avg:4109.17ms +step:65/3200 train_loss:5.5284 train_time:226132ms step_avg:4111.50ms +step:66/3200 train_loss:5.4288 train_time:230324ms step_avg:4112.94ms +step:67/3200 train_loss:5.4468 train_time:234461ms step_avg:4113.34ms +step:68/3200 train_loss:5.4959 train_time:238572ms step_avg:4113.30ms +step:69/3200 train_loss:5.3346 train_time:242655ms step_avg:4112.79ms +step:70/3200 train_loss:5.1385 train_time:246717ms step_avg:4111.95ms +step:71/3200 train_loss:5.1542 train_time:250757ms step_avg:4110.78ms +step:72/3200 train_loss:5.3235 train_time:254819ms step_avg:4109.99ms +step:73/3200 train_loss:5.3034 train_time:258869ms step_avg:4109.03ms +step:74/3200 train_loss:5.2726 train_time:262913ms step_avg:4108.02ms +step:75/3200 train_loss:5.3384 train_time:266943ms step_avg:4106.82ms +step:76/3200 train_loss:5.2889 train_time:270970ms step_avg:4105.61ms +step:77/3200 train_loss:5.2573 train_time:274991ms step_avg:4104.34ms +step:78/3200 train_loss:5.4338 train_time:279012ms step_avg:4103.12ms +step:79/3200 train_loss:5.0812 train_time:283031ms step_avg:4101.90ms +step:80/3200 train_loss:5.2128 train_time:287045ms step_avg:4100.64ms +step:81/3200 train_loss:4.9174 train_time:291056ms step_avg:4099.38ms +step:82/3200 train_loss:5.0614 train_time:295072ms step_avg:4098.22ms +step:83/3200 train_loss:5.1598 train_time:299085ms step_avg:4097.05ms +step:84/3200 train_loss:5.1010 train_time:303098ms step_avg:4095.92ms +step:85/3200 train_loss:4.9514 train_time:307111ms step_avg:4094.81ms +step:86/3200 train_loss:4.9858 train_time:311126ms step_avg:4093.76ms +step:87/3200 train_loss:5.1761 train_time:315146ms step_avg:4092.80ms +step:88/3200 train_loss:5.0091 train_time:319166ms step_avg:4091.88ms +step:89/3200 train_loss:5.1585 train_time:323184ms step_avg:4090.93ms +step:90/3200 train_loss:5.1347 train_time:327205ms step_avg:4090.06ms +step:91/3200 train_loss:5.1060 train_time:331233ms step_avg:4089.30ms +step:92/3200 train_loss:5.0316 train_time:335264ms step_avg:4088.58ms +step:93/3200 train_loss:5.0712 train_time:339304ms step_avg:4088.00ms +step:94/3200 train_loss:5.1352 train_time:343345ms step_avg:4087.44ms +step:95/3200 train_loss:4.9161 train_time:347391ms step_avg:4086.95ms +step:96/3200 train_loss:5.0715 train_time:351440ms step_avg:4086.51ms +step:97/3200 train_loss:4.9629 train_time:355479ms step_avg:4085.97ms +step:98/3200 train_loss:5.0630 train_time:359536ms step_avg:4085.63ms +step:99/3200 train_loss:5.0076 train_time:363597ms step_avg:4085.36ms +step:100/3200 train_loss:4.9957 train_time:367671ms step_avg:4085.24ms +step:101/3200 train_loss:5.0037 train_time:371758ms step_avg:4085.26ms +step:102/3200 train_loss:4.9098 train_time:375853ms step_avg:4085.36ms +step:103/3200 train_loss:5.0799 train_time:379970ms step_avg:4085.70ms +step:104/3200 train_loss:4.8957 train_time:384096ms step_avg:4086.13ms +step:105/3200 train_loss:4.9924 train_time:388253ms step_avg:4086.88ms +step:106/3200 train_loss:4.9174 train_time:392437ms step_avg:4087.89ms +step:107/3200 train_loss:4.6095 train_time:396644ms step_avg:4089.11ms +step:108/3200 train_loss:4.8999 train_time:400878ms step_avg:4090.59ms +step:109/3200 train_loss:4.8402 train_time:405072ms step_avg:4091.64ms +step:110/3200 train_loss:4.7280 train_time:409202ms step_avg:4092.02ms +step:111/3200 train_loss:4.8738 train_time:413280ms step_avg:4091.88ms +step:112/3200 train_loss:4.8569 train_time:417319ms step_avg:4091.36ms +step:113/3200 train_loss:4.8117 train_time:421358ms step_avg:4090.86ms +step:114/3200 train_loss:4.8431 train_time:425379ms step_avg:4090.18ms +step:115/3200 train_loss:4.7878 train_time:429381ms step_avg:4089.34ms +step:116/3200 train_loss:4.9981 train_time:433373ms step_avg:4088.42ms +step:117/3200 train_loss:4.7663 train_time:437356ms step_avg:4087.44ms +step:118/3200 train_loss:4.8516 train_time:441333ms step_avg:4086.42ms +step:119/3200 train_loss:4.7922 train_time:445303ms step_avg:4085.35ms +step:120/3200 train_loss:4.6265 train_time:449273ms step_avg:4084.30ms +step:121/3200 train_loss:4.7673 train_time:453235ms step_avg:4083.20ms +step:122/3200 train_loss:4.4846 train_time:457188ms step_avg:4082.04ms +step:123/3200 train_loss:4.7519 train_time:461163ms step_avg:4081.09ms +step:124/3200 train_loss:4.7385 train_time:465139ms step_avg:4080.17ms +step:125/3200 train_loss:4.8081 train_time:469112ms step_avg:4079.23ms +step:125/3200 val_loss:4.7168 train_time:469112ms step_avg:4079.23ms +step:126/3200 train_loss:4.6532 train_time:473086ms step_avg:4078.33ms +step:127/3200 train_loss:4.6349 train_time:477061ms step_avg:4077.44ms +step:128/3200 train_loss:4.7974 train_time:481031ms step_avg:4076.53ms +step:129/3200 train_loss:4.6673 train_time:485007ms step_avg:4075.69ms +step:130/3200 train_loss:4.6959 train_time:488982ms step_avg:4074.85ms +step:131/3200 train_loss:4.8231 train_time:492954ms step_avg:4074.00ms +step:132/3200 train_loss:4.6620 train_time:496932ms step_avg:4073.21ms +step:133/3200 train_loss:4.5921 train_time:500909ms step_avg:4072.43ms +step:134/3200 train_loss:4.6457 train_time:504890ms step_avg:4071.69ms +step:135/3200 train_loss:4.7013 train_time:508864ms step_avg:4070.91ms +step:136/3200 train_loss:4.5077 train_time:512847ms step_avg:4070.22ms +step:137/3200 train_loss:4.6344 train_time:516834ms step_avg:4069.56ms +step:138/3200 train_loss:4.9498 train_time:520819ms step_avg:4068.90ms +step:139/3200 train_loss:4.5726 train_time:524805ms step_avg:4068.26ms +step:140/3200 train_loss:4.5680 train_time:528796ms step_avg:4067.66ms +step:141/3200 train_loss:4.5759 train_time:532790ms step_avg:4067.10ms +step:142/3200 train_loss:4.5972 train_time:536787ms step_avg:4066.57ms +step:143/3200 train_loss:4.4597 train_time:540787ms step_avg:4066.07ms +step:144/3200 train_loss:4.5055 train_time:544796ms step_avg:4065.64ms +step:145/3200 train_loss:4.7178 train_time:548806ms step_avg:4065.23ms +step:146/3200 train_loss:4.5062 train_time:552822ms step_avg:4064.87ms +step:147/3200 train_loss:4.4491 train_time:556838ms step_avg:4064.51ms +step:148/3200 train_loss:4.5011 train_time:560836ms step_avg:4064.03ms +step:149/3200 train_loss:4.6435 train_time:564836ms step_avg:4063.57ms +step:150/3200 train_loss:4.5146 train_time:568840ms step_avg:4063.14ms +step:151/3200 train_loss:4.5807 train_time:572855ms step_avg:4062.80ms +step:152/3200 train_loss:4.4452 train_time:576875ms step_avg:4062.50ms +step:153/3200 train_loss:4.6090 train_time:580907ms step_avg:4062.28ms +step:154/3200 train_loss:4.5008 train_time:584943ms step_avg:4062.10ms +step:155/3200 train_loss:4.4837 train_time:588971ms step_avg:4061.87ms +step:156/3200 train_loss:4.6117 train_time:593004ms step_avg:4061.67ms +step:157/3200 train_loss:4.4686 train_time:597058ms step_avg:4061.62ms +step:158/3200 train_loss:4.4039 train_time:601128ms step_avg:4061.68ms +step:159/3200 train_loss:4.2918 train_time:605220ms step_avg:4061.88ms +step:160/3200 train_loss:4.5800 train_time:609322ms step_avg:4062.14ms +step:161/3200 train_loss:4.2625 train_time:613442ms step_avg:4062.53ms +step:162/3200 train_loss:4.4641 train_time:617595ms step_avg:4063.13ms +step:163/3200 train_loss:4.3824 train_time:621783ms step_avg:4063.94ms +step:164/3200 train_loss:4.4151 train_time:625987ms step_avg:4064.85ms +step:165/3200 train_loss:4.4252 train_time:630206ms step_avg:4065.84ms +step:166/3200 train_loss:4.2249 train_time:634399ms step_avg:4066.66ms +step:167/3200 train_loss:4.4236 train_time:638534ms step_avg:4067.10ms +step:168/3200 train_loss:4.3081 train_time:642635ms step_avg:4067.31ms +step:169/3200 train_loss:4.3591 train_time:646718ms step_avg:4067.41ms +step:170/3200 train_loss:4.0883 train_time:650777ms step_avg:4067.35ms +step:171/3200 train_loss:4.5466 train_time:654815ms step_avg:4067.17ms +step:172/3200 train_loss:4.4462 train_time:658849ms step_avg:4066.97ms +step:173/3200 train_loss:4.2299 train_time:662875ms step_avg:4066.72ms +step:174/3200 train_loss:4.3446 train_time:666907ms step_avg:4066.51ms +step:175/3200 train_loss:4.2011 train_time:670942ms step_avg:4066.31ms +step:176/3200 train_loss:4.2611 train_time:674969ms step_avg:4066.08ms +step:177/3200 train_loss:4.2148 train_time:678991ms step_avg:4065.82ms +step:178/3200 train_loss:4.4734 train_time:683007ms step_avg:4065.52ms +step:179/3200 train_loss:4.3088 train_time:687020ms step_avg:4065.21ms +step:180/3200 train_loss:4.3297 train_time:691033ms step_avg:4064.90ms +step:181/3200 train_loss:4.3155 train_time:695044ms step_avg:4064.58ms +step:182/3200 train_loss:4.4256 train_time:699055ms step_avg:4064.27ms +step:183/3200 train_loss:4.2002 train_time:703069ms step_avg:4063.98ms +step:184/3200 train_loss:4.2397 train_time:707083ms step_avg:4063.69ms +step:185/3200 train_loss:4.3318 train_time:711096ms step_avg:4063.40ms +step:186/3200 train_loss:4.0248 train_time:715117ms step_avg:4063.16ms +step:187/3200 train_loss:4.3952 train_time:719140ms step_avg:4062.94ms +step:188/3200 train_loss:4.2798 train_time:723164ms step_avg:4062.72ms +step:189/3200 train_loss:4.1206 train_time:727192ms step_avg:4062.52ms +step:190/3200 train_loss:4.1656 train_time:731226ms step_avg:4062.36ms +step:191/3200 train_loss:4.2515 train_time:735256ms step_avg:4062.19ms +step:192/3200 train_loss:4.2286 train_time:739293ms step_avg:4062.05ms +step:193/3200 train_loss:4.0183 train_time:743320ms step_avg:4061.86ms +step:194/3200 train_loss:4.3272 train_time:747351ms step_avg:4061.69ms +step:195/3200 train_loss:4.2061 train_time:751392ms step_avg:4061.58ms +step:196/3200 train_loss:4.2792 train_time:755440ms step_avg:4061.51ms +step:197/3200 train_loss:4.1198 train_time:759502ms step_avg:4061.51ms +step:198/3200 train_loss:4.3033 train_time:763577ms step_avg:4061.58ms +step:199/3200 train_loss:4.0874 train_time:767658ms step_avg:4061.68ms +step:200/3200 train_loss:4.5550 train_time:771757ms step_avg:4061.88ms +step:201/3200 train_loss:4.3229 train_time:775881ms step_avg:4062.20ms +step:202/3200 train_loss:4.2404 train_time:780017ms step_avg:4062.59ms +step:203/3200 train_loss:4.6433 train_time:784193ms step_avg:4063.18ms +step:204/3200 train_loss:4.2239 train_time:788395ms step_avg:4063.89ms +step:205/3200 train_loss:4.2252 train_time:792593ms step_avg:4064.58ms +step:206/3200 train_loss:4.1525 train_time:796780ms step_avg:4065.20ms +step:207/3200 train_loss:4.2365 train_time:800885ms step_avg:4065.40ms +step:208/3200 train_loss:4.3107 train_time:804962ms step_avg:4065.46ms +step:209/3200 train_loss:4.2079 train_time:809012ms step_avg:4065.39ms +step:210/3200 train_loss:4.1494 train_time:813037ms step_avg:4065.19ms +step:211/3200 train_loss:4.1654 train_time:817060ms step_avg:4064.97ms +step:212/3200 train_loss:4.1329 train_time:821079ms step_avg:4064.75ms +step:213/3200 train_loss:4.4645 train_time:825091ms step_avg:4064.49ms +step:214/3200 train_loss:4.2647 train_time:829091ms step_avg:4064.17ms +step:215/3200 train_loss:4.2045 train_time:833090ms step_avg:4063.86ms +step:216/3200 train_loss:4.1214 train_time:837081ms step_avg:4063.50ms +step:217/3200 train_loss:4.2397 train_time:841070ms step_avg:4063.14ms +step:218/3200 train_loss:4.2035 train_time:845056ms step_avg:4062.77ms +step:219/3200 train_loss:4.1313 train_time:849042ms step_avg:4062.40ms +step:220/3200 train_loss:4.1508 train_time:853024ms step_avg:4062.02ms +step:221/3200 train_loss:3.9857 train_time:857025ms step_avg:4061.73ms +step:222/3200 train_loss:4.0367 train_time:861027ms step_avg:4061.45ms +step:223/3200 train_loss:4.2728 train_time:865026ms step_avg:4061.15ms +step:224/3200 train_loss:4.0811 train_time:869025ms step_avg:4060.86ms +step:225/3200 train_loss:4.4851 train_time:873026ms step_avg:4060.59ms +step:226/3200 train_loss:4.0461 train_time:877034ms step_avg:4060.34ms +step:227/3200 train_loss:4.0561 train_time:881039ms step_avg:4060.09ms +step:228/3200 train_loss:4.1409 train_time:885042ms step_avg:4059.82ms +step:229/3200 train_loss:4.2043 train_time:889045ms step_avg:4059.57ms +step:230/3200 train_loss:4.1884 train_time:893053ms step_avg:4059.33ms +step:231/3200 train_loss:4.1180 train_time:897065ms step_avg:4059.12ms +step:232/3200 train_loss:4.2275 train_time:901066ms step_avg:4058.85ms +step:233/3200 train_loss:4.1199 train_time:905057ms step_avg:4058.55ms +step:234/3200 train_loss:4.0663 train_time:909051ms step_avg:4058.26ms +step:235/3200 train_loss:4.0769 train_time:913048ms step_avg:4057.99ms +step:236/3200 train_loss:4.2146 train_time:917047ms step_avg:4057.73ms +step:237/3200 train_loss:4.0606 train_time:921055ms step_avg:4057.51ms +step:238/3200 train_loss:4.1468 train_time:925066ms step_avg:4057.31ms +step:239/3200 train_loss:4.1442 train_time:929084ms step_avg:4057.14ms +step:240/3200 train_loss:4.1090 train_time:933107ms step_avg:4056.99ms +step:241/3200 train_loss:4.3047 train_time:937138ms step_avg:4056.87ms +step:242/3200 train_loss:3.9476 train_time:941158ms step_avg:4056.71ms +step:243/3200 train_loss:4.1284 train_time:945193ms step_avg:4056.62ms +step:244/3200 train_loss:4.0756 train_time:949233ms step_avg:4056.55ms +step:245/3200 train_loss:4.0647 train_time:953275ms step_avg:4056.49ms +step:246/3200 train_loss:3.9913 train_time:957333ms step_avg:4056.50ms +step:247/3200 train_loss:4.0787 train_time:961412ms step_avg:4056.59ms +step:248/3200 train_loss:4.2475 train_time:965512ms step_avg:4056.77ms +step:249/3200 train_loss:3.8390 train_time:969618ms step_avg:4056.98ms +step:250/3200 train_loss:4.0433 train_time:973752ms step_avg:4057.30ms +step:250/3200 val_loss:4.0971 train_time:973752ms step_avg:4057.30ms +step:251/3200 train_loss:3.9808 train_time:977957ms step_avg:4057.91ms +step:252/3200 train_loss:4.2669 train_time:982158ms step_avg:4058.50ms +step:253/3200 train_loss:4.1900 train_time:986355ms step_avg:4059.08ms +step:254/3200 train_loss:4.0999 train_time:990553ms step_avg:4059.65ms +step:255/3200 train_loss:4.0796 train_time:994751ms step_avg:4060.21ms +step:256/3200 train_loss:4.1820 train_time:998950ms step_avg:4060.77ms +step:257/3200 train_loss:3.9326 train_time:1003148ms step_avg:4061.33ms +step:258/3200 train_loss:4.2898 train_time:1007346ms step_avg:4061.88ms +step:259/3200 train_loss:4.1009 train_time:1011544ms step_avg:4062.43ms +step:260/3200 train_loss:4.1023 train_time:1015744ms step_avg:4062.98ms +step:261/3200 train_loss:4.3948 train_time:1019944ms step_avg:4063.52ms +step:262/3200 train_loss:4.0706 train_time:1024139ms step_avg:4064.04ms +step:263/3200 train_loss:4.0574 train_time:1028316ms step_avg:4064.49ms +step:264/3200 train_loss:4.0599 train_time:1032486ms step_avg:4064.91ms +step:265/3200 train_loss:3.9921 train_time:1036619ms step_avg:4065.17ms +step:266/3200 train_loss:4.1441 train_time:1040752ms step_avg:4065.44ms +step:267/3200 train_loss:4.1208 train_time:1044885ms step_avg:4065.70ms +step:268/3200 train_loss:4.0718 train_time:1049008ms step_avg:4065.92ms +step:269/3200 train_loss:4.2623 train_time:1053138ms step_avg:4066.17ms +step:270/3200 train_loss:4.1337 train_time:1057269ms step_avg:4066.42ms +step:271/3200 train_loss:4.0549 train_time:1061406ms step_avg:4066.69ms +step:272/3200 train_loss:4.3276 train_time:1065542ms step_avg:4066.95ms +step:273/3200 train_loss:4.0379 train_time:1069703ms step_avg:4067.31ms +step:274/3200 train_loss:4.1289 train_time:1073877ms step_avg:4067.72ms +step:275/3200 train_loss:4.1451 train_time:1078065ms step_avg:4068.17ms +step:276/3200 train_loss:4.1030 train_time:1082261ms step_avg:4068.65ms +step:277/3200 train_loss:4.0498 train_time:1086437ms step_avg:4069.05ms +step:278/3200 train_loss:3.9972 train_time:1090587ms step_avg:4069.35ms +step:279/3200 train_loss:3.9631 train_time:1094707ms step_avg:4069.54ms +step:280/3200 train_loss:4.0161 train_time:1098827ms step_avg:4069.73ms +step:281/3200 train_loss:4.2244 train_time:1102930ms step_avg:4069.85ms +step:282/3200 train_loss:3.9931 train_time:1107023ms step_avg:4069.94ms +step:283/3200 train_loss:3.9750 train_time:1111115ms step_avg:4070.02ms +step:284/3200 train_loss:4.1135 train_time:1115209ms step_avg:4070.10ms +step:285/3200 train_loss:4.0300 train_time:1119302ms step_avg:4070.19ms +step:286/3200 train_loss:4.1779 train_time:1123387ms step_avg:4070.24ms +step:287/3200 train_loss:4.0074 train_time:1127483ms step_avg:4070.33ms +step:288/3200 train_loss:4.0244 train_time:1131580ms step_avg:4070.43ms +step:289/3200 train_loss:4.0346 train_time:1135685ms step_avg:4070.55ms +step:290/3200 train_loss:3.7945 train_time:1139798ms step_avg:4070.71ms +step:291/3200 train_loss:4.0534 train_time:1143922ms step_avg:4070.90ms +step:292/3200 train_loss:4.1674 train_time:1148053ms step_avg:4071.11ms +step:293/3200 train_loss:3.9250 train_time:1152197ms step_avg:4071.37ms +step:294/3200 train_loss:4.0503 train_time:1156358ms step_avg:4071.68ms +step:295/3200 train_loss:3.8258 train_time:1160560ms step_avg:4072.14ms +step:296/3200 train_loss:4.2619 train_time:1164760ms step_avg:4072.59ms +step:297/3200 train_loss:4.0592 train_time:1168962ms step_avg:4073.04ms +step:298/3200 train_loss:4.0955 train_time:1173187ms step_avg:4073.57ms +step:299/3200 train_loss:3.8285 train_time:1177399ms step_avg:4074.04ms +step:300/3200 train_loss:4.0239 train_time:1181592ms step_avg:4074.46ms +step:301/3200 train_loss:4.0917 train_time:1185787ms step_avg:4074.87ms +step:302/3200 train_loss:3.9671 train_time:1189961ms step_avg:4075.21ms +step:303/3200 train_loss:3.9037 train_time:1194101ms step_avg:4075.43ms +step:304/3200 train_loss:4.0355 train_time:1198223ms step_avg:4075.59ms +step:305/3200 train_loss:3.9087 train_time:1202338ms step_avg:4075.72ms +step:306/3200 train_loss:4.0303 train_time:1206450ms step_avg:4075.84ms +step:307/3200 train_loss:4.3890 train_time:1210557ms step_avg:4075.95ms +step:308/3200 train_loss:4.1961 train_time:1214664ms step_avg:4076.05ms +step:309/3200 train_loss:3.9551 train_time:1218775ms step_avg:4076.17ms +step:310/3200 train_loss:3.8832 train_time:1222886ms step_avg:4076.29ms +step:311/3200 train_loss:3.9136 train_time:1227009ms step_avg:4076.44ms +step:312/3200 train_loss:3.9831 train_time:1231132ms step_avg:4076.60ms +step:313/3200 train_loss:3.9683 train_time:1235256ms step_avg:4076.75ms +step:314/3200 train_loss:3.8860 train_time:1239394ms step_avg:4076.95ms +step:315/3200 train_loss:3.9934 train_time:1243539ms step_avg:4077.18ms +step:316/3200 train_loss:3.8236 train_time:1247705ms step_avg:4077.47ms +step:317/3200 train_loss:3.9215 train_time:1251886ms step_avg:4077.81ms +step:318/3200 train_loss:3.8290 train_time:1256072ms step_avg:4078.16ms +step:319/3200 train_loss:3.9858 train_time:1260273ms step_avg:4078.55ms +step:320/3200 train_loss:4.0467 train_time:1264471ms step_avg:4078.94ms +step:321/3200 train_loss:4.0092 train_time:1268646ms step_avg:4079.25ms +step:322/3200 train_loss:4.0022 train_time:1272823ms step_avg:4079.56ms +step:323/3200 train_loss:3.9293 train_time:1276984ms step_avg:4079.82ms +step:324/3200 train_loss:4.1477 train_time:1281128ms step_avg:4080.03ms +step:325/3200 train_loss:4.1286 train_time:1285273ms step_avg:4080.23ms +step:326/3200 train_loss:3.8389 train_time:1289416ms step_avg:4080.43ms +step:327/3200 train_loss:3.9081 train_time:1293542ms step_avg:4080.57ms +step:328/3200 train_loss:3.9950 train_time:1297674ms step_avg:4080.74ms +step:329/3200 train_loss:3.7412 train_time:1301815ms step_avg:4080.92ms +step:330/3200 train_loss:4.1865 train_time:1305952ms step_avg:4081.10ms +step:331/3200 train_loss:3.9384 train_time:1310093ms step_avg:4081.29ms +step:332/3200 train_loss:3.8618 train_time:1314247ms step_avg:4081.51ms +step:333/3200 train_loss:4.0207 train_time:1318409ms step_avg:4081.76ms +step:334/3200 train_loss:3.9468 train_time:1322585ms step_avg:4082.05ms +step:335/3200 train_loss:3.9760 train_time:1326767ms step_avg:4082.36ms +step:336/3200 train_loss:4.1294 train_time:1330953ms step_avg:4082.68ms +step:337/3200 train_loss:5.0012 train_time:1335175ms step_avg:4083.10ms +step:338/3200 train_loss:4.1019 train_time:1339415ms step_avg:4083.58ms +step:339/3200 train_loss:3.9257 train_time:1343629ms step_avg:4083.98ms +step:340/3200 train_loss:3.9948 train_time:1347818ms step_avg:4084.30ms +step:341/3200 train_loss:4.0647 train_time:1351942ms step_avg:4084.42ms +step:342/3200 train_loss:3.8372 train_time:1356034ms step_avg:4084.44ms +step:343/3200 train_loss:4.0600 train_time:1360109ms step_avg:4084.41ms +step:344/3200 train_loss:4.0796 train_time:1364160ms step_avg:4084.31ms +step:345/3200 train_loss:3.9585 train_time:1368198ms step_avg:4084.17ms +step:346/3200 train_loss:3.8649 train_time:1372222ms step_avg:4083.99ms +step:347/3200 train_loss:3.8687 train_time:1376250ms step_avg:4083.83ms +step:348/3200 train_loss:4.0147 train_time:1380281ms step_avg:4083.67ms +step:349/3200 train_loss:3.9308 train_time:1384307ms step_avg:4083.50ms +step:350/3200 train_loss:3.9712 train_time:1388328ms step_avg:4083.32ms +step:351/3200 train_loss:3.8117 train_time:1392347ms step_avg:4083.13ms +step:352/3200 train_loss:3.9933 train_time:1396362ms step_avg:4082.93ms +step:353/3200 train_loss:3.7327 train_time:1400376ms step_avg:4082.73ms +step:354/3200 train_loss:4.0591 train_time:1404390ms step_avg:4082.53ms +step:355/3200 train_loss:4.0855 train_time:1408405ms step_avg:4082.33ms +step:356/3200 train_loss:3.9374 train_time:1412419ms step_avg:4082.14ms +step:357/3200 train_loss:3.9934 train_time:1416428ms step_avg:4081.93ms +step:358/3200 train_loss:4.0119 train_time:1420436ms step_avg:4081.71ms +step:359/3200 train_loss:3.7770 train_time:1424450ms step_avg:4081.52ms +step:360/3200 train_loss:4.0251 train_time:1428465ms step_avg:4081.33ms +step:361/3200 train_loss:3.8950 train_time:1432482ms step_avg:4081.15ms +step:362/3200 train_loss:3.8057 train_time:1436501ms step_avg:4080.97ms +step:363/3200 train_loss:3.9170 train_time:1440519ms step_avg:4080.79ms +step:364/3200 train_loss:3.9634 train_time:1444547ms step_avg:4080.64ms +step:365/3200 train_loss:3.8849 train_time:1448576ms step_avg:4080.50ms +step:366/3200 train_loss:3.8126 train_time:1452607ms step_avg:4080.36ms +step:367/3200 train_loss:3.7970 train_time:1456647ms step_avg:4080.24ms +step:368/3200 train_loss:3.9807 train_time:1460683ms step_avg:4080.12ms +step:369/3200 train_loss:3.8492 train_time:1464701ms step_avg:4079.95ms +step:370/3200 train_loss:3.8545 train_time:1468730ms step_avg:4079.81ms +step:371/3200 train_loss:4.0574 train_time:1472767ms step_avg:4079.69ms +step:372/3200 train_loss:3.9308 train_time:1476814ms step_avg:4079.60ms +step:373/3200 train_loss:4.2503 train_time:1480872ms step_avg:4079.54ms +step:374/3200 train_loss:3.9660 train_time:1484953ms step_avg:4079.54ms +step:375/3200 train_loss:4.0932 train_time:1489038ms step_avg:4079.56ms +step:375/3200 val_loss:3.9125 train_time:1489039ms step_avg:4079.56ms +step:376/3200 train_loss:4.1962 train_time:1493165ms step_avg:4079.69ms +step:377/3200 train_loss:3.8236 train_time:1497248ms step_avg:4079.69ms +step:378/3200 train_loss:3.9721 train_time:1501306ms step_avg:4079.64ms +step:379/3200 train_loss:4.0371 train_time:1505342ms step_avg:4079.52ms +step:380/3200 train_loss:3.9652 train_time:1509368ms step_avg:4079.37ms +step:381/3200 train_loss:3.9102 train_time:1513389ms step_avg:4079.22ms +step:382/3200 train_loss:3.8293 train_time:1517427ms step_avg:4079.10ms +step:383/3200 train_loss:3.7984 train_time:1521455ms step_avg:4078.97ms +step:384/3200 train_loss:3.8774 train_time:1525481ms step_avg:4078.83ms +step:385/3200 train_loss:3.7149 train_time:1529505ms step_avg:4078.68ms +step:386/3200 train_loss:4.0216 train_time:1533521ms step_avg:4078.51ms +step:387/3200 train_loss:3.8151 train_time:1537537ms step_avg:4078.35ms +step:388/3200 train_loss:3.9272 train_time:1541549ms step_avg:4078.17ms +step:389/3200 train_loss:3.9820 train_time:1545559ms step_avg:4077.99ms +step:390/3200 train_loss:3.9651 train_time:1549572ms step_avg:4077.82ms +step:391/3200 train_loss:3.7413 train_time:1553579ms step_avg:4077.63ms +step:392/3200 train_loss:3.5766 train_time:1557592ms step_avg:4077.46ms +step:393/3200 train_loss:3.7633 train_time:1561607ms step_avg:4077.30ms +step:394/3200 train_loss:3.9939 train_time:1565624ms step_avg:4077.14ms +step:395/3200 train_loss:3.8569 train_time:1569646ms step_avg:4077.00ms +step:396/3200 train_loss:3.9063 train_time:1573667ms step_avg:4076.86ms +step:397/3200 train_loss:3.7287 train_time:1577687ms step_avg:4076.71ms +step:398/3200 train_loss:3.9131 train_time:1581716ms step_avg:4076.59ms +step:399/3200 train_loss:3.8602 train_time:1585745ms step_avg:4076.46ms +step:400/3200 train_loss:3.8285 train_time:1589775ms step_avg:4076.35ms +step:401/3200 train_loss:3.8196 train_time:1593815ms step_avg:4076.25ms +step:402/3200 train_loss:3.7863 train_time:1597841ms step_avg:4076.12ms +step:403/3200 train_loss:4.0336 train_time:1601871ms step_avg:4076.01ms +step:404/3200 train_loss:3.9603 train_time:1605908ms step_avg:4075.91ms +step:405/3200 train_loss:3.9584 train_time:1609953ms step_avg:4075.83ms +step:406/3200 train_loss:3.9216 train_time:1614009ms step_avg:4075.78ms +step:407/3200 train_loss:3.8668 train_time:1618085ms step_avg:4075.78ms +step:408/3200 train_loss:3.7894 train_time:1622169ms step_avg:4075.80ms +step:409/3200 train_loss:3.8059 train_time:1626272ms step_avg:4075.87ms +step:410/3200 train_loss:3.7293 train_time:1630404ms step_avg:4076.01ms +step:411/3200 train_loss:3.8788 train_time:1634548ms step_avg:4076.18ms +step:412/3200 train_loss:3.8859 train_time:1638734ms step_avg:4076.45ms +step:413/3200 train_loss:3.8207 train_time:1642955ms step_avg:4076.81ms +step:414/3200 train_loss:3.8204 train_time:1647150ms step_avg:4077.10ms +step:415/3200 train_loss:3.7135 train_time:1651304ms step_avg:4077.29ms +step:416/3200 train_loss:3.9187 train_time:1655393ms step_avg:4077.32ms +step:417/3200 train_loss:4.0730 train_time:1659454ms step_avg:4077.28ms +step:418/3200 train_loss:3.7833 train_time:1663491ms step_avg:4077.18ms +step:419/3200 train_loss:3.8995 train_time:1667520ms step_avg:4077.07ms +step:420/3200 train_loss:4.1327 train_time:1671546ms step_avg:4076.94ms +step:421/3200 train_loss:3.8429 train_time:1675564ms step_avg:4076.80ms +step:422/3200 train_loss:3.9999 train_time:1679575ms step_avg:4076.64ms +step:423/3200 train_loss:3.7467 train_time:1683580ms step_avg:4076.47ms +step:424/3200 train_loss:3.8069 train_time:1687574ms step_avg:4076.26ms +step:425/3200 train_loss:3.6669 train_time:1691586ms step_avg:4076.11ms +step:426/3200 train_loss:3.9785 train_time:1695591ms step_avg:4075.94ms +step:427/3200 train_loss:3.9154 train_time:1699597ms step_avg:4075.77ms +step:428/3200 train_loss:3.9724 train_time:1703598ms step_avg:4075.59ms +step:429/3200 train_loss:3.9769 train_time:1707595ms step_avg:4075.41ms +step:430/3200 train_loss:3.7270 train_time:1711592ms step_avg:4075.22ms +step:431/3200 train_loss:3.6413 train_time:1715587ms step_avg:4075.03ms +step:432/3200 train_loss:4.0417 train_time:1719580ms step_avg:4074.83ms +step:433/3200 train_loss:3.9529 train_time:1723571ms step_avg:4074.64ms +step:434/3200 train_loss:3.8786 train_time:1727561ms step_avg:4074.44ms +step:435/3200 train_loss:3.6607 train_time:1731554ms step_avg:4074.25ms +step:436/3200 train_loss:3.9665 train_time:1735547ms step_avg:4074.05ms +step:437/3200 train_loss:3.9347 train_time:1739545ms step_avg:4073.88ms +step:438/3200 train_loss:3.8714 train_time:1743541ms step_avg:4073.69ms +step:439/3200 train_loss:3.8788 train_time:1747541ms step_avg:4073.52ms +step:440/3200 train_loss:3.8081 train_time:1751547ms step_avg:4073.36ms +step:441/3200 train_loss:3.7865 train_time:1755548ms step_avg:4073.20ms +step:442/3200 train_loss:3.9446 train_time:1759540ms step_avg:4073.01ms +step:443/3200 train_loss:3.6723 train_time:1763528ms step_avg:4072.81ms +step:444/3200 train_loss:3.7727 train_time:1767519ms step_avg:4072.62ms +step:445/3200 train_loss:4.1408 train_time:1771516ms step_avg:4072.45ms +step:446/3200 train_loss:3.7704 train_time:1775517ms step_avg:4072.29ms +step:447/3200 train_loss:3.9947 train_time:1779523ms step_avg:4072.13ms +step:448/3200 train_loss:4.0433 train_time:1783534ms step_avg:4072.00ms +step:449/3200 train_loss:3.8696 train_time:1787553ms step_avg:4071.88ms +step:450/3200 train_loss:3.9218 train_time:1791580ms step_avg:4071.77ms +step:451/3200 train_loss:3.8574 train_time:1795605ms step_avg:4071.67ms +step:452/3200 train_loss:3.8964 train_time:1799627ms step_avg:4071.55ms +step:453/3200 train_loss:4.2493 train_time:1803658ms step_avg:4071.46ms +step:454/3200 train_loss:3.6807 train_time:1807689ms step_avg:4071.37ms +step:455/3200 train_loss:3.8284 train_time:1811744ms step_avg:4071.33ms +step:456/3200 train_loss:3.9488 train_time:1815809ms step_avg:4071.32ms +step:457/3200 train_loss:3.8242 train_time:1819890ms step_avg:4071.34ms +step:458/3200 train_loss:3.9107 train_time:1823986ms step_avg:4071.40ms +step:459/3200 train_loss:4.0022 train_time:1828104ms step_avg:4071.50ms +step:460/3200 train_loss:3.6353 train_time:1832244ms step_avg:4071.65ms +step:461/3200 train_loss:3.8253 train_time:1836432ms step_avg:4071.91ms +step:462/3200 train_loss:3.9134 train_time:1840636ms step_avg:4072.20ms +step:463/3200 train_loss:3.7126 train_time:1844861ms step_avg:4072.54ms +step:464/3200 train_loss:3.7908 train_time:1849089ms step_avg:4072.88ms +step:465/3200 train_loss:4.0368 train_time:1853288ms step_avg:4073.16ms +step:466/3200 train_loss:3.7665 train_time:1857484ms step_avg:4073.43ms +step:467/3200 train_loss:3.7184 train_time:1861681ms step_avg:4073.70ms +step:468/3200 train_loss:3.7055 train_time:1865820ms step_avg:4073.84ms +step:469/3200 train_loss:4.1121 train_time:1869941ms step_avg:4073.95ms +step:470/3200 train_loss:3.8932 train_time:1874066ms step_avg:4074.06ms +step:471/3200 train_loss:3.6497 train_time:1878190ms step_avg:4074.16ms +step:472/3200 train_loss:3.9589 train_time:1882306ms step_avg:4074.26ms +step:473/3200 train_loss:3.7350 train_time:1886421ms step_avg:4074.34ms +step:474/3200 train_loss:3.9479 train_time:1890533ms step_avg:4074.42ms +step:475/3200 train_loss:3.9628 train_time:1894647ms step_avg:4074.51ms +step:476/3200 train_loss:3.8444 train_time:1898767ms step_avg:4074.61ms +step:477/3200 train_loss:4.0580 train_time:1902904ms step_avg:4074.74ms +step:478/3200 train_loss:3.8946 train_time:1907044ms step_avg:4074.88ms +step:479/3200 train_loss:4.0820 train_time:1911204ms step_avg:4075.06ms +step:480/3200 train_loss:3.7414 train_time:1915384ms step_avg:4075.28ms +step:481/3200 train_loss:3.7692 train_time:1919564ms step_avg:4075.51ms +step:482/3200 train_loss:3.9487 train_time:1923756ms step_avg:4075.75ms +step:483/3200 train_loss:3.8781 train_time:1927942ms step_avg:4075.99ms +step:484/3200 train_loss:3.9329 train_time:1932139ms step_avg:4076.24ms +step:485/3200 train_loss:3.8529 train_time:1936314ms step_avg:4076.45ms +step:486/3200 train_loss:3.6453 train_time:1940452ms step_avg:4076.58ms +step:487/3200 train_loss:3.7865 train_time:1944568ms step_avg:4076.66ms +step:488/3200 train_loss:3.9007 train_time:1948669ms step_avg:4076.71ms +step:489/3200 train_loss:3.8836 train_time:1952754ms step_avg:4076.73ms +step:490/3200 train_loss:4.1618 train_time:1956832ms step_avg:4076.73ms +step:491/3200 train_loss:3.7181 train_time:1960905ms step_avg:4076.73ms +step:492/3200 train_loss:3.7238 train_time:1964969ms step_avg:4076.70ms +step:493/3200 train_loss:3.8603 train_time:1969033ms step_avg:4076.67ms +step:494/3200 train_loss:3.6279 train_time:1973100ms step_avg:4076.65ms +step:495/3200 train_loss:3.9490 train_time:1977158ms step_avg:4076.62ms +step:496/3200 train_loss:4.0921 train_time:1981219ms step_avg:4076.58ms +step:497/3200 train_loss:3.8566 train_time:1985279ms step_avg:4076.55ms +step:498/3200 train_loss:3.8116 train_time:1989345ms step_avg:4076.53ms +step:499/3200 train_loss:3.8227 train_time:1993415ms step_avg:4076.51ms +step:500/3200 train_loss:3.6910 train_time:1997488ms step_avg:4076.51ms +step:500/3200 val_loss:3.8130 train_time:1997488ms step_avg:4076.51ms +step:501/3200 train_loss:3.7603 train_time:2001553ms step_avg:4076.48ms +step:502/3200 train_loss:3.7526 train_time:2005610ms step_avg:4076.44ms +step:503/3200 train_loss:3.9485 train_time:2009668ms step_avg:4076.41ms +step:504/3200 train_loss:3.6912 train_time:2013725ms step_avg:4076.37ms +step:505/3200 train_loss:4.0804 train_time:2017788ms step_avg:4076.34ms +step:506/3200 train_loss:3.7774 train_time:2021851ms step_avg:4076.31ms +step:507/3200 train_loss:4.0306 train_time:2025919ms step_avg:4076.30ms +step:508/3200 train_loss:4.1475 train_time:2029994ms step_avg:4076.29ms +step:509/3200 train_loss:3.6497 train_time:2034082ms step_avg:4076.32ms +step:510/3200 train_loss:3.9593 train_time:2038169ms step_avg:4076.34ms +step:511/3200 train_loss:3.7679 train_time:2042267ms step_avg:4076.38ms +step:512/3200 train_loss:3.7783 train_time:2046380ms step_avg:4076.45ms +step:513/3200 train_loss:3.5584 train_time:2050504ms step_avg:4076.55ms +step:514/3200 train_loss:3.7468 train_time:2054647ms step_avg:4076.68ms +step:515/3200 train_loss:4.3355 train_time:2058823ms step_avg:4076.88ms +step:516/3200 train_loss:3.9006 train_time:2063005ms step_avg:4077.08ms +step:517/3200 train_loss:3.8148 train_time:2067204ms step_avg:4077.33ms +step:518/3200 train_loss:3.7867 train_time:2071404ms step_avg:4077.57ms +step:519/3200 train_loss:3.7093 train_time:2075600ms step_avg:4077.80ms +step:520/3200 train_loss:4.1163 train_time:2079776ms step_avg:4077.99ms +step:521/3200 train_loss:3.7919 train_time:2083953ms step_avg:4078.19ms +step:522/3200 train_loss:3.6749 train_time:2088098ms step_avg:4078.32ms +step:523/3200 train_loss:4.0267 train_time:2092234ms step_avg:4078.43ms +step:524/3200 train_loss:3.6350 train_time:2096370ms step_avg:4078.54ms +step:525/3200 train_loss:3.7325 train_time:2100491ms step_avg:4078.62ms +step:526/3200 train_loss:3.9443 train_time:2104610ms step_avg:4078.70ms +step:527/3200 train_loss:3.8942 train_time:2108730ms step_avg:4078.78ms +step:528/3200 train_loss:3.7760 train_time:2112858ms step_avg:4078.88ms +step:529/3200 train_loss:3.8769 train_time:2116983ms step_avg:4078.97ms +step:530/3200 train_loss:3.7776 train_time:2121121ms step_avg:4079.08ms +step:531/3200 train_loss:3.8195 train_time:2125263ms step_avg:4079.20ms +step:532/3200 train_loss:3.8464 train_time:2129413ms step_avg:4079.34ms +step:533/3200 train_loss:3.8471 train_time:2133599ms step_avg:4079.54ms +step:534/3200 train_loss:3.8371 train_time:2137782ms step_avg:4079.74ms +step:535/3200 train_loss:3.8241 train_time:2141982ms step_avg:4079.97ms +step:536/3200 train_loss:3.8009 train_time:2146175ms step_avg:4080.18ms +step:537/3200 train_loss:3.9318 train_time:2150297ms step_avg:4080.26ms +step:538/3200 train_loss:3.7124 train_time:2154396ms step_avg:4080.29ms +step:539/3200 train_loss:3.8581 train_time:2158473ms step_avg:4080.29ms +step:540/3200 train_loss:3.8397 train_time:2162532ms step_avg:4080.25ms +step:541/3200 train_loss:3.7765 train_time:2166572ms step_avg:4080.17ms +step:542/3200 train_loss:3.8848 train_time:2170606ms step_avg:4080.09ms +step:543/3200 train_loss:3.8305 train_time:2174633ms step_avg:4079.99ms +step:544/3200 train_loss:3.7830 train_time:2178654ms step_avg:4079.88ms +step:545/3200 train_loss:3.8555 train_time:2182670ms step_avg:4079.76ms +step:546/3200 train_loss:3.9582 train_time:2186680ms step_avg:4079.63ms +step:547/3200 train_loss:3.7848 train_time:2190685ms step_avg:4079.49ms +step:548/3200 train_loss:4.1319 train_time:2194691ms step_avg:4079.35ms +step:549/3200 train_loss:3.4314 train_time:2198695ms step_avg:4079.21ms +step:550/3200 train_loss:3.9066 train_time:2202696ms step_avg:4079.07ms +step:551/3200 train_loss:3.9062 train_time:2206697ms step_avg:4078.92ms +step:552/3200 train_loss:3.6991 train_time:2210695ms step_avg:4078.77ms +step:553/3200 train_loss:3.9045 train_time:2214701ms step_avg:4078.64ms +step:554/3200 train_loss:3.7325 train_time:2218709ms step_avg:4078.51ms +step:555/3200 train_loss:3.7790 train_time:2222720ms step_avg:4078.38ms +step:556/3200 train_loss:4.0916 train_time:2226734ms step_avg:4078.27ms +step:557/3200 train_loss:3.7751 train_time:2230751ms step_avg:4078.16ms +step:558/3200 train_loss:3.7495 train_time:2234767ms step_avg:4078.04ms +step:559/3200 train_loss:3.7444 train_time:2238794ms step_avg:4077.95ms +step:560/3200 train_loss:3.7439 train_time:2242825ms step_avg:4077.86ms +step:561/3200 train_loss:3.7391 train_time:2246865ms step_avg:4077.79ms +step:562/3200 train_loss:3.7029 train_time:2250916ms step_avg:4077.75ms +step:563/3200 train_loss:3.5403 train_time:2254975ms step_avg:4077.71ms +step:564/3200 train_loss:3.9269 train_time:2259049ms step_avg:4077.71ms +step:565/3200 train_loss:3.6902 train_time:2263136ms step_avg:4077.72ms +step:566/3200 train_loss:3.8257 train_time:2267231ms step_avg:4077.75ms +step:567/3200 train_loss:4.0711 train_time:2271354ms step_avg:4077.84ms +step:568/3200 train_loss:3.7835 train_time:2275497ms step_avg:4077.95ms +step:569/3200 train_loss:4.5050 train_time:2279653ms step_avg:4078.09ms +step:570/3200 train_loss:3.7876 train_time:2283835ms step_avg:4078.28ms +step:571/3200 train_loss:3.7644 train_time:2288035ms step_avg:4078.49ms +step:572/3200 train_loss:3.7260 train_time:2292233ms step_avg:4078.71ms +step:573/3200 train_loss:3.8263 train_time:2296549ms step_avg:4079.13ms +step:574/3200 train_loss:3.7327 train_time:2300726ms step_avg:4079.30ms +step:575/3200 train_loss:3.6171 train_time:2304876ms step_avg:4079.43ms +step:576/3200 train_loss:3.6488 train_time:2309008ms step_avg:4079.52ms +step:577/3200 train_loss:3.7574 train_time:2313127ms step_avg:4079.59ms +step:578/3200 train_loss:3.7844 train_time:2317237ms step_avg:4079.64ms +step:579/3200 train_loss:3.6824 train_time:2321341ms step_avg:4079.69ms +step:580/3200 train_loss:3.9346 train_time:2325448ms step_avg:4079.73ms +step:581/3200 train_loss:3.7952 train_time:2329549ms step_avg:4079.77ms +step:582/3200 train_loss:3.7182 train_time:2333652ms step_avg:4079.81ms +step:583/3200 train_loss:3.9549 train_time:2337756ms step_avg:4079.85ms +step:584/3200 train_loss:3.6274 train_time:2341863ms step_avg:4079.90ms +step:585/3200 train_loss:3.8626 train_time:2345977ms step_avg:4079.96ms +step:586/3200 train_loss:3.6011 train_time:2350100ms step_avg:4080.04ms +step:587/3200 train_loss:3.5959 train_time:2354229ms step_avg:4080.12ms +step:588/3200 train_loss:3.6734 train_time:2358373ms step_avg:4080.23ms +step:589/3200 train_loss:3.7814 train_time:2362526ms step_avg:4080.36ms +step:590/3200 train_loss:3.8467 train_time:2366706ms step_avg:4080.53ms +step:591/3200 train_loss:3.8538 train_time:2370887ms step_avg:4080.70ms +step:592/3200 train_loss:3.7125 train_time:2375088ms step_avg:4080.91ms +step:593/3200 train_loss:3.9345 train_time:2379284ms step_avg:4081.11ms +step:594/3200 train_loss:3.7495 train_time:2383460ms step_avg:4081.27ms +step:595/3200 train_loss:3.8319 train_time:2387607ms step_avg:4081.38ms +step:596/3200 train_loss:4.4130 train_time:2391734ms step_avg:4081.46ms +step:597/3200 train_loss:3.5165 train_time:2395839ms step_avg:4081.50ms +step:598/3200 train_loss:3.7387 train_time:2399931ms step_avg:4081.52ms +step:599/3200 train_loss:3.8324 train_time:2404016ms step_avg:4081.52ms +step:600/3200 train_loss:3.7288 train_time:2408094ms step_avg:4081.52ms +step:601/3200 train_loss:3.6419 train_time:2412166ms step_avg:4081.50ms +step:602/3200 train_loss:3.7258 train_time:2416232ms step_avg:4081.47ms +step:603/3200 train_loss:3.8446 train_time:2420300ms step_avg:4081.45ms +step:604/3200 train_loss:3.7053 train_time:2424365ms step_avg:4081.42ms +step:605/3200 train_loss:4.1835 train_time:2428426ms step_avg:4081.39ms +step:606/3200 train_loss:3.7315 train_time:2432492ms step_avg:4081.36ms +step:607/3200 train_loss:3.6782 train_time:2436561ms step_avg:4081.34ms +step:608/3200 train_loss:3.8263 train_time:2440628ms step_avg:4081.32ms +step:609/3200 train_loss:3.6446 train_time:2444708ms step_avg:4081.32ms +step:610/3200 train_loss:3.6194 train_time:2448794ms step_avg:4081.32ms +step:611/3200 train_loss:3.8483 train_time:2452882ms step_avg:4081.33ms +step:612/3200 train_loss:3.7671 train_time:2456987ms step_avg:4081.37ms +step:613/3200 train_loss:3.5444 train_time:2461098ms step_avg:4081.42ms +step:614/3200 train_loss:3.7733 train_time:2465233ms step_avg:4081.51ms +step:615/3200 train_loss:3.6388 train_time:2469385ms step_avg:4081.63ms +step:616/3200 train_loss:4.0457 train_time:2473552ms step_avg:4081.77ms +step:617/3200 train_loss:3.5856 train_time:2477733ms step_avg:4081.93ms +step:618/3200 train_loss:3.6583 train_time:2481910ms step_avg:4082.09ms +step:619/3200 train_loss:3.7739 train_time:2486059ms step_avg:4082.20ms +step:620/3200 train_loss:3.6659 train_time:2490166ms step_avg:4082.24ms +step:621/3200 train_loss:3.6970 train_time:2494248ms step_avg:4082.24ms +step:622/3200 train_loss:3.7681 train_time:2498321ms step_avg:4082.22ms +step:623/3200 train_loss:3.5850 train_time:2502377ms step_avg:4082.18ms +step:624/3200 train_loss:3.7933 train_time:2506428ms step_avg:4082.13ms +step:625/3200 train_loss:3.3688 train_time:2510461ms step_avg:4082.05ms +step:625/3200 val_loss:3.7382 train_time:2510461ms step_avg:4082.05ms +step:626/3200 train_loss:3.7246 train_time:2514479ms step_avg:4081.95ms +step:627/3200 train_loss:3.8712 train_time:2518496ms step_avg:4081.84ms +step:628/3200 train_loss:3.9221 train_time:2522514ms step_avg:4081.74ms +step:629/3200 train_loss:3.6581 train_time:2526534ms step_avg:4081.64ms +step:630/3200 train_loss:3.6249 train_time:2530560ms step_avg:4081.55ms +step:631/3200 train_loss:3.7852 train_time:2534580ms step_avg:4081.45ms +step:632/3200 train_loss:3.7886 train_time:2538611ms step_avg:4081.37ms +step:633/3200 train_loss:3.7565 train_time:2542640ms step_avg:4081.28ms +step:634/3200 train_loss:3.7917 train_time:2546683ms step_avg:4081.22ms +step:635/3200 train_loss:3.7813 train_time:2550732ms step_avg:4081.17ms +step:636/3200 train_loss:3.2973 train_time:2554793ms step_avg:4081.14ms +step:637/3200 train_loss:3.7905 train_time:2558869ms step_avg:4081.13ms +step:638/3200 train_loss:3.7854 train_time:2562950ms step_avg:4081.13ms +step:639/3200 train_loss:3.6447 train_time:2567048ms step_avg:4081.16ms +step:640/3200 train_loss:3.9010 train_time:2571163ms step_avg:4081.21ms +step:641/3200 train_loss:3.8775 train_time:2575266ms step_avg:4081.25ms +step:642/3200 train_loss:3.7270 train_time:2579405ms step_avg:4081.34ms +step:643/3200 train_loss:3.8186 train_time:2583586ms step_avg:4081.49ms +step:644/3200 train_loss:3.7589 train_time:2587767ms step_avg:4081.65ms +step:645/3200 train_loss:4.0638 train_time:2591961ms step_avg:4081.83ms +step:646/3200 train_loss:3.8871 train_time:2596090ms step_avg:4081.90ms +step:647/3200 train_loss:3.7905 train_time:2600174ms step_avg:4081.91ms +step:648/3200 train_loss:3.7225 train_time:2604229ms step_avg:4081.86ms +step:649/3200 train_loss:4.1011 train_time:2608261ms step_avg:4081.79ms +step:650/3200 train_loss:3.7121 train_time:2612276ms step_avg:4081.68ms +step:651/3200 train_loss:3.7917 train_time:2616277ms step_avg:4081.56ms +step:652/3200 train_loss:3.8538 train_time:2620300ms step_avg:4081.46ms +step:653/3200 train_loss:3.6834 train_time:2624308ms step_avg:4081.35ms +step:654/3200 train_loss:3.6746 train_time:2628301ms step_avg:4081.21ms +step:655/3200 train_loss:3.6487 train_time:2632294ms step_avg:4081.08ms +step:656/3200 train_loss:3.8284 train_time:2636283ms step_avg:4080.93ms +step:657/3200 train_loss:3.8116 train_time:2640266ms step_avg:4080.78ms +step:658/3200 train_loss:3.7514 train_time:2644245ms step_avg:4080.62ms +step:659/3200 train_loss:3.9592 train_time:2648221ms step_avg:4080.46ms +step:660/3200 train_loss:3.8091 train_time:2652200ms step_avg:4080.31ms +step:661/3200 train_loss:3.7630 train_time:2656175ms step_avg:4080.15ms +step:662/3200 train_loss:3.7367 train_time:2660147ms step_avg:4079.98ms +step:663/3200 train_loss:3.8553 train_time:2664122ms step_avg:4079.82ms +step:664/3200 train_loss:3.6208 train_time:2668098ms step_avg:4079.66ms +step:665/3200 train_loss:3.6346 train_time:2672073ms step_avg:4079.50ms +step:666/3200 train_loss:3.7809 train_time:2676052ms step_avg:4079.35ms +step:667/3200 train_loss:3.6623 train_time:2680026ms step_avg:4079.19ms +step:668/3200 train_loss:3.7431 train_time:2684004ms step_avg:4079.03ms +step:669/3200 train_loss:3.7613 train_time:2687982ms step_avg:4078.88ms +step:670/3200 train_loss:3.7352 train_time:2691962ms step_avg:4078.73ms +step:671/3200 train_loss:3.8071 train_time:2695946ms step_avg:4078.59ms +step:672/3200 train_loss:3.8818 train_time:2699930ms step_avg:4078.44ms +step:673/3200 train_loss:3.8780 train_time:2703924ms step_avg:4078.32ms +step:674/3200 train_loss:3.6687 train_time:2707915ms step_avg:4078.19ms +step:675/3200 train_loss:3.7702 train_time:2711914ms step_avg:4078.07ms +step:676/3200 train_loss:3.5586 train_time:2715913ms step_avg:4077.95ms +step:677/3200 train_loss:3.7912 train_time:2719919ms step_avg:4077.84ms +step:678/3200 train_loss:3.8087 train_time:2723930ms step_avg:4077.74ms +step:679/3200 train_loss:3.9421 train_time:2727949ms step_avg:4077.65ms +step:680/3200 train_loss:3.7436 train_time:2731947ms step_avg:4077.53ms +step:681/3200 train_loss:3.5907 train_time:2735944ms step_avg:4077.41ms +step:682/3200 train_loss:3.6008 train_time:2739955ms step_avg:4077.31ms +step:683/3200 train_loss:3.7651 train_time:2743974ms step_avg:4077.23ms +step:684/3200 train_loss:3.7366 train_time:2748005ms step_avg:4077.16ms +step:685/3200 train_loss:3.6148 train_time:2752038ms step_avg:4077.09ms +step:686/3200 train_loss:3.8123 train_time:2756092ms step_avg:4077.06ms +step:687/3200 train_loss:3.6293 train_time:2760158ms step_avg:4077.04ms +step:688/3200 train_loss:3.8758 train_time:2764238ms step_avg:4077.05ms +step:689/3200 train_loss:3.6358 train_time:2768338ms step_avg:4077.08ms +step:690/3200 train_loss:3.8163 train_time:2772463ms step_avg:4077.15ms +step:691/3200 train_loss:3.7325 train_time:2776607ms step_avg:4077.25ms +step:692/3200 train_loss:3.6129 train_time:2780784ms step_avg:4077.40ms +step:693/3200 train_loss:3.6592 train_time:2784965ms step_avg:4077.55ms +step:694/3200 train_loss:3.6729 train_time:2789158ms step_avg:4077.72ms +step:695/3200 train_loss:3.7895 train_time:2793286ms step_avg:4077.79ms +step:696/3200 train_loss:3.6848 train_time:2797366ms step_avg:4077.79ms +step:697/3200 train_loss:3.6745 train_time:2801417ms step_avg:4077.75ms +step:698/3200 train_loss:3.8854 train_time:2805441ms step_avg:4077.68ms +step:699/3200 train_loss:3.6708 train_time:2809452ms step_avg:4077.58ms +step:700/3200 train_loss:3.7206 train_time:2813446ms step_avg:4077.46ms +step:701/3200 train_loss:3.9868 train_time:2817457ms step_avg:4077.36ms +step:702/3200 train_loss:3.6733 train_time:2821461ms step_avg:4077.26ms +step:703/3200 train_loss:3.7543 train_time:2825456ms step_avg:4077.14ms +step:704/3200 train_loss:3.8161 train_time:2829448ms step_avg:4077.01ms +step:705/3200 train_loss:3.8028 train_time:2833432ms step_avg:4076.88ms +step:706/3200 train_loss:3.7506 train_time:2837415ms step_avg:4076.75ms +step:707/3200 train_loss:3.6362 train_time:2841395ms step_avg:4076.61ms +step:708/3200 train_loss:3.6415 train_time:2845373ms step_avg:4076.47ms +step:709/3200 train_loss:4.1922 train_time:2849347ms step_avg:4076.32ms +step:710/3200 train_loss:3.7702 train_time:2853320ms step_avg:4076.17ms +step:711/3200 train_loss:3.8100 train_time:2857295ms step_avg:4076.03ms +step:712/3200 train_loss:3.8916 train_time:2861272ms step_avg:4075.89ms +step:713/3200 train_loss:3.7454 train_time:2865242ms step_avg:4075.74ms +step:714/3200 train_loss:3.7420 train_time:2869219ms step_avg:4075.59ms +step:715/3200 train_loss:4.0057 train_time:2873192ms step_avg:4075.45ms +step:716/3200 train_loss:3.8395 train_time:2877165ms step_avg:4075.30ms +step:717/3200 train_loss:4.0350 train_time:2881143ms step_avg:4075.17ms +step:718/3200 train_loss:3.7551 train_time:2885122ms step_avg:4075.03ms +step:719/3200 train_loss:3.7738 train_time:2889098ms step_avg:4074.89ms +step:720/3200 train_loss:3.6323 train_time:2893078ms step_avg:4074.76ms +step:721/3200 train_loss:3.6793 train_time:2897060ms step_avg:4074.63ms +step:722/3200 train_loss:3.6103 train_time:2901045ms step_avg:4074.50ms +step:723/3200 train_loss:3.8921 train_time:2905037ms step_avg:4074.39ms +step:724/3200 train_loss:3.7629 train_time:2909034ms step_avg:4074.28ms +step:725/3200 train_loss:3.5140 train_time:2913036ms step_avg:4074.18ms +step:726/3200 train_loss:3.6419 train_time:2917037ms step_avg:4074.07ms +step:727/3200 train_loss:3.5200 train_time:2921048ms step_avg:4073.99ms +step:728/3200 train_loss:3.6644 train_time:2925061ms step_avg:4073.90ms +step:729/3200 train_loss:3.6410 train_time:2929057ms step_avg:4073.79ms +step:730/3200 train_loss:3.5909 train_time:2933060ms step_avg:4073.69ms +step:731/3200 train_loss:3.7216 train_time:2937073ms step_avg:4073.61ms +step:732/3200 train_loss:3.6157 train_time:2941091ms step_avg:4073.53ms +step:733/3200 train_loss:3.7641 train_time:2945120ms step_avg:4073.47ms +step:734/3200 train_loss:3.7483 train_time:2949154ms step_avg:4073.42ms +step:735/3200 train_loss:3.7575 train_time:2953208ms step_avg:4073.39ms +step:736/3200 train_loss:3.7004 train_time:2957274ms step_avg:4073.38ms +step:737/3200 train_loss:3.7204 train_time:2961363ms step_avg:4073.40ms +step:738/3200 train_loss:3.5952 train_time:2965463ms step_avg:4073.44ms +step:739/3200 train_loss:3.6396 train_time:2969597ms step_avg:4073.52ms +step:740/3200 train_loss:3.6480 train_time:2973762ms step_avg:4073.65ms +step:741/3200 train_loss:3.7094 train_time:2977966ms step_avg:4073.83ms +step:742/3200 train_loss:3.7221 train_time:2982182ms step_avg:4074.02ms +step:743/3200 train_loss:3.7536 train_time:2986384ms step_avg:4074.19ms +step:744/3200 train_loss:3.6310 train_time:2990579ms step_avg:4074.36ms +step:745/3200 train_loss:3.7031 train_time:2994730ms step_avg:4074.46ms +step:746/3200 train_loss:3.5315 train_time:2998825ms step_avg:4074.49ms +step:747/3200 train_loss:3.6461 train_time:3002899ms step_avg:4074.49ms +step:748/3200 train_loss:3.6444 train_time:3006952ms step_avg:4074.46ms +step:749/3200 train_loss:3.6904 train_time:3010988ms step_avg:4074.41ms +step:750/3200 train_loss:3.6537 train_time:3015010ms step_avg:4074.34ms +step:750/3200 val_loss:3.6825 train_time:3015010ms step_avg:4074.34ms +step:751/3200 train_loss:3.7307 train_time:3019001ms step_avg:4074.23ms +step:752/3200 train_loss:3.7611 train_time:3022986ms step_avg:4074.10ms +step:753/3200 train_loss:3.7300 train_time:3026971ms step_avg:4073.98ms +step:754/3200 train_loss:3.7887 train_time:3030970ms step_avg:4073.88ms +step:755/3200 train_loss:3.8336 train_time:3034984ms step_avg:4073.80ms +step:756/3200 train_loss:3.5575 train_time:3038999ms step_avg:4073.73ms +step:757/3200 train_loss:3.6716 train_time:3043015ms step_avg:4073.65ms +step:758/3200 train_loss:3.8369 train_time:3047037ms step_avg:4073.58ms +step:759/3200 train_loss:3.4428 train_time:3051064ms step_avg:4073.52ms +step:760/3200 train_loss:3.7298 train_time:3055073ms step_avg:4073.43ms +step:761/3200 train_loss:3.7432 train_time:3059073ms step_avg:4073.33ms +step:762/3200 train_loss:3.7086 train_time:3063085ms step_avg:4073.25ms +step:763/3200 train_loss:3.6171 train_time:3067100ms step_avg:4073.17ms +step:764/3200 train_loss:3.6196 train_time:3071124ms step_avg:4073.11ms +step:765/3200 train_loss:3.6525 train_time:3075155ms step_avg:4073.05ms +step:766/3200 train_loss:3.7459 train_time:3079194ms step_avg:4073.01ms +step:767/3200 train_loss:3.9041 train_time:3083241ms step_avg:4072.97ms +step:768/3200 train_loss:4.8675 train_time:3087295ms step_avg:4072.95ms +step:769/3200 train_loss:3.9128 train_time:3091364ms step_avg:4072.94ms +step:770/3200 train_loss:3.7256 train_time:3095447ms step_avg:4072.96ms +step:771/3200 train_loss:3.6486 train_time:3099551ms step_avg:4073.00ms +step:772/3200 train_loss:3.6059 train_time:3103677ms step_avg:4073.07ms +step:773/3200 train_loss:3.5574 train_time:3107816ms step_avg:4073.15ms +step:774/3200 train_loss:3.4946 train_time:3111998ms step_avg:4073.30ms +step:775/3200 train_loss:3.7627 train_time:3116181ms step_avg:4073.44ms +step:776/3200 train_loss:3.6398 train_time:3120379ms step_avg:4073.60ms +step:777/3200 train_loss:3.8875 train_time:3124574ms step_avg:4073.76ms +step:778/3200 train_loss:3.7652 train_time:3128734ms step_avg:4073.87ms +step:779/3200 train_loss:3.6074 train_time:3132846ms step_avg:4073.92ms +step:780/3200 train_loss:3.7236 train_time:3136925ms step_avg:4073.93ms +step:781/3200 train_loss:3.7255 train_time:3140979ms step_avg:4073.90ms +step:782/3200 train_loss:3.7569 train_time:3145013ms step_avg:4073.85ms +step:783/3200 train_loss:3.7107 train_time:3149036ms step_avg:4073.79ms +step:784/3200 train_loss:3.6888 train_time:3153045ms step_avg:4073.70ms +step:785/3200 train_loss:3.6705 train_time:3157043ms step_avg:4073.60ms +step:786/3200 train_loss:3.7153 train_time:3161035ms step_avg:4073.50ms +step:787/3200 train_loss:3.5648 train_time:3165019ms step_avg:4073.38ms +step:788/3200 train_loss:3.6093 train_time:3169024ms step_avg:4073.30ms +step:789/3200 train_loss:3.5688 train_time:3173027ms step_avg:4073.21ms +step:790/3200 train_loss:3.6945 train_time:3177026ms step_avg:4073.11ms +step:791/3200 train_loss:3.8745 train_time:3181024ms step_avg:4073.01ms +step:792/3200 train_loss:3.6871 train_time:3185015ms step_avg:4072.91ms +step:793/3200 train_loss:3.9634 train_time:3189007ms step_avg:4072.81ms +step:794/3200 train_loss:3.7142 train_time:3192998ms step_avg:4072.70ms +step:795/3200 train_loss:3.9059 train_time:3196991ms step_avg:4072.60ms +step:796/3200 train_loss:3.7772 train_time:3200986ms step_avg:4072.50ms +step:797/3200 train_loss:3.5733 train_time:3204979ms step_avg:4072.40ms +step:798/3200 train_loss:3.6698 train_time:3208975ms step_avg:4072.30ms +step:799/3200 train_loss:3.9118 train_time:3212973ms step_avg:4072.21ms +step:800/3200 train_loss:3.6919 train_time:3216971ms step_avg:4072.12ms +step:801/3200 train_loss:3.7868 train_time:3220974ms step_avg:4072.03ms +step:802/3200 train_loss:3.6332 train_time:3224980ms step_avg:4071.94ms +step:803/3200 train_loss:3.7987 train_time:3228993ms step_avg:4071.87ms +step:804/3200 train_loss:3.6220 train_time:3232989ms step_avg:4071.77ms +step:805/3200 train_loss:3.6743 train_time:3236980ms step_avg:4071.67ms +step:806/3200 train_loss:3.7742 train_time:3240976ms step_avg:4071.58ms +step:807/3200 train_loss:3.9590 train_time:3244977ms step_avg:4071.49ms +step:808/3200 train_loss:3.6815 train_time:3248985ms step_avg:4071.41ms +step:809/3200 train_loss:3.6671 train_time:3253001ms step_avg:4071.34ms +step:810/3200 train_loss:3.6774 train_time:3257026ms step_avg:4071.28ms +step:811/3200 train_loss:3.5694 train_time:3261057ms step_avg:4071.23ms +step:812/3200 train_loss:3.6098 train_time:3265098ms step_avg:4071.19ms +step:813/3200 train_loss:3.6175 train_time:3269149ms step_avg:4071.17ms +step:814/3200 train_loss:3.7067 train_time:3273211ms step_avg:4071.16ms +step:815/3200 train_loss:3.7299 train_time:3277287ms step_avg:4071.16ms +step:816/3200 train_loss:3.5587 train_time:3281392ms step_avg:4071.21ms +step:817/3200 train_loss:3.6519 train_time:3285515ms step_avg:4071.27ms +step:818/3200 train_loss:3.7034 train_time:3289666ms step_avg:4071.37ms +step:819/3200 train_loss:3.6482 train_time:3293840ms step_avg:4071.50ms +step:820/3200 train_loss:3.5894 train_time:3298022ms step_avg:4071.63ms +step:821/3200 train_loss:3.5464 train_time:3302220ms step_avg:4071.79ms +step:822/3200 train_loss:3.8758 train_time:3306415ms step_avg:4071.94ms +step:823/3200 train_loss:3.6109 train_time:3310572ms step_avg:4072.04ms +step:824/3200 train_loss:3.6657 train_time:3314682ms step_avg:4072.09ms +step:825/3200 train_loss:3.6087 train_time:3318763ms step_avg:4072.10ms +step:826/3200 train_loss:3.6610 train_time:3322821ms step_avg:4072.08ms +step:827/3200 train_loss:3.6833 train_time:3326864ms step_avg:4072.05ms +step:828/3200 train_loss:3.6456 train_time:3330884ms step_avg:4071.99ms +step:829/3200 train_loss:3.7802 train_time:3334897ms step_avg:4071.91ms +step:830/3200 train_loss:3.6901 train_time:3338899ms step_avg:4071.83ms +step:831/3200 train_loss:3.5344 train_time:3342895ms step_avg:4071.74ms +step:832/3200 train_loss:4.0964 train_time:3346882ms step_avg:4071.63ms +step:833/3200 train_loss:3.5549 train_time:3350870ms step_avg:4071.53ms +step:834/3200 train_loss:3.7640 train_time:3354852ms step_avg:4071.42ms +step:835/3200 train_loss:3.6765 train_time:3358835ms step_avg:4071.31ms +step:836/3200 train_loss:3.6634 train_time:3362824ms step_avg:4071.22ms +step:837/3200 train_loss:3.7799 train_time:3366833ms step_avg:4071.14ms +step:838/3200 train_loss:3.7056 train_time:3370834ms step_avg:4071.06ms +step:839/3200 train_loss:3.6767 train_time:3374835ms step_avg:4070.97ms +step:840/3200 train_loss:3.8318 train_time:3378837ms step_avg:4070.89ms +step:841/3200 train_loss:3.6672 train_time:3382838ms step_avg:4070.80ms +step:842/3200 train_loss:3.5343 train_time:3386846ms step_avg:4070.73ms +step:843/3200 train_loss:3.7606 train_time:3390848ms step_avg:4070.65ms +step:844/3200 train_loss:3.7202 train_time:3394859ms step_avg:4070.57ms +step:845/3200 train_loss:3.6608 train_time:3398877ms step_avg:4070.51ms +step:846/3200 train_loss:3.6254 train_time:3402865ms step_avg:4070.41ms +step:847/3200 train_loss:3.5533 train_time:3406859ms step_avg:4070.32ms +step:848/3200 train_loss:3.7805 train_time:3410857ms step_avg:4070.23ms +step:849/3200 train_loss:3.6887 train_time:3414853ms step_avg:4070.15ms +step:850/3200 train_loss:3.7697 train_time:3418860ms step_avg:4070.07ms +step:851/3200 train_loss:3.6601 train_time:3422874ms step_avg:4070.01ms +step:852/3200 train_loss:3.9548 train_time:3426895ms step_avg:4069.95ms +step:853/3200 train_loss:3.6854 train_time:3430925ms step_avg:4069.90ms +step:854/3200 train_loss:3.7461 train_time:3434967ms step_avg:4069.87ms +step:855/3200 train_loss:3.5342 train_time:3439018ms step_avg:4069.84ms +step:856/3200 train_loss:3.5733 train_time:3443084ms step_avg:4069.84ms +step:857/3200 train_loss:3.6734 train_time:3447153ms step_avg:4069.84ms +step:858/3200 train_loss:3.7162 train_time:3451244ms step_avg:4069.86ms +step:859/3200 train_loss:3.6412 train_time:3455350ms step_avg:4069.91ms +step:860/3200 train_loss:3.7285 train_time:3459485ms step_avg:4069.98ms +step:861/3200 train_loss:3.5441 train_time:3463645ms step_avg:4070.09ms +step:862/3200 train_loss:3.7085 train_time:3467826ms step_avg:4070.22ms +step:863/3200 train_loss:3.6038 train_time:3472034ms step_avg:4070.38ms +step:864/3200 train_loss:3.9166 train_time:3476250ms step_avg:4070.55ms +step:865/3200 train_loss:3.7401 train_time:3480445ms step_avg:4070.70ms +step:866/3200 train_loss:3.5873 train_time:3484617ms step_avg:4070.81ms +step:867/3200 train_loss:3.6272 train_time:3488730ms step_avg:4070.86ms +step:868/3200 train_loss:3.5927 train_time:3492800ms step_avg:4070.86ms +step:869/3200 train_loss:3.7753 train_time:3496847ms step_avg:4070.83ms +step:870/3200 train_loss:3.6200 train_time:3500870ms step_avg:4070.78ms +step:871/3200 train_loss:3.6201 train_time:3504877ms step_avg:4070.71ms +step:872/3200 train_loss:3.6109 train_time:3508874ms step_avg:4070.62ms +step:873/3200 train_loss:3.8210 train_time:3512863ms step_avg:4070.53ms +step:874/3200 train_loss:3.6671 train_time:3516841ms step_avg:4070.42ms +step:875/3200 train_loss:3.7532 train_time:3520825ms step_avg:4070.32ms +step:875/3200 val_loss:3.6385 train_time:3520825ms step_avg:4070.32ms +step:876/3200 train_loss:3.6981 train_time:3524805ms step_avg:4070.21ms +step:877/3200 train_loss:3.6224 train_time:3528780ms step_avg:4070.10ms +step:878/3200 train_loss:3.5923 train_time:3532753ms step_avg:4069.99ms +step:879/3200 train_loss:3.5658 train_time:3536727ms step_avg:4069.88ms +step:880/3200 train_loss:3.5117 train_time:3540703ms step_avg:4069.77ms +step:881/3200 train_loss:3.7036 train_time:3544683ms step_avg:4069.67ms +step:882/3200 train_loss:3.6621 train_time:3548665ms step_avg:4069.57ms +step:883/3200 train_loss:3.5375 train_time:3552645ms step_avg:4069.47ms +step:884/3200 train_loss:3.5833 train_time:3556627ms step_avg:4069.37ms +step:885/3200 train_loss:3.7035 train_time:3560610ms step_avg:4069.27ms +step:886/3200 train_loss:3.5719 train_time:3564596ms step_avg:4069.17ms +step:887/3200 train_loss:3.8098 train_time:3568588ms step_avg:4069.09ms +step:888/3200 train_loss:3.6451 train_time:3572572ms step_avg:4068.99ms +step:889/3200 train_loss:4.4264 train_time:3576570ms step_avg:4068.91ms +step:890/3200 train_loss:3.6776 train_time:3580563ms step_avg:4068.82ms +step:891/3200 train_loss:3.6972 train_time:3584542ms step_avg:4068.72ms +step:892/3200 train_loss:3.5301 train_time:3588529ms step_avg:4068.63ms +step:893/3200 train_loss:3.9317 train_time:3592522ms step_avg:4068.54ms +step:894/3200 train_loss:3.6996 train_time:3596518ms step_avg:4068.46ms +step:895/3200 train_loss:3.5830 train_time:3600522ms step_avg:4068.39ms +step:896/3200 train_loss:3.7222 train_time:3604533ms step_avg:4068.32ms +step:897/3200 train_loss:3.6472 train_time:3608557ms step_avg:4068.27ms +step:898/3200 train_loss:3.5940 train_time:3612587ms step_avg:4068.23ms +step:899/3200 train_loss:3.6023 train_time:3616630ms step_avg:4068.20ms +step:900/3200 train_loss:3.6187 train_time:3620691ms step_avg:4068.19ms +step:901/3200 train_loss:3.7412 train_time:3624764ms step_avg:4068.20ms +step:902/3200 train_loss:3.5041 train_time:3628855ms step_avg:4068.22ms +step:903/3200 train_loss:3.8036 train_time:3632962ms step_avg:4068.27ms +step:904/3200 train_loss:4.0371 train_time:3637098ms step_avg:4068.34ms +step:905/3200 train_loss:3.5567 train_time:3641248ms step_avg:4068.43ms +step:906/3200 train_loss:3.5570 train_time:3645405ms step_avg:4068.53ms +step:907/3200 train_loss:3.6991 train_time:3649543ms step_avg:4068.61ms +step:908/3200 train_loss:3.4333 train_time:3653664ms step_avg:4068.67ms +step:909/3200 train_loss:3.6328 train_time:3657774ms step_avg:4068.71ms +step:910/3200 train_loss:3.7476 train_time:3661882ms step_avg:4068.76ms +step:911/3200 train_loss:4.0616 train_time:3665983ms step_avg:4068.79ms +step:912/3200 train_loss:3.7126 train_time:3670079ms step_avg:4068.82ms +step:913/3200 train_loss:3.5875 train_time:3674177ms step_avg:4068.86ms +step:914/3200 train_loss:3.4770 train_time:3678278ms step_avg:4068.89ms +step:915/3200 train_loss:3.6216 train_time:3682388ms step_avg:4068.94ms +step:916/3200 train_loss:3.5861 train_time:3686501ms step_avg:4068.99ms +step:917/3200 train_loss:3.6062 train_time:3690622ms step_avg:4069.04ms +step:918/3200 train_loss:3.5688 train_time:3694750ms step_avg:4069.11ms +step:919/3200 train_loss:3.6375 train_time:3698892ms step_avg:4069.19ms +step:920/3200 train_loss:4.1905 train_time:3703044ms step_avg:4069.28ms +step:921/3200 train_loss:3.5018 train_time:3707230ms step_avg:4069.41ms +step:922/3200 train_loss:3.9385 train_time:3711411ms step_avg:4069.53ms +step:923/3200 train_loss:3.6452 train_time:3715629ms step_avg:4069.69ms +step:924/3200 train_loss:3.7076 train_time:3719854ms step_avg:4069.86ms +step:925/3200 train_loss:3.6994 train_time:3724049ms step_avg:4070.00ms +step:926/3200 train_loss:3.5408 train_time:3728218ms step_avg:4070.11ms +step:927/3200 train_loss:3.8347 train_time:3732354ms step_avg:4070.18ms +step:928/3200 train_loss:3.5385 train_time:3736463ms step_avg:4070.22ms +step:929/3200 train_loss:3.6225 train_time:3740558ms step_avg:4070.25ms +step:930/3200 train_loss:3.7780 train_time:3744639ms step_avg:4070.26ms +step:931/3200 train_loss:3.5896 train_time:3748715ms step_avg:4070.27ms +step:932/3200 train_loss:3.7671 train_time:3752777ms step_avg:4070.26ms +step:933/3200 train_loss:3.6358 train_time:3756842ms step_avg:4070.25ms +step:934/3200 train_loss:3.8956 train_time:3760906ms step_avg:4070.24ms +step:935/3200 train_loss:3.4895 train_time:3764964ms step_avg:4070.23ms +step:936/3200 train_loss:4.3917 train_time:3769024ms step_avg:4070.22ms +step:937/3200 train_loss:3.7580 train_time:3773086ms step_avg:4070.21ms +step:938/3200 train_loss:3.5549 train_time:3777149ms step_avg:4070.20ms +step:939/3200 train_loss:3.7118 train_time:3781220ms step_avg:4070.20ms +step:940/3200 train_loss:3.7411 train_time:3785294ms step_avg:4070.21ms +step:941/3200 train_loss:3.7106 train_time:3789371ms step_avg:4070.22ms +step:942/3200 train_loss:3.6032 train_time:3793448ms step_avg:4070.22ms +step:943/3200 train_loss:3.5131 train_time:3797537ms step_avg:4070.24ms +step:944/3200 train_loss:3.6205 train_time:3801642ms step_avg:4070.28ms +step:945/3200 train_loss:3.5456 train_time:3805759ms step_avg:4070.33ms +step:946/3200 train_loss:3.7198 train_time:3809893ms step_avg:4070.40ms +step:947/3200 train_loss:3.5625 train_time:3814038ms step_avg:4070.48ms +step:948/3200 train_loss:3.6777 train_time:3818216ms step_avg:4070.59ms +step:949/3200 train_loss:3.6057 train_time:3822400ms step_avg:4070.71ms +step:950/3200 train_loss:3.6870 train_time:3826596ms step_avg:4070.85ms +step:951/3200 train_loss:3.4881 train_time:3830732ms step_avg:4070.92ms +step:952/3200 train_loss:3.6558 train_time:3834834ms step_avg:4070.95ms +step:953/3200 train_loss:3.7239 train_time:3838918ms step_avg:4070.96ms +step:954/3200 train_loss:3.6284 train_time:3842986ms step_avg:4070.96ms +step:955/3200 train_loss:3.4901 train_time:3847033ms step_avg:4070.93ms +step:956/3200 train_loss:3.5697 train_time:3851068ms step_avg:4070.90ms +step:957/3200 train_loss:3.7022 train_time:3855095ms step_avg:4070.85ms +step:958/3200 train_loss:3.6505 train_time:3859121ms step_avg:4070.80ms +step:959/3200 train_loss:3.4329 train_time:3863148ms step_avg:4070.76ms +step:960/3200 train_loss:3.7343 train_time:3867161ms step_avg:4070.70ms +step:961/3200 train_loss:3.7559 train_time:3871158ms step_avg:4070.62ms +step:962/3200 train_loss:3.7517 train_time:3875175ms step_avg:4070.56ms +step:963/3200 train_loss:3.6522 train_time:3879174ms step_avg:4070.49ms +step:964/3200 train_loss:3.6915 train_time:3883174ms step_avg:4070.41ms +step:965/3200 train_loss:3.7391 train_time:3887176ms step_avg:4070.34ms +step:966/3200 train_loss:3.5864 train_time:3891173ms step_avg:4070.27ms +step:967/3200 train_loss:3.7042 train_time:3895175ms step_avg:4070.19ms +step:968/3200 train_loss:3.6716 train_time:3899187ms step_avg:4070.13ms +step:969/3200 train_loss:3.7414 train_time:3903203ms step_avg:4070.08ms +step:970/3200 train_loss:3.5473 train_time:3907227ms step_avg:4070.03ms +step:971/3200 train_loss:3.7389 train_time:3911252ms step_avg:4069.98ms +step:972/3200 train_loss:3.6623 train_time:3915290ms step_avg:4069.95ms +step:973/3200 train_loss:3.4777 train_time:3919340ms step_avg:4069.93ms +step:974/3200 train_loss:3.4215 train_time:3923392ms step_avg:4069.91ms +step:975/3200 train_loss:3.7226 train_time:3927454ms step_avg:4069.90ms +step:976/3200 train_loss:3.5836 train_time:3931526ms step_avg:4069.90ms +step:977/3200 train_loss:3.7185 train_time:3935611ms step_avg:4069.92ms +step:978/3200 train_loss:3.6909 train_time:3939718ms step_avg:4069.96ms +step:979/3200 train_loss:3.5413 train_time:3943840ms step_avg:4070.01ms +step:980/3200 train_loss:3.6757 train_time:3947980ms step_avg:4070.08ms +step:981/3200 train_loss:3.6810 train_time:3952159ms step_avg:4070.19ms +step:982/3200 train_loss:3.5050 train_time:3956345ms step_avg:4070.31ms +step:983/3200 train_loss:3.9469 train_time:3960543ms step_avg:4070.45ms +step:984/3200 train_loss:3.6511 train_time:3964724ms step_avg:4070.56ms +step:985/3200 train_loss:3.6678 train_time:3968834ms step_avg:4070.60ms +step:986/3200 train_loss:3.6042 train_time:3972908ms step_avg:4070.60ms +step:987/3200 train_loss:3.8545 train_time:3976940ms step_avg:4070.56ms +step:988/3200 train_loss:3.7241 train_time:3980963ms step_avg:4070.51ms +step:989/3200 train_loss:3.6282 train_time:3984965ms step_avg:4070.44ms +step:990/3200 train_loss:3.5516 train_time:3988957ms step_avg:4070.36ms +step:991/3200 train_loss:3.3812 train_time:3992940ms step_avg:4070.27ms +step:992/3200 train_loss:3.5914 train_time:3996913ms step_avg:4070.18ms +step:993/3200 train_loss:3.6526 train_time:4000881ms step_avg:4070.07ms +step:994/3200 train_loss:3.5584 train_time:4004854ms step_avg:4069.97ms +step:995/3200 train_loss:3.4857 train_time:4008837ms step_avg:4069.89ms +step:996/3200 train_loss:3.6503 train_time:4012817ms step_avg:4069.79ms +step:997/3200 train_loss:3.4493 train_time:4016795ms step_avg:4069.70ms +step:998/3200 train_loss:4.1124 train_time:4020769ms step_avg:4069.60ms +step:999/3200 train_loss:3.7055 train_time:4024735ms step_avg:4069.50ms +step:1000/3200 train_loss:3.5555 train_time:4028707ms step_avg:4069.40ms +step:1000/3200 val_loss:3.6000 train_time:4028707ms step_avg:4069.40ms +step:1001/3200 train_loss:3.7659 train_time:4032691ms step_avg:4069.32ms +step:1002/3200 train_loss:3.6305 train_time:4036674ms step_avg:4069.23ms +step:1003/3200 train_loss:3.6941 train_time:4040660ms step_avg:4069.14ms +step:1004/3200 train_loss:3.6056 train_time:4044647ms step_avg:4069.06ms +step:1005/3200 train_loss:3.6672 train_time:4048635ms step_avg:4068.98ms +step:1006/3200 train_loss:3.3651 train_time:4052625ms step_avg:4068.90ms +step:1007/3200 train_loss:3.4732 train_time:4056619ms step_avg:4068.83ms +step:1008/3200 train_loss:3.5716 train_time:4060608ms step_avg:4068.75ms +step:1009/3200 train_loss:3.6107 train_time:4064595ms step_avg:4068.66ms +step:1010/3200 train_loss:3.4991 train_time:4068579ms step_avg:4068.58ms +step:1011/3200 train_loss:4.0297 train_time:4072570ms step_avg:4068.50ms +step:1012/3200 train_loss:3.5553 train_time:4076569ms step_avg:4068.43ms +step:1013/3200 train_loss:3.5289 train_time:4080576ms step_avg:4068.37ms +step:1014/3200 train_loss:3.6300 train_time:4084591ms step_avg:4068.32ms +step:1015/3200 train_loss:3.3730 train_time:4088613ms step_avg:4068.27ms +step:1016/3200 train_loss:3.5344 train_time:4092646ms step_avg:4068.24ms +step:1017/3200 train_loss:3.7607 train_time:4096683ms step_avg:4068.21ms +step:1018/3200 train_loss:3.5773 train_time:4100743ms step_avg:4068.20ms +step:1019/3200 train_loss:3.7000 train_time:4104816ms step_avg:4068.20ms +step:1020/3200 train_loss:3.6038 train_time:4108901ms step_avg:4068.22ms +step:1021/3200 train_loss:3.9128 train_time:4113003ms step_avg:4068.25ms +step:1022/3200 train_loss:3.8276 train_time:4117125ms step_avg:4068.31ms +step:1023/3200 train_loss:3.6108 train_time:4121272ms step_avg:4068.38ms +step:1024/3200 train_loss:3.6602 train_time:4125443ms step_avg:4068.48ms +step:1025/3200 train_loss:3.5526 train_time:4129625ms step_avg:4068.60ms +step:1026/3200 train_loss:3.6910 train_time:4133823ms step_avg:4068.72ms +step:1027/3200 train_loss:3.8052 train_time:4137990ms step_avg:4068.82ms +step:1028/3200 train_loss:3.3893 train_time:4142098ms step_avg:4068.86ms +step:1029/3200 train_loss:3.5766 train_time:4146179ms step_avg:4068.87ms +step:1030/3200 train_loss:3.5693 train_time:4150231ms step_avg:4068.85ms +step:1031/3200 train_loss:3.5538 train_time:4154265ms step_avg:4068.82ms +step:1032/3200 train_loss:3.6303 train_time:4158282ms step_avg:4068.77ms +step:1033/3200 train_loss:3.5544 train_time:4162287ms step_avg:4068.71ms +step:1034/3200 train_loss:3.5800 train_time:4166281ms step_avg:4068.63ms +step:1035/3200 train_loss:3.6227 train_time:4170269ms step_avg:4068.56ms +step:1036/3200 train_loss:3.6887 train_time:4174251ms step_avg:4068.47ms +step:1037/3200 train_loss:3.4822 train_time:4178229ms step_avg:4068.38ms +step:1038/3200 train_loss:3.6569 train_time:4182202ms step_avg:4068.29ms +step:1039/3200 train_loss:3.5207 train_time:4186170ms step_avg:4068.19ms +step:1040/3200 train_loss:3.6371 train_time:4190157ms step_avg:4068.11ms +step:1041/3200 train_loss:3.6285 train_time:4194149ms step_avg:4068.04ms +step:1042/3200 train_loss:3.5410 train_time:4198140ms step_avg:4067.97ms +step:1043/3200 train_loss:3.6956 train_time:4202127ms step_avg:4067.89ms +step:1044/3200 train_loss:3.6696 train_time:4206118ms step_avg:4067.81ms +step:1045/3200 train_loss:3.7053 train_time:4210107ms step_avg:4067.74ms +step:1046/3200 train_loss:3.6882 train_time:4214096ms step_avg:4067.66ms +step:1047/3200 train_loss:3.4419 train_time:4218084ms step_avg:4067.58ms +step:1048/3200 train_loss:3.5686 train_time:4222076ms step_avg:4067.51ms +step:1049/3200 train_loss:3.7677 train_time:4226070ms step_avg:4067.44ms +step:1050/3200 train_loss:3.7556 train_time:4230064ms step_avg:4067.37ms +step:1051/3200 train_loss:3.6146 train_time:4234064ms step_avg:4067.30ms +step:1052/3200 train_loss:3.5468 train_time:4238047ms step_avg:4067.22ms +step:1053/3200 train_loss:5.1001 train_time:4242025ms step_avg:4067.14ms +step:1054/3200 train_loss:3.6977 train_time:4246012ms step_avg:4067.06ms +step:1055/3200 train_loss:3.5790 train_time:4250004ms step_avg:4066.99ms +step:1056/3200 train_loss:3.7632 train_time:4253998ms step_avg:4066.92ms +step:1057/3200 train_loss:3.6155 train_time:4258000ms step_avg:4066.86ms +step:1058/3200 train_loss:3.6653 train_time:4262006ms step_avg:4066.80ms +step:1059/3200 train_loss:3.6249 train_time:4266024ms step_avg:4066.75ms +step:1060/3200 train_loss:3.5879 train_time:4270050ms step_avg:4066.71ms +step:1061/3200 train_loss:3.6612 train_time:4274078ms step_avg:4066.68ms +step:1062/3200 train_loss:3.6911 train_time:4278117ms step_avg:4066.65ms +step:1063/3200 train_loss:3.5477 train_time:4282170ms step_avg:4066.64ms +step:1064/3200 train_loss:3.8052 train_time:4286241ms step_avg:4066.64ms +step:1065/3200 train_loss:3.7281 train_time:4290321ms step_avg:4066.66ms +step:1066/3200 train_loss:3.4635 train_time:4294404ms step_avg:4066.67ms +step:1067/3200 train_loss:3.7259 train_time:4298498ms step_avg:4066.70ms +step:1068/3200 train_loss:3.6179 train_time:4302605ms step_avg:4066.73ms +step:1069/3200 train_loss:3.5029 train_time:4306712ms step_avg:4066.77ms +step:1070/3200 train_loss:3.6516 train_time:4310845ms step_avg:4066.84ms +step:1071/3200 train_loss:3.6677 train_time:4314997ms step_avg:4066.92ms +step:1072/3200 train_loss:3.8058 train_time:4319184ms step_avg:4067.03ms +step:1073/3200 train_loss:3.7947 train_time:4323364ms step_avg:4067.13ms +step:1074/3200 train_loss:3.5699 train_time:4327541ms step_avg:4067.24ms +step:1075/3200 train_loss:3.7086 train_time:4331693ms step_avg:4067.32ms +step:1076/3200 train_loss:2.9929 train_time:4335791ms step_avg:4067.35ms +step:1077/3200 train_loss:3.6352 train_time:4339860ms step_avg:4067.35ms +step:1078/3200 train_loss:3.4763 train_time:4343913ms step_avg:4067.33ms +step:1079/3200 train_loss:3.6007 train_time:4347942ms step_avg:4067.30ms +step:1080/3200 train_loss:3.9340 train_time:4351964ms step_avg:4067.26ms +step:1081/3200 train_loss:3.5536 train_time:4355973ms step_avg:4067.20ms +step:1082/3200 train_loss:3.7700 train_time:4359972ms step_avg:4067.14ms +step:1083/3200 train_loss:3.7932 train_time:4363965ms step_avg:4067.07ms +step:1084/3200 train_loss:3.6721 train_time:4367949ms step_avg:4066.99ms +step:1085/3200 train_loss:3.6945 train_time:4371930ms step_avg:4066.91ms +step:1086/3200 train_loss:3.6196 train_time:4375910ms step_avg:4066.83ms +step:1087/3200 train_loss:3.3977 train_time:4379885ms step_avg:4066.75ms +step:1088/3200 train_loss:3.6415 train_time:4383855ms step_avg:4066.66ms +step:1089/3200 train_loss:3.5385 train_time:4387825ms step_avg:4066.57ms +step:1090/3200 train_loss:3.5723 train_time:4391796ms step_avg:4066.48ms +step:1091/3200 train_loss:3.5627 train_time:4395764ms step_avg:4066.39ms +step:1092/3200 train_loss:3.5692 train_time:4399733ms step_avg:4066.30ms +step:1093/3200 train_loss:3.6486 train_time:4403701ms step_avg:4066.21ms +step:1094/3200 train_loss:3.6814 train_time:4407673ms step_avg:4066.12ms +step:1095/3200 train_loss:3.5336 train_time:4411645ms step_avg:4066.03ms +step:1096/3200 train_loss:3.6314 train_time:4415619ms step_avg:4065.95ms +step:1097/3200 train_loss:3.7952 train_time:4419594ms step_avg:4065.86ms +step:1098/3200 train_loss:3.2757 train_time:4423577ms step_avg:4065.79ms +step:1099/3200 train_loss:3.4963 train_time:4427561ms step_avg:4065.71ms +step:1100/3200 train_loss:3.5831 train_time:4431548ms step_avg:4065.64ms +step:1101/3200 train_loss:3.6554 train_time:4435537ms step_avg:4065.57ms +step:1102/3200 train_loss:3.7593 train_time:4439531ms step_avg:4065.50ms +step:1103/3200 train_loss:4.0282 train_time:4443533ms step_avg:4065.45ms +step:1104/3200 train_loss:3.3610 train_time:4447542ms step_avg:4065.39ms +step:1105/3200 train_loss:3.6303 train_time:4451553ms step_avg:4065.35ms +step:1106/3200 train_loss:3.6848 train_time:4455579ms step_avg:4065.31ms +step:1107/3200 train_loss:3.6516 train_time:4459602ms step_avg:4065.27ms +step:1108/3200 train_loss:3.5926 train_time:4463642ms step_avg:4065.25ms +step:1109/3200 train_loss:4.0139 train_time:4467694ms step_avg:4065.24ms +step:1110/3200 train_loss:3.6278 train_time:4471757ms step_avg:4065.23ms +step:1111/3200 train_loss:3.5054 train_time:4475838ms step_avg:4065.25ms +step:1112/3200 train_loss:3.7382 train_time:4479932ms step_avg:4065.27ms +step:1113/3200 train_loss:3.5841 train_time:4484049ms step_avg:4065.32ms +step:1114/3200 train_loss:3.5422 train_time:4488189ms step_avg:4065.39ms +step:1115/3200 train_loss:3.4569 train_time:4492371ms step_avg:4065.49ms +step:1116/3200 train_loss:3.4817 train_time:4496549ms step_avg:4065.60ms +step:1117/3200 train_loss:3.3796 train_time:4500725ms step_avg:4065.70ms +step:1118/3200 train_loss:3.7595 train_time:4504880ms step_avg:4065.78ms +step:1119/3200 train_loss:4.3735 train_time:4509016ms step_avg:4065.84ms +step:1120/3200 train_loss:3.6653 train_time:4513138ms step_avg:4065.89ms +step:1121/3200 train_loss:3.6145 train_time:4517254ms step_avg:4065.94ms +step:1122/3200 train_loss:3.4546 train_time:4521365ms step_avg:4065.98ms +step:1123/3200 train_loss:3.6729 train_time:4525480ms step_avg:4066.02ms +step:1124/3200 train_loss:3.6981 train_time:4529594ms step_avg:4066.06ms +step:1125/3200 train_loss:3.3273 train_time:4533710ms step_avg:4066.11ms +step:1125/3200 val_loss:3.5718 train_time:4533710ms step_avg:4066.11ms +step:1126/3200 train_loss:3.5800 train_time:4537910ms step_avg:4066.23ms +step:1127/3200 train_loss:3.5372 train_time:4542109ms step_avg:4066.35ms +step:1128/3200 train_loss:3.7717 train_time:4546276ms step_avg:4066.44ms +step:1129/3200 train_loss:3.5073 train_time:4550376ms step_avg:4066.47ms +step:1130/3200 train_loss:3.7467 train_time:4554441ms step_avg:4066.47ms +step:1131/3200 train_loss:3.4975 train_time:4558479ms step_avg:4066.44ms +step:1132/3200 train_loss:3.3846 train_time:4562505ms step_avg:4066.40ms +step:1133/3200 train_loss:3.5325 train_time:4566533ms step_avg:4066.37ms +step:1134/3200 train_loss:3.4820 train_time:4570554ms step_avg:4066.33ms +step:1135/3200 train_loss:3.6956 train_time:4574563ms step_avg:4066.28ms +step:1136/3200 train_loss:3.5502 train_time:4578567ms step_avg:4066.22ms +step:1137/3200 train_loss:3.6089 train_time:4582558ms step_avg:4066.16ms +step:1138/3200 train_loss:3.4248 train_time:4586546ms step_avg:4066.09ms +step:1139/3200 train_loss:3.6502 train_time:4590534ms step_avg:4066.02ms +step:1140/3200 train_loss:3.5569 train_time:4594512ms step_avg:4065.94ms +step:1141/3200 train_loss:3.5420 train_time:4598491ms step_avg:4065.86ms +step:1142/3200 train_loss:3.5845 train_time:4602471ms step_avg:4065.79ms +step:1143/3200 train_loss:3.5329 train_time:4606448ms step_avg:4065.71ms +step:1144/3200 train_loss:3.5957 train_time:4610422ms step_avg:4065.63ms +step:1145/3200 train_loss:3.5927 train_time:4614395ms step_avg:4065.55ms +step:1146/3200 train_loss:3.8060 train_time:4618369ms step_avg:4065.47ms +step:1147/3200 train_loss:3.5441 train_time:4622337ms step_avg:4065.38ms +step:1148/3200 train_loss:3.6613 train_time:4626309ms step_avg:4065.30ms +step:1149/3200 train_loss:3.6206 train_time:4630279ms step_avg:4065.21ms +step:1150/3200 train_loss:3.6201 train_time:4634251ms step_avg:4065.13ms +step:1151/3200 train_loss:3.6071 train_time:4638229ms step_avg:4065.06ms +step:1152/3200 train_loss:3.4605 train_time:4642206ms step_avg:4064.98ms +step:1153/3200 train_loss:3.5008 train_time:4646188ms step_avg:4064.91ms +step:1154/3200 train_loss:3.5086 train_time:4650166ms step_avg:4064.83ms +step:1155/3200 train_loss:3.4097 train_time:4654144ms step_avg:4064.75ms +step:1156/3200 train_loss:3.6593 train_time:4658129ms step_avg:4064.69ms +step:1157/3200 train_loss:3.5773 train_time:4662118ms step_avg:4064.62ms +step:1158/3200 train_loss:3.7664 train_time:4666109ms step_avg:4064.55ms +step:1159/3200 train_loss:3.5887 train_time:4670107ms step_avg:4064.50ms +step:1160/3200 train_loss:3.7072 train_time:4674110ms step_avg:4064.44ms +step:1161/3200 train_loss:3.6967 train_time:4678114ms step_avg:4064.39ms +step:1162/3200 train_loss:3.5154 train_time:4682126ms step_avg:4064.35ms +step:1163/3200 train_loss:3.3670 train_time:4686145ms step_avg:4064.31ms +step:1164/3200 train_loss:3.5472 train_time:4690174ms step_avg:4064.28ms +step:1165/3200 train_loss:3.4264 train_time:4694206ms step_avg:4064.25ms +step:1166/3200 train_loss:3.6866 train_time:4698248ms step_avg:4064.23ms +step:1167/3200 train_loss:3.6161 train_time:4702280ms step_avg:4064.20ms +step:1168/3200 train_loss:3.5135 train_time:4706310ms step_avg:4064.17ms +step:1169/3200 train_loss:3.5027 train_time:4710358ms step_avg:4064.16ms +step:1170/3200 train_loss:3.6727 train_time:4714419ms step_avg:4064.15ms +step:1171/3200 train_loss:3.6533 train_time:4718499ms step_avg:4064.17ms +step:1172/3200 train_loss:3.4612 train_time:4722587ms step_avg:4064.19ms +step:1173/3200 train_loss:3.4296 train_time:4726702ms step_avg:4064.23ms +step:1174/3200 train_loss:3.5615 train_time:4730841ms step_avg:4064.30ms +step:1175/3200 train_loss:3.4860 train_time:4735022ms step_avg:4064.40ms +step:1176/3200 train_loss:3.6768 train_time:4739209ms step_avg:4064.50ms +step:1177/3200 train_loss:3.9078 train_time:4743406ms step_avg:4064.62ms +step:1178/3200 train_loss:3.5582 train_time:4747600ms step_avg:4064.73ms +step:1179/3200 train_loss:3.5288 train_time:4751745ms step_avg:4064.79ms +step:1180/3200 train_loss:3.5297 train_time:4755867ms step_avg:4064.84ms +step:1181/3200 train_loss:3.5091 train_time:4759967ms step_avg:4064.87ms +step:1182/3200 train_loss:3.5708 train_time:4764053ms step_avg:4064.89ms +step:1183/3200 train_loss:3.5756 train_time:4768131ms step_avg:4064.90ms +step:1184/3200 train_loss:3.6406 train_time:4772200ms step_avg:4064.91ms +step:1185/3200 train_loss:3.4547 train_time:4776266ms step_avg:4064.91ms +step:1186/3200 train_loss:3.6674 train_time:4780322ms step_avg:4064.90ms +step:1187/3200 train_loss:3.5191 train_time:4784378ms step_avg:4064.89ms +step:1188/3200 train_loss:3.4427 train_time:4788431ms step_avg:4064.88ms +step:1189/3200 train_loss:3.5044 train_time:4792483ms step_avg:4064.87ms +step:1190/3200 train_loss:3.4916 train_time:4796537ms step_avg:4064.86ms +step:1191/3200 train_loss:3.4925 train_time:4800593ms step_avg:4064.85ms +step:1192/3200 train_loss:3.3876 train_time:4804654ms step_avg:4064.85ms +step:1193/3200 train_loss:3.4347 train_time:4808718ms step_avg:4064.85ms +step:1194/3200 train_loss:3.6128 train_time:4812792ms step_avg:4064.86ms +step:1195/3200 train_loss:3.6739 train_time:4816871ms step_avg:4064.87ms +step:1196/3200 train_loss:3.7787 train_time:4820958ms step_avg:4064.89ms +step:1197/3200 train_loss:3.6789 train_time:4825055ms step_avg:4064.92ms +step:1198/3200 train_loss:3.4539 train_time:4829165ms step_avg:4064.95ms +step:1199/3200 train_loss:3.4735 train_time:4833291ms step_avg:4065.01ms +step:1200/3200 train_loss:3.5161 train_time:4837443ms step_avg:4065.08ms +step:1201/3200 train_loss:3.3794 train_time:4841608ms step_avg:4065.16ms +step:1202/3200 train_loss:3.4358 train_time:4845789ms step_avg:4065.26ms +step:1203/3200 train_loss:3.4880 train_time:4850004ms step_avg:4065.38ms +step:1204/3200 train_loss:3.4299 train_time:4854230ms step_avg:4065.52ms +step:1205/3200 train_loss:3.5260 train_time:4858422ms step_avg:4065.63ms +step:1206/3200 train_loss:3.5243 train_time:4862552ms step_avg:4065.68ms +step:1207/3200 train_loss:3.6280 train_time:4866642ms step_avg:4065.70ms +step:1208/3200 train_loss:3.6899 train_time:4870716ms step_avg:4065.71ms +step:1209/3200 train_loss:3.6900 train_time:4874759ms step_avg:4065.69ms +step:1210/3200 train_loss:3.4072 train_time:4878794ms step_avg:4065.66ms +step:1211/3200 train_loss:3.5966 train_time:4882813ms step_avg:4065.62ms +step:1212/3200 train_loss:3.5132 train_time:4886846ms step_avg:4065.60ms +step:1213/3200 train_loss:3.5027 train_time:4890873ms step_avg:4065.56ms +step:1214/3200 train_loss:3.6352 train_time:4894895ms step_avg:4065.53ms +step:1215/3200 train_loss:3.5486 train_time:4898918ms step_avg:4065.49ms +step:1216/3200 train_loss:3.6006 train_time:4902938ms step_avg:4065.45ms +step:1217/3200 train_loss:3.5057 train_time:4906950ms step_avg:4065.41ms +step:1218/3200 train_loss:3.6452 train_time:4910961ms step_avg:4065.37ms +step:1219/3200 train_loss:3.7006 train_time:4914969ms step_avg:4065.32ms +step:1220/3200 train_loss:3.5464 train_time:4918980ms step_avg:4065.27ms +step:1221/3200 train_loss:3.4694 train_time:4922987ms step_avg:4065.22ms +step:1222/3200 train_loss:3.5476 train_time:4926996ms step_avg:4065.18ms +step:1223/3200 train_loss:3.6873 train_time:4931003ms step_avg:4065.13ms +step:1224/3200 train_loss:3.5416 train_time:4935008ms step_avg:4065.08ms +step:1225/3200 train_loss:3.4678 train_time:4939018ms step_avg:4065.04ms +step:1226/3200 train_loss:3.5597 train_time:4943034ms step_avg:4064.99ms +step:1227/3200 train_loss:3.7746 train_time:4947051ms step_avg:4064.96ms +step:1228/3200 train_loss:3.4427 train_time:4951072ms step_avg:4064.92ms +step:1229/3200 train_loss:3.5211 train_time:4955098ms step_avg:4064.89ms +step:1230/3200 train_loss:3.7302 train_time:4959131ms step_avg:4064.86ms +step:1231/3200 train_loss:3.5781 train_time:4963164ms step_avg:4064.84ms +step:1232/3200 train_loss:3.4303 train_time:4967196ms step_avg:4064.81ms +step:1233/3200 train_loss:3.6498 train_time:4971218ms step_avg:4064.77ms +step:1234/3200 train_loss:3.4942 train_time:4975244ms step_avg:4064.74ms +step:1235/3200 train_loss:3.5978 train_time:4979277ms step_avg:4064.72ms +step:1236/3200 train_loss:3.5855 train_time:4983325ms step_avg:4064.70ms +step:1237/3200 train_loss:3.5417 train_time:4987384ms step_avg:4064.70ms +step:1238/3200 train_loss:3.5857 train_time:4991456ms step_avg:4064.70ms +step:1239/3200 train_loss:3.6451 train_time:4995541ms step_avg:4064.72ms +step:1240/3200 train_loss:3.6855 train_time:4999640ms step_avg:4064.75ms +step:1241/3200 train_loss:3.4306 train_time:5003755ms step_avg:4064.79ms +step:1242/3200 train_loss:3.5043 train_time:5007912ms step_avg:4064.86ms +step:1243/3200 train_loss:3.5271 train_time:5012098ms step_avg:4064.96ms +step:1244/3200 train_loss:3.4687 train_time:5016302ms step_avg:4065.07ms +step:1245/3200 train_loss:3.5629 train_time:5020506ms step_avg:4065.19ms +step:1246/3200 train_loss:3.7436 train_time:5024717ms step_avg:4065.30ms +step:1247/3200 train_loss:3.5536 train_time:5028887ms step_avg:4065.39ms +step:1248/3200 train_loss:3.6004 train_time:5032974ms step_avg:4065.41ms +step:1249/3200 train_loss:3.5315 train_time:5037018ms step_avg:4065.39ms +step:1250/3200 train_loss:3.6156 train_time:5041036ms step_avg:4065.35ms +step:1250/3200 val_loss:3.5436 train_time:5041036ms step_avg:4065.35ms +step:1251/3200 train_loss:3.5386 train_time:5045028ms step_avg:4065.29ms +step:1252/3200 train_loss:3.7143 train_time:5049011ms step_avg:4065.23ms +step:1253/3200 train_loss:3.4554 train_time:5052992ms step_avg:4065.16ms +step:1254/3200 train_loss:3.4400 train_time:5056973ms step_avg:4065.09ms +step:1255/3200 train_loss:3.6098 train_time:5060947ms step_avg:4065.02ms +step:1256/3200 train_loss:3.5863 train_time:5064925ms step_avg:4064.95ms +step:1257/3200 train_loss:3.5289 train_time:5068897ms step_avg:4064.87ms +step:1258/3200 train_loss:3.6730 train_time:5072868ms step_avg:4064.80ms +step:1259/3200 train_loss:3.4955 train_time:5076841ms step_avg:4064.72ms +step:1260/3200 train_loss:3.3689 train_time:5080813ms step_avg:4064.65ms +step:1261/3200 train_loss:3.9749 train_time:5084788ms step_avg:4064.58ms +step:1262/3200 train_loss:3.5756 train_time:5088759ms step_avg:4064.50ms +step:1263/3200 train_loss:3.6645 train_time:5092728ms step_avg:4064.43ms +step:1264/3200 train_loss:3.5223 train_time:5096700ms step_avg:4064.35ms +step:1265/3200 train_loss:3.7517 train_time:5100671ms step_avg:4064.28ms +step:1266/3200 train_loss:3.7474 train_time:5104646ms step_avg:4064.21ms +step:1267/3200 train_loss:3.8510 train_time:5108625ms step_avg:4064.14ms +step:1268/3200 train_loss:3.5000 train_time:5112605ms step_avg:4064.07ms +step:1269/3200 train_loss:3.4496 train_time:5116583ms step_avg:4064.01ms +step:1270/3200 train_loss:3.6162 train_time:5120567ms step_avg:4063.94ms +step:1271/3200 train_loss:3.5801 train_time:5124551ms step_avg:4063.88ms +step:1272/3200 train_loss:3.4928 train_time:5128536ms step_avg:4063.82ms +step:1273/3200 train_loss:3.5538 train_time:5132525ms step_avg:4063.76ms +step:1274/3200 train_loss:3.6592 train_time:5136515ms step_avg:4063.70ms +step:1275/3200 train_loss:3.5259 train_time:5140507ms step_avg:4063.64ms +step:1276/3200 train_loss:3.4346 train_time:5144504ms step_avg:4063.59ms +step:1277/3200 train_loss:3.6343 train_time:5148503ms step_avg:4063.54ms +step:1278/3200 train_loss:3.5602 train_time:5152505ms step_avg:4063.49ms +step:1279/3200 train_loss:3.5103 train_time:5156497ms step_avg:4063.43ms +step:1280/3200 train_loss:3.6954 train_time:5160482ms step_avg:4063.37ms +step:1281/3200 train_loss:3.4773 train_time:5164474ms step_avg:4063.32ms +step:1282/3200 train_loss:3.4879 train_time:5168465ms step_avg:4063.26ms +step:1283/3200 train_loss:3.7926 train_time:5172462ms step_avg:4063.21ms +step:1284/3200 train_loss:3.6411 train_time:5176472ms step_avg:4063.17ms +step:1285/3200 train_loss:3.5729 train_time:5180488ms step_avg:4063.13ms +step:1286/3200 train_loss:3.5385 train_time:5184492ms step_avg:4063.08ms +step:1287/3200 train_loss:3.6124 train_time:5188495ms step_avg:4063.03ms +step:1288/3200 train_loss:3.4396 train_time:5192516ms step_avg:4063.00ms +step:1289/3200 train_loss:3.6787 train_time:5196550ms step_avg:4062.98ms +step:1290/3200 train_loss:3.7677 train_time:5200592ms step_avg:4062.96ms +step:1291/3200 train_loss:3.5233 train_time:5204648ms step_avg:4062.96ms +step:1292/3200 train_loss:3.4210 train_time:5208724ms step_avg:4062.97ms +step:1293/3200 train_loss:3.7368 train_time:5212815ms step_avg:4062.99ms +step:1294/3200 train_loss:3.4136 train_time:5216925ms step_avg:4063.03ms +step:1295/3200 train_loss:3.6647 train_time:5221060ms step_avg:4063.08ms +step:1296/3200 train_loss:3.5774 train_time:5225241ms step_avg:4063.17ms +step:1297/3200 train_loss:3.6141 train_time:5229422ms step_avg:4063.27ms +step:1298/3200 train_loss:3.4632 train_time:5233622ms step_avg:4063.37ms +step:1299/3200 train_loss:3.6087 train_time:5237817ms step_avg:4063.47ms +step:1300/3200 train_loss:3.5016 train_time:5241998ms step_avg:4063.56ms +step:1301/3200 train_loss:3.4306 train_time:5246175ms step_avg:4063.65ms +step:1302/3200 train_loss:3.6907 train_time:5250354ms step_avg:4063.74ms +step:1303/3200 train_loss:3.3696 train_time:5254532ms step_avg:4063.83ms +step:1304/3200 train_loss:3.6383 train_time:5258711ms step_avg:4063.92ms +step:1305/3200 train_loss:3.5245 train_time:5262868ms step_avg:4063.99ms +step:1306/3200 train_loss:3.7143 train_time:5266987ms step_avg:4064.03ms +step:1307/3200 train_loss:3.5372 train_time:5271080ms step_avg:4064.06ms +step:1308/3200 train_loss:3.5557 train_time:5275151ms step_avg:4064.06ms +step:1309/3200 train_loss:3.2795 train_time:5279211ms step_avg:4064.06ms +step:1310/3200 train_loss:3.6197 train_time:5283256ms step_avg:4064.04ms +step:1311/3200 train_loss:3.4216 train_time:5287294ms step_avg:4064.02ms +step:1312/3200 train_loss:3.4673 train_time:5291329ms step_avg:4064.00ms +step:1313/3200 train_loss:3.7704 train_time:5295355ms step_avg:4063.97ms +step:1314/3200 train_loss:3.5236 train_time:5299375ms step_avg:4063.94ms +step:1315/3200 train_loss:3.5812 train_time:5303392ms step_avg:4063.90ms +step:1316/3200 train_loss:3.5980 train_time:5307412ms step_avg:4063.87ms +step:1317/3200 train_loss:3.4654 train_time:5311428ms step_avg:4063.83ms +step:1318/3200 train_loss:3.4294 train_time:5315442ms step_avg:4063.79ms +step:1319/3200 train_loss:3.3839 train_time:5319460ms step_avg:4063.76ms +step:1320/3200 train_loss:3.4272 train_time:5323480ms step_avg:4063.73ms +step:1321/3200 train_loss:3.6973 train_time:5327499ms step_avg:4063.69ms +step:1322/3200 train_loss:3.4409 train_time:5331525ms step_avg:4063.66ms +step:1323/3200 train_loss:3.7050 train_time:5335552ms step_avg:4063.63ms +step:1324/3200 train_loss:3.5595 train_time:5339585ms step_avg:4063.61ms +step:1325/3200 train_loss:3.7710 train_time:5343618ms step_avg:4063.59ms +step:1326/3200 train_loss:3.2703 train_time:5347663ms step_avg:4063.57ms +step:1327/3200 train_loss:3.6225 train_time:5351716ms step_avg:4063.57ms +step:1328/3200 train_loss:3.7017 train_time:5355781ms step_avg:4063.57ms +step:1329/3200 train_loss:3.4486 train_time:5359857ms step_avg:4063.58ms +step:1330/3200 train_loss:3.5422 train_time:5363948ms step_avg:4063.60ms +step:1331/3200 train_loss:3.5641 train_time:5368044ms step_avg:4063.62ms +step:1332/3200 train_loss:3.4764 train_time:5372156ms step_avg:4063.66ms +step:1333/3200 train_loss:3.7200 train_time:5376298ms step_avg:4063.72ms +step:1334/3200 train_loss:3.4614 train_time:5380472ms step_avg:4063.80ms +step:1335/3200 train_loss:3.5798 train_time:5384653ms step_avg:4063.89ms +step:1336/3200 train_loss:4.1320 train_time:5388856ms step_avg:4063.99ms +step:1337/3200 train_loss:3.6293 train_time:5393053ms step_avg:4064.09ms +step:1338/3200 train_loss:3.5461 train_time:5397246ms step_avg:4064.19ms +step:1339/3200 train_loss:3.5305 train_time:5401385ms step_avg:4064.25ms +step:1340/3200 train_loss:3.4042 train_time:5405492ms step_avg:4064.28ms +step:1341/3200 train_loss:3.7276 train_time:5409568ms step_avg:4064.29ms +step:1342/3200 train_loss:3.7280 train_time:5413630ms step_avg:4064.29ms +step:1343/3200 train_loss:3.5735 train_time:5417674ms step_avg:4064.27ms +step:1344/3200 train_loss:3.5293 train_time:5421705ms step_avg:4064.25ms +step:1345/3200 train_loss:3.7800 train_time:5425726ms step_avg:4064.21ms +step:1346/3200 train_loss:3.5161 train_time:5429742ms step_avg:4064.18ms +step:1347/3200 train_loss:3.4946 train_time:5433752ms step_avg:4064.14ms +step:1348/3200 train_loss:3.5968 train_time:5437752ms step_avg:4064.09ms +step:1349/3200 train_loss:3.5522 train_time:5441750ms step_avg:4064.04ms +step:1350/3200 train_loss:3.5252 train_time:5445743ms step_avg:4063.99ms +step:1351/3200 train_loss:3.4514 train_time:5449734ms step_avg:4063.93ms +step:1352/3200 train_loss:3.5059 train_time:5453727ms step_avg:4063.88ms +step:1353/3200 train_loss:3.4804 train_time:5457719ms step_avg:4063.83ms +step:1354/3200 train_loss:3.5575 train_time:5461711ms step_avg:4063.77ms +step:1355/3200 train_loss:3.3538 train_time:5465704ms step_avg:4063.72ms +step:1356/3200 train_loss:3.5372 train_time:5469696ms step_avg:4063.67ms +step:1357/3200 train_loss:3.6080 train_time:5473690ms step_avg:4063.62ms +step:1358/3200 train_loss:3.4224 train_time:5477692ms step_avg:4063.57ms +step:1359/3200 train_loss:3.4826 train_time:5481697ms step_avg:4063.53ms +step:1360/3200 train_loss:3.8388 train_time:5485700ms step_avg:4063.48ms +step:1361/3200 train_loss:3.6096 train_time:5489709ms step_avg:4063.44ms +step:1362/3200 train_loss:3.3662 train_time:5493723ms step_avg:4063.40ms +step:1363/3200 train_loss:3.5672 train_time:5497746ms step_avg:4063.37ms +step:1364/3200 train_loss:3.4115 train_time:5501775ms step_avg:4063.35ms +step:1365/3200 train_loss:3.4065 train_time:5505814ms step_avg:4063.33ms +step:1366/3200 train_loss:3.5911 train_time:5509860ms step_avg:4063.32ms +step:1367/3200 train_loss:3.5359 train_time:5513914ms step_avg:4063.31ms +step:1368/3200 train_loss:3.5453 train_time:5517984ms step_avg:4063.32ms +step:1369/3200 train_loss:3.4561 train_time:5522064ms step_avg:4063.33ms +step:1370/3200 train_loss:3.6115 train_time:5526154ms step_avg:4063.35ms +step:1371/3200 train_loss:3.6159 train_time:5530275ms step_avg:4063.39ms +step:1372/3200 train_loss:3.4182 train_time:5534413ms step_avg:4063.45ms +step:1373/3200 train_loss:3.3414 train_time:5538583ms step_avg:4063.52ms +step:1374/3200 train_loss:3.7077 train_time:5542761ms step_avg:4063.61ms +step:1375/3200 train_loss:3.5552 train_time:5546946ms step_avg:4063.70ms +step:1375/3200 val_loss:3.5220 train_time:5546946ms step_avg:4063.70ms +step:1376/3200 train_loss:3.6500 train_time:5550967ms step_avg:4063.67ms +step:1377/3200 train_loss:3.5665 train_time:5554970ms step_avg:4063.62ms +step:1378/3200 train_loss:3.3270 train_time:5558966ms step_avg:4063.57ms +step:1379/3200 train_loss:3.6850 train_time:5562980ms step_avg:4063.54ms +step:1380/3200 train_loss:3.5483 train_time:5566987ms step_avg:4063.49ms +step:1381/3200 train_loss:3.6281 train_time:5570992ms step_avg:4063.45ms +step:1382/3200 train_loss:3.6838 train_time:5574994ms step_avg:4063.41ms +step:1383/3200 train_loss:3.2286 train_time:5578993ms step_avg:4063.36ms +step:1384/3200 train_loss:3.5030 train_time:5582992ms step_avg:4063.31ms +step:1385/3200 train_loss:4.3698 train_time:5586985ms step_avg:4063.26ms +step:1386/3200 train_loss:3.4461 train_time:5590984ms step_avg:4063.22ms +step:1387/3200 train_loss:3.5716 train_time:5594978ms step_avg:4063.16ms +step:1388/3200 train_loss:3.6276 train_time:5598975ms step_avg:4063.12ms +step:1389/3200 train_loss:3.6885 train_time:5602971ms step_avg:4063.07ms +step:1390/3200 train_loss:3.5299 train_time:5606974ms step_avg:4063.02ms +step:1391/3200 train_loss:3.3177 train_time:5610972ms step_avg:4062.98ms +step:1392/3200 train_loss:3.6052 train_time:5614974ms step_avg:4062.93ms +step:1393/3200 train_loss:3.7861 train_time:5618980ms step_avg:4062.89ms +step:1394/3200 train_loss:4.0099 train_time:5622993ms step_avg:4062.86ms +step:1395/3200 train_loss:3.5718 train_time:5627006ms step_avg:4062.82ms +step:1396/3200 train_loss:3.4395 train_time:5631030ms step_avg:4062.79ms +step:1397/3200 train_loss:3.3519 train_time:5635052ms step_avg:4062.76ms +step:1398/3200 train_loss:3.8817 train_time:5639057ms step_avg:4062.72ms +step:1399/3200 train_loss:3.6855 train_time:5643067ms step_avg:4062.68ms +step:1400/3200 train_loss:3.7226 train_time:5647087ms step_avg:4062.65ms +step:1401/3200 train_loss:3.6565 train_time:5651113ms step_avg:4062.63ms +step:1402/3200 train_loss:3.4254 train_time:5655143ms step_avg:4062.60ms +step:1403/3200 train_loss:3.5188 train_time:5659187ms step_avg:4062.59ms +step:1404/3200 train_loss:3.5121 train_time:5663240ms step_avg:4062.58ms +step:1405/3200 train_loss:3.5875 train_time:5667314ms step_avg:4062.59ms +step:1406/3200 train_loss:3.5379 train_time:5671405ms step_avg:4062.61ms +step:1407/3200 train_loss:3.9059 train_time:5675507ms step_avg:4062.64ms +step:1408/3200 train_loss:3.3952 train_time:5679634ms step_avg:4062.69ms +step:1409/3200 train_loss:3.4504 train_time:5683773ms step_avg:4062.74ms +step:1410/3200 train_loss:3.5158 train_time:5687953ms step_avg:4062.82ms +step:1411/3200 train_loss:3.3051 train_time:5692134ms step_avg:4062.91ms +step:1412/3200 train_loss:3.6838 train_time:5696332ms step_avg:4063.00ms +step:1413/3200 train_loss:3.4826 train_time:5700528ms step_avg:4063.10ms +step:1414/3200 train_loss:3.5086 train_time:5704645ms step_avg:4063.14ms +step:1415/3200 train_loss:3.5919 train_time:5708710ms step_avg:4063.14ms +step:1416/3200 train_loss:3.5576 train_time:5712741ms step_avg:4063.12ms +step:1417/3200 train_loss:3.7311 train_time:5716752ms step_avg:4063.08ms +step:1418/3200 train_loss:3.5149 train_time:5720761ms step_avg:4063.04ms +step:1419/3200 train_loss:3.4663 train_time:5724763ms step_avg:4063.00ms +step:1420/3200 train_loss:3.4485 train_time:5728755ms step_avg:4062.95ms +step:1421/3200 train_loss:3.5601 train_time:5732735ms step_avg:4062.89ms +step:1422/3200 train_loss:3.7537 train_time:5736734ms step_avg:4062.84ms +step:1423/3200 train_loss:3.7159 train_time:5740723ms step_avg:4062.79ms +step:1424/3200 train_loss:3.5900 train_time:5744710ms step_avg:4062.74ms +step:1425/3200 train_loss:3.7478 train_time:5748690ms step_avg:4062.68ms +step:1426/3200 train_loss:3.5535 train_time:5752666ms step_avg:4062.62ms +step:1427/3200 train_loss:3.4346 train_time:5756638ms step_avg:4062.55ms +step:1428/3200 train_loss:3.4404 train_time:5760610ms step_avg:4062.49ms +step:1429/3200 train_loss:3.6858 train_time:5764581ms step_avg:4062.42ms +step:1430/3200 train_loss:3.5113 train_time:5768547ms step_avg:4062.36ms +step:1431/3200 train_loss:3.3588 train_time:5772521ms step_avg:4062.29ms +step:1432/3200 train_loss:3.5708 train_time:5776489ms step_avg:4062.23ms +step:1433/3200 train_loss:3.5888 train_time:5780466ms step_avg:4062.17ms +step:1434/3200 train_loss:3.2188 train_time:5784435ms step_avg:4062.10ms +step:1435/3200 train_loss:3.4335 train_time:5788399ms step_avg:4062.03ms +step:1436/3200 train_loss:3.6137 train_time:5792364ms step_avg:4061.97ms +step:1437/3200 train_loss:3.3156 train_time:5796332ms step_avg:4061.90ms +step:1438/3200 train_loss:3.6110 train_time:5800300ms step_avg:4061.84ms +step:1439/3200 train_loss:3.5693 train_time:5804270ms step_avg:4061.77ms +step:1440/3200 train_loss:3.4511 train_time:5808241ms step_avg:4061.71ms +step:1441/3200 train_loss:3.6624 train_time:5812212ms step_avg:4061.64ms +step:1442/3200 train_loss:3.5770 train_time:5816183ms step_avg:4061.58ms +step:1443/3200 train_loss:3.4672 train_time:5820161ms step_avg:4061.52ms +step:1444/3200 train_loss:3.3377 train_time:5824137ms step_avg:4061.46ms +step:1445/3200 train_loss:3.5837 train_time:5828119ms step_avg:4061.41ms +step:1446/3200 train_loss:3.4208 train_time:5832102ms step_avg:4061.35ms +step:1447/3200 train_loss:3.7936 train_time:5836085ms step_avg:4061.30ms +step:1448/3200 train_loss:3.7313 train_time:5840075ms step_avg:4061.25ms +step:1449/3200 train_loss:3.5206 train_time:5844067ms step_avg:4061.20ms +step:1450/3200 train_loss:3.4373 train_time:5848064ms step_avg:4061.16ms +step:1451/3200 train_loss:3.4961 train_time:5852051ms step_avg:4061.10ms +step:1452/3200 train_loss:3.5162 train_time:5856034ms step_avg:4061.05ms +step:1453/3200 train_loss:3.5372 train_time:5860022ms step_avg:4061.00ms +step:1454/3200 train_loss:3.5902 train_time:5864018ms step_avg:4060.95ms +step:1455/3200 train_loss:3.4699 train_time:5868014ms step_avg:4060.91ms +step:1456/3200 train_loss:3.4408 train_time:5872018ms step_avg:4060.87ms +step:1457/3200 train_loss:3.5129 train_time:5876033ms step_avg:4060.84ms +step:1458/3200 train_loss:3.4656 train_time:5880063ms step_avg:4060.82ms +step:1459/3200 train_loss:3.5143 train_time:5884069ms step_avg:4060.78ms +step:1460/3200 train_loss:3.5480 train_time:5888083ms step_avg:4060.75ms +step:1461/3200 train_loss:3.3556 train_time:5892106ms step_avg:4060.72ms +step:1462/3200 train_loss:3.6672 train_time:5896139ms step_avg:4060.70ms +step:1463/3200 train_loss:3.4546 train_time:5900184ms step_avg:4060.69ms +step:1464/3200 train_loss:3.6530 train_time:5904244ms step_avg:4060.69ms +step:1465/3200 train_loss:3.5399 train_time:5908320ms step_avg:4060.70ms +step:1466/3200 train_loss:3.3618 train_time:5912415ms step_avg:4060.72ms +step:1467/3200 train_loss:3.5795 train_time:5916533ms step_avg:4060.76ms +step:1468/3200 train_loss:3.6074 train_time:5920681ms step_avg:4060.82ms +step:1469/3200 train_loss:3.6137 train_time:5924858ms step_avg:4060.90ms +step:1470/3200 train_loss:3.5335 train_time:5929042ms step_avg:4060.99ms +step:1471/3200 train_loss:3.6477 train_time:5933241ms step_avg:4061.08ms +step:1472/3200 train_loss:3.4373 train_time:5937435ms step_avg:4061.17ms +step:1473/3200 train_loss:3.5611 train_time:5941581ms step_avg:4061.23ms +step:1474/3200 train_loss:3.4352 train_time:5945704ms step_avg:4061.27ms +step:1475/3200 train_loss:3.4107 train_time:5949803ms step_avg:4061.30ms +step:1476/3200 train_loss:3.6428 train_time:5953888ms step_avg:4061.32ms +step:1477/3200 train_loss:3.6200 train_time:5957969ms step_avg:4061.33ms +step:1478/3200 train_loss:3.5299 train_time:5962042ms step_avg:4061.34ms +step:1479/3200 train_loss:3.4646 train_time:5966107ms step_avg:4061.34ms +step:1480/3200 train_loss:3.5066 train_time:5970166ms step_avg:4061.34ms +step:1481/3200 train_loss:3.5288 train_time:5974224ms step_avg:4061.34ms +step:1482/3200 train_loss:3.5131 train_time:5978281ms step_avg:4061.33ms +step:1483/3200 train_loss:3.6767 train_time:5982341ms step_avg:4061.33ms +step:1484/3200 train_loss:3.3848 train_time:5986401ms step_avg:4061.33ms +step:1485/3200 train_loss:3.6408 train_time:5990458ms step_avg:4061.33ms +step:1486/3200 train_loss:3.5710 train_time:5994525ms step_avg:4061.33ms +step:1487/3200 train_loss:3.5178 train_time:5998598ms step_avg:4061.34ms +step:1488/3200 train_loss:3.4698 train_time:6002681ms step_avg:4061.35ms +step:1489/3200 train_loss:3.4988 train_time:6006765ms step_avg:4061.37ms +step:1490/3200 train_loss:3.4028 train_time:6010854ms step_avg:4061.39ms +step:1491/3200 train_loss:3.5803 train_time:6014950ms step_avg:4061.41ms +step:1492/3200 train_loss:3.4330 train_time:6019055ms step_avg:4061.44ms +step:1493/3200 train_loss:3.4420 train_time:6023186ms step_avg:4061.49ms +step:1494/3200 train_loss:3.5198 train_time:6027328ms step_avg:4061.54ms +step:1495/3200 train_loss:3.5185 train_time:6031508ms step_avg:4061.62ms +step:1496/3200 train_loss:3.3992 train_time:6035687ms step_avg:4061.70ms +step:1497/3200 train_loss:3.5675 train_time:6039884ms step_avg:4061.79ms +step:1498/3200 train_loss:3.4738 train_time:6044045ms step_avg:4061.86ms +step:1499/3200 train_loss:3.1141 train_time:6048173ms step_avg:4061.90ms +step:1500/3200 train_loss:3.6538 train_time:6052275ms step_avg:4061.93ms +step:1500/3200 val_loss:3.4984 train_time:6052275ms step_avg:4061.93ms +step:1501/3200 train_loss:3.5860 train_time:6056310ms step_avg:4061.91ms +step:1502/3200 train_loss:3.6355 train_time:6060336ms step_avg:4061.89ms +step:1503/3200 train_loss:3.6102 train_time:6064364ms step_avg:4061.86ms +step:1504/3200 train_loss:3.6119 train_time:6068392ms step_avg:4061.84ms +step:1505/3200 train_loss:3.1811 train_time:6072420ms step_avg:4061.82ms +step:1506/3200 train_loss:3.5670 train_time:6076441ms step_avg:4061.79ms +step:1507/3200 train_loss:3.4870 train_time:6080461ms step_avg:4061.76ms +step:1508/3200 train_loss:3.4379 train_time:6084480ms step_avg:4061.74ms +step:1509/3200 train_loss:3.4391 train_time:6088507ms step_avg:4061.71ms +step:1510/3200 train_loss:3.4835 train_time:6092535ms step_avg:4061.69ms +step:1511/3200 train_loss:3.4054 train_time:6096570ms step_avg:4061.67ms +step:1512/3200 train_loss:3.3757 train_time:6100598ms step_avg:4061.65ms +step:1513/3200 train_loss:3.5383 train_time:6104648ms step_avg:4061.64ms +step:1514/3200 train_loss:3.4458 train_time:6108707ms step_avg:4061.64ms +step:1515/3200 train_loss:3.6757 train_time:6112770ms step_avg:4061.64ms +step:1516/3200 train_loss:3.4746 train_time:6116847ms step_avg:4061.65ms +step:1517/3200 train_loss:3.5796 train_time:6120933ms step_avg:4061.67ms +step:1518/3200 train_loss:3.2614 train_time:6125030ms step_avg:4061.69ms +step:1519/3200 train_loss:3.3762 train_time:6129154ms step_avg:4061.73ms +step:1520/3200 train_loss:3.4843 train_time:6133287ms step_avg:4061.78ms +step:1521/3200 train_loss:3.4538 train_time:6137464ms step_avg:4061.86ms +step:1522/3200 train_loss:3.4494 train_time:6141645ms step_avg:4061.93ms +step:1523/3200 train_loss:3.8406 train_time:6145846ms step_avg:4062.03ms +step:1524/3200 train_loss:3.3345 train_time:6150044ms step_avg:4062.12ms +step:1525/3200 train_loss:3.4760 train_time:6154238ms step_avg:4062.20ms +step:1526/3200 train_loss:3.4386 train_time:6158417ms step_avg:4062.28ms +step:1527/3200 train_loss:3.3482 train_time:6162573ms step_avg:4062.34ms +step:1528/3200 train_loss:3.6779 train_time:6166672ms step_avg:4062.37ms +step:1529/3200 train_loss:3.7015 train_time:6170736ms step_avg:4062.37ms +step:1530/3200 train_loss:3.4301 train_time:6174773ms step_avg:4062.35ms +step:1531/3200 train_loss:3.5612 train_time:6178797ms step_avg:4062.33ms +step:1532/3200 train_loss:3.3188 train_time:6182805ms step_avg:4062.29ms +step:1533/3200 train_loss:3.6622 train_time:6186815ms step_avg:4062.26ms +step:1534/3200 train_loss:3.3916 train_time:6190827ms step_avg:4062.22ms +step:1535/3200 train_loss:3.4345 train_time:6194836ms step_avg:4062.19ms +step:1536/3200 train_loss:3.4967 train_time:6198834ms step_avg:4062.15ms +step:1537/3200 train_loss:4.1809 train_time:6202827ms step_avg:4062.10ms +step:1538/3200 train_loss:3.4742 train_time:6206818ms step_avg:4062.05ms +step:1539/3200 train_loss:3.5103 train_time:6210801ms step_avg:4062.00ms +step:1540/3200 train_loss:3.4457 train_time:6214786ms step_avg:4061.95ms +step:1541/3200 train_loss:3.4542 train_time:6218769ms step_avg:4061.90ms +step:1542/3200 train_loss:3.3877 train_time:6222748ms step_avg:4061.85ms +step:1543/3200 train_loss:3.9717 train_time:6226732ms step_avg:4061.80ms +step:1544/3200 train_loss:3.3336 train_time:6230706ms step_avg:4061.74ms +step:1545/3200 train_loss:3.4715 train_time:6234683ms step_avg:4061.68ms +step:1546/3200 train_loss:3.4150 train_time:6238660ms step_avg:4061.63ms +step:1547/3200 train_loss:3.5113 train_time:6242638ms step_avg:4061.57ms +step:1548/3200 train_loss:3.5318 train_time:6246619ms step_avg:4061.52ms +step:1549/3200 train_loss:3.4755 train_time:6250596ms step_avg:4061.47ms +step:1550/3200 train_loss:3.0196 train_time:6254580ms step_avg:4061.42ms +step:1551/3200 train_loss:3.4952 train_time:6258563ms step_avg:4061.36ms +step:1552/3200 train_loss:3.5687 train_time:6262546ms step_avg:4061.31ms +step:1553/3200 train_loss:3.3991 train_time:6266535ms step_avg:4061.27ms +step:1554/3200 train_loss:3.4461 train_time:6270527ms step_avg:4061.22ms +step:1555/3200 train_loss:3.6675 train_time:6274522ms step_avg:4061.18ms +step:1556/3200 train_loss:3.3947 train_time:6278515ms step_avg:4061.14ms +step:1557/3200 train_loss:3.4177 train_time:6282516ms step_avg:4061.10ms +step:1558/3200 train_loss:3.5224 train_time:6286529ms step_avg:4061.07ms +step:1559/3200 train_loss:3.5199 train_time:6290542ms step_avg:4061.03ms +step:1560/3200 train_loss:3.4867 train_time:6294563ms step_avg:4061.01ms +step:1561/3200 train_loss:3.4032 train_time:6298563ms step_avg:4060.97ms +step:1562/3200 train_loss:3.4790 train_time:6302568ms step_avg:4060.93ms +step:1563/3200 train_loss:3.3253 train_time:6306580ms step_avg:4060.90ms +step:1564/3200 train_loss:3.2886 train_time:6310596ms step_avg:4060.87ms +step:1565/3200 train_loss:3.5047 train_time:6314631ms step_avg:4060.86ms +step:1566/3200 train_loss:3.4926 train_time:6318677ms step_avg:4060.85ms +step:1567/3200 train_loss:3.3705 train_time:6322730ms step_avg:4060.84ms +step:1568/3200 train_loss:3.4714 train_time:6326808ms step_avg:4060.85ms +step:1569/3200 train_loss:3.4930 train_time:6330902ms step_avg:4060.87ms +step:1570/3200 train_loss:3.6669 train_time:6335007ms step_avg:4060.90ms +step:1571/3200 train_loss:3.4920 train_time:6339140ms step_avg:4060.95ms +step:1572/3200 train_loss:3.4358 train_time:6343301ms step_avg:4061.01ms +step:1573/3200 train_loss:3.6599 train_time:6347483ms step_avg:4061.09ms +step:1574/3200 train_loss:3.5260 train_time:6351683ms step_avg:4061.18ms +step:1575/3200 train_loss:3.2938 train_time:6355878ms step_avg:4061.26ms +step:1576/3200 train_loss:3.4368 train_time:6360016ms step_avg:4061.31ms +step:1577/3200 train_loss:3.5297 train_time:6364105ms step_avg:4061.33ms +step:1578/3200 train_loss:3.6295 train_time:6368155ms step_avg:4061.32ms +step:1579/3200 train_loss:3.6359 train_time:6372184ms step_avg:4061.30ms +step:1580/3200 train_loss:3.5061 train_time:6376190ms step_avg:4061.27ms +step:1581/3200 train_loss:3.5486 train_time:6380203ms step_avg:4061.24ms +step:1582/3200 train_loss:3.3651 train_time:6384209ms step_avg:4061.20ms +step:1583/3200 train_loss:3.6835 train_time:6388207ms step_avg:4061.16ms +step:1584/3200 train_loss:3.6480 train_time:6392198ms step_avg:4061.12ms +step:1585/3200 train_loss:3.4096 train_time:6396177ms step_avg:4061.06ms +step:1586/3200 train_loss:3.7768 train_time:6400151ms step_avg:4061.01ms +step:1587/3200 train_loss:3.4040 train_time:6404136ms step_avg:4060.96ms +step:1588/3200 train_loss:3.6347 train_time:6408124ms step_avg:4060.91ms +step:1589/3200 train_loss:3.5746 train_time:6412108ms step_avg:4060.87ms +step:1590/3200 train_loss:3.6108 train_time:6416094ms step_avg:4060.82ms +step:1591/3200 train_loss:3.5178 train_time:6420076ms step_avg:4060.77ms +step:1592/3200 train_loss:3.4039 train_time:6424053ms step_avg:4060.72ms +step:1593/3200 train_loss:3.2334 train_time:6428030ms step_avg:4060.66ms +step:1594/3200 train_loss:3.4701 train_time:6432004ms step_avg:4060.61ms +step:1595/3200 train_loss:3.2050 train_time:6435982ms step_avg:4060.56ms +step:1596/3200 train_loss:3.4632 train_time:6439960ms step_avg:4060.50ms +step:1597/3200 train_loss:3.5076 train_time:6443939ms step_avg:4060.45ms +step:1598/3200 train_loss:3.1572 train_time:6447920ms step_avg:4060.40ms +step:1599/3200 train_loss:3.4715 train_time:6451899ms step_avg:4060.35ms +step:1600/3200 train_loss:3.2953 train_time:6455878ms step_avg:4060.30ms +step:1601/3200 train_loss:3.4144 train_time:6459845ms step_avg:4060.24ms +step:1602/3200 train_loss:3.4803 train_time:6463822ms step_avg:4060.19ms +step:1603/3200 train_loss:3.6872 train_time:6467804ms step_avg:4060.14ms +step:1604/3200 train_loss:3.4672 train_time:6471784ms step_avg:4060.09ms +step:1605/3200 train_loss:3.5095 train_time:6475770ms step_avg:4060.04ms +step:1606/3200 train_loss:3.6753 train_time:6479757ms step_avg:4060.00ms +step:1607/3200 train_loss:3.4893 train_time:6483748ms step_avg:4059.96ms +step:1608/3200 train_loss:3.2664 train_time:6487739ms step_avg:4059.91ms +step:1609/3200 train_loss:2.9772 train_time:6491718ms step_avg:4059.86ms +step:1610/3200 train_loss:3.4630 train_time:6495699ms step_avg:4059.81ms +step:1611/3200 train_loss:3.3433 train_time:6499687ms step_avg:4059.77ms +step:1612/3200 train_loss:3.4785 train_time:6503682ms step_avg:4059.73ms +step:1613/3200 train_loss:3.1795 train_time:6507675ms step_avg:4059.69ms +step:1614/3200 train_loss:3.5586 train_time:6511678ms step_avg:4059.65ms +step:1615/3200 train_loss:3.4127 train_time:6515684ms step_avg:4059.62ms +step:1616/3200 train_loss:3.6317 train_time:6519699ms step_avg:4059.59ms +step:1617/3200 train_loss:3.2819 train_time:6523716ms step_avg:4059.56ms +step:1618/3200 train_loss:3.3167 train_time:6527716ms step_avg:4059.53ms +step:1619/3200 train_loss:3.3786 train_time:6531730ms step_avg:4059.50ms +step:1620/3200 train_loss:4.3465 train_time:6535753ms step_avg:4059.47ms +step:1621/3200 train_loss:3.5322 train_time:6539786ms step_avg:4059.46ms +step:1622/3200 train_loss:3.1459 train_time:6543825ms step_avg:4059.45ms +step:1623/3200 train_loss:3.3872 train_time:6547886ms step_avg:4059.45ms +step:1624/3200 train_loss:3.0661 train_time:6551966ms step_avg:4059.46ms +step:1625/3200 train_loss:3.2503 train_time:6556069ms step_avg:4059.49ms +step:1625/3200 val_loss:3.4844 train_time:6556069ms step_avg:4059.49ms +step:1626/3200 train_loss:3.5223 train_time:6560116ms step_avg:4059.48ms +step:1627/3200 train_loss:3.3498 train_time:6564135ms step_avg:4059.45ms +step:1628/3200 train_loss:2.9737 train_time:6568141ms step_avg:4059.42ms +step:1629/3200 train_loss:3.4891 train_time:6572150ms step_avg:4059.39ms +step:1630/3200 train_loss:3.6047 train_time:6576159ms step_avg:4059.36ms +step:1631/3200 train_loss:3.5923 train_time:6580164ms step_avg:4059.32ms +step:1632/3200 train_loss:3.5146 train_time:6584158ms step_avg:4059.28ms +step:1633/3200 train_loss:3.3140 train_time:6588145ms step_avg:4059.24ms +step:1634/3200 train_loss:3.2649 train_time:6592127ms step_avg:4059.19ms +step:1635/3200 train_loss:3.4774 train_time:6596105ms step_avg:4059.14ms +step:1636/3200 train_loss:3.4241 train_time:6600077ms step_avg:4059.09ms +step:1637/3200 train_loss:3.4376 train_time:6604050ms step_avg:4059.04ms +step:1638/3200 train_loss:3.5152 train_time:6608023ms step_avg:4058.98ms +step:1639/3200 train_loss:3.7285 train_time:6611996ms step_avg:4058.93ms +step:1640/3200 train_loss:3.4807 train_time:6615983ms step_avg:4058.89ms +step:1641/3200 train_loss:3.4839 train_time:6619976ms step_avg:4058.84ms +step:1642/3200 train_loss:3.4815 train_time:6623966ms step_avg:4058.80ms +step:1643/3200 train_loss:3.3872 train_time:6627956ms step_avg:4058.76ms +step:1644/3200 train_loss:3.5047 train_time:6631951ms step_avg:4058.72ms +step:1645/3200 train_loss:3.6615 train_time:6635946ms step_avg:4058.68ms +step:1646/3200 train_loss:3.4835 train_time:6639944ms step_avg:4058.65ms +step:1647/3200 train_loss:3.5195 train_time:6643927ms step_avg:4058.60ms +step:1648/3200 train_loss:3.3298 train_time:6647908ms step_avg:4058.55ms +step:1649/3200 train_loss:3.4119 train_time:6651889ms step_avg:4058.50ms +step:1650/3200 train_loss:3.4954 train_time:6655872ms step_avg:4058.46ms +step:1651/3200 train_loss:3.5794 train_time:6659862ms step_avg:4058.42ms +step:1652/3200 train_loss:3.6754 train_time:6663855ms step_avg:4058.38ms +step:1653/3200 train_loss:3.6016 train_time:6667850ms step_avg:4058.34ms +step:1654/3200 train_loss:3.6284 train_time:6671852ms step_avg:4058.30ms +step:1655/3200 train_loss:3.7534 train_time:6675858ms step_avg:4058.27ms +step:1656/3200 train_loss:3.3817 train_time:6679870ms step_avg:4058.24ms +step:1657/3200 train_loss:3.3843 train_time:6683888ms step_avg:4058.22ms +step:1658/3200 train_loss:3.4839 train_time:6687907ms step_avg:4058.20ms +step:1659/3200 train_loss:3.4944 train_time:6691913ms step_avg:4058.16ms +step:1660/3200 train_loss:3.5450 train_time:6695925ms step_avg:4058.14ms +step:1661/3200 train_loss:3.4677 train_time:6699950ms step_avg:4058.12ms +step:1662/3200 train_loss:3.4870 train_time:6703982ms step_avg:4058.10ms +step:1663/3200 train_loss:3.6424 train_time:6708025ms step_avg:4058.09ms +step:1664/3200 train_loss:3.3087 train_time:6712086ms step_avg:4058.09ms +step:1665/3200 train_loss:3.8370 train_time:6716158ms step_avg:4058.10ms +step:1666/3200 train_loss:3.4848 train_time:6720246ms step_avg:4058.12ms +step:1667/3200 train_loss:3.5355 train_time:6724354ms step_avg:4058.15ms +step:1668/3200 train_loss:3.5784 train_time:6728493ms step_avg:4058.20ms +step:1669/3200 train_loss:3.4777 train_time:6732663ms step_avg:4058.27ms +step:1670/3200 train_loss:3.5019 train_time:6736844ms step_avg:4058.34ms +step:1671/3200 train_loss:3.5850 train_time:6741040ms step_avg:4058.42ms +step:1672/3200 train_loss:3.5624 train_time:6745185ms step_avg:4058.47ms +step:1673/3200 train_loss:3.4394 train_time:6749279ms step_avg:4058.50ms +step:1674/3200 train_loss:3.8391 train_time:6753342ms step_avg:4058.50ms +step:1675/3200 train_loss:3.4773 train_time:6757374ms step_avg:4058.48ms +step:1676/3200 train_loss:3.5410 train_time:6761390ms step_avg:4058.46ms +step:1677/3200 train_loss:3.4972 train_time:6765389ms step_avg:4058.42ms +step:1678/3200 train_loss:3.5582 train_time:6769406ms step_avg:4058.40ms +step:1679/3200 train_loss:3.3766 train_time:6773412ms step_avg:4058.37ms +step:1680/3200 train_loss:3.5964 train_time:6777409ms step_avg:4058.33ms +step:1681/3200 train_loss:3.6400 train_time:6781400ms step_avg:4058.29ms +step:1682/3200 train_loss:3.7195 train_time:6785386ms step_avg:4058.25ms +step:1683/3200 train_loss:3.2509 train_time:6789368ms step_avg:4058.20ms +step:1684/3200 train_loss:3.4292 train_time:6793348ms step_avg:4058.15ms +step:1685/3200 train_loss:3.4985 train_time:6797320ms step_avg:4058.10ms +step:1686/3200 train_loss:3.4867 train_time:6801295ms step_avg:4058.05ms +step:1687/3200 train_loss:3.6969 train_time:6805269ms step_avg:4058.00ms +step:1688/3200 train_loss:3.6772 train_time:6809245ms step_avg:4057.95ms +step:1689/3200 train_loss:3.2566 train_time:6813218ms step_avg:4057.90ms +step:1690/3200 train_loss:3.4254 train_time:6817190ms step_avg:4057.85ms +step:1691/3200 train_loss:3.3211 train_time:6821161ms step_avg:4057.80ms +step:1692/3200 train_loss:3.5094 train_time:6825133ms step_avg:4057.75ms +step:1693/3200 train_loss:3.4305 train_time:6829112ms step_avg:4057.70ms +step:1694/3200 train_loss:3.5443 train_time:6833079ms step_avg:4057.65ms +step:1695/3200 train_loss:3.4377 train_time:6837056ms step_avg:4057.60ms +step:1696/3200 train_loss:3.3684 train_time:6841032ms step_avg:4057.55ms +step:1697/3200 train_loss:3.2849 train_time:6845011ms step_avg:4057.51ms +step:1698/3200 train_loss:3.5371 train_time:6848995ms step_avg:4057.46ms +step:1699/3200 train_loss:3.4689 train_time:6852981ms step_avg:4057.42ms +step:1700/3200 train_loss:3.4213 train_time:6856970ms step_avg:4057.38ms +step:1701/3200 train_loss:3.1410 train_time:6860964ms step_avg:4057.34ms +step:1702/3200 train_loss:3.1200 train_time:6864962ms step_avg:4057.31ms +step:1703/3200 train_loss:3.4045 train_time:6868965ms step_avg:4057.27ms +step:1704/3200 train_loss:3.5079 train_time:6872973ms step_avg:4057.25ms +step:1705/3200 train_loss:3.4696 train_time:6876985ms step_avg:4057.22ms +step:1706/3200 train_loss:3.4356 train_time:6880992ms step_avg:4057.19ms +step:1707/3200 train_loss:3.4225 train_time:6884992ms step_avg:4057.16ms +step:1708/3200 train_loss:3.3428 train_time:6888999ms step_avg:4057.13ms +step:1709/3200 train_loss:3.4006 train_time:6893018ms step_avg:4057.10ms +step:1710/3200 train_loss:3.3514 train_time:6897050ms step_avg:4057.09ms +step:1711/3200 train_loss:3.4218 train_time:6901084ms step_avg:4057.07ms +step:1712/3200 train_loss:3.5388 train_time:6905136ms step_avg:4057.07ms +step:1713/3200 train_loss:3.4514 train_time:6909199ms step_avg:4057.08ms +step:1714/3200 train_loss:3.5120 train_time:6913283ms step_avg:4057.09ms +step:1715/3200 train_loss:3.5305 train_time:6917382ms step_avg:4057.12ms +step:1716/3200 train_loss:3.4545 train_time:6921496ms step_avg:4057.15ms +step:1717/3200 train_loss:3.5293 train_time:6925657ms step_avg:4057.21ms +step:1718/3200 train_loss:3.5206 train_time:6929843ms step_avg:4057.29ms +step:1719/3200 train_loss:3.4809 train_time:6934038ms step_avg:4057.37ms +step:1720/3200 train_loss:3.4211 train_time:6938220ms step_avg:4057.44ms +step:1721/3200 train_loss:3.7220 train_time:6942384ms step_avg:4057.50ms +step:1722/3200 train_loss:3.3587 train_time:6946516ms step_avg:4057.54ms +step:1723/3200 train_loss:3.4684 train_time:6950626ms step_avg:4057.58ms +step:1724/3200 train_loss:3.5827 train_time:6954717ms step_avg:4057.59ms +step:1725/3200 train_loss:3.7583 train_time:6958800ms step_avg:4057.61ms +step:1726/3200 train_loss:3.7359 train_time:6962874ms step_avg:4057.62ms +step:1727/3200 train_loss:3.6399 train_time:6966939ms step_avg:4057.62ms +step:1728/3200 train_loss:3.4010 train_time:6971000ms step_avg:4057.63ms +step:1729/3200 train_loss:3.4455 train_time:6975058ms step_avg:4057.63ms +step:1730/3200 train_loss:3.3377 train_time:6979108ms step_avg:4057.62ms +step:1731/3200 train_loss:3.6174 train_time:6983162ms step_avg:4057.62ms +step:1732/3200 train_loss:3.5266 train_time:6987212ms step_avg:4057.61ms +step:1733/3200 train_loss:3.4865 train_time:6991265ms step_avg:4057.61ms +step:1734/3200 train_loss:3.4719 train_time:6995320ms step_avg:4057.61ms +step:1735/3200 train_loss:3.4568 train_time:6999376ms step_avg:4057.61ms +step:1736/3200 train_loss:3.4382 train_time:7003440ms step_avg:4057.61ms +step:1737/3200 train_loss:3.4927 train_time:7007517ms step_avg:4057.62ms +step:1738/3200 train_loss:3.5228 train_time:7011598ms step_avg:4057.64ms +step:1739/3200 train_loss:3.4174 train_time:7015692ms step_avg:4057.66ms +step:1740/3200 train_loss:3.4596 train_time:7019793ms step_avg:4057.68ms +step:1741/3200 train_loss:3.5923 train_time:7023911ms step_avg:4057.72ms +step:1742/3200 train_loss:3.5128 train_time:7028045ms step_avg:4057.76ms +step:1743/3200 train_loss:3.3948 train_time:7032190ms step_avg:4057.81ms +step:1744/3200 train_loss:3.3469 train_time:7036369ms step_avg:4057.88ms +step:1745/3200 train_loss:3.2561 train_time:7040550ms step_avg:4057.95ms +step:1746/3200 train_loss:3.4497 train_time:7044749ms step_avg:4058.04ms +step:1747/3200 train_loss:3.4208 train_time:7048944ms step_avg:4058.11ms +step:1748/3200 train_loss:3.4923 train_time:7053102ms step_avg:4058.17ms +step:1749/3200 train_loss:3.4760 train_time:7057232ms step_avg:4058.21ms +step:1750/3200 train_loss:3.5399 train_time:7061340ms step_avg:4058.24ms +step:1750/3200 val_loss:3.4667 train_time:7061340ms step_avg:4058.24ms +step:1751/3200 train_loss:3.2749 train_time:7065414ms step_avg:4058.25ms +step:1752/3200 train_loss:3.6455 train_time:7069482ms step_avg:4058.26ms +step:1753/3200 train_loss:3.7111 train_time:7073547ms step_avg:4058.26ms +step:1754/3200 train_loss:3.4542 train_time:7077613ms step_avg:4058.26ms +step:1755/3200 train_loss:3.5058 train_time:7081683ms step_avg:4058.27ms +step:1756/3200 train_loss:3.5544 train_time:7085757ms step_avg:4058.28ms +step:1757/3200 train_loss:3.4711 train_time:7089841ms step_avg:4058.29ms +step:1758/3200 train_loss:3.5682 train_time:7093930ms step_avg:4058.31ms +step:1759/3200 train_loss:3.4518 train_time:7098034ms step_avg:4058.34ms +step:1760/3200 train_loss:3.4086 train_time:7102145ms step_avg:4058.37ms +step:1761/3200 train_loss:3.8230 train_time:7106276ms step_avg:4058.41ms +step:1762/3200 train_loss:3.4316 train_time:7110420ms step_avg:4058.46ms +step:1763/3200 train_loss:3.3847 train_time:7114580ms step_avg:4058.52ms +step:1764/3200 train_loss:3.5772 train_time:7118760ms step_avg:4058.59ms +step:1765/3200 train_loss:3.5610 train_time:7122962ms step_avg:4058.67ms +step:1766/3200 train_loss:3.4819 train_time:7127161ms step_avg:4058.75ms +step:1767/3200 train_loss:3.5444 train_time:7131354ms step_avg:4058.82ms +step:1768/3200 train_loss:3.5944 train_time:7135495ms step_avg:4058.87ms +step:1769/3200 train_loss:3.3319 train_time:7139603ms step_avg:4058.90ms +step:1770/3200 train_loss:3.5652 train_time:7143685ms step_avg:4058.91ms +step:1771/3200 train_loss:3.4001 train_time:7147747ms step_avg:4058.91ms +step:1772/3200 train_loss:3.2799 train_time:7151791ms step_avg:4058.91ms +step:1773/3200 train_loss:3.5544 train_time:7155828ms step_avg:4058.89ms +step:1774/3200 train_loss:3.5641 train_time:7159853ms step_avg:4058.87ms +step:1775/3200 train_loss:3.4535 train_time:7163872ms step_avg:4058.85ms +step:1776/3200 train_loss:3.4956 train_time:7167879ms step_avg:4058.82ms +step:1777/3200 train_loss:3.5049 train_time:7171884ms step_avg:4058.79ms +step:1778/3200 train_loss:3.3615 train_time:7175889ms step_avg:4058.76ms +step:1779/3200 train_loss:3.6350 train_time:7179889ms step_avg:4058.73ms +step:1780/3200 train_loss:3.6174 train_time:7183882ms step_avg:4058.69ms +step:1781/3200 train_loss:3.4543 train_time:7187882ms step_avg:4058.66ms +step:1782/3200 train_loss:3.6677 train_time:7191881ms step_avg:4058.62ms +step:1783/3200 train_loss:3.4469 train_time:7195878ms step_avg:4058.59ms +step:1784/3200 train_loss:3.6330 train_time:7199878ms step_avg:4058.56ms +step:1785/3200 train_loss:3.3994 train_time:7203877ms step_avg:4058.52ms +step:1786/3200 train_loss:3.4326 train_time:7207877ms step_avg:4058.49ms +step:1787/3200 train_loss:3.4943 train_time:7211884ms step_avg:4058.46ms +step:1788/3200 train_loss:3.5058 train_time:7215889ms step_avg:4058.43ms +step:1789/3200 train_loss:3.5145 train_time:7219894ms step_avg:4058.40ms +step:1790/3200 train_loss:3.3423 train_time:7223908ms step_avg:4058.38ms +step:1791/3200 train_loss:3.5136 train_time:7227929ms step_avg:4058.35ms +step:1792/3200 train_loss:3.5071 train_time:7231956ms step_avg:4058.34ms +step:1793/3200 train_loss:3.3371 train_time:7235992ms step_avg:4058.32ms +step:1794/3200 train_loss:3.4963 train_time:7240027ms step_avg:4058.31ms +step:1795/3200 train_loss:3.6314 train_time:7244078ms step_avg:4058.31ms +step:1796/3200 train_loss:3.5808 train_time:7248139ms step_avg:4058.31ms +step:1797/3200 train_loss:3.4151 train_time:7252218ms step_avg:4058.32ms +step:1798/3200 train_loss:3.4327 train_time:7256300ms step_avg:4058.33ms +step:1799/3200 train_loss:3.2620 train_time:7260402ms step_avg:4058.36ms +step:1800/3200 train_loss:3.5201 train_time:7264522ms step_avg:4058.39ms +step:1801/3200 train_loss:3.7042 train_time:7268669ms step_avg:4058.44ms +step:1802/3200 train_loss:3.4746 train_time:7272851ms step_avg:4058.51ms +step:1803/3200 train_loss:3.4613 train_time:7277034ms step_avg:4058.58ms +step:1804/3200 train_loss:3.6708 train_time:7281234ms step_avg:4058.66ms +step:1805/3200 train_loss:3.4801 train_time:7285427ms step_avg:4058.73ms +step:1806/3200 train_loss:3.3834 train_time:7289549ms step_avg:4058.77ms +step:1807/3200 train_loss:3.7371 train_time:7293602ms step_avg:4058.77ms +step:1808/3200 train_loss:3.4536 train_time:7297617ms step_avg:4058.74ms +step:1809/3200 train_loss:3.8314 train_time:7301621ms step_avg:4058.71ms +step:1810/3200 train_loss:3.4448 train_time:7305603ms step_avg:4058.67ms +step:1811/3200 train_loss:3.4751 train_time:7309593ms step_avg:4058.63ms +step:1812/3200 train_loss:3.4249 train_time:7313569ms step_avg:4058.58ms +step:1813/3200 train_loss:3.3918 train_time:7317535ms step_avg:4058.53ms +step:1814/3200 train_loss:3.7910 train_time:7321493ms step_avg:4058.48ms +step:1815/3200 train_loss:3.5112 train_time:7325442ms step_avg:4058.42ms +step:1816/3200 train_loss:3.4713 train_time:7329389ms step_avg:4058.35ms +step:1817/3200 train_loss:3.5056 train_time:7333330ms step_avg:4058.29ms +step:1818/3200 train_loss:3.4603 train_time:7337267ms step_avg:4058.22ms +step:1819/3200 train_loss:3.4224 train_time:7341204ms step_avg:4058.16ms +step:1820/3200 train_loss:3.5273 train_time:7345140ms step_avg:4058.09ms +step:1821/3200 train_loss:3.7760 train_time:7349071ms step_avg:4058.02ms +step:1822/3200 train_loss:3.3705 train_time:7353003ms step_avg:4057.95ms +step:1823/3200 train_loss:3.4696 train_time:7356932ms step_avg:4057.88ms +step:1824/3200 train_loss:2.8919 train_time:7360862ms step_avg:4057.81ms +step:1825/3200 train_loss:3.4634 train_time:7364788ms step_avg:4057.73ms +step:1826/3200 train_loss:3.5063 train_time:7368717ms step_avg:4057.66ms +step:1827/3200 train_loss:3.7085 train_time:7372645ms step_avg:4057.59ms +step:1828/3200 train_loss:3.4396 train_time:7376573ms step_avg:4057.52ms +step:1829/3200 train_loss:3.2739 train_time:7380498ms step_avg:4057.45ms +step:1830/3200 train_loss:3.5231 train_time:7384423ms step_avg:4057.38ms +step:1831/3200 train_loss:3.4654 train_time:7388350ms step_avg:4057.30ms +step:1832/3200 train_loss:3.3454 train_time:7392276ms step_avg:4057.23ms +step:1833/3200 train_loss:3.2773 train_time:7396205ms step_avg:4057.16ms +step:1834/3200 train_loss:3.4800 train_time:7400131ms step_avg:4057.09ms +step:1835/3200 train_loss:3.5348 train_time:7404059ms step_avg:4057.02ms +step:1836/3200 train_loss:3.4354 train_time:7407987ms step_avg:4056.95ms +step:1837/3200 train_loss:3.4225 train_time:7411916ms step_avg:4056.88ms +step:1838/3200 train_loss:3.4050 train_time:7415846ms step_avg:4056.81ms +step:1839/3200 train_loss:3.4400 train_time:7419775ms step_avg:4056.74ms +step:1840/3200 train_loss:3.6280 train_time:7423706ms step_avg:4056.67ms +step:1841/3200 train_loss:3.5201 train_time:7427639ms step_avg:4056.60ms +step:1842/3200 train_loss:3.5896 train_time:7431573ms step_avg:4056.54ms +step:1843/3200 train_loss:3.6226 train_time:7435507ms step_avg:4056.47ms +step:1844/3200 train_loss:3.6753 train_time:7439444ms step_avg:4056.40ms +step:1845/3200 train_loss:3.3823 train_time:7443380ms step_avg:4056.34ms +step:1846/3200 train_loss:3.3841 train_time:7447319ms step_avg:4056.27ms +step:1847/3200 train_loss:3.5533 train_time:7451259ms step_avg:4056.21ms +step:1848/3200 train_loss:3.7915 train_time:7455202ms step_avg:4056.15ms +step:1849/3200 train_loss:3.3148 train_time:7459143ms step_avg:4056.09ms +step:1850/3200 train_loss:3.4212 train_time:7463089ms step_avg:4056.03ms +step:1851/3200 train_loss:3.4181 train_time:7467029ms step_avg:4055.96ms +step:1852/3200 train_loss:3.3728 train_time:7470971ms step_avg:4055.90ms +step:1853/3200 train_loss:3.4615 train_time:7474920ms step_avg:4055.84ms +step:1854/3200 train_loss:3.2985 train_time:7478871ms step_avg:4055.79ms +step:1855/3200 train_loss:3.5117 train_time:7482824ms step_avg:4055.73ms +step:1856/3200 train_loss:3.4787 train_time:7486777ms step_avg:4055.68ms +step:1857/3200 train_loss:3.4103 train_time:7490734ms step_avg:4055.62ms +step:1858/3200 train_loss:3.4690 train_time:7494692ms step_avg:4055.57ms +step:1859/3200 train_loss:3.5638 train_time:7498654ms step_avg:4055.52ms +step:1860/3200 train_loss:3.5801 train_time:7502619ms step_avg:4055.47ms +step:1861/3200 train_loss:3.4296 train_time:7506586ms step_avg:4055.42ms +step:1862/3200 train_loss:3.5203 train_time:7510554ms step_avg:4055.37ms +step:1863/3200 train_loss:3.4034 train_time:7514525ms step_avg:4055.33ms +step:1864/3200 train_loss:3.6328 train_time:7518500ms step_avg:4055.29ms +step:1865/3200 train_loss:3.4684 train_time:7522481ms step_avg:4055.25ms +step:1866/3200 train_loss:3.3952 train_time:7526463ms step_avg:4055.21ms +step:1867/3200 train_loss:3.4990 train_time:7530430ms step_avg:4055.16ms +step:1868/3200 train_loss:3.5631 train_time:7534398ms step_avg:4055.11ms +step:1869/3200 train_loss:3.5628 train_time:7538376ms step_avg:4055.07ms +step:1870/3200 train_loss:3.5422 train_time:7542357ms step_avg:4055.03ms +step:1871/3200 train_loss:3.3919 train_time:7546341ms step_avg:4054.99ms +step:1872/3200 train_loss:3.5694 train_time:7550329ms step_avg:4054.96ms +step:1873/3200 train_loss:3.4180 train_time:7554329ms step_avg:4054.93ms +step:1874/3200 train_loss:3.7350 train_time:7558335ms step_avg:4054.90ms +step:1875/3200 train_loss:3.3914 train_time:7562338ms step_avg:4054.87ms +step:1875/3200 val_loss:3.4520 train_time:7562338ms step_avg:4054.87ms +step:1876/3200 train_loss:3.5701 train_time:7566437ms step_avg:4054.90ms +step:1877/3200 train_loss:3.4618 train_time:7570553ms step_avg:4054.93ms +step:1878/3200 train_loss:3.5161 train_time:7574706ms step_avg:4054.98ms +step:1879/3200 train_loss:3.4716 train_time:7578888ms step_avg:4055.05ms +step:1880/3200 train_loss:3.4955 train_time:7583069ms step_avg:4055.12ms +step:1881/3200 train_loss:3.3035 train_time:7587292ms step_avg:4055.21ms +step:1882/3200 train_loss:4.4456 train_time:7591487ms step_avg:4055.28ms +step:1883/3200 train_loss:3.4028 train_time:7595661ms step_avg:4055.34ms +step:1884/3200 train_loss:3.7950 train_time:7599811ms step_avg:4055.40ms +step:1885/3200 train_loss:3.4604 train_time:7603942ms step_avg:4055.44ms +step:1886/3200 train_loss:3.3834 train_time:7608059ms step_avg:4055.47ms +step:1887/3200 train_loss:3.4544 train_time:7612174ms step_avg:4055.50ms +step:1888/3200 train_loss:3.3784 train_time:7616286ms step_avg:4055.53ms +step:1889/3200 train_loss:3.4150 train_time:7620392ms step_avg:4055.56ms +step:1890/3200 train_loss:3.4421 train_time:7624496ms step_avg:4055.58ms +step:1891/3200 train_loss:3.4529 train_time:7628607ms step_avg:4055.61ms +step:1892/3200 train_loss:3.4212 train_time:7632712ms step_avg:4055.64ms +step:1893/3200 train_loss:3.5473 train_time:7636826ms step_avg:4055.67ms +step:1894/3200 train_loss:3.5618 train_time:7640947ms step_avg:4055.70ms +step:1895/3200 train_loss:3.3862 train_time:7645082ms step_avg:4055.75ms +step:1896/3200 train_loss:3.6281 train_time:7649225ms step_avg:4055.79ms +step:1897/3200 train_loss:3.4758 train_time:7653384ms step_avg:4055.85ms +step:1898/3200 train_loss:3.5436 train_time:7657563ms step_avg:4055.91ms +step:1899/3200 train_loss:3.4260 train_time:7661744ms step_avg:4055.98ms +step:1900/3200 train_loss:3.3736 train_time:7665950ms step_avg:4056.06ms +step:1901/3200 train_loss:3.4002 train_time:7670172ms step_avg:4056.15ms +step:1902/3200 train_loss:3.5591 train_time:7674365ms step_avg:4056.22ms +step:1903/3200 train_loss:3.4785 train_time:7678557ms step_avg:4056.29ms +step:1904/3200 train_loss:3.4393 train_time:7682717ms step_avg:4056.34ms +step:1905/3200 train_loss:3.2620 train_time:7686861ms step_avg:4056.39ms +step:1906/3200 train_loss:3.3316 train_time:7690992ms step_avg:4056.43ms +step:1907/3200 train_loss:3.3854 train_time:7695112ms step_avg:4056.46ms +step:1908/3200 train_loss:3.4993 train_time:7699230ms step_avg:4056.50ms +step:1909/3200 train_loss:3.3833 train_time:7703348ms step_avg:4056.53ms +step:1910/3200 train_loss:3.5961 train_time:7707465ms step_avg:4056.56ms +step:1911/3200 train_loss:3.4514 train_time:7711582ms step_avg:4056.59ms +step:1912/3200 train_loss:3.4733 train_time:7715703ms step_avg:4056.63ms +step:1913/3200 train_loss:3.4902 train_time:7719833ms step_avg:4056.66ms +step:1914/3200 train_loss:3.2732 train_time:7723970ms step_avg:4056.71ms +step:1915/3200 train_loss:3.4490 train_time:7728120ms step_avg:4056.76ms +step:1916/3200 train_loss:3.5106 train_time:7732278ms step_avg:4056.81ms +step:1917/3200 train_loss:3.4282 train_time:7736458ms step_avg:4056.87ms +step:1918/3200 train_loss:3.3984 train_time:7740639ms step_avg:4056.94ms +step:1919/3200 train_loss:2.9326 train_time:7744839ms step_avg:4057.01ms +step:1920/3200 train_loss:3.3371 train_time:7749041ms step_avg:4057.09ms +step:1921/3200 train_loss:3.5798 train_time:7753205ms step_avg:4057.15ms +step:1922/3200 train_loss:3.6270 train_time:7757360ms step_avg:4057.20ms +step:1923/3200 train_loss:3.4664 train_time:7761501ms step_avg:4057.24ms +step:1924/3200 train_loss:3.4303 train_time:7765620ms step_avg:4057.27ms +step:1925/3200 train_loss:3.6808 train_time:7769747ms step_avg:4057.31ms +step:1926/3200 train_loss:3.3796 train_time:7773856ms step_avg:4057.34ms +step:1927/3200 train_loss:3.4482 train_time:7777956ms step_avg:4057.36ms +step:1928/3200 train_loss:3.4641 train_time:7782051ms step_avg:4057.38ms +step:1929/3200 train_loss:3.5280 train_time:7786146ms step_avg:4057.40ms +step:1930/3200 train_loss:3.1670 train_time:7790244ms step_avg:4057.42ms +step:1931/3200 train_loss:3.3429 train_time:7794343ms step_avg:4057.44ms +step:1932/3200 train_loss:3.2915 train_time:7798443ms step_avg:4057.46ms +step:1933/3200 train_loss:3.3826 train_time:7802552ms step_avg:4057.49ms +step:1934/3200 train_loss:3.1555 train_time:7806669ms step_avg:4057.52ms +step:1935/3200 train_loss:3.3734 train_time:7810803ms step_avg:4057.56ms +step:1936/3200 train_loss:3.6182 train_time:7814943ms step_avg:4057.60ms +step:1937/3200 train_loss:3.4790 train_time:7819097ms step_avg:4057.65ms +step:1938/3200 train_loss:3.5797 train_time:7823271ms step_avg:4057.71ms +step:1939/3200 train_loss:3.4104 train_time:7827398ms step_avg:4057.75ms +step:1940/3200 train_loss:3.4020 train_time:7831507ms step_avg:4057.78ms +step:1941/3200 train_loss:3.4892 train_time:7835597ms step_avg:4057.79ms +step:1942/3200 train_loss:3.4250 train_time:7839674ms step_avg:4057.80ms +step:1943/3200 train_loss:3.5280 train_time:7843744ms step_avg:4057.81ms +step:1944/3200 train_loss:3.3815 train_time:7847802ms step_avg:4057.81ms +step:1945/3200 train_loss:3.5636 train_time:7851856ms step_avg:4057.81ms +step:1946/3200 train_loss:3.6998 train_time:7855907ms step_avg:4057.80ms +step:1947/3200 train_loss:3.4671 train_time:7859953ms step_avg:4057.80ms +step:1948/3200 train_loss:3.4053 train_time:7864001ms step_avg:4057.79ms +step:1949/3200 train_loss:3.6212 train_time:7868045ms step_avg:4057.78ms +step:1950/3200 train_loss:3.2981 train_time:7872090ms step_avg:4057.78ms +step:1951/3200 train_loss:3.4861 train_time:7876141ms step_avg:4057.77ms +step:1952/3200 train_loss:3.4601 train_time:7880194ms step_avg:4057.77ms +step:1953/3200 train_loss:3.3507 train_time:7884252ms step_avg:4057.77ms +step:1954/3200 train_loss:3.3756 train_time:7888314ms step_avg:4057.77ms +step:1955/3200 train_loss:3.3320 train_time:7892379ms step_avg:4057.78ms +step:1956/3200 train_loss:3.3687 train_time:7896454ms step_avg:4057.79ms +step:1957/3200 train_loss:3.3561 train_time:7900536ms step_avg:4057.80ms +step:1958/3200 train_loss:3.5785 train_time:7904628ms step_avg:4057.82ms +step:1959/3200 train_loss:3.5726 train_time:7908730ms step_avg:4057.84ms +step:1960/3200 train_loss:3.4269 train_time:7912852ms step_avg:4057.87ms +step:1961/3200 train_loss:3.3047 train_time:7916990ms step_avg:4057.91ms +step:1962/3200 train_loss:3.2580 train_time:7921151ms step_avg:4057.97ms +step:1963/3200 train_loss:3.6863 train_time:7925331ms step_avg:4058.03ms +step:1964/3200 train_loss:3.4592 train_time:7929513ms step_avg:4058.09ms +step:1965/3200 train_loss:3.5648 train_time:7933712ms step_avg:4058.16ms +step:1966/3200 train_loss:3.3487 train_time:7937904ms step_avg:4058.23ms +step:1967/3200 train_loss:3.5411 train_time:7942028ms step_avg:4058.27ms +step:1968/3200 train_loss:3.5188 train_time:7946119ms step_avg:4058.28ms +step:1969/3200 train_loss:3.4538 train_time:7950185ms step_avg:4058.29ms +step:1970/3200 train_loss:3.3830 train_time:7954226ms step_avg:4058.28ms +step:1971/3200 train_loss:3.6444 train_time:7958257ms step_avg:4058.26ms +step:1972/3200 train_loss:3.3647 train_time:7962276ms step_avg:4058.24ms +step:1973/3200 train_loss:3.4048 train_time:7966280ms step_avg:4058.22ms +step:1974/3200 train_loss:3.4826 train_time:7970296ms step_avg:4058.20ms +step:1975/3200 train_loss:3.2916 train_time:7974315ms step_avg:4058.18ms +step:1976/3200 train_loss:3.0657 train_time:7978329ms step_avg:4058.15ms +step:1977/3200 train_loss:3.1363 train_time:7982338ms step_avg:4058.13ms +step:1978/3200 train_loss:3.4380 train_time:7986347ms step_avg:4058.10ms +step:1979/3200 train_loss:3.3560 train_time:7990350ms step_avg:4058.08ms +step:1980/3200 train_loss:3.2574 train_time:7994348ms step_avg:4058.04ms +step:1981/3200 train_loss:3.5272 train_time:7998349ms step_avg:4058.02ms +step:1982/3200 train_loss:3.3854 train_time:8002344ms step_avg:4057.98ms +step:1983/3200 train_loss:3.9703 train_time:8006342ms step_avg:4057.95ms +step:1984/3200 train_loss:3.4051 train_time:8010343ms step_avg:4057.92ms +step:1985/3200 train_loss:3.4275 train_time:8014342ms step_avg:4057.89ms +step:1986/3200 train_loss:3.4435 train_time:8018342ms step_avg:4057.87ms +step:1987/3200 train_loss:3.4889 train_time:8022346ms step_avg:4057.84ms +step:1988/3200 train_loss:3.4409 train_time:8026355ms step_avg:4057.81ms +step:1989/3200 train_loss:3.3236 train_time:8030362ms step_avg:4057.79ms +step:1990/3200 train_loss:3.5261 train_time:8034375ms step_avg:4057.77ms +step:1991/3200 train_loss:3.5743 train_time:8038391ms step_avg:4057.74ms +step:1992/3200 train_loss:3.3391 train_time:8042413ms step_avg:4057.73ms +step:1993/3200 train_loss:3.4068 train_time:8046442ms step_avg:4057.71ms +step:1994/3200 train_loss:3.4841 train_time:8050452ms step_avg:4057.69ms +step:1995/3200 train_loss:3.5529 train_time:8054461ms step_avg:4057.66ms +step:1996/3200 train_loss:3.3447 train_time:8058481ms step_avg:4057.64ms +step:1997/3200 train_loss:3.3128 train_time:8062506ms step_avg:4057.63ms +step:1998/3200 train_loss:3.6420 train_time:8066541ms step_avg:4057.62ms +step:1999/3200 train_loss:3.3435 train_time:8070587ms step_avg:4057.61ms +step:2000/3200 train_loss:3.3870 train_time:8074639ms step_avg:4057.61ms +step:2000/3200 val_loss:3.4389 train_time:8074639ms step_avg:4057.61ms +step:2001/3200 train_loss:3.4752 train_time:8078839ms step_avg:4057.68ms +step:2002/3200 train_loss:3.3871 train_time:8083035ms step_avg:4057.75ms +step:2003/3200 train_loss:3.3558 train_time:8087193ms step_avg:4057.80ms +step:2004/3200 train_loss:3.3470 train_time:8091335ms step_avg:4057.84ms +step:2005/3200 train_loss:3.4304 train_time:8095464ms step_avg:4057.88ms +step:2006/3200 train_loss:3.5459 train_time:8099586ms step_avg:4057.91ms +step:2007/3200 train_loss:3.4521 train_time:8103698ms step_avg:4057.94ms +step:2008/3200 train_loss:3.8053 train_time:8107805ms step_avg:4057.96ms +step:2009/3200 train_loss:3.5096 train_time:8111910ms step_avg:4057.98ms +step:2010/3200 train_loss:3.3451 train_time:8116020ms step_avg:4058.01ms +step:2011/3200 train_loss:3.3993 train_time:8120129ms step_avg:4058.04ms +step:2012/3200 train_loss:3.2412 train_time:8124243ms step_avg:4058.06ms +step:2013/3200 train_loss:3.5408 train_time:8128361ms step_avg:4058.09ms +step:2014/3200 train_loss:3.4490 train_time:8132485ms step_avg:4058.13ms +step:2015/3200 train_loss:3.4192 train_time:8136623ms step_avg:4058.17ms +step:2016/3200 train_loss:3.4766 train_time:8140774ms step_avg:4058.21ms +step:2017/3200 train_loss:3.5199 train_time:8144955ms step_avg:4058.27ms +step:2018/3200 train_loss:3.3516 train_time:8149135ms step_avg:4058.33ms +step:2019/3200 train_loss:3.4459 train_time:8153318ms step_avg:4058.40ms +step:2020/3200 train_loss:3.3908 train_time:8157517ms step_avg:4058.47ms +step:2021/3200 train_loss:3.3872 train_time:8161715ms step_avg:4058.54ms +step:2022/3200 train_loss:3.6917 train_time:8165908ms step_avg:4058.60ms +step:2023/3200 train_loss:3.3654 train_time:8170033ms step_avg:4058.64ms +step:2024/3200 train_loss:3.6373 train_time:8174102ms step_avg:4058.64ms +step:2025/3200 train_loss:3.4115 train_time:8178135ms step_avg:4058.63ms +step:2026/3200 train_loss:3.1209 train_time:8182145ms step_avg:4058.60ms +step:2027/3200 train_loss:3.3959 train_time:8186149ms step_avg:4058.58ms +step:2028/3200 train_loss:3.3901 train_time:8190156ms step_avg:4058.55ms +step:2029/3200 train_loss:3.4682 train_time:8194149ms step_avg:4058.52ms +step:2030/3200 train_loss:3.5708 train_time:8198130ms step_avg:4058.48ms +step:2031/3200 train_loss:3.3879 train_time:8202105ms step_avg:4058.44ms +step:2032/3200 train_loss:2.8679 train_time:8206087ms step_avg:4058.40ms +step:2033/3200 train_loss:3.3498 train_time:8210067ms step_avg:4058.36ms +step:2034/3200 train_loss:3.5026 train_time:8214043ms step_avg:4058.32ms +step:2035/3200 train_loss:3.3824 train_time:8218016ms step_avg:4058.28ms +step:2036/3200 train_loss:3.4885 train_time:8221988ms step_avg:4058.24ms +step:2037/3200 train_loss:3.2651 train_time:8225955ms step_avg:4058.19ms +step:2038/3200 train_loss:3.4974 train_time:8229924ms step_avg:4058.15ms +step:2039/3200 train_loss:3.3137 train_time:8233892ms step_avg:4058.10ms +step:2040/3200 train_loss:3.4878 train_time:8237858ms step_avg:4058.06ms +step:2041/3200 train_loss:3.5011 train_time:8241822ms step_avg:4058.01ms +step:2042/3200 train_loss:3.5266 train_time:8245784ms step_avg:4057.96ms +step:2043/3200 train_loss:3.3185 train_time:8249747ms step_avg:4057.92ms +step:2044/3200 train_loss:3.6581 train_time:8253713ms step_avg:4057.87ms +step:2045/3200 train_loss:3.4826 train_time:8257664ms step_avg:4057.82ms +step:2046/3200 train_loss:3.5415 train_time:8261616ms step_avg:4057.77ms +step:2047/3200 train_loss:3.3589 train_time:8265580ms step_avg:4057.72ms +step:2048/3200 train_loss:3.4243 train_time:8269547ms step_avg:4057.68ms +step:2049/3200 train_loss:3.5012 train_time:8273514ms step_avg:4057.63ms +step:2050/3200 train_loss:3.4742 train_time:8277482ms step_avg:4057.59ms +step:2051/3200 train_loss:3.2047 train_time:8281450ms step_avg:4057.55ms +step:2052/3200 train_loss:3.3301 train_time:8285418ms step_avg:4057.50ms +step:2053/3200 train_loss:3.4538 train_time:8289391ms step_avg:4057.46ms +step:2054/3200 train_loss:3.3628 train_time:8293366ms step_avg:4057.42ms +step:2055/3200 train_loss:3.2851 train_time:8297343ms step_avg:4057.38ms +step:2056/3200 train_loss:3.5549 train_time:8301319ms step_avg:4057.34ms +step:2057/3200 train_loss:3.4766 train_time:8305297ms step_avg:4057.30ms +step:2058/3200 train_loss:3.4059 train_time:8309281ms step_avg:4057.27ms +step:2059/3200 train_loss:3.4187 train_time:8313266ms step_avg:4057.23ms +step:2060/3200 train_loss:3.4538 train_time:8317255ms step_avg:4057.20ms +step:2061/3200 train_loss:3.3783 train_time:8321241ms step_avg:4057.16ms +step:2062/3200 train_loss:3.7186 train_time:8325214ms step_avg:4057.12ms +step:2063/3200 train_loss:3.6435 train_time:8329194ms step_avg:4057.08ms +step:2064/3200 train_loss:3.4086 train_time:8333172ms step_avg:4057.05ms +step:2065/3200 train_loss:3.5467 train_time:8337159ms step_avg:4057.01ms +step:2066/3200 train_loss:3.4972 train_time:8341155ms step_avg:4056.98ms +step:2067/3200 train_loss:3.5100 train_time:8345152ms step_avg:4056.95ms +step:2068/3200 train_loss:3.4487 train_time:8349157ms step_avg:4056.93ms +step:2069/3200 train_loss:3.7411 train_time:8353173ms step_avg:4056.91ms +step:2070/3200 train_loss:3.4789 train_time:8357173ms step_avg:4056.88ms +step:2071/3200 train_loss:3.5572 train_time:8361174ms step_avg:4056.85ms +step:2072/3200 train_loss:3.3943 train_time:8365185ms step_avg:4056.83ms +step:2073/3200 train_loss:3.3335 train_time:8369204ms step_avg:4056.81ms +step:2074/3200 train_loss:3.3885 train_time:8373236ms step_avg:4056.80ms +step:2075/3200 train_loss:3.6372 train_time:8377279ms step_avg:4056.79ms +step:2076/3200 train_loss:3.5900 train_time:8381336ms step_avg:4056.79ms +step:2077/3200 train_loss:3.4857 train_time:8385413ms step_avg:4056.80ms +step:2078/3200 train_loss:3.4435 train_time:8389504ms step_avg:4056.82ms +step:2079/3200 train_loss:3.4638 train_time:8393618ms step_avg:4056.85ms +step:2080/3200 train_loss:3.4486 train_time:8397759ms step_avg:4056.89ms +step:2081/3200 train_loss:3.1976 train_time:8401937ms step_avg:4056.95ms +step:2082/3200 train_loss:3.5860 train_time:8406114ms step_avg:4057.00ms +step:2083/3200 train_loss:3.2715 train_time:8410265ms step_avg:4057.05ms +step:2084/3200 train_loss:3.2740 train_time:8414374ms step_avg:4057.08ms +step:2085/3200 train_loss:3.3515 train_time:8418458ms step_avg:4057.09ms +step:2086/3200 train_loss:3.4205 train_time:8422524ms step_avg:4057.09ms +step:2087/3200 train_loss:3.2931 train_time:8426576ms step_avg:4057.09ms +step:2088/3200 train_loss:3.5340 train_time:8430608ms step_avg:4057.08ms +step:2089/3200 train_loss:3.3350 train_time:8434636ms step_avg:4057.06ms +step:2090/3200 train_loss:3.2848 train_time:8438656ms step_avg:4057.05ms +step:2091/3200 train_loss:3.4617 train_time:8442674ms step_avg:4057.03ms +step:2092/3200 train_loss:3.5196 train_time:8446689ms step_avg:4057.01ms +step:2093/3200 train_loss:3.4470 train_time:8450699ms step_avg:4056.98ms +step:2094/3200 train_loss:3.7018 train_time:8454707ms step_avg:4056.96ms +step:2095/3200 train_loss:3.5396 train_time:8458713ms step_avg:4056.94ms +step:2096/3200 train_loss:3.5453 train_time:8462720ms step_avg:4056.91ms +step:2097/3200 train_loss:3.4662 train_time:8466729ms step_avg:4056.89ms +step:2098/3200 train_loss:3.4161 train_time:8470868ms step_avg:4056.93ms +step:2099/3200 train_loss:3.3715 train_time:8474876ms step_avg:4056.91ms +step:2100/3200 train_loss:3.2878 train_time:8478890ms step_avg:4056.89ms +step:2101/3200 train_loss:3.5437 train_time:8482905ms step_avg:4056.87ms +step:2102/3200 train_loss:3.4439 train_time:8486926ms step_avg:4056.85ms +step:2103/3200 train_loss:3.2567 train_time:8490955ms step_avg:4056.83ms +step:2104/3200 train_loss:3.5386 train_time:8494998ms step_avg:4056.83ms +step:2105/3200 train_loss:3.2186 train_time:8499040ms step_avg:4056.82ms +step:2106/3200 train_loss:3.6175 train_time:8503086ms step_avg:4056.82ms +step:2107/3200 train_loss:3.3505 train_time:8507141ms step_avg:4056.81ms +step:2108/3200 train_loss:3.3198 train_time:8511204ms step_avg:4056.82ms +step:2109/3200 train_loss:3.5051 train_time:8515289ms step_avg:4056.83ms +step:2110/3200 train_loss:3.4042 train_time:8519381ms step_avg:4056.85ms +step:2111/3200 train_loss:3.2925 train_time:8523489ms step_avg:4056.87ms +step:2112/3200 train_loss:3.4357 train_time:8527614ms step_avg:4056.90ms +step:2113/3200 train_loss:3.4382 train_time:8531758ms step_avg:4056.95ms +step:2114/3200 train_loss:3.4592 train_time:8535934ms step_avg:4057.00ms +step:2115/3200 train_loss:3.5192 train_time:8540110ms step_avg:4057.06ms +step:2116/3200 train_loss:3.4773 train_time:8544258ms step_avg:4057.10ms +step:2117/3200 train_loss:3.5167 train_time:8548390ms step_avg:4057.14ms +step:2118/3200 train_loss:3.3530 train_time:8552516ms step_avg:4057.17ms +step:2119/3200 train_loss:3.5405 train_time:8556640ms step_avg:4057.20ms +step:2120/3200 train_loss:3.3915 train_time:8560768ms step_avg:4057.24ms +step:2121/3200 train_loss:3.4632 train_time:8564897ms step_avg:4057.27ms +step:2122/3200 train_loss:3.3418 train_time:8569023ms step_avg:4057.30ms +step:2123/3200 train_loss:3.3131 train_time:8573155ms step_avg:4057.34ms +step:2124/3200 train_loss:3.3046 train_time:8577287ms step_avg:4057.37ms +step:2125/3200 train_loss:3.4443 train_time:8581431ms step_avg:4057.41ms +step:2125/3200 val_loss:3.4284 train_time:8581431ms step_avg:4057.41ms +step:2126/3200 train_loss:3.3338 train_time:8585594ms step_avg:4057.46ms +step:2127/3200 train_loss:3.6837 train_time:8589709ms step_avg:4057.49ms +step:2128/3200 train_loss:3.3922 train_time:8593791ms step_avg:4057.50ms +step:2129/3200 train_loss:3.3419 train_time:8597853ms step_avg:4057.50ms +step:2130/3200 train_loss:3.3969 train_time:8601893ms step_avg:4057.50ms +step:2131/3200 train_loss:3.2358 train_time:8605925ms step_avg:4057.48ms +step:2132/3200 train_loss:3.3825 train_time:8609942ms step_avg:4057.47ms +step:2133/3200 train_loss:3.5101 train_time:8613950ms step_avg:4057.44ms +step:2134/3200 train_loss:3.6333 train_time:8617975ms step_avg:4057.43ms +step:2135/3200 train_loss:3.5424 train_time:8621998ms step_avg:4057.41ms +step:2136/3200 train_loss:3.4420 train_time:8626017ms step_avg:4057.39ms +step:2137/3200 train_loss:3.7022 train_time:8630029ms step_avg:4057.37ms +step:2138/3200 train_loss:3.7574 train_time:8634043ms step_avg:4057.35ms +step:2139/3200 train_loss:3.3290 train_time:8638056ms step_avg:4057.33ms +step:2140/3200 train_loss:3.5005 train_time:8642064ms step_avg:4057.31ms +step:2141/3200 train_loss:3.3878 train_time:8646074ms step_avg:4057.28ms +step:2142/3200 train_loss:3.3892 train_time:8650079ms step_avg:4057.26ms +step:2143/3200 train_loss:3.3150 train_time:8654089ms step_avg:4057.24ms +step:2144/3200 train_loss:3.3026 train_time:8658093ms step_avg:4057.21ms +step:2145/3200 train_loss:3.2183 train_time:8662101ms step_avg:4057.19ms +step:2146/3200 train_loss:3.5672 train_time:8666114ms step_avg:4057.17ms +step:2147/3200 train_loss:3.9332 train_time:8670128ms step_avg:4057.15ms +step:2148/3200 train_loss:3.4223 train_time:8674145ms step_avg:4057.13ms +step:2149/3200 train_loss:3.4062 train_time:8678163ms step_avg:4057.11ms +step:2150/3200 train_loss:3.3332 train_time:8682185ms step_avg:4057.10ms +step:2151/3200 train_loss:3.5473 train_time:8686211ms step_avg:4057.08ms +step:2152/3200 train_loss:3.7331 train_time:8690240ms step_avg:4057.07ms +step:2153/3200 train_loss:3.5096 train_time:8694256ms step_avg:4057.05ms +step:2154/3200 train_loss:3.5605 train_time:8698268ms step_avg:4057.03ms +step:2155/3200 train_loss:3.3080 train_time:8702289ms step_avg:4057.01ms +step:2156/3200 train_loss:3.5455 train_time:8706316ms step_avg:4057.00ms +step:2157/3200 train_loss:3.4312 train_time:8710349ms step_avg:4056.99ms +step:2158/3200 train_loss:3.4934 train_time:8714389ms step_avg:4056.98ms +step:2159/3200 train_loss:4.7540 train_time:8718442ms step_avg:4056.98ms +step:2160/3200 train_loss:3.4825 train_time:8722509ms step_avg:4056.98ms +step:2161/3200 train_loss:3.6834 train_time:8726584ms step_avg:4056.99ms +step:2162/3200 train_loss:3.3495 train_time:8730673ms step_avg:4057.00ms +step:2163/3200 train_loss:3.6316 train_time:8734780ms step_avg:4057.03ms +step:2164/3200 train_loss:3.4654 train_time:8738903ms step_avg:4057.06ms +step:2165/3200 train_loss:3.3488 train_time:8743045ms step_avg:4057.10ms +step:2166/3200 train_loss:3.3115 train_time:8747225ms step_avg:4057.15ms +step:2167/3200 train_loss:3.4129 train_time:8751405ms step_avg:4057.21ms +step:2168/3200 train_loss:3.3647 train_time:8755601ms step_avg:4057.28ms +step:2169/3200 train_loss:3.1150 train_time:8759753ms step_avg:4057.32ms +step:2170/3200 train_loss:3.3418 train_time:8763875ms step_avg:4057.35ms +step:2171/3200 train_loss:3.4121 train_time:8767964ms step_avg:4057.36ms +step:2172/3200 train_loss:3.6913 train_time:8772042ms step_avg:4057.37ms +step:2173/3200 train_loss:3.4307 train_time:8776110ms step_avg:4057.38ms +step:2174/3200 train_loss:3.3361 train_time:8780162ms step_avg:4057.38ms +step:2175/3200 train_loss:3.6013 train_time:8784206ms step_avg:4057.37ms +step:2176/3200 train_loss:3.4665 train_time:8788242ms step_avg:4057.36ms +step:2177/3200 train_loss:3.2486 train_time:8792268ms step_avg:4057.35ms +step:2178/3200 train_loss:3.4306 train_time:8796296ms step_avg:4057.33ms +step:2179/3200 train_loss:3.2317 train_time:8800323ms step_avg:4057.32ms +step:2180/3200 train_loss:3.5944 train_time:8804345ms step_avg:4057.30ms +step:2181/3200 train_loss:3.4792 train_time:8808365ms step_avg:4057.28ms +step:2182/3200 train_loss:3.4389 train_time:8812390ms step_avg:4057.27ms +step:2183/3200 train_loss:3.4544 train_time:8816419ms step_avg:4057.26ms +step:2184/3200 train_loss:3.4398 train_time:8820447ms step_avg:4057.24ms +step:2185/3200 train_loss:3.3762 train_time:8824476ms step_avg:4057.23ms +step:2186/3200 train_loss:3.3257 train_time:8828500ms step_avg:4057.22ms +step:2187/3200 train_loss:3.5377 train_time:8832528ms step_avg:4057.20ms +step:2188/3200 train_loss:3.4048 train_time:8836557ms step_avg:4057.19ms +step:2189/3200 train_loss:3.4781 train_time:8840599ms step_avg:4057.18ms +step:2190/3200 train_loss:3.3094 train_time:8844648ms step_avg:4057.18ms +step:2191/3200 train_loss:3.0704 train_time:8848704ms step_avg:4057.18ms +step:2192/3200 train_loss:3.4335 train_time:8852765ms step_avg:4057.18ms +step:2193/3200 train_loss:3.4281 train_time:8856832ms step_avg:4057.18ms +step:2194/3200 train_loss:3.4710 train_time:8860911ms step_avg:4057.19ms +step:2195/3200 train_loss:3.3863 train_time:8865005ms step_avg:4057.21ms +step:2196/3200 train_loss:3.4370 train_time:8869111ms step_avg:4057.23ms +step:2197/3200 train_loss:3.4065 train_time:8873231ms step_avg:4057.26ms +step:2198/3200 train_loss:3.2618 train_time:8877363ms step_avg:4057.30ms +step:2199/3200 train_loss:3.4059 train_time:8881518ms step_avg:4057.34ms +step:2200/3200 train_loss:3.5630 train_time:8885702ms step_avg:4057.40ms +step:2201/3200 train_loss:3.4023 train_time:8889883ms step_avg:4057.45ms +step:2202/3200 train_loss:3.3992 train_time:8894083ms step_avg:4057.52ms +step:2203/3200 train_loss:3.4448 train_time:8898276ms step_avg:4057.58ms +step:2204/3200 train_loss:3.5314 train_time:8902415ms step_avg:4057.62ms +step:2205/3200 train_loss:3.2796 train_time:8906511ms step_avg:4057.64ms +step:2206/3200 train_loss:3.3728 train_time:8910578ms step_avg:4057.64ms +step:2207/3200 train_loss:3.5585 train_time:8914620ms step_avg:4057.63ms +step:2208/3200 train_loss:3.5512 train_time:8918647ms step_avg:4057.62ms +step:2209/3200 train_loss:3.5638 train_time:8922661ms step_avg:4057.60ms +step:2210/3200 train_loss:3.3847 train_time:8926696ms step_avg:4057.59ms +step:2211/3200 train_loss:3.4871 train_time:8930716ms step_avg:4057.57ms +step:2212/3200 train_loss:3.4900 train_time:8934726ms step_avg:4057.55ms +step:2213/3200 train_loss:3.5354 train_time:8938731ms step_avg:4057.53ms +step:2214/3200 train_loss:3.4761 train_time:8942728ms step_avg:4057.50ms +step:2215/3200 train_loss:3.7530 train_time:8946723ms step_avg:4057.47ms +step:2216/3200 train_loss:3.4270 train_time:8950714ms step_avg:4057.44ms +step:2217/3200 train_loss:3.3760 train_time:8954704ms step_avg:4057.41ms +step:2218/3200 train_loss:3.4151 train_time:8958697ms step_avg:4057.38ms +step:2219/3200 train_loss:3.4823 train_time:8962685ms step_avg:4057.35ms +step:2220/3200 train_loss:3.6389 train_time:8966672ms step_avg:4057.32ms +step:2221/3200 train_loss:3.4645 train_time:8970653ms step_avg:4057.28ms +step:2222/3200 train_loss:3.3574 train_time:8974637ms step_avg:4057.25ms +step:2223/3200 train_loss:3.1612 train_time:8978624ms step_avg:4057.22ms +step:2224/3200 train_loss:3.3585 train_time:8982607ms step_avg:4057.18ms +step:2225/3200 train_loss:3.3716 train_time:8986593ms step_avg:4057.15ms +step:2226/3200 train_loss:3.4794 train_time:8990581ms step_avg:4057.12ms +step:2227/3200 train_loss:3.3577 train_time:8994572ms step_avg:4057.09ms +step:2228/3200 train_loss:3.4276 train_time:8998564ms step_avg:4057.06ms +step:2229/3200 train_loss:3.3412 train_time:9002558ms step_avg:4057.03ms +step:2230/3200 train_loss:3.3816 train_time:9006556ms step_avg:4057.01ms +step:2231/3200 train_loss:3.2490 train_time:9010556ms step_avg:4056.98ms +step:2232/3200 train_loss:3.3475 train_time:9014563ms step_avg:4056.96ms +step:2233/3200 train_loss:3.4234 train_time:9018571ms step_avg:4056.94ms +step:2234/3200 train_loss:4.1996 train_time:9022580ms step_avg:4056.92ms +step:2235/3200 train_loss:3.5143 train_time:9026595ms step_avg:4056.90ms +step:2236/3200 train_loss:3.4804 train_time:9030614ms step_avg:4056.88ms +step:2237/3200 train_loss:3.4066 train_time:9034644ms step_avg:4056.87ms +step:2238/3200 train_loss:3.6672 train_time:9038659ms step_avg:4056.85ms +step:2239/3200 train_loss:3.2371 train_time:9042674ms step_avg:4056.83ms +step:2240/3200 train_loss:3.4592 train_time:9046699ms step_avg:4056.82ms +step:2241/3200 train_loss:3.5244 train_time:9050732ms step_avg:4056.80ms +step:2242/3200 train_loss:3.3044 train_time:9054764ms step_avg:4056.79ms +step:2243/3200 train_loss:3.5418 train_time:9058808ms step_avg:4056.79ms +step:2244/3200 train_loss:3.3424 train_time:9062865ms step_avg:4056.79ms +step:2245/3200 train_loss:3.2445 train_time:9066939ms step_avg:4056.80ms +step:2246/3200 train_loss:3.3316 train_time:9071033ms step_avg:4056.81ms +step:2247/3200 train_loss:3.4031 train_time:9075146ms step_avg:4056.84ms +step:2248/3200 train_loss:3.2310 train_time:9079270ms step_avg:4056.87ms +step:2249/3200 train_loss:3.4192 train_time:9083416ms step_avg:4056.91ms +step:2250/3200 train_loss:3.5346 train_time:9087582ms step_avg:4056.96ms +step:2250/3200 val_loss:3.4176 train_time:9087583ms step_avg:4056.96ms +step:2251/3200 train_loss:3.3285 train_time:9091650ms step_avg:4056.96ms +step:2252/3200 train_loss:3.3758 train_time:9095695ms step_avg:4056.96ms +step:2253/3200 train_loss:3.3588 train_time:9099730ms step_avg:4056.95ms +step:2254/3200 train_loss:3.4797 train_time:9103756ms step_avg:4056.93ms +step:2255/3200 train_loss:3.4028 train_time:9107774ms step_avg:4056.91ms +step:2256/3200 train_loss:3.4109 train_time:9111784ms step_avg:4056.89ms +step:2257/3200 train_loss:3.4310 train_time:9115816ms step_avg:4056.88ms +step:2258/3200 train_loss:3.4919 train_time:9119843ms step_avg:4056.87ms +step:2259/3200 train_loss:3.7200 train_time:9123870ms step_avg:4056.86ms +step:2260/3200 train_loss:3.3991 train_time:9127890ms step_avg:4056.84ms +step:2261/3200 train_loss:3.4254 train_time:9131909ms step_avg:4056.82ms +step:2262/3200 train_loss:3.5245 train_time:9135931ms step_avg:4056.81ms +step:2263/3200 train_loss:3.1317 train_time:9139953ms step_avg:4056.79ms +step:2264/3200 train_loss:3.4740 train_time:9143976ms step_avg:4056.78ms +step:2265/3200 train_loss:3.2904 train_time:9148002ms step_avg:4056.76ms +step:2266/3200 train_loss:3.2963 train_time:9152025ms step_avg:4056.75ms +step:2267/3200 train_loss:3.3356 train_time:9156052ms step_avg:4056.74ms +step:2268/3200 train_loss:3.4403 train_time:9160082ms step_avg:4056.72ms +step:2269/3200 train_loss:3.4178 train_time:9164113ms step_avg:4056.71ms +step:2270/3200 train_loss:3.3384 train_time:9168140ms step_avg:4056.70ms +step:2271/3200 train_loss:3.3805 train_time:9172154ms step_avg:4056.68ms +step:2272/3200 train_loss:3.3094 train_time:9176174ms step_avg:4056.66ms +step:2273/3200 train_loss:3.4774 train_time:9180204ms step_avg:4056.65ms +step:2274/3200 train_loss:3.4538 train_time:9184236ms step_avg:4056.64ms +step:2275/3200 train_loss:3.3500 train_time:9188274ms step_avg:4056.63ms +step:2276/3200 train_loss:3.2156 train_time:9192325ms step_avg:4056.63ms +step:2277/3200 train_loss:3.2808 train_time:9196385ms step_avg:4056.63ms +step:2278/3200 train_loss:3.7566 train_time:9200451ms step_avg:4056.64ms +step:2279/3200 train_loss:3.3053 train_time:9204539ms step_avg:4056.65ms +step:2280/3200 train_loss:3.3943 train_time:9208641ms step_avg:4056.67ms +step:2281/3200 train_loss:3.4058 train_time:9212758ms step_avg:4056.70ms +step:2282/3200 train_loss:3.4196 train_time:9216901ms step_avg:4056.73ms +step:2283/3200 train_loss:3.3369 train_time:9221066ms step_avg:4056.78ms +step:2284/3200 train_loss:3.4093 train_time:9225245ms step_avg:4056.84ms +step:2285/3200 train_loss:3.4255 train_time:9229447ms step_avg:4056.90ms +step:2286/3200 train_loss:3.4093 train_time:9233644ms step_avg:4056.96ms +step:2287/3200 train_loss:3.3820 train_time:9237839ms step_avg:4057.02ms +step:2288/3200 train_loss:3.5667 train_time:9242004ms step_avg:4057.07ms +step:2289/3200 train_loss:3.4340 train_time:9246142ms step_avg:4057.10ms +step:2290/3200 train_loss:3.4076 train_time:9250258ms step_avg:4057.13ms +step:2291/3200 train_loss:3.3453 train_time:9254368ms step_avg:4057.15ms +step:2292/3200 train_loss:3.4338 train_time:9258463ms step_avg:4057.17ms +step:2293/3200 train_loss:3.2828 train_time:9262546ms step_avg:4057.18ms +step:2294/3200 train_loss:3.5165 train_time:9266623ms step_avg:4057.19ms +step:2295/3200 train_loss:3.4635 train_time:9270690ms step_avg:4057.19ms +step:2296/3200 train_loss:3.4712 train_time:9274760ms step_avg:4057.20ms +step:2297/3200 train_loss:3.2704 train_time:9278826ms step_avg:4057.20ms +step:2298/3200 train_loss:3.6150 train_time:9282897ms step_avg:4057.21ms +step:2299/3200 train_loss:3.3746 train_time:9286970ms step_avg:4057.22ms +step:2300/3200 train_loss:3.4723 train_time:9291046ms step_avg:4057.23ms +step:2301/3200 train_loss:3.2240 train_time:9295123ms step_avg:4057.23ms +step:2302/3200 train_loss:3.5508 train_time:9299208ms step_avg:4057.25ms +step:2303/3200 train_loss:3.4065 train_time:9303295ms step_avg:4057.26ms +step:2304/3200 train_loss:3.4277 train_time:9307389ms step_avg:4057.27ms +step:2305/3200 train_loss:3.3233 train_time:9311488ms step_avg:4057.29ms +step:2306/3200 train_loss:3.4873 train_time:9315605ms step_avg:4057.32ms +step:2307/3200 train_loss:3.2974 train_time:9319727ms step_avg:4057.35ms +step:2308/3200 train_loss:3.3981 train_time:9323871ms step_avg:4057.39ms +step:2309/3200 train_loss:3.5079 train_time:9328035ms step_avg:4057.43ms +step:2310/3200 train_loss:3.4196 train_time:9332229ms step_avg:4057.49ms +step:2311/3200 train_loss:3.2629 train_time:9336407ms step_avg:4057.54ms +step:2312/3200 train_loss:3.2720 train_time:9340564ms step_avg:4057.59ms +step:2313/3200 train_loss:3.7653 train_time:9344695ms step_avg:4057.62ms +step:2314/3200 train_loss:3.3945 train_time:9348808ms step_avg:4057.64ms +step:2315/3200 train_loss:3.4831 train_time:9352914ms step_avg:4057.66ms +step:2316/3200 train_loss:3.7172 train_time:9357003ms step_avg:4057.68ms +step:2317/3200 train_loss:3.3770 train_time:9361091ms step_avg:4057.69ms +step:2318/3200 train_loss:3.4507 train_time:9365176ms step_avg:4057.70ms +step:2319/3200 train_loss:3.2553 train_time:9369254ms step_avg:4057.71ms +step:2320/3200 train_loss:3.3022 train_time:9373336ms step_avg:4057.72ms +step:2321/3200 train_loss:3.4278 train_time:9377412ms step_avg:4057.73ms +step:2322/3200 train_loss:3.3133 train_time:9381493ms step_avg:4057.74ms +step:2323/3200 train_loss:3.3506 train_time:9385577ms step_avg:4057.75ms +step:2324/3200 train_loss:3.3618 train_time:9389660ms step_avg:4057.76ms +step:2325/3200 train_loss:3.5358 train_time:9393748ms step_avg:4057.77ms +step:2326/3200 train_loss:3.4487 train_time:9397846ms step_avg:4057.79ms +step:2327/3200 train_loss:3.3408 train_time:9401952ms step_avg:4057.81ms +step:2328/3200 train_loss:3.4910 train_time:9406066ms step_avg:4057.84ms +step:2329/3200 train_loss:3.5077 train_time:9410187ms step_avg:4057.86ms +step:2330/3200 train_loss:3.5271 train_time:9414324ms step_avg:4057.90ms +step:2331/3200 train_loss:3.3612 train_time:9418481ms step_avg:4057.94ms +step:2332/3200 train_loss:3.5302 train_time:9422660ms step_avg:4057.99ms +step:2333/3200 train_loss:3.4429 train_time:9426841ms step_avg:4058.05ms +step:2334/3200 train_loss:3.4131 train_time:9431040ms step_avg:4058.11ms +step:2335/3200 train_loss:3.2814 train_time:9435242ms step_avg:4058.17ms +step:2336/3200 train_loss:3.3442 train_time:9439440ms step_avg:4058.23ms +step:2337/3200 train_loss:3.3902 train_time:9443638ms step_avg:4058.29ms +step:2338/3200 train_loss:3.4020 train_time:9447833ms step_avg:4058.35ms +step:2339/3200 train_loss:3.3339 train_time:9451984ms step_avg:4058.39ms +step:2340/3200 train_loss:3.5910 train_time:9456088ms step_avg:4058.41ms +step:2341/3200 train_loss:3.3954 train_time:9460162ms step_avg:4058.41ms +step:2342/3200 train_loss:3.3978 train_time:9464215ms step_avg:4058.41ms +step:2343/3200 train_loss:3.3721 train_time:9468254ms step_avg:4058.40ms +step:2344/3200 train_loss:3.3276 train_time:9472279ms step_avg:4058.39ms +step:2345/3200 train_loss:3.4067 train_time:9476290ms step_avg:4058.37ms +step:2346/3200 train_loss:3.1880 train_time:9480296ms step_avg:4058.35ms +step:2347/3200 train_loss:3.2909 train_time:9484319ms step_avg:4058.33ms +step:2348/3200 train_loss:3.3981 train_time:9488337ms step_avg:4058.31ms +step:2349/3200 train_loss:3.4152 train_time:9492348ms step_avg:4058.29ms +step:2350/3200 train_loss:3.4390 train_time:9496356ms step_avg:4058.27ms +step:2351/3200 train_loss:3.1841 train_time:9500361ms step_avg:4058.25ms +step:2352/3200 train_loss:3.1232 train_time:9504367ms step_avg:4058.23ms +step:2353/3200 train_loss:3.3825 train_time:9508369ms step_avg:4058.20ms +step:2354/3200 train_loss:3.5201 train_time:9512370ms step_avg:4058.18ms +step:2355/3200 train_loss:3.3196 train_time:9516368ms step_avg:4058.15ms +step:2356/3200 train_loss:3.2238 train_time:9520368ms step_avg:4058.13ms +step:2357/3200 train_loss:3.3884 train_time:9524365ms step_avg:4058.10ms +step:2358/3200 train_loss:3.4289 train_time:9528364ms step_avg:4058.08ms +step:2359/3200 train_loss:3.3247 train_time:9532365ms step_avg:4058.05ms +step:2360/3200 train_loss:3.5023 train_time:9536365ms step_avg:4058.03ms +step:2361/3200 train_loss:3.4723 train_time:9540371ms step_avg:4058.01ms +step:2362/3200 train_loss:3.3577 train_time:9544377ms step_avg:4057.98ms +step:2363/3200 train_loss:3.2958 train_time:9548386ms step_avg:4057.96ms +step:2364/3200 train_loss:3.4356 train_time:9552401ms step_avg:4057.94ms +step:2365/3200 train_loss:3.3985 train_time:9556418ms step_avg:4057.93ms +step:2366/3200 train_loss:3.2349 train_time:9560437ms step_avg:4057.91ms +step:2367/3200 train_loss:3.2689 train_time:9564464ms step_avg:4057.90ms +step:2368/3200 train_loss:3.5782 train_time:9568472ms step_avg:4057.88ms +step:2369/3200 train_loss:3.3388 train_time:9572479ms step_avg:4057.85ms +step:2370/3200 train_loss:3.4109 train_time:9576493ms step_avg:4057.84ms +step:2371/3200 train_loss:3.3254 train_time:9580518ms step_avg:4057.82ms +step:2372/3200 train_loss:3.5543 train_time:9584550ms step_avg:4057.81ms +step:2373/3200 train_loss:3.3610 train_time:9588590ms step_avg:4057.80ms +step:2374/3200 train_loss:3.3714 train_time:9592643ms step_avg:4057.80ms +step:2375/3200 train_loss:3.5824 train_time:9596701ms step_avg:4057.80ms +step:2375/3200 val_loss:3.4015 train_time:9596701ms step_avg:4057.80ms +step:2376/3200 train_loss:3.3249 train_time:9600898ms step_avg:4057.86ms +step:2377/3200 train_loss:3.4636 train_time:9605092ms step_avg:4057.92ms +step:2378/3200 train_loss:3.3053 train_time:9609251ms step_avg:4057.96ms +step:2379/3200 train_loss:3.6702 train_time:9613386ms step_avg:4057.99ms +step:2380/3200 train_loss:3.4978 train_time:9617500ms step_avg:4058.02ms +step:2381/3200 train_loss:3.5555 train_time:9621601ms step_avg:4058.03ms +step:2382/3200 train_loss:5.1514 train_time:9625690ms step_avg:4058.05ms +step:2383/3200 train_loss:3.1199 train_time:9629774ms step_avg:4058.06ms +step:2384/3200 train_loss:3.2403 train_time:9633851ms step_avg:4058.07ms +step:2385/3200 train_loss:3.2819 train_time:9637922ms step_avg:4058.07ms +step:2386/3200 train_loss:3.3130 train_time:9641996ms step_avg:4058.08ms +step:2387/3200 train_loss:3.4327 train_time:9646062ms step_avg:4058.08ms +step:2388/3200 train_loss:3.5369 train_time:9650134ms step_avg:4058.09ms +step:2389/3200 train_loss:3.3036 train_time:9654205ms step_avg:4058.09ms +step:2390/3200 train_loss:3.7327 train_time:9658283ms step_avg:4058.10ms +step:2391/3200 train_loss:3.4989 train_time:9662363ms step_avg:4058.11ms +step:2392/3200 train_loss:3.4747 train_time:9666444ms step_avg:4058.12ms +step:2393/3200 train_loss:3.6915 train_time:9670529ms step_avg:4058.13ms +step:2394/3200 train_loss:3.3532 train_time:9674624ms step_avg:4058.15ms +step:2395/3200 train_loss:3.4836 train_time:9678733ms step_avg:4058.17ms +step:2396/3200 train_loss:3.3158 train_time:9682848ms step_avg:4058.19ms +step:2397/3200 train_loss:3.3378 train_time:9686981ms step_avg:4058.22ms +step:2398/3200 train_loss:3.3249 train_time:9691129ms step_avg:4058.26ms +step:2399/3200 train_loss:3.5178 train_time:9695305ms step_avg:4058.31ms +step:2400/3200 train_loss:3.1560 train_time:9699485ms step_avg:4058.36ms +step:2401/3200 train_loss:3.4895 train_time:9703676ms step_avg:4058.42ms +step:2402/3200 train_loss:3.1294 train_time:9707862ms step_avg:4058.47ms +step:2403/3200 train_loss:3.4131 train_time:9712042ms step_avg:4058.52ms +step:2404/3200 train_loss:3.2546 train_time:9716238ms step_avg:4058.58ms +step:2405/3200 train_loss:3.3292 train_time:9720415ms step_avg:4058.63ms +step:2406/3200 train_loss:3.4924 train_time:9724594ms step_avg:4058.68ms +step:2407/3200 train_loss:3.4499 train_time:9728773ms step_avg:4058.73ms +step:2408/3200 train_loss:3.2339 train_time:9732953ms step_avg:4058.78ms +step:2409/3200 train_loss:3.6228 train_time:9737130ms step_avg:4058.83ms +step:2410/3200 train_loss:3.4218 train_time:9741309ms step_avg:4058.88ms +step:2411/3200 train_loss:3.3645 train_time:9745488ms step_avg:4058.93ms +step:2412/3200 train_loss:3.3719 train_time:9749668ms step_avg:4058.98ms +step:2413/3200 train_loss:3.4283 train_time:9753846ms step_avg:4059.03ms +step:2414/3200 train_loss:3.3220 train_time:9758028ms step_avg:4059.08ms +step:2415/3200 train_loss:3.2532 train_time:9762228ms step_avg:4059.14ms +step:2416/3200 train_loss:3.4771 train_time:9766430ms step_avg:4059.20ms +step:2417/3200 train_loss:3.2386 train_time:9770627ms step_avg:4059.26ms +step:2418/3200 train_loss:3.4687 train_time:9774822ms step_avg:4059.31ms +step:2419/3200 train_loss:3.4003 train_time:9779018ms step_avg:4059.37ms +step:2420/3200 train_loss:3.2990 train_time:9783193ms step_avg:4059.42ms +step:2421/3200 train_loss:3.4489 train_time:9787337ms step_avg:4059.45ms +step:2422/3200 train_loss:3.3557 train_time:9791473ms step_avg:4059.48ms +step:2423/3200 train_loss:3.4262 train_time:9795596ms step_avg:4059.51ms +step:2424/3200 train_loss:3.3055 train_time:9799710ms step_avg:4059.53ms +step:2425/3200 train_loss:3.4580 train_time:9803820ms step_avg:4059.55ms +step:2426/3200 train_loss:3.3248 train_time:9807928ms step_avg:4059.57ms +step:2427/3200 train_loss:3.2928 train_time:9812031ms step_avg:4059.59ms +step:2428/3200 train_loss:3.3164 train_time:9816139ms step_avg:4059.61ms +step:2429/3200 train_loss:3.3488 train_time:9820248ms step_avg:4059.63ms +step:2430/3200 train_loss:3.2676 train_time:9824359ms step_avg:4059.65ms +step:2431/3200 train_loss:3.4746 train_time:9828476ms step_avg:4059.68ms +step:2432/3200 train_loss:3.4300 train_time:9832601ms step_avg:4059.70ms +step:2433/3200 train_loss:3.4473 train_time:9836738ms step_avg:4059.74ms +step:2434/3200 train_loss:3.1991 train_time:9840887ms step_avg:4059.77ms +step:2435/3200 train_loss:3.2618 train_time:9845040ms step_avg:4059.81ms +step:2436/3200 train_loss:3.3765 train_time:9849216ms step_avg:4059.86ms +step:2437/3200 train_loss:3.3975 train_time:9853396ms step_avg:4059.91ms +step:2438/3200 train_loss:3.4586 train_time:9857597ms step_avg:4059.97ms +step:2439/3200 train_loss:3.2705 train_time:9861799ms step_avg:4060.02ms +step:2440/3200 train_loss:3.5790 train_time:9866002ms step_avg:4060.08ms +step:2441/3200 train_loss:3.4592 train_time:9870197ms step_avg:4060.14ms +step:2442/3200 train_loss:3.4334 train_time:9874382ms step_avg:4060.19ms +step:2443/3200 train_loss:3.6357 train_time:9878499ms step_avg:4060.21ms +step:2444/3200 train_loss:3.3240 train_time:9882564ms step_avg:4060.22ms +step:2445/3200 train_loss:3.3942 train_time:9886594ms step_avg:4060.20ms +step:2446/3200 train_loss:3.4252 train_time:9890609ms step_avg:4060.18ms +step:2447/3200 train_loss:3.3718 train_time:9894636ms step_avg:4060.17ms +step:2448/3200 train_loss:3.3594 train_time:9898644ms step_avg:4060.15ms +step:2449/3200 train_loss:3.5080 train_time:9902640ms step_avg:4060.12ms +step:2450/3200 train_loss:3.5108 train_time:9906624ms step_avg:4060.09ms +step:2451/3200 train_loss:3.5132 train_time:9910625ms step_avg:4060.07ms +step:2452/3200 train_loss:3.3975 train_time:9914620ms step_avg:4060.04ms +step:2453/3200 train_loss:3.3080 train_time:9918612ms step_avg:4060.01ms +step:2454/3200 train_loss:3.3126 train_time:9922596ms step_avg:4059.98ms +step:2455/3200 train_loss:3.4563 train_time:9926580ms step_avg:4059.95ms +step:2456/3200 train_loss:3.4074 train_time:9930561ms step_avg:4059.92ms +step:2457/3200 train_loss:3.2547 train_time:9934536ms step_avg:4059.88ms +step:2458/3200 train_loss:3.2787 train_time:9938511ms step_avg:4059.85ms +step:2459/3200 train_loss:3.2074 train_time:9942484ms step_avg:4059.81ms +step:2460/3200 train_loss:3.4412 train_time:9946457ms step_avg:4059.78ms +step:2461/3200 train_loss:3.2260 train_time:9950429ms step_avg:4059.74ms +step:2462/3200 train_loss:3.4803 train_time:9954401ms step_avg:4059.71ms +step:2463/3200 train_loss:3.4137 train_time:9958371ms step_avg:4059.67ms +step:2464/3200 train_loss:3.2426 train_time:9962341ms step_avg:4059.63ms +step:2465/3200 train_loss:3.4284 train_time:9966313ms step_avg:4059.60ms +step:2466/3200 train_loss:3.3386 train_time:9970281ms step_avg:4059.56ms +step:2467/3200 train_loss:3.4140 train_time:9974252ms step_avg:4059.52ms +step:2468/3200 train_loss:3.5834 train_time:9978224ms step_avg:4059.49ms +step:2469/3200 train_loss:3.3547 train_time:9982198ms step_avg:4059.45ms +step:2470/3200 train_loss:3.5177 train_time:9986172ms step_avg:4059.42ms +step:2471/3200 train_loss:3.4615 train_time:9990148ms step_avg:4059.39ms +step:2472/3200 train_loss:3.2724 train_time:9994123ms step_avg:4059.35ms +step:2473/3200 train_loss:3.3183 train_time:9998100ms step_avg:4059.32ms +step:2474/3200 train_loss:3.4017 train_time:10002080ms step_avg:4059.29ms +step:2475/3200 train_loss:3.4510 train_time:10006062ms step_avg:4059.25ms +step:2476/3200 train_loss:3.4038 train_time:10010045ms step_avg:4059.22ms +step:2477/3200 train_loss:3.4672 train_time:10014028ms step_avg:4059.19ms +step:2478/3200 train_loss:3.5251 train_time:10018020ms step_avg:4059.17ms +step:2479/3200 train_loss:3.2943 train_time:10022015ms step_avg:4059.14ms +step:2480/3200 train_loss:3.2690 train_time:10026008ms step_avg:4059.11ms +step:2481/3200 train_loss:3.5528 train_time:10030001ms step_avg:4059.09ms +step:2482/3200 train_loss:3.4192 train_time:10033983ms step_avg:4059.05ms +step:2483/3200 train_loss:3.3814 train_time:10037969ms step_avg:4059.02ms +step:2484/3200 train_loss:3.6304 train_time:10041956ms step_avg:4059.00ms +step:2485/3200 train_loss:3.3717 train_time:10045946ms step_avg:4058.97ms +step:2486/3200 train_loss:3.3686 train_time:10049944ms step_avg:4058.94ms +step:2487/3200 train_loss:3.3445 train_time:10053948ms step_avg:4058.92ms +step:2488/3200 train_loss:3.4763 train_time:10057958ms step_avg:4058.90ms +step:2489/3200 train_loss:3.2003 train_time:10061974ms step_avg:4058.88ms +step:2490/3200 train_loss:3.3564 train_time:10065981ms step_avg:4058.86ms +step:2491/3200 train_loss:3.4330 train_time:10069984ms step_avg:4058.84ms +step:2492/3200 train_loss:3.4228 train_time:10073994ms step_avg:4058.82ms +step:2493/3200 train_loss:3.3082 train_time:10078016ms step_avg:4058.81ms +step:2494/3200 train_loss:3.3191 train_time:10082043ms step_avg:4058.79ms +step:2495/3200 train_loss:3.5453 train_time:10086082ms step_avg:4058.79ms +step:2496/3200 train_loss:3.4141 train_time:10090128ms step_avg:4058.78ms +step:2497/3200 train_loss:3.2858 train_time:10094197ms step_avg:4058.78ms +step:2498/3200 train_loss:3.5647 train_time:10098277ms step_avg:4058.79ms +step:2499/3200 train_loss:3.4448 train_time:10102376ms step_avg:4058.81ms +step:2500/3200 train_loss:3.5576 train_time:10106494ms step_avg:4058.83ms +step:2500/3200 val_loss:3.3777 train_time:10106494ms step_avg:4058.83ms +step:2501/3200 train_loss:3.4493 train_time:10110616ms step_avg:4058.86ms +step:2502/3200 train_loss:3.5347 train_time:10114716ms step_avg:4058.87ms +step:2503/3200 train_loss:3.3241 train_time:10118804ms step_avg:4058.89ms +step:2504/3200 train_loss:3.2083 train_time:10122891ms step_avg:4058.90ms +step:2505/3200 train_loss:3.3234 train_time:10126958ms step_avg:4058.90ms +step:2506/3200 train_loss:3.3631 train_time:10131028ms step_avg:4058.91ms +step:2507/3200 train_loss:3.3692 train_time:10135100ms step_avg:4058.91ms +step:2508/3200 train_loss:3.3420 train_time:10139170ms step_avg:4058.92ms +step:2509/3200 train_loss:3.3588 train_time:10143244ms step_avg:4058.92ms +step:2510/3200 train_loss:3.4195 train_time:10147315ms step_avg:4058.93ms +step:2511/3200 train_loss:3.2814 train_time:10151389ms step_avg:4058.93ms +step:2512/3200 train_loss:3.2840 train_time:10155466ms step_avg:4058.94ms +step:2513/3200 train_loss:3.5048 train_time:10159547ms step_avg:4058.95ms +step:2514/3200 train_loss:3.6114 train_time:10163637ms step_avg:4058.96ms +step:2515/3200 train_loss:3.3441 train_time:10167737ms step_avg:4058.98ms +step:2516/3200 train_loss:3.5098 train_time:10171841ms step_avg:4058.99ms +step:2517/3200 train_loss:3.3591 train_time:10175962ms step_avg:4059.02ms +step:2518/3200 train_loss:3.2013 train_time:10180090ms step_avg:4059.05ms +step:2519/3200 train_loss:3.3527 train_time:10184233ms step_avg:4059.08ms +step:2520/3200 train_loss:3.3551 train_time:10188401ms step_avg:4059.12ms +step:2521/3200 train_loss:3.2220 train_time:10192580ms step_avg:4059.17ms +step:2522/3200 train_loss:3.3159 train_time:10196758ms step_avg:4059.22ms +step:2523/3200 train_loss:3.4207 train_time:10200911ms step_avg:4059.26ms +step:2524/3200 train_loss:3.3316 train_time:10205064ms step_avg:4059.29ms +step:2525/3200 train_loss:3.3923 train_time:10209204ms step_avg:4059.33ms +step:2526/3200 train_loss:3.3499 train_time:10213331ms step_avg:4059.35ms +step:2527/3200 train_loss:3.2704 train_time:10217457ms step_avg:4059.38ms +step:2528/3200 train_loss:3.3322 train_time:10221585ms step_avg:4059.41ms +step:2529/3200 train_loss:3.3672 train_time:10225709ms step_avg:4059.43ms +step:2530/3200 train_loss:3.3832 train_time:10229843ms step_avg:4059.46ms +step:2531/3200 train_loss:3.2913 train_time:10233982ms step_avg:4059.49ms +step:2532/3200 train_loss:3.3662 train_time:10238119ms step_avg:4059.52ms +step:2533/3200 train_loss:3.4288 train_time:10242267ms step_avg:4059.56ms +step:2534/3200 train_loss:3.4924 train_time:10246422ms step_avg:4059.60ms +step:2535/3200 train_loss:3.5784 train_time:10250601ms step_avg:4059.64ms +step:2536/3200 train_loss:3.6049 train_time:10254780ms step_avg:4059.69ms +step:2537/3200 train_loss:3.2217 train_time:10258964ms step_avg:4059.74ms +step:2538/3200 train_loss:3.3484 train_time:10263163ms step_avg:4059.80ms +step:2539/3200 train_loss:3.2836 train_time:10267359ms step_avg:4059.85ms +step:2540/3200 train_loss:3.3676 train_time:10271557ms step_avg:4059.90ms +step:2541/3200 train_loss:3.3174 train_time:10275752ms step_avg:4059.96ms +step:2542/3200 train_loss:3.2420 train_time:10279911ms step_avg:4060.00ms +step:2543/3200 train_loss:3.3997 train_time:10284051ms step_avg:4060.03ms +step:2544/3200 train_loss:3.2176 train_time:10288175ms step_avg:4060.05ms +step:2545/3200 train_loss:3.4084 train_time:10292289ms step_avg:4060.07ms +step:2546/3200 train_loss:3.4808 train_time:10296401ms step_avg:4060.09ms +step:2547/3200 train_loss:3.3694 train_time:10300503ms step_avg:4060.11ms +step:2548/3200 train_loss:3.3920 train_time:10304599ms step_avg:4060.13ms +step:2549/3200 train_loss:3.5679 train_time:10308694ms step_avg:4060.14ms +step:2550/3200 train_loss:3.0195 train_time:10312785ms step_avg:4060.15ms +step:2551/3200 train_loss:3.4543 train_time:10316877ms step_avg:4060.16ms +step:2552/3200 train_loss:3.6724 train_time:10320973ms step_avg:4060.18ms +step:2553/3200 train_loss:3.4178 train_time:10325075ms step_avg:4060.19ms +step:2554/3200 train_loss:3.6365 train_time:10329177ms step_avg:4060.21ms +step:2555/3200 train_loss:3.2472 train_time:10333290ms step_avg:4060.23ms +step:2556/3200 train_loss:3.4818 train_time:10337410ms step_avg:4060.26ms +step:2557/3200 train_loss:3.2483 train_time:10341541ms step_avg:4060.28ms +step:2558/3200 train_loss:3.2924 train_time:10345671ms step_avg:4060.31ms +step:2559/3200 train_loss:3.4047 train_time:10349828ms step_avg:4060.35ms +step:2560/3200 train_loss:3.3337 train_time:10354008ms step_avg:4060.40ms +step:2561/3200 train_loss:3.2239 train_time:10358175ms step_avg:4060.44ms +step:2562/3200 train_loss:3.3124 train_time:10362375ms step_avg:4060.49ms +step:2563/3200 train_loss:3.2335 train_time:10366573ms step_avg:4060.55ms +step:2564/3200 train_loss:3.6032 train_time:10370773ms step_avg:4060.60ms +step:2565/3200 train_loss:3.3268 train_time:10374970ms step_avg:4060.65ms +step:2566/3200 train_loss:3.4444 train_time:10379167ms step_avg:4060.71ms +step:2567/3200 train_loss:3.4047 train_time:10383340ms step_avg:4060.75ms +step:2568/3200 train_loss:3.3399 train_time:10387479ms step_avg:4060.78ms +step:2569/3200 train_loss:3.3669 train_time:10391588ms step_avg:4060.80ms +step:2570/3200 train_loss:3.4509 train_time:10395682ms step_avg:4060.81ms +step:2571/3200 train_loss:3.5510 train_time:10399760ms step_avg:4060.82ms +step:2572/3200 train_loss:3.3220 train_time:10403831ms step_avg:4060.82ms +step:2573/3200 train_loss:3.4300 train_time:10407891ms step_avg:4060.82ms +step:2574/3200 train_loss:3.3466 train_time:10411940ms step_avg:4060.82ms +step:2575/3200 train_loss:3.3847 train_time:10415985ms step_avg:4060.81ms +step:2576/3200 train_loss:3.3104 train_time:10420025ms step_avg:4060.80ms +step:2577/3200 train_loss:3.3682 train_time:10424062ms step_avg:4060.80ms +step:2578/3200 train_loss:3.3915 train_time:10428090ms step_avg:4060.78ms +step:2579/3200 train_loss:3.2333 train_time:10432121ms step_avg:4060.77ms +step:2580/3200 train_loss:3.4965 train_time:10436160ms step_avg:4060.76ms +step:2581/3200 train_loss:3.2306 train_time:10440200ms step_avg:4060.75ms +step:2582/3200 train_loss:3.1757 train_time:10444240ms step_avg:4060.75ms +step:2583/3200 train_loss:3.2757 train_time:10448277ms step_avg:4060.74ms +step:2584/3200 train_loss:3.3052 train_time:10452317ms step_avg:4060.73ms +step:2585/3200 train_loss:3.4442 train_time:10456362ms step_avg:4060.72ms +step:2586/3200 train_loss:3.4045 train_time:10460420ms step_avg:4060.72ms +step:2587/3200 train_loss:3.3209 train_time:10464481ms step_avg:4060.72ms +step:2588/3200 train_loss:3.3316 train_time:10468551ms step_avg:4060.73ms +step:2589/3200 train_loss:3.4323 train_time:10472631ms step_avg:4060.73ms +step:2590/3200 train_loss:3.3273 train_time:10476713ms step_avg:4060.74ms +step:2591/3200 train_loss:3.2897 train_time:10480812ms step_avg:4060.76ms +step:2592/3200 train_loss:3.4567 train_time:10484929ms step_avg:4060.78ms +step:2593/3200 train_loss:3.3493 train_time:10489057ms step_avg:4060.80ms +step:2594/3200 train_loss:3.4339 train_time:10493210ms step_avg:4060.84ms +step:2595/3200 train_loss:3.1807 train_time:10497388ms step_avg:4060.89ms +step:2596/3200 train_loss:3.3888 train_time:10501569ms step_avg:4060.93ms +step:2597/3200 train_loss:3.4133 train_time:10505771ms step_avg:4060.99ms +step:2598/3200 train_loss:3.1155 train_time:10509973ms step_avg:4061.04ms +step:2599/3200 train_loss:3.3366 train_time:10514169ms step_avg:4061.09ms +step:2600/3200 train_loss:3.3463 train_time:10518363ms step_avg:4061.14ms +step:2601/3200 train_loss:3.3717 train_time:10522541ms step_avg:4061.19ms +step:2602/3200 train_loss:3.3501 train_time:10526718ms step_avg:4061.23ms +step:2603/3200 train_loss:3.2884 train_time:10530871ms step_avg:4061.27ms +step:2604/3200 train_loss:3.2898 train_time:10535014ms step_avg:4061.30ms +step:2605/3200 train_loss:3.2570 train_time:10539142ms step_avg:4061.33ms +step:2606/3200 train_loss:3.5752 train_time:10543270ms step_avg:4061.35ms +step:2607/3200 train_loss:3.3472 train_time:10547394ms step_avg:4061.38ms +step:2608/3200 train_loss:3.3084 train_time:10551517ms step_avg:4061.40ms +step:2609/3200 train_loss:3.2959 train_time:10555645ms step_avg:4061.43ms +step:2610/3200 train_loss:3.2320 train_time:10559768ms step_avg:4061.45ms +step:2611/3200 train_loss:3.2393 train_time:10563905ms step_avg:4061.48ms +step:2612/3200 train_loss:3.3330 train_time:10568043ms step_avg:4061.51ms +step:2613/3200 train_loss:3.5816 train_time:10572186ms step_avg:4061.54ms +step:2614/3200 train_loss:3.3430 train_time:10576321ms step_avg:4061.57ms +step:2615/3200 train_loss:3.2735 train_time:10580502ms step_avg:4061.61ms +step:2616/3200 train_loss:3.3720 train_time:10584683ms step_avg:4061.66ms +step:2617/3200 train_loss:3.4099 train_time:10588864ms step_avg:4061.70ms +step:2618/3200 train_loss:3.2591 train_time:10593066ms step_avg:4061.76ms +step:2619/3200 train_loss:3.3468 train_time:10597261ms step_avg:4061.81ms +step:2620/3200 train_loss:3.1820 train_time:10601454ms step_avg:4061.86ms +step:2621/3200 train_loss:3.4269 train_time:10605577ms step_avg:4061.88ms +step:2622/3200 train_loss:3.2630 train_time:10609676ms step_avg:4061.90ms +step:2623/3200 train_loss:3.4800 train_time:10613756ms step_avg:4061.90ms +step:2624/3200 train_loss:3.3520 train_time:10617819ms step_avg:4061.90ms +step:2625/3200 train_loss:3.4069 train_time:10621862ms step_avg:4061.90ms +step:2625/3200 val_loss:3.3554 train_time:10621863ms step_avg:4061.90ms +step:2626/3200 train_loss:3.3806 train_time:10625873ms step_avg:4061.88ms +step:2627/3200 train_loss:3.5322 train_time:10629877ms step_avg:4061.86ms +step:2628/3200 train_loss:3.4438 train_time:10633882ms step_avg:4061.83ms +step:2629/3200 train_loss:3.4878 train_time:10637886ms step_avg:4061.81ms +step:2630/3200 train_loss:3.4569 train_time:10641892ms step_avg:4061.79ms +step:2631/3200 train_loss:3.3234 train_time:10645900ms step_avg:4061.77ms +step:2632/3200 train_loss:3.4018 train_time:10649908ms step_avg:4061.75ms +step:2633/3200 train_loss:3.2214 train_time:10653918ms step_avg:4061.73ms +step:2634/3200 train_loss:3.3347 train_time:10657929ms step_avg:4061.71ms +step:2635/3200 train_loss:3.4491 train_time:10661944ms step_avg:4061.69ms +step:2636/3200 train_loss:3.2479 train_time:10665963ms step_avg:4061.68ms +step:2637/3200 train_loss:3.4513 train_time:10669985ms step_avg:4061.66ms +step:2638/3200 train_loss:3.4422 train_time:10674011ms step_avg:4061.65ms +step:2639/3200 train_loss:3.3962 train_time:10678043ms step_avg:4061.64ms +step:2640/3200 train_loss:3.2136 train_time:10682087ms step_avg:4061.63ms +step:2641/3200 train_loss:3.2869 train_time:10686138ms step_avg:4061.63ms +step:2642/3200 train_loss:3.4248 train_time:10690203ms step_avg:4061.63ms +step:2643/3200 train_loss:3.3729 train_time:10694278ms step_avg:4061.63ms +step:2644/3200 train_loss:3.2804 train_time:10698363ms step_avg:4061.64ms +step:2645/3200 train_loss:3.4318 train_time:10702459ms step_avg:4061.65ms +step:2646/3200 train_loss:3.3225 train_time:10706575ms step_avg:4061.67ms +step:2647/3200 train_loss:3.3556 train_time:10710702ms step_avg:4061.70ms +step:2648/3200 train_loss:3.3499 train_time:10714860ms step_avg:4061.74ms +step:2649/3200 train_loss:3.2407 train_time:10719022ms step_avg:4061.77ms +step:2650/3200 train_loss:3.3732 train_time:10723219ms step_avg:4061.83ms +step:2651/3200 train_loss:3.2817 train_time:10727393ms step_avg:4061.87ms +step:2652/3200 train_loss:3.4510 train_time:10731527ms step_avg:4061.90ms +step:2653/3200 train_loss:3.2900 train_time:10735632ms step_avg:4061.91ms +step:2654/3200 train_loss:3.5331 train_time:10739723ms step_avg:4061.92ms +step:2655/3200 train_loss:3.2434 train_time:10743795ms step_avg:4061.93ms +step:2656/3200 train_loss:3.3375 train_time:10747861ms step_avg:4061.93ms +step:2657/3200 train_loss:3.1945 train_time:10751910ms step_avg:4061.92ms +step:2658/3200 train_loss:3.4964 train_time:10755950ms step_avg:4061.91ms +step:2659/3200 train_loss:3.3534 train_time:10759987ms step_avg:4061.91ms +step:2660/3200 train_loss:3.3745 train_time:10764015ms step_avg:4061.89ms +step:2661/3200 train_loss:3.4735 train_time:10768042ms step_avg:4061.88ms +step:2662/3200 train_loss:3.3805 train_time:10772069ms step_avg:4061.87ms +step:2663/3200 train_loss:3.5153 train_time:10776099ms step_avg:4061.85ms +step:2664/3200 train_loss:3.3701 train_time:10780125ms step_avg:4061.84ms +step:2665/3200 train_loss:3.3667 train_time:10784155ms step_avg:4061.83ms +step:2666/3200 train_loss:3.2914 train_time:10788185ms step_avg:4061.82ms +step:2667/3200 train_loss:3.4158 train_time:10792217ms step_avg:4061.81ms +step:2668/3200 train_loss:3.4236 train_time:10796251ms step_avg:4061.80ms +step:2669/3200 train_loss:3.3443 train_time:10800286ms step_avg:4061.78ms +step:2670/3200 train_loss:3.2495 train_time:10804327ms step_avg:4061.78ms +step:2671/3200 train_loss:3.3240 train_time:10808369ms step_avg:4061.77ms +step:2672/3200 train_loss:3.4914 train_time:10812418ms step_avg:4061.76ms +step:2673/3200 train_loss:3.3533 train_time:10816472ms step_avg:4061.76ms +step:2674/3200 train_loss:3.3507 train_time:10820539ms step_avg:4061.76ms +step:2675/3200 train_loss:3.5666 train_time:10824618ms step_avg:4061.77ms +step:2676/3200 train_loss:2.9321 train_time:10828704ms step_avg:4061.78ms +step:2677/3200 train_loss:3.2702 train_time:10832806ms step_avg:4061.79ms +step:2678/3200 train_loss:3.4944 train_time:10836924ms step_avg:4061.82ms +step:2679/3200 train_loss:3.5273 train_time:10841058ms step_avg:4061.84ms +step:2680/3200 train_loss:3.2380 train_time:10845207ms step_avg:4061.88ms +step:2681/3200 train_loss:3.2341 train_time:10849379ms step_avg:4061.92ms +step:2682/3200 train_loss:3.3546 train_time:10853561ms step_avg:4061.96ms +step:2683/3200 train_loss:3.5419 train_time:10857761ms step_avg:4062.01ms +step:2684/3200 train_loss:3.3256 train_time:10861959ms step_avg:4062.06ms +step:2685/3200 train_loss:3.3564 train_time:10866157ms step_avg:4062.11ms +step:2686/3200 train_loss:3.3057 train_time:10870352ms step_avg:4062.16ms +step:2687/3200 train_loss:3.2730 train_time:10874511ms step_avg:4062.20ms +step:2688/3200 train_loss:3.2898 train_time:10878658ms step_avg:4062.23ms +step:2689/3200 train_loss:3.5000 train_time:10882786ms step_avg:4062.26ms +step:2690/3200 train_loss:3.4730 train_time:10886905ms step_avg:4062.28ms +step:2691/3200 train_loss:3.3312 train_time:10891011ms step_avg:4062.29ms +step:2692/3200 train_loss:3.4927 train_time:10895116ms step_avg:4062.31ms +step:2693/3200 train_loss:3.4185 train_time:10899214ms step_avg:4062.32ms +step:2694/3200 train_loss:3.4943 train_time:10903305ms step_avg:4062.33ms +step:2695/3200 train_loss:3.2917 train_time:10907398ms step_avg:4062.35ms +step:2696/3200 train_loss:3.2503 train_time:10911496ms step_avg:4062.36ms +step:2697/3200 train_loss:3.2674 train_time:10915596ms step_avg:4062.37ms +step:2698/3200 train_loss:3.6958 train_time:10919695ms step_avg:4062.39ms +step:2699/3200 train_loss:3.4018 train_time:10923805ms step_avg:4062.40ms +step:2700/3200 train_loss:3.7062 train_time:10927920ms step_avg:4062.42ms +step:2701/3200 train_loss:3.5733 train_time:10932048ms step_avg:4062.45ms +step:2702/3200 train_loss:3.1915 train_time:10936184ms step_avg:4062.48ms +step:2703/3200 train_loss:3.3021 train_time:10940336ms step_avg:4062.51ms +step:2704/3200 train_loss:3.4656 train_time:10944508ms step_avg:4062.55ms +step:2705/3200 train_loss:3.5136 train_time:10948688ms step_avg:4062.59ms +step:2706/3200 train_loss:3.2626 train_time:10952869ms step_avg:4062.64ms +step:2707/3200 train_loss:3.1593 train_time:10957067ms step_avg:4062.69ms +step:2708/3200 train_loss:3.3737 train_time:10961223ms step_avg:4062.72ms +step:2709/3200 train_loss:3.3244 train_time:10965360ms step_avg:4062.75ms +step:2710/3200 train_loss:3.4313 train_time:10969474ms step_avg:4062.77ms +step:2711/3200 train_loss:3.3737 train_time:10973562ms step_avg:4062.78ms +step:2712/3200 train_loss:3.3525 train_time:10977639ms step_avg:4062.78ms +step:2713/3200 train_loss:3.4511 train_time:10981704ms step_avg:4062.78ms +step:2714/3200 train_loss:3.3390 train_time:10985758ms step_avg:4062.78ms +step:2715/3200 train_loss:3.3366 train_time:10989806ms step_avg:4062.77ms +step:2716/3200 train_loss:3.1433 train_time:10993845ms step_avg:4062.77ms +step:2717/3200 train_loss:3.4409 train_time:10997883ms step_avg:4062.76ms +step:2718/3200 train_loss:3.4432 train_time:11001911ms step_avg:4062.74ms +step:2719/3200 train_loss:3.3779 train_time:11005940ms step_avg:4062.73ms +step:2720/3200 train_loss:3.3455 train_time:11009967ms step_avg:4062.72ms +step:2721/3200 train_loss:3.2939 train_time:11013996ms step_avg:4062.71ms +step:2722/3200 train_loss:3.0961 train_time:11018025ms step_avg:4062.69ms +step:2723/3200 train_loss:3.3586 train_time:11022055ms step_avg:4062.68ms +step:2724/3200 train_loss:3.3268 train_time:11026083ms step_avg:4062.67ms +step:2725/3200 train_loss:3.4968 train_time:11030114ms step_avg:4062.66ms +step:2726/3200 train_loss:3.3923 train_time:11034153ms step_avg:4062.65ms +step:2727/3200 train_loss:3.3366 train_time:11038193ms step_avg:4062.64ms +step:2728/3200 train_loss:3.3008 train_time:11042237ms step_avg:4062.63ms +step:2729/3200 train_loss:3.3544 train_time:11046293ms step_avg:4062.63ms +step:2730/3200 train_loss:3.2694 train_time:11050353ms step_avg:4062.63ms +step:2731/3200 train_loss:3.4433 train_time:11054424ms step_avg:4062.63ms +step:2732/3200 train_loss:3.3159 train_time:11058502ms step_avg:4062.64ms +step:2733/3200 train_loss:3.3714 train_time:11062584ms step_avg:4062.65ms +step:2734/3200 train_loss:3.4888 train_time:11066689ms step_avg:4062.66ms +step:2735/3200 train_loss:3.3949 train_time:11070815ms step_avg:4062.68ms +step:2736/3200 train_loss:3.4582 train_time:11074950ms step_avg:4062.71ms +step:2737/3200 train_loss:3.0776 train_time:11079105ms step_avg:4062.74ms +step:2738/3200 train_loss:3.3640 train_time:11083290ms step_avg:4062.79ms +step:2739/3200 train_loss:3.3704 train_time:11087498ms step_avg:4062.84ms +step:2740/3200 train_loss:3.0495 train_time:11091726ms step_avg:4062.90ms +step:2741/3200 train_loss:3.3712 train_time:11095951ms step_avg:4062.96ms +step:2742/3200 train_loss:3.3870 train_time:11100169ms step_avg:4063.02ms +step:2743/3200 train_loss:3.3840 train_time:11104361ms step_avg:4063.07ms +step:2744/3200 train_loss:3.2282 train_time:11108501ms step_avg:4063.09ms +step:2745/3200 train_loss:3.3315 train_time:11112594ms step_avg:4063.11ms +step:2746/3200 train_loss:3.2975 train_time:11116656ms step_avg:4063.11ms +step:2747/3200 train_loss:3.1738 train_time:11120705ms step_avg:4063.10ms +step:2748/3200 train_loss:3.3201 train_time:11124739ms step_avg:4063.09ms +step:2749/3200 train_loss:3.3911 train_time:11128758ms step_avg:4063.07ms +step:2750/3200 train_loss:3.3808 train_time:11132789ms step_avg:4063.06ms +step:2750/3200 val_loss:3.3350 train_time:11132790ms step_avg:4063.06ms +step:2751/3200 train_loss:3.2459 train_time:11136795ms step_avg:4063.04ms +step:2752/3200 train_loss:3.3606 train_time:11140794ms step_avg:4063.02ms +step:2753/3200 train_loss:3.3835 train_time:11144792ms step_avg:4062.99ms +step:2754/3200 train_loss:3.2321 train_time:11148789ms step_avg:4062.97ms +step:2755/3200 train_loss:3.2735 train_time:11152788ms step_avg:4062.95ms +step:2756/3200 train_loss:3.2765 train_time:11156785ms step_avg:4062.92ms +step:2757/3200 train_loss:3.3717 train_time:11160783ms step_avg:4062.90ms +step:2758/3200 train_loss:3.3036 train_time:11164782ms step_avg:4062.88ms +step:2759/3200 train_loss:3.3780 train_time:11168783ms step_avg:4062.85ms +step:2760/3200 train_loss:3.4560 train_time:11172788ms step_avg:4062.83ms +step:2761/3200 train_loss:3.4390 train_time:11176797ms step_avg:4062.81ms +step:2762/3200 train_loss:3.3525 train_time:11180805ms step_avg:4062.79ms +step:2763/3200 train_loss:3.0142 train_time:11184820ms step_avg:4062.78ms +step:2764/3200 train_loss:3.3641 train_time:11188836ms step_avg:4062.76ms +step:2765/3200 train_loss:3.2158 train_time:11192858ms step_avg:4062.74ms +step:2766/3200 train_loss:3.5870 train_time:11196886ms step_avg:4062.73ms +step:2767/3200 train_loss:3.2144 train_time:11200912ms step_avg:4062.72ms +step:2768/3200 train_loss:3.3277 train_time:11204945ms step_avg:4062.71ms +step:2769/3200 train_loss:3.3111 train_time:11208965ms step_avg:4062.69ms +step:2770/3200 train_loss:3.1391 train_time:11212988ms step_avg:4062.68ms +step:2771/3200 train_loss:3.3220 train_time:11217020ms step_avg:4062.67ms +step:2772/3200 train_loss:3.4012 train_time:11221060ms step_avg:4062.66ms +step:2773/3200 train_loss:3.3275 train_time:11225112ms step_avg:4062.65ms +step:2774/3200 train_loss:3.4167 train_time:11229177ms step_avg:4062.65ms +step:2775/3200 train_loss:3.4466 train_time:11233259ms step_avg:4062.66ms +step:2776/3200 train_loss:3.3065 train_time:11237350ms step_avg:4062.67ms +step:2777/3200 train_loss:3.3256 train_time:11241459ms step_avg:4062.69ms +step:2778/3200 train_loss:3.1963 train_time:11245588ms step_avg:4062.71ms +step:2779/3200 train_loss:3.2760 train_time:11249737ms step_avg:4062.74ms +step:2780/3200 train_loss:3.3139 train_time:11253917ms step_avg:4062.79ms +step:2781/3200 train_loss:3.2821 train_time:11258120ms step_avg:4062.84ms +step:2782/3200 train_loss:3.3218 train_time:11262321ms step_avg:4062.89ms +step:2783/3200 train_loss:3.3052 train_time:11266517ms step_avg:4062.93ms +step:2784/3200 train_loss:3.5970 train_time:11270714ms step_avg:4062.98ms +step:2785/3200 train_loss:3.4277 train_time:11274910ms step_avg:4063.03ms +step:2786/3200 train_loss:3.3353 train_time:11279086ms step_avg:4063.07ms +step:2787/3200 train_loss:3.4313 train_time:11283242ms step_avg:4063.10ms +step:2788/3200 train_loss:3.4730 train_time:11287384ms step_avg:4063.13ms +step:2789/3200 train_loss:3.4181 train_time:11291524ms step_avg:4063.16ms +step:2790/3200 train_loss:3.1955 train_time:11295661ms step_avg:4063.19ms +step:2791/3200 train_loss:3.3456 train_time:11299795ms step_avg:4063.21ms +step:2792/3200 train_loss:3.3182 train_time:11303922ms step_avg:4063.24ms +step:2793/3200 train_loss:3.5164 train_time:11308055ms step_avg:4063.26ms +step:2794/3200 train_loss:3.3269 train_time:11312192ms step_avg:4063.29ms +step:2795/3200 train_loss:3.4465 train_time:11316345ms step_avg:4063.32ms +step:2796/3200 train_loss:3.3649 train_time:11320497ms step_avg:4063.35ms +step:2797/3200 train_loss:3.3545 train_time:11324674ms step_avg:4063.39ms +step:2798/3200 train_loss:3.3209 train_time:11328853ms step_avg:4063.43ms +step:2799/3200 train_loss:3.3593 train_time:11333035ms step_avg:4063.48ms +step:2800/3200 train_loss:3.2656 train_time:11337232ms step_avg:4063.52ms +step:2801/3200 train_loss:3.2818 train_time:11341408ms step_avg:4063.56ms +step:2802/3200 train_loss:3.4521 train_time:11345564ms step_avg:4063.60ms +step:2803/3200 train_loss:3.5998 train_time:11349681ms step_avg:4063.62ms +step:2804/3200 train_loss:3.3303 train_time:11353771ms step_avg:4063.63ms +step:2805/3200 train_loss:3.2770 train_time:11357846ms step_avg:4063.63ms +step:2806/3200 train_loss:3.4660 train_time:11361910ms step_avg:4063.63ms +step:2807/3200 train_loss:3.2383 train_time:11365970ms step_avg:4063.63ms +step:2808/3200 train_loss:3.2659 train_time:11370014ms step_avg:4063.62ms +step:2809/3200 train_loss:3.1766 train_time:11374060ms step_avg:4063.62ms +step:2810/3200 train_loss:3.2287 train_time:11378098ms step_avg:4063.61ms +step:2811/3200 train_loss:3.1807 train_time:11382126ms step_avg:4063.59ms +step:2812/3200 train_loss:3.5952 train_time:11386153ms step_avg:4063.58ms +step:2813/3200 train_loss:3.6611 train_time:11390180ms step_avg:4063.57ms +step:2814/3200 train_loss:3.3663 train_time:11394205ms step_avg:4063.55ms +step:2815/3200 train_loss:3.3152 train_time:11398229ms step_avg:4063.54ms +step:2816/3200 train_loss:3.4843 train_time:11402253ms step_avg:4063.53ms +step:2817/3200 train_loss:3.4261 train_time:11406278ms step_avg:4063.51ms +step:2818/3200 train_loss:3.3734 train_time:11410305ms step_avg:4063.50ms +step:2819/3200 train_loss:3.4767 train_time:11414343ms step_avg:4063.49ms +step:2820/3200 train_loss:3.3769 train_time:11418378ms step_avg:4063.48ms +step:2821/3200 train_loss:3.3321 train_time:11422418ms step_avg:4063.47ms +step:2822/3200 train_loss:3.2594 train_time:11426462ms step_avg:4063.46ms +step:2823/3200 train_loss:3.2099 train_time:11430512ms step_avg:4063.46ms +step:2824/3200 train_loss:3.2239 train_time:11434571ms step_avg:4063.46ms +step:2825/3200 train_loss:3.4607 train_time:11438639ms step_avg:4063.46ms +step:2826/3200 train_loss:3.4989 train_time:11442717ms step_avg:4063.46ms +step:2827/3200 train_loss:3.3649 train_time:11446800ms step_avg:4063.47ms +step:2828/3200 train_loss:3.6627 train_time:11450905ms step_avg:4063.49ms +step:2829/3200 train_loss:3.2798 train_time:11455018ms step_avg:4063.50ms +step:2830/3200 train_loss:3.4605 train_time:11459153ms step_avg:4063.53ms +step:2831/3200 train_loss:3.3934 train_time:11463311ms step_avg:4063.56ms +step:2832/3200 train_loss:3.5089 train_time:11467491ms step_avg:4063.60ms +step:2833/3200 train_loss:3.3663 train_time:11471672ms step_avg:4063.65ms +step:2834/3200 train_loss:3.5226 train_time:11475875ms step_avg:4063.70ms +step:2835/3200 train_loss:3.2159 train_time:11480095ms step_avg:4063.75ms +step:2836/3200 train_loss:3.4270 train_time:11484287ms step_avg:4063.80ms +step:2837/3200 train_loss:3.4593 train_time:11488428ms step_avg:4063.82ms +step:2838/3200 train_loss:3.5741 train_time:11492513ms step_avg:4063.83ms +step:2839/3200 train_loss:3.2784 train_time:11496554ms step_avg:4063.82ms +step:2840/3200 train_loss:3.4034 train_time:11500575ms step_avg:4063.81ms +step:2841/3200 train_loss:3.2682 train_time:11504595ms step_avg:4063.79ms +step:2842/3200 train_loss:3.3196 train_time:11508606ms step_avg:4063.77ms +step:2843/3200 train_loss:3.3487 train_time:11512604ms step_avg:4063.75ms +step:2844/3200 train_loss:3.4333 train_time:11516599ms step_avg:4063.73ms +step:2845/3200 train_loss:3.3937 train_time:11520600ms step_avg:4063.70ms +step:2846/3200 train_loss:3.3105 train_time:11524592ms step_avg:4063.68ms +step:2847/3200 train_loss:3.3715 train_time:11528584ms step_avg:4063.65ms +step:2848/3200 train_loss:3.3214 train_time:11532570ms step_avg:4063.63ms +step:2849/3200 train_loss:3.3216 train_time:11536551ms step_avg:4063.60ms +step:2850/3200 train_loss:3.2957 train_time:11540530ms step_avg:4063.57ms +step:2851/3200 train_loss:3.5752 train_time:11544507ms step_avg:4063.54ms +step:2852/3200 train_loss:3.2416 train_time:11548482ms step_avg:4063.51ms +step:2853/3200 train_loss:3.2713 train_time:11552455ms step_avg:4063.47ms +step:2854/3200 train_loss:3.4145 train_time:11556425ms step_avg:4063.44ms +step:2855/3200 train_loss:3.2491 train_time:11560395ms step_avg:4063.41ms +step:2856/3200 train_loss:3.2846 train_time:11564365ms step_avg:4063.38ms +step:2857/3200 train_loss:3.2114 train_time:11568336ms step_avg:4063.34ms +step:2858/3200 train_loss:3.4049 train_time:11572305ms step_avg:4063.31ms +step:2859/3200 train_loss:3.6401 train_time:11576276ms step_avg:4063.28ms +step:2860/3200 train_loss:3.4039 train_time:11580248ms step_avg:4063.24ms +step:2861/3200 train_loss:3.3114 train_time:11584221ms step_avg:4063.21ms +step:2862/3200 train_loss:3.5613 train_time:11588190ms step_avg:4063.18ms +step:2863/3200 train_loss:3.3239 train_time:11592163ms step_avg:4063.15ms +step:2864/3200 train_loss:3.4840 train_time:11596135ms step_avg:4063.12ms +step:2865/3200 train_loss:3.3589 train_time:11600108ms step_avg:4063.09ms +step:2866/3200 train_loss:3.3179 train_time:11604083ms step_avg:4063.05ms +step:2867/3200 train_loss:3.4287 train_time:11608059ms step_avg:4063.02ms +step:2868/3200 train_loss:3.2807 train_time:11612039ms step_avg:4062.99ms +step:2869/3200 train_loss:3.4137 train_time:11616020ms step_avg:4062.97ms +step:2870/3200 train_loss:3.3850 train_time:11620002ms step_avg:4062.94ms +step:2871/3200 train_loss:3.3768 train_time:11623986ms step_avg:4062.91ms +step:2872/3200 train_loss:3.3902 train_time:11627976ms step_avg:4062.88ms +step:2873/3200 train_loss:3.4247 train_time:11631964ms step_avg:4062.86ms +step:2874/3200 train_loss:3.3437 train_time:11635957ms step_avg:4062.83ms +step:2875/3200 train_loss:3.7695 train_time:11639952ms step_avg:4062.81ms +step:2875/3200 val_loss:3.3144 train_time:11639952ms step_avg:4062.81ms +step:2876/3200 train_loss:3.5406 train_time:11643970ms step_avg:4062.79ms +step:2877/3200 train_loss:3.2966 train_time:11647991ms step_avg:4062.78ms +step:2878/3200 train_loss:3.4915 train_time:11652015ms step_avg:4062.77ms +step:2879/3200 train_loss:3.1108 train_time:11656021ms step_avg:4062.75ms +step:2880/3200 train_loss:3.1828 train_time:11660034ms step_avg:4062.73ms +step:2881/3200 train_loss:3.3797 train_time:11664050ms step_avg:4062.71ms +step:2882/3200 train_loss:3.5051 train_time:11668072ms step_avg:4062.70ms +step:2883/3200 train_loss:3.2755 train_time:11672120ms step_avg:4062.69ms +step:2884/3200 train_loss:3.4295 train_time:11676174ms step_avg:4062.69ms +step:2885/3200 train_loss:3.3576 train_time:11680243ms step_avg:4062.69ms +step:2886/3200 train_loss:3.4253 train_time:11684330ms step_avg:4062.70ms +step:2887/3200 train_loss:3.2213 train_time:11688438ms step_avg:4062.72ms +step:2888/3200 train_loss:3.4841 train_time:11692561ms step_avg:4062.74ms +step:2889/3200 train_loss:3.4187 train_time:11696722ms step_avg:4062.77ms +step:2890/3200 train_loss:3.2880 train_time:11700903ms step_avg:4062.81ms +step:2891/3200 train_loss:3.3038 train_time:11705084ms step_avg:4062.85ms +step:2892/3200 train_loss:3.3752 train_time:11709279ms step_avg:4062.90ms +step:2893/3200 train_loss:3.3311 train_time:11713423ms step_avg:4062.93ms +step:2894/3200 train_loss:3.2863 train_time:11717518ms step_avg:4062.94ms +step:2895/3200 train_loss:3.0579 train_time:11721576ms step_avg:4062.94ms +step:2896/3200 train_loss:3.3738 train_time:11725610ms step_avg:4062.93ms +step:2897/3200 train_loss:3.3121 train_time:11729625ms step_avg:4062.91ms +step:2898/3200 train_loss:3.8653 train_time:11733637ms step_avg:4062.89ms +step:2899/3200 train_loss:3.2756 train_time:11737651ms step_avg:4062.88ms +step:2900/3200 train_loss:3.1112 train_time:11741656ms step_avg:4062.86ms +step:2901/3200 train_loss:3.3070 train_time:11745653ms step_avg:4062.83ms +step:2902/3200 train_loss:3.3292 train_time:11749645ms step_avg:4062.81ms +step:2903/3200 train_loss:3.1731 train_time:11753630ms step_avg:4062.78ms +step:2904/3200 train_loss:3.1808 train_time:11757614ms step_avg:4062.76ms +step:2905/3200 train_loss:3.4323 train_time:11761611ms step_avg:4062.73ms +step:2906/3200 train_loss:3.2104 train_time:11765607ms step_avg:4062.71ms +step:2907/3200 train_loss:3.4198 train_time:11769600ms step_avg:4062.69ms +step:2908/3200 train_loss:4.8759 train_time:11773592ms step_avg:4062.66ms +step:2909/3200 train_loss:3.3917 train_time:11777581ms step_avg:4062.64ms +step:2910/3200 train_loss:3.2596 train_time:11781568ms step_avg:4062.61ms +step:2911/3200 train_loss:3.3882 train_time:11785553ms step_avg:4062.58ms +step:2912/3200 train_loss:3.1609 train_time:11789539ms step_avg:4062.56ms +step:2913/3200 train_loss:3.4298 train_time:11793528ms step_avg:4062.53ms +step:2914/3200 train_loss:3.3452 train_time:11797517ms step_avg:4062.51ms +step:2915/3200 train_loss:3.2736 train_time:11801510ms step_avg:4062.48ms +step:2916/3200 train_loss:3.6235 train_time:11805502ms step_avg:4062.46ms +step:2917/3200 train_loss:3.4053 train_time:11809491ms step_avg:4062.43ms +step:2918/3200 train_loss:3.4642 train_time:11813485ms step_avg:4062.41ms +step:2919/3200 train_loss:3.3239 train_time:11817480ms step_avg:4062.39ms +step:2920/3200 train_loss:3.3569 train_time:11821477ms step_avg:4062.36ms +step:2921/3200 train_loss:3.1696 train_time:11825477ms step_avg:4062.34ms +step:2922/3200 train_loss:3.2928 train_time:11829476ms step_avg:4062.32ms +step:2923/3200 train_loss:3.3043 train_time:11833458ms step_avg:4062.29ms +step:2924/3200 train_loss:3.3434 train_time:11837447ms step_avg:4062.27ms +step:2925/3200 train_loss:3.4227 train_time:11841438ms step_avg:4062.24ms +step:2926/3200 train_loss:3.3066 train_time:11845429ms step_avg:4062.22ms +step:2927/3200 train_loss:3.2146 train_time:11849424ms step_avg:4062.20ms +step:2928/3200 train_loss:3.7189 train_time:11853430ms step_avg:4062.18ms +step:2929/3200 train_loss:3.5131 train_time:11857438ms step_avg:4062.16ms +step:2930/3200 train_loss:3.2987 train_time:11861453ms step_avg:4062.14ms +step:2931/3200 train_loss:3.2878 train_time:11865478ms step_avg:4062.13ms +step:2932/3200 train_loss:3.1972 train_time:11869508ms step_avg:4062.12ms +step:2933/3200 train_loss:3.1955 train_time:11873526ms step_avg:4062.10ms +step:2934/3200 train_loss:3.4232 train_time:11877542ms step_avg:4062.09ms +step:2935/3200 train_loss:3.1504 train_time:11881566ms step_avg:4062.07ms +step:2936/3200 train_loss:3.3624 train_time:11885595ms step_avg:4062.06ms +step:2937/3200 train_loss:3.3253 train_time:11889641ms step_avg:4062.06ms +step:2938/3200 train_loss:3.3639 train_time:11893696ms step_avg:4062.05ms +step:2939/3200 train_loss:3.2906 train_time:11897769ms step_avg:4062.06ms +step:2940/3200 train_loss:3.2134 train_time:11901856ms step_avg:4062.07ms +step:2941/3200 train_loss:3.2497 train_time:11905960ms step_avg:4062.08ms +step:2942/3200 train_loss:3.4140 train_time:11910087ms step_avg:4062.10ms +step:2943/3200 train_loss:3.3506 train_time:11914235ms step_avg:4062.13ms +step:2944/3200 train_loss:3.3465 train_time:11918414ms step_avg:4062.17ms +step:2945/3200 train_loss:4.4135 train_time:11922596ms step_avg:4062.21ms +step:2946/3200 train_loss:3.0978 train_time:11926797ms step_avg:4062.26ms +step:2947/3200 train_loss:3.6317 train_time:11930993ms step_avg:4062.31ms +step:2948/3200 train_loss:3.3484 train_time:11935191ms step_avg:4062.35ms +step:2949/3200 train_loss:3.4191 train_time:11939367ms step_avg:4062.39ms +step:2950/3200 train_loss:3.4471 train_time:11943523ms step_avg:4062.42ms +step:2951/3200 train_loss:3.3860 train_time:11947669ms step_avg:4062.45ms +step:2952/3200 train_loss:3.1378 train_time:11951803ms step_avg:4062.48ms +step:2953/3200 train_loss:3.2452 train_time:11955934ms step_avg:4062.50ms +step:2954/3200 train_loss:3.1327 train_time:11960060ms step_avg:4062.52ms +step:2955/3200 train_loss:3.2916 train_time:11964181ms step_avg:4062.54ms +step:2956/3200 train_loss:3.2650 train_time:11968302ms step_avg:4062.56ms +step:2957/3200 train_loss:3.1457 train_time:11972430ms step_avg:4062.58ms +step:2958/3200 train_loss:3.1815 train_time:11976569ms step_avg:4062.61ms +step:2959/3200 train_loss:3.2101 train_time:11980713ms step_avg:4062.64ms +step:2960/3200 train_loss:3.2634 train_time:11984871ms step_avg:4062.67ms +step:2961/3200 train_loss:3.2912 train_time:11989051ms step_avg:4062.71ms +step:2962/3200 train_loss:3.3170 train_time:11993231ms step_avg:4062.75ms +step:2963/3200 train_loss:3.3405 train_time:11997410ms step_avg:4062.79ms +step:2964/3200 train_loss:3.4804 train_time:12001592ms step_avg:4062.83ms +step:2965/3200 train_loss:3.3814 train_time:12005794ms step_avg:4062.87ms +step:2966/3200 train_loss:3.3570 train_time:12009993ms step_avg:4062.92ms +step:2967/3200 train_loss:3.5660 train_time:12014182ms step_avg:4062.96ms +step:2968/3200 train_loss:3.2127 train_time:12018305ms step_avg:4062.98ms +step:2969/3200 train_loss:3.2325 train_time:12022382ms step_avg:4062.99ms +step:2970/3200 train_loss:3.2149 train_time:12026420ms step_avg:4062.98ms +step:2971/3200 train_loss:3.3449 train_time:12030437ms step_avg:4062.96ms +step:2972/3200 train_loss:3.3608 train_time:12034463ms step_avg:4062.95ms +step:2973/3200 train_loss:3.1804 train_time:12038477ms step_avg:4062.94ms +step:2974/3200 train_loss:3.2054 train_time:12042475ms step_avg:4062.91ms +step:2975/3200 train_loss:3.3030 train_time:12046468ms step_avg:4062.89ms +step:2976/3200 train_loss:3.2335 train_time:12050470ms step_avg:4062.87ms +step:2977/3200 train_loss:3.2568 train_time:12054468ms step_avg:4062.85ms +step:2978/3200 train_loss:3.1787 train_time:12058460ms step_avg:4062.82ms +step:2979/3200 train_loss:3.4856 train_time:12062448ms step_avg:4062.80ms +step:2980/3200 train_loss:3.3368 train_time:12066432ms step_avg:4062.77ms +step:2981/3200 train_loss:3.1808 train_time:12070415ms step_avg:4062.74ms +step:2982/3200 train_loss:3.3102 train_time:12074395ms step_avg:4062.72ms +step:2983/3200 train_loss:3.3221 train_time:12078376ms step_avg:4062.69ms +step:2984/3200 train_loss:3.2251 train_time:12082352ms step_avg:4062.66ms +step:2985/3200 train_loss:3.1932 train_time:12086328ms step_avg:4062.63ms +step:2986/3200 train_loss:3.4249 train_time:12090302ms step_avg:4062.60ms +step:2987/3200 train_loss:3.2442 train_time:12094276ms step_avg:4062.57ms +step:2988/3200 train_loss:3.4216 train_time:12098250ms step_avg:4062.54ms +step:2989/3200 train_loss:3.3251 train_time:12102224ms step_avg:4062.51ms +step:2990/3200 train_loss:3.3676 train_time:12106200ms step_avg:4062.48ms +step:2991/3200 train_loss:3.3758 train_time:12110175ms step_avg:4062.45ms +step:2992/3200 train_loss:3.3802 train_time:12114149ms step_avg:4062.42ms +step:2993/3200 train_loss:3.4602 train_time:12118123ms step_avg:4062.39ms +step:2994/3200 train_loss:3.3027 train_time:12122100ms step_avg:4062.37ms +step:2995/3200 train_loss:3.1408 train_time:12126080ms step_avg:4062.34ms +step:2996/3200 train_loss:3.2875 train_time:12130058ms step_avg:4062.31ms +step:2997/3200 train_loss:3.0913 train_time:12134038ms step_avg:4062.28ms +step:2998/3200 train_loss:3.1683 train_time:12138024ms step_avg:4062.26ms +step:2999/3200 train_loss:3.3960 train_time:12142008ms step_avg:4062.23ms +step:3000/3200 train_loss:3.3834 train_time:12145992ms step_avg:4062.20ms +step:3000/3200 val_loss:3.2964 train_time:12145992ms step_avg:4062.20ms +step:3001/3200 train_loss:3.3633 train_time:12149988ms step_avg:4062.18ms +step:3002/3200 train_loss:3.4673 train_time:12153978ms step_avg:4062.16ms +step:3003/3200 train_loss:3.0215 train_time:12157975ms step_avg:4062.14ms +step:3004/3200 train_loss:3.2895 train_time:12161974ms step_avg:4062.12ms +step:3005/3200 train_loss:3.4708 train_time:12165980ms step_avg:4062.10ms +step:3006/3200 train_loss:3.3300 train_time:12169993ms step_avg:4062.08ms +step:3007/3200 train_loss:3.1284 train_time:12174013ms step_avg:4062.07ms +step:3008/3200 train_loss:3.1658 train_time:12178043ms step_avg:4062.06ms +step:3009/3200 train_loss:3.3142 train_time:12182074ms step_avg:4062.05ms +step:3010/3200 train_loss:3.2396 train_time:12186094ms step_avg:4062.03ms +step:3011/3200 train_loss:3.2998 train_time:12190121ms step_avg:4062.02ms +step:3012/3200 train_loss:3.2621 train_time:12194154ms step_avg:4062.01ms +step:3013/3200 train_loss:3.3704 train_time:12198202ms step_avg:4062.01ms +step:3014/3200 train_loss:3.3325 train_time:12202266ms step_avg:4062.01ms +step:3015/3200 train_loss:3.2749 train_time:12206343ms step_avg:4062.01ms +step:3016/3200 train_loss:3.2837 train_time:12210435ms step_avg:4062.02ms +step:3017/3200 train_loss:3.6119 train_time:12214556ms step_avg:4062.04ms +step:3018/3200 train_loss:3.2328 train_time:12218700ms step_avg:4062.07ms +step:3019/3200 train_loss:3.3380 train_time:12222866ms step_avg:4062.10ms +step:3020/3200 train_loss:3.3519 train_time:12227060ms step_avg:4062.15ms +step:3021/3200 train_loss:3.3120 train_time:12231259ms step_avg:4062.19ms +step:3022/3200 train_loss:3.1970 train_time:12235457ms step_avg:4062.24ms +step:3023/3200 train_loss:3.3253 train_time:12239636ms step_avg:4062.28ms +step:3024/3200 train_loss:3.3515 train_time:12243814ms step_avg:4062.31ms +step:3025/3200 train_loss:3.3634 train_time:12247993ms step_avg:4062.35ms +step:3026/3200 train_loss:3.4113 train_time:12252172ms step_avg:4062.39ms +step:3027/3200 train_loss:3.2726 train_time:12256373ms step_avg:4062.44ms +step:3028/3200 train_loss:3.2343 train_time:12260572ms step_avg:4062.48ms +step:3029/3200 train_loss:3.2927 train_time:12264770ms step_avg:4062.53ms +step:3030/3200 train_loss:3.2868 train_time:12268971ms step_avg:4062.57ms +step:3031/3200 train_loss:3.3683 train_time:12273170ms step_avg:4062.62ms +step:3032/3200 train_loss:3.3921 train_time:12277366ms step_avg:4062.66ms +step:3033/3200 train_loss:3.2430 train_time:12281564ms step_avg:4062.71ms +step:3034/3200 train_loss:3.3294 train_time:12285752ms step_avg:4062.75ms +step:3035/3200 train_loss:3.1627 train_time:12289874ms step_avg:4062.77ms +step:3036/3200 train_loss:3.2085 train_time:12293957ms step_avg:4062.78ms +step:3037/3200 train_loss:3.3020 train_time:12298016ms step_avg:4062.77ms +step:3038/3200 train_loss:3.1045 train_time:12302056ms step_avg:4062.77ms +step:3039/3200 train_loss:3.3245 train_time:12306077ms step_avg:4062.75ms +step:3040/3200 train_loss:3.2311 train_time:12310093ms step_avg:4062.74ms +step:3041/3200 train_loss:3.2921 train_time:12314120ms step_avg:4062.73ms +step:3042/3200 train_loss:3.3868 train_time:12318146ms step_avg:4062.71ms +step:3043/3200 train_loss:3.2819 train_time:12322160ms step_avg:4062.70ms +step:3044/3200 train_loss:3.1885 train_time:12326170ms step_avg:4062.68ms +step:3045/3200 train_loss:3.3162 train_time:12330174ms step_avg:4062.66ms +step:3046/3200 train_loss:3.3065 train_time:12334176ms step_avg:4062.64ms +step:3047/3200 train_loss:3.6225 train_time:12338173ms step_avg:4062.62ms +step:3048/3200 train_loss:3.3067 train_time:12342170ms step_avg:4062.60ms +step:3049/3200 train_loss:3.2539 train_time:12346167ms step_avg:4062.58ms +step:3050/3200 train_loss:3.2916 train_time:12350161ms step_avg:4062.55ms +step:3051/3200 train_loss:3.3141 train_time:12354158ms step_avg:4062.53ms +step:3052/3200 train_loss:3.0032 train_time:12358154ms step_avg:4062.51ms +step:3053/3200 train_loss:3.3782 train_time:12362148ms step_avg:4062.49ms +step:3054/3200 train_loss:3.2867 train_time:12366142ms step_avg:4062.46ms +step:3055/3200 train_loss:3.3377 train_time:12370139ms step_avg:4062.44ms +step:3056/3200 train_loss:3.2042 train_time:12374138ms step_avg:4062.42ms +step:3057/3200 train_loss:3.2223 train_time:12378138ms step_avg:4062.40ms +step:3058/3200 train_loss:3.1398 train_time:12382140ms step_avg:4062.38ms +step:3059/3200 train_loss:3.1824 train_time:12386146ms step_avg:4062.36ms +step:3060/3200 train_loss:3.2570 train_time:12390153ms step_avg:4062.35ms +step:3061/3200 train_loss:3.2829 train_time:12394164ms step_avg:4062.33ms +step:3062/3200 train_loss:3.2659 train_time:12398173ms step_avg:4062.31ms +step:3063/3200 train_loss:3.3213 train_time:12402189ms step_avg:4062.30ms +step:3064/3200 train_loss:3.1708 train_time:12406219ms step_avg:4062.29ms +step:3065/3200 train_loss:3.2247 train_time:12410251ms step_avg:4062.28ms +step:3066/3200 train_loss:3.4957 train_time:12414277ms step_avg:4062.26ms +step:3067/3200 train_loss:3.4346 train_time:12418290ms step_avg:4062.25ms +step:3068/3200 train_loss:3.3366 train_time:12422312ms step_avg:4062.23ms +step:3069/3200 train_loss:3.3925 train_time:12426348ms step_avg:4062.23ms +step:3070/3200 train_loss:3.2823 train_time:12430388ms step_avg:4062.22ms +step:3071/3200 train_loss:3.4476 train_time:12434436ms step_avg:4062.21ms +step:3072/3200 train_loss:3.0844 train_time:12438501ms step_avg:4062.21ms +step:3073/3200 train_loss:3.3185 train_time:12442571ms step_avg:4062.22ms +step:3074/3200 train_loss:3.4090 train_time:12446665ms step_avg:4062.23ms +step:3075/3200 train_loss:3.3169 train_time:12450769ms step_avg:4062.24ms +step:3076/3200 train_loss:3.4249 train_time:12454895ms step_avg:4062.26ms +step:3077/3200 train_loss:3.2612 train_time:12459045ms step_avg:4062.29ms +step:3078/3200 train_loss:3.3531 train_time:12463224ms step_avg:4062.33ms +step:3079/3200 train_loss:3.2815 train_time:12467402ms step_avg:4062.37ms +step:3080/3200 train_loss:3.4218 train_time:12471574ms step_avg:4062.40ms +step:3081/3200 train_loss:3.2248 train_time:12475723ms step_avg:4062.43ms +step:3082/3200 train_loss:3.2901 train_time:12479869ms step_avg:4062.46ms +step:3083/3200 train_loss:3.5664 train_time:12484007ms step_avg:4062.48ms +step:3084/3200 train_loss:3.3837 train_time:12488136ms step_avg:4062.50ms +step:3085/3200 train_loss:3.4017 train_time:12492262ms step_avg:4062.52ms +step:3086/3200 train_loss:3.3844 train_time:12496393ms step_avg:4062.55ms +step:3087/3200 train_loss:3.2541 train_time:12500528ms step_avg:4062.57ms +step:3088/3200 train_loss:3.2286 train_time:12504668ms step_avg:4062.60ms +step:3089/3200 train_loss:3.3119 train_time:12508812ms step_avg:4062.62ms +step:3090/3200 train_loss:3.2733 train_time:12512966ms step_avg:4062.65ms +step:3091/3200 train_loss:3.4626 train_time:12517147ms step_avg:4062.69ms +step:3092/3200 train_loss:3.3196 train_time:12521326ms step_avg:4062.73ms +step:3093/3200 train_loss:3.1447 train_time:12525506ms step_avg:4062.77ms +step:3094/3200 train_loss:3.2691 train_time:12529687ms step_avg:4062.80ms +step:3095/3200 train_loss:3.2601 train_time:12533889ms step_avg:4062.85ms +step:3096/3200 train_loss:3.3286 train_time:12538084ms step_avg:4062.89ms +step:3097/3200 train_loss:3.1376 train_time:12542283ms step_avg:4062.94ms +step:3098/3200 train_loss:3.2764 train_time:12546445ms step_avg:4062.97ms +step:3099/3200 train_loss:3.3151 train_time:12550581ms step_avg:4062.99ms +step:3100/3200 train_loss:3.4665 train_time:12554698ms step_avg:4063.01ms +step:3101/3200 train_loss:3.1834 train_time:12558805ms step_avg:4063.02ms +step:3102/3200 train_loss:3.3702 train_time:12562901ms step_avg:4063.03ms +step:3103/3200 train_loss:3.4066 train_time:12566990ms step_avg:4063.04ms +step:3104/3200 train_loss:3.0497 train_time:12571074ms step_avg:4063.05ms +step:3105/3200 train_loss:3.1937 train_time:12575158ms step_avg:4063.06ms +step:3106/3200 train_loss:3.2780 train_time:12579242ms step_avg:4063.06ms +step:3107/3200 train_loss:3.3024 train_time:12583322ms step_avg:4063.07ms +step:3108/3200 train_loss:3.1158 train_time:12587406ms step_avg:4063.07ms +step:3109/3200 train_loss:3.2744 train_time:12591488ms step_avg:4063.08ms +step:3110/3200 train_loss:3.2494 train_time:12595574ms step_avg:4063.09ms +step:3111/3200 train_loss:3.1663 train_time:12599664ms step_avg:4063.10ms +step:3112/3200 train_loss:3.2110 train_time:12603758ms step_avg:4063.11ms +step:3113/3200 train_loss:3.2798 train_time:12607856ms step_avg:4063.12ms +step:3114/3200 train_loss:3.3276 train_time:12611970ms step_avg:4063.13ms +step:3115/3200 train_loss:3.0836 train_time:12616094ms step_avg:4063.15ms +step:3116/3200 train_loss:3.3369 train_time:12620225ms step_avg:4063.18ms +step:3117/3200 train_loss:3.2976 train_time:12624373ms step_avg:4063.20ms +step:3118/3200 train_loss:2.9873 train_time:12628553ms step_avg:4063.24ms +step:3119/3200 train_loss:3.1199 train_time:12632735ms step_avg:4063.28ms +step:3120/3200 train_loss:3.6130 train_time:12636945ms step_avg:4063.33ms +step:3121/3200 train_loss:3.2442 train_time:12641185ms step_avg:4063.38ms +step:3122/3200 train_loss:3.3017 train_time:12645442ms step_avg:4063.45ms +step:3123/3200 train_loss:3.3695 train_time:12649651ms step_avg:4063.49ms +step:3124/3200 train_loss:3.1735 train_time:12653823ms step_avg:4063.53ms +step:3125/3200 train_loss:3.5270 train_time:12657938ms step_avg:4063.54ms +step:3125/3200 val_loss:3.2818 train_time:12657939ms step_avg:4063.54ms +step:3126/3200 train_loss:3.2041 train_time:12661945ms step_avg:4063.53ms +step:3127/3200 train_loss:3.3411 train_time:12665974ms step_avg:4063.51ms +step:3128/3200 train_loss:3.2161 train_time:12669994ms step_avg:4063.50ms +step:3129/3200 train_loss:3.3537 train_time:12674013ms step_avg:4063.49ms +step:3130/3200 train_loss:3.3561 train_time:12678029ms step_avg:4063.47ms +step:3131/3200 train_loss:3.3506 train_time:12682042ms step_avg:4063.45ms +step:3132/3200 train_loss:3.4475 train_time:12686056ms step_avg:4063.44ms +step:3133/3200 train_loss:3.1907 train_time:12690071ms step_avg:4063.42ms +step:3134/3200 train_loss:3.2718 train_time:12694080ms step_avg:4063.41ms +step:3135/3200 train_loss:3.1363 train_time:12698093ms step_avg:4063.39ms +step:3136/3200 train_loss:3.3176 train_time:12702111ms step_avg:4063.38ms +step:3137/3200 train_loss:3.2646 train_time:12706133ms step_avg:4063.36ms +step:3138/3200 train_loss:4.0743 train_time:12710156ms step_avg:4063.35ms +step:3139/3200 train_loss:3.2549 train_time:12714179ms step_avg:4063.34ms +step:3140/3200 train_loss:3.2240 train_time:12718200ms step_avg:4063.32ms +step:3141/3200 train_loss:3.2783 train_time:12722226ms step_avg:4063.31ms +step:3142/3200 train_loss:3.2858 train_time:12726258ms step_avg:4063.30ms +step:3143/3200 train_loss:3.1776 train_time:12730280ms step_avg:4063.29ms +step:3144/3200 train_loss:3.2381 train_time:12734297ms step_avg:4063.27ms +step:3145/3200 train_loss:3.1025 train_time:12738319ms step_avg:4063.26ms +step:3146/3200 train_loss:3.3950 train_time:12742350ms step_avg:4063.25ms +step:3147/3200 train_loss:3.1410 train_time:12746384ms step_avg:4063.24ms +step:3148/3200 train_loss:3.1364 train_time:12750425ms step_avg:4063.23ms +step:3149/3200 train_loss:3.2105 train_time:12754478ms step_avg:4063.23ms +step:3150/3200 train_loss:3.3658 train_time:12758537ms step_avg:4063.23ms +step:3151/3200 train_loss:3.3006 train_time:12762618ms step_avg:4063.23ms +step:3152/3200 train_loss:3.3348 train_time:12766705ms step_avg:4063.24ms +step:3153/3200 train_loss:3.0852 train_time:12770808ms step_avg:4063.25ms +step:3154/3200 train_loss:3.2523 train_time:12774925ms step_avg:4063.27ms +step:3155/3200 train_loss:3.2149 train_time:12779062ms step_avg:4063.29ms +step:3156/3200 train_loss:3.2861 train_time:12783241ms step_avg:4063.33ms +step:3157/3200 train_loss:3.1056 train_time:12787424ms step_avg:4063.37ms +step:3158/3200 train_loss:3.3254 train_time:12791626ms step_avg:4063.41ms +step:3159/3200 train_loss:3.1966 train_time:12795873ms step_avg:4063.47ms +step:3160/3200 train_loss:3.2418 train_time:12800076ms step_avg:4063.52ms +step:3161/3200 train_loss:3.2229 train_time:12804231ms step_avg:4063.55ms +step:3162/3200 train_loss:3.2656 train_time:12808349ms step_avg:4063.56ms +step:3163/3200 train_loss:3.3862 train_time:12812442ms step_avg:4063.57ms +step:3164/3200 train_loss:3.2510 train_time:12816509ms step_avg:4063.57ms +step:3165/3200 train_loss:3.1640 train_time:12820562ms step_avg:4063.57ms +step:3166/3200 train_loss:3.3202 train_time:12824600ms step_avg:4063.56ms +step:3167/3200 train_loss:3.3827 train_time:12828631ms step_avg:4063.55ms +step:3168/3200 train_loss:3.2633 train_time:12832654ms step_avg:4063.54ms +step:3169/3200 train_loss:3.2346 train_time:12836672ms step_avg:4063.52ms +step:3170/3200 train_loss:3.2515 train_time:12840682ms step_avg:4063.51ms +step:3171/3200 train_loss:3.2059 train_time:12844691ms step_avg:4063.49ms +step:3172/3200 train_loss:3.1092 train_time:12848696ms step_avg:4063.47ms +step:3173/3200 train_loss:3.3215 train_time:12852718ms step_avg:4063.46ms +step:3174/3200 train_loss:3.0788 train_time:12856746ms step_avg:4063.45ms +step:3175/3200 train_loss:3.4591 train_time:12860773ms step_avg:4063.44ms +step:3176/3200 train_loss:3.3341 train_time:12864794ms step_avg:4063.42ms +step:3177/3200 train_loss:3.2206 train_time:12868822ms step_avg:4063.41ms +step:3178/3200 train_loss:3.1259 train_time:12872850ms step_avg:4063.40ms +step:3179/3200 train_loss:3.2598 train_time:12876879ms step_avg:4063.39ms +step:3180/3200 train_loss:3.1457 train_time:12880910ms step_avg:4063.38ms +step:3181/3200 train_loss:3.2866 train_time:12884941ms step_avg:4063.37ms +step:3182/3200 train_loss:3.4432 train_time:12888976ms step_avg:4063.36ms +step:3183/3200 train_loss:3.4723 train_time:12892989ms step_avg:4063.34ms +step:3184/3200 train_loss:3.4146 train_time:12897008ms step_avg:4063.33ms +step:3185/3200 train_loss:3.1887 train_time:12901033ms step_avg:4063.32ms +step:3186/3200 train_loss:3.3239 train_time:12905064ms step_avg:4063.31ms +step:3187/3200 train_loss:3.2777 train_time:12909094ms step_avg:4063.30ms +step:3188/3200 train_loss:3.6387 train_time:12913139ms step_avg:4063.29ms +step:3189/3200 train_loss:3.3323 train_time:12917190ms step_avg:4063.29ms +step:3190/3200 train_loss:3.1519 train_time:12921257ms step_avg:4063.29ms +step:3191/3200 train_loss:3.3583 train_time:12925329ms step_avg:4063.29ms +step:3192/3200 train_loss:3.3599 train_time:12929416ms step_avg:4063.30ms +step:3193/3200 train_loss:3.1016 train_time:12933517ms step_avg:4063.31ms +step:3194/3200 train_loss:3.3017 train_time:12937636ms step_avg:4063.33ms +step:3195/3200 train_loss:3.2009 train_time:12941771ms step_avg:4063.35ms +step:3196/3200 train_loss:3.2175 train_time:12945943ms step_avg:4063.38ms +step:3197/3200 train_loss:3.2819 train_time:12950131ms step_avg:4063.42ms +step:3198/3200 train_loss:3.1602 train_time:12954327ms step_avg:4063.47ms +step:3199/3200 train_loss:3.1988 train_time:12958486ms step_avg:4063.50ms +step:3200/3200 train_loss:3.3662 train_time:12962609ms step_avg:4063.51ms +step:3200/3200 val_loss:3.2775 train_time:12962609ms step_avg:4063.51ms