diff --git "a/flow_s_1000kx1024_vavae/log.txt" "b/flow_s_1000kx1024_vavae/log.txt" new file mode 100644--- /dev/null +++ "b/flow_s_1000kx1024_vavae/log.txt" @@ -0,0 +1,10256 @@ +[2025-02-22 22:02:35] Model: DistributedDataParallel( + (module): DiT( + (x_embedder): PatchEmbed( + (proj): Conv2d(32, 384, kernel_size=(1, 1), stride=(1, 1)) + (norm): Identity() + ) + (t_embedder): TimestepEmbedder( + (mlp): Sequential( + (0): Linear(in_features=256, out_features=384, bias=True) + (1): SiLU() + (2): Linear(in_features=384, out_features=384, bias=True) + ) + ) + (y_embedder): LabelEmbedder( + (embedding_table): Embedding(1001, 384) + ) + (feat_rope): VisionRotaryEmbeddingFast() + (blocks): ModuleList( + (0-11): 12 x DiTBlock( + (norm1): RMSNorm() + (norm2): RMSNorm() + (attn): Attention( + (qkv): Linear(in_features=384, out_features=1152, bias=True) + (q_norm): RMSNorm() + (k_norm): RMSNorm() + (attn_drop): Dropout(p=0.0, inplace=False) + (proj): Linear(in_features=384, out_features=384, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (mlp): SwiGLUFFN( + (w12): Linear(in_features=384, out_features=2048, bias=True) + (w3): Linear(in_features=1024, out_features=384, bias=True) + ) + (adaLN_modulation): Sequential( + (0): SiLU() + (1): Linear(in_features=384, out_features=2304, bias=True) + ) + ) + ) + (final_layer): FinalLayer( + (norm_final): RMSNorm() + (linear): Linear(in_features=384, out_features=32, bias=True) + (adaLN_modulation): Sequential( + (0): SiLU() + (1): Linear(in_features=384, out_features=768, bias=True) + ) + ) + ) +) +[2025-02-22 22:02:35] DiT Parameters: 32.99M +[2025-02-22 22:02:35] DiT Trainable Parameters: 32.89M +[2025-02-22 22:02:35] Optimizer: AdamW, lr=0.0002, beta2=0.95 +[2025-02-22 22:02:35] module.pos_embed.requires_grad : False +[2025-02-22 22:02:35] module.x_embedder.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.x_embedder.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.t_embedder.mlp.0.weight.requires_grad : True +[2025-02-22 22:02:35] module.t_embedder.mlp.0.bias.requires_grad : True +[2025-02-22 22:02:35] module.t_embedder.mlp.2.weight.requires_grad : True +[2025-02-22 22:02:35] module.t_embedder.mlp.2.bias.requires_grad : True +[2025-02-22 22:02:35] module.y_embedder.embedding_table.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.0.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.1.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.2.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.3.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.4.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.5.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.6.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.7.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.8.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.9.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.10.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.norm1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.norm2.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.attn.qkv.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.attn.qkv.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.attn.q_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.attn.k_norm.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.attn.proj.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.attn.proj.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.mlp.w12.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.mlp.w12.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.mlp.w3.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.mlp.w3.bias.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.blocks.11.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] module.final_layer.norm_final.weight.requires_grad : True +[2025-02-22 22:02:35] module.final_layer.linear.weight.requires_grad : True +[2025-02-22 22:02:35] module.final_layer.linear.bias.requires_grad : True +[2025-02-22 22:02:35] module.final_layer.adaLN_modulation.1.weight.requires_grad : True +[2025-02-22 22:02:35] module.final_layer.adaLN_modulation.1.bias.requires_grad : True +[2025-02-22 22:02:35] Dataset contains 1,281,168 images /data/checkpoints/LanguageBind/offline_feature/offline_vavae_256_path/imagenet_train_256 +[2025-02-22 22:02:35] Batch size 64 per gpu, with 1024 global batch size +[2025-02-22 22:02:35] Train config: {'ckpt_path': '/data/logs/flow/flow_s_1000kx1024_vavae/checkpoints/1000000.pt', 'data': {'data_path': '/data/checkpoints/LanguageBind/offline_feature/offline_vavae_256_path/imagenet_train_256', 'fid_reference_file': '/data/checkpoints/VIRTUAL_imagenet256_labeled.npz', 'image_size': 256, 'num_classes': 1000, 'num_workers': 16, 'latent_norm': True, 'latent_multiplier': 1.0}, 'vae': {'model_type': 'vavae', 'model_path': '/data/checkpoints/hustvl/vavae-imagenet256-f16d32-dinov2/vavae-imagenet256-f16d32-dinov2.pt', 'downsample_ratio': 16}, 'model': {'model_type': 'DiT-S/1', 'use_qknorm': True, 'use_swiglu': True, 'use_rope': True, 'use_rmsnorm': True, 'in_chans': 32, 'use_checkpoint': False}, 'train': {'max_steps': 1000000, 'global_batch_size': 1024, 'global_seed': 0, 'output_dir': '../logs/flow/flow_s_1000kx1024_vavae', 'ckpt': None, 'log_every': 100, 'ckpt_every': 50000, 'wandb': True, 'seed': 1234, 'precision': 'bf16', 'resume': False}, 'optimizer': {'lr': 0.0002, 'beta2': 0.95}, 'wandb': {'proj_name': 'flow', 'log_name': 'flow_s_1000kx1024_vavae', 'key': '953e958793b218efb850fa194e85843e2c3bd88b'}, 'scheduler': {'diffusion': False, 'transport': True}, 'diffusion': {'learn_sigma': True, 'diffusion_steps': 1000}, 'transport': {'path_type': 'Linear', 'prediction': 'velocity', 'loss_weight': None, 'sample_eps': None, 'train_eps': None, 'use_cosine_loss': True, 'use_lognorm': True}, 'sample': {'mode': 'ODE', 'sampling_method': 'euler', 'atol': 1e-06, 'rtol': 0.001, 'reverse': False, 'likelihood': False, 'num_sampling_steps': 250, 'cfg_scale': 1.0, 'per_proc_batch_size': 64, 'fid_num': 50000, 'cfg_interval_start': 0.0, 'timestep_shift': 0.0}} +[2025-02-22 22:02:51] (step=0000100) Train Loss: 1.5777, Train Steps/Sec: 6.21, Grad Norm: 1455711.1250 +[2025-02-22 22:02:57] (step=0000200) Train Loss: 0.8494, Train Steps/Sec: 17.16, Grad Norm: 6.5232 +[2025-02-22 22:03:03] (step=0000300) Train Loss: 0.6356, Train Steps/Sec: 17.22, Grad Norm: 6.1277 +[2025-02-22 22:03:09] (step=0000400) Train Loss: 0.5667, Train Steps/Sec: 17.11, Grad Norm: 6.8277 +[2025-02-22 22:03:15] (step=0000500) Train Loss: 0.5280, Train Steps/Sec: 17.14, Grad Norm: 6.4244 +[2025-02-22 22:03:20] (step=0000600) Train Loss: 0.5031, Train Steps/Sec: 17.23, Grad Norm: 5.1034 +[2025-02-22 22:03:26] (step=0000700) Train Loss: 0.4850, Train Steps/Sec: 17.07, Grad Norm: 3.9669 +[2025-02-22 22:03:32] (step=0000800) Train Loss: 0.4714, Train Steps/Sec: 17.06, Grad Norm: 3.2712 +[2025-02-22 22:03:38] (step=0000900) Train Loss: 0.4610, Train Steps/Sec: 17.10, Grad Norm: 2.8888 +[2025-02-22 22:03:44] (step=0001000) Train Loss: 0.4513, Train Steps/Sec: 17.11, Grad Norm: 2.5101 +[2025-02-22 22:03:50] (step=0001100) Train Loss: 0.4436, Train Steps/Sec: 17.10, Grad Norm: 2.2699 +[2025-02-22 22:03:55] (step=0001200) Train Loss: 0.4372, Train Steps/Sec: 17.10, Grad Norm: 1.9712 +[2025-02-22 22:04:02] (step=0001300) Train Loss: 0.4328, Train Steps/Sec: 14.26, Grad Norm: 1.8066 +[2025-02-22 22:04:08] (step=0001400) Train Loss: 0.4280, Train Steps/Sec: 17.28, Grad Norm: 1.6346 +[2025-02-22 22:04:14] (step=0001500) Train Loss: 0.4244, Train Steps/Sec: 17.30, Grad Norm: 1.4727 +[2025-02-22 22:04:22] (step=0001600) Train Loss: 0.4210, Train Steps/Sec: 13.03, Grad Norm: 1.3419 +[2025-02-22 22:04:28] (step=0001700) Train Loss: 0.4177, Train Steps/Sec: 17.19, Grad Norm: 1.2126 +[2025-02-22 22:04:34] (step=0001800) Train Loss: 0.4158, Train Steps/Sec: 16.42, Grad Norm: 1.1316 +[2025-02-22 22:04:41] (step=0001900) Train Loss: 0.4134, Train Steps/Sec: 14.06, Grad Norm: 1.0383 +[2025-02-22 22:04:47] (step=0002000) Train Loss: 0.4115, Train Steps/Sec: 17.14, Grad Norm: 0.9574 +[2025-02-22 22:04:52] (step=0002100) Train Loss: 0.4090, Train Steps/Sec: 17.16, Grad Norm: 0.8834 +[2025-02-22 22:04:58] (step=0002200) Train Loss: 0.4074, Train Steps/Sec: 17.20, Grad Norm: 0.8245 +[2025-02-22 22:05:04] (step=0002300) Train Loss: 0.4052, Train Steps/Sec: 17.10, Grad Norm: 0.7693 +[2025-02-22 22:05:10] (step=0002400) Train Loss: 0.4038, Train Steps/Sec: 17.12, Grad Norm: 0.7069 +[2025-02-22 22:05:16] (step=0002500) Train Loss: 0.4026, Train Steps/Sec: 17.07, Grad Norm: 0.6618 +[2025-02-22 22:05:23] (step=0002600) Train Loss: 0.4009, Train Steps/Sec: 14.01, Grad Norm: 0.6190 +[2025-02-22 22:05:29] (step=0002700) Train Loss: 0.3998, Train Steps/Sec: 17.27, Grad Norm: 0.5785 +[2025-02-22 22:05:34] (step=0002800) Train Loss: 0.3986, Train Steps/Sec: 17.36, Grad Norm: 0.5388 +[2025-02-22 22:05:40] (step=0002900) Train Loss: 0.3971, Train Steps/Sec: 17.32, Grad Norm: 0.5225 +[2025-02-22 22:05:46] (step=0003000) Train Loss: 0.3964, Train Steps/Sec: 17.22, Grad Norm: 0.4841 +[2025-02-22 22:05:52] (step=0003100) Train Loss: 0.3949, Train Steps/Sec: 17.21, Grad Norm: 0.4737 +[2025-02-22 22:05:58] (step=0003200) Train Loss: 0.3936, Train Steps/Sec: 17.32, Grad Norm: 0.4428 +[2025-02-22 22:06:03] (step=0003300) Train Loss: 0.3935, Train Steps/Sec: 17.28, Grad Norm: 0.4211 +[2025-02-22 22:06:09] (step=0003400) Train Loss: 0.3921, Train Steps/Sec: 17.12, Grad Norm: 0.3971 +[2025-02-22 22:06:15] (step=0003500) Train Loss: 0.3911, Train Steps/Sec: 17.15, Grad Norm: 0.3800 +[2025-02-22 22:06:23] (step=0003600) Train Loss: 0.3905, Train Steps/Sec: 13.39, Grad Norm: 0.3645 +[2025-02-22 22:06:28] (step=0003700) Train Loss: 0.3895, Train Steps/Sec: 17.25, Grad Norm: 0.3497 +[2025-02-22 22:06:38] (step=0003800) Train Loss: 0.3887, Train Steps/Sec: 10.95, Grad Norm: 0.3389 +[2025-02-22 22:06:43] (step=0003900) Train Loss: 0.3875, Train Steps/Sec: 17.17, Grad Norm: 0.3247 +[2025-02-22 22:06:49] (step=0004000) Train Loss: 0.3867, Train Steps/Sec: 17.18, Grad Norm: 0.3103 +[2025-02-22 22:06:55] (step=0004100) Train Loss: 0.3856, Train Steps/Sec: 17.28, Grad Norm: 0.3013 +[2025-02-22 22:07:01] (step=0004200) Train Loss: 0.3843, Train Steps/Sec: 17.22, Grad Norm: 0.2979 +[2025-02-22 22:07:07] (step=0004300) Train Loss: 0.3852, Train Steps/Sec: 17.18, Grad Norm: 0.2901 +[2025-02-22 22:07:12] (step=0004400) Train Loss: 0.3844, Train Steps/Sec: 17.25, Grad Norm: 0.2819 +[2025-02-22 22:07:18] (step=0004500) Train Loss: 0.3834, Train Steps/Sec: 17.12, Grad Norm: 0.2714 +[2025-02-22 22:07:24] (step=0004600) Train Loss: 0.3830, Train Steps/Sec: 17.25, Grad Norm: 0.2630 +[2025-02-22 22:07:30] (step=0004700) Train Loss: 0.3818, Train Steps/Sec: 17.22, Grad Norm: 0.2678 +[2025-02-22 22:07:36] (step=0004800) Train Loss: 0.3811, Train Steps/Sec: 17.24, Grad Norm: 0.2560 +[2025-02-22 22:07:41] (step=0004900) Train Loss: 0.3816, Train Steps/Sec: 17.20, Grad Norm: 0.2539 +[2025-02-22 22:07:47] (step=0005000) Train Loss: 0.3807, Train Steps/Sec: 17.18, Grad Norm: 0.2492 +[2025-02-22 22:07:54] (step=0005100) Train Loss: 0.3796, Train Steps/Sec: 14.26, Grad Norm: 0.2414 +[2025-02-22 22:08:00] (step=0005200) Train Loss: 0.3797, Train Steps/Sec: 17.21, Grad Norm: 0.2379 +[2025-02-22 22:08:06] (step=0005300) Train Loss: 0.3784, Train Steps/Sec: 17.23, Grad Norm: 0.2246 +[2025-02-22 22:08:12] (step=0005400) Train Loss: 0.3785, Train Steps/Sec: 17.34, Grad Norm: 0.2314 +[2025-02-22 22:08:17] (step=0005500) Train Loss: 0.3774, Train Steps/Sec: 17.32, Grad Norm: 0.2297 +[2025-02-22 22:08:25] (step=0005600) Train Loss: 0.3767, Train Steps/Sec: 13.11, Grad Norm: 0.2223 +[2025-02-22 22:08:31] (step=0005700) Train Loss: 0.3770, Train Steps/Sec: 16.57, Grad Norm: 0.2180 +[2025-02-22 22:08:38] (step=0005800) Train Loss: 0.3761, Train Steps/Sec: 13.65, Grad Norm: 0.2068 +[2025-02-22 22:08:44] (step=0005900) Train Loss: 0.3758, Train Steps/Sec: 17.13, Grad Norm: 0.2115 +[2025-02-22 22:08:50] (step=0006000) Train Loss: 0.3754, Train Steps/Sec: 17.15, Grad Norm: 0.2137 +[2025-02-22 22:08:56] (step=0006100) Train Loss: 0.3751, Train Steps/Sec: 17.20, Grad Norm: 0.2050 +[2025-02-22 22:09:02] (step=0006200) Train Loss: 0.3746, Train Steps/Sec: 17.28, Grad Norm: 0.2027 +[2025-02-22 22:09:09] (step=0006300) Train Loss: 0.3740, Train Steps/Sec: 14.40, Grad Norm: 0.1964 +[2025-02-22 22:09:14] (step=0006400) Train Loss: 0.3739, Train Steps/Sec: 17.28, Grad Norm: 0.1901 +[2025-02-22 22:09:20] (step=0006500) Train Loss: 0.3733, Train Steps/Sec: 17.30, Grad Norm: 0.1940 +[2025-02-22 22:09:26] (step=0006600) Train Loss: 0.3730, Train Steps/Sec: 17.36, Grad Norm: 0.1865 +[2025-02-22 22:09:32] (step=0006700) Train Loss: 0.3721, Train Steps/Sec: 17.34, Grad Norm: 0.1928 +[2025-02-22 22:09:38] (step=0006800) Train Loss: 0.3721, Train Steps/Sec: 17.38, Grad Norm: 0.1875 +[2025-02-22 22:09:43] (step=0006900) Train Loss: 0.3715, Train Steps/Sec: 17.34, Grad Norm: 0.1870 +[2025-02-22 22:09:49] (step=0007000) Train Loss: 0.3719, Train Steps/Sec: 17.33, Grad Norm: 0.1888 +[2025-02-22 22:09:55] (step=0007100) Train Loss: 0.3713, Train Steps/Sec: 17.35, Grad Norm: 0.1821 +[2025-02-22 22:10:01] (step=0007200) Train Loss: 0.3710, Train Steps/Sec: 17.40, Grad Norm: 0.1818 +[2025-02-22 22:10:06] (step=0007300) Train Loss: 0.3708, Train Steps/Sec: 17.33, Grad Norm: 0.1777 +[2025-02-22 22:10:12] (step=0007400) Train Loss: 0.3701, Train Steps/Sec: 17.39, Grad Norm: 0.1741 +[2025-02-22 22:10:18] (step=0007500) Train Loss: 0.3700, Train Steps/Sec: 17.23, Grad Norm: 0.1764 +[2025-02-22 22:10:26] (step=0007600) Train Loss: 0.3699, Train Steps/Sec: 12.16, Grad Norm: 0.1688 +[2025-02-22 22:10:34] (step=0007700) Train Loss: 0.3692, Train Steps/Sec: 12.79, Grad Norm: 0.1741 +[2025-02-22 22:10:40] (step=0007800) Train Loss: 0.3689, Train Steps/Sec: 17.40, Grad Norm: 0.1707 +[2025-02-22 22:10:46] (step=0007900) Train Loss: 0.3686, Train Steps/Sec: 17.29, Grad Norm: 0.1685 +[2025-02-22 22:10:51] (step=0008000) Train Loss: 0.3679, Train Steps/Sec: 17.29, Grad Norm: 0.1681 +[2025-02-22 22:10:57] (step=0008100) Train Loss: 0.3684, Train Steps/Sec: 17.21, Grad Norm: 0.1648 +[2025-02-22 22:11:03] (step=0008200) Train Loss: 0.3678, Train Steps/Sec: 17.25, Grad Norm: 0.1687 +[2025-02-22 22:11:09] (step=0008300) Train Loss: 0.3678, Train Steps/Sec: 17.30, Grad Norm: 0.1684 +[2025-02-22 22:11:14] (step=0008400) Train Loss: 0.3674, Train Steps/Sec: 17.30, Grad Norm: 0.1613 +[2025-02-22 22:11:20] (step=0008500) Train Loss: 0.3676, Train Steps/Sec: 17.34, Grad Norm: 0.1593 +[2025-02-22 22:11:26] (step=0008600) Train Loss: 0.3664, Train Steps/Sec: 17.33, Grad Norm: 0.1602 +[2025-02-22 22:11:32] (step=0008700) Train Loss: 0.3663, Train Steps/Sec: 17.10, Grad Norm: 0.1629 +[2025-02-22 22:11:39] (step=0008800) Train Loss: 0.3661, Train Steps/Sec: 14.30, Grad Norm: 0.1565 +[2025-02-22 22:11:45] (step=0008900) Train Loss: 0.3660, Train Steps/Sec: 16.99, Grad Norm: 0.1619 +[2025-02-22 22:11:51] (step=0009000) Train Loss: 0.3657, Train Steps/Sec: 17.02, Grad Norm: 0.1568 +[2025-02-22 22:11:57] (step=0009100) Train Loss: 0.3658, Train Steps/Sec: 17.01, Grad Norm: 0.1606 +[2025-02-22 22:12:02] (step=0009200) Train Loss: 0.3648, Train Steps/Sec: 17.04, Grad Norm: 0.1508 +[2025-02-22 22:12:08] (step=0009300) Train Loss: 0.3642, Train Steps/Sec: 17.09, Grad Norm: 0.1539 +[2025-02-22 22:12:14] (step=0009400) Train Loss: 0.3648, Train Steps/Sec: 17.07, Grad Norm: 0.1531 +[2025-02-22 22:12:20] (step=0009500) Train Loss: 0.3644, Train Steps/Sec: 17.04, Grad Norm: 0.1559 +[2025-02-22 22:12:29] (step=0009600) Train Loss: 0.3646, Train Steps/Sec: 11.49, Grad Norm: 0.1511 +[2025-02-22 22:12:36] (step=0009700) Train Loss: 0.3637, Train Steps/Sec: 14.46, Grad Norm: 0.1539 +[2025-02-22 22:12:41] (step=0009800) Train Loss: 0.3634, Train Steps/Sec: 16.98, Grad Norm: 0.1435 +[2025-02-22 22:12:47] (step=0009900) Train Loss: 0.3638, Train Steps/Sec: 16.97, Grad Norm: 0.1482 +[2025-02-22 22:12:53] (step=0010000) Train Loss: 0.3629, Train Steps/Sec: 17.00, Grad Norm: 0.1460 +[2025-02-22 22:13:00] (step=0010100) Train Loss: 0.3636, Train Steps/Sec: 14.20, Grad Norm: 0.1466 +[2025-02-22 22:13:06] (step=0010200) Train Loss: 0.3625, Train Steps/Sec: 17.18, Grad Norm: 0.1487 +[2025-02-22 22:13:12] (step=0010300) Train Loss: 0.3625, Train Steps/Sec: 17.20, Grad Norm: 0.1501 +[2025-02-22 22:13:18] (step=0010400) Train Loss: 0.3618, Train Steps/Sec: 17.29, Grad Norm: 0.1499 +[2025-02-22 22:13:23] (step=0010500) Train Loss: 0.3617, Train Steps/Sec: 17.34, Grad Norm: 0.1454 +[2025-02-22 22:13:29] (step=0010600) Train Loss: 0.3621, Train Steps/Sec: 17.31, Grad Norm: 0.1426 +[2025-02-22 22:13:35] (step=0010700) Train Loss: 0.3623, Train Steps/Sec: 17.33, Grad Norm: 0.1479 +[2025-02-22 22:13:41] (step=0010800) Train Loss: 0.3618, Train Steps/Sec: 17.29, Grad Norm: 0.1427 +[2025-02-22 22:13:47] (step=0010900) Train Loss: 0.3621, Train Steps/Sec: 17.34, Grad Norm: 0.1411 +[2025-02-22 22:13:52] (step=0011000) Train Loss: 0.3604, Train Steps/Sec: 17.33, Grad Norm: 0.1413 +[2025-02-22 22:13:58] (step=0011100) Train Loss: 0.3609, Train Steps/Sec: 17.31, Grad Norm: 0.1384 +[2025-02-22 22:14:04] (step=0011200) Train Loss: 0.3607, Train Steps/Sec: 17.24, Grad Norm: 0.1441 +[2025-02-22 22:14:11] (step=0011300) Train Loss: 0.3604, Train Steps/Sec: 14.48, Grad Norm: 0.1396 +[2025-02-22 22:14:17] (step=0011400) Train Loss: 0.3613, Train Steps/Sec: 17.16, Grad Norm: 0.1347 +[2025-02-22 22:14:23] (step=0011500) Train Loss: 0.3598, Train Steps/Sec: 16.46, Grad Norm: 0.1414 +[2025-02-22 22:14:32] (step=0011600) Train Loss: 0.3597, Train Steps/Sec: 10.36, Grad Norm: 0.1344 +[2025-02-22 22:14:38] (step=0011700) Train Loss: 0.3602, Train Steps/Sec: 17.25, Grad Norm: 0.1344 +[2025-02-22 22:14:44] (step=0011800) Train Loss: 0.3596, Train Steps/Sec: 17.21, Grad Norm: 0.1364 +[2025-02-22 22:14:50] (step=0011900) Train Loss: 0.3597, Train Steps/Sec: 17.22, Grad Norm: 0.1415 +[2025-02-22 22:14:56] (step=0012000) Train Loss: 0.3595, Train Steps/Sec: 17.23, Grad Norm: 0.1347 +[2025-02-22 22:15:01] (step=0012100) Train Loss: 0.3583, Train Steps/Sec: 17.28, Grad Norm: 0.1390 +[2025-02-22 22:15:07] (step=0012200) Train Loss: 0.3590, Train Steps/Sec: 17.25, Grad Norm: 0.1352 +[2025-02-22 22:15:13] (step=0012300) Train Loss: 0.3589, Train Steps/Sec: 17.22, Grad Norm: 0.1355 +[2025-02-22 22:15:19] (step=0012400) Train Loss: 0.3593, Train Steps/Sec: 17.20, Grad Norm: 0.1366 +[2025-02-22 22:15:25] (step=0012500) Train Loss: 0.3584, Train Steps/Sec: 17.15, Grad Norm: 0.1341 +[2025-02-22 22:15:32] (step=0012600) Train Loss: 0.3582, Train Steps/Sec: 14.49, Grad Norm: 0.1326 +[2025-02-22 22:15:37] (step=0012700) Train Loss: 0.3579, Train Steps/Sec: 17.27, Grad Norm: 0.1294 +[2025-02-22 22:15:43] (step=0012800) Train Loss: 0.3584, Train Steps/Sec: 17.24, Grad Norm: 0.1322 +[2025-02-22 22:15:49] (step=0012900) Train Loss: 0.3575, Train Steps/Sec: 17.27, Grad Norm: 0.1271 +[2025-02-22 22:15:55] (step=0013000) Train Loss: 0.3576, Train Steps/Sec: 17.25, Grad Norm: 0.1349 +[2025-02-22 22:16:01] (step=0013100) Train Loss: 0.3573, Train Steps/Sec: 17.24, Grad Norm: 0.1303 +[2025-02-22 22:16:06] (step=0013200) Train Loss: 0.3573, Train Steps/Sec: 17.18, Grad Norm: 0.1332 +[2025-02-22 22:16:12] (step=0013300) Train Loss: 0.3565, Train Steps/Sec: 17.25, Grad Norm: 0.1315 +[2025-02-22 22:16:18] (step=0013400) Train Loss: 0.3574, Train Steps/Sec: 17.30, Grad Norm: 0.1339 +[2025-02-22 22:16:25] (step=0013500) Train Loss: 0.3563, Train Steps/Sec: 14.24, Grad Norm: 0.1312 +[2025-02-22 22:16:32] (step=0013600) Train Loss: 0.3566, Train Steps/Sec: 13.49, Grad Norm: 0.1276 +[2025-02-22 22:16:39] (step=0013700) Train Loss: 0.3564, Train Steps/Sec: 15.18, Grad Norm: 0.1307 +[2025-02-22 22:16:46] (step=0013800) Train Loss: 0.3565, Train Steps/Sec: 14.42, Grad Norm: 0.1295 +[2025-02-22 22:16:52] (step=0013900) Train Loss: 0.3564, Train Steps/Sec: 17.18, Grad Norm: 0.1273 +[2025-02-22 22:16:58] (step=0014000) Train Loss: 0.3562, Train Steps/Sec: 17.18, Grad Norm: 0.1222 +[2025-02-22 22:17:03] (step=0014100) Train Loss: 0.3560, Train Steps/Sec: 17.17, Grad Norm: 0.1254 +[2025-02-22 22:17:09] (step=0014200) Train Loss: 0.3562, Train Steps/Sec: 17.18, Grad Norm: 0.1204 +[2025-02-22 22:17:15] (step=0014300) Train Loss: 0.3550, Train Steps/Sec: 17.15, Grad Norm: 0.1228 +[2025-02-22 22:17:21] (step=0014400) Train Loss: 0.3555, Train Steps/Sec: 17.12, Grad Norm: 0.1312 +[2025-02-22 22:17:27] (step=0014500) Train Loss: 0.3557, Train Steps/Sec: 17.17, Grad Norm: 0.1237 +[2025-02-22 22:17:33] (step=0014600) Train Loss: 0.3553, Train Steps/Sec: 17.14, Grad Norm: 0.1219 +[2025-02-22 22:17:38] (step=0014700) Train Loss: 0.3548, Train Steps/Sec: 17.15, Grad Norm: 0.1232 +[2025-02-22 22:17:44] (step=0014800) Train Loss: 0.3556, Train Steps/Sec: 17.27, Grad Norm: 0.1258 +[2025-02-22 22:17:50] (step=0014900) Train Loss: 0.3550, Train Steps/Sec: 17.27, Grad Norm: 0.1312 +[2025-02-22 22:17:56] (step=0015000) Train Loss: 0.3549, Train Steps/Sec: 17.09, Grad Norm: 0.1217 +[2025-02-22 22:18:03] (step=0015100) Train Loss: 0.3546, Train Steps/Sec: 14.49, Grad Norm: 0.1264 +[2025-02-22 22:18:09] (step=0015200) Train Loss: 0.3542, Train Steps/Sec: 17.31, Grad Norm: 0.1224 +[2025-02-22 22:18:14] (step=0015300) Train Loss: 0.3546, Train Steps/Sec: 17.37, Grad Norm: 0.1184 +[2025-02-22 22:18:20] (step=0015400) Train Loss: 0.3541, Train Steps/Sec: 17.36, Grad Norm: 0.1223 +[2025-02-22 22:18:28] (step=0015500) Train Loss: 0.3540, Train Steps/Sec: 13.25, Grad Norm: 0.1202 +[2025-02-22 22:18:35] (step=0015600) Train Loss: 0.3541, Train Steps/Sec: 14.10, Grad Norm: 0.1252 +[2025-02-22 22:18:41] (step=0015700) Train Loss: 0.3539, Train Steps/Sec: 15.86, Grad Norm: 0.1245 +[2025-02-22 22:18:47] (step=0015800) Train Loss: 0.3539, Train Steps/Sec: 17.33, Grad Norm: 0.1175 +[2025-02-22 22:18:53] (step=0015900) Train Loss: 0.3535, Train Steps/Sec: 17.30, Grad Norm: 0.1186 +[2025-02-22 22:18:58] (step=0016000) Train Loss: 0.3539, Train Steps/Sec: 17.33, Grad Norm: 0.1204 +[2025-02-22 22:19:04] (step=0016100) Train Loss: 0.3534, Train Steps/Sec: 17.29, Grad Norm: 0.1240 +[2025-02-22 22:19:10] (step=0016200) Train Loss: 0.3532, Train Steps/Sec: 17.28, Grad Norm: 0.1276 +[2025-02-22 22:19:17] (step=0016300) Train Loss: 0.3524, Train Steps/Sec: 14.38, Grad Norm: 0.1159 +[2025-02-22 22:19:23] (step=0016400) Train Loss: 0.3534, Train Steps/Sec: 17.13, Grad Norm: 0.1226 +[2025-02-22 22:19:29] (step=0016500) Train Loss: 0.3532, Train Steps/Sec: 17.09, Grad Norm: 0.1173 +[2025-02-22 22:19:34] (step=0016600) Train Loss: 0.3531, Train Steps/Sec: 17.06, Grad Norm: 0.1210 +[2025-02-22 22:19:40] (step=0016700) Train Loss: 0.3527, Train Steps/Sec: 17.02, Grad Norm: 0.1177 +[2025-02-22 22:19:46] (step=0016800) Train Loss: 0.3526, Train Steps/Sec: 17.06, Grad Norm: 0.1218 +[2025-02-22 22:19:52] (step=0016900) Train Loss: 0.3525, Train Steps/Sec: 17.14, Grad Norm: 0.1148 +[2025-02-22 22:19:58] (step=0017000) Train Loss: 0.3528, Train Steps/Sec: 17.12, Grad Norm: 0.1174 +[2025-02-22 22:20:04] (step=0017100) Train Loss: 0.3528, Train Steps/Sec: 17.09, Grad Norm: 0.1194 +[2025-02-22 22:20:09] (step=0017200) Train Loss: 0.3521, Train Steps/Sec: 17.12, Grad Norm: 0.1209 +[2025-02-22 22:20:15] (step=0017300) Train Loss: 0.3522, Train Steps/Sec: 17.06, Grad Norm: 0.1117 +[2025-02-22 22:20:23] (step=0017400) Train Loss: 0.3516, Train Steps/Sec: 13.08, Grad Norm: 0.1154 +[2025-02-22 22:20:29] (step=0017500) Train Loss: 0.3517, Train Steps/Sec: 16.33, Grad Norm: 0.1157 +[2025-02-22 22:20:37] (step=0017600) Train Loss: 0.3514, Train Steps/Sec: 12.28, Grad Norm: 0.1129 +[2025-02-22 22:20:44] (step=0017700) Train Loss: 0.3522, Train Steps/Sec: 15.30, Grad Norm: 0.1178 +[2025-02-22 22:20:50] (step=0017800) Train Loss: 0.3518, Train Steps/Sec: 17.49, Grad Norm: 0.1168 +[2025-02-22 22:20:55] (step=0017900) Train Loss: 0.3519, Train Steps/Sec: 17.45, Grad Norm: 0.1214 +[2025-02-22 22:21:01] (step=0018000) Train Loss: 0.3518, Train Steps/Sec: 17.45, Grad Norm: 0.1072 +[2025-02-22 22:21:07] (step=0018100) Train Loss: 0.3511, Train Steps/Sec: 17.42, Grad Norm: 0.1161 +[2025-02-22 22:21:12] (step=0018200) Train Loss: 0.3513, Train Steps/Sec: 17.41, Grad Norm: 0.1197 +[2025-02-22 22:21:18] (step=0018300) Train Loss: 0.3505, Train Steps/Sec: 17.28, Grad Norm: 0.1117 +[2025-02-22 22:21:24] (step=0018400) Train Loss: 0.3508, Train Steps/Sec: 17.29, Grad Norm: 0.1130 +[2025-02-22 22:21:30] (step=0018500) Train Loss: 0.3509, Train Steps/Sec: 17.33, Grad Norm: 0.1106 +[2025-02-22 22:21:36] (step=0018600) Train Loss: 0.3511, Train Steps/Sec: 17.38, Grad Norm: 0.1191 +[2025-02-22 22:21:41] (step=0018700) Train Loss: 0.3497, Train Steps/Sec: 17.42, Grad Norm: 0.1164 +[2025-02-22 22:21:48] (step=0018800) Train Loss: 0.3511, Train Steps/Sec: 14.58, Grad Norm: 0.1101 +[2025-02-22 22:21:54] (step=0018900) Train Loss: 0.3503, Train Steps/Sec: 17.32, Grad Norm: 0.1100 +[2025-02-22 22:22:00] (step=0019000) Train Loss: 0.3507, Train Steps/Sec: 17.33, Grad Norm: 0.1239 +[2025-02-22 22:22:06] (step=0019100) Train Loss: 0.3503, Train Steps/Sec: 17.29, Grad Norm: 0.1085 +[2025-02-22 22:22:11] (step=0019200) Train Loss: 0.3506, Train Steps/Sec: 17.38, Grad Norm: 0.1063 +[2025-02-22 22:22:18] (step=0019300) Train Loss: 0.3499, Train Steps/Sec: 16.07, Grad Norm: 0.1193 +[2025-02-22 22:22:25] (step=0019400) Train Loss: 0.3505, Train Steps/Sec: 13.64, Grad Norm: 0.1121 +[2025-02-22 22:22:31] (step=0019500) Train Loss: 0.3494, Train Steps/Sec: 17.24, Grad Norm: 0.1123 +[2025-02-22 22:22:37] (step=0019600) Train Loss: 0.3495, Train Steps/Sec: 15.23, Grad Norm: 0.1094 +[2025-02-22 22:22:44] (step=0019700) Train Loss: 0.3498, Train Steps/Sec: 14.66, Grad Norm: 0.1199 +[2025-02-22 22:22:50] (step=0019800) Train Loss: 0.3492, Train Steps/Sec: 17.28, Grad Norm: 0.1154 +[2025-02-22 22:22:56] (step=0019900) Train Loss: 0.3495, Train Steps/Sec: 17.30, Grad Norm: 0.1083 +[2025-02-22 22:23:01] (step=0020000) Train Loss: 0.3497, Train Steps/Sec: 17.26, Grad Norm: 0.1096 +[2025-02-22 22:23:08] (step=0020100) Train Loss: 0.3495, Train Steps/Sec: 14.58, Grad Norm: 0.1101 +[2025-02-22 22:23:14] (step=0020200) Train Loss: 0.3493, Train Steps/Sec: 17.38, Grad Norm: 0.1106 +[2025-02-22 22:23:20] (step=0020300) Train Loss: 0.3496, Train Steps/Sec: 17.38, Grad Norm: 0.1061 +[2025-02-22 22:23:26] (step=0020400) Train Loss: 0.3493, Train Steps/Sec: 17.40, Grad Norm: 0.1047 +[2025-02-22 22:23:31] (step=0020500) Train Loss: 0.3491, Train Steps/Sec: 17.43, Grad Norm: 0.1072 +[2025-02-22 22:23:37] (step=0020600) Train Loss: 0.3483, Train Steps/Sec: 17.43, Grad Norm: 0.1144 +[2025-02-22 22:23:43] (step=0020700) Train Loss: 0.3495, Train Steps/Sec: 17.35, Grad Norm: 0.1083 +[2025-02-22 22:23:49] (step=0020800) Train Loss: 0.3483, Train Steps/Sec: 17.33, Grad Norm: 0.1083 +[2025-02-22 22:23:54] (step=0020900) Train Loss: 0.3486, Train Steps/Sec: 17.29, Grad Norm: 0.1129 +[2025-02-22 22:24:00] (step=0021000) Train Loss: 0.3484, Train Steps/Sec: 17.30, Grad Norm: 0.1024 +[2025-02-22 22:24:06] (step=0021100) Train Loss: 0.3487, Train Steps/Sec: 17.37, Grad Norm: 0.1158 +[2025-02-22 22:24:12] (step=0021200) Train Loss: 0.3487, Train Steps/Sec: 17.31, Grad Norm: 0.1061 +[2025-02-22 22:24:20] (step=0021300) Train Loss: 0.3481, Train Steps/Sec: 11.59, Grad Norm: 0.1204 +[2025-02-22 22:24:26] (step=0021400) Train Loss: 0.3491, Train Steps/Sec: 17.24, Grad Norm: 0.1068 +[2025-02-22 22:24:32] (step=0021500) Train Loss: 0.3481, Train Steps/Sec: 17.32, Grad Norm: 0.1053 +[2025-02-22 22:24:39] (step=0021600) Train Loss: 0.3482, Train Steps/Sec: 14.75, Grad Norm: 0.1127 +[2025-02-22 22:24:45] (step=0021700) Train Loss: 0.3480, Train Steps/Sec: 14.64, Grad Norm: 0.1042 +[2025-02-22 22:24:51] (step=0021800) Train Loss: 0.3484, Train Steps/Sec: 17.40, Grad Norm: 0.1071 +[2025-02-22 22:24:57] (step=0021900) Train Loss: 0.3480, Train Steps/Sec: 17.39, Grad Norm: 0.1118 +[2025-02-22 22:25:03] (step=0022000) Train Loss: 0.3480, Train Steps/Sec: 17.39, Grad Norm: 0.1107 +[2025-02-22 22:25:08] (step=0022100) Train Loss: 0.3481, Train Steps/Sec: 17.36, Grad Norm: 0.1096 +[2025-02-22 22:25:14] (step=0022200) Train Loss: 0.3476, Train Steps/Sec: 17.34, Grad Norm: 0.1099 +[2025-02-22 22:25:20] (step=0022300) Train Loss: 0.3477, Train Steps/Sec: 17.30, Grad Norm: 0.1088 +[2025-02-22 22:25:26] (step=0022400) Train Loss: 0.3476, Train Steps/Sec: 17.35, Grad Norm: 0.1054 +[2025-02-22 22:25:32] (step=0022500) Train Loss: 0.3480, Train Steps/Sec: 17.25, Grad Norm: 0.1126 +[2025-02-22 22:25:38] (step=0022600) Train Loss: 0.3473, Train Steps/Sec: 14.76, Grad Norm: 0.1015 +[2025-02-22 22:25:44] (step=0022700) Train Loss: 0.3471, Train Steps/Sec: 17.45, Grad Norm: 0.1023 +[2025-02-22 22:25:50] (step=0022800) Train Loss: 0.3471, Train Steps/Sec: 17.44, Grad Norm: 0.1011 +[2025-02-22 22:25:56] (step=0022900) Train Loss: 0.3474, Train Steps/Sec: 17.42, Grad Norm: 0.1108 +[2025-02-22 22:26:01] (step=0023000) Train Loss: 0.3469, Train Steps/Sec: 17.44, Grad Norm: 0.1076 +[2025-02-22 22:26:07] (step=0023100) Train Loss: 0.3471, Train Steps/Sec: 17.41, Grad Norm: 0.1101 +[2025-02-22 22:26:13] (step=0023200) Train Loss: 0.3469, Train Steps/Sec: 15.96, Grad Norm: 0.0986 +[2025-02-22 22:26:20] (step=0023300) Train Loss: 0.3469, Train Steps/Sec: 14.23, Grad Norm: 0.0952 +[2025-02-22 22:26:26] (step=0023400) Train Loss: 0.3468, Train Steps/Sec: 17.41, Grad Norm: 0.1053 +[2025-02-22 22:26:32] (step=0023500) Train Loss: 0.3468, Train Steps/Sec: 17.36, Grad Norm: 0.1061 +[2025-02-22 22:26:38] (step=0023600) Train Loss: 0.3467, Train Steps/Sec: 15.95, Grad Norm: 0.1032 +[2025-02-22 22:26:45] (step=0023700) Train Loss: 0.3467, Train Steps/Sec: 13.63, Grad Norm: 0.1044 +[2025-02-22 22:26:52] (step=0023800) Train Loss: 0.3467, Train Steps/Sec: 14.59, Grad Norm: 0.1012 +[2025-02-22 22:26:58] (step=0023900) Train Loss: 0.3467, Train Steps/Sec: 17.20, Grad Norm: 0.1097 +[2025-02-22 22:27:04] (step=0024000) Train Loss: 0.3472, Train Steps/Sec: 17.24, Grad Norm: 0.0985 +[2025-02-22 22:27:10] (step=0024100) Train Loss: 0.3463, Train Steps/Sec: 17.25, Grad Norm: 0.0974 +[2025-02-22 22:27:16] (step=0024200) Train Loss: 0.3458, Train Steps/Sec: 17.24, Grad Norm: 0.1079 +[2025-02-22 22:27:21] (step=0024300) Train Loss: 0.3465, Train Steps/Sec: 17.25, Grad Norm: 0.1089 +[2025-02-22 22:27:27] (step=0024400) Train Loss: 0.3462, Train Steps/Sec: 17.26, Grad Norm: 0.1014 +[2025-02-22 22:27:33] (step=0024500) Train Loss: 0.3466, Train Steps/Sec: 17.24, Grad Norm: 0.0981 +[2025-02-22 22:27:39] (step=0024600) Train Loss: 0.3458, Train Steps/Sec: 17.25, Grad Norm: 0.1002 +[2025-02-22 22:27:45] (step=0024700) Train Loss: 0.3462, Train Steps/Sec: 17.26, Grad Norm: 0.1084 +[2025-02-22 22:27:50] (step=0024800) Train Loss: 0.3458, Train Steps/Sec: 17.25, Grad Norm: 0.0939 +[2025-02-22 22:27:56] (step=0024900) Train Loss: 0.3461, Train Steps/Sec: 17.23, Grad Norm: 0.0976 +[2025-02-22 22:28:02] (step=0025000) Train Loss: 0.3461, Train Steps/Sec: 17.14, Grad Norm: 0.0971 +[2025-02-22 22:28:09] (step=0025100) Train Loss: 0.3452, Train Steps/Sec: 14.72, Grad Norm: 0.1148 +[2025-02-22 22:28:17] (step=0025200) Train Loss: 0.3461, Train Steps/Sec: 12.77, Grad Norm: 0.1006 +[2025-02-22 22:28:22] (step=0025300) Train Loss: 0.3450, Train Steps/Sec: 17.26, Grad Norm: 0.1063 +[2025-02-22 22:28:28] (step=0025400) Train Loss: 0.3458, Train Steps/Sec: 17.34, Grad Norm: 0.1003 +[2025-02-22 22:28:34] (step=0025500) Train Loss: 0.3458, Train Steps/Sec: 17.37, Grad Norm: 0.0940 +[2025-02-22 22:28:40] (step=0025600) Train Loss: 0.3455, Train Steps/Sec: 16.64, Grad Norm: 0.1041 +[2025-02-22 22:28:47] (step=0025700) Train Loss: 0.3458, Train Steps/Sec: 14.12, Grad Norm: 0.0979 +[2025-02-22 22:28:53] (step=0025800) Train Loss: 0.3453, Train Steps/Sec: 17.40, Grad Norm: 0.1045 +[2025-02-22 22:28:59] (step=0025900) Train Loss: 0.3449, Train Steps/Sec: 17.40, Grad Norm: 0.0968 +[2025-02-22 22:29:04] (step=0026000) Train Loss: 0.3451, Train Steps/Sec: 17.36, Grad Norm: 0.0966 +[2025-02-22 22:29:10] (step=0026100) Train Loss: 0.3452, Train Steps/Sec: 17.33, Grad Norm: 0.0990 +[2025-02-22 22:29:16] (step=0026200) Train Loss: 0.3454, Train Steps/Sec: 17.43, Grad Norm: 0.1081 +[2025-02-22 22:29:23] (step=0026300) Train Loss: 0.3451, Train Steps/Sec: 14.67, Grad Norm: 0.1057 +[2025-02-22 22:29:28] (step=0026400) Train Loss: 0.3451, Train Steps/Sec: 17.41, Grad Norm: 0.0967 +[2025-02-22 22:29:34] (step=0026500) Train Loss: 0.3451, Train Steps/Sec: 17.45, Grad Norm: 0.0959 +[2025-02-22 22:29:40] (step=0026600) Train Loss: 0.3443, Train Steps/Sec: 17.42, Grad Norm: 0.1016 +[2025-02-22 22:29:46] (step=0026700) Train Loss: 0.3455, Train Steps/Sec: 17.42, Grad Norm: 0.0966 +[2025-02-22 22:29:51] (step=0026800) Train Loss: 0.3446, Train Steps/Sec: 17.43, Grad Norm: 0.0995 +[2025-02-22 22:29:57] (step=0026900) Train Loss: 0.3441, Train Steps/Sec: 17.49, Grad Norm: 0.1026 +[2025-02-22 22:30:03] (step=0027000) Train Loss: 0.3439, Train Steps/Sec: 17.40, Grad Norm: 0.0937 +[2025-02-22 22:30:09] (step=0027100) Train Loss: 0.3441, Train Steps/Sec: 15.99, Grad Norm: 0.0993 +[2025-02-22 22:30:16] (step=0027200) Train Loss: 0.3442, Train Steps/Sec: 13.60, Grad Norm: 0.1020 +[2025-02-22 22:30:22] (step=0027300) Train Loss: 0.3448, Train Steps/Sec: 17.37, Grad Norm: 0.0937 +[2025-02-22 22:30:28] (step=0027400) Train Loss: 0.3442, Train Steps/Sec: 17.43, Grad Norm: 0.0926 +[2025-02-22 22:30:34] (step=0027500) Train Loss: 0.3450, Train Steps/Sec: 17.32, Grad Norm: 0.0948 +[2025-02-22 22:30:41] (step=0027600) Train Loss: 0.3441, Train Steps/Sec: 13.58, Grad Norm: 0.1019 +[2025-02-22 22:30:48] (step=0027700) Train Loss: 0.3438, Train Steps/Sec: 13.99, Grad Norm: 0.0953 +[2025-02-22 22:30:54] (step=0027800) Train Loss: 0.3444, Train Steps/Sec: 17.17, Grad Norm: 0.1000 +[2025-02-22 22:31:00] (step=0027900) Train Loss: 0.3437, Train Steps/Sec: 17.15, Grad Norm: 0.0906 +[2025-02-22 22:31:06] (step=0028000) Train Loss: 0.3433, Train Steps/Sec: 17.16, Grad Norm: 0.0988 +[2025-02-22 22:31:11] (step=0028100) Train Loss: 0.3448, Train Steps/Sec: 17.18, Grad Norm: 0.0953 +[2025-02-22 22:31:17] (step=0028200) Train Loss: 0.3438, Train Steps/Sec: 17.35, Grad Norm: 0.0957 +[2025-02-22 22:31:23] (step=0028300) Train Loss: 0.3436, Train Steps/Sec: 17.36, Grad Norm: 0.0998 +[2025-02-22 22:31:29] (step=0028400) Train Loss: 0.3438, Train Steps/Sec: 17.36, Grad Norm: 0.0953 +[2025-02-22 22:31:35] (step=0028500) Train Loss: 0.3442, Train Steps/Sec: 17.32, Grad Norm: 0.0978 +[2025-02-22 22:31:40] (step=0028600) Train Loss: 0.3440, Train Steps/Sec: 17.27, Grad Norm: 0.0928 +[2025-02-22 22:31:46] (step=0028700) Train Loss: 0.3433, Train Steps/Sec: 17.14, Grad Norm: 0.0981 +[2025-02-22 22:31:53] (step=0028800) Train Loss: 0.3435, Train Steps/Sec: 14.54, Grad Norm: 0.0977 +[2025-02-22 22:31:59] (step=0028900) Train Loss: 0.3434, Train Steps/Sec: 17.06, Grad Norm: 0.0921 +[2025-02-22 22:32:05] (step=0029000) Train Loss: 0.3438, Train Steps/Sec: 16.42, Grad Norm: 0.1007 +[2025-02-22 22:32:13] (step=0029100) Train Loss: 0.3434, Train Steps/Sec: 13.04, Grad Norm: 0.0946 +[2025-02-22 22:32:19] (step=0029200) Train Loss: 0.3436, Train Steps/Sec: 17.12, Grad Norm: 0.1006 +[2025-02-22 22:32:24] (step=0029300) Train Loss: 0.3438, Train Steps/Sec: 17.09, Grad Norm: 0.0963 +[2025-02-22 22:32:30] (step=0029400) Train Loss: 0.3442, Train Steps/Sec: 17.10, Grad Norm: 0.0896 +[2025-02-22 22:32:36] (step=0029500) Train Loss: 0.3431, Train Steps/Sec: 17.09, Grad Norm: 0.0966 +[2025-02-22 22:32:42] (step=0029600) Train Loss: 0.3445, Train Steps/Sec: 15.71, Grad Norm: 0.0999 +[2025-02-22 22:32:50] (step=0029700) Train Loss: 0.3437, Train Steps/Sec: 13.59, Grad Norm: 0.1013 +[2025-02-22 22:32:56] (step=0029800) Train Loss: 0.3435, Train Steps/Sec: 17.37, Grad Norm: 0.0966 +[2025-02-22 22:33:01] (step=0029900) Train Loss: 0.3428, Train Steps/Sec: 17.33, Grad Norm: 0.1037 +[2025-02-22 22:33:07] (step=0030000) Train Loss: 0.3424, Train Steps/Sec: 17.10, Grad Norm: 0.0960 +[2025-02-22 22:33:14] (step=0030100) Train Loss: 0.3433, Train Steps/Sec: 14.81, Grad Norm: 0.0986 +[2025-02-22 22:33:20] (step=0030200) Train Loss: 0.3426, Train Steps/Sec: 17.32, Grad Norm: 0.0919 +[2025-02-22 22:33:26] (step=0030300) Train Loss: 0.3434, Train Steps/Sec: 17.31, Grad Norm: 0.0943 +[2025-02-22 22:33:31] (step=0030400) Train Loss: 0.3427, Train Steps/Sec: 17.36, Grad Norm: 0.0950 +[2025-02-22 22:33:37] (step=0030500) Train Loss: 0.3430, Train Steps/Sec: 17.37, Grad Norm: 0.0934 +[2025-02-22 22:33:43] (step=0030600) Train Loss: 0.3423, Train Steps/Sec: 17.33, Grad Norm: 0.1020 +[2025-02-22 22:33:49] (step=0030700) Train Loss: 0.3428, Train Steps/Sec: 17.33, Grad Norm: 0.1007 +[2025-02-22 22:33:54] (step=0030800) Train Loss: 0.3423, Train Steps/Sec: 17.32, Grad Norm: 0.0929 +[2025-02-22 22:34:00] (step=0030900) Train Loss: 0.3424, Train Steps/Sec: 17.44, Grad Norm: 0.0955 +[2025-02-22 22:34:07] (step=0031000) Train Loss: 0.3429, Train Steps/Sec: 14.78, Grad Norm: 0.0962 +[2025-02-22 22:34:13] (step=0031100) Train Loss: 0.3428, Train Steps/Sec: 15.29, Grad Norm: 0.0861 +[2025-02-22 22:34:19] (step=0031200) Train Loss: 0.3429, Train Steps/Sec: 17.46, Grad Norm: 0.0944 +[2025-02-22 22:34:26] (step=0031300) Train Loss: 0.3431, Train Steps/Sec: 14.68, Grad Norm: 0.1003 +[2025-02-22 22:34:32] (step=0031400) Train Loss: 0.3424, Train Steps/Sec: 17.40, Grad Norm: 0.0923 +[2025-02-22 22:34:37] (step=0031500) Train Loss: 0.3424, Train Steps/Sec: 17.40, Grad Norm: 0.1030 +[2025-02-22 22:34:44] (step=0031600) Train Loss: 0.3423, Train Steps/Sec: 16.00, Grad Norm: 0.0921 +[2025-02-22 22:34:51] (step=0031700) Train Loss: 0.3426, Train Steps/Sec: 13.63, Grad Norm: 0.0928 +[2025-02-22 22:34:57] (step=0031800) Train Loss: 0.3431, Train Steps/Sec: 17.40, Grad Norm: 0.0992 +[2025-02-22 22:35:03] (step=0031900) Train Loss: 0.3419, Train Steps/Sec: 17.37, Grad Norm: 0.0916 +[2025-02-22 22:35:08] (step=0032000) Train Loss: 0.3421, Train Steps/Sec: 17.46, Grad Norm: 0.0975 +[2025-02-22 22:35:14] (step=0032100) Train Loss: 0.3426, Train Steps/Sec: 17.44, Grad Norm: 0.0973 +[2025-02-22 22:35:20] (step=0032200) Train Loss: 0.3424, Train Steps/Sec: 17.45, Grad Norm: 0.0915 +[2025-02-22 22:35:25] (step=0032300) Train Loss: 0.3424, Train Steps/Sec: 17.40, Grad Norm: 0.0915 +[2025-02-22 22:35:31] (step=0032400) Train Loss: 0.3420, Train Steps/Sec: 17.40, Grad Norm: 0.0936 +[2025-02-22 22:35:37] (step=0032500) Train Loss: 0.3421, Train Steps/Sec: 17.20, Grad Norm: 0.1039 +[2025-02-22 22:35:44] (step=0032600) Train Loss: 0.3424, Train Steps/Sec: 14.87, Grad Norm: 0.0876 +[2025-02-22 22:35:49] (step=0032700) Train Loss: 0.3423, Train Steps/Sec: 17.46, Grad Norm: 0.0984 +[2025-02-22 22:35:55] (step=0032800) Train Loss: 0.3416, Train Steps/Sec: 17.40, Grad Norm: 0.0971 +[2025-02-22 22:36:01] (step=0032900) Train Loss: 0.3419, Train Steps/Sec: 16.67, Grad Norm: 0.0896 +[2025-02-22 22:36:09] (step=0033000) Train Loss: 0.3420, Train Steps/Sec: 13.22, Grad Norm: 0.0892 +[2025-02-22 22:36:15] (step=0033100) Train Loss: 0.3417, Train Steps/Sec: 17.29, Grad Norm: 0.1019 +[2025-02-22 22:36:20] (step=0033200) Train Loss: 0.3415, Train Steps/Sec: 17.31, Grad Norm: 0.1000 +[2025-02-22 22:36:26] (step=0033300) Train Loss: 0.3419, Train Steps/Sec: 17.35, Grad Norm: 0.0935 +[2025-02-22 22:36:32] (step=0033400) Train Loss: 0.3411, Train Steps/Sec: 17.46, Grad Norm: 0.0929 +[2025-02-22 22:36:38] (step=0033500) Train Loss: 0.3409, Train Steps/Sec: 17.41, Grad Norm: 0.0904 +[2025-02-22 22:36:44] (step=0033600) Train Loss: 0.3413, Train Steps/Sec: 16.71, Grad Norm: 0.0932 +[2025-02-22 22:36:51] (step=0033700) Train Loss: 0.3413, Train Steps/Sec: 13.24, Grad Norm: 0.0853 +[2025-02-22 22:36:58] (step=0033800) Train Loss: 0.3416, Train Steps/Sec: 14.62, Grad Norm: 0.1015 +[2025-02-22 22:37:04] (step=0033900) Train Loss: 0.3408, Train Steps/Sec: 17.36, Grad Norm: 0.0885 +[2025-02-22 22:37:10] (step=0034000) Train Loss: 0.3408, Train Steps/Sec: 17.39, Grad Norm: 0.0901 +[2025-02-22 22:37:15] (step=0034100) Train Loss: 0.3412, Train Steps/Sec: 17.41, Grad Norm: 0.0908 +[2025-02-22 22:37:21] (step=0034200) Train Loss: 0.3410, Train Steps/Sec: 17.36, Grad Norm: 0.0908 +[2025-02-22 22:37:27] (step=0034300) Train Loss: 0.3418, Train Steps/Sec: 17.33, Grad Norm: 0.0876 +[2025-02-22 22:37:33] (step=0034400) Train Loss: 0.3416, Train Steps/Sec: 17.33, Grad Norm: 0.0904 +[2025-02-22 22:37:38] (step=0034500) Train Loss: 0.3411, Train Steps/Sec: 17.46, Grad Norm: 0.0950 +[2025-02-22 22:37:44] (step=0034600) Train Loss: 0.3408, Train Steps/Sec: 17.39, Grad Norm: 0.0900 +[2025-02-22 22:37:50] (step=0034700) Train Loss: 0.3412, Train Steps/Sec: 17.39, Grad Norm: 0.0910 +[2025-02-22 22:37:56] (step=0034800) Train Loss: 0.3414, Train Steps/Sec: 17.39, Grad Norm: 0.0962 +[2025-02-22 22:38:02] (step=0034900) Train Loss: 0.3408, Train Steps/Sec: 14.80, Grad Norm: 0.0945 +[2025-02-22 22:38:09] (step=0035000) Train Loss: 0.3407, Train Steps/Sec: 15.14, Grad Norm: 0.0880 +[2025-02-22 22:38:16] (step=0035100) Train Loss: 0.3402, Train Steps/Sec: 14.78, Grad Norm: 0.0874 +[2025-02-22 22:38:21] (step=0035200) Train Loss: 0.3405, Train Steps/Sec: 17.31, Grad Norm: 0.0890 +[2025-02-22 22:38:27] (step=0035300) Train Loss: 0.3407, Train Steps/Sec: 17.34, Grad Norm: 0.0900 +[2025-02-22 22:38:33] (step=0035400) Train Loss: 0.3408, Train Steps/Sec: 17.43, Grad Norm: 0.0900 +[2025-02-22 22:38:39] (step=0035500) Train Loss: 0.3408, Train Steps/Sec: 17.42, Grad Norm: 0.0934 +[2025-02-22 22:38:45] (step=0035600) Train Loss: 0.3400, Train Steps/Sec: 16.70, Grad Norm: 0.0886 +[2025-02-22 22:38:52] (step=0035700) Train Loss: 0.3404, Train Steps/Sec: 13.66, Grad Norm: 0.0900 +[2025-02-22 22:38:58] (step=0035800) Train Loss: 0.3418, Train Steps/Sec: 17.48, Grad Norm: 0.0947 +[2025-02-22 22:39:03] (step=0035900) Train Loss: 0.3409, Train Steps/Sec: 17.45, Grad Norm: 0.0830 +[2025-02-22 22:39:09] (step=0036000) Train Loss: 0.3409, Train Steps/Sec: 17.40, Grad Norm: 0.0934 +[2025-02-22 22:39:15] (step=0036100) Train Loss: 0.3404, Train Steps/Sec: 17.32, Grad Norm: 0.0946 +[2025-02-22 22:39:21] (step=0036200) Train Loss: 0.3404, Train Steps/Sec: 17.35, Grad Norm: 0.0923 +[2025-02-22 22:39:28] (step=0036300) Train Loss: 0.3404, Train Steps/Sec: 14.62, Grad Norm: 0.0874 +[2025-02-22 22:39:33] (step=0036400) Train Loss: 0.3408, Train Steps/Sec: 17.11, Grad Norm: 0.0941 +[2025-02-22 22:39:39] (step=0036500) Train Loss: 0.3400, Train Steps/Sec: 17.13, Grad Norm: 0.0921 +[2025-02-22 22:39:45] (step=0036600) Train Loss: 0.3404, Train Steps/Sec: 17.08, Grad Norm: 0.0829 +[2025-02-22 22:39:51] (step=0036700) Train Loss: 0.3402, Train Steps/Sec: 17.11, Grad Norm: 0.0876 +[2025-02-22 22:39:57] (step=0036800) Train Loss: 0.3400, Train Steps/Sec: 15.81, Grad Norm: 0.0858 +[2025-02-22 22:40:05] (step=0036900) Train Loss: 0.3405, Train Steps/Sec: 13.56, Grad Norm: 0.0847 +[2025-02-22 22:40:11] (step=0037000) Train Loss: 0.3403, Train Steps/Sec: 17.16, Grad Norm: 0.0892 +[2025-02-22 22:40:16] (step=0037100) Train Loss: 0.3398, Train Steps/Sec: 17.16, Grad Norm: 0.0886 +[2025-02-22 22:40:22] (step=0037200) Train Loss: 0.3399, Train Steps/Sec: 17.11, Grad Norm: 0.0887 +[2025-02-22 22:40:28] (step=0037300) Train Loss: 0.3403, Train Steps/Sec: 17.11, Grad Norm: 0.0858 +[2025-02-22 22:40:34] (step=0037400) Train Loss: 0.3401, Train Steps/Sec: 17.14, Grad Norm: 0.0938 +[2025-02-22 22:40:40] (step=0037500) Train Loss: 0.3398, Train Steps/Sec: 17.00, Grad Norm: 0.0812 +[2025-02-22 22:40:47] (step=0037600) Train Loss: 0.3402, Train Steps/Sec: 14.09, Grad Norm: 0.0910 +[2025-02-22 22:40:54] (step=0037700) Train Loss: 0.3400, Train Steps/Sec: 13.13, Grad Norm: 0.0849 +[2025-02-22 22:41:00] (step=0037800) Train Loss: 0.3395, Train Steps/Sec: 17.38, Grad Norm: 0.0890 +[2025-02-22 22:41:06] (step=0037900) Train Loss: 0.3403, Train Steps/Sec: 17.36, Grad Norm: 0.0863 +[2025-02-22 22:41:12] (step=0038000) Train Loss: 0.3399, Train Steps/Sec: 17.35, Grad Norm: 0.0903 +[2025-02-22 22:41:18] (step=0038100) Train Loss: 0.3402, Train Steps/Sec: 17.28, Grad Norm: 0.0907 +[2025-02-22 22:41:23] (step=0038200) Train Loss: 0.3387, Train Steps/Sec: 17.29, Grad Norm: 0.0872 +[2025-02-22 22:41:29] (step=0038300) Train Loss: 0.3397, Train Steps/Sec: 17.36, Grad Norm: 0.0980 +[2025-02-22 22:41:35] (step=0038400) Train Loss: 0.3400, Train Steps/Sec: 17.37, Grad Norm: 0.0785 +[2025-02-22 22:41:41] (step=0038500) Train Loss: 0.3397, Train Steps/Sec: 17.33, Grad Norm: 0.0865 +[2025-02-22 22:41:46] (step=0038600) Train Loss: 0.3401, Train Steps/Sec: 17.27, Grad Norm: 0.0835 +[2025-02-22 22:41:52] (step=0038700) Train Loss: 0.3394, Train Steps/Sec: 17.31, Grad Norm: 0.0937 +[2025-02-22 22:42:01] (step=0038800) Train Loss: 0.3393, Train Steps/Sec: 11.99, Grad Norm: 0.0846 +[2025-02-22 22:42:07] (step=0038900) Train Loss: 0.3397, Train Steps/Sec: 15.77, Grad Norm: 0.0856 +[2025-02-22 22:42:13] (step=0039000) Train Loss: 0.3399, Train Steps/Sec: 17.29, Grad Norm: 0.0879 +[2025-02-22 22:42:18] (step=0039100) Train Loss: 0.3397, Train Steps/Sec: 17.26, Grad Norm: 0.0830 +[2025-02-22 22:42:24] (step=0039200) Train Loss: 0.3394, Train Steps/Sec: 17.37, Grad Norm: 0.0817 +[2025-02-22 22:42:30] (step=0039300) Train Loss: 0.3392, Train Steps/Sec: 17.39, Grad Norm: 0.0859 +[2025-02-22 22:42:36] (step=0039400) Train Loss: 0.3391, Train Steps/Sec: 17.34, Grad Norm: 0.0865 +[2025-02-22 22:42:42] (step=0039500) Train Loss: 0.3395, Train Steps/Sec: 17.39, Grad Norm: 0.0929 +[2025-02-22 22:42:47] (step=0039600) Train Loss: 0.3397, Train Steps/Sec: 17.35, Grad Norm: 0.0873 +[2025-02-22 22:42:55] (step=0039700) Train Loss: 0.3393, Train Steps/Sec: 12.70, Grad Norm: 0.0847 +[2025-02-22 22:43:01] (step=0039800) Train Loss: 0.3387, Train Steps/Sec: 17.22, Grad Norm: 0.0919 +[2025-02-22 22:43:07] (step=0039900) Train Loss: 0.3394, Train Steps/Sec: 17.38, Grad Norm: 0.0805 +[2025-02-22 22:43:13] (step=0040000) Train Loss: 0.3400, Train Steps/Sec: 17.27, Grad Norm: 0.0827 +[2025-02-22 22:43:19] (step=0040100) Train Loss: 0.3389, Train Steps/Sec: 14.72, Grad Norm: 0.0864 +[2025-02-22 22:43:25] (step=0040200) Train Loss: 0.3398, Train Steps/Sec: 17.27, Grad Norm: 0.0878 +[2025-02-22 22:43:31] (step=0040300) Train Loss: 0.3389, Train Steps/Sec: 17.32, Grad Norm: 0.0888 +[2025-02-22 22:43:37] (step=0040400) Train Loss: 0.3390, Train Steps/Sec: 17.33, Grad Norm: 0.0815 +[2025-02-22 22:43:42] (step=0040500) Train Loss: 0.3392, Train Steps/Sec: 17.33, Grad Norm: 0.0872 +[2025-02-22 22:43:48] (step=0040600) Train Loss: 0.3391, Train Steps/Sec: 17.31, Grad Norm: 0.0863 +[2025-02-22 22:43:54] (step=0040700) Train Loss: 0.3393, Train Steps/Sec: 16.59, Grad Norm: 0.0827 +[2025-02-22 22:44:01] (step=0040800) Train Loss: 0.3386, Train Steps/Sec: 14.63, Grad Norm: 0.0777 +[2025-02-22 22:44:07] (step=0040900) Train Loss: 0.3394, Train Steps/Sec: 16.49, Grad Norm: 0.0915 +[2025-02-22 22:44:13] (step=0041000) Train Loss: 0.3385, Train Steps/Sec: 17.29, Grad Norm: 0.0843 +[2025-02-22 22:44:19] (step=0041100) Train Loss: 0.3390, Train Steps/Sec: 17.30, Grad Norm: 0.0858 +[2025-02-22 22:44:25] (step=0041200) Train Loss: 0.3390, Train Steps/Sec: 17.23, Grad Norm: 0.0843 +[2025-02-22 22:44:31] (step=0041300) Train Loss: 0.3390, Train Steps/Sec: 14.59, Grad Norm: 0.0801 +[2025-02-22 22:44:37] (step=0041400) Train Loss: 0.3391, Train Steps/Sec: 17.24, Grad Norm: 0.0832 +[2025-02-22 22:44:43] (step=0041500) Train Loss: 0.3391, Train Steps/Sec: 17.24, Grad Norm: 0.0835 +[2025-02-22 22:44:49] (step=0041600) Train Loss: 0.3382, Train Steps/Sec: 17.31, Grad Norm: 0.0886 +[2025-02-22 22:44:56] (step=0041700) Train Loss: 0.3386, Train Steps/Sec: 13.71, Grad Norm: 0.0804 +[2025-02-22 22:45:02] (step=0041800) Train Loss: 0.3389, Train Steps/Sec: 16.66, Grad Norm: 0.0807 +[2025-02-22 22:45:08] (step=0041900) Train Loss: 0.3389, Train Steps/Sec: 17.42, Grad Norm: 0.0893 +[2025-02-22 22:45:14] (step=0042000) Train Loss: 0.3386, Train Steps/Sec: 17.50, Grad Norm: 0.0854 +[2025-02-22 22:45:19] (step=0042100) Train Loss: 0.3384, Train Steps/Sec: 17.44, Grad Norm: 0.0867 +[2025-02-22 22:45:25] (step=0042200) Train Loss: 0.3391, Train Steps/Sec: 17.48, Grad Norm: 0.0793 +[2025-02-22 22:45:31] (step=0042300) Train Loss: 0.3392, Train Steps/Sec: 17.47, Grad Norm: 0.0862 +[2025-02-22 22:45:36] (step=0042400) Train Loss: 0.3388, Train Steps/Sec: 17.49, Grad Norm: 0.0820 +[2025-02-22 22:45:42] (step=0042500) Train Loss: 0.3385, Train Steps/Sec: 17.49, Grad Norm: 0.0833 +[2025-02-22 22:45:49] (step=0042600) Train Loss: 0.3382, Train Steps/Sec: 14.50, Grad Norm: 0.0912 +[2025-02-22 22:45:56] (step=0042700) Train Loss: 0.3386, Train Steps/Sec: 13.78, Grad Norm: 0.0799 +[2025-02-22 22:46:03] (step=0042800) Train Loss: 0.3383, Train Steps/Sec: 15.86, Grad Norm: 0.0797 +[2025-02-22 22:46:08] (step=0042900) Train Loss: 0.3379, Train Steps/Sec: 17.47, Grad Norm: 0.0884 +[2025-02-22 22:46:14] (step=0043000) Train Loss: 0.3384, Train Steps/Sec: 17.44, Grad Norm: 0.0822 +[2025-02-22 22:46:20] (step=0043100) Train Loss: 0.3386, Train Steps/Sec: 17.42, Grad Norm: 0.0855 +[2025-02-22 22:46:26] (step=0043200) Train Loss: 0.3384, Train Steps/Sec: 17.44, Grad Norm: 0.0851 +[2025-02-22 22:46:31] (step=0043300) Train Loss: 0.3385, Train Steps/Sec: 17.41, Grad Norm: 0.0783 +[2025-02-22 22:46:37] (step=0043400) Train Loss: 0.3380, Train Steps/Sec: 17.42, Grad Norm: 0.0876 +[2025-02-22 22:46:43] (step=0043500) Train Loss: 0.3382, Train Steps/Sec: 17.44, Grad Norm: 0.0930 +[2025-02-22 22:46:49] (step=0043600) Train Loss: 0.3385, Train Steps/Sec: 17.42, Grad Norm: 0.0824 +[2025-02-22 22:46:56] (step=0043700) Train Loss: 0.3384, Train Steps/Sec: 13.27, Grad Norm: 0.0808 +[2025-02-22 22:47:03] (step=0043800) Train Loss: 0.3382, Train Steps/Sec: 13.99, Grad Norm: 0.0844 +[2025-02-22 22:47:09] (step=0043900) Train Loss: 0.3377, Train Steps/Sec: 17.23, Grad Norm: 0.0844 +[2025-02-22 22:47:15] (step=0044000) Train Loss: 0.3384, Train Steps/Sec: 17.27, Grad Norm: 0.0776 +[2025-02-22 22:47:21] (step=0044100) Train Loss: 0.3387, Train Steps/Sec: 17.23, Grad Norm: 0.0813 +[2025-02-22 22:47:26] (step=0044200) Train Loss: 0.3379, Train Steps/Sec: 17.35, Grad Norm: 0.0825 +[2025-02-22 22:47:32] (step=0044300) Train Loss: 0.3377, Train Steps/Sec: 17.38, Grad Norm: 0.0867 +[2025-02-22 22:47:38] (step=0044400) Train Loss: 0.3381, Train Steps/Sec: 17.41, Grad Norm: 0.0814 +[2025-02-22 22:47:44] (step=0044500) Train Loss: 0.3382, Train Steps/Sec: 17.38, Grad Norm: 0.0806 +[2025-02-22 22:47:50] (step=0044600) Train Loss: 0.3374, Train Steps/Sec: 16.03, Grad Norm: 0.0832 +[2025-02-22 22:47:57] (step=0044700) Train Loss: 0.3376, Train Steps/Sec: 14.20, Grad Norm: 0.0851 +[2025-02-22 22:48:03] (step=0044800) Train Loss: 0.3379, Train Steps/Sec: 16.62, Grad Norm: 0.0871 +[2025-02-22 22:48:09] (step=0044900) Train Loss: 0.3379, Train Steps/Sec: 17.42, Grad Norm: 0.0759 +[2025-02-22 22:48:14] (step=0045000) Train Loss: 0.3374, Train Steps/Sec: 17.45, Grad Norm: 0.0949 +[2025-02-22 22:48:21] (step=0045100) Train Loss: 0.3384, Train Steps/Sec: 14.57, Grad Norm: 0.0873 +[2025-02-22 22:48:27] (step=0045200) Train Loss: 0.3380, Train Steps/Sec: 17.34, Grad Norm: 0.0869 +[2025-02-22 22:48:33] (step=0045300) Train Loss: 0.3376, Train Steps/Sec: 17.37, Grad Norm: 0.0790 +[2025-02-22 22:48:39] (step=0045400) Train Loss: 0.3378, Train Steps/Sec: 17.38, Grad Norm: 0.0773 +[2025-02-22 22:48:44] (step=0045500) Train Loss: 0.3368, Train Steps/Sec: 17.38, Grad Norm: 0.0769 +[2025-02-22 22:48:50] (step=0045600) Train Loss: 0.3378, Train Steps/Sec: 17.34, Grad Norm: 0.0838 +[2025-02-22 22:48:58] (step=0045700) Train Loss: 0.3377, Train Steps/Sec: 13.24, Grad Norm: 0.0842 +[2025-02-22 22:49:04] (step=0045800) Train Loss: 0.3379, Train Steps/Sec: 16.58, Grad Norm: 0.0858 +[2025-02-22 22:49:09] (step=0045900) Train Loss: 0.3375, Train Steps/Sec: 17.41, Grad Norm: 0.0830 +[2025-02-22 22:49:15] (step=0046000) Train Loss: 0.3378, Train Steps/Sec: 17.39, Grad Norm: 0.0854 +[2025-02-22 22:49:21] (step=0046100) Train Loss: 0.3380, Train Steps/Sec: 17.40, Grad Norm: 0.0815 +[2025-02-22 22:49:27] (step=0046200) Train Loss: 0.3375, Train Steps/Sec: 17.36, Grad Norm: 0.0741 +[2025-02-22 22:49:33] (step=0046300) Train Loss: 0.3384, Train Steps/Sec: 14.67, Grad Norm: 0.0801 +[2025-02-22 22:49:39] (step=0046400) Train Loss: 0.3377, Train Steps/Sec: 17.41, Grad Norm: 0.0845 +[2025-02-22 22:49:45] (step=0046500) Train Loss: 0.3372, Train Steps/Sec: 16.67, Grad Norm: 0.0848 +[2025-02-22 22:49:52] (step=0046600) Train Loss: 0.3377, Train Steps/Sec: 14.22, Grad Norm: 0.0758 +[2025-02-22 22:49:58] (step=0046700) Train Loss: 0.3378, Train Steps/Sec: 16.63, Grad Norm: 0.0915 +[2025-02-22 22:50:04] (step=0046800) Train Loss: 0.3370, Train Steps/Sec: 17.45, Grad Norm: 0.0777 +[2025-02-22 22:50:10] (step=0046900) Train Loss: 0.3371, Train Steps/Sec: 17.49, Grad Norm: 0.0863 +[2025-02-22 22:50:15] (step=0047000) Train Loss: 0.3372, Train Steps/Sec: 17.49, Grad Norm: 0.0859 +[2025-02-22 22:50:21] (step=0047100) Train Loss: 0.3376, Train Steps/Sec: 17.50, Grad Norm: 0.0882 +[2025-02-22 22:50:27] (step=0047200) Train Loss: 0.3372, Train Steps/Sec: 17.47, Grad Norm: 0.0787 +[2025-02-22 22:50:33] (step=0047300) Train Loss: 0.3373, Train Steps/Sec: 17.48, Grad Norm: 0.0790 +[2025-02-22 22:50:38] (step=0047400) Train Loss: 0.3368, Train Steps/Sec: 17.48, Grad Norm: 0.0725 +[2025-02-22 22:50:44] (step=0047500) Train Loss: 0.3371, Train Steps/Sec: 17.42, Grad Norm: 0.0891 +[2025-02-22 22:50:51] (step=0047600) Train Loss: 0.3376, Train Steps/Sec: 14.58, Grad Norm: 0.0742 +[2025-02-22 22:50:59] (step=0047700) Train Loss: 0.3373, Train Steps/Sec: 13.21, Grad Norm: 0.0834 +[2025-02-22 22:51:05] (step=0047800) Train Loss: 0.3373, Train Steps/Sec: 16.57, Grad Norm: 0.0781 +[2025-02-22 22:51:10] (step=0047900) Train Loss: 0.3373, Train Steps/Sec: 17.42, Grad Norm: 0.0782 +[2025-02-22 22:51:16] (step=0048000) Train Loss: 0.3373, Train Steps/Sec: 17.32, Grad Norm: 0.0858 +[2025-02-22 22:51:22] (step=0048100) Train Loss: 0.3376, Train Steps/Sec: 17.29, Grad Norm: 0.0807 +[2025-02-22 22:51:28] (step=0048200) Train Loss: 0.3373, Train Steps/Sec: 17.41, Grad Norm: 0.0818 +[2025-02-22 22:51:33] (step=0048300) Train Loss: 0.3376, Train Steps/Sec: 17.42, Grad Norm: 0.0780 +[2025-02-22 22:51:39] (step=0048400) Train Loss: 0.3373, Train Steps/Sec: 17.42, Grad Norm: 0.0802 +[2025-02-22 22:51:45] (step=0048500) Train Loss: 0.3373, Train Steps/Sec: 16.15, Grad Norm: 0.0850 +[2025-02-22 22:51:52] (step=0048600) Train Loss: 0.3366, Train Steps/Sec: 14.35, Grad Norm: 0.0849 +[2025-02-22 22:51:58] (step=0048700) Train Loss: 0.3378, Train Steps/Sec: 16.69, Grad Norm: 0.0774 +[2025-02-22 22:52:05] (step=0048800) Train Loss: 0.3370, Train Steps/Sec: 14.51, Grad Norm: 0.0822 +[2025-02-22 22:52:11] (step=0048900) Train Loss: 0.3368, Train Steps/Sec: 17.27, Grad Norm: 0.0777 +[2025-02-22 22:52:17] (step=0049000) Train Loss: 0.3367, Train Steps/Sec: 17.35, Grad Norm: 0.0840 +[2025-02-22 22:52:23] (step=0049100) Train Loss: 0.3367, Train Steps/Sec: 17.27, Grad Norm: 0.0822 +[2025-02-22 22:52:28] (step=0049200) Train Loss: 0.3375, Train Steps/Sec: 17.26, Grad Norm: 0.0776 +[2025-02-22 22:52:34] (step=0049300) Train Loss: 0.3364, Train Steps/Sec: 17.28, Grad Norm: 0.0745 +[2025-02-22 22:52:40] (step=0049400) Train Loss: 0.3369, Train Steps/Sec: 17.28, Grad Norm: 0.0832 +[2025-02-22 22:52:46] (step=0049500) Train Loss: 0.3367, Train Steps/Sec: 17.33, Grad Norm: 0.0754 +[2025-02-22 22:52:51] (step=0049600) Train Loss: 0.3371, Train Steps/Sec: 17.37, Grad Norm: 0.0775 +[2025-02-22 22:52:59] (step=0049700) Train Loss: 0.3362, Train Steps/Sec: 13.28, Grad Norm: 0.0746 +[2025-02-22 22:53:05] (step=0049800) Train Loss: 0.3364, Train Steps/Sec: 16.67, Grad Norm: 0.0792 +[2025-02-22 22:53:11] (step=0049900) Train Loss: 0.3369, Train Steps/Sec: 17.48, Grad Norm: 0.0768 +[2025-02-22 22:53:16] (step=0050000) Train Loss: 0.3365, Train Steps/Sec: 17.45, Grad Norm: 0.0790 +[2025-02-22 22:53:18] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0050000.pt +[2025-02-22 22:53:25] (step=0050100) Train Loss: 0.3360, Train Steps/Sec: 12.05, Grad Norm: 0.0790 +[2025-02-22 22:53:30] (step=0050200) Train Loss: 0.3368, Train Steps/Sec: 17.37, Grad Norm: 0.0738 +[2025-02-22 22:53:36] (step=0050300) Train Loss: 0.3372, Train Steps/Sec: 16.63, Grad Norm: 0.0816 +[2025-02-22 22:53:42] (step=0050400) Train Loss: 0.3365, Train Steps/Sec: 16.67, Grad Norm: 0.0850 +[2025-02-22 22:53:50] (step=0050500) Train Loss: 0.3372, Train Steps/Sec: 14.24, Grad Norm: 0.0802 +[2025-02-22 22:53:56] (step=0050600) Train Loss: 0.3365, Train Steps/Sec: 16.50, Grad Norm: 0.0797 +[2025-02-22 22:54:01] (step=0050700) Train Loss: 0.3363, Train Steps/Sec: 17.27, Grad Norm: 0.0743 +[2025-02-22 22:54:07] (step=0050800) Train Loss: 0.3365, Train Steps/Sec: 17.28, Grad Norm: 0.0771 +[2025-02-22 22:54:13] (step=0050900) Train Loss: 0.3367, Train Steps/Sec: 17.35, Grad Norm: 0.0848 +[2025-02-22 22:54:19] (step=0051000) Train Loss: 0.3363, Train Steps/Sec: 16.83, Grad Norm: 0.0737 +[2025-02-22 22:54:25] (step=0051100) Train Loss: 0.3363, Train Steps/Sec: 16.87, Grad Norm: 0.0802 +[2025-02-22 22:54:31] (step=0051200) Train Loss: 0.3374, Train Steps/Sec: 16.99, Grad Norm: 0.0781 +[2025-02-22 22:54:38] (step=0051300) Train Loss: 0.3366, Train Steps/Sec: 14.21, Grad Norm: 0.0788 +[2025-02-22 22:54:43] (step=0051400) Train Loss: 0.3365, Train Steps/Sec: 17.32, Grad Norm: 0.0804 +[2025-02-22 22:54:49] (step=0051500) Train Loss: 0.3361, Train Steps/Sec: 17.35, Grad Norm: 0.0824 +[2025-02-22 22:54:55] (step=0051600) Train Loss: 0.3364, Train Steps/Sec: 17.41, Grad Norm: 0.0860 +[2025-02-22 22:55:03] (step=0051700) Train Loss: 0.3365, Train Steps/Sec: 13.19, Grad Norm: 0.0724 +[2025-02-22 22:55:09] (step=0051800) Train Loss: 0.3359, Train Steps/Sec: 16.58, Grad Norm: 0.0811 +[2025-02-22 22:55:14] (step=0051900) Train Loss: 0.3360, Train Steps/Sec: 17.37, Grad Norm: 0.0779 +[2025-02-22 22:55:20] (step=0052000) Train Loss: 0.3361, Train Steps/Sec: 17.37, Grad Norm: 0.0798 +[2025-02-22 22:55:26] (step=0052100) Train Loss: 0.3361, Train Steps/Sec: 17.38, Grad Norm: 0.0730 +[2025-02-22 22:55:32] (step=0052200) Train Loss: 0.3361, Train Steps/Sec: 17.35, Grad Norm: 0.0751 +[2025-02-22 22:55:37] (step=0052300) Train Loss: 0.3362, Train Steps/Sec: 17.36, Grad Norm: 0.0748 +[2025-02-22 22:55:44] (step=0052400) Train Loss: 0.3359, Train Steps/Sec: 16.01, Grad Norm: 0.0778 +[2025-02-22 22:55:51] (step=0052500) Train Loss: 0.3364, Train Steps/Sec: 14.19, Grad Norm: 0.0728 +[2025-02-22 22:55:58] (step=0052600) Train Loss: 0.3366, Train Steps/Sec: 14.06, Grad Norm: 0.0839 +[2025-02-22 22:56:04] (step=0052700) Train Loss: 0.3365, Train Steps/Sec: 17.40, Grad Norm: 0.0808 +[2025-02-22 22:56:09] (step=0052800) Train Loss: 0.3362, Train Steps/Sec: 17.42, Grad Norm: 0.0703 +[2025-02-22 22:56:15] (step=0052900) Train Loss: 0.3366, Train Steps/Sec: 17.42, Grad Norm: 0.0802 +[2025-02-22 22:56:21] (step=0053000) Train Loss: 0.3362, Train Steps/Sec: 17.44, Grad Norm: 0.0824 +[2025-02-22 22:56:27] (step=0053100) Train Loss: 0.3360, Train Steps/Sec: 17.38, Grad Norm: 0.0803 +[2025-02-22 22:56:32] (step=0053200) Train Loss: 0.3361, Train Steps/Sec: 17.33, Grad Norm: 0.0778 +[2025-02-22 22:56:38] (step=0053300) Train Loss: 0.3358, Train Steps/Sec: 17.23, Grad Norm: 0.0739 +[2025-02-22 22:56:44] (step=0053400) Train Loss: 0.3369, Train Steps/Sec: 17.36, Grad Norm: 0.0802 +[2025-02-22 22:56:50] (step=0053500) Train Loss: 0.3366, Train Steps/Sec: 17.38, Grad Norm: 0.0799 +[2025-02-22 22:56:55] (step=0053600) Train Loss: 0.3357, Train Steps/Sec: 17.40, Grad Norm: 0.0775 +[2025-02-22 22:57:03] (step=0053700) Train Loss: 0.3360, Train Steps/Sec: 13.69, Grad Norm: 0.0803 +[2025-02-22 22:57:10] (step=0053800) Train Loss: 0.3359, Train Steps/Sec: 13.65, Grad Norm: 0.0820 +[2025-02-22 22:57:16] (step=0053900) Train Loss: 0.3355, Train Steps/Sec: 17.35, Grad Norm: 0.0752 +[2025-02-22 22:57:22] (step=0054000) Train Loss: 0.3359, Train Steps/Sec: 17.42, Grad Norm: 0.0728 +[2025-02-22 22:57:27] (step=0054100) Train Loss: 0.3361, Train Steps/Sec: 17.42, Grad Norm: 0.0846 +[2025-02-22 22:57:33] (step=0054200) Train Loss: 0.3361, Train Steps/Sec: 17.40, Grad Norm: 0.0828 +[2025-02-22 22:57:39] (step=0054300) Train Loss: 0.3354, Train Steps/Sec: 16.73, Grad Norm: 0.0681 +[2025-02-22 22:57:46] (step=0054400) Train Loss: 0.3369, Train Steps/Sec: 14.30, Grad Norm: 0.0803 +[2025-02-22 22:57:52] (step=0054500) Train Loss: 0.3358, Train Steps/Sec: 16.66, Grad Norm: 0.0810 +[2025-02-22 22:57:58] (step=0054600) Train Loss: 0.3357, Train Steps/Sec: 17.41, Grad Norm: 0.0813 +[2025-02-22 22:58:04] (step=0054700) Train Loss: 0.3357, Train Steps/Sec: 17.33, Grad Norm: 0.0690 +[2025-02-22 22:58:09] (step=0054800) Train Loss: 0.3356, Train Steps/Sec: 17.39, Grad Norm: 0.0775 +[2025-02-22 22:58:15] (step=0054900) Train Loss: 0.3360, Train Steps/Sec: 17.39, Grad Norm: 0.0792 +[2025-02-22 22:58:21] (step=0055000) Train Loss: 0.3357, Train Steps/Sec: 17.45, Grad Norm: 0.0791 +[2025-02-22 22:58:28] (step=0055100) Train Loss: 0.3354, Train Steps/Sec: 14.64, Grad Norm: 0.0720 +[2025-02-22 22:58:33] (step=0055200) Train Loss: 0.3357, Train Steps/Sec: 17.34, Grad Norm: 0.0765 +[2025-02-22 22:58:39] (step=0055300) Train Loss: 0.3348, Train Steps/Sec: 17.36, Grad Norm: 0.0723 +[2025-02-22 22:58:45] (step=0055400) Train Loss: 0.3354, Train Steps/Sec: 17.37, Grad Norm: 0.0786 +[2025-02-22 22:58:51] (step=0055500) Train Loss: 0.3352, Train Steps/Sec: 17.35, Grad Norm: 0.0742 +[2025-02-22 22:58:56] (step=0055600) Train Loss: 0.3350, Train Steps/Sec: 17.28, Grad Norm: 0.0712 +[2025-02-22 22:59:04] (step=0055700) Train Loss: 0.3355, Train Steps/Sec: 13.67, Grad Norm: 0.0740 +[2025-02-22 22:59:10] (step=0055800) Train Loss: 0.3342, Train Steps/Sec: 15.95, Grad Norm: 0.0764 +[2025-02-22 22:59:16] (step=0055900) Train Loss: 0.3351, Train Steps/Sec: 17.43, Grad Norm: 0.0773 +[2025-02-22 22:59:22] (step=0056000) Train Loss: 0.3354, Train Steps/Sec: 17.45, Grad Norm: 0.0775 +[2025-02-22 22:59:27] (step=0056100) Train Loss: 0.3354, Train Steps/Sec: 17.41, Grad Norm: 0.0735 +[2025-02-22 22:59:33] (step=0056200) Train Loss: 0.3353, Train Steps/Sec: 17.32, Grad Norm: 0.0750 +[2025-02-22 22:59:41] (step=0056300) Train Loss: 0.3353, Train Steps/Sec: 12.37, Grad Norm: 0.0758 +[2025-02-22 22:59:47] (step=0056400) Train Loss: 0.3347, Train Steps/Sec: 15.77, Grad Norm: 0.0763 +[2025-02-22 22:59:54] (step=0056500) Train Loss: 0.3357, Train Steps/Sec: 16.35, Grad Norm: 0.0725 +[2025-02-22 22:59:59] (step=0056600) Train Loss: 0.3354, Train Steps/Sec: 17.07, Grad Norm: 0.0838 +[2025-02-22 23:00:05] (step=0056700) Train Loss: 0.3357, Train Steps/Sec: 17.04, Grad Norm: 0.0712 +[2025-02-22 23:00:11] (step=0056800) Train Loss: 0.3346, Train Steps/Sec: 17.14, Grad Norm: 0.0706 +[2025-02-22 23:00:17] (step=0056900) Train Loss: 0.3359, Train Steps/Sec: 17.16, Grad Norm: 0.0768 +[2025-02-22 23:00:23] (step=0057000) Train Loss: 0.3357, Train Steps/Sec: 17.29, Grad Norm: 0.0787 +[2025-02-22 23:00:29] (step=0057100) Train Loss: 0.3353, Train Steps/Sec: 17.32, Grad Norm: 0.0706 +[2025-02-22 23:00:34] (step=0057200) Train Loss: 0.3351, Train Steps/Sec: 17.26, Grad Norm: 0.0762 +[2025-02-22 23:00:40] (step=0057300) Train Loss: 0.3349, Train Steps/Sec: 17.22, Grad Norm: 0.0847 +[2025-02-22 23:00:46] (step=0057400) Train Loss: 0.3360, Train Steps/Sec: 17.22, Grad Norm: 0.0745 +[2025-02-22 23:00:52] (step=0057500) Train Loss: 0.3351, Train Steps/Sec: 17.25, Grad Norm: 0.0789 +[2025-02-22 23:00:59] (step=0057600) Train Loss: 0.3356, Train Steps/Sec: 14.47, Grad Norm: 0.0794 +[2025-02-22 23:01:06] (step=0057700) Train Loss: 0.3348, Train Steps/Sec: 13.03, Grad Norm: 0.0709 +[2025-02-22 23:01:12] (step=0057800) Train Loss: 0.3355, Train Steps/Sec: 16.28, Grad Norm: 0.0656 +[2025-02-22 23:01:18] (step=0057900) Train Loss: 0.3350, Train Steps/Sec: 17.06, Grad Norm: 0.0785 +[2025-02-22 23:01:24] (step=0058000) Train Loss: 0.3349, Train Steps/Sec: 17.11, Grad Norm: 0.0766 +[2025-02-22 23:01:30] (step=0058100) Train Loss: 0.3351, Train Steps/Sec: 17.14, Grad Norm: 0.0745 +[2025-02-22 23:01:36] (step=0058200) Train Loss: 0.3346, Train Steps/Sec: 16.47, Grad Norm: 0.0719 +[2025-02-22 23:01:43] (step=0058300) Train Loss: 0.3352, Train Steps/Sec: 13.53, Grad Norm: 0.0706 +[2025-02-22 23:01:50] (step=0058400) Train Loss: 0.3350, Train Steps/Sec: 16.40, Grad Norm: 0.0753 +[2025-02-22 23:01:55] (step=0058500) Train Loss: 0.3349, Train Steps/Sec: 17.15, Grad Norm: 0.0795 +[2025-02-22 23:02:01] (step=0058600) Train Loss: 0.3351, Train Steps/Sec: 17.17, Grad Norm: 0.0800 +[2025-02-22 23:02:07] (step=0058700) Train Loss: 0.3352, Train Steps/Sec: 17.15, Grad Norm: 0.0693 +[2025-02-22 23:02:14] (step=0058800) Train Loss: 0.3351, Train Steps/Sec: 14.52, Grad Norm: 0.0769 +[2025-02-22 23:02:20] (step=0058900) Train Loss: 0.3351, Train Steps/Sec: 16.96, Grad Norm: 0.0814 +[2025-02-22 23:02:26] (step=0059000) Train Loss: 0.3354, Train Steps/Sec: 17.11, Grad Norm: 0.0745 +[2025-02-22 23:02:32] (step=0059100) Train Loss: 0.3348, Train Steps/Sec: 17.24, Grad Norm: 0.0769 +[2025-02-22 23:02:37] (step=0059200) Train Loss: 0.3345, Train Steps/Sec: 17.34, Grad Norm: 0.0735 +[2025-02-22 23:02:43] (step=0059300) Train Loss: 0.3351, Train Steps/Sec: 17.28, Grad Norm: 0.0744 +[2025-02-22 23:02:49] (step=0059400) Train Loss: 0.3349, Train Steps/Sec: 17.31, Grad Norm: 0.0763 +[2025-02-22 23:02:55] (step=0059500) Train Loss: 0.3355, Train Steps/Sec: 17.42, Grad Norm: 0.0688 +[2025-02-22 23:03:00] (step=0059600) Train Loss: 0.3360, Train Steps/Sec: 17.35, Grad Norm: 0.0714 +[2025-02-22 23:03:07] (step=0059700) Train Loss: 0.3351, Train Steps/Sec: 14.12, Grad Norm: 0.0887 +[2025-02-22 23:03:14] (step=0059800) Train Loss: 0.3351, Train Steps/Sec: 15.20, Grad Norm: 0.0772 +[2025-02-22 23:03:20] (step=0059900) Train Loss: 0.3345, Train Steps/Sec: 17.39, Grad Norm: 0.0693 +[2025-02-22 23:03:26] (step=0060000) Train Loss: 0.3342, Train Steps/Sec: 17.41, Grad Norm: 0.0810 +[2025-02-22 23:03:32] (step=0060100) Train Loss: 0.3342, Train Steps/Sec: 14.75, Grad Norm: 0.0755 +[2025-02-22 23:03:39] (step=0060200) Train Loss: 0.3353, Train Steps/Sec: 14.76, Grad Norm: 0.0727 +[2025-02-22 23:03:46] (step=0060300) Train Loss: 0.3350, Train Steps/Sec: 15.50, Grad Norm: 0.0741 +[2025-02-22 23:03:52] (step=0060400) Train Loss: 0.3345, Train Steps/Sec: 16.74, Grad Norm: 0.0815 +[2025-02-22 23:03:57] (step=0060500) Train Loss: 0.3343, Train Steps/Sec: 17.48, Grad Norm: 0.0716 +[2025-02-22 23:04:03] (step=0060600) Train Loss: 0.3344, Train Steps/Sec: 17.53, Grad Norm: 0.0720 +[2025-02-22 23:04:09] (step=0060700) Train Loss: 0.3343, Train Steps/Sec: 17.46, Grad Norm: 0.0776 +[2025-02-22 23:04:14] (step=0060800) Train Loss: 0.3347, Train Steps/Sec: 17.41, Grad Norm: 0.0740 +[2025-02-22 23:04:20] (step=0060900) Train Loss: 0.3343, Train Steps/Sec: 17.28, Grad Norm: 0.0703 +[2025-02-22 23:04:26] (step=0061000) Train Loss: 0.3349, Train Steps/Sec: 17.30, Grad Norm: 0.0786 +[2025-02-22 23:04:32] (step=0061100) Train Loss: 0.3348, Train Steps/Sec: 17.06, Grad Norm: 0.0696 +[2025-02-22 23:04:38] (step=0061200) Train Loss: 0.3347, Train Steps/Sec: 17.08, Grad Norm: 0.0809 +[2025-02-22 23:04:45] (step=0061300) Train Loss: 0.3350, Train Steps/Sec: 14.55, Grad Norm: 0.0754 +[2025-02-22 23:04:50] (step=0061400) Train Loss: 0.3348, Train Steps/Sec: 17.19, Grad Norm: 0.0774 +[2025-02-22 23:04:56] (step=0061500) Train Loss: 0.3342, Train Steps/Sec: 17.42, Grad Norm: 0.0730 +[2025-02-22 23:05:02] (step=0061600) Train Loss: 0.3349, Train Steps/Sec: 17.39, Grad Norm: 0.0728 +[2025-02-22 23:05:09] (step=0061700) Train Loss: 0.3345, Train Steps/Sec: 14.12, Grad Norm: 0.0791 +[2025-02-22 23:05:15] (step=0061800) Train Loss: 0.3345, Train Steps/Sec: 15.35, Grad Norm: 0.0768 +[2025-02-22 23:05:21] (step=0061900) Train Loss: 0.3343, Train Steps/Sec: 17.33, Grad Norm: 0.0716 +[2025-02-22 23:05:27] (step=0062000) Train Loss: 0.3345, Train Steps/Sec: 17.36, Grad Norm: 0.0741 +[2025-02-22 23:05:33] (step=0062100) Train Loss: 0.3351, Train Steps/Sec: 16.04, Grad Norm: 0.0699 +[2025-02-22 23:05:40] (step=0062200) Train Loss: 0.3341, Train Steps/Sec: 14.24, Grad Norm: 0.0781 +[2025-02-22 23:05:46] (step=0062300) Train Loss: 0.3341, Train Steps/Sec: 16.65, Grad Norm: 0.0705 +[2025-02-22 23:05:52] (step=0062400) Train Loss: 0.3346, Train Steps/Sec: 17.41, Grad Norm: 0.0790 +[2025-02-22 23:05:58] (step=0062500) Train Loss: 0.3349, Train Steps/Sec: 17.34, Grad Norm: 0.0743 +[2025-02-22 23:06:05] (step=0062600) Train Loss: 0.3343, Train Steps/Sec: 14.59, Grad Norm: 0.0752 +[2025-02-22 23:06:10] (step=0062700) Train Loss: 0.3348, Train Steps/Sec: 17.24, Grad Norm: 0.0731 +[2025-02-22 23:06:16] (step=0062800) Train Loss: 0.3345, Train Steps/Sec: 16.88, Grad Norm: 0.0769 +[2025-02-22 23:06:22] (step=0062900) Train Loss: 0.3343, Train Steps/Sec: 16.82, Grad Norm: 0.0739 +[2025-02-22 23:06:28] (step=0063000) Train Loss: 0.3344, Train Steps/Sec: 16.82, Grad Norm: 0.0755 +[2025-02-22 23:06:34] (step=0063100) Train Loss: 0.3343, Train Steps/Sec: 16.73, Grad Norm: 0.0743 +[2025-02-22 23:06:40] (step=0063200) Train Loss: 0.3342, Train Steps/Sec: 16.71, Grad Norm: 0.0717 +[2025-02-22 23:06:46] (step=0063300) Train Loss: 0.3342, Train Steps/Sec: 17.19, Grad Norm: 0.0716 +[2025-02-22 23:06:52] (step=0063400) Train Loss: 0.3347, Train Steps/Sec: 17.17, Grad Norm: 0.0739 +[2025-02-22 23:06:58] (step=0063500) Train Loss: 0.3340, Train Steps/Sec: 17.18, Grad Norm: 0.0715 +[2025-02-22 23:07:04] (step=0063600) Train Loss: 0.3346, Train Steps/Sec: 17.08, Grad Norm: 0.0758 +[2025-02-22 23:07:11] (step=0063700) Train Loss: 0.3346, Train Steps/Sec: 14.40, Grad Norm: 0.0686 +[2025-02-22 23:07:17] (step=0063800) Train Loss: 0.3345, Train Steps/Sec: 14.49, Grad Norm: 0.0739 +[2025-02-22 23:07:24] (step=0063900) Train Loss: 0.3336, Train Steps/Sec: 14.41, Grad Norm: 0.0738 +[2025-02-22 23:07:30] (step=0064000) Train Loss: 0.3339, Train Steps/Sec: 17.38, Grad Norm: 0.0633 +[2025-02-22 23:07:37] (step=0064100) Train Loss: 0.3336, Train Steps/Sec: 14.75, Grad Norm: 0.0697 +[2025-02-22 23:07:43] (step=0064200) Train Loss: 0.3341, Train Steps/Sec: 15.85, Grad Norm: 0.0743 +[2025-02-22 23:07:49] (step=0064300) Train Loss: 0.3344, Train Steps/Sec: 16.59, Grad Norm: 0.0743 +[2025-02-22 23:07:55] (step=0064400) Train Loss: 0.3335, Train Steps/Sec: 17.36, Grad Norm: 0.0698 +[2025-02-22 23:08:01] (step=0064500) Train Loss: 0.3340, Train Steps/Sec: 17.38, Grad Norm: 0.0669 +[2025-02-22 23:08:07] (step=0064600) Train Loss: 0.3339, Train Steps/Sec: 17.38, Grad Norm: 0.0723 +[2025-02-22 23:08:12] (step=0064700) Train Loss: 0.3342, Train Steps/Sec: 17.39, Grad Norm: 0.0719 +[2025-02-22 23:08:18] (step=0064800) Train Loss: 0.3334, Train Steps/Sec: 17.44, Grad Norm: 0.0699 +[2025-02-22 23:08:24] (step=0064900) Train Loss: 0.3341, Train Steps/Sec: 17.41, Grad Norm: 0.0736 +[2025-02-22 23:08:30] (step=0065000) Train Loss: 0.3340, Train Steps/Sec: 17.41, Grad Norm: 0.0720 +[2025-02-22 23:08:36] (step=0065100) Train Loss: 0.3339, Train Steps/Sec: 14.56, Grad Norm: 0.0819 +[2025-02-22 23:08:42] (step=0065200) Train Loss: 0.3344, Train Steps/Sec: 17.42, Grad Norm: 0.0715 +[2025-02-22 23:08:48] (step=0065300) Train Loss: 0.3341, Train Steps/Sec: 17.35, Grad Norm: 0.0766 +[2025-02-22 23:08:54] (step=0065400) Train Loss: 0.3343, Train Steps/Sec: 17.38, Grad Norm: 0.0695 +[2025-02-22 23:08:59] (step=0065500) Train Loss: 0.3343, Train Steps/Sec: 17.41, Grad Norm: 0.0758 +[2025-02-22 23:09:05] (step=0065600) Train Loss: 0.3338, Train Steps/Sec: 17.38, Grad Norm: 0.0689 +[2025-02-22 23:09:12] (step=0065700) Train Loss: 0.3342, Train Steps/Sec: 14.06, Grad Norm: 0.0765 +[2025-02-22 23:09:19] (step=0065800) Train Loss: 0.3342, Train Steps/Sec: 15.08, Grad Norm: 0.0727 +[2025-02-22 23:09:25] (step=0065900) Train Loss: 0.3346, Train Steps/Sec: 17.22, Grad Norm: 0.0716 +[2025-02-22 23:09:31] (step=0066000) Train Loss: 0.3344, Train Steps/Sec: 15.22, Grad Norm: 0.0779 +[2025-02-22 23:09:38] (step=0066100) Train Loss: 0.3339, Train Steps/Sec: 15.18, Grad Norm: 0.0761 +[2025-02-22 23:09:44] (step=0066200) Train Loss: 0.3336, Train Steps/Sec: 17.17, Grad Norm: 0.0769 +[2025-02-22 23:09:50] (step=0066300) Train Loss: 0.3339, Train Steps/Sec: 16.36, Grad Norm: 0.0709 +[2025-02-22 23:09:57] (step=0066400) Train Loss: 0.3341, Train Steps/Sec: 14.40, Grad Norm: 0.0769 +[2025-02-22 23:10:03] (step=0066500) Train Loss: 0.3338, Train Steps/Sec: 17.29, Grad Norm: 0.0697 +[2025-02-22 23:10:08] (step=0066600) Train Loss: 0.3343, Train Steps/Sec: 17.33, Grad Norm: 0.0745 +[2025-02-22 23:10:14] (step=0066700) Train Loss: 0.3336, Train Steps/Sec: 17.22, Grad Norm: 0.0687 +[2025-02-22 23:10:20] (step=0066800) Train Loss: 0.3334, Train Steps/Sec: 17.26, Grad Norm: 0.0740 +[2025-02-22 23:10:26] (step=0066900) Train Loss: 0.3334, Train Steps/Sec: 17.23, Grad Norm: 0.0678 +[2025-02-22 23:10:32] (step=0067000) Train Loss: 0.3341, Train Steps/Sec: 17.19, Grad Norm: 0.0733 +[2025-02-22 23:10:37] (step=0067100) Train Loss: 0.3333, Train Steps/Sec: 17.19, Grad Norm: 0.0673 +[2025-02-22 23:10:43] (step=0067200) Train Loss: 0.3339, Train Steps/Sec: 17.26, Grad Norm: 0.0720 +[2025-02-22 23:10:49] (step=0067300) Train Loss: 0.3341, Train Steps/Sec: 17.25, Grad Norm: 0.0684 +[2025-02-22 23:10:55] (step=0067400) Train Loss: 0.3338, Train Steps/Sec: 17.25, Grad Norm: 0.0783 +[2025-02-22 23:11:01] (step=0067500) Train Loss: 0.3339, Train Steps/Sec: 17.25, Grad Norm: 0.0704 +[2025-02-22 23:11:07] (step=0067600) Train Loss: 0.3340, Train Steps/Sec: 14.47, Grad Norm: 0.0698 +[2025-02-22 23:11:15] (step=0067700) Train Loss: 0.3332, Train Steps/Sec: 14.07, Grad Norm: 0.0743 +[2025-02-22 23:11:21] (step=0067800) Train Loss: 0.3343, Train Steps/Sec: 15.29, Grad Norm: 0.0702 +[2025-02-22 23:11:27] (step=0067900) Train Loss: 0.3338, Train Steps/Sec: 16.70, Grad Norm: 0.0715 +[2025-02-22 23:11:34] (step=0068000) Train Loss: 0.3332, Train Steps/Sec: 14.76, Grad Norm: 0.0730 +[2025-02-22 23:11:40] (step=0068100) Train Loss: 0.3331, Train Steps/Sec: 16.55, Grad Norm: 0.0676 +[2025-02-22 23:11:46] (step=0068200) Train Loss: 0.3330, Train Steps/Sec: 16.51, Grad Norm: 0.0759 +[2025-02-22 23:11:52] (step=0068300) Train Loss: 0.3332, Train Steps/Sec: 17.32, Grad Norm: 0.0717 +[2025-02-22 23:11:58] (step=0068400) Train Loss: 0.3337, Train Steps/Sec: 17.25, Grad Norm: 0.0730 +[2025-02-22 23:12:03] (step=0068500) Train Loss: 0.3330, Train Steps/Sec: 17.35, Grad Norm: 0.0728 +[2025-02-22 23:12:09] (step=0068600) Train Loss: 0.3339, Train Steps/Sec: 17.40, Grad Norm: 0.0793 +[2025-02-22 23:12:15] (step=0068700) Train Loss: 0.3335, Train Steps/Sec: 17.33, Grad Norm: 0.0765 +[2025-02-22 23:12:21] (step=0068800) Train Loss: 0.3335, Train Steps/Sec: 17.31, Grad Norm: 0.0743 +[2025-02-22 23:12:28] (step=0068900) Train Loss: 0.3335, Train Steps/Sec: 14.44, Grad Norm: 0.0742 +[2025-02-22 23:12:33] (step=0069000) Train Loss: 0.3337, Train Steps/Sec: 17.31, Grad Norm: 0.0727 +[2025-02-22 23:12:39] (step=0069100) Train Loss: 0.3340, Train Steps/Sec: 17.27, Grad Norm: 0.0720 +[2025-02-22 23:12:45] (step=0069200) Train Loss: 0.3337, Train Steps/Sec: 17.31, Grad Norm: 0.0765 +[2025-02-22 23:12:51] (step=0069300) Train Loss: 0.3329, Train Steps/Sec: 17.38, Grad Norm: 0.0639 +[2025-02-22 23:12:56] (step=0069400) Train Loss: 0.3330, Train Steps/Sec: 17.37, Grad Norm: 0.0701 +[2025-02-22 23:13:02] (step=0069500) Train Loss: 0.3325, Train Steps/Sec: 17.35, Grad Norm: 0.0641 +[2025-02-22 23:13:08] (step=0069600) Train Loss: 0.3332, Train Steps/Sec: 17.35, Grad Norm: 0.0682 +[2025-02-22 23:13:15] (step=0069700) Train Loss: 0.3332, Train Steps/Sec: 14.63, Grad Norm: 0.0745 +[2025-02-22 23:13:22] (step=0069800) Train Loss: 0.3333, Train Steps/Sec: 14.67, Grad Norm: 0.0762 +[2025-02-22 23:13:28] (step=0069900) Train Loss: 0.3338, Train Steps/Sec: 15.40, Grad Norm: 0.0674 +[2025-02-22 23:13:35] (step=0070000) Train Loss: 0.3337, Train Steps/Sec: 14.77, Grad Norm: 0.0720 +[2025-02-22 23:13:42] (step=0070100) Train Loss: 0.3327, Train Steps/Sec: 13.73, Grad Norm: 0.0794 +[2025-02-22 23:13:48] (step=0070200) Train Loss: 0.3326, Train Steps/Sec: 17.18, Grad Norm: 0.0695 +[2025-02-22 23:13:54] (step=0070300) Train Loss: 0.3334, Train Steps/Sec: 17.21, Grad Norm: 0.0657 +[2025-02-22 23:14:00] (step=0070400) Train Loss: 0.3337, Train Steps/Sec: 17.17, Grad Norm: 0.0792 +[2025-02-22 23:14:05] (step=0070500) Train Loss: 0.3335, Train Steps/Sec: 17.19, Grad Norm: 0.0760 +[2025-02-22 23:14:11] (step=0070600) Train Loss: 0.3335, Train Steps/Sec: 17.17, Grad Norm: 0.0715 +[2025-02-22 23:14:17] (step=0070700) Train Loss: 0.3331, Train Steps/Sec: 17.18, Grad Norm: 0.0752 +[2025-02-22 23:14:23] (step=0070800) Train Loss: 0.3337, Train Steps/Sec: 17.31, Grad Norm: 0.0701 +[2025-02-22 23:14:29] (step=0070900) Train Loss: 0.3328, Train Steps/Sec: 17.32, Grad Norm: 0.0729 +[2025-02-22 23:14:34] (step=0071000) Train Loss: 0.3334, Train Steps/Sec: 17.28, Grad Norm: 0.0712 +[2025-02-22 23:14:40] (step=0071100) Train Loss: 0.3331, Train Steps/Sec: 17.28, Grad Norm: 0.0678 +[2025-02-22 23:14:46] (step=0071200) Train Loss: 0.3326, Train Steps/Sec: 17.30, Grad Norm: 0.0769 +[2025-02-22 23:14:52] (step=0071300) Train Loss: 0.3325, Train Steps/Sec: 17.24, Grad Norm: 0.0694 +[2025-02-22 23:14:59] (step=0071400) Train Loss: 0.3338, Train Steps/Sec: 14.35, Grad Norm: 0.0677 +[2025-02-22 23:15:05] (step=0071500) Train Loss: 0.3331, Train Steps/Sec: 17.22, Grad Norm: 0.0727 +[2025-02-22 23:15:10] (step=0071600) Train Loss: 0.3335, Train Steps/Sec: 17.23, Grad Norm: 0.0665 +[2025-02-22 23:15:17] (step=0071700) Train Loss: 0.3328, Train Steps/Sec: 14.57, Grad Norm: 0.0695 +[2025-02-22 23:15:25] (step=0071800) Train Loss: 0.3330, Train Steps/Sec: 13.60, Grad Norm: 0.0669 +[2025-02-22 23:15:31] (step=0071900) Train Loss: 0.3331, Train Steps/Sec: 15.21, Grad Norm: 0.0686 +[2025-02-22 23:15:38] (step=0072000) Train Loss: 0.3332, Train Steps/Sec: 15.81, Grad Norm: 0.0730 +[2025-02-22 23:15:44] (step=0072100) Train Loss: 0.3332, Train Steps/Sec: 16.38, Grad Norm: 0.0783 +[2025-02-22 23:15:49] (step=0072200) Train Loss: 0.3327, Train Steps/Sec: 17.22, Grad Norm: 0.0663 +[2025-02-22 23:15:55] (step=0072300) Train Loss: 0.3330, Train Steps/Sec: 17.21, Grad Norm: 0.0686 +[2025-02-22 23:16:01] (step=0072400) Train Loss: 0.3330, Train Steps/Sec: 17.13, Grad Norm: 0.0664 +[2025-02-22 23:16:07] (step=0072500) Train Loss: 0.3329, Train Steps/Sec: 17.15, Grad Norm: 0.0665 +[2025-02-22 23:16:14] (step=0072600) Train Loss: 0.3331, Train Steps/Sec: 14.38, Grad Norm: 0.0686 +[2025-02-22 23:16:20] (step=0072700) Train Loss: 0.3327, Train Steps/Sec: 17.28, Grad Norm: 0.0633 +[2025-02-22 23:16:25] (step=0072800) Train Loss: 0.3334, Train Steps/Sec: 17.31, Grad Norm: 0.0679 +[2025-02-22 23:16:31] (step=0072900) Train Loss: 0.3329, Train Steps/Sec: 17.26, Grad Norm: 0.0640 +[2025-02-22 23:16:37] (step=0073000) Train Loss: 0.3329, Train Steps/Sec: 17.30, Grad Norm: 0.0749 +[2025-02-22 23:16:43] (step=0073100) Train Loss: 0.3328, Train Steps/Sec: 17.35, Grad Norm: 0.0688 +[2025-02-22 23:16:49] (step=0073200) Train Loss: 0.3331, Train Steps/Sec: 17.35, Grad Norm: 0.0690 +[2025-02-22 23:16:54] (step=0073300) Train Loss: 0.3327, Train Steps/Sec: 17.39, Grad Norm: 0.0720 +[2025-02-22 23:17:00] (step=0073400) Train Loss: 0.3333, Train Steps/Sec: 17.35, Grad Norm: 0.0677 +[2025-02-22 23:17:06] (step=0073500) Train Loss: 0.3339, Train Steps/Sec: 17.30, Grad Norm: 0.0795 +[2025-02-22 23:17:12] (step=0073600) Train Loss: 0.3326, Train Steps/Sec: 17.31, Grad Norm: 0.0713 +[2025-02-22 23:17:18] (step=0073700) Train Loss: 0.3324, Train Steps/Sec: 15.85, Grad Norm: 0.0674 +[2025-02-22 23:17:26] (step=0073800) Train Loss: 0.3328, Train Steps/Sec: 13.12, Grad Norm: 0.0718 +[2025-02-22 23:17:34] (step=0073900) Train Loss: 0.3327, Train Steps/Sec: 12.12, Grad Norm: 0.0680 +[2025-02-22 23:17:40] (step=0074000) Train Loss: 0.3330, Train Steps/Sec: 17.32, Grad Norm: 0.0730 +[2025-02-22 23:17:46] (step=0074100) Train Loss: 0.3326, Train Steps/Sec: 16.52, Grad Norm: 0.0649 +[2025-02-22 23:17:51] (step=0074200) Train Loss: 0.3325, Train Steps/Sec: 17.30, Grad Norm: 0.0659 +[2025-02-22 23:17:57] (step=0074300) Train Loss: 0.3321, Train Steps/Sec: 17.35, Grad Norm: 0.0703 +[2025-02-22 23:18:03] (step=0074400) Train Loss: 0.3332, Train Steps/Sec: 17.37, Grad Norm: 0.0697 +[2025-02-22 23:18:09] (step=0074500) Train Loss: 0.3330, Train Steps/Sec: 17.32, Grad Norm: 0.0618 +[2025-02-22 23:18:15] (step=0074600) Train Loss: 0.3332, Train Steps/Sec: 17.15, Grad Norm: 0.0774 +[2025-02-22 23:18:20] (step=0074700) Train Loss: 0.3338, Train Steps/Sec: 17.25, Grad Norm: 0.0647 +[2025-02-22 23:18:26] (step=0074800) Train Loss: 0.3333, Train Steps/Sec: 17.27, Grad Norm: 0.0697 +[2025-02-22 23:18:32] (step=0074900) Train Loss: 0.3330, Train Steps/Sec: 17.33, Grad Norm: 0.0735 +[2025-02-22 23:18:38] (step=0075000) Train Loss: 0.3320, Train Steps/Sec: 17.36, Grad Norm: 0.0792 +[2025-02-22 23:18:45] (step=0075100) Train Loss: 0.3331, Train Steps/Sec: 14.39, Grad Norm: 0.0687 +[2025-02-22 23:18:50] (step=0075200) Train Loss: 0.3331, Train Steps/Sec: 17.29, Grad Norm: 0.0661 +[2025-02-22 23:18:56] (step=0075300) Train Loss: 0.3329, Train Steps/Sec: 17.21, Grad Norm: 0.0663 +[2025-02-22 23:19:02] (step=0075400) Train Loss: 0.3324, Train Steps/Sec: 17.21, Grad Norm: 0.0672 +[2025-02-22 23:19:08] (step=0075500) Train Loss: 0.3327, Train Steps/Sec: 17.28, Grad Norm: 0.0680 +[2025-02-22 23:19:14] (step=0075600) Train Loss: 0.3328, Train Steps/Sec: 17.36, Grad Norm: 0.0655 +[2025-02-22 23:19:21] (step=0075700) Train Loss: 0.3334, Train Steps/Sec: 14.23, Grad Norm: 0.0679 +[2025-02-22 23:19:29] (step=0075800) Train Loss: 0.3330, Train Steps/Sec: 12.29, Grad Norm: 0.0685 +[2025-02-22 23:19:35] (step=0075900) Train Loss: 0.3324, Train Steps/Sec: 16.57, Grad Norm: 0.0718 +[2025-02-22 23:19:41] (step=0076000) Train Loss: 0.3326, Train Steps/Sec: 16.60, Grad Norm: 0.0673 +[2025-02-22 23:19:47] (step=0076100) Train Loss: 0.3323, Train Steps/Sec: 17.39, Grad Norm: 0.0642 +[2025-02-22 23:19:52] (step=0076200) Train Loss: 0.3324, Train Steps/Sec: 17.33, Grad Norm: 0.0626 +[2025-02-22 23:19:58] (step=0076300) Train Loss: 0.3331, Train Steps/Sec: 17.18, Grad Norm: 0.0685 +[2025-02-22 23:20:05] (step=0076400) Train Loss: 0.3316, Train Steps/Sec: 14.41, Grad Norm: 0.0665 +[2025-02-22 23:20:11] (step=0076500) Train Loss: 0.3328, Train Steps/Sec: 17.38, Grad Norm: 0.0754 +[2025-02-22 23:20:17] (step=0076600) Train Loss: 0.3330, Train Steps/Sec: 17.39, Grad Norm: 0.0723 +[2025-02-22 23:20:22] (step=0076700) Train Loss: 0.3326, Train Steps/Sec: 17.38, Grad Norm: 0.0652 +[2025-02-22 23:20:28] (step=0076800) Train Loss: 0.3323, Train Steps/Sec: 17.35, Grad Norm: 0.0692 +[2025-02-22 23:20:34] (step=0076900) Train Loss: 0.3327, Train Steps/Sec: 17.30, Grad Norm: 0.0679 +[2025-02-22 23:20:40] (step=0077000) Train Loss: 0.3327, Train Steps/Sec: 17.32, Grad Norm: 0.0684 +[2025-02-22 23:20:45] (step=0077100) Train Loss: 0.3323, Train Steps/Sec: 17.33, Grad Norm: 0.0684 +[2025-02-22 23:20:51] (step=0077200) Train Loss: 0.3334, Train Steps/Sec: 17.32, Grad Norm: 0.0696 +[2025-02-22 23:20:57] (step=0077300) Train Loss: 0.3330, Train Steps/Sec: 17.24, Grad Norm: 0.0696 +[2025-02-22 23:21:03] (step=0077400) Train Loss: 0.3331, Train Steps/Sec: 17.22, Grad Norm: 0.0617 +[2025-02-22 23:21:09] (step=0077500) Train Loss: 0.3315, Train Steps/Sec: 17.24, Grad Norm: 0.0702 +[2025-02-22 23:21:16] (step=0077600) Train Loss: 0.3328, Train Steps/Sec: 13.89, Grad Norm: 0.0669 +[2025-02-22 23:21:23] (step=0077700) Train Loss: 0.3326, Train Steps/Sec: 13.21, Grad Norm: 0.0677 +[2025-02-22 23:21:31] (step=0077800) Train Loss: 0.3331, Train Steps/Sec: 13.62, Grad Norm: 0.0668 +[2025-02-22 23:21:37] (step=0077900) Train Loss: 0.3328, Train Steps/Sec: 16.47, Grad Norm: 0.0673 +[2025-02-22 23:21:43] (step=0078000) Train Loss: 0.3324, Train Steps/Sec: 16.45, Grad Norm: 0.0639 +[2025-02-22 23:21:49] (step=0078100) Train Loss: 0.3321, Train Steps/Sec: 17.23, Grad Norm: 0.0673 +[2025-02-22 23:21:55] (step=0078200) Train Loss: 0.3330, Train Steps/Sec: 17.17, Grad Norm: 0.0682 +[2025-02-22 23:22:00] (step=0078300) Train Loss: 0.3323, Train Steps/Sec: 17.27, Grad Norm: 0.0647 +[2025-02-22 23:22:06] (step=0078400) Train Loss: 0.3327, Train Steps/Sec: 17.29, Grad Norm: 0.0711 +[2025-02-22 23:22:12] (step=0078500) Train Loss: 0.3313, Train Steps/Sec: 17.34, Grad Norm: 0.0658 +[2025-02-22 23:22:18] (step=0078600) Train Loss: 0.3327, Train Steps/Sec: 17.36, Grad Norm: 0.0784 +[2025-02-22 23:22:23] (step=0078700) Train Loss: 0.3322, Train Steps/Sec: 17.32, Grad Norm: 0.0767 +[2025-02-22 23:22:29] (step=0078800) Train Loss: 0.3326, Train Steps/Sec: 17.25, Grad Norm: 0.0650 +[2025-02-22 23:22:36] (step=0078900) Train Loss: 0.3325, Train Steps/Sec: 14.43, Grad Norm: 0.0643 +[2025-02-22 23:22:42] (step=0079000) Train Loss: 0.3325, Train Steps/Sec: 17.24, Grad Norm: 0.0687 +[2025-02-22 23:22:48] (step=0079100) Train Loss: 0.3320, Train Steps/Sec: 17.26, Grad Norm: 0.0653 +[2025-02-22 23:22:54] (step=0079200) Train Loss: 0.3323, Train Steps/Sec: 17.26, Grad Norm: 0.0716 +[2025-02-22 23:22:59] (step=0079300) Train Loss: 0.3320, Train Steps/Sec: 17.20, Grad Norm: 0.0669 +[2025-02-22 23:23:05] (step=0079400) Train Loss: 0.3326, Train Steps/Sec: 17.28, Grad Norm: 0.0718 +[2025-02-22 23:23:11] (step=0079500) Train Loss: 0.3320, Train Steps/Sec: 17.31, Grad Norm: 0.0622 +[2025-02-22 23:23:17] (step=0079600) Train Loss: 0.3325, Train Steps/Sec: 15.91, Grad Norm: 0.0609 +[2025-02-22 23:23:25] (step=0079700) Train Loss: 0.3318, Train Steps/Sec: 12.71, Grad Norm: 0.0644 +[2025-02-22 23:23:32] (step=0079800) Train Loss: 0.3322, Train Steps/Sec: 14.50, Grad Norm: 0.0689 +[2025-02-22 23:23:38] (step=0079900) Train Loss: 0.3325, Train Steps/Sec: 15.71, Grad Norm: 0.0645 +[2025-02-22 23:23:44] (step=0080000) Train Loss: 0.3319, Train Steps/Sec: 17.22, Grad Norm: 0.0620 +[2025-02-22 23:23:51] (step=0080100) Train Loss: 0.3327, Train Steps/Sec: 14.30, Grad Norm: 0.0694 +[2025-02-22 23:23:57] (step=0080200) Train Loss: 0.3319, Train Steps/Sec: 17.23, Grad Norm: 0.0706 +[2025-02-22 23:24:03] (step=0080300) Train Loss: 0.3319, Train Steps/Sec: 17.22, Grad Norm: 0.0642 +[2025-02-22 23:24:09] (step=0080400) Train Loss: 0.3319, Train Steps/Sec: 17.35, Grad Norm: 0.0655 +[2025-02-22 23:24:14] (step=0080500) Train Loss: 0.3319, Train Steps/Sec: 17.28, Grad Norm: 0.0692 +[2025-02-22 23:24:20] (step=0080600) Train Loss: 0.3318, Train Steps/Sec: 17.27, Grad Norm: 0.0609 +[2025-02-22 23:24:26] (step=0080700) Train Loss: 0.3325, Train Steps/Sec: 17.20, Grad Norm: 0.0712 +[2025-02-22 23:24:32] (step=0080800) Train Loss: 0.3321, Train Steps/Sec: 17.20, Grad Norm: 0.0624 +[2025-02-22 23:24:38] (step=0080900) Train Loss: 0.3323, Train Steps/Sec: 17.28, Grad Norm: 0.0653 +[2025-02-22 23:24:43] (step=0081000) Train Loss: 0.3324, Train Steps/Sec: 17.38, Grad Norm: 0.0638 +[2025-02-22 23:24:49] (step=0081100) Train Loss: 0.3329, Train Steps/Sec: 17.43, Grad Norm: 0.0666 +[2025-02-22 23:24:55] (step=0081200) Train Loss: 0.3320, Train Steps/Sec: 17.30, Grad Norm: 0.0750 +[2025-02-22 23:25:01] (step=0081300) Train Loss: 0.3316, Train Steps/Sec: 17.26, Grad Norm: 0.0619 +[2025-02-22 23:25:08] (step=0081400) Train Loss: 0.3327, Train Steps/Sec: 13.98, Grad Norm: 0.0731 +[2025-02-22 23:25:14] (step=0081500) Train Loss: 0.3319, Train Steps/Sec: 15.94, Grad Norm: 0.0658 +[2025-02-22 23:25:21] (step=0081600) Train Loss: 0.3321, Train Steps/Sec: 15.34, Grad Norm: 0.0656 +[2025-02-22 23:25:27] (step=0081700) Train Loss: 0.3320, Train Steps/Sec: 14.75, Grad Norm: 0.0716 +[2025-02-22 23:25:34] (step=0081800) Train Loss: 0.3321, Train Steps/Sec: 14.69, Grad Norm: 0.0713 +[2025-02-22 23:25:40] (step=0081900) Train Loss: 0.3322, Train Steps/Sec: 15.89, Grad Norm: 0.0629 +[2025-02-22 23:25:46] (step=0082000) Train Loss: 0.3316, Train Steps/Sec: 17.41, Grad Norm: 0.0685 +[2025-02-22 23:25:52] (step=0082100) Train Loss: 0.3321, Train Steps/Sec: 17.30, Grad Norm: 0.0691 +[2025-02-22 23:25:58] (step=0082200) Train Loss: 0.3316, Train Steps/Sec: 17.39, Grad Norm: 0.0682 +[2025-02-22 23:26:04] (step=0082300) Train Loss: 0.3321, Train Steps/Sec: 17.38, Grad Norm: 0.0614 +[2025-02-22 23:26:09] (step=0082400) Train Loss: 0.3317, Train Steps/Sec: 17.37, Grad Norm: 0.0655 +[2025-02-22 23:26:15] (step=0082500) Train Loss: 0.3324, Train Steps/Sec: 17.31, Grad Norm: 0.0655 +[2025-02-22 23:26:22] (step=0082600) Train Loss: 0.3318, Train Steps/Sec: 14.30, Grad Norm: 0.0733 +[2025-02-22 23:26:28] (step=0082700) Train Loss: 0.3321, Train Steps/Sec: 17.15, Grad Norm: 0.0636 +[2025-02-22 23:26:34] (step=0082800) Train Loss: 0.3320, Train Steps/Sec: 17.27, Grad Norm: 0.0652 +[2025-02-22 23:26:39] (step=0082900) Train Loss: 0.3323, Train Steps/Sec: 17.25, Grad Norm: 0.0627 +[2025-02-22 23:26:45] (step=0083000) Train Loss: 0.3326, Train Steps/Sec: 17.16, Grad Norm: 0.0643 +[2025-02-22 23:26:51] (step=0083100) Train Loss: 0.3315, Train Steps/Sec: 17.13, Grad Norm: 0.0623 +[2025-02-22 23:26:57] (step=0083200) Train Loss: 0.3328, Train Steps/Sec: 17.16, Grad Norm: 0.0609 +[2025-02-22 23:27:03] (step=0083300) Train Loss: 0.3319, Train Steps/Sec: 17.19, Grad Norm: 0.0614 +[2025-02-22 23:27:09] (step=0083400) Train Loss: 0.3318, Train Steps/Sec: 17.24, Grad Norm: 0.0602 +[2025-02-22 23:27:15] (step=0083500) Train Loss: 0.3319, Train Steps/Sec: 15.19, Grad Norm: 0.0703 +[2025-02-22 23:27:22] (step=0083600) Train Loss: 0.3319, Train Steps/Sec: 14.68, Grad Norm: 0.0614 +[2025-02-22 23:27:28] (step=0083700) Train Loss: 0.3316, Train Steps/Sec: 15.90, Grad Norm: 0.0646 +[2025-02-22 23:27:35] (step=0083800) Train Loss: 0.3320, Train Steps/Sec: 14.02, Grad Norm: 0.0615 +[2025-02-22 23:27:43] (step=0083900) Train Loss: 0.3317, Train Steps/Sec: 13.32, Grad Norm: 0.0695 +[2025-02-22 23:27:49] (step=0084000) Train Loss: 0.3314, Train Steps/Sec: 17.19, Grad Norm: 0.0724 +[2025-02-22 23:27:55] (step=0084100) Train Loss: 0.3320, Train Steps/Sec: 17.13, Grad Norm: 0.0602 +[2025-02-22 23:28:00] (step=0084200) Train Loss: 0.3314, Train Steps/Sec: 17.18, Grad Norm: 0.0678 +[2025-02-22 23:28:06] (step=0084300) Train Loss: 0.3316, Train Steps/Sec: 17.22, Grad Norm: 0.0644 +[2025-02-22 23:28:12] (step=0084400) Train Loss: 0.3321, Train Steps/Sec: 17.30, Grad Norm: 0.0608 +[2025-02-22 23:28:18] (step=0084500) Train Loss: 0.3320, Train Steps/Sec: 17.24, Grad Norm: 0.0706 +[2025-02-22 23:28:24] (step=0084600) Train Loss: 0.3318, Train Steps/Sec: 17.28, Grad Norm: 0.0689 +[2025-02-22 23:28:29] (step=0084700) Train Loss: 0.3326, Train Steps/Sec: 17.27, Grad Norm: 0.0678 +[2025-02-22 23:28:35] (step=0084800) Train Loss: 0.3320, Train Steps/Sec: 17.31, Grad Norm: 0.0629 +[2025-02-22 23:28:41] (step=0084900) Train Loss: 0.3319, Train Steps/Sec: 17.26, Grad Norm: 0.0619 +[2025-02-22 23:28:47] (step=0085000) Train Loss: 0.3315, Train Steps/Sec: 17.21, Grad Norm: 0.0613 +[2025-02-22 23:28:54] (step=0085100) Train Loss: 0.3319, Train Steps/Sec: 14.32, Grad Norm: 0.0655 +[2025-02-22 23:29:00] (step=0085200) Train Loss: 0.3313, Train Steps/Sec: 17.27, Grad Norm: 0.0670 +[2025-02-22 23:29:05] (step=0085300) Train Loss: 0.3320, Train Steps/Sec: 17.25, Grad Norm: 0.0735 +[2025-02-22 23:29:12] (step=0085400) Train Loss: 0.3313, Train Steps/Sec: 15.90, Grad Norm: 0.0628 +[2025-02-22 23:29:18] (step=0085500) Train Loss: 0.3314, Train Steps/Sec: 15.13, Grad Norm: 0.0652 +[2025-02-22 23:29:25] (step=0085600) Train Loss: 0.3318, Train Steps/Sec: 15.87, Grad Norm: 0.0676 +[2025-02-22 23:29:31] (step=0085700) Train Loss: 0.3315, Train Steps/Sec: 15.30, Grad Norm: 0.0684 +[2025-02-22 23:29:38] (step=0085800) Train Loss: 0.3317, Train Steps/Sec: 14.03, Grad Norm: 0.0606 +[2025-02-22 23:29:44] (step=0085900) Train Loss: 0.3319, Train Steps/Sec: 16.46, Grad Norm: 0.0702 +[2025-02-22 23:29:50] (step=0086000) Train Loss: 0.3325, Train Steps/Sec: 17.31, Grad Norm: 0.0665 +[2025-02-22 23:29:56] (step=0086100) Train Loss: 0.3311, Train Steps/Sec: 17.29, Grad Norm: 0.0672 +[2025-02-22 23:30:02] (step=0086200) Train Loss: 0.3317, Train Steps/Sec: 17.26, Grad Norm: 0.0642 +[2025-02-22 23:30:07] (step=0086300) Train Loss: 0.3315, Train Steps/Sec: 17.11, Grad Norm: 0.0650 +[2025-02-22 23:30:14] (step=0086400) Train Loss: 0.3320, Train Steps/Sec: 14.29, Grad Norm: 0.0624 +[2025-02-22 23:30:20] (step=0086500) Train Loss: 0.3315, Train Steps/Sec: 17.10, Grad Norm: 0.0710 +[2025-02-22 23:30:26] (step=0086600) Train Loss: 0.3319, Train Steps/Sec: 17.16, Grad Norm: 0.0600 +[2025-02-22 23:30:32] (step=0086700) Train Loss: 0.3314, Train Steps/Sec: 17.18, Grad Norm: 0.0649 +[2025-02-22 23:30:38] (step=0086800) Train Loss: 0.3315, Train Steps/Sec: 17.30, Grad Norm: 0.0619 +[2025-02-22 23:30:44] (step=0086900) Train Loss: 0.3318, Train Steps/Sec: 17.28, Grad Norm: 0.0681 +[2025-02-22 23:30:49] (step=0087000) Train Loss: 0.3316, Train Steps/Sec: 17.16, Grad Norm: 0.0672 +[2025-02-22 23:30:55] (step=0087100) Train Loss: 0.3322, Train Steps/Sec: 17.09, Grad Norm: 0.0607 +[2025-02-22 23:31:01] (step=0087200) Train Loss: 0.3318, Train Steps/Sec: 17.13, Grad Norm: 0.0658 +[2025-02-22 23:31:07] (step=0087300) Train Loss: 0.3311, Train Steps/Sec: 17.11, Grad Norm: 0.0683 +[2025-02-22 23:31:14] (step=0087400) Train Loss: 0.3317, Train Steps/Sec: 15.16, Grad Norm: 0.0631 +[2025-02-22 23:31:20] (step=0087500) Train Loss: 0.3313, Train Steps/Sec: 14.56, Grad Norm: 0.0641 +[2025-02-22 23:31:27] (step=0087600) Train Loss: 0.3317, Train Steps/Sec: 14.15, Grad Norm: 0.0619 +[2025-02-22 23:31:34] (step=0087700) Train Loss: 0.3318, Train Steps/Sec: 15.18, Grad Norm: 0.0629 +[2025-02-22 23:31:41] (step=0087800) Train Loss: 0.3320, Train Steps/Sec: 14.55, Grad Norm: 0.0627 +[2025-02-22 23:31:47] (step=0087900) Train Loss: 0.3313, Train Steps/Sec: 16.44, Grad Norm: 0.0608 +[2025-02-22 23:31:53] (step=0088000) Train Loss: 0.3313, Train Steps/Sec: 17.23, Grad Norm: 0.0643 +[2025-02-22 23:31:59] (step=0088100) Train Loss: 0.3316, Train Steps/Sec: 17.20, Grad Norm: 0.0598 +[2025-02-22 23:32:04] (step=0088200) Train Loss: 0.3316, Train Steps/Sec: 17.14, Grad Norm: 0.0615 +[2025-02-22 23:32:10] (step=0088300) Train Loss: 0.3321, Train Steps/Sec: 17.16, Grad Norm: 0.0634 +[2025-02-22 23:32:16] (step=0088400) Train Loss: 0.3312, Train Steps/Sec: 17.25, Grad Norm: 0.0694 +[2025-02-22 23:32:22] (step=0088500) Train Loss: 0.3314, Train Steps/Sec: 17.28, Grad Norm: 0.0589 +[2025-02-22 23:32:28] (step=0088600) Train Loss: 0.3313, Train Steps/Sec: 17.33, Grad Norm: 0.0611 +[2025-02-22 23:32:33] (step=0088700) Train Loss: 0.3314, Train Steps/Sec: 17.30, Grad Norm: 0.0688 +[2025-02-22 23:32:39] (step=0088800) Train Loss: 0.3321, Train Steps/Sec: 17.17, Grad Norm: 0.0695 +[2025-02-22 23:32:46] (step=0088900) Train Loss: 0.3319, Train Steps/Sec: 14.37, Grad Norm: 0.0662 +[2025-02-22 23:32:52] (step=0089000) Train Loss: 0.3313, Train Steps/Sec: 17.10, Grad Norm: 0.0639 +[2025-02-22 23:32:58] (step=0089100) Train Loss: 0.3314, Train Steps/Sec: 17.20, Grad Norm: 0.0662 +[2025-02-22 23:33:04] (step=0089200) Train Loss: 0.3317, Train Steps/Sec: 17.20, Grad Norm: 0.0647 +[2025-02-22 23:33:10] (step=0089300) Train Loss: 0.3312, Train Steps/Sec: 15.83, Grad Norm: 0.0616 +[2025-02-22 23:33:17] (step=0089400) Train Loss: 0.3318, Train Steps/Sec: 14.05, Grad Norm: 0.0686 +[2025-02-22 23:33:23] (step=0089500) Train Loss: 0.3312, Train Steps/Sec: 17.16, Grad Norm: 0.0616 +[2025-02-22 23:33:29] (step=0089600) Train Loss: 0.3317, Train Steps/Sec: 17.20, Grad Norm: 0.0619 +[2025-02-22 23:33:35] (step=0089700) Train Loss: 0.3315, Train Steps/Sec: 15.21, Grad Norm: 0.0616 +[2025-02-22 23:33:42] (step=0089800) Train Loss: 0.3317, Train Steps/Sec: 14.10, Grad Norm: 0.0613 +[2025-02-22 23:33:49] (step=0089900) Train Loss: 0.3309, Train Steps/Sec: 16.55, Grad Norm: 0.0703 +[2025-02-22 23:33:54] (step=0090000) Train Loss: 0.3310, Train Steps/Sec: 17.38, Grad Norm: 0.0631 +[2025-02-22 23:34:01] (step=0090100) Train Loss: 0.3318, Train Steps/Sec: 14.34, Grad Norm: 0.0640 +[2025-02-22 23:34:07] (step=0090200) Train Loss: 0.3312, Train Steps/Sec: 17.25, Grad Norm: 0.0633 +[2025-02-22 23:34:13] (step=0090300) Train Loss: 0.3313, Train Steps/Sec: 17.27, Grad Norm: 0.0618 +[2025-02-22 23:34:19] (step=0090400) Train Loss: 0.3311, Train Steps/Sec: 17.39, Grad Norm: 0.0592 +[2025-02-22 23:34:24] (step=0090500) Train Loss: 0.3315, Train Steps/Sec: 17.37, Grad Norm: 0.0673 +[2025-02-22 23:34:30] (step=0090600) Train Loss: 0.3307, Train Steps/Sec: 17.33, Grad Norm: 0.0563 +[2025-02-22 23:34:36] (step=0090700) Train Loss: 0.3318, Train Steps/Sec: 17.34, Grad Norm: 0.0628 +[2025-02-22 23:34:42] (step=0090800) Train Loss: 0.3309, Train Steps/Sec: 17.38, Grad Norm: 0.0635 +[2025-02-22 23:34:47] (step=0090900) Train Loss: 0.3316, Train Steps/Sec: 17.39, Grad Norm: 0.0654 +[2025-02-22 23:34:53] (step=0091000) Train Loss: 0.3316, Train Steps/Sec: 17.34, Grad Norm: 0.0620 +[2025-02-22 23:34:59] (step=0091100) Train Loss: 0.3312, Train Steps/Sec: 17.31, Grad Norm: 0.0634 +[2025-02-22 23:35:05] (step=0091200) Train Loss: 0.3315, Train Steps/Sec: 16.62, Grad Norm: 0.0622 +[2025-02-22 23:35:11] (step=0091300) Train Loss: 0.3310, Train Steps/Sec: 15.77, Grad Norm: 0.0659 +[2025-02-22 23:35:19] (step=0091400) Train Loss: 0.3309, Train Steps/Sec: 12.97, Grad Norm: 0.0596 +[2025-02-22 23:35:25] (step=0091500) Train Loss: 0.3309, Train Steps/Sec: 17.31, Grad Norm: 0.0639 +[2025-02-22 23:35:31] (step=0091600) Train Loss: 0.3314, Train Steps/Sec: 17.27, Grad Norm: 0.0652 +[2025-02-22 23:35:37] (step=0091700) Train Loss: 0.3307, Train Steps/Sec: 15.31, Grad Norm: 0.0622 +[2025-02-22 23:35:44] (step=0091800) Train Loss: 0.3308, Train Steps/Sec: 14.11, Grad Norm: 0.0644 +[2025-02-22 23:35:50] (step=0091900) Train Loss: 0.3308, Train Steps/Sec: 16.52, Grad Norm: 0.0642 +[2025-02-22 23:35:56] (step=0092000) Train Loss: 0.3309, Train Steps/Sec: 17.30, Grad Norm: 0.0627 +[2025-02-22 23:36:02] (step=0092100) Train Loss: 0.3311, Train Steps/Sec: 17.27, Grad Norm: 0.0692 +[2025-02-22 23:36:08] (step=0092200) Train Loss: 0.3311, Train Steps/Sec: 17.25, Grad Norm: 0.0643 +[2025-02-22 23:36:13] (step=0092300) Train Loss: 0.3304, Train Steps/Sec: 17.32, Grad Norm: 0.0572 +[2025-02-22 23:36:19] (step=0092400) Train Loss: 0.3308, Train Steps/Sec: 17.37, Grad Norm: 0.0665 +[2025-02-22 23:36:25] (step=0092500) Train Loss: 0.3309, Train Steps/Sec: 17.41, Grad Norm: 0.0629 +[2025-02-22 23:36:32] (step=0092600) Train Loss: 0.3310, Train Steps/Sec: 14.42, Grad Norm: 0.0703 +[2025-02-22 23:36:38] (step=0092700) Train Loss: 0.3306, Train Steps/Sec: 17.23, Grad Norm: 0.0589 +[2025-02-22 23:36:43] (step=0092800) Train Loss: 0.3305, Train Steps/Sec: 17.16, Grad Norm: 0.0678 +[2025-02-22 23:36:49] (step=0092900) Train Loss: 0.3312, Train Steps/Sec: 17.14, Grad Norm: 0.0606 +[2025-02-22 23:36:55] (step=0093000) Train Loss: 0.3313, Train Steps/Sec: 17.19, Grad Norm: 0.0613 +[2025-02-22 23:37:01] (step=0093100) Train Loss: 0.3317, Train Steps/Sec: 17.18, Grad Norm: 0.0653 +[2025-02-22 23:37:08] (step=0093200) Train Loss: 0.3309, Train Steps/Sec: 15.19, Grad Norm: 0.0679 +[2025-02-22 23:37:14] (step=0093300) Train Loss: 0.3307, Train Steps/Sec: 14.62, Grad Norm: 0.0674 +[2025-02-22 23:37:20] (step=0093400) Train Loss: 0.3317, Train Steps/Sec: 17.23, Grad Norm: 0.0613 +[2025-02-22 23:37:26] (step=0093500) Train Loss: 0.3309, Train Steps/Sec: 17.26, Grad Norm: 0.0618 +[2025-02-22 23:37:32] (step=0093600) Train Loss: 0.3305, Train Steps/Sec: 16.54, Grad Norm: 0.0618 +[2025-02-22 23:37:38] (step=0093700) Train Loss: 0.3303, Train Steps/Sec: 16.44, Grad Norm: 0.0588 +[2025-02-22 23:37:46] (step=0093800) Train Loss: 0.3311, Train Steps/Sec: 13.49, Grad Norm: 0.0650 +[2025-02-22 23:37:53] (step=0093900) Train Loss: 0.3308, Train Steps/Sec: 13.84, Grad Norm: 0.0652 +[2025-02-22 23:37:59] (step=0094000) Train Loss: 0.3308, Train Steps/Sec: 17.15, Grad Norm: 0.0661 +[2025-02-22 23:38:04] (step=0094100) Train Loss: 0.3308, Train Steps/Sec: 17.16, Grad Norm: 0.0621 +[2025-02-22 23:38:10] (step=0094200) Train Loss: 0.3306, Train Steps/Sec: 17.17, Grad Norm: 0.0585 +[2025-02-22 23:38:16] (step=0094300) Train Loss: 0.3311, Train Steps/Sec: 17.12, Grad Norm: 0.0609 +[2025-02-22 23:38:22] (step=0094400) Train Loss: 0.3311, Train Steps/Sec: 17.16, Grad Norm: 0.0626 +[2025-02-22 23:38:28] (step=0094500) Train Loss: 0.3308, Train Steps/Sec: 17.25, Grad Norm: 0.0657 +[2025-02-22 23:38:34] (step=0094600) Train Loss: 0.3313, Train Steps/Sec: 17.23, Grad Norm: 0.0574 +[2025-02-22 23:38:39] (step=0094700) Train Loss: 0.3309, Train Steps/Sec: 17.25, Grad Norm: 0.0655 +[2025-02-22 23:38:45] (step=0094800) Train Loss: 0.3307, Train Steps/Sec: 17.20, Grad Norm: 0.0619 +[2025-02-22 23:38:51] (step=0094900) Train Loss: 0.3306, Train Steps/Sec: 17.13, Grad Norm: 0.0586 +[2025-02-22 23:38:57] (step=0095000) Train Loss: 0.3312, Train Steps/Sec: 17.18, Grad Norm: 0.0587 +[2025-02-22 23:39:04] (step=0095100) Train Loss: 0.3313, Train Steps/Sec: 13.77, Grad Norm: 0.0652 +[2025-02-22 23:39:10] (step=0095200) Train Loss: 0.3309, Train Steps/Sec: 15.70, Grad Norm: 0.0649 +[2025-02-22 23:39:17] (step=0095300) Train Loss: 0.3312, Train Steps/Sec: 14.60, Grad Norm: 0.0611 +[2025-02-22 23:39:23] (step=0095400) Train Loss: 0.3311, Train Steps/Sec: 17.27, Grad Norm: 0.0661 +[2025-02-22 23:39:29] (step=0095500) Train Loss: 0.3310, Train Steps/Sec: 17.37, Grad Norm: 0.0672 +[2025-02-22 23:39:35] (step=0095600) Train Loss: 0.3305, Train Steps/Sec: 16.61, Grad Norm: 0.0659 +[2025-02-22 23:39:41] (step=0095700) Train Loss: 0.3307, Train Steps/Sec: 16.56, Grad Norm: 0.0582 +[2025-02-22 23:39:48] (step=0095800) Train Loss: 0.3308, Train Steps/Sec: 13.47, Grad Norm: 0.0615 +[2025-02-22 23:39:54] (step=0095900) Train Loss: 0.3311, Train Steps/Sec: 17.17, Grad Norm: 0.0601 +[2025-02-22 23:40:00] (step=0096000) Train Loss: 0.3311, Train Steps/Sec: 16.41, Grad Norm: 0.0651 +[2025-02-22 23:40:06] (step=0096100) Train Loss: 0.3310, Train Steps/Sec: 17.16, Grad Norm: 0.0600 +[2025-02-22 23:40:12] (step=0096200) Train Loss: 0.3302, Train Steps/Sec: 17.27, Grad Norm: 0.0582 +[2025-02-22 23:40:18] (step=0096300) Train Loss: 0.3307, Train Steps/Sec: 17.24, Grad Norm: 0.0734 +[2025-02-22 23:40:25] (step=0096400) Train Loss: 0.3306, Train Steps/Sec: 14.50, Grad Norm: 0.0630 +[2025-02-22 23:40:30] (step=0096500) Train Loss: 0.3311, Train Steps/Sec: 17.30, Grad Norm: 0.0599 +[2025-02-22 23:40:36] (step=0096600) Train Loss: 0.3308, Train Steps/Sec: 17.34, Grad Norm: 0.0616 +[2025-02-22 23:40:42] (step=0096700) Train Loss: 0.3304, Train Steps/Sec: 17.39, Grad Norm: 0.0662 +[2025-02-22 23:40:48] (step=0096800) Train Loss: 0.3305, Train Steps/Sec: 17.39, Grad Norm: 0.0670 +[2025-02-22 23:40:53] (step=0096900) Train Loss: 0.3310, Train Steps/Sec: 17.43, Grad Norm: 0.0611 +[2025-02-22 23:40:59] (step=0097000) Train Loss: 0.3306, Train Steps/Sec: 17.40, Grad Norm: 0.0616 +[2025-02-22 23:41:06] (step=0097100) Train Loss: 0.3311, Train Steps/Sec: 15.29, Grad Norm: 0.0643 +[2025-02-22 23:41:12] (step=0097200) Train Loss: 0.3308, Train Steps/Sec: 14.67, Grad Norm: 0.0657 +[2025-02-22 23:41:18] (step=0097300) Train Loss: 0.3313, Train Steps/Sec: 17.30, Grad Norm: 0.0618 +[2025-02-22 23:41:24] (step=0097400) Train Loss: 0.3307, Train Steps/Sec: 17.24, Grad Norm: 0.0597 +[2025-02-22 23:41:30] (step=0097500) Train Loss: 0.3311, Train Steps/Sec: 16.45, Grad Norm: 0.0632 +[2025-02-22 23:41:37] (step=0097600) Train Loss: 0.3303, Train Steps/Sec: 14.36, Grad Norm: 0.0633 +[2025-02-22 23:41:43] (step=0097700) Train Loss: 0.3307, Train Steps/Sec: 16.44, Grad Norm: 0.0620 +[2025-02-22 23:41:51] (step=0097800) Train Loss: 0.3311, Train Steps/Sec: 13.49, Grad Norm: 0.0594 +[2025-02-22 23:41:56] (step=0097900) Train Loss: 0.3304, Train Steps/Sec: 17.30, Grad Norm: 0.0612 +[2025-02-22 23:42:02] (step=0098000) Train Loss: 0.3306, Train Steps/Sec: 16.40, Grad Norm: 0.0600 +[2025-02-22 23:42:08] (step=0098100) Train Loss: 0.3312, Train Steps/Sec: 17.22, Grad Norm: 0.0573 +[2025-02-22 23:42:14] (step=0098200) Train Loss: 0.3306, Train Steps/Sec: 17.20, Grad Norm: 0.0601 +[2025-02-22 23:42:20] (step=0098300) Train Loss: 0.3304, Train Steps/Sec: 17.21, Grad Norm: 0.0669 +[2025-02-22 23:42:26] (step=0098400) Train Loss: 0.3308, Train Steps/Sec: 17.13, Grad Norm: 0.0657 +[2025-02-22 23:42:32] (step=0098500) Train Loss: 0.3301, Train Steps/Sec: 17.11, Grad Norm: 0.0634 +[2025-02-22 23:42:37] (step=0098600) Train Loss: 0.3308, Train Steps/Sec: 17.17, Grad Norm: 0.0622 +[2025-02-22 23:42:43] (step=0098700) Train Loss: 0.3301, Train Steps/Sec: 17.11, Grad Norm: 0.0733 +[2025-02-22 23:42:49] (step=0098800) Train Loss: 0.3309, Train Steps/Sec: 17.02, Grad Norm: 0.0688 +[2025-02-22 23:42:56] (step=0098900) Train Loss: 0.3303, Train Steps/Sec: 14.52, Grad Norm: 0.0586 +[2025-02-22 23:43:02] (step=0099000) Train Loss: 0.3308, Train Steps/Sec: 15.91, Grad Norm: 0.0675 +[2025-02-22 23:43:09] (step=0099100) Train Loss: 0.3305, Train Steps/Sec: 14.57, Grad Norm: 0.0595 +[2025-02-22 23:43:15] (step=0099200) Train Loss: 0.3304, Train Steps/Sec: 16.50, Grad Norm: 0.0659 +[2025-02-22 23:43:21] (step=0099300) Train Loss: 0.3310, Train Steps/Sec: 17.27, Grad Norm: 0.0628 +[2025-02-22 23:43:27] (step=0099400) Train Loss: 0.3307, Train Steps/Sec: 17.13, Grad Norm: 0.0609 +[2025-02-22 23:43:33] (step=0099500) Train Loss: 0.3304, Train Steps/Sec: 16.57, Grad Norm: 0.0592 +[2025-02-22 23:43:39] (step=0099600) Train Loss: 0.3308, Train Steps/Sec: 17.20, Grad Norm: 0.0634 +[2025-02-22 23:43:45] (step=0099700) Train Loss: 0.3305, Train Steps/Sec: 17.19, Grad Norm: 0.0654 +[2025-02-22 23:43:52] (step=0099800) Train Loss: 0.3305, Train Steps/Sec: 13.46, Grad Norm: 0.0625 +[2025-02-22 23:43:58] (step=0099900) Train Loss: 0.3303, Train Steps/Sec: 16.49, Grad Norm: 0.0647 +[2025-02-22 23:44:04] (step=0100000) Train Loss: 0.3305, Train Steps/Sec: 16.35, Grad Norm: 0.0528 +[2025-02-22 23:44:05] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0100000.pt +[2025-02-22 23:44:12] (step=0100100) Train Loss: 0.3306, Train Steps/Sec: 12.75, Grad Norm: 0.0640 +[2025-02-22 23:44:18] (step=0100200) Train Loss: 0.3308, Train Steps/Sec: 17.20, Grad Norm: 0.0617 +[2025-02-22 23:44:24] (step=0100300) Train Loss: 0.3304, Train Steps/Sec: 17.25, Grad Norm: 0.0590 +[2025-02-22 23:44:29] (step=0100400) Train Loss: 0.3310, Train Steps/Sec: 17.34, Grad Norm: 0.0648 +[2025-02-22 23:44:35] (step=0100500) Train Loss: 0.3298, Train Steps/Sec: 17.44, Grad Norm: 0.0579 +[2025-02-22 23:44:41] (step=0100600) Train Loss: 0.3300, Train Steps/Sec: 17.44, Grad Norm: 0.0672 +[2025-02-22 23:44:47] (step=0100700) Train Loss: 0.3300, Train Steps/Sec: 17.46, Grad Norm: 0.0556 +[2025-02-22 23:44:52] (step=0100800) Train Loss: 0.3309, Train Steps/Sec: 17.42, Grad Norm: 0.0668 +[2025-02-22 23:44:58] (step=0100900) Train Loss: 0.3309, Train Steps/Sec: 17.40, Grad Norm: 0.0587 +[2025-02-22 23:45:05] (step=0101000) Train Loss: 0.3306, Train Steps/Sec: 15.36, Grad Norm: 0.0604 +[2025-02-22 23:45:11] (step=0101100) Train Loss: 0.3304, Train Steps/Sec: 14.78, Grad Norm: 0.0574 +[2025-02-22 23:45:17] (step=0101200) Train Loss: 0.3307, Train Steps/Sec: 17.43, Grad Norm: 0.0577 +[2025-02-22 23:45:23] (step=0101300) Train Loss: 0.3303, Train Steps/Sec: 17.27, Grad Norm: 0.0626 +[2025-02-22 23:45:30] (step=0101400) Train Loss: 0.3308, Train Steps/Sec: 13.72, Grad Norm: 0.0620 +[2025-02-22 23:45:36] (step=0101500) Train Loss: 0.3303, Train Steps/Sec: 17.26, Grad Norm: 0.0626 +[2025-02-22 23:45:42] (step=0101600) Train Loss: 0.3306, Train Steps/Sec: 17.13, Grad Norm: 0.0622 +[2025-02-22 23:45:48] (step=0101700) Train Loss: 0.3302, Train Steps/Sec: 16.47, Grad Norm: 0.0559 +[2025-02-22 23:45:55] (step=0101800) Train Loss: 0.3305, Train Steps/Sec: 13.95, Grad Norm: 0.0599 +[2025-02-22 23:46:01] (step=0101900) Train Loss: 0.3304, Train Steps/Sec: 16.49, Grad Norm: 0.0629 +[2025-02-22 23:46:07] (step=0102000) Train Loss: 0.3304, Train Steps/Sec: 16.52, Grad Norm: 0.0629 +[2025-02-22 23:46:13] (step=0102100) Train Loss: 0.3311, Train Steps/Sec: 17.28, Grad Norm: 0.0614 +[2025-02-22 23:46:19] (step=0102200) Train Loss: 0.3306, Train Steps/Sec: 17.24, Grad Norm: 0.0607 +[2025-02-22 23:46:25] (step=0102300) Train Loss: 0.3300, Train Steps/Sec: 17.30, Grad Norm: 0.0584 +[2025-02-22 23:46:30] (step=0102400) Train Loss: 0.3305, Train Steps/Sec: 17.23, Grad Norm: 0.0633 +[2025-02-22 23:46:36] (step=0102500) Train Loss: 0.3308, Train Steps/Sec: 17.24, Grad Norm: 0.0606 +[2025-02-22 23:46:43] (step=0102600) Train Loss: 0.3305, Train Steps/Sec: 14.03, Grad Norm: 0.0563 +[2025-02-22 23:46:49] (step=0102700) Train Loss: 0.3302, Train Steps/Sec: 17.24, Grad Norm: 0.0621 +[2025-02-22 23:46:55] (step=0102800) Train Loss: 0.3306, Train Steps/Sec: 17.24, Grad Norm: 0.0628 +[2025-02-22 23:47:01] (step=0102900) Train Loss: 0.3310, Train Steps/Sec: 15.96, Grad Norm: 0.0570 +[2025-02-22 23:47:08] (step=0103000) Train Loss: 0.3299, Train Steps/Sec: 14.65, Grad Norm: 0.0604 +[2025-02-22 23:47:14] (step=0103100) Train Loss: 0.3303, Train Steps/Sec: 16.61, Grad Norm: 0.0661 +[2025-02-22 23:47:20] (step=0103200) Train Loss: 0.3305, Train Steps/Sec: 17.38, Grad Norm: 0.0614 +[2025-02-22 23:47:26] (step=0103300) Train Loss: 0.3304, Train Steps/Sec: 16.64, Grad Norm: 0.0593 +[2025-02-22 23:47:32] (step=0103400) Train Loss: 0.3307, Train Steps/Sec: 17.48, Grad Norm: 0.0582 +[2025-02-22 23:47:37] (step=0103500) Train Loss: 0.3303, Train Steps/Sec: 17.43, Grad Norm: 0.0653 +[2025-02-22 23:47:43] (step=0103600) Train Loss: 0.3304, Train Steps/Sec: 17.41, Grad Norm: 0.0590 +[2025-02-22 23:47:49] (step=0103700) Train Loss: 0.3307, Train Steps/Sec: 17.47, Grad Norm: 0.0521 +[2025-02-22 23:47:56] (step=0103800) Train Loss: 0.3303, Train Steps/Sec: 14.16, Grad Norm: 0.0638 +[2025-02-22 23:48:03] (step=0103900) Train Loss: 0.3308, Train Steps/Sec: 13.34, Grad Norm: 0.0642 +[2025-02-22 23:48:09] (step=0104000) Train Loss: 0.3303, Train Steps/Sec: 16.54, Grad Norm: 0.0647 +[2025-02-22 23:48:15] (step=0104100) Train Loss: 0.3305, Train Steps/Sec: 17.24, Grad Norm: 0.0598 +[2025-02-22 23:48:21] (step=0104200) Train Loss: 0.3298, Train Steps/Sec: 17.15, Grad Norm: 0.0640 +[2025-02-22 23:48:27] (step=0104300) Train Loss: 0.3295, Train Steps/Sec: 17.08, Grad Norm: 0.0616 +[2025-02-22 23:48:33] (step=0104400) Train Loss: 0.3307, Train Steps/Sec: 17.10, Grad Norm: 0.0624 +[2025-02-22 23:48:39] (step=0104500) Train Loss: 0.3301, Train Steps/Sec: 17.09, Grad Norm: 0.0519 +[2025-02-22 23:48:44] (step=0104600) Train Loss: 0.3304, Train Steps/Sec: 17.11, Grad Norm: 0.0597 +[2025-02-22 23:48:50] (step=0104700) Train Loss: 0.3305, Train Steps/Sec: 17.23, Grad Norm: 0.0616 +[2025-02-22 23:48:56] (step=0104800) Train Loss: 0.3307, Train Steps/Sec: 17.14, Grad Norm: 0.0618 +[2025-02-22 23:49:03] (step=0104900) Train Loss: 0.3293, Train Steps/Sec: 15.17, Grad Norm: 0.0615 +[2025-02-22 23:49:10] (step=0105000) Train Loss: 0.3299, Train Steps/Sec: 14.53, Grad Norm: 0.0613 +[2025-02-22 23:49:17] (step=0105100) Train Loss: 0.3303, Train Steps/Sec: 14.12, Grad Norm: 0.0605 +[2025-02-22 23:49:22] (step=0105200) Train Loss: 0.3298, Train Steps/Sec: 17.30, Grad Norm: 0.0627 +[2025-02-22 23:49:28] (step=0105300) Train Loss: 0.3303, Train Steps/Sec: 16.49, Grad Norm: 0.0547 +[2025-02-22 23:49:34] (step=0105400) Train Loss: 0.3304, Train Steps/Sec: 17.25, Grad Norm: 0.0619 +[2025-02-22 23:49:40] (step=0105500) Train Loss: 0.3303, Train Steps/Sec: 17.22, Grad Norm: 0.0688 +[2025-02-22 23:49:46] (step=0105600) Train Loss: 0.3301, Train Steps/Sec: 17.14, Grad Norm: 0.0696 +[2025-02-22 23:49:52] (step=0105700) Train Loss: 0.3305, Train Steps/Sec: 17.14, Grad Norm: 0.0602 +[2025-02-22 23:49:59] (step=0105800) Train Loss: 0.3300, Train Steps/Sec: 13.88, Grad Norm: 0.0582 +[2025-02-22 23:50:05] (step=0105900) Train Loss: 0.3302, Train Steps/Sec: 15.61, Grad Norm: 0.0603 +[2025-02-22 23:50:11] (step=0106000) Train Loss: 0.3290, Train Steps/Sec: 16.35, Grad Norm: 0.0617 +[2025-02-22 23:50:17] (step=0106100) Train Loss: 0.3297, Train Steps/Sec: 17.13, Grad Norm: 0.0601 +[2025-02-22 23:50:23] (step=0106200) Train Loss: 0.3303, Train Steps/Sec: 17.13, Grad Norm: 0.0559 +[2025-02-22 23:50:29] (step=0106300) Train Loss: 0.3307, Train Steps/Sec: 17.05, Grad Norm: 0.0627 +[2025-02-22 23:50:36] (step=0106400) Train Loss: 0.3302, Train Steps/Sec: 14.14, Grad Norm: 0.0635 +[2025-02-22 23:50:42] (step=0106500) Train Loss: 0.3304, Train Steps/Sec: 17.08, Grad Norm: 0.0566 +[2025-02-22 23:50:48] (step=0106600) Train Loss: 0.3305, Train Steps/Sec: 16.97, Grad Norm: 0.0618 +[2025-02-22 23:50:54] (step=0106700) Train Loss: 0.3301, Train Steps/Sec: 17.06, Grad Norm: 0.0655 +[2025-02-22 23:51:00] (step=0106800) Train Loss: 0.3300, Train Steps/Sec: 15.73, Grad Norm: 0.0568 +[2025-02-22 23:51:07] (step=0106900) Train Loss: 0.3300, Train Steps/Sec: 14.62, Grad Norm: 0.0643 +[2025-02-22 23:51:13] (step=0107000) Train Loss: 0.3302, Train Steps/Sec: 16.53, Grad Norm: 0.0547 +[2025-02-22 23:51:19] (step=0107100) Train Loss: 0.3308, Train Steps/Sec: 17.23, Grad Norm: 0.0563 +[2025-02-22 23:51:25] (step=0107200) Train Loss: 0.3303, Train Steps/Sec: 17.20, Grad Norm: 0.0567 +[2025-02-22 23:51:31] (step=0107300) Train Loss: 0.3298, Train Steps/Sec: 16.45, Grad Norm: 0.0595 +[2025-02-22 23:51:36] (step=0107400) Train Loss: 0.3305, Train Steps/Sec: 17.16, Grad Norm: 0.0564 +[2025-02-22 23:51:42] (step=0107500) Train Loss: 0.3296, Train Steps/Sec: 17.22, Grad Norm: 0.0600 +[2025-02-22 23:51:49] (step=0107600) Train Loss: 0.3302, Train Steps/Sec: 14.33, Grad Norm: 0.0583 +[2025-02-22 23:51:55] (step=0107700) Train Loss: 0.3301, Train Steps/Sec: 17.14, Grad Norm: 0.0576 +[2025-02-22 23:52:02] (step=0107800) Train Loss: 0.3296, Train Steps/Sec: 13.84, Grad Norm: 0.0579 +[2025-02-22 23:52:08] (step=0107900) Train Loss: 0.3293, Train Steps/Sec: 16.36, Grad Norm: 0.0554 +[2025-02-22 23:52:15] (step=0108000) Train Loss: 0.3295, Train Steps/Sec: 16.39, Grad Norm: 0.0618 +[2025-02-22 23:52:20] (step=0108100) Train Loss: 0.3296, Train Steps/Sec: 17.16, Grad Norm: 0.0591 +[2025-02-22 23:52:26] (step=0108200) Train Loss: 0.3296, Train Steps/Sec: 17.16, Grad Norm: 0.0602 +[2025-02-22 23:52:32] (step=0108300) Train Loss: 0.3304, Train Steps/Sec: 17.36, Grad Norm: 0.0534 +[2025-02-22 23:52:38] (step=0108400) Train Loss: 0.3300, Train Steps/Sec: 17.38, Grad Norm: 0.0607 +[2025-02-22 23:52:43] (step=0108500) Train Loss: 0.3302, Train Steps/Sec: 17.36, Grad Norm: 0.0547 +[2025-02-22 23:52:49] (step=0108600) Train Loss: 0.3301, Train Steps/Sec: 17.36, Grad Norm: 0.0607 +[2025-02-22 23:52:55] (step=0108700) Train Loss: 0.3305, Train Steps/Sec: 16.68, Grad Norm: 0.0581 +[2025-02-22 23:53:02] (step=0108800) Train Loss: 0.3302, Train Steps/Sec: 15.88, Grad Norm: 0.0610 +[2025-02-22 23:53:09] (step=0108900) Train Loss: 0.3300, Train Steps/Sec: 12.56, Grad Norm: 0.0544 +[2025-02-22 23:53:15] (step=0109000) Train Loss: 0.3295, Train Steps/Sec: 17.31, Grad Norm: 0.0579 +[2025-02-22 23:53:21] (step=0109100) Train Loss: 0.3299, Train Steps/Sec: 17.35, Grad Norm: 0.0592 +[2025-02-22 23:53:27] (step=0109200) Train Loss: 0.3307, Train Steps/Sec: 16.63, Grad Norm: 0.0560 +[2025-02-22 23:53:33] (step=0109300) Train Loss: 0.3298, Train Steps/Sec: 17.38, Grad Norm: 0.0568 +[2025-02-22 23:53:39] (step=0109400) Train Loss: 0.3302, Train Steps/Sec: 17.42, Grad Norm: 0.0597 +[2025-02-22 23:53:44] (step=0109500) Train Loss: 0.3300, Train Steps/Sec: 17.43, Grad Norm: 0.0611 +[2025-02-22 23:53:50] (step=0109600) Train Loss: 0.3299, Train Steps/Sec: 17.41, Grad Norm: 0.0574 +[2025-02-22 23:53:56] (step=0109700) Train Loss: 0.3301, Train Steps/Sec: 17.34, Grad Norm: 0.0564 +[2025-02-22 23:54:03] (step=0109800) Train Loss: 0.3305, Train Steps/Sec: 14.03, Grad Norm: 0.0595 +[2025-02-22 23:54:09] (step=0109900) Train Loss: 0.3297, Train Steps/Sec: 15.87, Grad Norm: 0.0575 +[2025-02-22 23:54:15] (step=0110000) Train Loss: 0.3303, Train Steps/Sec: 16.57, Grad Norm: 0.0573 +[2025-02-22 23:54:22] (step=0110100) Train Loss: 0.3301, Train Steps/Sec: 14.40, Grad Norm: 0.0571 +[2025-02-22 23:54:28] (step=0110200) Train Loss: 0.3303, Train Steps/Sec: 17.38, Grad Norm: 0.0656 +[2025-02-22 23:54:34] (step=0110300) Train Loss: 0.3301, Train Steps/Sec: 17.42, Grad Norm: 0.0584 +[2025-02-22 23:54:39] (step=0110400) Train Loss: 0.3298, Train Steps/Sec: 17.39, Grad Norm: 0.0568 +[2025-02-22 23:54:45] (step=0110500) Train Loss: 0.3303, Train Steps/Sec: 17.44, Grad Norm: 0.0606 +[2025-02-22 23:54:51] (step=0110600) Train Loss: 0.3297, Train Steps/Sec: 17.45, Grad Norm: 0.0596 +[2025-02-22 23:54:57] (step=0110700) Train Loss: 0.3303, Train Steps/Sec: 15.96, Grad Norm: 0.0575 +[2025-02-22 23:55:04] (step=0110800) Train Loss: 0.3298, Train Steps/Sec: 14.65, Grad Norm: 0.0538 +[2025-02-22 23:55:10] (step=0110900) Train Loss: 0.3296, Train Steps/Sec: 16.59, Grad Norm: 0.0557 +[2025-02-22 23:55:16] (step=0111000) Train Loss: 0.3298, Train Steps/Sec: 17.34, Grad Norm: 0.0577 +[2025-02-22 23:55:22] (step=0111100) Train Loss: 0.3302, Train Steps/Sec: 17.32, Grad Norm: 0.0575 +[2025-02-22 23:55:28] (step=0111200) Train Loss: 0.3298, Train Steps/Sec: 16.58, Grad Norm: 0.0567 +[2025-02-22 23:55:33] (step=0111300) Train Loss: 0.3296, Train Steps/Sec: 17.32, Grad Norm: 0.0525 +[2025-02-22 23:55:40] (step=0111400) Train Loss: 0.3293, Train Steps/Sec: 14.31, Grad Norm: 0.0655 +[2025-02-22 23:55:46] (step=0111500) Train Loss: 0.3302, Train Steps/Sec: 17.31, Grad Norm: 0.0557 +[2025-02-22 23:55:52] (step=0111600) Train Loss: 0.3298, Train Steps/Sec: 17.31, Grad Norm: 0.0575 +[2025-02-22 23:55:58] (step=0111700) Train Loss: 0.3302, Train Steps/Sec: 17.42, Grad Norm: 0.0570 +[2025-02-22 23:56:05] (step=0111800) Train Loss: 0.3303, Train Steps/Sec: 14.66, Grad Norm: 0.0581 +[2025-02-22 23:56:11] (step=0111900) Train Loss: 0.3302, Train Steps/Sec: 15.95, Grad Norm: 0.0594 +[2025-02-22 23:56:17] (step=0112000) Train Loss: 0.3297, Train Steps/Sec: 16.62, Grad Norm: 0.0637 +[2025-02-22 23:56:23] (step=0112100) Train Loss: 0.3303, Train Steps/Sec: 17.44, Grad Norm: 0.0610 +[2025-02-22 23:56:28] (step=0112200) Train Loss: 0.3296, Train Steps/Sec: 17.41, Grad Norm: 0.0569 +[2025-02-22 23:56:34] (step=0112300) Train Loss: 0.3302, Train Steps/Sec: 17.43, Grad Norm: 0.0591 +[2025-02-22 23:56:40] (step=0112400) Train Loss: 0.3300, Train Steps/Sec: 17.43, Grad Norm: 0.0597 +[2025-02-22 23:56:46] (step=0112500) Train Loss: 0.3297, Train Steps/Sec: 17.40, Grad Norm: 0.0590 +[2025-02-22 23:56:53] (step=0112600) Train Loss: 0.3294, Train Steps/Sec: 13.41, Grad Norm: 0.0574 +[2025-02-22 23:57:00] (step=0112700) Train Loss: 0.3299, Train Steps/Sec: 15.26, Grad Norm: 0.0537 +[2025-02-22 23:57:06] (step=0112800) Train Loss: 0.3299, Train Steps/Sec: 15.88, Grad Norm: 0.0534 +[2025-02-22 23:57:12] (step=0112900) Train Loss: 0.3294, Train Steps/Sec: 17.42, Grad Norm: 0.0623 +[2025-02-22 23:57:17] (step=0113000) Train Loss: 0.3301, Train Steps/Sec: 17.51, Grad Norm: 0.0528 +[2025-02-22 23:57:23] (step=0113100) Train Loss: 0.3297, Train Steps/Sec: 16.70, Grad Norm: 0.0591 +[2025-02-22 23:57:29] (step=0113200) Train Loss: 0.3299, Train Steps/Sec: 17.51, Grad Norm: 0.0559 +[2025-02-22 23:57:35] (step=0113300) Train Loss: 0.3298, Train Steps/Sec: 17.42, Grad Norm: 0.0549 +[2025-02-22 23:57:40] (step=0113400) Train Loss: 0.3299, Train Steps/Sec: 17.38, Grad Norm: 0.0613 +[2025-02-22 23:57:46] (step=0113500) Train Loss: 0.3297, Train Steps/Sec: 17.41, Grad Norm: 0.0523 +[2025-02-22 23:57:52] (step=0113600) Train Loss: 0.3299, Train Steps/Sec: 17.42, Grad Norm: 0.0538 +[2025-02-22 23:57:58] (step=0113700) Train Loss: 0.3296, Train Steps/Sec: 17.47, Grad Norm: 0.0544 +[2025-02-22 23:58:04] (step=0113800) Train Loss: 0.3297, Train Steps/Sec: 15.20, Grad Norm: 0.0611 +[2025-02-22 23:58:12] (step=0113900) Train Loss: 0.3294, Train Steps/Sec: 12.57, Grad Norm: 0.0557 +[2025-02-22 23:58:18] (step=0114000) Train Loss: 0.3296, Train Steps/Sec: 16.57, Grad Norm: 0.0615 +[2025-02-22 23:58:24] (step=0114100) Train Loss: 0.3298, Train Steps/Sec: 17.35, Grad Norm: 0.0614 +[2025-02-22 23:58:30] (step=0114200) Train Loss: 0.3297, Train Steps/Sec: 17.39, Grad Norm: 0.0578 +[2025-02-22 23:58:36] (step=0114300) Train Loss: 0.3300, Train Steps/Sec: 17.37, Grad Norm: 0.0552 +[2025-02-22 23:58:41] (step=0114400) Train Loss: 0.3297, Train Steps/Sec: 17.32, Grad Norm: 0.0547 +[2025-02-22 23:58:47] (step=0114500) Train Loss: 0.3294, Train Steps/Sec: 17.24, Grad Norm: 0.0588 +[2025-02-22 23:58:54] (step=0114600) Train Loss: 0.3294, Train Steps/Sec: 15.32, Grad Norm: 0.0544 +[2025-02-22 23:59:00] (step=0114700) Train Loss: 0.3290, Train Steps/Sec: 15.38, Grad Norm: 0.0624 +[2025-02-22 23:59:06] (step=0114800) Train Loss: 0.3298, Train Steps/Sec: 16.69, Grad Norm: 0.0570 +[2025-02-22 23:59:12] (step=0114900) Train Loss: 0.3297, Train Steps/Sec: 17.46, Grad Norm: 0.0609 +[2025-02-22 23:59:18] (step=0115000) Train Loss: 0.3293, Train Steps/Sec: 17.42, Grad Norm: 0.0565 +[2025-02-22 23:59:25] (step=0115100) Train Loss: 0.3297, Train Steps/Sec: 13.87, Grad Norm: 0.0571 +[2025-02-22 23:59:31] (step=0115200) Train Loss: 0.3302, Train Steps/Sec: 17.11, Grad Norm: 0.0585 +[2025-02-22 23:59:36] (step=0115300) Train Loss: 0.3292, Train Steps/Sec: 17.38, Grad Norm: 0.0586 +[2025-02-22 23:59:42] (step=0115400) Train Loss: 0.3295, Train Steps/Sec: 17.43, Grad Norm: 0.0572 +[2025-02-22 23:59:48] (step=0115500) Train Loss: 0.3289, Train Steps/Sec: 17.33, Grad Norm: 0.0615 +[2025-02-22 23:59:54] (step=0115600) Train Loss: 0.3296, Train Steps/Sec: 17.36, Grad Norm: 0.0602 +[2025-02-22 23:59:59] (step=0115700) Train Loss: 0.3301, Train Steps/Sec: 17.46, Grad Norm: 0.0556 +[2025-02-23 00:00:06] (step=0115800) Train Loss: 0.3300, Train Steps/Sec: 15.20, Grad Norm: 0.0573 +[2025-02-23 00:00:13] (step=0115900) Train Loss: 0.3300, Train Steps/Sec: 14.50, Grad Norm: 0.0590 +[2025-02-23 00:00:19] (step=0116000) Train Loss: 0.3296, Train Steps/Sec: 16.34, Grad Norm: 0.0569 +[2025-02-23 00:00:25] (step=0116100) Train Loss: 0.3292, Train Steps/Sec: 17.13, Grad Norm: 0.0525 +[2025-02-23 00:00:31] (step=0116200) Train Loss: 0.3296, Train Steps/Sec: 17.08, Grad Norm: 0.0578 +[2025-02-23 00:00:37] (step=0116300) Train Loss: 0.3290, Train Steps/Sec: 17.08, Grad Norm: 0.0569 +[2025-02-23 00:00:44] (step=0116400) Train Loss: 0.3296, Train Steps/Sec: 14.35, Grad Norm: 0.0554 +[2025-02-23 00:00:50] (step=0116500) Train Loss: 0.3295, Train Steps/Sec: 16.00, Grad Norm: 0.0537 +[2025-02-23 00:00:56] (step=0116600) Train Loss: 0.3293, Train Steps/Sec: 15.35, Grad Norm: 0.0616 +[2025-02-23 00:01:03] (step=0116700) Train Loss: 0.3291, Train Steps/Sec: 15.90, Grad Norm: 0.0555 +[2025-02-23 00:01:08] (step=0116800) Train Loss: 0.3303, Train Steps/Sec: 17.38, Grad Norm: 0.0602 +[2025-02-23 00:01:14] (step=0116900) Train Loss: 0.3291, Train Steps/Sec: 17.37, Grad Norm: 0.0595 +[2025-02-23 00:01:20] (step=0117000) Train Loss: 0.3296, Train Steps/Sec: 16.65, Grad Norm: 0.0575 +[2025-02-23 00:01:26] (step=0117100) Train Loss: 0.3294, Train Steps/Sec: 17.39, Grad Norm: 0.0563 +[2025-02-23 00:01:32] (step=0117200) Train Loss: 0.3299, Train Steps/Sec: 17.40, Grad Norm: 0.0569 +[2025-02-23 00:01:37] (step=0117300) Train Loss: 0.3295, Train Steps/Sec: 17.42, Grad Norm: 0.0539 +[2025-02-23 00:01:43] (step=0117400) Train Loss: 0.3295, Train Steps/Sec: 17.36, Grad Norm: 0.0575 +[2025-02-23 00:01:49] (step=0117500) Train Loss: 0.3301, Train Steps/Sec: 17.37, Grad Norm: 0.0588 +[2025-02-23 00:01:56] (step=0117600) Train Loss: 0.3297, Train Steps/Sec: 14.46, Grad Norm: 0.0572 +[2025-02-23 00:02:02] (step=0117700) Train Loss: 0.3296, Train Steps/Sec: 17.01, Grad Norm: 0.0578 +[2025-02-23 00:02:08] (step=0117800) Train Loss: 0.3290, Train Steps/Sec: 15.00, Grad Norm: 0.0582 +[2025-02-23 00:02:15] (step=0117900) Train Loss: 0.3300, Train Steps/Sec: 14.49, Grad Norm: 0.0576 +[2025-02-23 00:02:21] (step=0118000) Train Loss: 0.3291, Train Steps/Sec: 16.45, Grad Norm: 0.0548 +[2025-02-23 00:02:27] (step=0118100) Train Loss: 0.3293, Train Steps/Sec: 17.16, Grad Norm: 0.0556 +[2025-02-23 00:02:33] (step=0118200) Train Loss: 0.3298, Train Steps/Sec: 17.18, Grad Norm: 0.0593 +[2025-02-23 00:02:39] (step=0118300) Train Loss: 0.3297, Train Steps/Sec: 17.16, Grad Norm: 0.0552 +[2025-02-23 00:02:45] (step=0118400) Train Loss: 0.3296, Train Steps/Sec: 17.20, Grad Norm: 0.0534 +[2025-02-23 00:02:51] (step=0118500) Train Loss: 0.3292, Train Steps/Sec: 15.16, Grad Norm: 0.0540 +[2025-02-23 00:02:58] (step=0118600) Train Loss: 0.3292, Train Steps/Sec: 14.70, Grad Norm: 0.0596 +[2025-02-23 00:03:04] (step=0118700) Train Loss: 0.3295, Train Steps/Sec: 17.37, Grad Norm: 0.0532 +[2025-02-23 00:03:10] (step=0118800) Train Loss: 0.3296, Train Steps/Sec: 17.41, Grad Norm: 0.0539 +[2025-02-23 00:03:17] (step=0118900) Train Loss: 0.3301, Train Steps/Sec: 13.86, Grad Norm: 0.0589 +[2025-02-23 00:03:23] (step=0119000) Train Loss: 0.3290, Train Steps/Sec: 17.41, Grad Norm: 0.0587 +[2025-02-23 00:03:28] (step=0119100) Train Loss: 0.3296, Train Steps/Sec: 17.39, Grad Norm: 0.0598 +[2025-02-23 00:03:34] (step=0119200) Train Loss: 0.3292, Train Steps/Sec: 17.39, Grad Norm: 0.0506 +[2025-02-23 00:03:40] (step=0119300) Train Loss: 0.3296, Train Steps/Sec: 17.13, Grad Norm: 0.0654 +[2025-02-23 00:03:46] (step=0119400) Train Loss: 0.3293, Train Steps/Sec: 17.36, Grad Norm: 0.0554 +[2025-02-23 00:03:51] (step=0119500) Train Loss: 0.3290, Train Steps/Sec: 17.31, Grad Norm: 0.0530 +[2025-02-23 00:03:57] (step=0119600) Train Loss: 0.3291, Train Steps/Sec: 17.26, Grad Norm: 0.0665 +[2025-02-23 00:04:03] (step=0119700) Train Loss: 0.3295, Train Steps/Sec: 17.27, Grad Norm: 0.0620 +[2025-02-23 00:04:09] (step=0119800) Train Loss: 0.3295, Train Steps/Sec: 15.74, Grad Norm: 0.0634 +[2025-02-23 00:04:16] (step=0119900) Train Loss: 0.3297, Train Steps/Sec: 14.04, Grad Norm: 0.0554 +[2025-02-23 00:04:22] (step=0120000) Train Loss: 0.3300, Train Steps/Sec: 16.63, Grad Norm: 0.0554 +[2025-02-23 00:04:29] (step=0120100) Train Loss: 0.3293, Train Steps/Sec: 14.33, Grad Norm: 0.0566 +[2025-02-23 00:04:35] (step=0120200) Train Loss: 0.3297, Train Steps/Sec: 17.24, Grad Norm: 0.0542 +[2025-02-23 00:04:41] (step=0120300) Train Loss: 0.3295, Train Steps/Sec: 17.34, Grad Norm: 0.0562 +[2025-02-23 00:04:47] (step=0120400) Train Loss: 0.3292, Train Steps/Sec: 15.92, Grad Norm: 0.0589 +[2025-02-23 00:04:54] (step=0120500) Train Loss: 0.3293, Train Steps/Sec: 15.40, Grad Norm: 0.0553 +[2025-02-23 00:05:00] (step=0120600) Train Loss: 0.3294, Train Steps/Sec: 15.90, Grad Norm: 0.0570 +[2025-02-23 00:05:06] (step=0120700) Train Loss: 0.3289, Train Steps/Sec: 17.25, Grad Norm: 0.0636 +[2025-02-23 00:05:12] (step=0120800) Train Loss: 0.3292, Train Steps/Sec: 17.32, Grad Norm: 0.0560 +[2025-02-23 00:05:18] (step=0120900) Train Loss: 0.3290, Train Steps/Sec: 16.56, Grad Norm: 0.0541 +[2025-02-23 00:05:23] (step=0121000) Train Loss: 0.3290, Train Steps/Sec: 17.35, Grad Norm: 0.0529 +[2025-02-23 00:05:29] (step=0121100) Train Loss: 0.3297, Train Steps/Sec: 17.31, Grad Norm: 0.0515 +[2025-02-23 00:05:35] (step=0121200) Train Loss: 0.3292, Train Steps/Sec: 17.29, Grad Norm: 0.0549 +[2025-02-23 00:05:41] (step=0121300) Train Loss: 0.3293, Train Steps/Sec: 17.35, Grad Norm: 0.0558 +[2025-02-23 00:05:48] (step=0121400) Train Loss: 0.3291, Train Steps/Sec: 14.32, Grad Norm: 0.0598 +[2025-02-23 00:05:54] (step=0121500) Train Loss: 0.3294, Train Steps/Sec: 17.27, Grad Norm: 0.0594 +[2025-02-23 00:05:59] (step=0121600) Train Loss: 0.3289, Train Steps/Sec: 17.39, Grad Norm: 0.0522 +[2025-02-23 00:06:05] (step=0121700) Train Loss: 0.3288, Train Steps/Sec: 17.29, Grad Norm: 0.0556 +[2025-02-23 00:06:12] (step=0121800) Train Loss: 0.3295, Train Steps/Sec: 15.15, Grad Norm: 0.0578 +[2025-02-23 00:06:19] (step=0121900) Train Loss: 0.3295, Train Steps/Sec: 14.62, Grad Norm: 0.0564 +[2025-02-23 00:06:25] (step=0122000) Train Loss: 0.3288, Train Steps/Sec: 16.74, Grad Norm: 0.0545 +[2025-02-23 00:06:30] (step=0122100) Train Loss: 0.3294, Train Steps/Sec: 17.35, Grad Norm: 0.0578 +[2025-02-23 00:06:36] (step=0122200) Train Loss: 0.3297, Train Steps/Sec: 17.40, Grad Norm: 0.0568 +[2025-02-23 00:06:42] (step=0122300) Train Loss: 0.3290, Train Steps/Sec: 17.38, Grad Norm: 0.0529 +[2025-02-23 00:06:48] (step=0122400) Train Loss: 0.3291, Train Steps/Sec: 15.33, Grad Norm: 0.0531 +[2025-02-23 00:06:55] (step=0122500) Train Loss: 0.3293, Train Steps/Sec: 14.79, Grad Norm: 0.0602 +[2025-02-23 00:07:02] (step=0122600) Train Loss: 0.3292, Train Steps/Sec: 14.41, Grad Norm: 0.0567 +[2025-02-23 00:07:08] (step=0122700) Train Loss: 0.3297, Train Steps/Sec: 17.09, Grad Norm: 0.0559 +[2025-02-23 00:07:14] (step=0122800) Train Loss: 0.3294, Train Steps/Sec: 16.47, Grad Norm: 0.0519 +[2025-02-23 00:07:20] (step=0122900) Train Loss: 0.3288, Train Steps/Sec: 17.16, Grad Norm: 0.0570 +[2025-02-23 00:07:26] (step=0123000) Train Loss: 0.3293, Train Steps/Sec: 17.30, Grad Norm: 0.0556 +[2025-02-23 00:07:31] (step=0123100) Train Loss: 0.3290, Train Steps/Sec: 17.41, Grad Norm: 0.0553 +[2025-02-23 00:07:37] (step=0123200) Train Loss: 0.3296, Train Steps/Sec: 17.46, Grad Norm: 0.0564 +[2025-02-23 00:07:43] (step=0123300) Train Loss: 0.3290, Train Steps/Sec: 17.42, Grad Norm: 0.0539 +[2025-02-23 00:07:49] (step=0123400) Train Loss: 0.3287, Train Steps/Sec: 17.38, Grad Norm: 0.0543 +[2025-02-23 00:07:54] (step=0123500) Train Loss: 0.3291, Train Steps/Sec: 17.31, Grad Norm: 0.0557 +[2025-02-23 00:08:00] (step=0123600) Train Loss: 0.3294, Train Steps/Sec: 17.31, Grad Norm: 0.0536 +[2025-02-23 00:08:06] (step=0123700) Train Loss: 0.3291, Train Steps/Sec: 17.29, Grad Norm: 0.0506 +[2025-02-23 00:08:12] (step=0123800) Train Loss: 0.3289, Train Steps/Sec: 15.83, Grad Norm: 0.0585 +[2025-02-23 00:08:21] (step=0123900) Train Loss: 0.3292, Train Steps/Sec: 11.97, Grad Norm: 0.0558 +[2025-02-23 00:08:27] (step=0124000) Train Loss: 0.3286, Train Steps/Sec: 16.48, Grad Norm: 0.0530 +[2025-02-23 00:08:32] (step=0124100) Train Loss: 0.3288, Train Steps/Sec: 17.15, Grad Norm: 0.0515 +[2025-02-23 00:08:38] (step=0124200) Train Loss: 0.3292, Train Steps/Sec: 17.14, Grad Norm: 0.0581 +[2025-02-23 00:08:45] (step=0124300) Train Loss: 0.3294, Train Steps/Sec: 15.74, Grad Norm: 0.0556 +[2025-02-23 00:08:51] (step=0124400) Train Loss: 0.3291, Train Steps/Sec: 15.32, Grad Norm: 0.0566 +[2025-02-23 00:08:57] (step=0124500) Train Loss: 0.3286, Train Steps/Sec: 16.61, Grad Norm: 0.0529 +[2025-02-23 00:09:03] (step=0124600) Train Loss: 0.3288, Train Steps/Sec: 17.27, Grad Norm: 0.0522 +[2025-02-23 00:09:09] (step=0124700) Train Loss: 0.3292, Train Steps/Sec: 17.30, Grad Norm: 0.0561 +[2025-02-23 00:09:15] (step=0124800) Train Loss: 0.3295, Train Steps/Sec: 16.57, Grad Norm: 0.0630 +[2025-02-23 00:09:21] (step=0124900) Train Loss: 0.3287, Train Steps/Sec: 17.28, Grad Norm: 0.0569 +[2025-02-23 00:09:26] (step=0125000) Train Loss: 0.3294, Train Steps/Sec: 17.25, Grad Norm: 0.0573 +[2025-02-23 00:09:33] (step=0125100) Train Loss: 0.3298, Train Steps/Sec: 15.81, Grad Norm: 0.0582 +[2025-02-23 00:09:39] (step=0125200) Train Loss: 0.3293, Train Steps/Sec: 15.43, Grad Norm: 0.0568 +[2025-02-23 00:09:45] (step=0125300) Train Loss: 0.3288, Train Steps/Sec: 17.11, Grad Norm: 0.0577 +[2025-02-23 00:09:51] (step=0125400) Train Loss: 0.3288, Train Steps/Sec: 17.07, Grad Norm: 0.0546 +[2025-02-23 00:09:57] (step=0125500) Train Loss: 0.3286, Train Steps/Sec: 17.08, Grad Norm: 0.0585 +[2025-02-23 00:10:03] (step=0125600) Train Loss: 0.3292, Train Steps/Sec: 17.09, Grad Norm: 0.0524 +[2025-02-23 00:10:08] (step=0125700) Train Loss: 0.3289, Train Steps/Sec: 17.04, Grad Norm: 0.0573 +[2025-02-23 00:10:15] (step=0125800) Train Loss: 0.3293, Train Steps/Sec: 15.69, Grad Norm: 0.0558 +[2025-02-23 00:10:22] (step=0125900) Train Loss: 0.3289, Train Steps/Sec: 13.87, Grad Norm: 0.0572 +[2025-02-23 00:10:28] (step=0126000) Train Loss: 0.3287, Train Steps/Sec: 16.48, Grad Norm: 0.0576 +[2025-02-23 00:10:34] (step=0126100) Train Loss: 0.3293, Train Steps/Sec: 17.19, Grad Norm: 0.0539 +[2025-02-23 00:10:40] (step=0126200) Train Loss: 0.3287, Train Steps/Sec: 17.12, Grad Norm: 0.0526 +[2025-02-23 00:10:47] (step=0126300) Train Loss: 0.3290, Train Steps/Sec: 14.09, Grad Norm: 0.0524 +[2025-02-23 00:10:54] (step=0126400) Train Loss: 0.3287, Train Steps/Sec: 13.31, Grad Norm: 0.0541 +[2025-02-23 00:11:00] (step=0126500) Train Loss: 0.3289, Train Steps/Sec: 17.28, Grad Norm: 0.0519 +[2025-02-23 00:11:06] (step=0126600) Train Loss: 0.3294, Train Steps/Sec: 17.40, Grad Norm: 0.0539 +[2025-02-23 00:11:12] (step=0126700) Train Loss: 0.3284, Train Steps/Sec: 17.41, Grad Norm: 0.0549 +[2025-02-23 00:11:18] (step=0126800) Train Loss: 0.3286, Train Steps/Sec: 16.67, Grad Norm: 0.0548 +[2025-02-23 00:11:24] (step=0126900) Train Loss: 0.3293, Train Steps/Sec: 16.75, Grad Norm: 0.0521 +[2025-02-23 00:11:29] (step=0127000) Train Loss: 0.3289, Train Steps/Sec: 17.37, Grad Norm: 0.0512 +[2025-02-23 00:11:35] (step=0127100) Train Loss: 0.3295, Train Steps/Sec: 17.35, Grad Norm: 0.0566 +[2025-02-23 00:11:41] (step=0127200) Train Loss: 0.3290, Train Steps/Sec: 17.38, Grad Norm: 0.0504 +[2025-02-23 00:11:47] (step=0127300) Train Loss: 0.3294, Train Steps/Sec: 17.34, Grad Norm: 0.0565 +[2025-02-23 00:11:53] (step=0127400) Train Loss: 0.3294, Train Steps/Sec: 17.31, Grad Norm: 0.0533 +[2025-02-23 00:11:58] (step=0127500) Train Loss: 0.3285, Train Steps/Sec: 17.31, Grad Norm: 0.0531 +[2025-02-23 00:12:04] (step=0127600) Train Loss: 0.3290, Train Steps/Sec: 17.27, Grad Norm: 0.0590 +[2025-02-23 00:12:11] (step=0127700) Train Loss: 0.3286, Train Steps/Sec: 14.34, Grad Norm: 0.0550 +[2025-02-23 00:12:17] (step=0127800) Train Loss: 0.3298, Train Steps/Sec: 15.78, Grad Norm: 0.0520 +[2025-02-23 00:12:24] (step=0127900) Train Loss: 0.3293, Train Steps/Sec: 14.08, Grad Norm: 0.0563 +[2025-02-23 00:12:30] (step=0128000) Train Loss: 0.3290, Train Steps/Sec: 16.71, Grad Norm: 0.0516 +[2025-02-23 00:12:36] (step=0128100) Train Loss: 0.3294, Train Steps/Sec: 17.46, Grad Norm: 0.0510 +[2025-02-23 00:12:43] (step=0128200) Train Loss: 0.3289, Train Steps/Sec: 15.20, Grad Norm: 0.0565 +[2025-02-23 00:12:49] (step=0128300) Train Loss: 0.3291, Train Steps/Sec: 15.52, Grad Norm: 0.0518 +[2025-02-23 00:12:56] (step=0128400) Train Loss: 0.3292, Train Steps/Sec: 15.97, Grad Norm: 0.0519 +[2025-02-23 00:13:01] (step=0128500) Train Loss: 0.3286, Train Steps/Sec: 17.47, Grad Norm: 0.0511 +[2025-02-23 00:13:07] (step=0128600) Train Loss: 0.3287, Train Steps/Sec: 17.50, Grad Norm: 0.0540 +[2025-02-23 00:13:13] (step=0128700) Train Loss: 0.3292, Train Steps/Sec: 16.73, Grad Norm: 0.0554 +[2025-02-23 00:13:19] (step=0128800) Train Loss: 0.3293, Train Steps/Sec: 17.40, Grad Norm: 0.0581 +[2025-02-23 00:13:26] (step=0128900) Train Loss: 0.3290, Train Steps/Sec: 14.17, Grad Norm: 0.0509 +[2025-02-23 00:13:32] (step=0129000) Train Loss: 0.3290, Train Steps/Sec: 17.19, Grad Norm: 0.0561 +[2025-02-23 00:13:37] (step=0129100) Train Loss: 0.3285, Train Steps/Sec: 17.29, Grad Norm: 0.0562 +[2025-02-23 00:13:43] (step=0129200) Train Loss: 0.3294, Train Steps/Sec: 17.34, Grad Norm: 0.0486 +[2025-02-23 00:13:49] (step=0129300) Train Loss: 0.3286, Train Steps/Sec: 17.32, Grad Norm: 0.0557 +[2025-02-23 00:13:55] (step=0129400) Train Loss: 0.3286, Train Steps/Sec: 17.38, Grad Norm: 0.0550 +[2025-02-23 00:14:00] (step=0129500) Train Loss: 0.3291, Train Steps/Sec: 17.31, Grad Norm: 0.0528 +[2025-02-23 00:14:06] (step=0129600) Train Loss: 0.3285, Train Steps/Sec: 17.22, Grad Norm: 0.0512 +[2025-02-23 00:14:12] (step=0129700) Train Loss: 0.3287, Train Steps/Sec: 17.24, Grad Norm: 0.0485 +[2025-02-23 00:14:18] (step=0129800) Train Loss: 0.3289, Train Steps/Sec: 15.77, Grad Norm: 0.0561 +[2025-02-23 00:14:26] (step=0129900) Train Loss: 0.3285, Train Steps/Sec: 13.92, Grad Norm: 0.0528 +[2025-02-23 00:14:31] (step=0130000) Train Loss: 0.3293, Train Steps/Sec: 17.15, Grad Norm: 0.0569 +[2025-02-23 00:14:37] (step=0130100) Train Loss: 0.3292, Train Steps/Sec: 16.39, Grad Norm: 0.0523 +[2025-02-23 00:14:46] (step=0130200) Train Loss: 0.3285, Train Steps/Sec: 11.98, Grad Norm: 0.0549 +[2025-02-23 00:14:52] (step=0130300) Train Loss: 0.3286, Train Steps/Sec: 15.69, Grad Norm: 0.0568 +[2025-02-23 00:14:58] (step=0130400) Train Loss: 0.3284, Train Steps/Sec: 17.25, Grad Norm: 0.0567 +[2025-02-23 00:15:04] (step=0130500) Train Loss: 0.3290, Train Steps/Sec: 17.29, Grad Norm: 0.0543 +[2025-02-23 00:15:10] (step=0130600) Train Loss: 0.3290, Train Steps/Sec: 17.24, Grad Norm: 0.0499 +[2025-02-23 00:15:16] (step=0130700) Train Loss: 0.3287, Train Steps/Sec: 16.51, Grad Norm: 0.0563 +[2025-02-23 00:15:21] (step=0130800) Train Loss: 0.3292, Train Steps/Sec: 17.23, Grad Norm: 0.0555 +[2025-02-23 00:15:27] (step=0130900) Train Loss: 0.3286, Train Steps/Sec: 17.28, Grad Norm: 0.0527 +[2025-02-23 00:15:33] (step=0131000) Train Loss: 0.3290, Train Steps/Sec: 17.46, Grad Norm: 0.0529 +[2025-02-23 00:15:39] (step=0131100) Train Loss: 0.3284, Train Steps/Sec: 17.40, Grad Norm: 0.0507 +[2025-02-23 00:15:45] (step=0131200) Train Loss: 0.3288, Train Steps/Sec: 17.31, Grad Norm: 0.0553 +[2025-02-23 00:15:50] (step=0131300) Train Loss: 0.3288, Train Steps/Sec: 17.36, Grad Norm: 0.0503 +[2025-02-23 00:15:57] (step=0131400) Train Loss: 0.3299, Train Steps/Sec: 14.24, Grad Norm: 0.0559 +[2025-02-23 00:16:03] (step=0131500) Train Loss: 0.3291, Train Steps/Sec: 17.18, Grad Norm: 0.0517 +[2025-02-23 00:16:09] (step=0131600) Train Loss: 0.3290, Train Steps/Sec: 17.23, Grad Norm: 0.0530 +[2025-02-23 00:16:15] (step=0131700) Train Loss: 0.3288, Train Steps/Sec: 17.18, Grad Norm: 0.0518 +[2025-02-23 00:16:21] (step=0131800) Train Loss: 0.3289, Train Steps/Sec: 15.69, Grad Norm: 0.0540 +[2025-02-23 00:16:28] (step=0131900) Train Loss: 0.3286, Train Steps/Sec: 13.86, Grad Norm: 0.0532 +[2025-02-23 00:16:34] (step=0132000) Train Loss: 0.3286, Train Steps/Sec: 17.02, Grad Norm: 0.0519 +[2025-02-23 00:16:41] (step=0132100) Train Loss: 0.3291, Train Steps/Sec: 14.44, Grad Norm: 0.0526 +[2025-02-23 00:16:48] (step=0132200) Train Loss: 0.3287, Train Steps/Sec: 15.16, Grad Norm: 0.0537 +[2025-02-23 00:16:54] (step=0132300) Train Loss: 0.3292, Train Steps/Sec: 16.40, Grad Norm: 0.0550 +[2025-02-23 00:17:00] (step=0132400) Train Loss: 0.3287, Train Steps/Sec: 17.14, Grad Norm: 0.0545 +[2025-02-23 00:17:06] (step=0132500) Train Loss: 0.3286, Train Steps/Sec: 17.07, Grad Norm: 0.0565 +[2025-02-23 00:17:12] (step=0132600) Train Loss: 0.3290, Train Steps/Sec: 16.34, Grad Norm: 0.0508 +[2025-02-23 00:17:19] (step=0132700) Train Loss: 0.3284, Train Steps/Sec: 14.25, Grad Norm: 0.0538 +[2025-02-23 00:17:24] (step=0132800) Train Loss: 0.3290, Train Steps/Sec: 17.35, Grad Norm: 0.0518 +[2025-02-23 00:17:30] (step=0132900) Train Loss: 0.3287, Train Steps/Sec: 17.33, Grad Norm: 0.0560 +[2025-02-23 00:17:36] (step=0133000) Train Loss: 0.3285, Train Steps/Sec: 17.26, Grad Norm: 0.0587 +[2025-02-23 00:17:42] (step=0133100) Train Loss: 0.3286, Train Steps/Sec: 17.08, Grad Norm: 0.0523 +[2025-02-23 00:17:48] (step=0133200) Train Loss: 0.3287, Train Steps/Sec: 17.07, Grad Norm: 0.0485 +[2025-02-23 00:17:54] (step=0133300) Train Loss: 0.3290, Train Steps/Sec: 17.23, Grad Norm: 0.0514 +[2025-02-23 00:17:59] (step=0133400) Train Loss: 0.3286, Train Steps/Sec: 17.24, Grad Norm: 0.0542 +[2025-02-23 00:18:05] (step=0133500) Train Loss: 0.3289, Train Steps/Sec: 17.18, Grad Norm: 0.0527 +[2025-02-23 00:18:11] (step=0133600) Train Loss: 0.3288, Train Steps/Sec: 17.27, Grad Norm: 0.0570 +[2025-02-23 00:18:17] (step=0133700) Train Loss: 0.3289, Train Steps/Sec: 17.30, Grad Norm: 0.0515 +[2025-02-23 00:18:23] (step=0133800) Train Loss: 0.3294, Train Steps/Sec: 16.59, Grad Norm: 0.0523 +[2025-02-23 00:18:31] (step=0133900) Train Loss: 0.3288, Train Steps/Sec: 11.64, Grad Norm: 0.0532 +[2025-02-23 00:18:37] (step=0134000) Train Loss: 0.3284, Train Steps/Sec: 17.42, Grad Norm: 0.0555 +[2025-02-23 00:18:44] (step=0134100) Train Loss: 0.3284, Train Steps/Sec: 13.68, Grad Norm: 0.0517 +[2025-02-23 00:18:51] (step=0134200) Train Loss: 0.3282, Train Steps/Sec: 16.01, Grad Norm: 0.0573 +[2025-02-23 00:18:56] (step=0134300) Train Loss: 0.3289, Train Steps/Sec: 17.46, Grad Norm: 0.0549 +[2025-02-23 00:19:02] (step=0134400) Train Loss: 0.3285, Train Steps/Sec: 17.44, Grad Norm: 0.0484 +[2025-02-23 00:19:08] (step=0134500) Train Loss: 0.3286, Train Steps/Sec: 17.45, Grad Norm: 0.0530 +[2025-02-23 00:19:14] (step=0134600) Train Loss: 0.3286, Train Steps/Sec: 16.73, Grad Norm: 0.0563 +[2025-02-23 00:19:20] (step=0134700) Train Loss: 0.3285, Train Steps/Sec: 17.47, Grad Norm: 0.0563 +[2025-02-23 00:19:25] (step=0134800) Train Loss: 0.3285, Train Steps/Sec: 17.38, Grad Norm: 0.0528 +[2025-02-23 00:19:31] (step=0134900) Train Loss: 0.3286, Train Steps/Sec: 17.41, Grad Norm: 0.0586 +[2025-02-23 00:19:37] (step=0135000) Train Loss: 0.3294, Train Steps/Sec: 17.47, Grad Norm: 0.0520 +[2025-02-23 00:19:43] (step=0135100) Train Loss: 0.3291, Train Steps/Sec: 17.40, Grad Norm: 0.0523 +[2025-02-23 00:19:49] (step=0135200) Train Loss: 0.3293, Train Steps/Sec: 14.47, Grad Norm: 0.0561 +[2025-02-23 00:19:55] (step=0135300) Train Loss: 0.3284, Train Steps/Sec: 17.29, Grad Norm: 0.0514 +[2025-02-23 00:20:01] (step=0135400) Train Loss: 0.3284, Train Steps/Sec: 17.25, Grad Norm: 0.0543 +[2025-02-23 00:20:07] (step=0135500) Train Loss: 0.3290, Train Steps/Sec: 17.30, Grad Norm: 0.0556 +[2025-02-23 00:20:13] (step=0135600) Train Loss: 0.3288, Train Steps/Sec: 17.36, Grad Norm: 0.0545 +[2025-02-23 00:20:18] (step=0135700) Train Loss: 0.3291, Train Steps/Sec: 17.40, Grad Norm: 0.0538 +[2025-02-23 00:20:24] (step=0135800) Train Loss: 0.3283, Train Steps/Sec: 16.61, Grad Norm: 0.0514 +[2025-02-23 00:20:32] (step=0135900) Train Loss: 0.3289, Train Steps/Sec: 14.02, Grad Norm: 0.0494 +[2025-02-23 00:20:38] (step=0136000) Train Loss: 0.3289, Train Steps/Sec: 14.69, Grad Norm: 0.0538 +[2025-02-23 00:20:45] (step=0136100) Train Loss: 0.3291, Train Steps/Sec: 14.09, Grad Norm: 0.0485 +[2025-02-23 00:20:51] (step=0136200) Train Loss: 0.3285, Train Steps/Sec: 17.35, Grad Norm: 0.0495 +[2025-02-23 00:20:57] (step=0136300) Train Loss: 0.3286, Train Steps/Sec: 17.40, Grad Norm: 0.0550 +[2025-02-23 00:21:04] (step=0136400) Train Loss: 0.3284, Train Steps/Sec: 14.41, Grad Norm: 0.0503 +[2025-02-23 00:21:10] (step=0136500) Train Loss: 0.3287, Train Steps/Sec: 16.64, Grad Norm: 0.0541 +[2025-02-23 00:21:16] (step=0136600) Train Loss: 0.3289, Train Steps/Sec: 17.44, Grad Norm: 0.0536 +[2025-02-23 00:21:21] (step=0136700) Train Loss: 0.3290, Train Steps/Sec: 17.50, Grad Norm: 0.0517 +[2025-02-23 00:21:27] (step=0136800) Train Loss: 0.3283, Train Steps/Sec: 17.53, Grad Norm: 0.0463 +[2025-02-23 00:21:33] (step=0136900) Train Loss: 0.3287, Train Steps/Sec: 17.48, Grad Norm: 0.0546 +[2025-02-23 00:21:38] (step=0137000) Train Loss: 0.3285, Train Steps/Sec: 17.49, Grad Norm: 0.0546 +[2025-02-23 00:21:44] (step=0137100) Train Loss: 0.3289, Train Steps/Sec: 17.49, Grad Norm: 0.0487 +[2025-02-23 00:21:50] (step=0137200) Train Loss: 0.3284, Train Steps/Sec: 17.44, Grad Norm: 0.0525 +[2025-02-23 00:21:56] (step=0137300) Train Loss: 0.3285, Train Steps/Sec: 17.53, Grad Norm: 0.0490 +[2025-02-23 00:22:01] (step=0137400) Train Loss: 0.3283, Train Steps/Sec: 17.49, Grad Norm: 0.0523 +[2025-02-23 00:22:07] (step=0137500) Train Loss: 0.3282, Train Steps/Sec: 17.43, Grad Norm: 0.0508 +[2025-02-23 00:22:13] (step=0137600) Train Loss: 0.3289, Train Steps/Sec: 17.29, Grad Norm: 0.0496 +[2025-02-23 00:22:20] (step=0137700) Train Loss: 0.3281, Train Steps/Sec: 14.48, Grad Norm: 0.0552 +[2025-02-23 00:22:26] (step=0137800) Train Loss: 0.3285, Train Steps/Sec: 16.68, Grad Norm: 0.0564 +[2025-02-23 00:22:33] (step=0137900) Train Loss: 0.3288, Train Steps/Sec: 13.12, Grad Norm: 0.0505 +[2025-02-23 00:22:40] (step=0138000) Train Loss: 0.3281, Train Steps/Sec: 14.79, Grad Norm: 0.0520 +[2025-02-23 00:22:47] (step=0138100) Train Loss: 0.3286, Train Steps/Sec: 15.33, Grad Norm: 0.0536 +[2025-02-23 00:22:52] (step=0138200) Train Loss: 0.3287, Train Steps/Sec: 17.53, Grad Norm: 0.0516 +[2025-02-23 00:22:58] (step=0138300) Train Loss: 0.3284, Train Steps/Sec: 17.46, Grad Norm: 0.0569 +[2025-02-23 00:23:04] (step=0138400) Train Loss: 0.3283, Train Steps/Sec: 17.40, Grad Norm: 0.0538 +[2025-02-23 00:23:10] (step=0138500) Train Loss: 0.3288, Train Steps/Sec: 16.64, Grad Norm: 0.0500 +[2025-02-23 00:23:16] (step=0138600) Train Loss: 0.3293, Train Steps/Sec: 17.35, Grad Norm: 0.0585 +[2025-02-23 00:23:21] (step=0138700) Train Loss: 0.3288, Train Steps/Sec: 17.39, Grad Norm: 0.0518 +[2025-02-23 00:23:27] (step=0138800) Train Loss: 0.3284, Train Steps/Sec: 17.47, Grad Norm: 0.0512 +[2025-02-23 00:23:34] (step=0138900) Train Loss: 0.3288, Train Steps/Sec: 14.45, Grad Norm: 0.0533 +[2025-02-23 00:23:40] (step=0139000) Train Loss: 0.3287, Train Steps/Sec: 17.19, Grad Norm: 0.0465 +[2025-02-23 00:23:46] (step=0139100) Train Loss: 0.3285, Train Steps/Sec: 17.16, Grad Norm: 0.0546 +[2025-02-23 00:23:52] (step=0139200) Train Loss: 0.3281, Train Steps/Sec: 17.10, Grad Norm: 0.0510 +[2025-02-23 00:23:57] (step=0139300) Train Loss: 0.3284, Train Steps/Sec: 17.15, Grad Norm: 0.0498 +[2025-02-23 00:24:03] (step=0139400) Train Loss: 0.3281, Train Steps/Sec: 17.01, Grad Norm: 0.0538 +[2025-02-23 00:24:09] (step=0139500) Train Loss: 0.3286, Train Steps/Sec: 17.02, Grad Norm: 0.0534 +[2025-02-23 00:24:15] (step=0139600) Train Loss: 0.3286, Train Steps/Sec: 17.08, Grad Norm: 0.0520 +[2025-02-23 00:24:21] (step=0139700) Train Loss: 0.3288, Train Steps/Sec: 17.11, Grad Norm: 0.0530 +[2025-02-23 00:24:27] (step=0139800) Train Loss: 0.3283, Train Steps/Sec: 16.43, Grad Norm: 0.0541 +[2025-02-23 00:24:35] (step=0139900) Train Loss: 0.3282, Train Steps/Sec: 11.82, Grad Norm: 0.0566 +[2025-02-23 00:24:42] (step=0140000) Train Loss: 0.3282, Train Steps/Sec: 15.03, Grad Norm: 0.0535 +[2025-02-23 00:24:48] (step=0140100) Train Loss: 0.3295, Train Steps/Sec: 16.33, Grad Norm: 0.0568 +[2025-02-23 00:24:55] (step=0140200) Train Loss: 0.3278, Train Steps/Sec: 14.50, Grad Norm: 0.0524 +[2025-02-23 00:25:01] (step=0140300) Train Loss: 0.3284, Train Steps/Sec: 17.36, Grad Norm: 0.0533 +[2025-02-23 00:25:07] (step=0140400) Train Loss: 0.3287, Train Steps/Sec: 16.61, Grad Norm: 0.0529 +[2025-02-23 00:25:13] (step=0140500) Train Loss: 0.3285, Train Steps/Sec: 17.33, Grad Norm: 0.0505 +[2025-02-23 00:25:18] (step=0140600) Train Loss: 0.3280, Train Steps/Sec: 17.30, Grad Norm: 0.0529 +[2025-02-23 00:25:24] (step=0140700) Train Loss: 0.3284, Train Steps/Sec: 17.20, Grad Norm: 0.0470 +[2025-02-23 00:25:30] (step=0140800) Train Loss: 0.3286, Train Steps/Sec: 17.26, Grad Norm: 0.0539 +[2025-02-23 00:25:36] (step=0140900) Train Loss: 0.3286, Train Steps/Sec: 17.35, Grad Norm: 0.0483 +[2025-02-23 00:25:42] (step=0141000) Train Loss: 0.3280, Train Steps/Sec: 17.36, Grad Norm: 0.0485 +[2025-02-23 00:25:47] (step=0141100) Train Loss: 0.3284, Train Steps/Sec: 17.41, Grad Norm: 0.0481 +[2025-02-23 00:25:53] (step=0141200) Train Loss: 0.3279, Train Steps/Sec: 17.40, Grad Norm: 0.0556 +[2025-02-23 00:25:59] (step=0141300) Train Loss: 0.3280, Train Steps/Sec: 17.39, Grad Norm: 0.0503 +[2025-02-23 00:26:06] (step=0141400) Train Loss: 0.3284, Train Steps/Sec: 14.31, Grad Norm: 0.0555 +[2025-02-23 00:26:12] (step=0141500) Train Loss: 0.3283, Train Steps/Sec: 17.27, Grad Norm: 0.0520 +[2025-02-23 00:26:17] (step=0141600) Train Loss: 0.3279, Train Steps/Sec: 17.10, Grad Norm: 0.0539 +[2025-02-23 00:26:23] (step=0141700) Train Loss: 0.3273, Train Steps/Sec: 17.11, Grad Norm: 0.0524 +[2025-02-23 00:26:30] (step=0141800) Train Loss: 0.3279, Train Steps/Sec: 15.11, Grad Norm: 0.0601 +[2025-02-23 00:26:38] (step=0141900) Train Loss: 0.3284, Train Steps/Sec: 12.51, Grad Norm: 0.0504 +[2025-02-23 00:26:45] (step=0142000) Train Loss: 0.3286, Train Steps/Sec: 15.09, Grad Norm: 0.0508 +[2025-02-23 00:26:51] (step=0142100) Train Loss: 0.3281, Train Steps/Sec: 16.40, Grad Norm: 0.0513 +[2025-02-23 00:26:56] (step=0142200) Train Loss: 0.3278, Train Steps/Sec: 17.17, Grad Norm: 0.0494 +[2025-02-23 00:27:02] (step=0142300) Train Loss: 0.3289, Train Steps/Sec: 17.10, Grad Norm: 0.0553 +[2025-02-23 00:27:08] (step=0142400) Train Loss: 0.3284, Train Steps/Sec: 16.40, Grad Norm: 0.0572 +[2025-02-23 00:27:14] (step=0142500) Train Loss: 0.3283, Train Steps/Sec: 17.09, Grad Norm: 0.0515 +[2025-02-23 00:27:20] (step=0142600) Train Loss: 0.3285, Train Steps/Sec: 16.99, Grad Norm: 0.0522 +[2025-02-23 00:27:27] (step=0142700) Train Loss: 0.3287, Train Steps/Sec: 14.24, Grad Norm: 0.0478 +[2025-02-23 00:27:33] (step=0142800) Train Loss: 0.3286, Train Steps/Sec: 17.22, Grad Norm: 0.0572 +[2025-02-23 00:27:39] (step=0142900) Train Loss: 0.3289, Train Steps/Sec: 17.20, Grad Norm: 0.0474 +[2025-02-23 00:27:45] (step=0143000) Train Loss: 0.3288, Train Steps/Sec: 17.22, Grad Norm: 0.0501 +[2025-02-23 00:27:50] (step=0143100) Train Loss: 0.3282, Train Steps/Sec: 17.30, Grad Norm: 0.0511 +[2025-02-23 00:27:56] (step=0143200) Train Loss: 0.3284, Train Steps/Sec: 17.33, Grad Norm: 0.0501 +[2025-02-23 00:28:02] (step=0143300) Train Loss: 0.3282, Train Steps/Sec: 17.28, Grad Norm: 0.0512 +[2025-02-23 00:28:08] (step=0143400) Train Loss: 0.3282, Train Steps/Sec: 17.28, Grad Norm: 0.0511 +[2025-02-23 00:28:14] (step=0143500) Train Loss: 0.3278, Train Steps/Sec: 17.35, Grad Norm: 0.0529 +[2025-02-23 00:28:19] (step=0143600) Train Loss: 0.3280, Train Steps/Sec: 17.41, Grad Norm: 0.0572 +[2025-02-23 00:28:25] (step=0143700) Train Loss: 0.3285, Train Steps/Sec: 17.41, Grad Norm: 0.0520 +[2025-02-23 00:28:32] (step=0143800) Train Loss: 0.3276, Train Steps/Sec: 14.20, Grad Norm: 0.0496 +[2025-02-23 00:28:41] (step=0143900) Train Loss: 0.3281, Train Steps/Sec: 11.36, Grad Norm: 0.0510 +[2025-02-23 00:28:47] (step=0144000) Train Loss: 0.3282, Train Steps/Sec: 15.67, Grad Norm: 0.0524 +[2025-02-23 00:28:53] (step=0144100) Train Loss: 0.3280, Train Steps/Sec: 16.51, Grad Norm: 0.0515 +[2025-02-23 00:28:59] (step=0144200) Train Loss: 0.3286, Train Steps/Sec: 17.36, Grad Norm: 0.0497 +[2025-02-23 00:29:05] (step=0144300) Train Loss: 0.3283, Train Steps/Sec: 16.61, Grad Norm: 0.0491 +[2025-02-23 00:29:11] (step=0144400) Train Loss: 0.3286, Train Steps/Sec: 17.27, Grad Norm: 0.0515 +[2025-02-23 00:29:17] (step=0144500) Train Loss: 0.3284, Train Steps/Sec: 17.24, Grad Norm: 0.0538 +[2025-02-23 00:29:22] (step=0144600) Train Loss: 0.3288, Train Steps/Sec: 17.27, Grad Norm: 0.0500 +[2025-02-23 00:29:28] (step=0144700) Train Loss: 0.3283, Train Steps/Sec: 17.29, Grad Norm: 0.0523 +[2025-02-23 00:29:34] (step=0144800) Train Loss: 0.3286, Train Steps/Sec: 17.33, Grad Norm: 0.0527 +[2025-02-23 00:29:40] (step=0144900) Train Loss: 0.3281, Train Steps/Sec: 17.34, Grad Norm: 0.0486 +[2025-02-23 00:29:46] (step=0145000) Train Loss: 0.3282, Train Steps/Sec: 17.29, Grad Norm: 0.0531 +[2025-02-23 00:29:51] (step=0145100) Train Loss: 0.3280, Train Steps/Sec: 17.16, Grad Norm: 0.0473 +[2025-02-23 00:29:58] (step=0145200) Train Loss: 0.3288, Train Steps/Sec: 14.19, Grad Norm: 0.0482 +[2025-02-23 00:30:04] (step=0145300) Train Loss: 0.3282, Train Steps/Sec: 17.06, Grad Norm: 0.0645 +[2025-02-23 00:30:10] (step=0145400) Train Loss: 0.3281, Train Steps/Sec: 17.22, Grad Norm: 0.0496 +[2025-02-23 00:30:16] (step=0145500) Train Loss: 0.3280, Train Steps/Sec: 17.37, Grad Norm: 0.0499 +[2025-02-23 00:30:22] (step=0145600) Train Loss: 0.3285, Train Steps/Sec: 17.38, Grad Norm: 0.0519 +[2025-02-23 00:30:28] (step=0145700) Train Loss: 0.3285, Train Steps/Sec: 15.32, Grad Norm: 0.0488 +[2025-02-23 00:30:35] (step=0145800) Train Loss: 0.3280, Train Steps/Sec: 15.89, Grad Norm: 0.0526 +[2025-02-23 00:30:42] (step=0145900) Train Loss: 0.3286, Train Steps/Sec: 14.07, Grad Norm: 0.0523 +[2025-02-23 00:30:48] (step=0146000) Train Loss: 0.3281, Train Steps/Sec: 15.18, Grad Norm: 0.0472 +[2025-02-23 00:30:54] (step=0146100) Train Loss: 0.3280, Train Steps/Sec: 16.48, Grad Norm: 0.0497 +[2025-02-23 00:31:00] (step=0146200) Train Loss: 0.3280, Train Steps/Sec: 17.34, Grad Norm: 0.0454 +[2025-02-23 00:31:06] (step=0146300) Train Loss: 0.3285, Train Steps/Sec: 16.60, Grad Norm: 0.0530 +[2025-02-23 00:31:13] (step=0146400) Train Loss: 0.3281, Train Steps/Sec: 14.40, Grad Norm: 0.0508 +[2025-02-23 00:31:19] (step=0146500) Train Loss: 0.3281, Train Steps/Sec: 17.33, Grad Norm: 0.0510 +[2025-02-23 00:31:25] (step=0146600) Train Loss: 0.3279, Train Steps/Sec: 17.30, Grad Norm: 0.0511 +[2025-02-23 00:31:30] (step=0146700) Train Loss: 0.3272, Train Steps/Sec: 17.23, Grad Norm: 0.0544 +[2025-02-23 00:31:36] (step=0146800) Train Loss: 0.3280, Train Steps/Sec: 17.23, Grad Norm: 0.0493 +[2025-02-23 00:31:42] (step=0146900) Train Loss: 0.3281, Train Steps/Sec: 17.34, Grad Norm: 0.0526 +[2025-02-23 00:31:48] (step=0147000) Train Loss: 0.3277, Train Steps/Sec: 17.39, Grad Norm: 0.0479 +[2025-02-23 00:31:53] (step=0147100) Train Loss: 0.3282, Train Steps/Sec: 17.36, Grad Norm: 0.0514 +[2025-02-23 00:31:59] (step=0147200) Train Loss: 0.3289, Train Steps/Sec: 17.39, Grad Norm: 0.0543 +[2025-02-23 00:32:05] (step=0147300) Train Loss: 0.3284, Train Steps/Sec: 17.35, Grad Norm: 0.0487 +[2025-02-23 00:32:11] (step=0147400) Train Loss: 0.3286, Train Steps/Sec: 17.35, Grad Norm: 0.0567 +[2025-02-23 00:32:17] (step=0147500) Train Loss: 0.3286, Train Steps/Sec: 17.41, Grad Norm: 0.0526 +[2025-02-23 00:32:22] (step=0147600) Train Loss: 0.3286, Train Steps/Sec: 17.30, Grad Norm: 0.0509 +[2025-02-23 00:32:31] (step=0147700) Train Loss: 0.3282, Train Steps/Sec: 12.14, Grad Norm: 0.0483 +[2025-02-23 00:32:37] (step=0147800) Train Loss: 0.3279, Train Steps/Sec: 15.84, Grad Norm: 0.0570 +[2025-02-23 00:32:44] (step=0147900) Train Loss: 0.3282, Train Steps/Sec: 14.63, Grad Norm: 0.0505 +[2025-02-23 00:32:50] (step=0148000) Train Loss: 0.3277, Train Steps/Sec: 15.17, Grad Norm: 0.0508 +[2025-02-23 00:32:56] (step=0148100) Train Loss: 0.3281, Train Steps/Sec: 16.56, Grad Norm: 0.0514 +[2025-02-23 00:33:02] (step=0148200) Train Loss: 0.3279, Train Steps/Sec: 16.59, Grad Norm: 0.0481 +[2025-02-23 00:33:08] (step=0148300) Train Loss: 0.3279, Train Steps/Sec: 17.39, Grad Norm: 0.0531 +[2025-02-23 00:33:14] (step=0148400) Train Loss: 0.3277, Train Steps/Sec: 17.42, Grad Norm: 0.0492 +[2025-02-23 00:33:20] (step=0148500) Train Loss: 0.3286, Train Steps/Sec: 17.43, Grad Norm: 0.0531 +[2025-02-23 00:33:25] (step=0148600) Train Loss: 0.3281, Train Steps/Sec: 17.44, Grad Norm: 0.0536 +[2025-02-23 00:33:31] (step=0148700) Train Loss: 0.3288, Train Steps/Sec: 17.46, Grad Norm: 0.0504 +[2025-02-23 00:33:37] (step=0148800) Train Loss: 0.3280, Train Steps/Sec: 17.43, Grad Norm: 0.0504 +[2025-02-23 00:33:44] (step=0148900) Train Loss: 0.3278, Train Steps/Sec: 14.41, Grad Norm: 0.0467 +[2025-02-23 00:33:49] (step=0149000) Train Loss: 0.3278, Train Steps/Sec: 17.37, Grad Norm: 0.0541 +[2025-02-23 00:33:55] (step=0149100) Train Loss: 0.3281, Train Steps/Sec: 17.35, Grad Norm: 0.0505 +[2025-02-23 00:34:01] (step=0149200) Train Loss: 0.3275, Train Steps/Sec: 17.13, Grad Norm: 0.0548 +[2025-02-23 00:34:07] (step=0149300) Train Loss: 0.3275, Train Steps/Sec: 17.16, Grad Norm: 0.0506 +[2025-02-23 00:34:13] (step=0149400) Train Loss: 0.3286, Train Steps/Sec: 17.11, Grad Norm: 0.0449 +[2025-02-23 00:34:19] (step=0149500) Train Loss: 0.3282, Train Steps/Sec: 17.10, Grad Norm: 0.0494 +[2025-02-23 00:34:25] (step=0149600) Train Loss: 0.3280, Train Steps/Sec: 15.03, Grad Norm: 0.0525 +[2025-02-23 00:34:32] (step=0149700) Train Loss: 0.3282, Train Steps/Sec: 15.72, Grad Norm: 0.0467 +[2025-02-23 00:34:38] (step=0149800) Train Loss: 0.3280, Train Steps/Sec: 15.70, Grad Norm: 0.0502 +[2025-02-23 00:34:45] (step=0149900) Train Loss: 0.3282, Train Steps/Sec: 15.06, Grad Norm: 0.0474 +[2025-02-23 00:34:52] (step=0150000) Train Loss: 0.3273, Train Steps/Sec: 14.46, Grad Norm: 0.0453 +[2025-02-23 00:34:53] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0150000.pt +[2025-02-23 00:34:59] (step=0150100) Train Loss: 0.3280, Train Steps/Sec: 13.15, Grad Norm: 0.0503 +[2025-02-23 00:35:06] (step=0150200) Train Loss: 0.3281, Train Steps/Sec: 14.26, Grad Norm: 0.0498 +[2025-02-23 00:35:12] (step=0150300) Train Loss: 0.3272, Train Steps/Sec: 17.11, Grad Norm: 0.0515 +[2025-02-23 00:35:18] (step=0150400) Train Loss: 0.3279, Train Steps/Sec: 17.11, Grad Norm: 0.0487 +[2025-02-23 00:35:24] (step=0150500) Train Loss: 0.3282, Train Steps/Sec: 17.09, Grad Norm: 0.0492 +[2025-02-23 00:35:30] (step=0150600) Train Loss: 0.3278, Train Steps/Sec: 17.07, Grad Norm: 0.0495 +[2025-02-23 00:35:35] (step=0150700) Train Loss: 0.3282, Train Steps/Sec: 17.01, Grad Norm: 0.0513 +[2025-02-23 00:35:41] (step=0150800) Train Loss: 0.3279, Train Steps/Sec: 17.06, Grad Norm: 0.0482 +[2025-02-23 00:35:47] (step=0150900) Train Loss: 0.3288, Train Steps/Sec: 17.05, Grad Norm: 0.0468 +[2025-02-23 00:35:53] (step=0151000) Train Loss: 0.3286, Train Steps/Sec: 17.12, Grad Norm: 0.0496 +[2025-02-23 00:35:59] (step=0151100) Train Loss: 0.3280, Train Steps/Sec: 17.09, Grad Norm: 0.0499 +[2025-02-23 00:36:05] (step=0151200) Train Loss: 0.3281, Train Steps/Sec: 17.10, Grad Norm: 0.0476 +[2025-02-23 00:36:11] (step=0151300) Train Loss: 0.3282, Train Steps/Sec: 17.21, Grad Norm: 0.0490 +[2025-02-23 00:36:18] (step=0151400) Train Loss: 0.3279, Train Steps/Sec: 14.32, Grad Norm: 0.0489 +[2025-02-23 00:36:24] (step=0151500) Train Loss: 0.3284, Train Steps/Sec: 16.53, Grad Norm: 0.0515 +[2025-02-23 00:36:31] (step=0151600) Train Loss: 0.3280, Train Steps/Sec: 14.42, Grad Norm: 0.0507 +[2025-02-23 00:36:37] (step=0151700) Train Loss: 0.3282, Train Steps/Sec: 15.62, Grad Norm: 0.0507 +[2025-02-23 00:36:43] (step=0151800) Train Loss: 0.3281, Train Steps/Sec: 16.98, Grad Norm: 0.0550 +[2025-02-23 00:36:50] (step=0151900) Train Loss: 0.3278, Train Steps/Sec: 14.92, Grad Norm: 0.0495 +[2025-02-23 00:36:56] (step=0152000) Train Loss: 0.3277, Train Steps/Sec: 14.42, Grad Norm: 0.0484 +[2025-02-23 00:37:03] (step=0152100) Train Loss: 0.3279, Train Steps/Sec: 15.60, Grad Norm: 0.0544 +[2025-02-23 00:37:09] (step=0152200) Train Loss: 0.3284, Train Steps/Sec: 17.10, Grad Norm: 0.0475 +[2025-02-23 00:37:15] (step=0152300) Train Loss: 0.3277, Train Steps/Sec: 17.22, Grad Norm: 0.0567 +[2025-02-23 00:37:20] (step=0152400) Train Loss: 0.3276, Train Steps/Sec: 17.26, Grad Norm: 0.0529 +[2025-02-23 00:37:26] (step=0152500) Train Loss: 0.3279, Train Steps/Sec: 17.27, Grad Norm: 0.0509 +[2025-02-23 00:37:32] (step=0152600) Train Loss: 0.3275, Train Steps/Sec: 17.18, Grad Norm: 0.0488 +[2025-02-23 00:37:39] (step=0152700) Train Loss: 0.3278, Train Steps/Sec: 14.45, Grad Norm: 0.0522 +[2025-02-23 00:37:45] (step=0152800) Train Loss: 0.3279, Train Steps/Sec: 17.32, Grad Norm: 0.0463 +[2025-02-23 00:37:50] (step=0152900) Train Loss: 0.3281, Train Steps/Sec: 17.28, Grad Norm: 0.0474 +[2025-02-23 00:37:56] (step=0153000) Train Loss: 0.3283, Train Steps/Sec: 17.27, Grad Norm: 0.0561 +[2025-02-23 00:38:02] (step=0153100) Train Loss: 0.3275, Train Steps/Sec: 17.21, Grad Norm: 0.0489 +[2025-02-23 00:38:08] (step=0153200) Train Loss: 0.3278, Train Steps/Sec: 17.26, Grad Norm: 0.0482 +[2025-02-23 00:38:14] (step=0153300) Train Loss: 0.3283, Train Steps/Sec: 17.30, Grad Norm: 0.0505 +[2025-02-23 00:38:19] (step=0153400) Train Loss: 0.3279, Train Steps/Sec: 17.31, Grad Norm: 0.0511 +[2025-02-23 00:38:26] (step=0153500) Train Loss: 0.3286, Train Steps/Sec: 15.25, Grad Norm: 0.0492 +[2025-02-23 00:38:32] (step=0153600) Train Loss: 0.3277, Train Steps/Sec: 15.89, Grad Norm: 0.0493 +[2025-02-23 00:38:39] (step=0153700) Train Loss: 0.3275, Train Steps/Sec: 15.87, Grad Norm: 0.0534 +[2025-02-23 00:38:44] (step=0153800) Train Loss: 0.3278, Train Steps/Sec: 17.36, Grad Norm: 0.0496 +[2025-02-23 00:38:52] (step=0153900) Train Loss: 0.3282, Train Steps/Sec: 12.42, Grad Norm: 0.0515 +[2025-02-23 00:38:59] (step=0154000) Train Loss: 0.3277, Train Steps/Sec: 14.52, Grad Norm: 0.0495 +[2025-02-23 00:39:05] (step=0154100) Train Loss: 0.3273, Train Steps/Sec: 16.42, Grad Norm: 0.0482 +[2025-02-23 00:39:11] (step=0154200) Train Loss: 0.3276, Train Steps/Sec: 17.20, Grad Norm: 0.0488 +[2025-02-23 00:39:17] (step=0154300) Train Loss: 0.3277, Train Steps/Sec: 17.21, Grad Norm: 0.0529 +[2025-02-23 00:39:23] (step=0154400) Train Loss: 0.3273, Train Steps/Sec: 17.22, Grad Norm: 0.0501 +[2025-02-23 00:39:29] (step=0154500) Train Loss: 0.3280, Train Steps/Sec: 17.21, Grad Norm: 0.0506 +[2025-02-23 00:39:34] (step=0154600) Train Loss: 0.3280, Train Steps/Sec: 17.31, Grad Norm: 0.0499 +[2025-02-23 00:39:40] (step=0154700) Train Loss: 0.3286, Train Steps/Sec: 17.28, Grad Norm: 0.0530 +[2025-02-23 00:39:46] (step=0154800) Train Loss: 0.3273, Train Steps/Sec: 17.28, Grad Norm: 0.0509 +[2025-02-23 00:39:52] (step=0154900) Train Loss: 0.3280, Train Steps/Sec: 17.30, Grad Norm: 0.0475 +[2025-02-23 00:39:58] (step=0155000) Train Loss: 0.3276, Train Steps/Sec: 17.27, Grad Norm: 0.0477 +[2025-02-23 00:40:03] (step=0155100) Train Loss: 0.3280, Train Steps/Sec: 17.22, Grad Norm: 0.0547 +[2025-02-23 00:40:10] (step=0155200) Train Loss: 0.3278, Train Steps/Sec: 14.47, Grad Norm: 0.0496 +[2025-02-23 00:40:16] (step=0155300) Train Loss: 0.3274, Train Steps/Sec: 17.27, Grad Norm: 0.0548 +[2025-02-23 00:40:22] (step=0155400) Train Loss: 0.3279, Train Steps/Sec: 16.56, Grad Norm: 0.0505 +[2025-02-23 00:40:29] (step=0155500) Train Loss: 0.3276, Train Steps/Sec: 14.60, Grad Norm: 0.0510 +[2025-02-23 00:40:35] (step=0155600) Train Loss: 0.3278, Train Steps/Sec: 15.86, Grad Norm: 0.0512 +[2025-02-23 00:40:41] (step=0155700) Train Loss: 0.3283, Train Steps/Sec: 17.33, Grad Norm: 0.0488 +[2025-02-23 00:40:47] (step=0155800) Train Loss: 0.3279, Train Steps/Sec: 17.32, Grad Norm: 0.0508 +[2025-02-23 00:40:53] (step=0155900) Train Loss: 0.3278, Train Steps/Sec: 15.20, Grad Norm: 0.0457 +[2025-02-23 00:41:01] (step=0156000) Train Loss: 0.3274, Train Steps/Sec: 14.04, Grad Norm: 0.0468 +[2025-02-23 00:41:07] (step=0156100) Train Loss: 0.3276, Train Steps/Sec: 16.44, Grad Norm: 0.0478 +[2025-02-23 00:41:12] (step=0156200) Train Loss: 0.3277, Train Steps/Sec: 17.26, Grad Norm: 0.0484 +[2025-02-23 00:41:18] (step=0156300) Train Loss: 0.3277, Train Steps/Sec: 17.27, Grad Norm: 0.0511 +[2025-02-23 00:41:25] (step=0156400) Train Loss: 0.3282, Train Steps/Sec: 14.34, Grad Norm: 0.0508 +[2025-02-23 00:41:31] (step=0156500) Train Loss: 0.3272, Train Steps/Sec: 17.33, Grad Norm: 0.0474 +[2025-02-23 00:41:37] (step=0156600) Train Loss: 0.3282, Train Steps/Sec: 17.31, Grad Norm: 0.0475 +[2025-02-23 00:41:42] (step=0156700) Train Loss: 0.3274, Train Steps/Sec: 17.37, Grad Norm: 0.0475 +[2025-02-23 00:41:48] (step=0156800) Train Loss: 0.3277, Train Steps/Sec: 17.42, Grad Norm: 0.0499 +[2025-02-23 00:41:54] (step=0156900) Train Loss: 0.3284, Train Steps/Sec: 17.36, Grad Norm: 0.0480 +[2025-02-23 00:42:00] (step=0157000) Train Loss: 0.3274, Train Steps/Sec: 17.38, Grad Norm: 0.0503 +[2025-02-23 00:42:05] (step=0157100) Train Loss: 0.3278, Train Steps/Sec: 17.37, Grad Norm: 0.0465 +[2025-02-23 00:42:11] (step=0157200) Train Loss: 0.3276, Train Steps/Sec: 17.40, Grad Norm: 0.0502 +[2025-02-23 00:42:17] (step=0157300) Train Loss: 0.3275, Train Steps/Sec: 17.40, Grad Norm: 0.0536 +[2025-02-23 00:42:24] (step=0157400) Train Loss: 0.3281, Train Steps/Sec: 14.73, Grad Norm: 0.0504 +[2025-02-23 00:42:30] (step=0157500) Train Loss: 0.3276, Train Steps/Sec: 15.90, Grad Norm: 0.0458 +[2025-02-23 00:42:36] (step=0157600) Train Loss: 0.3269, Train Steps/Sec: 16.47, Grad Norm: 0.0489 +[2025-02-23 00:42:43] (step=0157700) Train Loss: 0.3277, Train Steps/Sec: 14.52, Grad Norm: 0.0480 +[2025-02-23 00:42:49] (step=0157800) Train Loss: 0.3268, Train Steps/Sec: 17.38, Grad Norm: 0.0519 +[2025-02-23 00:42:56] (step=0157900) Train Loss: 0.3276, Train Steps/Sec: 14.66, Grad Norm: 0.0468 +[2025-02-23 00:43:02] (step=0158000) Train Loss: 0.3282, Train Steps/Sec: 14.55, Grad Norm: 0.0487 +[2025-02-23 00:43:09] (step=0158100) Train Loss: 0.3279, Train Steps/Sec: 16.57, Grad Norm: 0.0505 +[2025-02-23 00:43:14] (step=0158200) Train Loss: 0.3279, Train Steps/Sec: 17.38, Grad Norm: 0.0452 +[2025-02-23 00:43:20] (step=0158300) Train Loss: 0.3275, Train Steps/Sec: 17.39, Grad Norm: 0.0536 +[2025-02-23 00:43:26] (step=0158400) Train Loss: 0.3281, Train Steps/Sec: 17.37, Grad Norm: 0.0464 +[2025-02-23 00:43:32] (step=0158500) Train Loss: 0.3277, Train Steps/Sec: 17.36, Grad Norm: 0.0479 +[2025-02-23 00:43:37] (step=0158600) Train Loss: 0.3278, Train Steps/Sec: 17.38, Grad Norm: 0.0476 +[2025-02-23 00:43:43] (step=0158700) Train Loss: 0.3270, Train Steps/Sec: 17.40, Grad Norm: 0.0491 +[2025-02-23 00:43:49] (step=0158800) Train Loss: 0.3272, Train Steps/Sec: 17.30, Grad Norm: 0.0495 +[2025-02-23 00:43:56] (step=0158900) Train Loss: 0.3279, Train Steps/Sec: 14.32, Grad Norm: 0.0525 +[2025-02-23 00:44:02] (step=0159000) Train Loss: 0.3273, Train Steps/Sec: 17.25, Grad Norm: 0.0448 +[2025-02-23 00:44:07] (step=0159100) Train Loss: 0.3276, Train Steps/Sec: 17.35, Grad Norm: 0.0461 +[2025-02-23 00:44:13] (step=0159200) Train Loss: 0.3282, Train Steps/Sec: 17.36, Grad Norm: 0.0545 +[2025-02-23 00:44:19] (step=0159300) Train Loss: 0.3279, Train Steps/Sec: 15.92, Grad Norm: 0.0473 +[2025-02-23 00:44:26] (step=0159400) Train Loss: 0.3275, Train Steps/Sec: 15.29, Grad Norm: 0.0495 +[2025-02-23 00:44:32] (step=0159500) Train Loss: 0.3277, Train Steps/Sec: 15.90, Grad Norm: 0.0491 +[2025-02-23 00:44:38] (step=0159600) Train Loss: 0.3277, Train Steps/Sec: 17.44, Grad Norm: 0.0487 +[2025-02-23 00:44:44] (step=0159700) Train Loss: 0.3279, Train Steps/Sec: 17.37, Grad Norm: 0.0460 +[2025-02-23 00:44:50] (step=0159800) Train Loss: 0.3275, Train Steps/Sec: 17.34, Grad Norm: 0.0475 +[2025-02-23 00:44:56] (step=0159900) Train Loss: 0.3279, Train Steps/Sec: 14.64, Grad Norm: 0.0501 +[2025-02-23 00:45:03] (step=0160000) Train Loss: 0.3278, Train Steps/Sec: 14.60, Grad Norm: 0.0454 +[2025-02-23 00:45:09] (step=0160100) Train Loss: 0.3275, Train Steps/Sec: 16.46, Grad Norm: 0.0492 +[2025-02-23 00:45:16] (step=0160200) Train Loss: 0.3277, Train Steps/Sec: 14.43, Grad Norm: 0.0466 +[2025-02-23 00:45:22] (step=0160300) Train Loss: 0.3276, Train Steps/Sec: 17.33, Grad Norm: 0.0468 +[2025-02-23 00:45:28] (step=0160400) Train Loss: 0.3275, Train Steps/Sec: 17.38, Grad Norm: 0.0494 +[2025-02-23 00:45:33] (step=0160500) Train Loss: 0.3283, Train Steps/Sec: 17.39, Grad Norm: 0.0520 +[2025-02-23 00:45:39] (step=0160600) Train Loss: 0.3283, Train Steps/Sec: 17.36, Grad Norm: 0.0499 +[2025-02-23 00:45:45] (step=0160700) Train Loss: 0.3281, Train Steps/Sec: 17.31, Grad Norm: 0.0503 +[2025-02-23 00:45:51] (step=0160800) Train Loss: 0.3277, Train Steps/Sec: 17.25, Grad Norm: 0.0512 +[2025-02-23 00:45:57] (step=0160900) Train Loss: 0.3275, Train Steps/Sec: 17.30, Grad Norm: 0.0493 +[2025-02-23 00:46:02] (step=0161000) Train Loss: 0.3283, Train Steps/Sec: 17.22, Grad Norm: 0.0471 +[2025-02-23 00:46:08] (step=0161100) Train Loss: 0.3281, Train Steps/Sec: 17.20, Grad Norm: 0.0521 +[2025-02-23 00:46:14] (step=0161200) Train Loss: 0.3273, Train Steps/Sec: 17.25, Grad Norm: 0.0487 +[2025-02-23 00:46:21] (step=0161300) Train Loss: 0.3278, Train Steps/Sec: 14.19, Grad Norm: 0.0506 +[2025-02-23 00:46:29] (step=0161400) Train Loss: 0.3283, Train Steps/Sec: 13.35, Grad Norm: 0.0504 +[2025-02-23 00:46:34] (step=0161500) Train Loss: 0.3277, Train Steps/Sec: 17.38, Grad Norm: 0.0443 +[2025-02-23 00:46:40] (step=0161600) Train Loss: 0.3276, Train Steps/Sec: 17.35, Grad Norm: 0.0501 +[2025-02-23 00:46:46] (step=0161700) Train Loss: 0.3274, Train Steps/Sec: 17.37, Grad Norm: 0.0509 +[2025-02-23 00:46:52] (step=0161800) Train Loss: 0.3281, Train Steps/Sec: 16.69, Grad Norm: 0.0495 +[2025-02-23 00:46:58] (step=0161900) Train Loss: 0.3275, Train Steps/Sec: 15.28, Grad Norm: 0.0450 +[2025-02-23 00:47:05] (step=0162000) Train Loss: 0.3271, Train Steps/Sec: 14.61, Grad Norm: 0.0478 +[2025-02-23 00:47:11] (step=0162100) Train Loss: 0.3276, Train Steps/Sec: 16.57, Grad Norm: 0.0519 +[2025-02-23 00:47:17] (step=0162200) Train Loss: 0.3277, Train Steps/Sec: 17.38, Grad Norm: 0.0477 +[2025-02-23 00:47:23] (step=0162300) Train Loss: 0.3280, Train Steps/Sec: 17.41, Grad Norm: 0.0487 +[2025-02-23 00:47:29] (step=0162400) Train Loss: 0.3269, Train Steps/Sec: 17.36, Grad Norm: 0.0479 +[2025-02-23 00:47:34] (step=0162500) Train Loss: 0.3283, Train Steps/Sec: 17.40, Grad Norm: 0.0560 +[2025-02-23 00:47:40] (step=0162600) Train Loss: 0.3275, Train Steps/Sec: 17.30, Grad Norm: 0.0513 +[2025-02-23 00:47:47] (step=0162700) Train Loss: 0.3273, Train Steps/Sec: 14.38, Grad Norm: 0.0494 +[2025-02-23 00:47:53] (step=0162800) Train Loss: 0.3272, Train Steps/Sec: 17.34, Grad Norm: 0.0495 +[2025-02-23 00:47:59] (step=0162900) Train Loss: 0.3274, Train Steps/Sec: 17.32, Grad Norm: 0.0461 +[2025-02-23 00:48:04] (step=0163000) Train Loss: 0.3280, Train Steps/Sec: 17.37, Grad Norm: 0.0467 +[2025-02-23 00:48:10] (step=0163100) Train Loss: 0.3274, Train Steps/Sec: 17.33, Grad Norm: 0.0487 +[2025-02-23 00:48:17] (step=0163200) Train Loss: 0.3272, Train Steps/Sec: 15.30, Grad Norm: 0.0455 +[2025-02-23 00:48:23] (step=0163300) Train Loss: 0.3272, Train Steps/Sec: 15.98, Grad Norm: 0.0457 +[2025-02-23 00:48:29] (step=0163400) Train Loss: 0.3273, Train Steps/Sec: 15.94, Grad Norm: 0.0516 +[2025-02-23 00:48:35] (step=0163500) Train Loss: 0.3280, Train Steps/Sec: 17.41, Grad Norm: 0.0478 +[2025-02-23 00:48:41] (step=0163600) Train Loss: 0.3279, Train Steps/Sec: 17.37, Grad Norm: 0.0487 +[2025-02-23 00:48:46] (step=0163700) Train Loss: 0.3271, Train Steps/Sec: 17.26, Grad Norm: 0.0478 +[2025-02-23 00:48:53] (step=0163800) Train Loss: 0.3275, Train Steps/Sec: 16.55, Grad Norm: 0.0488 +[2025-02-23 00:49:00] (step=0163900) Train Loss: 0.3277, Train Steps/Sec: 12.94, Grad Norm: 0.0512 +[2025-02-23 00:49:07] (step=0164000) Train Loss: 0.3277, Train Steps/Sec: 14.59, Grad Norm: 0.0511 +[2025-02-23 00:49:13] (step=0164100) Train Loss: 0.3271, Train Steps/Sec: 16.43, Grad Norm: 0.0446 +[2025-02-23 00:49:19] (step=0164200) Train Loss: 0.3279, Train Steps/Sec: 17.25, Grad Norm: 0.0471 +[2025-02-23 00:49:25] (step=0164300) Train Loss: 0.3275, Train Steps/Sec: 17.27, Grad Norm: 0.0476 +[2025-02-23 00:49:31] (step=0164400) Train Loss: 0.3275, Train Steps/Sec: 17.30, Grad Norm: 0.0477 +[2025-02-23 00:49:36] (step=0164500) Train Loss: 0.3274, Train Steps/Sec: 17.35, Grad Norm: 0.0501 +[2025-02-23 00:49:42] (step=0164600) Train Loss: 0.3274, Train Steps/Sec: 17.42, Grad Norm: 0.0439 +[2025-02-23 00:49:48] (step=0164700) Train Loss: 0.3268, Train Steps/Sec: 17.37, Grad Norm: 0.0487 +[2025-02-23 00:49:54] (step=0164800) Train Loss: 0.3279, Train Steps/Sec: 17.29, Grad Norm: 0.0484 +[2025-02-23 00:49:59] (step=0164900) Train Loss: 0.3270, Train Steps/Sec: 17.38, Grad Norm: 0.0520 +[2025-02-23 00:50:05] (step=0165000) Train Loss: 0.3271, Train Steps/Sec: 17.40, Grad Norm: 0.0463 +[2025-02-23 00:50:11] (step=0165100) Train Loss: 0.3275, Train Steps/Sec: 17.27, Grad Norm: 0.0449 +[2025-02-23 00:50:19] (step=0165200) Train Loss: 0.3275, Train Steps/Sec: 12.14, Grad Norm: 0.0446 +[2025-02-23 00:50:26] (step=0165300) Train Loss: 0.3274, Train Steps/Sec: 15.80, Grad Norm: 0.0487 +[2025-02-23 00:50:31] (step=0165400) Train Loss: 0.3266, Train Steps/Sec: 17.32, Grad Norm: 0.0498 +[2025-02-23 00:50:37] (step=0165500) Train Loss: 0.3277, Train Steps/Sec: 17.39, Grad Norm: 0.0466 +[2025-02-23 00:50:43] (step=0165600) Train Loss: 0.3267, Train Steps/Sec: 17.37, Grad Norm: 0.0461 +[2025-02-23 00:50:49] (step=0165700) Train Loss: 0.3277, Train Steps/Sec: 17.34, Grad Norm: 0.0540 +[2025-02-23 00:50:55] (step=0165800) Train Loss: 0.3274, Train Steps/Sec: 16.70, Grad Norm: 0.0444 +[2025-02-23 00:51:01] (step=0165900) Train Loss: 0.3276, Train Steps/Sec: 15.22, Grad Norm: 0.0543 +[2025-02-23 00:51:08] (step=0166000) Train Loss: 0.3280, Train Steps/Sec: 14.61, Grad Norm: 0.0478 +[2025-02-23 00:51:14] (step=0166100) Train Loss: 0.3279, Train Steps/Sec: 17.35, Grad Norm: 0.0463 +[2025-02-23 00:51:20] (step=0166200) Train Loss: 0.3278, Train Steps/Sec: 16.56, Grad Norm: 0.0469 +[2025-02-23 00:51:26] (step=0166300) Train Loss: 0.3272, Train Steps/Sec: 17.40, Grad Norm: 0.0455 +[2025-02-23 00:51:33] (step=0166400) Train Loss: 0.3269, Train Steps/Sec: 14.28, Grad Norm: 0.0527 +[2025-02-23 00:51:38] (step=0166500) Train Loss: 0.3273, Train Steps/Sec: 17.18, Grad Norm: 0.0447 +[2025-02-23 00:51:44] (step=0166600) Train Loss: 0.3273, Train Steps/Sec: 17.21, Grad Norm: 0.0452 +[2025-02-23 00:51:50] (step=0166700) Train Loss: 0.3273, Train Steps/Sec: 17.22, Grad Norm: 0.0473 +[2025-02-23 00:51:56] (step=0166800) Train Loss: 0.3277, Train Steps/Sec: 17.23, Grad Norm: 0.0492 +[2025-02-23 00:52:02] (step=0166900) Train Loss: 0.3268, Train Steps/Sec: 17.20, Grad Norm: 0.0511 +[2025-02-23 00:52:07] (step=0167000) Train Loss: 0.3279, Train Steps/Sec: 17.19, Grad Norm: 0.0498 +[2025-02-23 00:52:14] (step=0167100) Train Loss: 0.3273, Train Steps/Sec: 15.83, Grad Norm: 0.0454 +[2025-02-23 00:52:20] (step=0167200) Train Loss: 0.3268, Train Steps/Sec: 15.79, Grad Norm: 0.0507 +[2025-02-23 00:52:26] (step=0167300) Train Loss: 0.3276, Train Steps/Sec: 15.88, Grad Norm: 0.0488 +[2025-02-23 00:52:32] (step=0167400) Train Loss: 0.3274, Train Steps/Sec: 17.29, Grad Norm: 0.0541 +[2025-02-23 00:52:38] (step=0167500) Train Loss: 0.3276, Train Steps/Sec: 17.37, Grad Norm: 0.0462 +[2025-02-23 00:52:44] (step=0167600) Train Loss: 0.3277, Train Steps/Sec: 17.39, Grad Norm: 0.0456 +[2025-02-23 00:52:51] (step=0167700) Train Loss: 0.3278, Train Steps/Sec: 13.90, Grad Norm: 0.0445 +[2025-02-23 00:52:57] (step=0167800) Train Loss: 0.3270, Train Steps/Sec: 17.37, Grad Norm: 0.0488 +[2025-02-23 00:53:03] (step=0167900) Train Loss: 0.3268, Train Steps/Sec: 15.24, Grad Norm: 0.0512 +[2025-02-23 00:53:10] (step=0168000) Train Loss: 0.3278, Train Steps/Sec: 14.61, Grad Norm: 0.0512 +[2025-02-23 00:53:16] (step=0168100) Train Loss: 0.3272, Train Steps/Sec: 17.27, Grad Norm: 0.0485 +[2025-02-23 00:53:22] (step=0168200) Train Loss: 0.3274, Train Steps/Sec: 16.55, Grad Norm: 0.0450 +[2025-02-23 00:53:28] (step=0168300) Train Loss: 0.3273, Train Steps/Sec: 17.40, Grad Norm: 0.0510 +[2025-02-23 00:53:33] (step=0168400) Train Loss: 0.3274, Train Steps/Sec: 17.33, Grad Norm: 0.0463 +[2025-02-23 00:53:39] (step=0168500) Train Loss: 0.3277, Train Steps/Sec: 17.39, Grad Norm: 0.0449 +[2025-02-23 00:53:45] (step=0168600) Train Loss: 0.3271, Train Steps/Sec: 17.37, Grad Norm: 0.0469 +[2025-02-23 00:53:51] (step=0168700) Train Loss: 0.3273, Train Steps/Sec: 17.32, Grad Norm: 0.0461 +[2025-02-23 00:53:56] (step=0168800) Train Loss: 0.3279, Train Steps/Sec: 17.29, Grad Norm: 0.0541 +[2025-02-23 00:54:03] (step=0168900) Train Loss: 0.3270, Train Steps/Sec: 14.29, Grad Norm: 0.0442 +[2025-02-23 00:54:10] (step=0169000) Train Loss: 0.3268, Train Steps/Sec: 16.61, Grad Norm: 0.0483 +[2025-02-23 00:54:16] (step=0169100) Train Loss: 0.3274, Train Steps/Sec: 14.64, Grad Norm: 0.0463 +[2025-02-23 00:54:23] (step=0169200) Train Loss: 0.3267, Train Steps/Sec: 15.87, Grad Norm: 0.0464 +[2025-02-23 00:54:28] (step=0169300) Train Loss: 0.3267, Train Steps/Sec: 17.32, Grad Norm: 0.0481 +[2025-02-23 00:54:34] (step=0169400) Train Loss: 0.3278, Train Steps/Sec: 17.32, Grad Norm: 0.0513 +[2025-02-23 00:54:40] (step=0169500) Train Loss: 0.3275, Train Steps/Sec: 17.22, Grad Norm: 0.0449 +[2025-02-23 00:54:46] (step=0169600) Train Loss: 0.3277, Train Steps/Sec: 16.47, Grad Norm: 0.0451 +[2025-02-23 00:54:52] (step=0169700) Train Loss: 0.3277, Train Steps/Sec: 17.21, Grad Norm: 0.0458 +[2025-02-23 00:54:58] (step=0169800) Train Loss: 0.3275, Train Steps/Sec: 17.27, Grad Norm: 0.0500 +[2025-02-23 00:55:04] (step=0169900) Train Loss: 0.3274, Train Steps/Sec: 15.75, Grad Norm: 0.0516 +[2025-02-23 00:55:11] (step=0170000) Train Loss: 0.3275, Train Steps/Sec: 14.02, Grad Norm: 0.0509 +[2025-02-23 00:55:17] (step=0170100) Train Loss: 0.3279, Train Steps/Sec: 17.26, Grad Norm: 0.0498 +[2025-02-23 00:55:24] (step=0170200) Train Loss: 0.3273, Train Steps/Sec: 13.89, Grad Norm: 0.0457 +[2025-02-23 00:55:30] (step=0170300) Train Loss: 0.3273, Train Steps/Sec: 17.37, Grad Norm: 0.0477 +[2025-02-23 00:55:36] (step=0170400) Train Loss: 0.3275, Train Steps/Sec: 17.39, Grad Norm: 0.0489 +[2025-02-23 00:55:41] (step=0170500) Train Loss: 0.3269, Train Steps/Sec: 17.39, Grad Norm: 0.0457 +[2025-02-23 00:55:47] (step=0170600) Train Loss: 0.3269, Train Steps/Sec: 17.42, Grad Norm: 0.0478 +[2025-02-23 00:55:53] (step=0170700) Train Loss: 0.3270, Train Steps/Sec: 17.40, Grad Norm: 0.0479 +[2025-02-23 00:55:59] (step=0170800) Train Loss: 0.3265, Train Steps/Sec: 17.41, Grad Norm: 0.0516 +[2025-02-23 00:56:04] (step=0170900) Train Loss: 0.3273, Train Steps/Sec: 17.39, Grad Norm: 0.0518 +[2025-02-23 00:56:11] (step=0171000) Train Loss: 0.3265, Train Steps/Sec: 15.91, Grad Norm: 0.0475 +[2025-02-23 00:56:18] (step=0171100) Train Loss: 0.3279, Train Steps/Sec: 14.69, Grad Norm: 0.0446 +[2025-02-23 00:56:24] (step=0171200) Train Loss: 0.3273, Train Steps/Sec: 16.51, Grad Norm: 0.0473 +[2025-02-23 00:56:29] (step=0171300) Train Loss: 0.3277, Train Steps/Sec: 17.33, Grad Norm: 0.0457 +[2025-02-23 00:56:36] (step=0171400) Train Loss: 0.3269, Train Steps/Sec: 14.27, Grad Norm: 0.0438 +[2025-02-23 00:56:42] (step=0171500) Train Loss: 0.3272, Train Steps/Sec: 17.30, Grad Norm: 0.0500 +[2025-02-23 00:56:48] (step=0171600) Train Loss: 0.3272, Train Steps/Sec: 16.65, Grad Norm: 0.0481 +[2025-02-23 00:56:54] (step=0171700) Train Loss: 0.3276, Train Steps/Sec: 17.37, Grad Norm: 0.0454 +[2025-02-23 00:57:00] (step=0171800) Train Loss: 0.3272, Train Steps/Sec: 17.34, Grad Norm: 0.0445 +[2025-02-23 00:57:06] (step=0171900) Train Loss: 0.3265, Train Steps/Sec: 16.50, Grad Norm: 0.0463 +[2025-02-23 00:57:13] (step=0172000) Train Loss: 0.3279, Train Steps/Sec: 14.06, Grad Norm: 0.0482 +[2025-02-23 00:57:19] (step=0172100) Train Loss: 0.3271, Train Steps/Sec: 17.32, Grad Norm: 0.0517 +[2025-02-23 00:57:25] (step=0172200) Train Loss: 0.3275, Train Steps/Sec: 16.57, Grad Norm: 0.0442 +[2025-02-23 00:57:30] (step=0172300) Train Loss: 0.3272, Train Steps/Sec: 17.38, Grad Norm: 0.0471 +[2025-02-23 00:57:36] (step=0172400) Train Loss: 0.3272, Train Steps/Sec: 17.34, Grad Norm: 0.0503 +[2025-02-23 00:57:42] (step=0172500) Train Loss: 0.3270, Train Steps/Sec: 17.31, Grad Norm: 0.0506 +[2025-02-23 00:57:48] (step=0172600) Train Loss: 0.3275, Train Steps/Sec: 17.38, Grad Norm: 0.0502 +[2025-02-23 00:57:55] (step=0172700) Train Loss: 0.3270, Train Steps/Sec: 14.17, Grad Norm: 0.0436 +[2025-02-23 00:58:01] (step=0172800) Train Loss: 0.3274, Train Steps/Sec: 16.82, Grad Norm: 0.0469 +[2025-02-23 00:58:07] (step=0172900) Train Loss: 0.3268, Train Steps/Sec: 16.04, Grad Norm: 0.0481 +[2025-02-23 00:58:14] (step=0173000) Train Loss: 0.3272, Train Steps/Sec: 14.28, Grad Norm: 0.0486 +[2025-02-23 00:58:20] (step=0173100) Train Loss: 0.3269, Train Steps/Sec: 15.45, Grad Norm: 0.0487 +[2025-02-23 00:58:26] (step=0173200) Train Loss: 0.3275, Train Steps/Sec: 16.83, Grad Norm: 0.0435 +[2025-02-23 00:58:32] (step=0173300) Train Loss: 0.3273, Train Steps/Sec: 16.79, Grad Norm: 0.0458 +[2025-02-23 00:58:38] (step=0173400) Train Loss: 0.3275, Train Steps/Sec: 16.95, Grad Norm: 0.0499 +[2025-02-23 00:58:44] (step=0173500) Train Loss: 0.3273, Train Steps/Sec: 16.32, Grad Norm: 0.0477 +[2025-02-23 00:58:50] (step=0173600) Train Loss: 0.3269, Train Steps/Sec: 17.02, Grad Norm: 0.0449 +[2025-02-23 00:58:56] (step=0173700) Train Loss: 0.3271, Train Steps/Sec: 17.06, Grad Norm: 0.0479 +[2025-02-23 00:59:02] (step=0173800) Train Loss: 0.3270, Train Steps/Sec: 17.04, Grad Norm: 0.0506 +[2025-02-23 00:59:10] (step=0173900) Train Loss: 0.3281, Train Steps/Sec: 13.19, Grad Norm: 0.0518 +[2025-02-23 00:59:17] (step=0174000) Train Loss: 0.3272, Train Steps/Sec: 13.86, Grad Norm: 0.0445 +[2025-02-23 00:59:23] (step=0174100) Train Loss: 0.3266, Train Steps/Sec: 17.07, Grad Norm: 0.0441 +[2025-02-23 00:59:29] (step=0174200) Train Loss: 0.3274, Train Steps/Sec: 16.29, Grad Norm: 0.0468 +[2025-02-23 00:59:35] (step=0174300) Train Loss: 0.3269, Train Steps/Sec: 17.07, Grad Norm: 0.0465 +[2025-02-23 00:59:41] (step=0174400) Train Loss: 0.3274, Train Steps/Sec: 17.08, Grad Norm: 0.0470 +[2025-02-23 00:59:46] (step=0174500) Train Loss: 0.3270, Train Steps/Sec: 17.04, Grad Norm: 0.0496 +[2025-02-23 00:59:52] (step=0174600) Train Loss: 0.3277, Train Steps/Sec: 17.06, Grad Norm: 0.0470 +[2025-02-23 00:59:58] (step=0174700) Train Loss: 0.3269, Train Steps/Sec: 17.07, Grad Norm: 0.0489 +[2025-02-23 01:00:04] (step=0174800) Train Loss: 0.3268, Train Steps/Sec: 17.04, Grad Norm: 0.0449 +[2025-02-23 01:00:10] (step=0174900) Train Loss: 0.3272, Train Steps/Sec: 15.60, Grad Norm: 0.0468 +[2025-02-23 01:00:17] (step=0175000) Train Loss: 0.3272, Train Steps/Sec: 14.63, Grad Norm: 0.0501 +[2025-02-23 01:00:23] (step=0175100) Train Loss: 0.3275, Train Steps/Sec: 16.48, Grad Norm: 0.0443 +[2025-02-23 01:00:30] (step=0175200) Train Loss: 0.3270, Train Steps/Sec: 14.04, Grad Norm: 0.0489 +[2025-02-23 01:00:36] (step=0175300) Train Loss: 0.3274, Train Steps/Sec: 17.31, Grad Norm: 0.0455 +[2025-02-23 01:00:42] (step=0175400) Train Loss: 0.3273, Train Steps/Sec: 17.31, Grad Norm: 0.0463 +[2025-02-23 01:00:48] (step=0175500) Train Loss: 0.3271, Train Steps/Sec: 16.58, Grad Norm: 0.0429 +[2025-02-23 01:00:54] (step=0175600) Train Loss: 0.3268, Train Steps/Sec: 17.37, Grad Norm: 0.0456 +[2025-02-23 01:01:00] (step=0175700) Train Loss: 0.3271, Train Steps/Sec: 17.29, Grad Norm: 0.0498 +[2025-02-23 01:01:05] (step=0175800) Train Loss: 0.3270, Train Steps/Sec: 17.25, Grad Norm: 0.0442 +[2025-02-23 01:01:12] (step=0175900) Train Loss: 0.3276, Train Steps/Sec: 15.85, Grad Norm: 0.0424 +[2025-02-23 01:01:19] (step=0176000) Train Loss: 0.3274, Train Steps/Sec: 14.02, Grad Norm: 0.0496 +[2025-02-23 01:01:25] (step=0176100) Train Loss: 0.3274, Train Steps/Sec: 17.26, Grad Norm: 0.0452 +[2025-02-23 01:01:31] (step=0176200) Train Loss: 0.3271, Train Steps/Sec: 16.48, Grad Norm: 0.0463 +[2025-02-23 01:01:37] (step=0176300) Train Loss: 0.3275, Train Steps/Sec: 17.23, Grad Norm: 0.0420 +[2025-02-23 01:01:44] (step=0176400) Train Loss: 0.3269, Train Steps/Sec: 14.21, Grad Norm: 0.0454 +[2025-02-23 01:01:49] (step=0176500) Train Loss: 0.3266, Train Steps/Sec: 17.17, Grad Norm: 0.0457 +[2025-02-23 01:01:55] (step=0176600) Train Loss: 0.3269, Train Steps/Sec: 17.26, Grad Norm: 0.0472 +[2025-02-23 01:02:01] (step=0176700) Train Loss: 0.3269, Train Steps/Sec: 17.23, Grad Norm: 0.0458 +[2025-02-23 01:02:07] (step=0176800) Train Loss: 0.3273, Train Steps/Sec: 16.44, Grad Norm: 0.0439 +[2025-02-23 01:02:14] (step=0176900) Train Loss: 0.3265, Train Steps/Sec: 14.65, Grad Norm: 0.0480 +[2025-02-23 01:02:20] (step=0177000) Train Loss: 0.3281, Train Steps/Sec: 15.83, Grad Norm: 0.0493 +[2025-02-23 01:02:26] (step=0177100) Train Loss: 0.3272, Train Steps/Sec: 17.27, Grad Norm: 0.0455 +[2025-02-23 01:02:32] (step=0177200) Train Loss: 0.3269, Train Steps/Sec: 17.29, Grad Norm: 0.0428 +[2025-02-23 01:02:38] (step=0177300) Train Loss: 0.3271, Train Steps/Sec: 17.25, Grad Norm: 0.0506 +[2025-02-23 01:02:44] (step=0177400) Train Loss: 0.3274, Train Steps/Sec: 16.57, Grad Norm: 0.0504 +[2025-02-23 01:02:49] (step=0177500) Train Loss: 0.3276, Train Steps/Sec: 17.36, Grad Norm: 0.0464 +[2025-02-23 01:02:55] (step=0177600) Train Loss: 0.3271, Train Steps/Sec: 17.42, Grad Norm: 0.0458 +[2025-02-23 01:03:02] (step=0177700) Train Loss: 0.3273, Train Steps/Sec: 14.06, Grad Norm: 0.0464 +[2025-02-23 01:03:08] (step=0177800) Train Loss: 0.3273, Train Steps/Sec: 17.10, Grad Norm: 0.0516 +[2025-02-23 01:03:14] (step=0177900) Train Loss: 0.3269, Train Steps/Sec: 15.70, Grad Norm: 0.0421 +[2025-02-23 01:03:21] (step=0178000) Train Loss: 0.3272, Train Steps/Sec: 14.54, Grad Norm: 0.0463 +[2025-02-23 01:03:27] (step=0178100) Train Loss: 0.3266, Train Steps/Sec: 16.62, Grad Norm: 0.0513 +[2025-02-23 01:03:33] (step=0178200) Train Loss: 0.3268, Train Steps/Sec: 16.53, Grad Norm: 0.0487 +[2025-02-23 01:03:39] (step=0178300) Train Loss: 0.3271, Train Steps/Sec: 17.28, Grad Norm: 0.0489 +[2025-02-23 01:03:45] (step=0178400) Train Loss: 0.3265, Train Steps/Sec: 17.39, Grad Norm: 0.0482 +[2025-02-23 01:03:51] (step=0178500) Train Loss: 0.3274, Train Steps/Sec: 17.27, Grad Norm: 0.0420 +[2025-02-23 01:03:57] (step=0178600) Train Loss: 0.3269, Train Steps/Sec: 17.31, Grad Norm: 0.0446 +[2025-02-23 01:04:02] (step=0178700) Train Loss: 0.3269, Train Steps/Sec: 17.34, Grad Norm: 0.0447 +[2025-02-23 01:04:09] (step=0178800) Train Loss: 0.3268, Train Steps/Sec: 15.25, Grad Norm: 0.0510 +[2025-02-23 01:04:17] (step=0178900) Train Loss: 0.3266, Train Steps/Sec: 12.91, Grad Norm: 0.0473 +[2025-02-23 01:04:23] (step=0179000) Train Loss: 0.3268, Train Steps/Sec: 16.48, Grad Norm: 0.0480 +[2025-02-23 01:04:28] (step=0179100) Train Loss: 0.3269, Train Steps/Sec: 17.26, Grad Norm: 0.0443 +[2025-02-23 01:04:34] (step=0179200) Train Loss: 0.3274, Train Steps/Sec: 17.28, Grad Norm: 0.0476 +[2025-02-23 01:04:40] (step=0179300) Train Loss: 0.3278, Train Steps/Sec: 17.24, Grad Norm: 0.0460 +[2025-02-23 01:04:46] (step=0179400) Train Loss: 0.3261, Train Steps/Sec: 16.61, Grad Norm: 0.0458 +[2025-02-23 01:04:52] (step=0179500) Train Loss: 0.3269, Train Steps/Sec: 17.32, Grad Norm: 0.0492 +[2025-02-23 01:04:58] (step=0179600) Train Loss: 0.3274, Train Steps/Sec: 17.36, Grad Norm: 0.0454 +[2025-02-23 01:05:03] (step=0179700) Train Loss: 0.3271, Train Steps/Sec: 17.34, Grad Norm: 0.0465 +[2025-02-23 01:05:09] (step=0179800) Train Loss: 0.3274, Train Steps/Sec: 17.37, Grad Norm: 0.0463 +[2025-02-23 01:05:15] (step=0179900) Train Loss: 0.3266, Train Steps/Sec: 15.86, Grad Norm: 0.0446 +[2025-02-23 01:05:22] (step=0180000) Train Loss: 0.3280, Train Steps/Sec: 15.21, Grad Norm: 0.0465 +[2025-02-23 01:05:28] (step=0180100) Train Loss: 0.3271, Train Steps/Sec: 15.88, Grad Norm: 0.0418 +[2025-02-23 01:05:36] (step=0180200) Train Loss: 0.3269, Train Steps/Sec: 13.76, Grad Norm: 0.0508 +[2025-02-23 01:05:41] (step=0180300) Train Loss: 0.3267, Train Steps/Sec: 17.38, Grad Norm: 0.0475 +[2025-02-23 01:05:47] (step=0180400) Train Loss: 0.3265, Train Steps/Sec: 17.32, Grad Norm: 0.0487 +[2025-02-23 01:05:53] (step=0180500) Train Loss: 0.3272, Train Steps/Sec: 17.34, Grad Norm: 0.0490 +[2025-02-23 01:05:59] (step=0180600) Train Loss: 0.3270, Train Steps/Sec: 17.27, Grad Norm: 0.0413 +[2025-02-23 01:06:05] (step=0180700) Train Loss: 0.3274, Train Steps/Sec: 16.55, Grad Norm: 0.0470 +[2025-02-23 01:06:12] (step=0180800) Train Loss: 0.3266, Train Steps/Sec: 14.67, Grad Norm: 0.0451 +[2025-02-23 01:06:18] (step=0180900) Train Loss: 0.3267, Train Steps/Sec: 15.99, Grad Norm: 0.0479 +[2025-02-23 01:06:24] (step=0181000) Train Loss: 0.3271, Train Steps/Sec: 17.39, Grad Norm: 0.0454 +[2025-02-23 01:06:29] (step=0181100) Train Loss: 0.3272, Train Steps/Sec: 17.27, Grad Norm: 0.0439 +[2025-02-23 01:06:35] (step=0181200) Train Loss: 0.3275, Train Steps/Sec: 17.38, Grad Norm: 0.0429 +[2025-02-23 01:06:41] (step=0181300) Train Loss: 0.3267, Train Steps/Sec: 17.39, Grad Norm: 0.0476 +[2025-02-23 01:06:48] (step=0181400) Train Loss: 0.3262, Train Steps/Sec: 13.87, Grad Norm: 0.0454 +[2025-02-23 01:06:54] (step=0181500) Train Loss: 0.3270, Train Steps/Sec: 17.25, Grad Norm: 0.0442 +[2025-02-23 01:07:00] (step=0181600) Train Loss: 0.3272, Train Steps/Sec: 17.34, Grad Norm: 0.0447 +[2025-02-23 01:07:05] (step=0181700) Train Loss: 0.3268, Train Steps/Sec: 17.27, Grad Norm: 0.0447 +[2025-02-23 01:07:11] (step=0181800) Train Loss: 0.3275, Train Steps/Sec: 17.25, Grad Norm: 0.0452 +[2025-02-23 01:07:18] (step=0181900) Train Loss: 0.3277, Train Steps/Sec: 15.70, Grad Norm: 0.0466 +[2025-02-23 01:07:24] (step=0182000) Train Loss: 0.3270, Train Steps/Sec: 14.87, Grad Norm: 0.0486 +[2025-02-23 01:07:31] (step=0182100) Train Loss: 0.3265, Train Steps/Sec: 15.75, Grad Norm: 0.0468 +[2025-02-23 01:07:37] (step=0182200) Train Loss: 0.3263, Train Steps/Sec: 16.53, Grad Norm: 0.0447 +[2025-02-23 01:07:43] (step=0182300) Train Loss: 0.3273, Train Steps/Sec: 17.29, Grad Norm: 0.0445 +[2025-02-23 01:07:48] (step=0182400) Train Loss: 0.3275, Train Steps/Sec: 17.33, Grad Norm: 0.0484 +[2025-02-23 01:07:54] (step=0182500) Train Loss: 0.3270, Train Steps/Sec: 17.33, Grad Norm: 0.0433 +[2025-02-23 01:08:00] (step=0182600) Train Loss: 0.3269, Train Steps/Sec: 17.28, Grad Norm: 0.0487 +[2025-02-23 01:08:08] (step=0182700) Train Loss: 0.3267, Train Steps/Sec: 11.94, Grad Norm: 0.0459 +[2025-02-23 01:08:14] (step=0182800) Train Loss: 0.3273, Train Steps/Sec: 16.49, Grad Norm: 0.0465 +[2025-02-23 01:08:20] (step=0182900) Train Loss: 0.3271, Train Steps/Sec: 16.51, Grad Norm: 0.0450 +[2025-02-23 01:08:26] (step=0183000) Train Loss: 0.3271, Train Steps/Sec: 17.22, Grad Norm: 0.0455 +[2025-02-23 01:08:32] (step=0183100) Train Loss: 0.3271, Train Steps/Sec: 17.22, Grad Norm: 0.0455 +[2025-02-23 01:08:38] (step=0183200) Train Loss: 0.3267, Train Steps/Sec: 17.19, Grad Norm: 0.0472 +[2025-02-23 01:08:44] (step=0183300) Train Loss: 0.3267, Train Steps/Sec: 16.51, Grad Norm: 0.0468 +[2025-02-23 01:08:50] (step=0183400) Train Loss: 0.3269, Train Steps/Sec: 17.26, Grad Norm: 0.0525 +[2025-02-23 01:08:55] (step=0183500) Train Loss: 0.3269, Train Steps/Sec: 17.34, Grad Norm: 0.0483 +[2025-02-23 01:09:01] (step=0183600) Train Loss: 0.3267, Train Steps/Sec: 17.42, Grad Norm: 0.0430 +[2025-02-23 01:09:07] (step=0183700) Train Loss: 0.3271, Train Steps/Sec: 17.36, Grad Norm: 0.0535 +[2025-02-23 01:09:13] (step=0183800) Train Loss: 0.3269, Train Steps/Sec: 17.09, Grad Norm: 0.0441 +[2025-02-23 01:09:20] (step=0183900) Train Loss: 0.3267, Train Steps/Sec: 13.75, Grad Norm: 0.0455 +[2025-02-23 01:09:27] (step=0184000) Train Loss: 0.3265, Train Steps/Sec: 13.81, Grad Norm: 0.0470 +[2025-02-23 01:09:33] (step=0184100) Train Loss: 0.3269, Train Steps/Sec: 16.28, Grad Norm: 0.0453 +[2025-02-23 01:09:40] (step=0184200) Train Loss: 0.3268, Train Steps/Sec: 16.25, Grad Norm: 0.0445 +[2025-02-23 01:09:45] (step=0184300) Train Loss: 0.3266, Train Steps/Sec: 17.03, Grad Norm: 0.0444 +[2025-02-23 01:09:51] (step=0184400) Train Loss: 0.3270, Train Steps/Sec: 17.04, Grad Norm: 0.0453 +[2025-02-23 01:09:57] (step=0184500) Train Loss: 0.3264, Train Steps/Sec: 17.05, Grad Norm: 0.0501 +[2025-02-23 01:10:03] (step=0184600) Train Loss: 0.3275, Train Steps/Sec: 16.30, Grad Norm: 0.0471 +[2025-02-23 01:10:10] (step=0184700) Train Loss: 0.3264, Train Steps/Sec: 14.58, Grad Norm: 0.0490 +[2025-02-23 01:10:17] (step=0184800) Train Loss: 0.3269, Train Steps/Sec: 15.87, Grad Norm: 0.0455 +[2025-02-23 01:10:22] (step=0184900) Train Loss: 0.3270, Train Steps/Sec: 17.31, Grad Norm: 0.0465 +[2025-02-23 01:10:28] (step=0185000) Train Loss: 0.3271, Train Steps/Sec: 17.26, Grad Norm: 0.0439 +[2025-02-23 01:10:34] (step=0185100) Train Loss: 0.3266, Train Steps/Sec: 17.30, Grad Norm: 0.0457 +[2025-02-23 01:10:41] (step=0185200) Train Loss: 0.3268, Train Steps/Sec: 14.30, Grad Norm: 0.0437 +[2025-02-23 01:10:47] (step=0185300) Train Loss: 0.3271, Train Steps/Sec: 16.63, Grad Norm: 0.0448 +[2025-02-23 01:10:53] (step=0185400) Train Loss: 0.3266, Train Steps/Sec: 17.33, Grad Norm: 0.0447 +[2025-02-23 01:10:58] (step=0185500) Train Loss: 0.3269, Train Steps/Sec: 17.27, Grad Norm: 0.0432 +[2025-02-23 01:11:04] (step=0185600) Train Loss: 0.3275, Train Steps/Sec: 17.09, Grad Norm: 0.0440 +[2025-02-23 01:11:10] (step=0185700) Train Loss: 0.3270, Train Steps/Sec: 17.11, Grad Norm: 0.0472 +[2025-02-23 01:11:16] (step=0185800) Train Loss: 0.3273, Train Steps/Sec: 17.12, Grad Norm: 0.0449 +[2025-02-23 01:11:22] (step=0185900) Train Loss: 0.3268, Train Steps/Sec: 16.35, Grad Norm: 0.0462 +[2025-02-23 01:11:29] (step=0186000) Train Loss: 0.3271, Train Steps/Sec: 14.45, Grad Norm: 0.0456 +[2025-02-23 01:11:35] (step=0186100) Train Loss: 0.3267, Train Steps/Sec: 15.71, Grad Norm: 0.0458 +[2025-02-23 01:11:42] (step=0186200) Train Loss: 0.3265, Train Steps/Sec: 16.38, Grad Norm: 0.0470 +[2025-02-23 01:11:47] (step=0186300) Train Loss: 0.3264, Train Steps/Sec: 17.17, Grad Norm: 0.0439 +[2025-02-23 01:11:54] (step=0186400) Train Loss: 0.3271, Train Steps/Sec: 14.20, Grad Norm: 0.0453 +[2025-02-23 01:12:00] (step=0186500) Train Loss: 0.3273, Train Steps/Sec: 17.08, Grad Norm: 0.0459 +[2025-02-23 01:12:07] (step=0186600) Train Loss: 0.3270, Train Steps/Sec: 13.97, Grad Norm: 0.0459 +[2025-02-23 01:12:13] (step=0186700) Train Loss: 0.3267, Train Steps/Sec: 16.49, Grad Norm: 0.0465 +[2025-02-23 01:12:20] (step=0186800) Train Loss: 0.3268, Train Steps/Sec: 16.59, Grad Norm: 0.0458 +[2025-02-23 01:12:25] (step=0186900) Train Loss: 0.3268, Train Steps/Sec: 17.37, Grad Norm: 0.0448 +[2025-02-23 01:12:31] (step=0187000) Train Loss: 0.3267, Train Steps/Sec: 17.36, Grad Norm: 0.0419 +[2025-02-23 01:12:37] (step=0187100) Train Loss: 0.3269, Train Steps/Sec: 17.40, Grad Norm: 0.0445 +[2025-02-23 01:12:43] (step=0187200) Train Loss: 0.3273, Train Steps/Sec: 16.59, Grad Norm: 0.0456 +[2025-02-23 01:12:49] (step=0187300) Train Loss: 0.3271, Train Steps/Sec: 17.29, Grad Norm: 0.0485 +[2025-02-23 01:12:54] (step=0187400) Train Loss: 0.3265, Train Steps/Sec: 17.28, Grad Norm: 0.0425 +[2025-02-23 01:13:00] (step=0187500) Train Loss: 0.3267, Train Steps/Sec: 17.36, Grad Norm: 0.0499 +[2025-02-23 01:13:06] (step=0187600) Train Loss: 0.3269, Train Steps/Sec: 17.36, Grad Norm: 0.0451 +[2025-02-23 01:13:13] (step=0187700) Train Loss: 0.3267, Train Steps/Sec: 14.20, Grad Norm: 0.0452 +[2025-02-23 01:13:19] (step=0187800) Train Loss: 0.3263, Train Steps/Sec: 17.13, Grad Norm: 0.0466 +[2025-02-23 01:13:25] (step=0187900) Train Loss: 0.3269, Train Steps/Sec: 16.31, Grad Norm: 0.0470 +[2025-02-23 01:13:32] (step=0188000) Train Loss: 0.3268, Train Steps/Sec: 14.46, Grad Norm: 0.0463 +[2025-02-23 01:13:38] (step=0188100) Train Loss: 0.3261, Train Steps/Sec: 15.63, Grad Norm: 0.0466 +[2025-02-23 01:13:44] (step=0188200) Train Loss: 0.3268, Train Steps/Sec: 16.30, Grad Norm: 0.0470 +[2025-02-23 01:13:50] (step=0188300) Train Loss: 0.3269, Train Steps/Sec: 17.08, Grad Norm: 0.0475 +[2025-02-23 01:13:56] (step=0188400) Train Loss: 0.3270, Train Steps/Sec: 17.09, Grad Norm: 0.0433 +[2025-02-23 01:14:02] (step=0188500) Train Loss: 0.3266, Train Steps/Sec: 16.49, Grad Norm: 0.0413 +[2025-02-23 01:14:09] (step=0188600) Train Loss: 0.3269, Train Steps/Sec: 14.10, Grad Norm: 0.0490 +[2025-02-23 01:14:15] (step=0188700) Train Loss: 0.3267, Train Steps/Sec: 16.69, Grad Norm: 0.0495 +[2025-02-23 01:14:21] (step=0188800) Train Loss: 0.3269, Train Steps/Sec: 17.40, Grad Norm: 0.0432 +[2025-02-23 01:14:27] (step=0188900) Train Loss: 0.3269, Train Steps/Sec: 17.32, Grad Norm: 0.0457 +[2025-02-23 01:14:34] (step=0189000) Train Loss: 0.3265, Train Steps/Sec: 14.34, Grad Norm: 0.0442 +[2025-02-23 01:14:40] (step=0189100) Train Loss: 0.3269, Train Steps/Sec: 17.24, Grad Norm: 0.0450 +[2025-02-23 01:14:46] (step=0189200) Train Loss: 0.3267, Train Steps/Sec: 16.53, Grad Norm: 0.0453 +[2025-02-23 01:14:51] (step=0189300) Train Loss: 0.3264, Train Steps/Sec: 17.26, Grad Norm: 0.0464 +[2025-02-23 01:14:57] (step=0189400) Train Loss: 0.3270, Train Steps/Sec: 17.34, Grad Norm: 0.0451 +[2025-02-23 01:15:03] (step=0189500) Train Loss: 0.3265, Train Steps/Sec: 17.29, Grad Norm: 0.0422 +[2025-02-23 01:15:09] (step=0189600) Train Loss: 0.3270, Train Steps/Sec: 17.27, Grad Norm: 0.0445 +[2025-02-23 01:15:15] (step=0189700) Train Loss: 0.3268, Train Steps/Sec: 17.23, Grad Norm: 0.0458 +[2025-02-23 01:15:20] (step=0189800) Train Loss: 0.3269, Train Steps/Sec: 17.24, Grad Norm: 0.0437 +[2025-02-23 01:15:27] (step=0189900) Train Loss: 0.3264, Train Steps/Sec: 16.28, Grad Norm: 0.0470 +[2025-02-23 01:15:33] (step=0190000) Train Loss: 0.3270, Train Steps/Sec: 14.45, Grad Norm: 0.0465 +[2025-02-23 01:15:40] (step=0190100) Train Loss: 0.3269, Train Steps/Sec: 15.67, Grad Norm: 0.0455 +[2025-02-23 01:15:47] (step=0190200) Train Loss: 0.3268, Train Steps/Sec: 13.70, Grad Norm: 0.0475 +[2025-02-23 01:15:53] (step=0190300) Train Loss: 0.3270, Train Steps/Sec: 17.00, Grad Norm: 0.0419 +[2025-02-23 01:15:59] (step=0190400) Train Loss: 0.3273, Train Steps/Sec: 16.34, Grad Norm: 0.0449 +[2025-02-23 01:16:06] (step=0190500) Train Loss: 0.3270, Train Steps/Sec: 14.44, Grad Norm: 0.0473 +[2025-02-23 01:16:12] (step=0190600) Train Loss: 0.3265, Train Steps/Sec: 15.69, Grad Norm: 0.0442 +[2025-02-23 01:16:18] (step=0190700) Train Loss: 0.3259, Train Steps/Sec: 17.06, Grad Norm: 0.0435 +[2025-02-23 01:16:24] (step=0190800) Train Loss: 0.3272, Train Steps/Sec: 17.04, Grad Norm: 0.0428 +[2025-02-23 01:16:30] (step=0190900) Train Loss: 0.3275, Train Steps/Sec: 17.05, Grad Norm: 0.0411 +[2025-02-23 01:16:36] (step=0191000) Train Loss: 0.3271, Train Steps/Sec: 17.03, Grad Norm: 0.0436 +[2025-02-23 01:16:42] (step=0191100) Train Loss: 0.3268, Train Steps/Sec: 16.32, Grad Norm: 0.0440 +[2025-02-23 01:16:48] (step=0191200) Train Loss: 0.3269, Train Steps/Sec: 16.97, Grad Norm: 0.0478 +[2025-02-23 01:16:54] (step=0191300) Train Loss: 0.3264, Train Steps/Sec: 17.07, Grad Norm: 0.0478 +[2025-02-23 01:17:00] (step=0191400) Train Loss: 0.3261, Train Steps/Sec: 17.13, Grad Norm: 0.0426 +[2025-02-23 01:17:07] (step=0191500) Train Loss: 0.3264, Train Steps/Sec: 14.27, Grad Norm: 0.0457 +[2025-02-23 01:17:12] (step=0191600) Train Loss: 0.3268, Train Steps/Sec: 17.19, Grad Norm: 0.0450 +[2025-02-23 01:17:18] (step=0191700) Train Loss: 0.3267, Train Steps/Sec: 17.17, Grad Norm: 0.0479 +[2025-02-23 01:17:24] (step=0191800) Train Loss: 0.3266, Train Steps/Sec: 17.16, Grad Norm: 0.0466 +[2025-02-23 01:17:30] (step=0191900) Train Loss: 0.3268, Train Steps/Sec: 16.41, Grad Norm: 0.0456 +[2025-02-23 01:17:37] (step=0192000) Train Loss: 0.3266, Train Steps/Sec: 14.55, Grad Norm: 0.0474 +[2025-02-23 01:17:43] (step=0192100) Train Loss: 0.3266, Train Steps/Sec: 15.82, Grad Norm: 0.0470 +[2025-02-23 01:17:49] (step=0192200) Train Loss: 0.3270, Train Steps/Sec: 16.52, Grad Norm: 0.0449 +[2025-02-23 01:17:55] (step=0192300) Train Loss: 0.3265, Train Steps/Sec: 17.36, Grad Norm: 0.0493 +[2025-02-23 01:18:01] (step=0192400) Train Loss: 0.3265, Train Steps/Sec: 16.44, Grad Norm: 0.0429 +[2025-02-23 01:18:08] (step=0192500) Train Loss: 0.3266, Train Steps/Sec: 14.68, Grad Norm: 0.0439 +[2025-02-23 01:18:14] (step=0192600) Train Loss: 0.3269, Train Steps/Sec: 16.66, Grad Norm: 0.0457 +[2025-02-23 01:18:21] (step=0192700) Train Loss: 0.3263, Train Steps/Sec: 14.32, Grad Norm: 0.0428 +[2025-02-23 01:18:27] (step=0192800) Train Loss: 0.3261, Train Steps/Sec: 17.07, Grad Norm: 0.0431 +[2025-02-23 01:18:33] (step=0192900) Train Loss: 0.3264, Train Steps/Sec: 17.09, Grad Norm: 0.0418 +[2025-02-23 01:18:39] (step=0193000) Train Loss: 0.3268, Train Steps/Sec: 16.40, Grad Norm: 0.0447 +[2025-02-23 01:18:45] (step=0193100) Train Loss: 0.3263, Train Steps/Sec: 17.09, Grad Norm: 0.0470 +[2025-02-23 01:18:51] (step=0193200) Train Loss: 0.3267, Train Steps/Sec: 17.06, Grad Norm: 0.0429 +[2025-02-23 01:18:57] (step=0193300) Train Loss: 0.3265, Train Steps/Sec: 17.06, Grad Norm: 0.0411 +[2025-02-23 01:19:02] (step=0193400) Train Loss: 0.3268, Train Steps/Sec: 17.10, Grad Norm: 0.0473 +[2025-02-23 01:19:08] (step=0193500) Train Loss: 0.3267, Train Steps/Sec: 17.07, Grad Norm: 0.0422 +[2025-02-23 01:19:14] (step=0193600) Train Loss: 0.3267, Train Steps/Sec: 17.03, Grad Norm: 0.0425 +[2025-02-23 01:19:20] (step=0193700) Train Loss: 0.3267, Train Steps/Sec: 17.03, Grad Norm: 0.0445 +[2025-02-23 01:19:26] (step=0193800) Train Loss: 0.3262, Train Steps/Sec: 17.04, Grad Norm: 0.0431 +[2025-02-23 01:19:32] (step=0193900) Train Loss: 0.3266, Train Steps/Sec: 17.07, Grad Norm: 0.0445 +[2025-02-23 01:19:40] (step=0194000) Train Loss: 0.3266, Train Steps/Sec: 11.92, Grad Norm: 0.0481 +[2025-02-23 01:19:47] (step=0194100) Train Loss: 0.3268, Train Steps/Sec: 15.62, Grad Norm: 0.0468 +[2025-02-23 01:19:53] (step=0194200) Train Loss: 0.3267, Train Steps/Sec: 16.32, Grad Norm: 0.0455 +[2025-02-23 01:19:59] (step=0194300) Train Loss: 0.3266, Train Steps/Sec: 16.30, Grad Norm: 0.0431 +[2025-02-23 01:20:06] (step=0194400) Train Loss: 0.3261, Train Steps/Sec: 14.48, Grad Norm: 0.0438 +[2025-02-23 01:20:12] (step=0194500) Train Loss: 0.3259, Train Steps/Sec: 15.71, Grad Norm: 0.0425 +[2025-02-23 01:20:18] (step=0194600) Train Loss: 0.3269, Train Steps/Sec: 17.34, Grad Norm: 0.0418 +[2025-02-23 01:20:24] (step=0194700) Train Loss: 0.3267, Train Steps/Sec: 17.43, Grad Norm: 0.0440 +[2025-02-23 01:20:29] (step=0194800) Train Loss: 0.3269, Train Steps/Sec: 17.38, Grad Norm: 0.0444 +[2025-02-23 01:20:35] (step=0194900) Train Loss: 0.3260, Train Steps/Sec: 17.38, Grad Norm: 0.0423 +[2025-02-23 01:20:41] (step=0195000) Train Loss: 0.3262, Train Steps/Sec: 16.70, Grad Norm: 0.0427 +[2025-02-23 01:20:47] (step=0195100) Train Loss: 0.3263, Train Steps/Sec: 17.44, Grad Norm: 0.0446 +[2025-02-23 01:20:54] (step=0195200) Train Loss: 0.3269, Train Steps/Sec: 14.33, Grad Norm: 0.0438 +[2025-02-23 01:21:00] (step=0195300) Train Loss: 0.3262, Train Steps/Sec: 17.22, Grad Norm: 0.0436 +[2025-02-23 01:21:05] (step=0195400) Train Loss: 0.3264, Train Steps/Sec: 17.22, Grad Norm: 0.0442 +[2025-02-23 01:21:11] (step=0195500) Train Loss: 0.3262, Train Steps/Sec: 17.17, Grad Norm: 0.0445 +[2025-02-23 01:21:17] (step=0195600) Train Loss: 0.3265, Train Steps/Sec: 17.20, Grad Norm: 0.0503 +[2025-02-23 01:21:23] (step=0195700) Train Loss: 0.3268, Train Steps/Sec: 17.16, Grad Norm: 0.0420 +[2025-02-23 01:21:29] (step=0195800) Train Loss: 0.3265, Train Steps/Sec: 17.17, Grad Norm: 0.0441 +[2025-02-23 01:21:34] (step=0195900) Train Loss: 0.3268, Train Steps/Sec: 17.32, Grad Norm: 0.0456 +[2025-02-23 01:21:41] (step=0196000) Train Loss: 0.3266, Train Steps/Sec: 14.44, Grad Norm: 0.0418 +[2025-02-23 01:21:48] (step=0196100) Train Loss: 0.3270, Train Steps/Sec: 15.71, Grad Norm: 0.0473 +[2025-02-23 01:21:54] (step=0196200) Train Loss: 0.3265, Train Steps/Sec: 16.46, Grad Norm: 0.0429 +[2025-02-23 01:22:00] (step=0196300) Train Loss: 0.3268, Train Steps/Sec: 15.88, Grad Norm: 0.0459 +[2025-02-23 01:22:07] (step=0196400) Train Loss: 0.3262, Train Steps/Sec: 14.53, Grad Norm: 0.0472 +[2025-02-23 01:22:14] (step=0196500) Train Loss: 0.3267, Train Steps/Sec: 13.85, Grad Norm: 0.0421 +[2025-02-23 01:22:20] (step=0196600) Train Loss: 0.3267, Train Steps/Sec: 17.32, Grad Norm: 0.0441 +[2025-02-23 01:22:26] (step=0196700) Train Loss: 0.3258, Train Steps/Sec: 17.35, Grad Norm: 0.0420 +[2025-02-23 01:22:32] (step=0196800) Train Loss: 0.3268, Train Steps/Sec: 17.32, Grad Norm: 0.0442 +[2025-02-23 01:22:37] (step=0196900) Train Loss: 0.3266, Train Steps/Sec: 17.36, Grad Norm: 0.0443 +[2025-02-23 01:22:43] (step=0197000) Train Loss: 0.3272, Train Steps/Sec: 16.64, Grad Norm: 0.0429 +[2025-02-23 01:22:49] (step=0197100) Train Loss: 0.3266, Train Steps/Sec: 17.32, Grad Norm: 0.0413 +[2025-02-23 01:22:55] (step=0197200) Train Loss: 0.3266, Train Steps/Sec: 17.27, Grad Norm: 0.0453 +[2025-02-23 01:23:01] (step=0197300) Train Loss: 0.3265, Train Steps/Sec: 17.28, Grad Norm: 0.0431 +[2025-02-23 01:23:07] (step=0197400) Train Loss: 0.3272, Train Steps/Sec: 17.25, Grad Norm: 0.0458 +[2025-02-23 01:23:12] (step=0197500) Train Loss: 0.3258, Train Steps/Sec: 17.19, Grad Norm: 0.0438 +[2025-02-23 01:23:18] (step=0197600) Train Loss: 0.3270, Train Steps/Sec: 17.23, Grad Norm: 0.0475 +[2025-02-23 01:23:25] (step=0197700) Train Loss: 0.3263, Train Steps/Sec: 14.39, Grad Norm: 0.0407 +[2025-02-23 01:23:31] (step=0197800) Train Loss: 0.3269, Train Steps/Sec: 17.20, Grad Norm: 0.0423 +[2025-02-23 01:23:37] (step=0197900) Train Loss: 0.3264, Train Steps/Sec: 17.23, Grad Norm: 0.0469 +[2025-02-23 01:23:44] (step=0198000) Train Loss: 0.3270, Train Steps/Sec: 14.01, Grad Norm: 0.0421 +[2025-02-23 01:23:50] (step=0198100) Train Loss: 0.3265, Train Steps/Sec: 15.85, Grad Norm: 0.0451 +[2025-02-23 01:23:56] (step=0198200) Train Loss: 0.3265, Train Steps/Sec: 15.78, Grad Norm: 0.0461 +[2025-02-23 01:24:03] (step=0198300) Train Loss: 0.3269, Train Steps/Sec: 14.61, Grad Norm: 0.0452 +[2025-02-23 01:24:10] (step=0198400) Train Loss: 0.3263, Train Steps/Sec: 15.88, Grad Norm: 0.0423 +[2025-02-23 01:24:15] (step=0198500) Train Loss: 0.3259, Train Steps/Sec: 17.33, Grad Norm: 0.0397 +[2025-02-23 01:24:21] (step=0198600) Train Loss: 0.3265, Train Steps/Sec: 17.31, Grad Norm: 0.0416 +[2025-02-23 01:24:27] (step=0198700) Train Loss: 0.3265, Train Steps/Sec: 17.36, Grad Norm: 0.0435 +[2025-02-23 01:24:33] (step=0198800) Train Loss: 0.3267, Train Steps/Sec: 17.31, Grad Norm: 0.0447 +[2025-02-23 01:24:39] (step=0198900) Train Loss: 0.3267, Train Steps/Sec: 16.57, Grad Norm: 0.0464 +[2025-02-23 01:24:46] (step=0199000) Train Loss: 0.3266, Train Steps/Sec: 14.22, Grad Norm: 0.0417 +[2025-02-23 01:24:52] (step=0199100) Train Loss: 0.3260, Train Steps/Sec: 17.00, Grad Norm: 0.0490 +[2025-02-23 01:24:58] (step=0199200) Train Loss: 0.3261, Train Steps/Sec: 16.99, Grad Norm: 0.0450 +[2025-02-23 01:25:03] (step=0199300) Train Loss: 0.3265, Train Steps/Sec: 17.00, Grad Norm: 0.0455 +[2025-02-23 01:25:09] (step=0199400) Train Loss: 0.3266, Train Steps/Sec: 17.00, Grad Norm: 0.0437 +[2025-02-23 01:25:15] (step=0199500) Train Loss: 0.3260, Train Steps/Sec: 17.00, Grad Norm: 0.0435 +[2025-02-23 01:25:21] (step=0199600) Train Loss: 0.3263, Train Steps/Sec: 16.99, Grad Norm: 0.0439 +[2025-02-23 01:25:27] (step=0199700) Train Loss: 0.3270, Train Steps/Sec: 16.97, Grad Norm: 0.0421 +[2025-02-23 01:25:33] (step=0199800) Train Loss: 0.3260, Train Steps/Sec: 17.00, Grad Norm: 0.0467 +[2025-02-23 01:25:39] (step=0199900) Train Loss: 0.3261, Train Steps/Sec: 16.98, Grad Norm: 0.0451 +[2025-02-23 01:25:45] (step=0200000) Train Loss: 0.3269, Train Steps/Sec: 14.97, Grad Norm: 0.0426 +[2025-02-23 01:25:47] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0200000.pt +[2025-02-23 01:25:54] (step=0200100) Train Loss: 0.3266, Train Steps/Sec: 12.22, Grad Norm: 0.0457 +[2025-02-23 01:26:02] (step=0200200) Train Loss: 0.3261, Train Steps/Sec: 11.56, Grad Norm: 0.0432 +[2025-02-23 01:26:08] (step=0200300) Train Loss: 0.3261, Train Steps/Sec: 16.30, Grad Norm: 0.0423 +[2025-02-23 01:26:15] (step=0200400) Train Loss: 0.3264, Train Steps/Sec: 16.34, Grad Norm: 0.0413 +[2025-02-23 01:26:20] (step=0200500) Train Loss: 0.3261, Train Steps/Sec: 17.02, Grad Norm: 0.0447 +[2025-02-23 01:26:26] (step=0200600) Train Loss: 0.3265, Train Steps/Sec: 17.05, Grad Norm: 0.0427 +[2025-02-23 01:26:32] (step=0200700) Train Loss: 0.3268, Train Steps/Sec: 17.06, Grad Norm: 0.0448 +[2025-02-23 01:26:38] (step=0200800) Train Loss: 0.3266, Train Steps/Sec: 16.30, Grad Norm: 0.0427 +[2025-02-23 01:26:44] (step=0200900) Train Loss: 0.3269, Train Steps/Sec: 16.95, Grad Norm: 0.0427 +[2025-02-23 01:26:50] (step=0201000) Train Loss: 0.3259, Train Steps/Sec: 16.99, Grad Norm: 0.0419 +[2025-02-23 01:26:56] (step=0201100) Train Loss: 0.3265, Train Steps/Sec: 16.99, Grad Norm: 0.0486 +[2025-02-23 01:27:02] (step=0201200) Train Loss: 0.3265, Train Steps/Sec: 17.00, Grad Norm: 0.0404 +[2025-02-23 01:27:08] (step=0201300) Train Loss: 0.3270, Train Steps/Sec: 16.99, Grad Norm: 0.0477 +[2025-02-23 01:27:14] (step=0201400) Train Loss: 0.3262, Train Steps/Sec: 16.94, Grad Norm: 0.0488 +[2025-02-23 01:27:21] (step=0201500) Train Loss: 0.3265, Train Steps/Sec: 14.29, Grad Norm: 0.0447 +[2025-02-23 01:27:27] (step=0201600) Train Loss: 0.3265, Train Steps/Sec: 17.01, Grad Norm: 0.0423 +[2025-02-23 01:27:32] (step=0201700) Train Loss: 0.3263, Train Steps/Sec: 16.99, Grad Norm: 0.0507 +[2025-02-23 01:27:38] (step=0201800) Train Loss: 0.3260, Train Steps/Sec: 16.99, Grad Norm: 0.0429 +[2025-02-23 01:27:44] (step=0201900) Train Loss: 0.3267, Train Steps/Sec: 17.19, Grad Norm: 0.0426 +[2025-02-23 01:27:51] (step=0202000) Train Loss: 0.3263, Train Steps/Sec: 15.19, Grad Norm: 0.0451 +[2025-02-23 01:27:58] (step=0202100) Train Loss: 0.3261, Train Steps/Sec: 14.05, Grad Norm: 0.0424 +[2025-02-23 01:28:05] (step=0202200) Train Loss: 0.3266, Train Steps/Sec: 13.47, Grad Norm: 0.0454 +[2025-02-23 01:28:11] (step=0202300) Train Loss: 0.3270, Train Steps/Sec: 16.56, Grad Norm: 0.0458 +[2025-02-23 01:28:17] (step=0202400) Train Loss: 0.3273, Train Steps/Sec: 17.37, Grad Norm: 0.0445 +[2025-02-23 01:28:23] (step=0202500) Train Loss: 0.3264, Train Steps/Sec: 17.30, Grad Norm: 0.0449 +[2025-02-23 01:28:29] (step=0202600) Train Loss: 0.3260, Train Steps/Sec: 17.36, Grad Norm: 0.0396 +[2025-02-23 01:28:36] (step=0202700) Train Loss: 0.3276, Train Steps/Sec: 14.37, Grad Norm: 0.0453 +[2025-02-23 01:28:42] (step=0202800) Train Loss: 0.3269, Train Steps/Sec: 16.53, Grad Norm: 0.0407 +[2025-02-23 01:28:47] (step=0202900) Train Loss: 0.3264, Train Steps/Sec: 17.26, Grad Norm: 0.0422 +[2025-02-23 01:28:53] (step=0203000) Train Loss: 0.3262, Train Steps/Sec: 17.24, Grad Norm: 0.0466 +[2025-02-23 01:28:59] (step=0203100) Train Loss: 0.3265, Train Steps/Sec: 17.23, Grad Norm: 0.0474 +[2025-02-23 01:29:05] (step=0203200) Train Loss: 0.3264, Train Steps/Sec: 17.22, Grad Norm: 0.0417 +[2025-02-23 01:29:11] (step=0203300) Train Loss: 0.3265, Train Steps/Sec: 17.21, Grad Norm: 0.0436 +[2025-02-23 01:29:17] (step=0203400) Train Loss: 0.3266, Train Steps/Sec: 17.12, Grad Norm: 0.0433 +[2025-02-23 01:29:22] (step=0203500) Train Loss: 0.3264, Train Steps/Sec: 17.08, Grad Norm: 0.0443 +[2025-02-23 01:29:28] (step=0203600) Train Loss: 0.3264, Train Steps/Sec: 17.12, Grad Norm: 0.0415 +[2025-02-23 01:29:34] (step=0203700) Train Loss: 0.3266, Train Steps/Sec: 17.20, Grad Norm: 0.0456 +[2025-02-23 01:29:40] (step=0203800) Train Loss: 0.3264, Train Steps/Sec: 17.37, Grad Norm: 0.0458 +[2025-02-23 01:29:46] (step=0203900) Train Loss: 0.3259, Train Steps/Sec: 17.29, Grad Norm: 0.0428 +[2025-02-23 01:29:54] (step=0204000) Train Loss: 0.3263, Train Steps/Sec: 12.32, Grad Norm: 0.0434 +[2025-02-23 01:30:02] (step=0204100) Train Loss: 0.3261, Train Steps/Sec: 12.60, Grad Norm: 0.0411 +[2025-02-23 01:30:08] (step=0204200) Train Loss: 0.3266, Train Steps/Sec: 15.16, Grad Norm: 0.0457 +[2025-02-23 01:30:14] (step=0204300) Train Loss: 0.3264, Train Steps/Sec: 17.16, Grad Norm: 0.0450 +[2025-02-23 01:30:20] (step=0204400) Train Loss: 0.3257, Train Steps/Sec: 17.12, Grad Norm: 0.0465 +[2025-02-23 01:30:26] (step=0204500) Train Loss: 0.3267, Train Steps/Sec: 17.10, Grad Norm: 0.0428 +[2025-02-23 01:30:32] (step=0204600) Train Loss: 0.3266, Train Steps/Sec: 17.25, Grad Norm: 0.0434 +[2025-02-23 01:30:38] (step=0204700) Train Loss: 0.3265, Train Steps/Sec: 16.51, Grad Norm: 0.0464 +[2025-02-23 01:30:43] (step=0204800) Train Loss: 0.3261, Train Steps/Sec: 17.32, Grad Norm: 0.0447 +[2025-02-23 01:30:49] (step=0204900) Train Loss: 0.3261, Train Steps/Sec: 17.35, Grad Norm: 0.0433 +[2025-02-23 01:30:55] (step=0205000) Train Loss: 0.3261, Train Steps/Sec: 17.21, Grad Norm: 0.0411 +[2025-02-23 01:31:01] (step=0205100) Train Loss: 0.3266, Train Steps/Sec: 17.29, Grad Norm: 0.0441 +[2025-02-23 01:31:08] (step=0205200) Train Loss: 0.3267, Train Steps/Sec: 14.23, Grad Norm: 0.0450 +[2025-02-23 01:31:14] (step=0205300) Train Loss: 0.3262, Train Steps/Sec: 17.14, Grad Norm: 0.0437 +[2025-02-23 01:31:19] (step=0205400) Train Loss: 0.3257, Train Steps/Sec: 17.08, Grad Norm: 0.0443 +[2025-02-23 01:31:25] (step=0205500) Train Loss: 0.3255, Train Steps/Sec: 17.04, Grad Norm: 0.0445 +[2025-02-23 01:31:31] (step=0205600) Train Loss: 0.3263, Train Steps/Sec: 17.12, Grad Norm: 0.0437 +[2025-02-23 01:31:37] (step=0205700) Train Loss: 0.3263, Train Steps/Sec: 17.16, Grad Norm: 0.0439 +[2025-02-23 01:31:43] (step=0205800) Train Loss: 0.3265, Train Steps/Sec: 17.14, Grad Norm: 0.0440 +[2025-02-23 01:31:49] (step=0205900) Train Loss: 0.3269, Train Steps/Sec: 17.25, Grad Norm: 0.0432 +[2025-02-23 01:31:56] (step=0206000) Train Loss: 0.3259, Train Steps/Sec: 13.93, Grad Norm: 0.0464 +[2025-02-23 01:32:04] (step=0206100) Train Loss: 0.3254, Train Steps/Sec: 12.59, Grad Norm: 0.0428 +[2025-02-23 01:32:10] (step=0206200) Train Loss: 0.3257, Train Steps/Sec: 15.74, Grad Norm: 0.0413 +[2025-02-23 01:32:16] (step=0206300) Train Loss: 0.3264, Train Steps/Sec: 17.23, Grad Norm: 0.0440 +[2025-02-23 01:32:22] (step=0206400) Train Loss: 0.3262, Train Steps/Sec: 17.19, Grad Norm: 0.0437 +[2025-02-23 01:32:29] (step=0206500) Train Loss: 0.3265, Train Steps/Sec: 14.30, Grad Norm: 0.0452 +[2025-02-23 01:32:35] (step=0206600) Train Loss: 0.3259, Train Steps/Sec: 16.35, Grad Norm: 0.0441 +[2025-02-23 01:32:41] (step=0206700) Train Loss: 0.3259, Train Steps/Sec: 17.14, Grad Norm: 0.0433 +[2025-02-23 01:32:47] (step=0206800) Train Loss: 0.3259, Train Steps/Sec: 17.23, Grad Norm: 0.0429 +[2025-02-23 01:32:52] (step=0206900) Train Loss: 0.3263, Train Steps/Sec: 17.21, Grad Norm: 0.0458 +[2025-02-23 01:32:58] (step=0207000) Train Loss: 0.3264, Train Steps/Sec: 17.26, Grad Norm: 0.0430 +[2025-02-23 01:33:04] (step=0207100) Train Loss: 0.3272, Train Steps/Sec: 17.24, Grad Norm: 0.0454 +[2025-02-23 01:33:10] (step=0207200) Train Loss: 0.3262, Train Steps/Sec: 17.26, Grad Norm: 0.0445 +[2025-02-23 01:33:15] (step=0207300) Train Loss: 0.3266, Train Steps/Sec: 17.32, Grad Norm: 0.0400 +[2025-02-23 01:33:21] (step=0207400) Train Loss: 0.3261, Train Steps/Sec: 17.34, Grad Norm: 0.0479 +[2025-02-23 01:33:27] (step=0207500) Train Loss: 0.3263, Train Steps/Sec: 17.33, Grad Norm: 0.0462 +[2025-02-23 01:33:33] (step=0207600) Train Loss: 0.3265, Train Steps/Sec: 17.34, Grad Norm: 0.0428 +[2025-02-23 01:33:40] (step=0207700) Train Loss: 0.3263, Train Steps/Sec: 14.38, Grad Norm: 0.0417 +[2025-02-23 01:33:46] (step=0207800) Train Loss: 0.3266, Train Steps/Sec: 17.32, Grad Norm: 0.0448 +[2025-02-23 01:33:51] (step=0207900) Train Loss: 0.3259, Train Steps/Sec: 17.10, Grad Norm: 0.0453 +[2025-02-23 01:33:59] (step=0208000) Train Loss: 0.3266, Train Steps/Sec: 13.02, Grad Norm: 0.0419 +[2025-02-23 01:34:07] (step=0208100) Train Loss: 0.3259, Train Steps/Sec: 13.01, Grad Norm: 0.0483 +[2025-02-23 01:34:13] (step=0208200) Train Loss: 0.3267, Train Steps/Sec: 17.12, Grad Norm: 0.0440 +[2025-02-23 01:34:19] (step=0208300) Train Loss: 0.3268, Train Steps/Sec: 16.38, Grad Norm: 0.0442 +[2025-02-23 01:34:25] (step=0208400) Train Loss: 0.3267, Train Steps/Sec: 17.19, Grad Norm: 0.0487 +[2025-02-23 01:34:30] (step=0208500) Train Loss: 0.3259, Train Steps/Sec: 17.15, Grad Norm: 0.0479 +[2025-02-23 01:34:36] (step=0208600) Train Loss: 0.3269, Train Steps/Sec: 16.31, Grad Norm: 0.0451 +[2025-02-23 01:34:42] (step=0208700) Train Loss: 0.3270, Train Steps/Sec: 17.11, Grad Norm: 0.0443 +[2025-02-23 01:34:48] (step=0208800) Train Loss: 0.3262, Train Steps/Sec: 17.11, Grad Norm: 0.0416 +[2025-02-23 01:34:54] (step=0208900) Train Loss: 0.3259, Train Steps/Sec: 17.03, Grad Norm: 0.0418 +[2025-02-23 01:35:01] (step=0209000) Train Loss: 0.3263, Train Steps/Sec: 14.19, Grad Norm: 0.0454 +[2025-02-23 01:35:07] (step=0209100) Train Loss: 0.3259, Train Steps/Sec: 17.21, Grad Norm: 0.0428 +[2025-02-23 01:35:13] (step=0209200) Train Loss: 0.3265, Train Steps/Sec: 17.22, Grad Norm: 0.0430 +[2025-02-23 01:35:19] (step=0209300) Train Loss: 0.3261, Train Steps/Sec: 17.27, Grad Norm: 0.0444 +[2025-02-23 01:35:24] (step=0209400) Train Loss: 0.3259, Train Steps/Sec: 17.20, Grad Norm: 0.0416 +[2025-02-23 01:35:30] (step=0209500) Train Loss: 0.3262, Train Steps/Sec: 17.22, Grad Norm: 0.0471 +[2025-02-23 01:35:36] (step=0209600) Train Loss: 0.3264, Train Steps/Sec: 17.15, Grad Norm: 0.0412 +[2025-02-23 01:35:42] (step=0209700) Train Loss: 0.3266, Train Steps/Sec: 17.11, Grad Norm: 0.0413 +[2025-02-23 01:35:48] (step=0209800) Train Loss: 0.3259, Train Steps/Sec: 17.20, Grad Norm: 0.0435 +[2025-02-23 01:35:54] (step=0209900) Train Loss: 0.3259, Train Steps/Sec: 15.80, Grad Norm: 0.0456 +[2025-02-23 01:36:01] (step=0210000) Train Loss: 0.3261, Train Steps/Sec: 13.51, Grad Norm: 0.0434 +[2025-02-23 01:36:09] (step=0210100) Train Loss: 0.3262, Train Steps/Sec: 13.47, Grad Norm: 0.0432 +[2025-02-23 01:36:16] (step=0210200) Train Loss: 0.3263, Train Steps/Sec: 14.28, Grad Norm: 0.0449 +[2025-02-23 01:36:22] (step=0210300) Train Loss: 0.3254, Train Steps/Sec: 16.37, Grad Norm: 0.0410 +[2025-02-23 01:36:28] (step=0210400) Train Loss: 0.3262, Train Steps/Sec: 17.22, Grad Norm: 0.0432 +[2025-02-23 01:36:34] (step=0210500) Train Loss: 0.3262, Train Steps/Sec: 17.20, Grad Norm: 0.0406 +[2025-02-23 01:36:40] (step=0210600) Train Loss: 0.3266, Train Steps/Sec: 16.47, Grad Norm: 0.0406 +[2025-02-23 01:36:45] (step=0210700) Train Loss: 0.3263, Train Steps/Sec: 17.33, Grad Norm: 0.0425 +[2025-02-23 01:36:51] (step=0210800) Train Loss: 0.3262, Train Steps/Sec: 17.41, Grad Norm: 0.0428 +[2025-02-23 01:36:57] (step=0210900) Train Loss: 0.3261, Train Steps/Sec: 17.41, Grad Norm: 0.0415 +[2025-02-23 01:37:03] (step=0211000) Train Loss: 0.3261, Train Steps/Sec: 17.40, Grad Norm: 0.0414 +[2025-02-23 01:37:08] (step=0211100) Train Loss: 0.3263, Train Steps/Sec: 17.41, Grad Norm: 0.0453 +[2025-02-23 01:37:14] (step=0211200) Train Loss: 0.3257, Train Steps/Sec: 17.41, Grad Norm: 0.0437 +[2025-02-23 01:37:20] (step=0211300) Train Loss: 0.3265, Train Steps/Sec: 17.28, Grad Norm: 0.0471 +[2025-02-23 01:37:26] (step=0211400) Train Loss: 0.3262, Train Steps/Sec: 16.99, Grad Norm: 0.0422 +[2025-02-23 01:37:33] (step=0211500) Train Loss: 0.3259, Train Steps/Sec: 14.29, Grad Norm: 0.0429 +[2025-02-23 01:37:39] (step=0211600) Train Loss: 0.3262, Train Steps/Sec: 17.34, Grad Norm: 0.0417 +[2025-02-23 01:37:44] (step=0211700) Train Loss: 0.3264, Train Steps/Sec: 17.32, Grad Norm: 0.0438 +[2025-02-23 01:37:50] (step=0211800) Train Loss: 0.3258, Train Steps/Sec: 16.60, Grad Norm: 0.0436 +[2025-02-23 01:37:57] (step=0211900) Train Loss: 0.3259, Train Steps/Sec: 14.66, Grad Norm: 0.0434 +[2025-02-23 01:38:04] (step=0212000) Train Loss: 0.3264, Train Steps/Sec: 14.69, Grad Norm: 0.0416 +[2025-02-23 01:38:11] (step=0212100) Train Loss: 0.3261, Train Steps/Sec: 14.09, Grad Norm: 0.0413 +[2025-02-23 01:38:17] (step=0212200) Train Loss: 0.3252, Train Steps/Sec: 17.43, Grad Norm: 0.0443 +[2025-02-23 01:38:23] (step=0212300) Train Loss: 0.3255, Train Steps/Sec: 16.55, Grad Norm: 0.0414 +[2025-02-23 01:38:29] (step=0212400) Train Loss: 0.3263, Train Steps/Sec: 17.42, Grad Norm: 0.0460 +[2025-02-23 01:38:35] (step=0212500) Train Loss: 0.3261, Train Steps/Sec: 16.63, Grad Norm: 0.0409 +[2025-02-23 01:38:40] (step=0212600) Train Loss: 0.3260, Train Steps/Sec: 17.47, Grad Norm: 0.0420 +[2025-02-23 01:38:47] (step=0212700) Train Loss: 0.3265, Train Steps/Sec: 14.36, Grad Norm: 0.0410 +[2025-02-23 01:38:53] (step=0212800) Train Loss: 0.3262, Train Steps/Sec: 17.36, Grad Norm: 0.0485 +[2025-02-23 01:38:59] (step=0212900) Train Loss: 0.3263, Train Steps/Sec: 17.37, Grad Norm: 0.0452 +[2025-02-23 01:39:05] (step=0213000) Train Loss: 0.3264, Train Steps/Sec: 17.39, Grad Norm: 0.0413 +[2025-02-23 01:39:10] (step=0213100) Train Loss: 0.3263, Train Steps/Sec: 17.41, Grad Norm: 0.0446 +[2025-02-23 01:39:16] (step=0213200) Train Loss: 0.3263, Train Steps/Sec: 17.40, Grad Norm: 0.0434 +[2025-02-23 01:39:22] (step=0213300) Train Loss: 0.3259, Train Steps/Sec: 17.34, Grad Norm: 0.0402 +[2025-02-23 01:39:28] (step=0213400) Train Loss: 0.3259, Train Steps/Sec: 17.30, Grad Norm: 0.0411 +[2025-02-23 01:39:33] (step=0213500) Train Loss: 0.3258, Train Steps/Sec: 17.28, Grad Norm: 0.0398 +[2025-02-23 01:39:39] (step=0213600) Train Loss: 0.3260, Train Steps/Sec: 17.31, Grad Norm: 0.0398 +[2025-02-23 01:39:45] (step=0213700) Train Loss: 0.3260, Train Steps/Sec: 17.33, Grad Norm: 0.0428 +[2025-02-23 01:39:51] (step=0213800) Train Loss: 0.3260, Train Steps/Sec: 15.98, Grad Norm: 0.0460 +[2025-02-23 01:39:58] (step=0213900) Train Loss: 0.3263, Train Steps/Sec: 14.52, Grad Norm: 0.0485 +[2025-02-23 01:40:06] (step=0214000) Train Loss: 0.3258, Train Steps/Sec: 12.94, Grad Norm: 0.0402 +[2025-02-23 01:40:13] (step=0214100) Train Loss: 0.3254, Train Steps/Sec: 14.01, Grad Norm: 0.0403 +[2025-02-23 01:40:19] (step=0214200) Train Loss: 0.3268, Train Steps/Sec: 17.32, Grad Norm: 0.0449 +[2025-02-23 01:40:25] (step=0214300) Train Loss: 0.3266, Train Steps/Sec: 16.50, Grad Norm: 0.0433 +[2025-02-23 01:40:31] (step=0214400) Train Loss: 0.3261, Train Steps/Sec: 17.29, Grad Norm: 0.0452 +[2025-02-23 01:40:37] (step=0214500) Train Loss: 0.3259, Train Steps/Sec: 16.52, Grad Norm: 0.0397 +[2025-02-23 01:40:42] (step=0214600) Train Loss: 0.3266, Train Steps/Sec: 17.36, Grad Norm: 0.0428 +[2025-02-23 01:40:48] (step=0214700) Train Loss: 0.3253, Train Steps/Sec: 17.28, Grad Norm: 0.0440 +[2025-02-23 01:40:54] (step=0214800) Train Loss: 0.3264, Train Steps/Sec: 17.25, Grad Norm: 0.0391 +[2025-02-23 01:41:00] (step=0214900) Train Loss: 0.3261, Train Steps/Sec: 17.27, Grad Norm: 0.0384 +[2025-02-23 01:41:06] (step=0215000) Train Loss: 0.3258, Train Steps/Sec: 17.30, Grad Norm: 0.0427 +[2025-02-23 01:41:11] (step=0215100) Train Loss: 0.3264, Train Steps/Sec: 17.32, Grad Norm: 0.0446 +[2025-02-23 01:41:18] (step=0215200) Train Loss: 0.3265, Train Steps/Sec: 14.30, Grad Norm: 0.0415 +[2025-02-23 01:41:24] (step=0215300) Train Loss: 0.3265, Train Steps/Sec: 17.32, Grad Norm: 0.0402 +[2025-02-23 01:41:30] (step=0215400) Train Loss: 0.3259, Train Steps/Sec: 17.33, Grad Norm: 0.0421 +[2025-02-23 01:41:36] (step=0215500) Train Loss: 0.3262, Train Steps/Sec: 17.32, Grad Norm: 0.0405 +[2025-02-23 01:41:41] (step=0215600) Train Loss: 0.3269, Train Steps/Sec: 17.40, Grad Norm: 0.0416 +[2025-02-23 01:41:47] (step=0215700) Train Loss: 0.3271, Train Steps/Sec: 16.65, Grad Norm: 0.0472 +[2025-02-23 01:41:55] (step=0215800) Train Loss: 0.3269, Train Steps/Sec: 14.14, Grad Norm: 0.0435 +[2025-02-23 01:42:01] (step=0215900) Train Loss: 0.3263, Train Steps/Sec: 16.61, Grad Norm: 0.0452 +[2025-02-23 01:42:07] (step=0216000) Train Loss: 0.3257, Train Steps/Sec: 15.83, Grad Norm: 0.0411 +[2025-02-23 01:42:14] (step=0216100) Train Loss: 0.3259, Train Steps/Sec: 14.06, Grad Norm: 0.0429 +[2025-02-23 01:42:20] (step=0216200) Train Loss: 0.3258, Train Steps/Sec: 17.38, Grad Norm: 0.0433 +[2025-02-23 01:42:26] (step=0216300) Train Loss: 0.3255, Train Steps/Sec: 16.62, Grad Norm: 0.0396 +[2025-02-23 01:42:32] (step=0216400) Train Loss: 0.3263, Train Steps/Sec: 16.45, Grad Norm: 0.0428 +[2025-02-23 01:42:39] (step=0216500) Train Loss: 0.3257, Train Steps/Sec: 14.27, Grad Norm: 0.0415 +[2025-02-23 01:42:45] (step=0216600) Train Loss: 0.3252, Train Steps/Sec: 17.24, Grad Norm: 0.0389 +[2025-02-23 01:42:50] (step=0216700) Train Loss: 0.3259, Train Steps/Sec: 17.34, Grad Norm: 0.0435 +[2025-02-23 01:42:56] (step=0216800) Train Loss: 0.3254, Train Steps/Sec: 17.34, Grad Norm: 0.0398 +[2025-02-23 01:43:02] (step=0216900) Train Loss: 0.3264, Train Steps/Sec: 17.34, Grad Norm: 0.0423 +[2025-02-23 01:43:08] (step=0217000) Train Loss: 0.3258, Train Steps/Sec: 17.35, Grad Norm: 0.0440 +[2025-02-23 01:43:13] (step=0217100) Train Loss: 0.3265, Train Steps/Sec: 17.31, Grad Norm: 0.0407 +[2025-02-23 01:43:19] (step=0217200) Train Loss: 0.3259, Train Steps/Sec: 17.37, Grad Norm: 0.0495 +[2025-02-23 01:43:25] (step=0217300) Train Loss: 0.3257, Train Steps/Sec: 17.37, Grad Norm: 0.0458 +[2025-02-23 01:43:31] (step=0217400) Train Loss: 0.3259, Train Steps/Sec: 17.39, Grad Norm: 0.0425 +[2025-02-23 01:43:37] (step=0217500) Train Loss: 0.3260, Train Steps/Sec: 17.37, Grad Norm: 0.0441 +[2025-02-23 01:43:42] (step=0217600) Train Loss: 0.3263, Train Steps/Sec: 17.38, Grad Norm: 0.0419 +[2025-02-23 01:43:50] (step=0217700) Train Loss: 0.3265, Train Steps/Sec: 12.44, Grad Norm: 0.0400 +[2025-02-23 01:43:57] (step=0217800) Train Loss: 0.3255, Train Steps/Sec: 15.03, Grad Norm: 0.0411 +[2025-02-23 01:44:03] (step=0217900) Train Loss: 0.3261, Train Steps/Sec: 17.25, Grad Norm: 0.0464 +[2025-02-23 01:44:09] (step=0218000) Train Loss: 0.3266, Train Steps/Sec: 15.84, Grad Norm: 0.0468 +[2025-02-23 01:44:16] (step=0218100) Train Loss: 0.3265, Train Steps/Sec: 13.90, Grad Norm: 0.0420 +[2025-02-23 01:44:22] (step=0218200) Train Loss: 0.3262, Train Steps/Sec: 17.31, Grad Norm: 0.0401 +[2025-02-23 01:44:28] (step=0218300) Train Loss: 0.3268, Train Steps/Sec: 16.49, Grad Norm: 0.0444 +[2025-02-23 01:44:34] (step=0218400) Train Loss: 0.3260, Train Steps/Sec: 16.52, Grad Norm: 0.0434 +[2025-02-23 01:44:40] (step=0218500) Train Loss: 0.3260, Train Steps/Sec: 17.26, Grad Norm: 0.0436 +[2025-02-23 01:44:46] (step=0218600) Train Loss: 0.3254, Train Steps/Sec: 17.27, Grad Norm: 0.0439 +[2025-02-23 01:44:52] (step=0218700) Train Loss: 0.3257, Train Steps/Sec: 17.28, Grad Norm: 0.0424 +[2025-02-23 01:44:57] (step=0218800) Train Loss: 0.3265, Train Steps/Sec: 17.34, Grad Norm: 0.0392 +[2025-02-23 01:45:03] (step=0218900) Train Loss: 0.3260, Train Steps/Sec: 17.21, Grad Norm: 0.0420 +[2025-02-23 01:45:10] (step=0219000) Train Loss: 0.3255, Train Steps/Sec: 14.12, Grad Norm: 0.0448 +[2025-02-23 01:45:16] (step=0219100) Train Loss: 0.3260, Train Steps/Sec: 16.93, Grad Norm: 0.0506 +[2025-02-23 01:45:22] (step=0219200) Train Loss: 0.3267, Train Steps/Sec: 16.99, Grad Norm: 0.0462 +[2025-02-23 01:45:28] (step=0219300) Train Loss: 0.3259, Train Steps/Sec: 17.03, Grad Norm: 0.0404 +[2025-02-23 01:45:34] (step=0219400) Train Loss: 0.3261, Train Steps/Sec: 17.01, Grad Norm: 0.0414 +[2025-02-23 01:45:40] (step=0219500) Train Loss: 0.3261, Train Steps/Sec: 17.17, Grad Norm: 0.0457 +[2025-02-23 01:45:46] (step=0219600) Train Loss: 0.3262, Train Steps/Sec: 16.48, Grad Norm: 0.0421 +[2025-02-23 01:45:53] (step=0219700) Train Loss: 0.3258, Train Steps/Sec: 13.98, Grad Norm: 0.0398 +[2025-02-23 01:45:59] (step=0219800) Train Loss: 0.3264, Train Steps/Sec: 16.51, Grad Norm: 0.0414 +[2025-02-23 01:46:05] (step=0219900) Train Loss: 0.3260, Train Steps/Sec: 17.22, Grad Norm: 0.0442 +[2025-02-23 01:46:11] (step=0220000) Train Loss: 0.3255, Train Steps/Sec: 15.82, Grad Norm: 0.0434 +[2025-02-23 01:46:18] (step=0220100) Train Loss: 0.3259, Train Steps/Sec: 13.94, Grad Norm: 0.0458 +[2025-02-23 01:46:25] (step=0220200) Train Loss: 0.3250, Train Steps/Sec: 14.14, Grad Norm: 0.0436 +[2025-02-23 01:46:32] (step=0220300) Train Loss: 0.3263, Train Steps/Sec: 15.75, Grad Norm: 0.0419 +[2025-02-23 01:46:37] (step=0220400) Train Loss: 0.3264, Train Steps/Sec: 17.25, Grad Norm: 0.0443 +[2025-02-23 01:46:43] (step=0220500) Train Loss: 0.3261, Train Steps/Sec: 17.27, Grad Norm: 0.0414 +[2025-02-23 01:46:49] (step=0220600) Train Loss: 0.3260, Train Steps/Sec: 17.27, Grad Norm: 0.0382 +[2025-02-23 01:46:55] (step=0220700) Train Loss: 0.3257, Train Steps/Sec: 17.25, Grad Norm: 0.0446 +[2025-02-23 01:47:01] (step=0220800) Train Loss: 0.3264, Train Steps/Sec: 17.21, Grad Norm: 0.0420 +[2025-02-23 01:47:06] (step=0220900) Train Loss: 0.3259, Train Steps/Sec: 17.23, Grad Norm: 0.0443 +[2025-02-23 01:47:12] (step=0221000) Train Loss: 0.3257, Train Steps/Sec: 17.23, Grad Norm: 0.0425 +[2025-02-23 01:47:18] (step=0221100) Train Loss: 0.3255, Train Steps/Sec: 17.24, Grad Norm: 0.0413 +[2025-02-23 01:47:24] (step=0221200) Train Loss: 0.3261, Train Steps/Sec: 17.22, Grad Norm: 0.0403 +[2025-02-23 01:47:30] (step=0221300) Train Loss: 0.3259, Train Steps/Sec: 17.15, Grad Norm: 0.0415 +[2025-02-23 01:47:36] (step=0221400) Train Loss: 0.3251, Train Steps/Sec: 17.06, Grad Norm: 0.0423 +[2025-02-23 01:47:43] (step=0221500) Train Loss: 0.3256, Train Steps/Sec: 14.04, Grad Norm: 0.0412 +[2025-02-23 01:47:50] (step=0221600) Train Loss: 0.3259, Train Steps/Sec: 14.07, Grad Norm: 0.0409 +[2025-02-23 01:47:56] (step=0221700) Train Loss: 0.3261, Train Steps/Sec: 15.88, Grad Norm: 0.0416 +[2025-02-23 01:48:02] (step=0221800) Train Loss: 0.3262, Train Steps/Sec: 17.30, Grad Norm: 0.0433 +[2025-02-23 01:48:08] (step=0221900) Train Loss: 0.3259, Train Steps/Sec: 17.35, Grad Norm: 0.0394 +[2025-02-23 01:48:14] (step=0222000) Train Loss: 0.3265, Train Steps/Sec: 15.91, Grad Norm: 0.0426 +[2025-02-23 01:48:21] (step=0222100) Train Loss: 0.3262, Train Steps/Sec: 14.54, Grad Norm: 0.0437 +[2025-02-23 01:48:27] (step=0222200) Train Loss: 0.3263, Train Steps/Sec: 16.53, Grad Norm: 0.0419 +[2025-02-23 01:48:33] (step=0222300) Train Loss: 0.3261, Train Steps/Sec: 15.85, Grad Norm: 0.0445 +[2025-02-23 01:48:39] (step=0222400) Train Loss: 0.3260, Train Steps/Sec: 17.42, Grad Norm: 0.0431 +[2025-02-23 01:48:45] (step=0222500) Train Loss: 0.3255, Train Steps/Sec: 17.43, Grad Norm: 0.0464 +[2025-02-23 01:48:50] (step=0222600) Train Loss: 0.3257, Train Steps/Sec: 17.42, Grad Norm: 0.0439 +[2025-02-23 01:48:57] (step=0222700) Train Loss: 0.3257, Train Steps/Sec: 14.10, Grad Norm: 0.0418 +[2025-02-23 01:49:03] (step=0222800) Train Loss: 0.3255, Train Steps/Sec: 17.14, Grad Norm: 0.0434 +[2025-02-23 01:49:09] (step=0222900) Train Loss: 0.3259, Train Steps/Sec: 17.24, Grad Norm: 0.0414 +[2025-02-23 01:49:15] (step=0223000) Train Loss: 0.3269, Train Steps/Sec: 17.24, Grad Norm: 0.0431 +[2025-02-23 01:49:21] (step=0223100) Train Loss: 0.3258, Train Steps/Sec: 17.24, Grad Norm: 0.0420 +[2025-02-23 01:49:26] (step=0223200) Train Loss: 0.3259, Train Steps/Sec: 17.23, Grad Norm: 0.0416 +[2025-02-23 01:49:32] (step=0223300) Train Loss: 0.3259, Train Steps/Sec: 17.28, Grad Norm: 0.0403 +[2025-02-23 01:49:38] (step=0223400) Train Loss: 0.3256, Train Steps/Sec: 17.37, Grad Norm: 0.0448 +[2025-02-23 01:49:44] (step=0223500) Train Loss: 0.3257, Train Steps/Sec: 16.61, Grad Norm: 0.0411 +[2025-02-23 01:49:51] (step=0223600) Train Loss: 0.3255, Train Steps/Sec: 14.59, Grad Norm: 0.0402 +[2025-02-23 01:49:57] (step=0223700) Train Loss: 0.3260, Train Steps/Sec: 16.58, Grad Norm: 0.0399 +[2025-02-23 01:50:03] (step=0223800) Train Loss: 0.3258, Train Steps/Sec: 17.26, Grad Norm: 0.0421 +[2025-02-23 01:50:09] (step=0223900) Train Loss: 0.3260, Train Steps/Sec: 17.13, Grad Norm: 0.0451 +[2025-02-23 01:50:16] (step=0224000) Train Loss: 0.3253, Train Steps/Sec: 13.15, Grad Norm: 0.0390 +[2025-02-23 01:50:23] (step=0224100) Train Loss: 0.3262, Train Steps/Sec: 14.42, Grad Norm: 0.0438 +[2025-02-23 01:50:29] (step=0224200) Train Loss: 0.3263, Train Steps/Sec: 15.71, Grad Norm: 0.0394 +[2025-02-23 01:50:36] (step=0224300) Train Loss: 0.3266, Train Steps/Sec: 16.44, Grad Norm: 0.0411 +[2025-02-23 01:50:41] (step=0224400) Train Loss: 0.3263, Train Steps/Sec: 17.20, Grad Norm: 0.0420 +[2025-02-23 01:50:47] (step=0224500) Train Loss: 0.3262, Train Steps/Sec: 17.18, Grad Norm: 0.0450 +[2025-02-23 01:50:53] (step=0224600) Train Loss: 0.3265, Train Steps/Sec: 17.15, Grad Norm: 0.0438 +[2025-02-23 01:50:59] (step=0224700) Train Loss: 0.3258, Train Steps/Sec: 17.01, Grad Norm: 0.0444 +[2025-02-23 01:51:05] (step=0224800) Train Loss: 0.3261, Train Steps/Sec: 17.13, Grad Norm: 0.0437 +[2025-02-23 01:51:11] (step=0224900) Train Loss: 0.3251, Train Steps/Sec: 17.14, Grad Norm: 0.0426 +[2025-02-23 01:51:16] (step=0225000) Train Loss: 0.3259, Train Steps/Sec: 17.11, Grad Norm: 0.0417 +[2025-02-23 01:51:22] (step=0225100) Train Loss: 0.3257, Train Steps/Sec: 17.13, Grad Norm: 0.0421 +[2025-02-23 01:51:29] (step=0225200) Train Loss: 0.3256, Train Steps/Sec: 13.91, Grad Norm: 0.0468 +[2025-02-23 01:51:35] (step=0225300) Train Loss: 0.3261, Train Steps/Sec: 17.19, Grad Norm: 0.0412 +[2025-02-23 01:51:41] (step=0225400) Train Loss: 0.3263, Train Steps/Sec: 17.22, Grad Norm: 0.0413 +[2025-02-23 01:51:48] (step=0225500) Train Loss: 0.3257, Train Steps/Sec: 14.07, Grad Norm: 0.0414 +[2025-02-23 01:51:55] (step=0225600) Train Loss: 0.3255, Train Steps/Sec: 15.89, Grad Norm: 0.0376 +[2025-02-23 01:52:00] (step=0225700) Train Loss: 0.3252, Train Steps/Sec: 17.40, Grad Norm: 0.0429 +[2025-02-23 01:52:06] (step=0225800) Train Loss: 0.3255, Train Steps/Sec: 17.38, Grad Norm: 0.0436 +[2025-02-23 01:52:12] (step=0225900) Train Loss: 0.3261, Train Steps/Sec: 17.40, Grad Norm: 0.0417 +[2025-02-23 01:52:18] (step=0226000) Train Loss: 0.3253, Train Steps/Sec: 15.89, Grad Norm: 0.0449 +[2025-02-23 01:52:25] (step=0226100) Train Loss: 0.3259, Train Steps/Sec: 15.15, Grad Norm: 0.0403 +[2025-02-23 01:52:31] (step=0226200) Train Loss: 0.3256, Train Steps/Sec: 15.14, Grad Norm: 0.0402 +[2025-02-23 01:52:37] (step=0226300) Train Loss: 0.3259, Train Steps/Sec: 16.48, Grad Norm: 0.0412 +[2025-02-23 01:52:43] (step=0226400) Train Loss: 0.3259, Train Steps/Sec: 17.15, Grad Norm: 0.0409 +[2025-02-23 01:52:50] (step=0226500) Train Loss: 0.3260, Train Steps/Sec: 14.11, Grad Norm: 0.0408 +[2025-02-23 01:52:56] (step=0226600) Train Loss: 0.3258, Train Steps/Sec: 17.01, Grad Norm: 0.0400 +[2025-02-23 01:53:02] (step=0226700) Train Loss: 0.3263, Train Steps/Sec: 16.99, Grad Norm: 0.0449 +[2025-02-23 01:53:08] (step=0226800) Train Loss: 0.3264, Train Steps/Sec: 16.99, Grad Norm: 0.0412 +[2025-02-23 01:53:14] (step=0226900) Train Loss: 0.3260, Train Steps/Sec: 17.01, Grad Norm: 0.0400 +[2025-02-23 01:53:20] (step=0227000) Train Loss: 0.3258, Train Steps/Sec: 16.99, Grad Norm: 0.0384 +[2025-02-23 01:53:26] (step=0227100) Train Loss: 0.3261, Train Steps/Sec: 17.00, Grad Norm: 0.0397 +[2025-02-23 01:53:31] (step=0227200) Train Loss: 0.3257, Train Steps/Sec: 17.02, Grad Norm: 0.0413 +[2025-02-23 01:53:37] (step=0227300) Train Loss: 0.3262, Train Steps/Sec: 17.05, Grad Norm: 0.0430 +[2025-02-23 01:53:44] (step=0227400) Train Loss: 0.3255, Train Steps/Sec: 15.74, Grad Norm: 0.0416 +[2025-02-23 01:53:51] (step=0227500) Train Loss: 0.3260, Train Steps/Sec: 14.36, Grad Norm: 0.0476 +[2025-02-23 01:53:57] (step=0227600) Train Loss: 0.3257, Train Steps/Sec: 16.35, Grad Norm: 0.0410 +[2025-02-23 01:54:04] (step=0227700) Train Loss: 0.3256, Train Steps/Sec: 14.03, Grad Norm: 0.0417 +[2025-02-23 01:54:10] (step=0227800) Train Loss: 0.3259, Train Steps/Sec: 17.33, Grad Norm: 0.0418 +[2025-02-23 01:54:15] (step=0227900) Train Loss: 0.3259, Train Steps/Sec: 17.31, Grad Norm: 0.0437 +[2025-02-23 01:54:22] (step=0228000) Train Loss: 0.3253, Train Steps/Sec: 15.82, Grad Norm: 0.0452 +[2025-02-23 01:54:29] (step=0228100) Train Loss: 0.3262, Train Steps/Sec: 14.59, Grad Norm: 0.0450 +[2025-02-23 01:54:35] (step=0228200) Train Loss: 0.3254, Train Steps/Sec: 16.54, Grad Norm: 0.0406 +[2025-02-23 01:54:41] (step=0228300) Train Loss: 0.3261, Train Steps/Sec: 16.48, Grad Norm: 0.0382 +[2025-02-23 01:54:47] (step=0228400) Train Loss: 0.3263, Train Steps/Sec: 17.23, Grad Norm: 0.0408 +[2025-02-23 01:54:52] (step=0228500) Train Loss: 0.3255, Train Steps/Sec: 17.21, Grad Norm: 0.0404 +[2025-02-23 01:54:58] (step=0228600) Train Loss: 0.3259, Train Steps/Sec: 17.19, Grad Norm: 0.0396 +[2025-02-23 01:55:04] (step=0228700) Train Loss: 0.3260, Train Steps/Sec: 17.16, Grad Norm: 0.0433 +[2025-02-23 01:55:10] (step=0228800) Train Loss: 0.3254, Train Steps/Sec: 17.18, Grad Norm: 0.0399 +[2025-02-23 01:55:16] (step=0228900) Train Loss: 0.3258, Train Steps/Sec: 17.14, Grad Norm: 0.0415 +[2025-02-23 01:55:23] (step=0229000) Train Loss: 0.3252, Train Steps/Sec: 14.10, Grad Norm: 0.0382 +[2025-02-23 01:55:29] (step=0229100) Train Loss: 0.3262, Train Steps/Sec: 17.19, Grad Norm: 0.0426 +[2025-02-23 01:55:34] (step=0229200) Train Loss: 0.3259, Train Steps/Sec: 17.20, Grad Norm: 0.0408 +[2025-02-23 01:55:40] (step=0229300) Train Loss: 0.3259, Train Steps/Sec: 17.22, Grad Norm: 0.0455 +[2025-02-23 01:55:47] (step=0229400) Train Loss: 0.3259, Train Steps/Sec: 14.61, Grad Norm: 0.0406 +[2025-02-23 01:55:53] (step=0229500) Train Loss: 0.3262, Train Steps/Sec: 15.81, Grad Norm: 0.0394 +[2025-02-23 01:55:59] (step=0229600) Train Loss: 0.3260, Train Steps/Sec: 17.26, Grad Norm: 0.0392 +[2025-02-23 01:56:05] (step=0229700) Train Loss: 0.3251, Train Steps/Sec: 17.26, Grad Norm: 0.0419 +[2025-02-23 01:56:11] (step=0229800) Train Loss: 0.3262, Train Steps/Sec: 17.29, Grad Norm: 0.0443 +[2025-02-23 01:56:17] (step=0229900) Train Loss: 0.3260, Train Steps/Sec: 17.11, Grad Norm: 0.0417 +[2025-02-23 01:56:23] (step=0230000) Train Loss: 0.3258, Train Steps/Sec: 15.75, Grad Norm: 0.0402 +[2025-02-23 01:56:30] (step=0230100) Train Loss: 0.3253, Train Steps/Sec: 14.54, Grad Norm: 0.0408 +[2025-02-23 01:56:37] (step=0230200) Train Loss: 0.3258, Train Steps/Sec: 13.19, Grad Norm: 0.0399 +[2025-02-23 01:56:44] (step=0230300) Train Loss: 0.3252, Train Steps/Sec: 16.37, Grad Norm: 0.0401 +[2025-02-23 01:56:49] (step=0230400) Train Loss: 0.3260, Train Steps/Sec: 17.14, Grad Norm: 0.0389 +[2025-02-23 01:56:55] (step=0230500) Train Loss: 0.3256, Train Steps/Sec: 17.26, Grad Norm: 0.0412 +[2025-02-23 01:57:01] (step=0230600) Train Loss: 0.3260, Train Steps/Sec: 17.16, Grad Norm: 0.0449 +[2025-02-23 01:57:07] (step=0230700) Train Loss: 0.3266, Train Steps/Sec: 17.15, Grad Norm: 0.0433 +[2025-02-23 01:57:13] (step=0230800) Train Loss: 0.3254, Train Steps/Sec: 17.28, Grad Norm: 0.0429 +[2025-02-23 01:57:18] (step=0230900) Train Loss: 0.3261, Train Steps/Sec: 17.24, Grad Norm: 0.0424 +[2025-02-23 01:57:24] (step=0231000) Train Loss: 0.3254, Train Steps/Sec: 17.19, Grad Norm: 0.0410 +[2025-02-23 01:57:30] (step=0231100) Train Loss: 0.3261, Train Steps/Sec: 17.24, Grad Norm: 0.0426 +[2025-02-23 01:57:36] (step=0231200) Train Loss: 0.3259, Train Steps/Sec: 17.28, Grad Norm: 0.0401 +[2025-02-23 01:57:42] (step=0231300) Train Loss: 0.3253, Train Steps/Sec: 15.26, Grad Norm: 0.0415 +[2025-02-23 01:57:49] (step=0231400) Train Loss: 0.3259, Train Steps/Sec: 15.16, Grad Norm: 0.0399 +[2025-02-23 01:57:56] (step=0231500) Train Loss: 0.3253, Train Steps/Sec: 13.91, Grad Norm: 0.0414 +[2025-02-23 01:58:02] (step=0231600) Train Loss: 0.3260, Train Steps/Sec: 17.10, Grad Norm: 0.0405 +[2025-02-23 01:58:08] (step=0231700) Train Loss: 0.3255, Train Steps/Sec: 17.03, Grad Norm: 0.0427 +[2025-02-23 01:58:14] (step=0231800) Train Loss: 0.3254, Train Steps/Sec: 17.09, Grad Norm: 0.0429 +[2025-02-23 01:58:20] (step=0231900) Train Loss: 0.3257, Train Steps/Sec: 17.16, Grad Norm: 0.0402 +[2025-02-23 01:58:26] (step=0232000) Train Loss: 0.3260, Train Steps/Sec: 15.12, Grad Norm: 0.0415 +[2025-02-23 01:58:33] (step=0232100) Train Loss: 0.3256, Train Steps/Sec: 15.10, Grad Norm: 0.0421 +[2025-02-23 01:58:39] (step=0232200) Train Loss: 0.3260, Train Steps/Sec: 15.73, Grad Norm: 0.0387 +[2025-02-23 01:58:45] (step=0232300) Train Loss: 0.3260, Train Steps/Sec: 16.37, Grad Norm: 0.0439 +[2025-02-23 01:58:51] (step=0232400) Train Loss: 0.3256, Train Steps/Sec: 17.19, Grad Norm: 0.0403 +[2025-02-23 01:58:57] (step=0232500) Train Loss: 0.3259, Train Steps/Sec: 17.24, Grad Norm: 0.0434 +[2025-02-23 01:59:03] (step=0232600) Train Loss: 0.3256, Train Steps/Sec: 17.08, Grad Norm: 0.0400 +[2025-02-23 01:59:10] (step=0232700) Train Loss: 0.3253, Train Steps/Sec: 14.29, Grad Norm: 0.0421 +[2025-02-23 01:59:16] (step=0232800) Train Loss: 0.3258, Train Steps/Sec: 17.17, Grad Norm: 0.0420 +[2025-02-23 01:59:21] (step=0232900) Train Loss: 0.3260, Train Steps/Sec: 17.23, Grad Norm: 0.0407 +[2025-02-23 01:59:27] (step=0233000) Train Loss: 0.3259, Train Steps/Sec: 17.26, Grad Norm: 0.0413 +[2025-02-23 01:59:33] (step=0233100) Train Loss: 0.3261, Train Steps/Sec: 17.33, Grad Norm: 0.0413 +[2025-02-23 01:59:39] (step=0233200) Train Loss: 0.3255, Train Steps/Sec: 17.39, Grad Norm: 0.0410 +[2025-02-23 01:59:46] (step=0233300) Train Loss: 0.3257, Train Steps/Sec: 13.66, Grad Norm: 0.0476 +[2025-02-23 01:59:52] (step=0233400) Train Loss: 0.3262, Train Steps/Sec: 16.65, Grad Norm: 0.0402 +[2025-02-23 01:59:58] (step=0233500) Train Loss: 0.3260, Train Steps/Sec: 17.40, Grad Norm: 0.0410 +[2025-02-23 02:00:04] (step=0233600) Train Loss: 0.3256, Train Steps/Sec: 17.37, Grad Norm: 0.0415 +[2025-02-23 02:00:09] (step=0233700) Train Loss: 0.3255, Train Steps/Sec: 17.39, Grad Norm: 0.0410 +[2025-02-23 02:00:15] (step=0233800) Train Loss: 0.3258, Train Steps/Sec: 17.39, Grad Norm: 0.0419 +[2025-02-23 02:00:21] (step=0233900) Train Loss: 0.3259, Train Steps/Sec: 17.36, Grad Norm: 0.0436 +[2025-02-23 02:00:29] (step=0234000) Train Loss: 0.3258, Train Steps/Sec: 12.82, Grad Norm: 0.0440 +[2025-02-23 02:00:35] (step=0234100) Train Loss: 0.3258, Train Steps/Sec: 15.12, Grad Norm: 0.0399 +[2025-02-23 02:00:42] (step=0234200) Train Loss: 0.3258, Train Steps/Sec: 15.79, Grad Norm: 0.0441 +[2025-02-23 02:00:48] (step=0234300) Train Loss: 0.3263, Train Steps/Sec: 16.41, Grad Norm: 0.0425 +[2025-02-23 02:00:53] (step=0234400) Train Loss: 0.3259, Train Steps/Sec: 17.20, Grad Norm: 0.0393 +[2025-02-23 02:00:59] (step=0234500) Train Loss: 0.3256, Train Steps/Sec: 17.22, Grad Norm: 0.0411 +[2025-02-23 02:01:05] (step=0234600) Train Loss: 0.3254, Train Steps/Sec: 17.17, Grad Norm: 0.0397 +[2025-02-23 02:01:11] (step=0234700) Train Loss: 0.3253, Train Steps/Sec: 17.22, Grad Norm: 0.0403 +[2025-02-23 02:01:17] (step=0234800) Train Loss: 0.3255, Train Steps/Sec: 17.17, Grad Norm: 0.0419 +[2025-02-23 02:01:23] (step=0234900) Train Loss: 0.3256, Train Steps/Sec: 17.25, Grad Norm: 0.0460 +[2025-02-23 02:01:28] (step=0235000) Train Loss: 0.3260, Train Steps/Sec: 17.29, Grad Norm: 0.0423 +[2025-02-23 02:01:34] (step=0235100) Train Loss: 0.3258, Train Steps/Sec: 17.26, Grad Norm: 0.0415 +[2025-02-23 02:01:42] (step=0235200) Train Loss: 0.3252, Train Steps/Sec: 12.12, Grad Norm: 0.0421 +[2025-02-23 02:01:48] (step=0235300) Train Loss: 0.3262, Train Steps/Sec: 16.43, Grad Norm: 0.0415 +[2025-02-23 02:01:54] (step=0235400) Train Loss: 0.3260, Train Steps/Sec: 16.52, Grad Norm: 0.0440 +[2025-02-23 02:02:00] (step=0235500) Train Loss: 0.3257, Train Steps/Sec: 17.23, Grad Norm: 0.0403 +[2025-02-23 02:02:06] (step=0235600) Train Loss: 0.3256, Train Steps/Sec: 17.15, Grad Norm: 0.0427 +[2025-02-23 02:02:12] (step=0235700) Train Loss: 0.3258, Train Steps/Sec: 17.22, Grad Norm: 0.0398 +[2025-02-23 02:02:18] (step=0235800) Train Loss: 0.3259, Train Steps/Sec: 17.25, Grad Norm: 0.0405 +[2025-02-23 02:02:24] (step=0235900) Train Loss: 0.3261, Train Steps/Sec: 16.50, Grad Norm: 0.0459 +[2025-02-23 02:02:30] (step=0236000) Train Loss: 0.3253, Train Steps/Sec: 16.47, Grad Norm: 0.0390 +[2025-02-23 02:02:37] (step=0236100) Train Loss: 0.3252, Train Steps/Sec: 14.55, Grad Norm: 0.0403 +[2025-02-23 02:02:43] (step=0236200) Train Loss: 0.3251, Train Steps/Sec: 15.77, Grad Norm: 0.0395 +[2025-02-23 02:02:49] (step=0236300) Train Loss: 0.3255, Train Steps/Sec: 16.45, Grad Norm: 0.0376 +[2025-02-23 02:02:55] (step=0236400) Train Loss: 0.3256, Train Steps/Sec: 17.25, Grad Norm: 0.0413 +[2025-02-23 02:03:02] (step=0236500) Train Loss: 0.3260, Train Steps/Sec: 14.24, Grad Norm: 0.0424 +[2025-02-23 02:03:08] (step=0236600) Train Loss: 0.3255, Train Steps/Sec: 17.30, Grad Norm: 0.0401 +[2025-02-23 02:03:14] (step=0236700) Train Loss: 0.3260, Train Steps/Sec: 17.29, Grad Norm: 0.0419 +[2025-02-23 02:03:19] (step=0236800) Train Loss: 0.3253, Train Steps/Sec: 17.25, Grad Norm: 0.0421 +[2025-02-23 02:03:25] (step=0236900) Train Loss: 0.3257, Train Steps/Sec: 17.18, Grad Norm: 0.0438 +[2025-02-23 02:03:31] (step=0237000) Train Loss: 0.3257, Train Steps/Sec: 17.21, Grad Norm: 0.0397 +[2025-02-23 02:03:37] (step=0237100) Train Loss: 0.3260, Train Steps/Sec: 16.48, Grad Norm: 0.0391 +[2025-02-23 02:03:44] (step=0237200) Train Loss: 0.3254, Train Steps/Sec: 14.01, Grad Norm: 0.0394 +[2025-02-23 02:03:50] (step=0237300) Train Loss: 0.3250, Train Steps/Sec: 16.55, Grad Norm: 0.0389 +[2025-02-23 02:03:56] (step=0237400) Train Loss: 0.3254, Train Steps/Sec: 17.29, Grad Norm: 0.0430 +[2025-02-23 02:04:02] (step=0237500) Train Loss: 0.3259, Train Steps/Sec: 17.29, Grad Norm: 0.0424 +[2025-02-23 02:04:08] (step=0237600) Train Loss: 0.3251, Train Steps/Sec: 17.26, Grad Norm: 0.0423 +[2025-02-23 02:04:15] (step=0237700) Train Loss: 0.3255, Train Steps/Sec: 14.27, Grad Norm: 0.0423 +[2025-02-23 02:04:20] (step=0237800) Train Loss: 0.3259, Train Steps/Sec: 17.12, Grad Norm: 0.0398 +[2025-02-23 02:04:27] (step=0237900) Train Loss: 0.3256, Train Steps/Sec: 16.26, Grad Norm: 0.0375 +[2025-02-23 02:04:33] (step=0238000) Train Loss: 0.3262, Train Steps/Sec: 16.28, Grad Norm: 0.0433 +[2025-02-23 02:04:40] (step=0238100) Train Loss: 0.3245, Train Steps/Sec: 14.48, Grad Norm: 0.0403 +[2025-02-23 02:04:46] (step=0238200) Train Loss: 0.3257, Train Steps/Sec: 15.64, Grad Norm: 0.0387 +[2025-02-23 02:04:52] (step=0238300) Train Loss: 0.3250, Train Steps/Sec: 16.37, Grad Norm: 0.0424 +[2025-02-23 02:04:58] (step=0238400) Train Loss: 0.3260, Train Steps/Sec: 17.08, Grad Norm: 0.0405 +[2025-02-23 02:05:04] (step=0238500) Train Loss: 0.3255, Train Steps/Sec: 17.07, Grad Norm: 0.0405 +[2025-02-23 02:05:10] (step=0238600) Train Loss: 0.3256, Train Steps/Sec: 17.10, Grad Norm: 0.0397 +[2025-02-23 02:05:16] (step=0238700) Train Loss: 0.3259, Train Steps/Sec: 17.07, Grad Norm: 0.0440 +[2025-02-23 02:05:21] (step=0238800) Train Loss: 0.3255, Train Steps/Sec: 17.08, Grad Norm: 0.0402 +[2025-02-23 02:05:27] (step=0238900) Train Loss: 0.3254, Train Steps/Sec: 17.07, Grad Norm: 0.0420 +[2025-02-23 02:05:34] (step=0239000) Train Loss: 0.3255, Train Steps/Sec: 14.09, Grad Norm: 0.0404 +[2025-02-23 02:05:42] (step=0239100) Train Loss: 0.3259, Train Steps/Sec: 13.96, Grad Norm: 0.0432 +[2025-02-23 02:05:48] (step=0239200) Train Loss: 0.3255, Train Steps/Sec: 16.40, Grad Norm: 0.0434 +[2025-02-23 02:05:54] (step=0239300) Train Loss: 0.3252, Train Steps/Sec: 16.44, Grad Norm: 0.0398 +[2025-02-23 02:06:00] (step=0239400) Train Loss: 0.3250, Train Steps/Sec: 17.26, Grad Norm: 0.0434 +[2025-02-23 02:06:05] (step=0239500) Train Loss: 0.3255, Train Steps/Sec: 17.28, Grad Norm: 0.0409 +[2025-02-23 02:06:11] (step=0239600) Train Loss: 0.3259, Train Steps/Sec: 17.25, Grad Norm: 0.0432 +[2025-02-23 02:06:17] (step=0239700) Train Loss: 0.3252, Train Steps/Sec: 17.31, Grad Norm: 0.0432 +[2025-02-23 02:06:23] (step=0239800) Train Loss: 0.3257, Train Steps/Sec: 17.28, Grad Norm: 0.0444 +[2025-02-23 02:06:29] (step=0239900) Train Loss: 0.3250, Train Steps/Sec: 16.46, Grad Norm: 0.0370 +[2025-02-23 02:06:35] (step=0240000) Train Loss: 0.3260, Train Steps/Sec: 16.45, Grad Norm: 0.0470 +[2025-02-23 02:06:41] (step=0240100) Train Loss: 0.3258, Train Steps/Sec: 15.14, Grad Norm: 0.0408 +[2025-02-23 02:06:49] (step=0240200) Train Loss: 0.3257, Train Steps/Sec: 12.78, Grad Norm: 0.0391 +[2025-02-23 02:06:55] (step=0240300) Train Loss: 0.3256, Train Steps/Sec: 16.50, Grad Norm: 0.0421 +[2025-02-23 02:07:01] (step=0240400) Train Loss: 0.3256, Train Steps/Sec: 17.37, Grad Norm: 0.0431 +[2025-02-23 02:07:07] (step=0240500) Train Loss: 0.3257, Train Steps/Sec: 17.34, Grad Norm: 0.0414 +[2025-02-23 02:07:13] (step=0240600) Train Loss: 0.3260, Train Steps/Sec: 17.32, Grad Norm: 0.0420 +[2025-02-23 02:07:18] (step=0240700) Train Loss: 0.3258, Train Steps/Sec: 17.30, Grad Norm: 0.0397 +[2025-02-23 02:07:24] (step=0240800) Train Loss: 0.3254, Train Steps/Sec: 17.22, Grad Norm: 0.0389 +[2025-02-23 02:07:30] (step=0240900) Train Loss: 0.3255, Train Steps/Sec: 17.17, Grad Norm: 0.0370 +[2025-02-23 02:07:36] (step=0241000) Train Loss: 0.3257, Train Steps/Sec: 15.78, Grad Norm: 0.0407 +[2025-02-23 02:07:43] (step=0241100) Train Loss: 0.3262, Train Steps/Sec: 14.58, Grad Norm: 0.0448 +[2025-02-23 02:07:49] (step=0241200) Train Loss: 0.3257, Train Steps/Sec: 16.59, Grad Norm: 0.0381 +[2025-02-23 02:07:55] (step=0241300) Train Loss: 0.3255, Train Steps/Sec: 17.28, Grad Norm: 0.0429 +[2025-02-23 02:08:01] (step=0241400) Train Loss: 0.3253, Train Steps/Sec: 17.24, Grad Norm: 0.0418 +[2025-02-23 02:08:08] (step=0241500) Train Loss: 0.3261, Train Steps/Sec: 14.27, Grad Norm: 0.0420 +[2025-02-23 02:08:14] (step=0241600) Train Loss: 0.3257, Train Steps/Sec: 17.23, Grad Norm: 0.0388 +[2025-02-23 02:08:20] (step=0241700) Train Loss: 0.3258, Train Steps/Sec: 17.24, Grad Norm: 0.0414 +[2025-02-23 02:08:26] (step=0241800) Train Loss: 0.3249, Train Steps/Sec: 16.40, Grad Norm: 0.0396 +[2025-02-23 02:08:31] (step=0241900) Train Loss: 0.3257, Train Steps/Sec: 17.18, Grad Norm: 0.0447 +[2025-02-23 02:08:38] (step=0242000) Train Loss: 0.3255, Train Steps/Sec: 16.31, Grad Norm: 0.0386 +[2025-02-23 02:08:44] (step=0242100) Train Loss: 0.3253, Train Steps/Sec: 15.00, Grad Norm: 0.0386 +[2025-02-23 02:08:51] (step=0242200) Train Loss: 0.3254, Train Steps/Sec: 14.97, Grad Norm: 0.0405 +[2025-02-23 02:08:57] (step=0242300) Train Loss: 0.3254, Train Steps/Sec: 16.30, Grad Norm: 0.0431 +[2025-02-23 02:09:03] (step=0242400) Train Loss: 0.3253, Train Steps/Sec: 17.10, Grad Norm: 0.0388 +[2025-02-23 02:09:09] (step=0242500) Train Loss: 0.3259, Train Steps/Sec: 17.12, Grad Norm: 0.0407 +[2025-02-23 02:09:15] (step=0242600) Train Loss: 0.3259, Train Steps/Sec: 17.11, Grad Norm: 0.0410 +[2025-02-23 02:09:22] (step=0242700) Train Loss: 0.3256, Train Steps/Sec: 14.17, Grad Norm: 0.0429 +[2025-02-23 02:09:27] (step=0242800) Train Loss: 0.3253, Train Steps/Sec: 17.19, Grad Norm: 0.0404 +[2025-02-23 02:09:33] (step=0242900) Train Loss: 0.3251, Train Steps/Sec: 17.34, Grad Norm: 0.0384 +[2025-02-23 02:09:41] (step=0243000) Train Loss: 0.3261, Train Steps/Sec: 13.63, Grad Norm: 0.0403 +[2025-02-23 02:09:46] (step=0243100) Train Loss: 0.3256, Train Steps/Sec: 17.28, Grad Norm: 0.0461 +[2025-02-23 02:09:52] (step=0243200) Train Loss: 0.3256, Train Steps/Sec: 16.58, Grad Norm: 0.0396 +[2025-02-23 02:09:58] (step=0243300) Train Loss: 0.3254, Train Steps/Sec: 17.29, Grad Norm: 0.0414 +[2025-02-23 02:10:04] (step=0243400) Train Loss: 0.3254, Train Steps/Sec: 17.26, Grad Norm: 0.0408 +[2025-02-23 02:10:10] (step=0243500) Train Loss: 0.3257, Train Steps/Sec: 17.27, Grad Norm: 0.0436 +[2025-02-23 02:10:16] (step=0243600) Train Loss: 0.3251, Train Steps/Sec: 17.22, Grad Norm: 0.0402 +[2025-02-23 02:10:21] (step=0243700) Train Loss: 0.3250, Train Steps/Sec: 17.23, Grad Norm: 0.0398 +[2025-02-23 02:10:28] (step=0243800) Train Loss: 0.3259, Train Steps/Sec: 16.40, Grad Norm: 0.0422 +[2025-02-23 02:10:33] (step=0243900) Train Loss: 0.3259, Train Steps/Sec: 17.11, Grad Norm: 0.0401 +[2025-02-23 02:10:41] (step=0244000) Train Loss: 0.3257, Train Steps/Sec: 13.56, Grad Norm: 0.0419 +[2025-02-23 02:10:47] (step=0244100) Train Loss: 0.3253, Train Steps/Sec: 15.14, Grad Norm: 0.0389 +[2025-02-23 02:10:54] (step=0244200) Train Loss: 0.3255, Train Steps/Sec: 15.20, Grad Norm: 0.0384 +[2025-02-23 02:11:00] (step=0244300) Train Loss: 0.3259, Train Steps/Sec: 16.50, Grad Norm: 0.0416 +[2025-02-23 02:11:06] (step=0244400) Train Loss: 0.3250, Train Steps/Sec: 17.26, Grad Norm: 0.0409 +[2025-02-23 02:11:12] (step=0244500) Train Loss: 0.3257, Train Steps/Sec: 17.36, Grad Norm: 0.0394 +[2025-02-23 02:11:17] (step=0244600) Train Loss: 0.3254, Train Steps/Sec: 17.38, Grad Norm: 0.0429 +[2025-02-23 02:11:23] (step=0244700) Train Loss: 0.3256, Train Steps/Sec: 17.40, Grad Norm: 0.0385 +[2025-02-23 02:11:29] (step=0244800) Train Loss: 0.3248, Train Steps/Sec: 17.36, Grad Norm: 0.0456 +[2025-02-23 02:11:35] (step=0244900) Train Loss: 0.3264, Train Steps/Sec: 15.22, Grad Norm: 0.0415 +[2025-02-23 02:11:42] (step=0245000) Train Loss: 0.3253, Train Steps/Sec: 15.12, Grad Norm: 0.0378 +[2025-02-23 02:11:48] (step=0245100) Train Loss: 0.3260, Train Steps/Sec: 16.46, Grad Norm: 0.0450 +[2025-02-23 02:11:55] (step=0245200) Train Loss: 0.3251, Train Steps/Sec: 14.18, Grad Norm: 0.0426 +[2025-02-23 02:12:01] (step=0245300) Train Loss: 0.3252, Train Steps/Sec: 17.16, Grad Norm: 0.0395 +[2025-02-23 02:12:07] (step=0245400) Train Loss: 0.3252, Train Steps/Sec: 17.32, Grad Norm: 0.0399 +[2025-02-23 02:12:12] (step=0245500) Train Loss: 0.3252, Train Steps/Sec: 17.33, Grad Norm: 0.0362 +[2025-02-23 02:12:18] (step=0245600) Train Loss: 0.3259, Train Steps/Sec: 17.21, Grad Norm: 0.0416 +[2025-02-23 02:12:24] (step=0245700) Train Loss: 0.3260, Train Steps/Sec: 16.44, Grad Norm: 0.0416 +[2025-02-23 02:12:30] (step=0245800) Train Loss: 0.3254, Train Steps/Sec: 17.28, Grad Norm: 0.0401 +[2025-02-23 02:12:36] (step=0245900) Train Loss: 0.3252, Train Steps/Sec: 17.31, Grad Norm: 0.0369 +[2025-02-23 02:12:42] (step=0246000) Train Loss: 0.3264, Train Steps/Sec: 16.52, Grad Norm: 0.0406 +[2025-02-23 02:12:49] (step=0246100) Train Loss: 0.3260, Train Steps/Sec: 15.16, Grad Norm: 0.0397 +[2025-02-23 02:12:55] (step=0246200) Train Loss: 0.3255, Train Steps/Sec: 15.17, Grad Norm: 0.0413 +[2025-02-23 02:13:01] (step=0246300) Train Loss: 0.3253, Train Steps/Sec: 16.46, Grad Norm: 0.0419 +[2025-02-23 02:13:07] (step=0246400) Train Loss: 0.3253, Train Steps/Sec: 17.22, Grad Norm: 0.0453 +[2025-02-23 02:13:14] (step=0246500) Train Loss: 0.3257, Train Steps/Sec: 14.25, Grad Norm: 0.0407 +[2025-02-23 02:13:20] (step=0246600) Train Loss: 0.3256, Train Steps/Sec: 17.19, Grad Norm: 0.0421 +[2025-02-23 02:13:26] (step=0246700) Train Loss: 0.3252, Train Steps/Sec: 17.26, Grad Norm: 0.0373 +[2025-02-23 02:13:32] (step=0246800) Train Loss: 0.3257, Train Steps/Sec: 17.28, Grad Norm: 0.0428 +[2025-02-23 02:13:39] (step=0246900) Train Loss: 0.3251, Train Steps/Sec: 13.61, Grad Norm: 0.0405 +[2025-02-23 02:13:45] (step=0247000) Train Loss: 0.3251, Train Steps/Sec: 17.37, Grad Norm: 0.0365 +[2025-02-23 02:13:51] (step=0247100) Train Loss: 0.3252, Train Steps/Sec: 16.63, Grad Norm: 0.0468 +[2025-02-23 02:13:56] (step=0247200) Train Loss: 0.3258, Train Steps/Sec: 17.29, Grad Norm: 0.0377 +[2025-02-23 02:14:02] (step=0247300) Train Loss: 0.3258, Train Steps/Sec: 17.29, Grad Norm: 0.0424 +[2025-02-23 02:14:08] (step=0247400) Train Loss: 0.3257, Train Steps/Sec: 17.38, Grad Norm: 0.0406 +[2025-02-23 02:14:14] (step=0247500) Train Loss: 0.3260, Train Steps/Sec: 17.27, Grad Norm: 0.0401 +[2025-02-23 02:14:20] (step=0247600) Train Loss: 0.3255, Train Steps/Sec: 16.46, Grad Norm: 0.0396 +[2025-02-23 02:14:27] (step=0247700) Train Loss: 0.3253, Train Steps/Sec: 14.28, Grad Norm: 0.0399 +[2025-02-23 02:14:33] (step=0247800) Train Loss: 0.3254, Train Steps/Sec: 17.17, Grad Norm: 0.0411 +[2025-02-23 02:14:38] (step=0247900) Train Loss: 0.3255, Train Steps/Sec: 17.29, Grad Norm: 0.0420 +[2025-02-23 02:14:45] (step=0248000) Train Loss: 0.3254, Train Steps/Sec: 16.48, Grad Norm: 0.0373 +[2025-02-23 02:14:51] (step=0248100) Train Loss: 0.3256, Train Steps/Sec: 15.88, Grad Norm: 0.0420 +[2025-02-23 02:14:57] (step=0248200) Train Loss: 0.3256, Train Steps/Sec: 15.26, Grad Norm: 0.0385 +[2025-02-23 02:15:03] (step=0248300) Train Loss: 0.3255, Train Steps/Sec: 16.49, Grad Norm: 0.0398 +[2025-02-23 02:15:09] (step=0248400) Train Loss: 0.3251, Train Steps/Sec: 16.92, Grad Norm: 0.0422 +[2025-02-23 02:15:15] (step=0248500) Train Loss: 0.3254, Train Steps/Sec: 17.25, Grad Norm: 0.0397 +[2025-02-23 02:15:21] (step=0248600) Train Loss: 0.3254, Train Steps/Sec: 17.26, Grad Norm: 0.0401 +[2025-02-23 02:15:27] (step=0248700) Train Loss: 0.3250, Train Steps/Sec: 17.28, Grad Norm: 0.0428 +[2025-02-23 02:15:33] (step=0248800) Train Loss: 0.3256, Train Steps/Sec: 15.18, Grad Norm: 0.0400 +[2025-02-23 02:15:40] (step=0248900) Train Loss: 0.3256, Train Steps/Sec: 15.15, Grad Norm: 0.0379 +[2025-02-23 02:15:47] (step=0249000) Train Loss: 0.3256, Train Steps/Sec: 13.73, Grad Norm: 0.0428 +[2025-02-23 02:15:53] (step=0249100) Train Loss: 0.3252, Train Steps/Sec: 17.25, Grad Norm: 0.0379 +[2025-02-23 02:15:59] (step=0249200) Train Loss: 0.3259, Train Steps/Sec: 17.16, Grad Norm: 0.0403 +[2025-02-23 02:16:05] (step=0249300) Train Loss: 0.3263, Train Steps/Sec: 17.07, Grad Norm: 0.0393 +[2025-02-23 02:16:10] (step=0249400) Train Loss: 0.3251, Train Steps/Sec: 17.30, Grad Norm: 0.0405 +[2025-02-23 02:16:16] (step=0249500) Train Loss: 0.3246, Train Steps/Sec: 17.24, Grad Norm: 0.0391 +[2025-02-23 02:16:22] (step=0249600) Train Loss: 0.3256, Train Steps/Sec: 16.46, Grad Norm: 0.0453 +[2025-02-23 02:16:28] (step=0249700) Train Loss: 0.3256, Train Steps/Sec: 17.19, Grad Norm: 0.0406 +[2025-02-23 02:16:34] (step=0249800) Train Loss: 0.3264, Train Steps/Sec: 17.21, Grad Norm: 0.0397 +[2025-02-23 02:16:40] (step=0249900) Train Loss: 0.3250, Train Steps/Sec: 17.22, Grad Norm: 0.0379 +[2025-02-23 02:16:46] (step=0250000) Train Loss: 0.3252, Train Steps/Sec: 16.46, Grad Norm: 0.0403 +[2025-02-23 02:16:47] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0250000.pt +[2025-02-23 02:16:53] (step=0250100) Train Loss: 0.3247, Train Steps/Sec: 13.72, Grad Norm: 0.0408 +[2025-02-23 02:17:01] (step=0250200) Train Loss: 0.3255, Train Steps/Sec: 13.08, Grad Norm: 0.0410 +[2025-02-23 02:17:08] (step=0250300) Train Loss: 0.3252, Train Steps/Sec: 14.69, Grad Norm: 0.0413 +[2025-02-23 02:17:13] (step=0250400) Train Loss: 0.3249, Train Steps/Sec: 17.24, Grad Norm: 0.0402 +[2025-02-23 02:17:19] (step=0250500) Train Loss: 0.3256, Train Steps/Sec: 17.27, Grad Norm: 0.0387 +[2025-02-23 02:17:25] (step=0250600) Train Loss: 0.3253, Train Steps/Sec: 17.30, Grad Norm: 0.0418 +[2025-02-23 02:17:31] (step=0250700) Train Loss: 0.3254, Train Steps/Sec: 16.57, Grad Norm: 0.0436 +[2025-02-23 02:17:38] (step=0250800) Train Loss: 0.3261, Train Steps/Sec: 14.09, Grad Norm: 0.0424 +[2025-02-23 02:17:44] (step=0250900) Train Loss: 0.3257, Train Steps/Sec: 17.32, Grad Norm: 0.0370 +[2025-02-23 02:17:50] (step=0251000) Train Loss: 0.3250, Train Steps/Sec: 16.55, Grad Norm: 0.0413 +[2025-02-23 02:17:56] (step=0251100) Train Loss: 0.3252, Train Steps/Sec: 17.14, Grad Norm: 0.0408 +[2025-02-23 02:18:02] (step=0251200) Train Loss: 0.3253, Train Steps/Sec: 17.13, Grad Norm: 0.0372 +[2025-02-23 02:18:07] (step=0251300) Train Loss: 0.3254, Train Steps/Sec: 17.24, Grad Norm: 0.0395 +[2025-02-23 02:18:13] (step=0251400) Train Loss: 0.3255, Train Steps/Sec: 17.22, Grad Norm: 0.0437 +[2025-02-23 02:18:21] (step=0251500) Train Loss: 0.3253, Train Steps/Sec: 13.62, Grad Norm: 0.0394 +[2025-02-23 02:18:26] (step=0251600) Train Loss: 0.3253, Train Steps/Sec: 17.19, Grad Norm: 0.0434 +[2025-02-23 02:18:32] (step=0251700) Train Loss: 0.3252, Train Steps/Sec: 17.21, Grad Norm: 0.0434 +[2025-02-23 02:18:38] (step=0251800) Train Loss: 0.3257, Train Steps/Sec: 17.23, Grad Norm: 0.0381 +[2025-02-23 02:18:44] (step=0251900) Train Loss: 0.3259, Train Steps/Sec: 17.22, Grad Norm: 0.0439 +[2025-02-23 02:18:50] (step=0252000) Train Loss: 0.3252, Train Steps/Sec: 16.41, Grad Norm: 0.0417 +[2025-02-23 02:18:56] (step=0252100) Train Loss: 0.3249, Train Steps/Sec: 15.77, Grad Norm: 0.0440 +[2025-02-23 02:19:03] (step=0252200) Train Loss: 0.3259, Train Steps/Sec: 14.49, Grad Norm: 0.0358 +[2025-02-23 02:19:09] (step=0252300) Train Loss: 0.3251, Train Steps/Sec: 16.37, Grad Norm: 0.0457 +[2025-02-23 02:19:15] (step=0252400) Train Loss: 0.3250, Train Steps/Sec: 17.19, Grad Norm: 0.0401 +[2025-02-23 02:19:21] (step=0252500) Train Loss: 0.3256, Train Steps/Sec: 17.22, Grad Norm: 0.0421 +[2025-02-23 02:19:27] (step=0252600) Train Loss: 0.3255, Train Steps/Sec: 17.31, Grad Norm: 0.0393 +[2025-02-23 02:19:33] (step=0252700) Train Loss: 0.3255, Train Steps/Sec: 15.21, Grad Norm: 0.0402 +[2025-02-23 02:19:41] (step=0252800) Train Loss: 0.3246, Train Steps/Sec: 13.36, Grad Norm: 0.0377 +[2025-02-23 02:19:47] (step=0252900) Train Loss: 0.3252, Train Steps/Sec: 16.55, Grad Norm: 0.0394 +[2025-02-23 02:19:53] (step=0253000) Train Loss: 0.3252, Train Steps/Sec: 17.24, Grad Norm: 0.0390 +[2025-02-23 02:19:58] (step=0253100) Train Loss: 0.3249, Train Steps/Sec: 17.24, Grad Norm: 0.0413 +[2025-02-23 02:20:04] (step=0253200) Train Loss: 0.3256, Train Steps/Sec: 17.27, Grad Norm: 0.0412 +[2025-02-23 02:20:10] (step=0253300) Train Loss: 0.3253, Train Steps/Sec: 17.28, Grad Norm: 0.0385 +[2025-02-23 02:20:16] (step=0253400) Train Loss: 0.3248, Train Steps/Sec: 17.09, Grad Norm: 0.0423 +[2025-02-23 02:20:22] (step=0253500) Train Loss: 0.3265, Train Steps/Sec: 16.40, Grad Norm: 0.0401 +[2025-02-23 02:20:28] (step=0253600) Train Loss: 0.3258, Train Steps/Sec: 17.13, Grad Norm: 0.0419 +[2025-02-23 02:20:34] (step=0253700) Train Loss: 0.3254, Train Steps/Sec: 17.19, Grad Norm: 0.0372 +[2025-02-23 02:20:39] (step=0253800) Train Loss: 0.3260, Train Steps/Sec: 17.19, Grad Norm: 0.0403 +[2025-02-23 02:20:45] (step=0253900) Train Loss: 0.3251, Train Steps/Sec: 17.15, Grad Norm: 0.0391 +[2025-02-23 02:20:53] (step=0254000) Train Loss: 0.3250, Train Steps/Sec: 13.73, Grad Norm: 0.0424 +[2025-02-23 02:20:59] (step=0254100) Train Loss: 0.3248, Train Steps/Sec: 15.81, Grad Norm: 0.0379 +[2025-02-23 02:21:06] (step=0254200) Train Loss: 0.3251, Train Steps/Sec: 14.60, Grad Norm: 0.0469 +[2025-02-23 02:21:12] (step=0254300) Train Loss: 0.3249, Train Steps/Sec: 16.53, Grad Norm: 0.0412 +[2025-02-23 02:21:18] (step=0254400) Train Loss: 0.3262, Train Steps/Sec: 17.32, Grad Norm: 0.0408 +[2025-02-23 02:21:23] (step=0254500) Train Loss: 0.3248, Train Steps/Sec: 17.22, Grad Norm: 0.0435 +[2025-02-23 02:21:29] (step=0254600) Train Loss: 0.3260, Train Steps/Sec: 16.63, Grad Norm: 0.0390 +[2025-02-23 02:21:36] (step=0254700) Train Loss: 0.3256, Train Steps/Sec: 14.10, Grad Norm: 0.0405 +[2025-02-23 02:21:42] (step=0254800) Train Loss: 0.3252, Train Steps/Sec: 16.55, Grad Norm: 0.0407 +[2025-02-23 02:21:48] (step=0254900) Train Loss: 0.3249, Train Steps/Sec: 17.23, Grad Norm: 0.0419 +[2025-02-23 02:21:54] (step=0255000) Train Loss: 0.3255, Train Steps/Sec: 17.30, Grad Norm: 0.0391 +[2025-02-23 02:22:00] (step=0255100) Train Loss: 0.3256, Train Steps/Sec: 17.28, Grad Norm: 0.0477 +[2025-02-23 02:22:06] (step=0255200) Train Loss: 0.3255, Train Steps/Sec: 17.29, Grad Norm: 0.0435 +[2025-02-23 02:22:13] (step=0255300) Train Loss: 0.3255, Train Steps/Sec: 14.16, Grad Norm: 0.0398 +[2025-02-23 02:22:19] (step=0255400) Train Loss: 0.3249, Train Steps/Sec: 16.24, Grad Norm: 0.0411 +[2025-02-23 02:22:25] (step=0255500) Train Loss: 0.3256, Train Steps/Sec: 17.05, Grad Norm: 0.0413 +[2025-02-23 02:22:31] (step=0255600) Train Loss: 0.3250, Train Steps/Sec: 17.06, Grad Norm: 0.0384 +[2025-02-23 02:22:36] (step=0255700) Train Loss: 0.3256, Train Steps/Sec: 17.10, Grad Norm: 0.0405 +[2025-02-23 02:22:42] (step=0255800) Train Loss: 0.3255, Train Steps/Sec: 17.14, Grad Norm: 0.0400 +[2025-02-23 02:22:48] (step=0255900) Train Loss: 0.3252, Train Steps/Sec: 17.09, Grad Norm: 0.0376 +[2025-02-23 02:22:54] (step=0256000) Train Loss: 0.3264, Train Steps/Sec: 16.26, Grad Norm: 0.0390 +[2025-02-23 02:23:00] (step=0256100) Train Loss: 0.3248, Train Steps/Sec: 16.31, Grad Norm: 0.0417 +[2025-02-23 02:23:08] (step=0256200) Train Loss: 0.3253, Train Steps/Sec: 13.87, Grad Norm: 0.0471 +[2025-02-23 02:23:14] (step=0256300) Train Loss: 0.3253, Train Steps/Sec: 16.31, Grad Norm: 0.0430 +[2025-02-23 02:23:20] (step=0256400) Train Loss: 0.3247, Train Steps/Sec: 17.11, Grad Norm: 0.0416 +[2025-02-23 02:23:27] (step=0256500) Train Loss: 0.3252, Train Steps/Sec: 14.31, Grad Norm: 0.0458 +[2025-02-23 02:23:34] (step=0256600) Train Loss: 0.3251, Train Steps/Sec: 14.04, Grad Norm: 0.0431 +[2025-02-23 02:23:40] (step=0256700) Train Loss: 0.3256, Train Steps/Sec: 16.52, Grad Norm: 0.0393 +[2025-02-23 02:23:46] (step=0256800) Train Loss: 0.3247, Train Steps/Sec: 16.51, Grad Norm: 0.0395 +[2025-02-23 02:23:52] (step=0256900) Train Loss: 0.3254, Train Steps/Sec: 17.19, Grad Norm: 0.0428 +[2025-02-23 02:23:57] (step=0257000) Train Loss: 0.3257, Train Steps/Sec: 17.18, Grad Norm: 0.0423 +[2025-02-23 02:24:03] (step=0257100) Train Loss: 0.3248, Train Steps/Sec: 17.22, Grad Norm: 0.0377 +[2025-02-23 02:24:09] (step=0257200) Train Loss: 0.3256, Train Steps/Sec: 17.32, Grad Norm: 0.0387 +[2025-02-23 02:24:15] (step=0257300) Train Loss: 0.3253, Train Steps/Sec: 17.35, Grad Norm: 0.0381 +[2025-02-23 02:24:21] (step=0257400) Train Loss: 0.3255, Train Steps/Sec: 16.53, Grad Norm: 0.0423 +[2025-02-23 02:24:27] (step=0257500) Train Loss: 0.3254, Train Steps/Sec: 17.37, Grad Norm: 0.0409 +[2025-02-23 02:24:32] (step=0257600) Train Loss: 0.3251, Train Steps/Sec: 17.42, Grad Norm: 0.0383 +[2025-02-23 02:24:38] (step=0257700) Train Loss: 0.3259, Train Steps/Sec: 17.34, Grad Norm: 0.0424 +[2025-02-23 02:24:45] (step=0257800) Train Loss: 0.3253, Train Steps/Sec: 14.28, Grad Norm: 0.0382 +[2025-02-23 02:24:51] (step=0257900) Train Loss: 0.3252, Train Steps/Sec: 17.35, Grad Norm: 0.0412 +[2025-02-23 02:24:57] (step=0258000) Train Loss: 0.3252, Train Steps/Sec: 16.48, Grad Norm: 0.0405 +[2025-02-23 02:25:03] (step=0258100) Train Loss: 0.3255, Train Steps/Sec: 16.54, Grad Norm: 0.0391 +[2025-02-23 02:25:10] (step=0258200) Train Loss: 0.3255, Train Steps/Sec: 13.99, Grad Norm: 0.0419 +[2025-02-23 02:25:16] (step=0258300) Train Loss: 0.3252, Train Steps/Sec: 16.52, Grad Norm: 0.0404 +[2025-02-23 02:25:22] (step=0258400) Train Loss: 0.3253, Train Steps/Sec: 17.31, Grad Norm: 0.0397 +[2025-02-23 02:25:28] (step=0258500) Train Loss: 0.3254, Train Steps/Sec: 15.95, Grad Norm: 0.0412 +[2025-02-23 02:25:35] (step=0258600) Train Loss: 0.3254, Train Steps/Sec: 14.75, Grad Norm: 0.0425 +[2025-02-23 02:25:41] (step=0258700) Train Loss: 0.3250, Train Steps/Sec: 16.70, Grad Norm: 0.0385 +[2025-02-23 02:25:47] (step=0258800) Train Loss: 0.3256, Train Steps/Sec: 17.44, Grad Norm: 0.0404 +[2025-02-23 02:25:53] (step=0258900) Train Loss: 0.3251, Train Steps/Sec: 17.44, Grad Norm: 0.0389 +[2025-02-23 02:26:00] (step=0259000) Train Loss: 0.3254, Train Steps/Sec: 14.27, Grad Norm: 0.0402 +[2025-02-23 02:26:05] (step=0259100) Train Loss: 0.3253, Train Steps/Sec: 17.23, Grad Norm: 0.0399 +[2025-02-23 02:26:11] (step=0259200) Train Loss: 0.3254, Train Steps/Sec: 17.32, Grad Norm: 0.0424 +[2025-02-23 02:26:17] (step=0259300) Train Loss: 0.3250, Train Steps/Sec: 16.51, Grad Norm: 0.0433 +[2025-02-23 02:26:23] (step=0259400) Train Loss: 0.3244, Train Steps/Sec: 17.24, Grad Norm: 0.0418 +[2025-02-23 02:26:29] (step=0259500) Train Loss: 0.3252, Train Steps/Sec: 17.24, Grad Norm: 0.0379 +[2025-02-23 02:26:35] (step=0259600) Train Loss: 0.3253, Train Steps/Sec: 17.26, Grad Norm: 0.0401 +[2025-02-23 02:26:40] (step=0259700) Train Loss: 0.3257, Train Steps/Sec: 17.23, Grad Norm: 0.0428 +[2025-02-23 02:26:46] (step=0259800) Train Loss: 0.3248, Train Steps/Sec: 17.23, Grad Norm: 0.0397 +[2025-02-23 02:26:52] (step=0259900) Train Loss: 0.3256, Train Steps/Sec: 17.28, Grad Norm: 0.0377 +[2025-02-23 02:26:58] (step=0260000) Train Loss: 0.3254, Train Steps/Sec: 16.41, Grad Norm: 0.0396 +[2025-02-23 02:27:04] (step=0260100) Train Loss: 0.3251, Train Steps/Sec: 16.51, Grad Norm: 0.0402 +[2025-02-23 02:27:11] (step=0260200) Train Loss: 0.3252, Train Steps/Sec: 14.05, Grad Norm: 0.0370 +[2025-02-23 02:27:19] (step=0260300) Train Loss: 0.3249, Train Steps/Sec: 13.69, Grad Norm: 0.0403 +[2025-02-23 02:27:24] (step=0260400) Train Loss: 0.3249, Train Steps/Sec: 17.14, Grad Norm: 0.0452 +[2025-02-23 02:27:32] (step=0260500) Train Loss: 0.3257, Train Steps/Sec: 14.06, Grad Norm: 0.0416 +[2025-02-23 02:27:38] (step=0260600) Train Loss: 0.3255, Train Steps/Sec: 16.50, Grad Norm: 0.0385 +[2025-02-23 02:27:44] (step=0260700) Train Loss: 0.3249, Train Steps/Sec: 16.58, Grad Norm: 0.0395 +[2025-02-23 02:27:49] (step=0260800) Train Loss: 0.3258, Train Steps/Sec: 17.33, Grad Norm: 0.0414 +[2025-02-23 02:27:55] (step=0260900) Train Loss: 0.3253, Train Steps/Sec: 17.29, Grad Norm: 0.0391 +[2025-02-23 02:28:01] (step=0261000) Train Loss: 0.3259, Train Steps/Sec: 17.23, Grad Norm: 0.0405 +[2025-02-23 02:28:07] (step=0261100) Train Loss: 0.3253, Train Steps/Sec: 17.30, Grad Norm: 0.0376 +[2025-02-23 02:28:13] (step=0261200) Train Loss: 0.3253, Train Steps/Sec: 17.31, Grad Norm: 0.0434 +[2025-02-23 02:28:19] (step=0261300) Train Loss: 0.3253, Train Steps/Sec: 16.61, Grad Norm: 0.0400 +[2025-02-23 02:28:24] (step=0261400) Train Loss: 0.3248, Train Steps/Sec: 17.39, Grad Norm: 0.0388 +[2025-02-23 02:28:31] (step=0261500) Train Loss: 0.3251, Train Steps/Sec: 14.45, Grad Norm: 0.0394 +[2025-02-23 02:28:37] (step=0261600) Train Loss: 0.3249, Train Steps/Sec: 17.25, Grad Norm: 0.0410 +[2025-02-23 02:28:43] (step=0261700) Train Loss: 0.3257, Train Steps/Sec: 17.30, Grad Norm: 0.0385 +[2025-02-23 02:28:49] (step=0261800) Train Loss: 0.3250, Train Steps/Sec: 17.33, Grad Norm: 0.0408 +[2025-02-23 02:28:54] (step=0261900) Train Loss: 0.3255, Train Steps/Sec: 17.22, Grad Norm: 0.0399 +[2025-02-23 02:29:00] (step=0262000) Train Loss: 0.3252, Train Steps/Sec: 16.53, Grad Norm: 0.0397 +[2025-02-23 02:29:06] (step=0262100) Train Loss: 0.3250, Train Steps/Sec: 16.60, Grad Norm: 0.0443 +[2025-02-23 02:29:14] (step=0262200) Train Loss: 0.3251, Train Steps/Sec: 14.02, Grad Norm: 0.0397 +[2025-02-23 02:29:19] (step=0262300) Train Loss: 0.3254, Train Steps/Sec: 17.31, Grad Norm: 0.0413 +[2025-02-23 02:29:26] (step=0262400) Train Loss: 0.3257, Train Steps/Sec: 15.21, Grad Norm: 0.0394 +[2025-02-23 02:29:33] (step=0262500) Train Loss: 0.3248, Train Steps/Sec: 14.77, Grad Norm: 0.0441 +[2025-02-23 02:29:39] (step=0262600) Train Loss: 0.3245, Train Steps/Sec: 16.70, Grad Norm: 0.0365 +[2025-02-23 02:29:45] (step=0262700) Train Loss: 0.3250, Train Steps/Sec: 17.27, Grad Norm: 0.0391 +[2025-02-23 02:29:52] (step=0262800) Train Loss: 0.3254, Train Steps/Sec: 14.30, Grad Norm: 0.0432 +[2025-02-23 02:29:57] (step=0262900) Train Loss: 0.3259, Train Steps/Sec: 17.14, Grad Norm: 0.0392 +[2025-02-23 02:30:03] (step=0263000) Train Loss: 0.3252, Train Steps/Sec: 17.21, Grad Norm: 0.0398 +[2025-02-23 02:30:09] (step=0263100) Train Loss: 0.3259, Train Steps/Sec: 16.91, Grad Norm: 0.0401 +[2025-02-23 02:30:15] (step=0263200) Train Loss: 0.3256, Train Steps/Sec: 16.45, Grad Norm: 0.0397 +[2025-02-23 02:30:21] (step=0263300) Train Loss: 0.3255, Train Steps/Sec: 17.37, Grad Norm: 0.0408 +[2025-02-23 02:30:27] (step=0263400) Train Loss: 0.3260, Train Steps/Sec: 17.36, Grad Norm: 0.0383 +[2025-02-23 02:30:32] (step=0263500) Train Loss: 0.3253, Train Steps/Sec: 17.35, Grad Norm: 0.0403 +[2025-02-23 02:30:38] (step=0263600) Train Loss: 0.3252, Train Steps/Sec: 17.34, Grad Norm: 0.0412 +[2025-02-23 02:30:44] (step=0263700) Train Loss: 0.3249, Train Steps/Sec: 17.32, Grad Norm: 0.0397 +[2025-02-23 02:30:50] (step=0263800) Train Loss: 0.3253, Train Steps/Sec: 17.32, Grad Norm: 0.0417 +[2025-02-23 02:30:56] (step=0263900) Train Loss: 0.3245, Train Steps/Sec: 17.30, Grad Norm: 0.0382 +[2025-02-23 02:31:03] (step=0264000) Train Loss: 0.3252, Train Steps/Sec: 13.74, Grad Norm: 0.0398 +[2025-02-23 02:31:09] (step=0264100) Train Loss: 0.3243, Train Steps/Sec: 16.49, Grad Norm: 0.0399 +[2025-02-23 02:31:16] (step=0264200) Train Loss: 0.3253, Train Steps/Sec: 13.97, Grad Norm: 0.0401 +[2025-02-23 02:31:22] (step=0264300) Train Loss: 0.3253, Train Steps/Sec: 16.63, Grad Norm: 0.0386 +[2025-02-23 02:31:29] (step=0264400) Train Loss: 0.3249, Train Steps/Sec: 14.10, Grad Norm: 0.0411 +[2025-02-23 02:31:35] (step=0264500) Train Loss: 0.3248, Train Steps/Sec: 16.58, Grad Norm: 0.0403 +[2025-02-23 02:31:41] (step=0264600) Train Loss: 0.3256, Train Steps/Sec: 16.65, Grad Norm: 0.0440 +[2025-02-23 02:31:47] (step=0264700) Train Loss: 0.3248, Train Steps/Sec: 17.33, Grad Norm: 0.0415 +[2025-02-23 02:31:53] (step=0264800) Train Loss: 0.3246, Train Steps/Sec: 17.35, Grad Norm: 0.0368 +[2025-02-23 02:31:59] (step=0264900) Train Loss: 0.3246, Train Steps/Sec: 17.37, Grad Norm: 0.0408 +[2025-02-23 02:32:04] (step=0265000) Train Loss: 0.3242, Train Steps/Sec: 17.40, Grad Norm: 0.0434 +[2025-02-23 02:32:10] (step=0265100) Train Loss: 0.3255, Train Steps/Sec: 17.41, Grad Norm: 0.0414 +[2025-02-23 02:32:16] (step=0265200) Train Loss: 0.3257, Train Steps/Sec: 16.50, Grad Norm: 0.0375 +[2025-02-23 02:32:23] (step=0265300) Train Loss: 0.3251, Train Steps/Sec: 14.43, Grad Norm: 0.0369 +[2025-02-23 02:32:29] (step=0265400) Train Loss: 0.3249, Train Steps/Sec: 17.28, Grad Norm: 0.0402 +[2025-02-23 02:32:35] (step=0265500) Train Loss: 0.3250, Train Steps/Sec: 17.34, Grad Norm: 0.0379 +[2025-02-23 02:32:40] (step=0265600) Train Loss: 0.3252, Train Steps/Sec: 17.35, Grad Norm: 0.0405 +[2025-02-23 02:32:46] (step=0265700) Train Loss: 0.3253, Train Steps/Sec: 17.23, Grad Norm: 0.0414 +[2025-02-23 02:32:52] (step=0265800) Train Loss: 0.3250, Train Steps/Sec: 17.25, Grad Norm: 0.0446 +[2025-02-23 02:32:58] (step=0265900) Train Loss: 0.3253, Train Steps/Sec: 17.30, Grad Norm: 0.0388 +[2025-02-23 02:33:04] (step=0266000) Train Loss: 0.3253, Train Steps/Sec: 16.49, Grad Norm: 0.0429 +[2025-02-23 02:33:10] (step=0266100) Train Loss: 0.3252, Train Steps/Sec: 16.56, Grad Norm: 0.0391 +[2025-02-23 02:33:17] (step=0266200) Train Loss: 0.3249, Train Steps/Sec: 14.56, Grad Norm: 0.0409 +[2025-02-23 02:33:23] (step=0266300) Train Loss: 0.3250, Train Steps/Sec: 15.24, Grad Norm: 0.0392 +[2025-02-23 02:33:30] (step=0266400) Train Loss: 0.3253, Train Steps/Sec: 14.64, Grad Norm: 0.0406 +[2025-02-23 02:33:37] (step=0266500) Train Loss: 0.3251, Train Steps/Sec: 13.87, Grad Norm: 0.0422 +[2025-02-23 02:33:43] (step=0266600) Train Loss: 0.3246, Train Steps/Sec: 17.20, Grad Norm: 0.0430 +[2025-02-23 02:33:49] (step=0266700) Train Loss: 0.3254, Train Steps/Sec: 17.21, Grad Norm: 0.0427 +[2025-02-23 02:33:55] (step=0266800) Train Loss: 0.3256, Train Steps/Sec: 17.29, Grad Norm: 0.0425 +[2025-02-23 02:34:00] (step=0266900) Train Loss: 0.3254, Train Steps/Sec: 17.34, Grad Norm: 0.0425 +[2025-02-23 02:34:06] (step=0267000) Train Loss: 0.3254, Train Steps/Sec: 17.33, Grad Norm: 0.0417 +[2025-02-23 02:34:12] (step=0267100) Train Loss: 0.3252, Train Steps/Sec: 16.58, Grad Norm: 0.0375 +[2025-02-23 02:34:18] (step=0267200) Train Loss: 0.3252, Train Steps/Sec: 17.43, Grad Norm: 0.0425 +[2025-02-23 02:34:24] (step=0267300) Train Loss: 0.3250, Train Steps/Sec: 17.44, Grad Norm: 0.0417 +[2025-02-23 02:34:29] (step=0267400) Train Loss: 0.3249, Train Steps/Sec: 17.44, Grad Norm: 0.0371 +[2025-02-23 02:34:35] (step=0267500) Train Loss: 0.3251, Train Steps/Sec: 17.44, Grad Norm: 0.0408 +[2025-02-23 02:34:41] (step=0267600) Train Loss: 0.3248, Train Steps/Sec: 17.40, Grad Norm: 0.0422 +[2025-02-23 02:34:47] (step=0267700) Train Loss: 0.3251, Train Steps/Sec: 17.29, Grad Norm: 0.0384 +[2025-02-23 02:34:54] (step=0267800) Train Loss: 0.3249, Train Steps/Sec: 14.43, Grad Norm: 0.0392 +[2025-02-23 02:34:59] (step=0267900) Train Loss: 0.3253, Train Steps/Sec: 17.34, Grad Norm: 0.0388 +[2025-02-23 02:35:06] (step=0268000) Train Loss: 0.3257, Train Steps/Sec: 16.51, Grad Norm: 0.0403 +[2025-02-23 02:35:12] (step=0268100) Train Loss: 0.3248, Train Steps/Sec: 16.57, Grad Norm: 0.0400 +[2025-02-23 02:35:19] (step=0268200) Train Loss: 0.3247, Train Steps/Sec: 13.53, Grad Norm: 0.0418 +[2025-02-23 02:35:26] (step=0268300) Train Loss: 0.3256, Train Steps/Sec: 14.66, Grad Norm: 0.0392 +[2025-02-23 02:35:32] (step=0268400) Train Loss: 0.3256, Train Steps/Sec: 15.68, Grad Norm: 0.0380 +[2025-02-23 02:35:38] (step=0268500) Train Loss: 0.3254, Train Steps/Sec: 16.45, Grad Norm: 0.0413 +[2025-02-23 02:35:44] (step=0268600) Train Loss: 0.3249, Train Steps/Sec: 17.17, Grad Norm: 0.0393 +[2025-02-23 02:35:50] (step=0268700) Train Loss: 0.3247, Train Steps/Sec: 17.21, Grad Norm: 0.0395 +[2025-02-23 02:35:56] (step=0268800) Train Loss: 0.3248, Train Steps/Sec: 17.27, Grad Norm: 0.0409 +[2025-02-23 02:36:01] (step=0268900) Train Loss: 0.3254, Train Steps/Sec: 17.37, Grad Norm: 0.0408 +[2025-02-23 02:36:08] (step=0269000) Train Loss: 0.3250, Train Steps/Sec: 14.29, Grad Norm: 0.0376 +[2025-02-23 02:36:15] (step=0269100) Train Loss: 0.3251, Train Steps/Sec: 16.35, Grad Norm: 0.0404 +[2025-02-23 02:36:20] (step=0269200) Train Loss: 0.3255, Train Steps/Sec: 17.19, Grad Norm: 0.0386 +[2025-02-23 02:36:26] (step=0269300) Train Loss: 0.3255, Train Steps/Sec: 17.19, Grad Norm: 0.0382 +[2025-02-23 02:36:32] (step=0269400) Train Loss: 0.3248, Train Steps/Sec: 17.24, Grad Norm: 0.0405 +[2025-02-23 02:36:38] (step=0269500) Train Loss: 0.3249, Train Steps/Sec: 17.20, Grad Norm: 0.0380 +[2025-02-23 02:36:44] (step=0269600) Train Loss: 0.3247, Train Steps/Sec: 17.13, Grad Norm: 0.0396 +[2025-02-23 02:36:49] (step=0269700) Train Loss: 0.3248, Train Steps/Sec: 17.16, Grad Norm: 0.0385 +[2025-02-23 02:36:55] (step=0269800) Train Loss: 0.3249, Train Steps/Sec: 17.11, Grad Norm: 0.0397 +[2025-02-23 02:37:01] (step=0269900) Train Loss: 0.3249, Train Steps/Sec: 17.11, Grad Norm: 0.0417 +[2025-02-23 02:37:07] (step=0270000) Train Loss: 0.3252, Train Steps/Sec: 16.33, Grad Norm: 0.0405 +[2025-02-23 02:37:13] (step=0270100) Train Loss: 0.3253, Train Steps/Sec: 16.42, Grad Norm: 0.0402 +[2025-02-23 02:37:21] (step=0270200) Train Loss: 0.3249, Train Steps/Sec: 13.92, Grad Norm: 0.0409 +[2025-02-23 02:37:29] (step=0270300) Train Loss: 0.3249, Train Steps/Sec: 12.15, Grad Norm: 0.0408 +[2025-02-23 02:37:35] (step=0270400) Train Loss: 0.3254, Train Steps/Sec: 15.79, Grad Norm: 0.0401 +[2025-02-23 02:37:41] (step=0270500) Train Loss: 0.3250, Train Steps/Sec: 17.20, Grad Norm: 0.0373 +[2025-02-23 02:37:47] (step=0270600) Train Loss: 0.3247, Train Steps/Sec: 17.16, Grad Norm: 0.0410 +[2025-02-23 02:37:53] (step=0270700) Train Loss: 0.3255, Train Steps/Sec: 17.23, Grad Norm: 0.0354 +[2025-02-23 02:37:58] (step=0270800) Train Loss: 0.3255, Train Steps/Sec: 17.16, Grad Norm: 0.0461 +[2025-02-23 02:38:04] (step=0270900) Train Loss: 0.3248, Train Steps/Sec: 17.15, Grad Norm: 0.0363 +[2025-02-23 02:38:10] (step=0271000) Train Loss: 0.3253, Train Steps/Sec: 16.40, Grad Norm: 0.0433 +[2025-02-23 02:38:16] (step=0271100) Train Loss: 0.3253, Train Steps/Sec: 17.24, Grad Norm: 0.0371 +[2025-02-23 02:38:22] (step=0271200) Train Loss: 0.3253, Train Steps/Sec: 17.19, Grad Norm: 0.0383 +[2025-02-23 02:38:28] (step=0271300) Train Loss: 0.3253, Train Steps/Sec: 17.26, Grad Norm: 0.0387 +[2025-02-23 02:38:34] (step=0271400) Train Loss: 0.3248, Train Steps/Sec: 17.23, Grad Norm: 0.0428 +[2025-02-23 02:38:41] (step=0271500) Train Loss: 0.3248, Train Steps/Sec: 14.30, Grad Norm: 0.0375 +[2025-02-23 02:38:46] (step=0271600) Train Loss: 0.3246, Train Steps/Sec: 17.26, Grad Norm: 0.0391 +[2025-02-23 02:38:52] (step=0271700) Train Loss: 0.3252, Train Steps/Sec: 17.29, Grad Norm: 0.0402 +[2025-02-23 02:38:58] (step=0271800) Train Loss: 0.3252, Train Steps/Sec: 17.31, Grad Norm: 0.0365 +[2025-02-23 02:39:04] (step=0271900) Train Loss: 0.3244, Train Steps/Sec: 17.30, Grad Norm: 0.0421 +[2025-02-23 02:39:10] (step=0272000) Train Loss: 0.3250, Train Steps/Sec: 16.51, Grad Norm: 0.0377 +[2025-02-23 02:39:16] (step=0272100) Train Loss: 0.3248, Train Steps/Sec: 15.30, Grad Norm: 0.0394 +[2025-02-23 02:39:24] (step=0272200) Train Loss: 0.3249, Train Steps/Sec: 12.67, Grad Norm: 0.0425 +[2025-02-23 02:39:30] (step=0272300) Train Loss: 0.3247, Train Steps/Sec: 17.35, Grad Norm: 0.0399 +[2025-02-23 02:39:36] (step=0272400) Train Loss: 0.3259, Train Steps/Sec: 15.87, Grad Norm: 0.0362 +[2025-02-23 02:39:42] (step=0272500) Train Loss: 0.3253, Train Steps/Sec: 17.30, Grad Norm: 0.0395 +[2025-02-23 02:39:48] (step=0272600) Train Loss: 0.3254, Train Steps/Sec: 17.32, Grad Norm: 0.0407 +[2025-02-23 02:39:54] (step=0272700) Train Loss: 0.3252, Train Steps/Sec: 17.26, Grad Norm: 0.0396 +[2025-02-23 02:40:01] (step=0272800) Train Loss: 0.3248, Train Steps/Sec: 14.46, Grad Norm: 0.0399 +[2025-02-23 02:40:06] (step=0272900) Train Loss: 0.3252, Train Steps/Sec: 17.38, Grad Norm: 0.0407 +[2025-02-23 02:40:12] (step=0273000) Train Loss: 0.3252, Train Steps/Sec: 16.60, Grad Norm: 0.0383 +[2025-02-23 02:40:18] (step=0273100) Train Loss: 0.3254, Train Steps/Sec: 17.39, Grad Norm: 0.0356 +[2025-02-23 02:40:24] (step=0273200) Train Loss: 0.3258, Train Steps/Sec: 17.43, Grad Norm: 0.0417 +[2025-02-23 02:40:30] (step=0273300) Train Loss: 0.3252, Train Steps/Sec: 17.35, Grad Norm: 0.0421 +[2025-02-23 02:40:35] (step=0273400) Train Loss: 0.3248, Train Steps/Sec: 17.32, Grad Norm: 0.0422 +[2025-02-23 02:40:41] (step=0273500) Train Loss: 0.3249, Train Steps/Sec: 17.19, Grad Norm: 0.0371 +[2025-02-23 02:40:47] (step=0273600) Train Loss: 0.3246, Train Steps/Sec: 17.27, Grad Norm: 0.0414 +[2025-02-23 02:40:53] (step=0273700) Train Loss: 0.3255, Train Steps/Sec: 17.33, Grad Norm: 0.0428 +[2025-02-23 02:40:58] (step=0273800) Train Loss: 0.3252, Train Steps/Sec: 17.41, Grad Norm: 0.0432 +[2025-02-23 02:41:04] (step=0273900) Train Loss: 0.3249, Train Steps/Sec: 17.40, Grad Norm: 0.0389 +[2025-02-23 02:41:11] (step=0274000) Train Loss: 0.3252, Train Steps/Sec: 13.88, Grad Norm: 0.0410 +[2025-02-23 02:41:19] (step=0274100) Train Loss: 0.3250, Train Steps/Sec: 13.44, Grad Norm: 0.0389 +[2025-02-23 02:41:26] (step=0274200) Train Loss: 0.3250, Train Steps/Sec: 13.36, Grad Norm: 0.0414 +[2025-02-23 02:41:32] (step=0274300) Train Loss: 0.3250, Train Steps/Sec: 16.39, Grad Norm: 0.0393 +[2025-02-23 02:41:39] (step=0274400) Train Loss: 0.3252, Train Steps/Sec: 16.43, Grad Norm: 0.0385 +[2025-02-23 02:41:44] (step=0274500) Train Loss: 0.3253, Train Steps/Sec: 17.30, Grad Norm: 0.0394 +[2025-02-23 02:41:50] (step=0274600) Train Loss: 0.3252, Train Steps/Sec: 17.40, Grad Norm: 0.0416 +[2025-02-23 02:41:56] (step=0274700) Train Loss: 0.3249, Train Steps/Sec: 17.47, Grad Norm: 0.0417 +[2025-02-23 02:42:02] (step=0274800) Train Loss: 0.3250, Train Steps/Sec: 17.40, Grad Norm: 0.0379 +[2025-02-23 02:42:08] (step=0274900) Train Loss: 0.3249, Train Steps/Sec: 16.55, Grad Norm: 0.0376 +[2025-02-23 02:42:13] (step=0275000) Train Loss: 0.3252, Train Steps/Sec: 17.34, Grad Norm: 0.0412 +[2025-02-23 02:42:19] (step=0275100) Train Loss: 0.3246, Train Steps/Sec: 17.36, Grad Norm: 0.0409 +[2025-02-23 02:42:25] (step=0275200) Train Loss: 0.3255, Train Steps/Sec: 17.20, Grad Norm: 0.0384 +[2025-02-23 02:42:32] (step=0275300) Train Loss: 0.3254, Train Steps/Sec: 14.55, Grad Norm: 0.0379 +[2025-02-23 02:42:38] (step=0275400) Train Loss: 0.3250, Train Steps/Sec: 17.41, Grad Norm: 0.0397 +[2025-02-23 02:42:43] (step=0275500) Train Loss: 0.3254, Train Steps/Sec: 17.35, Grad Norm: 0.0422 +[2025-02-23 02:42:49] (step=0275600) Train Loss: 0.3247, Train Steps/Sec: 17.38, Grad Norm: 0.0388 +[2025-02-23 02:42:55] (step=0275700) Train Loss: 0.3247, Train Steps/Sec: 17.32, Grad Norm: 0.0389 +[2025-02-23 02:43:01] (step=0275800) Train Loss: 0.3250, Train Steps/Sec: 17.40, Grad Norm: 0.0394 +[2025-02-23 02:43:06] (step=0275900) Train Loss: 0.3253, Train Steps/Sec: 17.41, Grad Norm: 0.0372 +[2025-02-23 02:43:13] (step=0276000) Train Loss: 0.3248, Train Steps/Sec: 15.86, Grad Norm: 0.0385 +[2025-02-23 02:43:20] (step=0276100) Train Loss: 0.3246, Train Steps/Sec: 13.59, Grad Norm: 0.0395 +[2025-02-23 02:43:27] (step=0276200) Train Loss: 0.3251, Train Steps/Sec: 14.53, Grad Norm: 0.0418 +[2025-02-23 02:43:33] (step=0276300) Train Loss: 0.3251, Train Steps/Sec: 15.85, Grad Norm: 0.0413 +[2025-02-23 02:43:39] (step=0276400) Train Loss: 0.3249, Train Steps/Sec: 16.62, Grad Norm: 0.0397 +[2025-02-23 02:43:46] (step=0276500) Train Loss: 0.3249, Train Steps/Sec: 14.37, Grad Norm: 0.0444 +[2025-02-23 02:43:52] (step=0276600) Train Loss: 0.3244, Train Steps/Sec: 17.25, Grad Norm: 0.0424 +[2025-02-23 02:43:58] (step=0276700) Train Loss: 0.3247, Train Steps/Sec: 17.24, Grad Norm: 0.0392 +[2025-02-23 02:44:04] (step=0276800) Train Loss: 0.3239, Train Steps/Sec: 17.29, Grad Norm: 0.0404 +[2025-02-23 02:44:10] (step=0276900) Train Loss: 0.3251, Train Steps/Sec: 16.54, Grad Norm: 0.0358 +[2025-02-23 02:44:15] (step=0277000) Train Loss: 0.3247, Train Steps/Sec: 17.27, Grad Norm: 0.0459 +[2025-02-23 02:44:21] (step=0277100) Train Loss: 0.3253, Train Steps/Sec: 17.33, Grad Norm: 0.0392 +[2025-02-23 02:44:27] (step=0277200) Train Loss: 0.3250, Train Steps/Sec: 17.24, Grad Norm: 0.0398 +[2025-02-23 02:44:33] (step=0277300) Train Loss: 0.3255, Train Steps/Sec: 17.24, Grad Norm: 0.0385 +[2025-02-23 02:44:39] (step=0277400) Train Loss: 0.3248, Train Steps/Sec: 17.38, Grad Norm: 0.0440 +[2025-02-23 02:44:44] (step=0277500) Train Loss: 0.3245, Train Steps/Sec: 17.37, Grad Norm: 0.0400 +[2025-02-23 02:44:50] (step=0277600) Train Loss: 0.3248, Train Steps/Sec: 17.41, Grad Norm: 0.0363 +[2025-02-23 02:44:56] (step=0277700) Train Loss: 0.3251, Train Steps/Sec: 17.34, Grad Norm: 0.0365 +[2025-02-23 02:45:03] (step=0277800) Train Loss: 0.3252, Train Steps/Sec: 14.49, Grad Norm: 0.0413 +[2025-02-23 02:45:09] (step=0277900) Train Loss: 0.3253, Train Steps/Sec: 16.63, Grad Norm: 0.0377 +[2025-02-23 02:45:16] (step=0278000) Train Loss: 0.3252, Train Steps/Sec: 14.79, Grad Norm: 0.0397 +[2025-02-23 02:45:22] (step=0278100) Train Loss: 0.3252, Train Steps/Sec: 15.33, Grad Norm: 0.0406 +[2025-02-23 02:45:29] (step=0278200) Train Loss: 0.3250, Train Steps/Sec: 14.15, Grad Norm: 0.0375 +[2025-02-23 02:45:35] (step=0278300) Train Loss: 0.3252, Train Steps/Sec: 16.68, Grad Norm: 0.0392 +[2025-02-23 02:45:41] (step=0278400) Train Loss: 0.3248, Train Steps/Sec: 16.63, Grad Norm: 0.0398 +[2025-02-23 02:45:47] (step=0278500) Train Loss: 0.3248, Train Steps/Sec: 17.45, Grad Norm: 0.0376 +[2025-02-23 02:45:53] (step=0278600) Train Loss: 0.3251, Train Steps/Sec: 17.43, Grad Norm: 0.0417 +[2025-02-23 02:45:58] (step=0278700) Train Loss: 0.3255, Train Steps/Sec: 17.41, Grad Norm: 0.0371 +[2025-02-23 02:46:04] (step=0278800) Train Loss: 0.3243, Train Steps/Sec: 17.34, Grad Norm: 0.0387 +[2025-02-23 02:46:10] (step=0278900) Train Loss: 0.3253, Train Steps/Sec: 16.49, Grad Norm: 0.0418 +[2025-02-23 02:46:17] (step=0279000) Train Loss: 0.3251, Train Steps/Sec: 14.32, Grad Norm: 0.0383 +[2025-02-23 02:46:23] (step=0279100) Train Loss: 0.3251, Train Steps/Sec: 17.26, Grad Norm: 0.0402 +[2025-02-23 02:46:29] (step=0279200) Train Loss: 0.3256, Train Steps/Sec: 17.29, Grad Norm: 0.0374 +[2025-02-23 02:46:35] (step=0279300) Train Loss: 0.3250, Train Steps/Sec: 17.28, Grad Norm: 0.0403 +[2025-02-23 02:46:40] (step=0279400) Train Loss: 0.3254, Train Steps/Sec: 17.24, Grad Norm: 0.0386 +[2025-02-23 02:46:46] (step=0279500) Train Loss: 0.3243, Train Steps/Sec: 17.27, Grad Norm: 0.0394 +[2025-02-23 02:46:52] (step=0279600) Train Loss: 0.3246, Train Steps/Sec: 17.32, Grad Norm: 0.0419 +[2025-02-23 02:46:58] (step=0279700) Train Loss: 0.3251, Train Steps/Sec: 17.29, Grad Norm: 0.0420 +[2025-02-23 02:47:03] (step=0279800) Train Loss: 0.3248, Train Steps/Sec: 17.26, Grad Norm: 0.0366 +[2025-02-23 02:47:10] (step=0279900) Train Loss: 0.3248, Train Steps/Sec: 15.82, Grad Norm: 0.0381 +[2025-02-23 02:47:17] (step=0280000) Train Loss: 0.3243, Train Steps/Sec: 14.66, Grad Norm: 0.0399 +[2025-02-23 02:47:23] (step=0280100) Train Loss: 0.3247, Train Steps/Sec: 15.20, Grad Norm: 0.0420 +[2025-02-23 02:47:30] (step=0280200) Train Loss: 0.3253, Train Steps/Sec: 15.62, Grad Norm: 0.0415 +[2025-02-23 02:47:38] (step=0280300) Train Loss: 0.3253, Train Steps/Sec: 12.63, Grad Norm: 0.0401 +[2025-02-23 02:47:44] (step=0280400) Train Loss: 0.3246, Train Steps/Sec: 16.40, Grad Norm: 0.0385 +[2025-02-23 02:47:49] (step=0280500) Train Loss: 0.3249, Train Steps/Sec: 17.28, Grad Norm: 0.0419 +[2025-02-23 02:47:55] (step=0280600) Train Loss: 0.3251, Train Steps/Sec: 17.39, Grad Norm: 0.0390 +[2025-02-23 02:48:01] (step=0280700) Train Loss: 0.3254, Train Steps/Sec: 17.41, Grad Norm: 0.0375 +[2025-02-23 02:48:07] (step=0280800) Train Loss: 0.3250, Train Steps/Sec: 16.60, Grad Norm: 0.0390 +[2025-02-23 02:48:13] (step=0280900) Train Loss: 0.3251, Train Steps/Sec: 17.27, Grad Norm: 0.0401 +[2025-02-23 02:48:19] (step=0281000) Train Loss: 0.3246, Train Steps/Sec: 17.31, Grad Norm: 0.0391 +[2025-02-23 02:48:24] (step=0281100) Train Loss: 0.3252, Train Steps/Sec: 17.35, Grad Norm: 0.0395 +[2025-02-23 02:48:30] (step=0281200) Train Loss: 0.3249, Train Steps/Sec: 17.36, Grad Norm: 0.0426 +[2025-02-23 02:48:36] (step=0281300) Train Loss: 0.3252, Train Steps/Sec: 17.37, Grad Norm: 0.0385 +[2025-02-23 02:48:42] (step=0281400) Train Loss: 0.3245, Train Steps/Sec: 17.34, Grad Norm: 0.0418 +[2025-02-23 02:48:49] (step=0281500) Train Loss: 0.3248, Train Steps/Sec: 13.97, Grad Norm: 0.0372 +[2025-02-23 02:48:55] (step=0281600) Train Loss: 0.3252, Train Steps/Sec: 17.00, Grad Norm: 0.0378 +[2025-02-23 02:49:00] (step=0281700) Train Loss: 0.3248, Train Steps/Sec: 17.16, Grad Norm: 0.0380 +[2025-02-23 02:49:06] (step=0281800) Train Loss: 0.3254, Train Steps/Sec: 16.60, Grad Norm: 0.0399 +[2025-02-23 02:49:14] (step=0281900) Train Loss: 0.3247, Train Steps/Sec: 14.21, Grad Norm: 0.0362 +[2025-02-23 02:49:19] (step=0282000) Train Loss: 0.3254, Train Steps/Sec: 17.30, Grad Norm: 0.0383 +[2025-02-23 02:49:26] (step=0282100) Train Loss: 0.3245, Train Steps/Sec: 15.30, Grad Norm: 0.0396 +[2025-02-23 02:49:32] (step=0282200) Train Loss: 0.3246, Train Steps/Sec: 15.78, Grad Norm: 0.0367 +[2025-02-23 02:49:39] (step=0282300) Train Loss: 0.3250, Train Steps/Sec: 15.11, Grad Norm: 0.0392 +[2025-02-23 02:49:45] (step=0282400) Train Loss: 0.3252, Train Steps/Sec: 16.54, Grad Norm: 0.0417 +[2025-02-23 02:49:51] (step=0282500) Train Loss: 0.3254, Train Steps/Sec: 17.22, Grad Norm: 0.0401 +[2025-02-23 02:49:56] (step=0282600) Train Loss: 0.3252, Train Steps/Sec: 17.15, Grad Norm: 0.0381 +[2025-02-23 02:50:02] (step=0282700) Train Loss: 0.3250, Train Steps/Sec: 17.04, Grad Norm: 0.0380 +[2025-02-23 02:50:10] (step=0282800) Train Loss: 0.3251, Train Steps/Sec: 13.82, Grad Norm: 0.0442 +[2025-02-23 02:50:15] (step=0282900) Train Loss: 0.3251, Train Steps/Sec: 17.37, Grad Norm: 0.0388 +[2025-02-23 02:50:21] (step=0283000) Train Loss: 0.3249, Train Steps/Sec: 17.36, Grad Norm: 0.0398 +[2025-02-23 02:50:27] (step=0283100) Train Loss: 0.3253, Train Steps/Sec: 17.34, Grad Norm: 0.0390 +[2025-02-23 02:50:33] (step=0283200) Train Loss: 0.3250, Train Steps/Sec: 17.33, Grad Norm: 0.0453 +[2025-02-23 02:50:38] (step=0283300) Train Loss: 0.3244, Train Steps/Sec: 17.24, Grad Norm: 0.0358 +[2025-02-23 02:50:44] (step=0283400) Train Loss: 0.3244, Train Steps/Sec: 17.33, Grad Norm: 0.0398 +[2025-02-23 02:50:50] (step=0283500) Train Loss: 0.3249, Train Steps/Sec: 17.32, Grad Norm: 0.0451 +[2025-02-23 02:50:56] (step=0283600) Train Loss: 0.3248, Train Steps/Sec: 17.22, Grad Norm: 0.0415 +[2025-02-23 02:51:02] (step=0283700) Train Loss: 0.3243, Train Steps/Sec: 17.21, Grad Norm: 0.0365 +[2025-02-23 02:51:08] (step=0283800) Train Loss: 0.3250, Train Steps/Sec: 15.17, Grad Norm: 0.0419 +[2025-02-23 02:51:15] (step=0283900) Train Loss: 0.3244, Train Steps/Sec: 15.12, Grad Norm: 0.0365 +[2025-02-23 02:51:22] (step=0284000) Train Loss: 0.3247, Train Steps/Sec: 13.84, Grad Norm: 0.0383 +[2025-02-23 02:51:28] (step=0284100) Train Loss: 0.3250, Train Steps/Sec: 15.87, Grad Norm: 0.0425 +[2025-02-23 02:51:35] (step=0284200) Train Loss: 0.3251, Train Steps/Sec: 15.12, Grad Norm: 0.0405 +[2025-02-23 02:51:41] (step=0284300) Train Loss: 0.3245, Train Steps/Sec: 15.84, Grad Norm: 0.0366 +[2025-02-23 02:51:47] (step=0284400) Train Loss: 0.3246, Train Steps/Sec: 16.59, Grad Norm: 0.0411 +[2025-02-23 02:51:53] (step=0284500) Train Loss: 0.3247, Train Steps/Sec: 17.28, Grad Norm: 0.0407 +[2025-02-23 02:51:59] (step=0284600) Train Loss: 0.3255, Train Steps/Sec: 17.34, Grad Norm: 0.0379 +[2025-02-23 02:52:05] (step=0284700) Train Loss: 0.3240, Train Steps/Sec: 16.53, Grad Norm: 0.0418 +[2025-02-23 02:52:11] (step=0284800) Train Loss: 0.3248, Train Steps/Sec: 17.31, Grad Norm: 0.0383 +[2025-02-23 02:52:16] (step=0284900) Train Loss: 0.3252, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 02:52:22] (step=0285000) Train Loss: 0.3253, Train Steps/Sec: 17.33, Grad Norm: 0.0365 +[2025-02-23 02:52:28] (step=0285100) Train Loss: 0.3247, Train Steps/Sec: 17.34, Grad Norm: 0.0416 +[2025-02-23 02:52:34] (step=0285200) Train Loss: 0.3250, Train Steps/Sec: 17.17, Grad Norm: 0.0400 +[2025-02-23 02:52:41] (step=0285300) Train Loss: 0.3253, Train Steps/Sec: 14.38, Grad Norm: 0.0400 +[2025-02-23 02:52:47] (step=0285400) Train Loss: 0.3244, Train Steps/Sec: 17.25, Grad Norm: 0.0452 +[2025-02-23 02:52:52] (step=0285500) Train Loss: 0.3245, Train Steps/Sec: 17.23, Grad Norm: 0.0385 +[2025-02-23 02:52:58] (step=0285600) Train Loss: 0.3248, Train Steps/Sec: 17.39, Grad Norm: 0.0409 +[2025-02-23 02:53:04] (step=0285700) Train Loss: 0.3244, Train Steps/Sec: 15.93, Grad Norm: 0.0399 +[2025-02-23 02:53:11] (step=0285800) Train Loss: 0.3255, Train Steps/Sec: 14.75, Grad Norm: 0.0398 +[2025-02-23 02:53:17] (step=0285900) Train Loss: 0.3246, Train Steps/Sec: 17.32, Grad Norm: 0.0401 +[2025-02-23 02:53:23] (step=0286000) Train Loss: 0.3248, Train Steps/Sec: 16.61, Grad Norm: 0.0384 +[2025-02-23 02:53:29] (step=0286100) Train Loss: 0.3247, Train Steps/Sec: 15.88, Grad Norm: 0.0369 +[2025-02-23 02:53:36] (step=0286200) Train Loss: 0.3253, Train Steps/Sec: 15.79, Grad Norm: 0.0401 +[2025-02-23 02:53:42] (step=0286300) Train Loss: 0.3250, Train Steps/Sec: 15.17, Grad Norm: 0.0393 +[2025-02-23 02:53:48] (step=0286400) Train Loss: 0.3249, Train Steps/Sec: 16.53, Grad Norm: 0.0408 +[2025-02-23 02:53:55] (step=0286500) Train Loss: 0.3242, Train Steps/Sec: 14.27, Grad Norm: 0.0381 +[2025-02-23 02:54:01] (step=0286600) Train Loss: 0.3250, Train Steps/Sec: 17.31, Grad Norm: 0.0374 +[2025-02-23 02:54:07] (step=0286700) Train Loss: 0.3252, Train Steps/Sec: 16.61, Grad Norm: 0.0404 +[2025-02-23 02:54:13] (step=0286800) Train Loss: 0.3255, Train Steps/Sec: 17.39, Grad Norm: 0.0369 +[2025-02-23 02:54:19] (step=0286900) Train Loss: 0.3246, Train Steps/Sec: 17.41, Grad Norm: 0.0381 +[2025-02-23 02:54:24] (step=0287000) Train Loss: 0.3242, Train Steps/Sec: 17.41, Grad Norm: 0.0401 +[2025-02-23 02:54:30] (step=0287100) Train Loss: 0.3253, Train Steps/Sec: 17.39, Grad Norm: 0.0387 +[2025-02-23 02:54:36] (step=0287200) Train Loss: 0.3247, Train Steps/Sec: 17.33, Grad Norm: 0.0373 +[2025-02-23 02:54:42] (step=0287300) Train Loss: 0.3245, Train Steps/Sec: 17.33, Grad Norm: 0.0365 +[2025-02-23 02:54:47] (step=0287400) Train Loss: 0.3247, Train Steps/Sec: 17.35, Grad Norm: 0.0396 +[2025-02-23 02:54:53] (step=0287500) Train Loss: 0.3254, Train Steps/Sec: 17.36, Grad Norm: 0.0377 +[2025-02-23 02:54:59] (step=0287600) Train Loss: 0.3244, Train Steps/Sec: 17.39, Grad Norm: 0.0420 +[2025-02-23 02:55:05] (step=0287700) Train Loss: 0.3247, Train Steps/Sec: 15.24, Grad Norm: 0.0436 +[2025-02-23 02:55:13] (step=0287800) Train Loss: 0.3249, Train Steps/Sec: 12.99, Grad Norm: 0.0400 +[2025-02-23 02:55:19] (step=0287900) Train Loss: 0.3242, Train Steps/Sec: 16.59, Grad Norm: 0.0404 +[2025-02-23 02:55:25] (step=0288000) Train Loss: 0.3250, Train Steps/Sec: 17.29, Grad Norm: 0.0395 +[2025-02-23 02:55:31] (step=0288100) Train Loss: 0.3246, Train Steps/Sec: 15.83, Grad Norm: 0.0369 +[2025-02-23 02:55:38] (step=0288200) Train Loss: 0.3251, Train Steps/Sec: 15.68, Grad Norm: 0.0383 +[2025-02-23 02:55:44] (step=0288300) Train Loss: 0.3245, Train Steps/Sec: 15.07, Grad Norm: 0.0380 +[2025-02-23 02:55:50] (step=0288400) Train Loss: 0.3248, Train Steps/Sec: 16.48, Grad Norm: 0.0413 +[2025-02-23 02:55:56] (step=0288500) Train Loss: 0.3248, Train Steps/Sec: 17.26, Grad Norm: 0.0386 +[2025-02-23 02:56:02] (step=0288600) Train Loss: 0.3253, Train Steps/Sec: 16.57, Grad Norm: 0.0396 +[2025-02-23 02:56:08] (step=0288700) Train Loss: 0.3247, Train Steps/Sec: 17.33, Grad Norm: 0.0397 +[2025-02-23 02:56:14] (step=0288800) Train Loss: 0.3245, Train Steps/Sec: 17.35, Grad Norm: 0.0426 +[2025-02-23 02:56:20] (step=0288900) Train Loss: 0.3245, Train Steps/Sec: 17.37, Grad Norm: 0.0382 +[2025-02-23 02:56:26] (step=0289000) Train Loss: 0.3247, Train Steps/Sec: 14.38, Grad Norm: 0.0413 +[2025-02-23 02:56:32] (step=0289100) Train Loss: 0.3248, Train Steps/Sec: 17.25, Grad Norm: 0.0392 +[2025-02-23 02:56:38] (step=0289200) Train Loss: 0.3248, Train Steps/Sec: 17.28, Grad Norm: 0.0402 +[2025-02-23 02:56:44] (step=0289300) Train Loss: 0.3244, Train Steps/Sec: 17.36, Grad Norm: 0.0398 +[2025-02-23 02:56:50] (step=0289400) Train Loss: 0.3250, Train Steps/Sec: 17.17, Grad Norm: 0.0385 +[2025-02-23 02:56:56] (step=0289500) Train Loss: 0.3250, Train Steps/Sec: 17.07, Grad Norm: 0.0404 +[2025-02-23 02:57:02] (step=0289600) Train Loss: 0.3244, Train Steps/Sec: 15.73, Grad Norm: 0.0364 +[2025-02-23 02:57:09] (step=0289700) Train Loss: 0.3249, Train Steps/Sec: 15.08, Grad Norm: 0.0395 +[2025-02-23 02:57:15] (step=0289800) Train Loss: 0.3251, Train Steps/Sec: 16.37, Grad Norm: 0.0434 +[2025-02-23 02:57:21] (step=0289900) Train Loss: 0.3247, Train Steps/Sec: 16.41, Grad Norm: 0.0398 +[2025-02-23 02:57:27] (step=0290000) Train Loss: 0.3246, Train Steps/Sec: 17.04, Grad Norm: 0.0395 +[2025-02-23 02:57:33] (step=0290100) Train Loss: 0.3248, Train Steps/Sec: 15.62, Grad Norm: 0.0400 +[2025-02-23 02:57:39] (step=0290200) Train Loss: 0.3253, Train Steps/Sec: 15.47, Grad Norm: 0.0416 +[2025-02-23 02:57:47] (step=0290300) Train Loss: 0.3246, Train Steps/Sec: 12.59, Grad Norm: 0.0413 +[2025-02-23 02:57:54] (step=0290400) Train Loss: 0.3243, Train Steps/Sec: 16.35, Grad Norm: 0.0366 +[2025-02-23 02:57:59] (step=0290500) Train Loss: 0.3251, Train Steps/Sec: 17.04, Grad Norm: 0.0383 +[2025-02-23 02:58:06] (step=0290600) Train Loss: 0.3246, Train Steps/Sec: 16.33, Grad Norm: 0.0401 +[2025-02-23 02:58:11] (step=0290700) Train Loss: 0.3247, Train Steps/Sec: 17.14, Grad Norm: 0.0391 +[2025-02-23 02:58:17] (step=0290800) Train Loss: 0.3244, Train Steps/Sec: 17.40, Grad Norm: 0.0382 +[2025-02-23 02:58:23] (step=0290900) Train Loss: 0.3249, Train Steps/Sec: 17.40, Grad Norm: 0.0417 +[2025-02-23 02:58:29] (step=0291000) Train Loss: 0.3257, Train Steps/Sec: 17.36, Grad Norm: 0.0397 +[2025-02-23 02:58:34] (step=0291100) Train Loss: 0.3248, Train Steps/Sec: 17.36, Grad Norm: 0.0389 +[2025-02-23 02:58:40] (step=0291200) Train Loss: 0.3248, Train Steps/Sec: 17.37, Grad Norm: 0.0410 +[2025-02-23 02:58:46] (step=0291300) Train Loss: 0.3248, Train Steps/Sec: 17.39, Grad Norm: 0.0416 +[2025-02-23 02:58:52] (step=0291400) Train Loss: 0.3252, Train Steps/Sec: 17.35, Grad Norm: 0.0364 +[2025-02-23 02:58:59] (step=0291500) Train Loss: 0.3251, Train Steps/Sec: 14.21, Grad Norm: 0.0376 +[2025-02-23 02:59:05] (step=0291600) Train Loss: 0.3248, Train Steps/Sec: 14.71, Grad Norm: 0.0383 +[2025-02-23 02:59:12] (step=0291700) Train Loss: 0.3254, Train Steps/Sec: 16.00, Grad Norm: 0.0369 +[2025-02-23 02:59:18] (step=0291800) Train Loss: 0.3242, Train Steps/Sec: 16.63, Grad Norm: 0.0399 +[2025-02-23 02:59:23] (step=0291900) Train Loss: 0.3250, Train Steps/Sec: 17.41, Grad Norm: 0.0386 +[2025-02-23 02:59:29] (step=0292000) Train Loss: 0.3245, Train Steps/Sec: 17.39, Grad Norm: 0.0387 +[2025-02-23 02:59:36] (step=0292100) Train Loss: 0.3246, Train Steps/Sec: 15.93, Grad Norm: 0.0375 +[2025-02-23 02:59:42] (step=0292200) Train Loss: 0.3253, Train Steps/Sec: 15.90, Grad Norm: 0.0374 +[2025-02-23 02:59:48] (step=0292300) Train Loss: 0.3249, Train Steps/Sec: 15.27, Grad Norm: 0.0412 +[2025-02-23 02:59:54] (step=0292400) Train Loss: 0.3247, Train Steps/Sec: 16.71, Grad Norm: 0.0395 +[2025-02-23 03:00:00] (step=0292500) Train Loss: 0.3248, Train Steps/Sec: 16.69, Grad Norm: 0.0399 +[2025-02-23 03:00:06] (step=0292600) Train Loss: 0.3245, Train Steps/Sec: 17.42, Grad Norm: 0.0397 +[2025-02-23 03:00:12] (step=0292700) Train Loss: 0.3243, Train Steps/Sec: 17.46, Grad Norm: 0.0405 +[2025-02-23 03:00:19] (step=0292800) Train Loss: 0.3251, Train Steps/Sec: 14.33, Grad Norm: 0.0388 +[2025-02-23 03:00:25] (step=0292900) Train Loss: 0.3245, Train Steps/Sec: 17.43, Grad Norm: 0.0439 +[2025-02-23 03:00:30] (step=0293000) Train Loss: 0.3240, Train Steps/Sec: 17.46, Grad Norm: 0.0357 +[2025-02-23 03:00:36] (step=0293100) Train Loss: 0.3248, Train Steps/Sec: 17.46, Grad Norm: 0.0382 +[2025-02-23 03:00:42] (step=0293200) Train Loss: 0.3246, Train Steps/Sec: 17.41, Grad Norm: 0.0397 +[2025-02-23 03:00:48] (step=0293300) Train Loss: 0.3245, Train Steps/Sec: 17.21, Grad Norm: 0.0381 +[2025-02-23 03:00:53] (step=0293400) Train Loss: 0.3246, Train Steps/Sec: 17.25, Grad Norm: 0.0393 +[2025-02-23 03:01:00] (step=0293500) Train Loss: 0.3251, Train Steps/Sec: 15.82, Grad Norm: 0.0358 +[2025-02-23 03:01:07] (step=0293600) Train Loss: 0.3251, Train Steps/Sec: 14.64, Grad Norm: 0.0380 +[2025-02-23 03:01:13] (step=0293700) Train Loss: 0.3245, Train Steps/Sec: 16.57, Grad Norm: 0.0413 +[2025-02-23 03:01:18] (step=0293800) Train Loss: 0.3238, Train Steps/Sec: 17.23, Grad Norm: 0.0388 +[2025-02-23 03:01:24] (step=0293900) Train Loss: 0.3245, Train Steps/Sec: 17.19, Grad Norm: 0.0386 +[2025-02-23 03:01:31] (step=0294000) Train Loss: 0.3248, Train Steps/Sec: 14.42, Grad Norm: 0.0377 +[2025-02-23 03:01:37] (step=0294100) Train Loss: 0.3250, Train Steps/Sec: 15.74, Grad Norm: 0.0396 +[2025-02-23 03:01:44] (step=0294200) Train Loss: 0.3242, Train Steps/Sec: 15.69, Grad Norm: 0.0394 +[2025-02-23 03:01:50] (step=0294300) Train Loss: 0.3241, Train Steps/Sec: 15.08, Grad Norm: 0.0365 +[2025-02-23 03:01:57] (step=0294400) Train Loss: 0.3246, Train Steps/Sec: 16.39, Grad Norm: 0.0391 +[2025-02-23 03:02:03] (step=0294500) Train Loss: 0.3245, Train Steps/Sec: 16.47, Grad Norm: 0.0403 +[2025-02-23 03:02:08] (step=0294600) Train Loss: 0.3247, Train Steps/Sec: 17.18, Grad Norm: 0.0369 +[2025-02-23 03:02:14] (step=0294700) Train Loss: 0.3241, Train Steps/Sec: 17.15, Grad Norm: 0.0390 +[2025-02-23 03:02:20] (step=0294800) Train Loss: 0.3250, Train Steps/Sec: 17.10, Grad Norm: 0.0407 +[2025-02-23 03:02:26] (step=0294900) Train Loss: 0.3249, Train Steps/Sec: 17.21, Grad Norm: 0.0378 +[2025-02-23 03:02:32] (step=0295000) Train Loss: 0.3248, Train Steps/Sec: 17.19, Grad Norm: 0.0410 +[2025-02-23 03:02:38] (step=0295100) Train Loss: 0.3242, Train Steps/Sec: 17.21, Grad Norm: 0.0380 +[2025-02-23 03:02:43] (step=0295200) Train Loss: 0.3248, Train Steps/Sec: 17.19, Grad Norm: 0.0401 +[2025-02-23 03:02:50] (step=0295300) Train Loss: 0.3250, Train Steps/Sec: 14.23, Grad Norm: 0.0396 +[2025-02-23 03:02:56] (step=0295400) Train Loss: 0.3245, Train Steps/Sec: 17.22, Grad Norm: 0.0385 +[2025-02-23 03:03:03] (step=0295500) Train Loss: 0.3247, Train Steps/Sec: 14.64, Grad Norm: 0.0408 +[2025-02-23 03:03:09] (step=0295600) Train Loss: 0.3247, Train Steps/Sec: 15.95, Grad Norm: 0.0366 +[2025-02-23 03:03:15] (step=0295700) Train Loss: 0.3245, Train Steps/Sec: 16.58, Grad Norm: 0.0404 +[2025-02-23 03:03:21] (step=0295800) Train Loss: 0.3243, Train Steps/Sec: 17.25, Grad Norm: 0.0399 +[2025-02-23 03:03:27] (step=0295900) Train Loss: 0.3251, Train Steps/Sec: 17.23, Grad Norm: 0.0386 +[2025-02-23 03:03:33] (step=0296000) Train Loss: 0.3249, Train Steps/Sec: 17.28, Grad Norm: 0.0399 +[2025-02-23 03:03:39] (step=0296100) Train Loss: 0.3242, Train Steps/Sec: 15.77, Grad Norm: 0.0384 +[2025-02-23 03:03:45] (step=0296200) Train Loss: 0.3250, Train Steps/Sec: 15.83, Grad Norm: 0.0405 +[2025-02-23 03:03:52] (step=0296300) Train Loss: 0.3244, Train Steps/Sec: 15.26, Grad Norm: 0.0373 +[2025-02-23 03:03:58] (step=0296400) Train Loss: 0.3243, Train Steps/Sec: 16.00, Grad Norm: 0.0386 +[2025-02-23 03:04:05] (step=0296500) Train Loss: 0.3249, Train Steps/Sec: 14.34, Grad Norm: 0.0372 +[2025-02-23 03:04:11] (step=0296600) Train Loss: 0.3248, Train Steps/Sec: 17.32, Grad Norm: 0.0420 +[2025-02-23 03:04:17] (step=0296700) Train Loss: 0.3248, Train Steps/Sec: 17.26, Grad Norm: 0.0372 +[2025-02-23 03:04:23] (step=0296800) Train Loss: 0.3242, Train Steps/Sec: 17.18, Grad Norm: 0.0369 +[2025-02-23 03:04:28] (step=0296900) Train Loss: 0.3250, Train Steps/Sec: 17.17, Grad Norm: 0.0406 +[2025-02-23 03:04:34] (step=0297000) Train Loss: 0.3249, Train Steps/Sec: 17.19, Grad Norm: 0.0355 +[2025-02-23 03:04:40] (step=0297100) Train Loss: 0.3249, Train Steps/Sec: 17.18, Grad Norm: 0.0374 +[2025-02-23 03:04:46] (step=0297200) Train Loss: 0.3248, Train Steps/Sec: 17.35, Grad Norm: 0.0385 +[2025-02-23 03:04:52] (step=0297300) Train Loss: 0.3239, Train Steps/Sec: 17.43, Grad Norm: 0.0391 +[2025-02-23 03:04:58] (step=0297400) Train Loss: 0.3242, Train Steps/Sec: 16.65, Grad Norm: 0.0373 +[2025-02-23 03:05:04] (step=0297500) Train Loss: 0.3244, Train Steps/Sec: 14.81, Grad Norm: 0.0379 +[2025-02-23 03:05:10] (step=0297600) Train Loss: 0.3243, Train Steps/Sec: 16.75, Grad Norm: 0.0361 +[2025-02-23 03:05:16] (step=0297700) Train Loss: 0.3246, Train Steps/Sec: 17.41, Grad Norm: 0.0395 +[2025-02-23 03:05:23] (step=0297800) Train Loss: 0.3254, Train Steps/Sec: 14.46, Grad Norm: 0.0427 +[2025-02-23 03:05:29] (step=0297900) Train Loss: 0.3249, Train Steps/Sec: 17.39, Grad Norm: 0.0367 +[2025-02-23 03:05:34] (step=0298000) Train Loss: 0.3242, Train Steps/Sec: 17.47, Grad Norm: 0.0415 +[2025-02-23 03:05:41] (step=0298100) Train Loss: 0.3250, Train Steps/Sec: 15.89, Grad Norm: 0.0422 +[2025-02-23 03:05:47] (step=0298200) Train Loss: 0.3248, Train Steps/Sec: 15.91, Grad Norm: 0.0389 +[2025-02-23 03:05:54] (step=0298300) Train Loss: 0.3242, Train Steps/Sec: 15.28, Grad Norm: 0.0386 +[2025-02-23 03:06:00] (step=0298400) Train Loss: 0.3240, Train Steps/Sec: 16.09, Grad Norm: 0.0398 +[2025-02-23 03:06:06] (step=0298500) Train Loss: 0.3247, Train Steps/Sec: 17.47, Grad Norm: 0.0389 +[2025-02-23 03:06:11] (step=0298600) Train Loss: 0.3245, Train Steps/Sec: 17.47, Grad Norm: 0.0370 +[2025-02-23 03:06:17] (step=0298700) Train Loss: 0.3246, Train Steps/Sec: 17.34, Grad Norm: 0.0381 +[2025-02-23 03:06:23] (step=0298800) Train Loss: 0.3245, Train Steps/Sec: 17.40, Grad Norm: 0.0392 +[2025-02-23 03:06:29] (step=0298900) Train Loss: 0.3243, Train Steps/Sec: 17.39, Grad Norm: 0.0387 +[2025-02-23 03:06:35] (step=0299000) Train Loss: 0.3247, Train Steps/Sec: 14.52, Grad Norm: 0.0408 +[2025-02-23 03:06:41] (step=0299100) Train Loss: 0.3243, Train Steps/Sec: 17.25, Grad Norm: 0.0366 +[2025-02-23 03:06:47] (step=0299200) Train Loss: 0.3250, Train Steps/Sec: 17.30, Grad Norm: 0.0422 +[2025-02-23 03:06:53] (step=0299300) Train Loss: 0.3243, Train Steps/Sec: 17.30, Grad Norm: 0.0393 +[2025-02-23 03:07:00] (step=0299400) Train Loss: 0.3248, Train Steps/Sec: 14.69, Grad Norm: 0.0379 +[2025-02-23 03:07:06] (step=0299500) Train Loss: 0.3245, Train Steps/Sec: 15.95, Grad Norm: 0.0415 +[2025-02-23 03:07:12] (step=0299600) Train Loss: 0.3245, Train Steps/Sec: 16.54, Grad Norm: 0.0366 +[2025-02-23 03:07:18] (step=0299700) Train Loss: 0.3246, Train Steps/Sec: 17.16, Grad Norm: 0.0388 +[2025-02-23 03:07:24] (step=0299800) Train Loss: 0.3246, Train Steps/Sec: 17.22, Grad Norm: 0.0409 +[2025-02-23 03:07:29] (step=0299900) Train Loss: 0.3242, Train Steps/Sec: 17.24, Grad Norm: 0.0394 +[2025-02-23 03:07:35] (step=0300000) Train Loss: 0.3247, Train Steps/Sec: 17.24, Grad Norm: 0.0365 +[2025-02-23 03:07:36] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0300000.pt +[2025-02-23 03:07:42] (step=0300100) Train Loss: 0.3248, Train Steps/Sec: 14.42, Grad Norm: 0.0378 +[2025-02-23 03:07:48] (step=0300200) Train Loss: 0.3249, Train Steps/Sec: 16.42, Grad Norm: 0.0408 +[2025-02-23 03:07:57] (step=0300300) Train Loss: 0.3247, Train Steps/Sec: 11.97, Grad Norm: 0.0373 +[2025-02-23 03:08:03] (step=0300400) Train Loss: 0.3250, Train Steps/Sec: 16.51, Grad Norm: 0.0397 +[2025-02-23 03:08:08] (step=0300500) Train Loss: 0.3249, Train Steps/Sec: 17.28, Grad Norm: 0.0373 +[2025-02-23 03:08:14] (step=0300600) Train Loss: 0.3249, Train Steps/Sec: 17.31, Grad Norm: 0.0406 +[2025-02-23 03:08:20] (step=0300700) Train Loss: 0.3243, Train Steps/Sec: 17.26, Grad Norm: 0.0396 +[2025-02-23 03:08:26] (step=0300800) Train Loss: 0.3245, Train Steps/Sec: 17.23, Grad Norm: 0.0389 +[2025-02-23 03:08:32] (step=0300900) Train Loss: 0.3252, Train Steps/Sec: 17.20, Grad Norm: 0.0426 +[2025-02-23 03:08:37] (step=0301000) Train Loss: 0.3239, Train Steps/Sec: 17.22, Grad Norm: 0.0390 +[2025-02-23 03:08:43] (step=0301100) Train Loss: 0.3241, Train Steps/Sec: 17.23, Grad Norm: 0.0408 +[2025-02-23 03:08:49] (step=0301200) Train Loss: 0.3242, Train Steps/Sec: 17.28, Grad Norm: 0.0423 +[2025-02-23 03:08:55] (step=0301300) Train Loss: 0.3252, Train Steps/Sec: 15.86, Grad Norm: 0.0430 +[2025-02-23 03:09:02] (step=0301400) Train Loss: 0.3239, Train Steps/Sec: 14.65, Grad Norm: 0.0376 +[2025-02-23 03:09:09] (step=0301500) Train Loss: 0.3250, Train Steps/Sec: 13.93, Grad Norm: 0.0382 +[2025-02-23 03:09:15] (step=0301600) Train Loss: 0.3242, Train Steps/Sec: 17.26, Grad Norm: 0.0410 +[2025-02-23 03:09:21] (step=0301700) Train Loss: 0.3244, Train Steps/Sec: 17.24, Grad Norm: 0.0409 +[2025-02-23 03:09:27] (step=0301800) Train Loss: 0.3242, Train Steps/Sec: 17.38, Grad Norm: 0.0398 +[2025-02-23 03:09:32] (step=0301900) Train Loss: 0.3242, Train Steps/Sec: 17.30, Grad Norm: 0.0405 +[2025-02-23 03:09:38] (step=0302000) Train Loss: 0.3246, Train Steps/Sec: 17.40, Grad Norm: 0.0358 +[2025-02-23 03:09:44] (step=0302100) Train Loss: 0.3248, Train Steps/Sec: 15.88, Grad Norm: 0.0367 +[2025-02-23 03:09:51] (step=0302200) Train Loss: 0.3244, Train Steps/Sec: 15.89, Grad Norm: 0.0422 +[2025-02-23 03:09:58] (step=0302300) Train Loss: 0.3255, Train Steps/Sec: 14.56, Grad Norm: 0.0393 +[2025-02-23 03:10:04] (step=0302400) Train Loss: 0.3249, Train Steps/Sec: 16.55, Grad Norm: 0.0343 +[2025-02-23 03:10:09] (step=0302500) Train Loss: 0.3247, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 03:10:15] (step=0302600) Train Loss: 0.3247, Train Steps/Sec: 17.32, Grad Norm: 0.0384 +[2025-02-23 03:10:21] (step=0302700) Train Loss: 0.3247, Train Steps/Sec: 17.30, Grad Norm: 0.0368 +[2025-02-23 03:10:28] (step=0302800) Train Loss: 0.3242, Train Steps/Sec: 14.53, Grad Norm: 0.0395 +[2025-02-23 03:10:34] (step=0302900) Train Loss: 0.3244, Train Steps/Sec: 17.34, Grad Norm: 0.0438 +[2025-02-23 03:10:39] (step=0303000) Train Loss: 0.3252, Train Steps/Sec: 17.29, Grad Norm: 0.0383 +[2025-02-23 03:10:45] (step=0303100) Train Loss: 0.3245, Train Steps/Sec: 17.31, Grad Norm: 0.0371 +[2025-02-23 03:10:51] (step=0303200) Train Loss: 0.3245, Train Steps/Sec: 17.32, Grad Norm: 0.0383 +[2025-02-23 03:10:58] (step=0303300) Train Loss: 0.3238, Train Steps/Sec: 14.12, Grad Norm: 0.0354 +[2025-02-23 03:11:04] (step=0303400) Train Loss: 0.3239, Train Steps/Sec: 15.88, Grad Norm: 0.0399 +[2025-02-23 03:11:10] (step=0303500) Train Loss: 0.3243, Train Steps/Sec: 17.27, Grad Norm: 0.0386 +[2025-02-23 03:11:16] (step=0303600) Train Loss: 0.3244, Train Steps/Sec: 17.26, Grad Norm: 0.0378 +[2025-02-23 03:11:22] (step=0303700) Train Loss: 0.3239, Train Steps/Sec: 17.21, Grad Norm: 0.0424 +[2025-02-23 03:11:28] (step=0303800) Train Loss: 0.3246, Train Steps/Sec: 17.28, Grad Norm: 0.0385 +[2025-02-23 03:11:33] (step=0303900) Train Loss: 0.3245, Train Steps/Sec: 17.29, Grad Norm: 0.0361 +[2025-02-23 03:11:40] (step=0304000) Train Loss: 0.3242, Train Steps/Sec: 14.40, Grad Norm: 0.0366 +[2025-02-23 03:11:47] (step=0304100) Train Loss: 0.3245, Train Steps/Sec: 15.68, Grad Norm: 0.0441 +[2025-02-23 03:11:53] (step=0304200) Train Loss: 0.3247, Train Steps/Sec: 15.71, Grad Norm: 0.0377 +[2025-02-23 03:12:00] (step=0304300) Train Loss: 0.3247, Train Steps/Sec: 14.55, Grad Norm: 0.0409 +[2025-02-23 03:12:06] (step=0304400) Train Loss: 0.3249, Train Steps/Sec: 16.53, Grad Norm: 0.0420 +[2025-02-23 03:12:12] (step=0304500) Train Loss: 0.3250, Train Steps/Sec: 17.28, Grad Norm: 0.0350 +[2025-02-23 03:12:18] (step=0304600) Train Loss: 0.3246, Train Steps/Sec: 17.30, Grad Norm: 0.0370 +[2025-02-23 03:12:23] (step=0304700) Train Loss: 0.3248, Train Steps/Sec: 17.28, Grad Norm: 0.0372 +[2025-02-23 03:12:29] (step=0304800) Train Loss: 0.3246, Train Steps/Sec: 17.33, Grad Norm: 0.0419 +[2025-02-23 03:12:35] (step=0304900) Train Loss: 0.3245, Train Steps/Sec: 17.22, Grad Norm: 0.0389 +[2025-02-23 03:12:41] (step=0305000) Train Loss: 0.3239, Train Steps/Sec: 17.25, Grad Norm: 0.0358 +[2025-02-23 03:12:47] (step=0305100) Train Loss: 0.3245, Train Steps/Sec: 17.32, Grad Norm: 0.0375 +[2025-02-23 03:12:53] (step=0305200) Train Loss: 0.3245, Train Steps/Sec: 15.23, Grad Norm: 0.0390 +[2025-02-23 03:13:01] (step=0305300) Train Loss: 0.3237, Train Steps/Sec: 12.81, Grad Norm: 0.0418 +[2025-02-23 03:13:07] (step=0305400) Train Loss: 0.3243, Train Steps/Sec: 16.30, Grad Norm: 0.0373 +[2025-02-23 03:13:13] (step=0305500) Train Loss: 0.3244, Train Steps/Sec: 16.99, Grad Norm: 0.0422 +[2025-02-23 03:13:19] (step=0305600) Train Loss: 0.3242, Train Steps/Sec: 17.01, Grad Norm: 0.0378 +[2025-02-23 03:13:25] (step=0305700) Train Loss: 0.3241, Train Steps/Sec: 17.01, Grad Norm: 0.0371 +[2025-02-23 03:13:31] (step=0305800) Train Loss: 0.3249, Train Steps/Sec: 17.06, Grad Norm: 0.0412 +[2025-02-23 03:13:36] (step=0305900) Train Loss: 0.3247, Train Steps/Sec: 17.07, Grad Norm: 0.0425 +[2025-02-23 03:13:42] (step=0306000) Train Loss: 0.3247, Train Steps/Sec: 16.95, Grad Norm: 0.0369 +[2025-02-23 03:13:49] (step=0306100) Train Loss: 0.3246, Train Steps/Sec: 15.65, Grad Norm: 0.0389 +[2025-02-23 03:13:55] (step=0306200) Train Loss: 0.3248, Train Steps/Sec: 15.72, Grad Norm: 0.0415 +[2025-02-23 03:14:02] (step=0306300) Train Loss: 0.3244, Train Steps/Sec: 15.08, Grad Norm: 0.0388 +[2025-02-23 03:14:08] (step=0306400) Train Loss: 0.3243, Train Steps/Sec: 15.83, Grad Norm: 0.0399 +[2025-02-23 03:14:15] (step=0306500) Train Loss: 0.3249, Train Steps/Sec: 14.39, Grad Norm: 0.0372 +[2025-02-23 03:14:21] (step=0306600) Train Loss: 0.3246, Train Steps/Sec: 17.13, Grad Norm: 0.0387 +[2025-02-23 03:14:27] (step=0306700) Train Loss: 0.3246, Train Steps/Sec: 17.20, Grad Norm: 0.0389 +[2025-02-23 03:14:32] (step=0306800) Train Loss: 0.3243, Train Steps/Sec: 17.24, Grad Norm: 0.0371 +[2025-02-23 03:14:38] (step=0306900) Train Loss: 0.3244, Train Steps/Sec: 17.25, Grad Norm: 0.0375 +[2025-02-23 03:14:44] (step=0307000) Train Loss: 0.3244, Train Steps/Sec: 17.24, Grad Norm: 0.0403 +[2025-02-23 03:14:50] (step=0307100) Train Loss: 0.3246, Train Steps/Sec: 16.53, Grad Norm: 0.0365 +[2025-02-23 03:14:57] (step=0307200) Train Loss: 0.3250, Train Steps/Sec: 14.59, Grad Norm: 0.0357 +[2025-02-23 03:15:03] (step=0307300) Train Loss: 0.3245, Train Steps/Sec: 15.79, Grad Norm: 0.0374 +[2025-02-23 03:15:09] (step=0307400) Train Loss: 0.3251, Train Steps/Sec: 17.24, Grad Norm: 0.0372 +[2025-02-23 03:15:15] (step=0307500) Train Loss: 0.3247, Train Steps/Sec: 17.31, Grad Norm: 0.0383 +[2025-02-23 03:15:21] (step=0307600) Train Loss: 0.3246, Train Steps/Sec: 17.27, Grad Norm: 0.0409 +[2025-02-23 03:15:26] (step=0307700) Train Loss: 0.3245, Train Steps/Sec: 17.22, Grad Norm: 0.0432 +[2025-02-23 03:15:33] (step=0307800) Train Loss: 0.3249, Train Steps/Sec: 14.45, Grad Norm: 0.0407 +[2025-02-23 03:15:39] (step=0307900) Train Loss: 0.3245, Train Steps/Sec: 17.35, Grad Norm: 0.0395 +[2025-02-23 03:15:45] (step=0308000) Train Loss: 0.3250, Train Steps/Sec: 17.31, Grad Norm: 0.0406 +[2025-02-23 03:15:51] (step=0308100) Train Loss: 0.3245, Train Steps/Sec: 15.87, Grad Norm: 0.0410 +[2025-02-23 03:15:58] (step=0308200) Train Loss: 0.3241, Train Steps/Sec: 15.88, Grad Norm: 0.0352 +[2025-02-23 03:16:04] (step=0308300) Train Loss: 0.3243, Train Steps/Sec: 15.11, Grad Norm: 0.0402 +[2025-02-23 03:16:10] (step=0308400) Train Loss: 0.3244, Train Steps/Sec: 15.87, Grad Norm: 0.0395 +[2025-02-23 03:16:16] (step=0308500) Train Loss: 0.3245, Train Steps/Sec: 17.33, Grad Norm: 0.0350 +[2025-02-23 03:16:22] (step=0308600) Train Loss: 0.3247, Train Steps/Sec: 17.40, Grad Norm: 0.0415 +[2025-02-23 03:16:28] (step=0308700) Train Loss: 0.3243, Train Steps/Sec: 17.39, Grad Norm: 0.0363 +[2025-02-23 03:16:34] (step=0308800) Train Loss: 0.3244, Train Steps/Sec: 17.34, Grad Norm: 0.0377 +[2025-02-23 03:16:39] (step=0308900) Train Loss: 0.3250, Train Steps/Sec: 17.31, Grad Norm: 0.0392 +[2025-02-23 03:16:46] (step=0309000) Train Loss: 0.3242, Train Steps/Sec: 14.44, Grad Norm: 0.0371 +[2025-02-23 03:16:53] (step=0309100) Train Loss: 0.3245, Train Steps/Sec: 14.51, Grad Norm: 0.0407 +[2025-02-23 03:16:59] (step=0309200) Train Loss: 0.3244, Train Steps/Sec: 16.34, Grad Norm: 0.0369 +[2025-02-23 03:17:05] (step=0309300) Train Loss: 0.3245, Train Steps/Sec: 16.40, Grad Norm: 0.0391 +[2025-02-23 03:17:11] (step=0309400) Train Loss: 0.3247, Train Steps/Sec: 17.13, Grad Norm: 0.0393 +[2025-02-23 03:17:17] (step=0309500) Train Loss: 0.3245, Train Steps/Sec: 17.15, Grad Norm: 0.0385 +[2025-02-23 03:17:23] (step=0309600) Train Loss: 0.3245, Train Steps/Sec: 17.16, Grad Norm: 0.0406 +[2025-02-23 03:17:29] (step=0309700) Train Loss: 0.3241, Train Steps/Sec: 17.19, Grad Norm: 0.0385 +[2025-02-23 03:17:34] (step=0309800) Train Loss: 0.3244, Train Steps/Sec: 17.23, Grad Norm: 0.0385 +[2025-02-23 03:17:40] (step=0309900) Train Loss: 0.3244, Train Steps/Sec: 17.22, Grad Norm: 0.0382 +[2025-02-23 03:17:46] (step=0310000) Train Loss: 0.3245, Train Steps/Sec: 16.46, Grad Norm: 0.0415 +[2025-02-23 03:17:52] (step=0310100) Train Loss: 0.3245, Train Steps/Sec: 16.38, Grad Norm: 0.0375 +[2025-02-23 03:17:59] (step=0310200) Train Loss: 0.3248, Train Steps/Sec: 15.71, Grad Norm: 0.0396 +[2025-02-23 03:18:07] (step=0310300) Train Loss: 0.3244, Train Steps/Sec: 12.88, Grad Norm: 0.0392 +[2025-02-23 03:18:13] (step=0310400) Train Loss: 0.3247, Train Steps/Sec: 15.84, Grad Norm: 0.0374 +[2025-02-23 03:18:19] (step=0310500) Train Loss: 0.3246, Train Steps/Sec: 17.26, Grad Norm: 0.0374 +[2025-02-23 03:18:25] (step=0310600) Train Loss: 0.3244, Train Steps/Sec: 17.22, Grad Norm: 0.0406 +[2025-02-23 03:18:30] (step=0310700) Train Loss: 0.3240, Train Steps/Sec: 17.28, Grad Norm: 0.0391 +[2025-02-23 03:18:36] (step=0310800) Train Loss: 0.3243, Train Steps/Sec: 17.36, Grad Norm: 0.0391 +[2025-02-23 03:18:42] (step=0310900) Train Loss: 0.3247, Train Steps/Sec: 17.38, Grad Norm: 0.0378 +[2025-02-23 03:18:48] (step=0311000) Train Loss: 0.3247, Train Steps/Sec: 16.67, Grad Norm: 0.0358 +[2025-02-23 03:18:55] (step=0311100) Train Loss: 0.3248, Train Steps/Sec: 14.68, Grad Norm: 0.0377 +[2025-02-23 03:19:01] (step=0311200) Train Loss: 0.3241, Train Steps/Sec: 15.91, Grad Norm: 0.0386 +[2025-02-23 03:19:07] (step=0311300) Train Loss: 0.3246, Train Steps/Sec: 17.45, Grad Norm: 0.0377 +[2025-02-23 03:19:12] (step=0311400) Train Loss: 0.3242, Train Steps/Sec: 17.39, Grad Norm: 0.0363 +[2025-02-23 03:19:19] (step=0311500) Train Loss: 0.3243, Train Steps/Sec: 14.49, Grad Norm: 0.0391 +[2025-02-23 03:19:25] (step=0311600) Train Loss: 0.3248, Train Steps/Sec: 17.28, Grad Norm: 0.0367 +[2025-02-23 03:19:31] (step=0311700) Train Loss: 0.3247, Train Steps/Sec: 17.35, Grad Norm: 0.0382 +[2025-02-23 03:19:37] (step=0311800) Train Loss: 0.3242, Train Steps/Sec: 17.23, Grad Norm: 0.0411 +[2025-02-23 03:19:43] (step=0311900) Train Loss: 0.3243, Train Steps/Sec: 17.15, Grad Norm: 0.0395 +[2025-02-23 03:19:49] (step=0312000) Train Loss: 0.3247, Train Steps/Sec: 16.54, Grad Norm: 0.0387 +[2025-02-23 03:19:55] (step=0312100) Train Loss: 0.3240, Train Steps/Sec: 16.49, Grad Norm: 0.0391 +[2025-02-23 03:20:01] (step=0312200) Train Loss: 0.3240, Train Steps/Sec: 15.85, Grad Norm: 0.0422 +[2025-02-23 03:20:08] (step=0312300) Train Loss: 0.3245, Train Steps/Sec: 15.11, Grad Norm: 0.0392 +[2025-02-23 03:20:14] (step=0312400) Train Loss: 0.3243, Train Steps/Sec: 16.61, Grad Norm: 0.0404 +[2025-02-23 03:20:20] (step=0312500) Train Loss: 0.3242, Train Steps/Sec: 16.66, Grad Norm: 0.0405 +[2025-02-23 03:20:25] (step=0312600) Train Loss: 0.3246, Train Steps/Sec: 17.40, Grad Norm: 0.0390 +[2025-02-23 03:20:31] (step=0312700) Train Loss: 0.3244, Train Steps/Sec: 17.41, Grad Norm: 0.0380 +[2025-02-23 03:20:38] (step=0312800) Train Loss: 0.3246, Train Steps/Sec: 14.25, Grad Norm: 0.0389 +[2025-02-23 03:20:44] (step=0312900) Train Loss: 0.3242, Train Steps/Sec: 17.28, Grad Norm: 0.0376 +[2025-02-23 03:20:51] (step=0313000) Train Loss: 0.3248, Train Steps/Sec: 14.10, Grad Norm: 0.0393 +[2025-02-23 03:20:57] (step=0313100) Train Loss: 0.3243, Train Steps/Sec: 16.57, Grad Norm: 0.0350 +[2025-02-23 03:21:03] (step=0313200) Train Loss: 0.3243, Train Steps/Sec: 16.61, Grad Norm: 0.0407 +[2025-02-23 03:21:09] (step=0313300) Train Loss: 0.3246, Train Steps/Sec: 17.30, Grad Norm: 0.0369 +[2025-02-23 03:21:15] (step=0313400) Train Loss: 0.3249, Train Steps/Sec: 17.30, Grad Norm: 0.0395 +[2025-02-23 03:21:20] (step=0313500) Train Loss: 0.3245, Train Steps/Sec: 17.27, Grad Norm: 0.0401 +[2025-02-23 03:21:26] (step=0313600) Train Loss: 0.3243, Train Steps/Sec: 17.31, Grad Norm: 0.0381 +[2025-02-23 03:21:32] (step=0313700) Train Loss: 0.3241, Train Steps/Sec: 17.32, Grad Norm: 0.0384 +[2025-02-23 03:21:38] (step=0313800) Train Loss: 0.3246, Train Steps/Sec: 17.33, Grad Norm: 0.0396 +[2025-02-23 03:21:44] (step=0313900) Train Loss: 0.3249, Train Steps/Sec: 16.61, Grad Norm: 0.0367 +[2025-02-23 03:21:50] (step=0314000) Train Loss: 0.3247, Train Steps/Sec: 17.27, Grad Norm: 0.0394 +[2025-02-23 03:21:57] (step=0314100) Train Loss: 0.3248, Train Steps/Sec: 13.93, Grad Norm: 0.0365 +[2025-02-23 03:22:03] (step=0314200) Train Loss: 0.3244, Train Steps/Sec: 15.79, Grad Norm: 0.0426 +[2025-02-23 03:22:10] (step=0314300) Train Loss: 0.3240, Train Steps/Sec: 14.97, Grad Norm: 0.0415 +[2025-02-23 03:22:16] (step=0314400) Train Loss: 0.3242, Train Steps/Sec: 16.36, Grad Norm: 0.0396 +[2025-02-23 03:22:22] (step=0314500) Train Loss: 0.3241, Train Steps/Sec: 16.43, Grad Norm: 0.0445 +[2025-02-23 03:22:28] (step=0314600) Train Loss: 0.3241, Train Steps/Sec: 17.23, Grad Norm: 0.0358 +[2025-02-23 03:22:34] (step=0314700) Train Loss: 0.3248, Train Steps/Sec: 17.25, Grad Norm: 0.0364 +[2025-02-23 03:22:39] (step=0314800) Train Loss: 0.3240, Train Steps/Sec: 17.19, Grad Norm: 0.0384 +[2025-02-23 03:22:45] (step=0314900) Train Loss: 0.3240, Train Steps/Sec: 16.46, Grad Norm: 0.0382 +[2025-02-23 03:22:52] (step=0315000) Train Loss: 0.3250, Train Steps/Sec: 14.63, Grad Norm: 0.0382 +[2025-02-23 03:22:59] (step=0315100) Train Loss: 0.3246, Train Steps/Sec: 15.77, Grad Norm: 0.0364 +[2025-02-23 03:23:04] (step=0315200) Train Loss: 0.3240, Train Steps/Sec: 17.18, Grad Norm: 0.0374 +[2025-02-23 03:23:11] (step=0315300) Train Loss: 0.3246, Train Steps/Sec: 14.41, Grad Norm: 0.0397 +[2025-02-23 03:23:17] (step=0315400) Train Loss: 0.3248, Train Steps/Sec: 17.36, Grad Norm: 0.0409 +[2025-02-23 03:23:23] (step=0315500) Train Loss: 0.3246, Train Steps/Sec: 17.39, Grad Norm: 0.0408 +[2025-02-23 03:23:29] (step=0315600) Train Loss: 0.3242, Train Steps/Sec: 17.43, Grad Norm: 0.0392 +[2025-02-23 03:23:34] (step=0315700) Train Loss: 0.3246, Train Steps/Sec: 17.45, Grad Norm: 0.0388 +[2025-02-23 03:23:40] (step=0315800) Train Loss: 0.3246, Train Steps/Sec: 17.39, Grad Norm: 0.0400 +[2025-02-23 03:23:46] (step=0315900) Train Loss: 0.3245, Train Steps/Sec: 16.62, Grad Norm: 0.0392 +[2025-02-23 03:23:52] (step=0316000) Train Loss: 0.3244, Train Steps/Sec: 17.35, Grad Norm: 0.0428 +[2025-02-23 03:23:58] (step=0316100) Train Loss: 0.3252, Train Steps/Sec: 16.51, Grad Norm: 0.0367 +[2025-02-23 03:24:04] (step=0316200) Train Loss: 0.3240, Train Steps/Sec: 15.82, Grad Norm: 0.0411 +[2025-02-23 03:24:11] (step=0316300) Train Loss: 0.3245, Train Steps/Sec: 15.80, Grad Norm: 0.0402 +[2025-02-23 03:24:17] (step=0316400) Train Loss: 0.3249, Train Steps/Sec: 15.87, Grad Norm: 0.0406 +[2025-02-23 03:24:23] (step=0316500) Train Loss: 0.3245, Train Steps/Sec: 16.59, Grad Norm: 0.0416 +[2025-02-23 03:24:30] (step=0316600) Train Loss: 0.3247, Train Steps/Sec: 14.46, Grad Norm: 0.0378 +[2025-02-23 03:24:36] (step=0316700) Train Loss: 0.3246, Train Steps/Sec: 17.36, Grad Norm: 0.0430 +[2025-02-23 03:24:42] (step=0316800) Train Loss: 0.3242, Train Steps/Sec: 16.60, Grad Norm: 0.0353 +[2025-02-23 03:24:48] (step=0316900) Train Loss: 0.3241, Train Steps/Sec: 14.73, Grad Norm: 0.0417 +[2025-02-23 03:24:54] (step=0317000) Train Loss: 0.3245, Train Steps/Sec: 16.62, Grad Norm: 0.0361 +[2025-02-23 03:25:00] (step=0317100) Train Loss: 0.3243, Train Steps/Sec: 16.70, Grad Norm: 0.0392 +[2025-02-23 03:25:06] (step=0317200) Train Loss: 0.3248, Train Steps/Sec: 17.40, Grad Norm: 0.0402 +[2025-02-23 03:25:12] (step=0317300) Train Loss: 0.3243, Train Steps/Sec: 17.36, Grad Norm: 0.0374 +[2025-02-23 03:25:18] (step=0317400) Train Loss: 0.3244, Train Steps/Sec: 17.32, Grad Norm: 0.0414 +[2025-02-23 03:25:24] (step=0317500) Train Loss: 0.3248, Train Steps/Sec: 17.33, Grad Norm: 0.0412 +[2025-02-23 03:25:29] (step=0317600) Train Loss: 0.3241, Train Steps/Sec: 17.34, Grad Norm: 0.0417 +[2025-02-23 03:25:35] (step=0317700) Train Loss: 0.3247, Train Steps/Sec: 17.35, Grad Norm: 0.0389 +[2025-02-23 03:25:42] (step=0317800) Train Loss: 0.3244, Train Steps/Sec: 13.64, Grad Norm: 0.0391 +[2025-02-23 03:25:48] (step=0317900) Train Loss: 0.3247, Train Steps/Sec: 17.39, Grad Norm: 0.0373 +[2025-02-23 03:25:54] (step=0318000) Train Loss: 0.3244, Train Steps/Sec: 17.25, Grad Norm: 0.0406 +[2025-02-23 03:26:00] (step=0318100) Train Loss: 0.3246, Train Steps/Sec: 16.42, Grad Norm: 0.0380 +[2025-02-23 03:26:06] (step=0318200) Train Loss: 0.3236, Train Steps/Sec: 15.74, Grad Norm: 0.0409 +[2025-02-23 03:26:13] (step=0318300) Train Loss: 0.3243, Train Steps/Sec: 15.68, Grad Norm: 0.0381 +[2025-02-23 03:26:19] (step=0318400) Train Loss: 0.3237, Train Steps/Sec: 15.78, Grad Norm: 0.0364 +[2025-02-23 03:26:25] (step=0318500) Train Loss: 0.3246, Train Steps/Sec: 16.49, Grad Norm: 0.0391 +[2025-02-23 03:26:31] (step=0318600) Train Loss: 0.3244, Train Steps/Sec: 17.13, Grad Norm: 0.0378 +[2025-02-23 03:26:37] (step=0318700) Train Loss: 0.3242, Train Steps/Sec: 17.20, Grad Norm: 0.0375 +[2025-02-23 03:26:43] (step=0318800) Train Loss: 0.3246, Train Steps/Sec: 15.29, Grad Norm: 0.0437 +[2025-02-23 03:26:50] (step=0318900) Train Loss: 0.3248, Train Steps/Sec: 15.79, Grad Norm: 0.0359 +[2025-02-23 03:26:56] (step=0319000) Train Loss: 0.3245, Train Steps/Sec: 15.85, Grad Norm: 0.0398 +[2025-02-23 03:27:03] (step=0319100) Train Loss: 0.3243, Train Steps/Sec: 14.35, Grad Norm: 0.0391 +[2025-02-23 03:27:09] (step=0319200) Train Loss: 0.3244, Train Steps/Sec: 17.30, Grad Norm: 0.0390 +[2025-02-23 03:27:15] (step=0319300) Train Loss: 0.3244, Train Steps/Sec: 17.42, Grad Norm: 0.0393 +[2025-02-23 03:27:20] (step=0319400) Train Loss: 0.3249, Train Steps/Sec: 17.38, Grad Norm: 0.0389 +[2025-02-23 03:27:26] (step=0319500) Train Loss: 0.3243, Train Steps/Sec: 17.31, Grad Norm: 0.0375 +[2025-02-23 03:27:32] (step=0319600) Train Loss: 0.3245, Train Steps/Sec: 17.31, Grad Norm: 0.0415 +[2025-02-23 03:27:38] (step=0319700) Train Loss: 0.3243, Train Steps/Sec: 17.38, Grad Norm: 0.0388 +[2025-02-23 03:27:44] (step=0319800) Train Loss: 0.3245, Train Steps/Sec: 16.61, Grad Norm: 0.0375 +[2025-02-23 03:27:49] (step=0319900) Train Loss: 0.3240, Train Steps/Sec: 17.14, Grad Norm: 0.0390 +[2025-02-23 03:27:55] (step=0320000) Train Loss: 0.3241, Train Steps/Sec: 17.31, Grad Norm: 0.0383 +[2025-02-23 03:28:01] (step=0320100) Train Loss: 0.3242, Train Steps/Sec: 16.53, Grad Norm: 0.0398 +[2025-02-23 03:28:07] (step=0320200) Train Loss: 0.3242, Train Steps/Sec: 16.64, Grad Norm: 0.0418 +[2025-02-23 03:28:15] (step=0320300) Train Loss: 0.3247, Train Steps/Sec: 12.98, Grad Norm: 0.0422 +[2025-02-23 03:28:21] (step=0320400) Train Loss: 0.3243, Train Steps/Sec: 15.80, Grad Norm: 0.0383 +[2025-02-23 03:28:27] (step=0320500) Train Loss: 0.3247, Train Steps/Sec: 16.66, Grad Norm: 0.0376 +[2025-02-23 03:28:33] (step=0320600) Train Loss: 0.3243, Train Steps/Sec: 17.41, Grad Norm: 0.0387 +[2025-02-23 03:28:39] (step=0320700) Train Loss: 0.3246, Train Steps/Sec: 16.58, Grad Norm: 0.0432 +[2025-02-23 03:28:46] (step=0320800) Train Loss: 0.3243, Train Steps/Sec: 14.76, Grad Norm: 0.0378 +[2025-02-23 03:28:52] (step=0320900) Train Loss: 0.3244, Train Steps/Sec: 15.95, Grad Norm: 0.0369 +[2025-02-23 03:28:58] (step=0321000) Train Loss: 0.3244, Train Steps/Sec: 17.32, Grad Norm: 0.0383 +[2025-02-23 03:29:04] (step=0321100) Train Loss: 0.3242, Train Steps/Sec: 17.34, Grad Norm: 0.0396 +[2025-02-23 03:29:09] (step=0321200) Train Loss: 0.3249, Train Steps/Sec: 17.36, Grad Norm: 0.0368 +[2025-02-23 03:29:15] (step=0321300) Train Loss: 0.3244, Train Steps/Sec: 17.39, Grad Norm: 0.0399 +[2025-02-23 03:29:21] (step=0321400) Train Loss: 0.3247, Train Steps/Sec: 17.41, Grad Norm: 0.0418 +[2025-02-23 03:29:27] (step=0321500) Train Loss: 0.3239, Train Steps/Sec: 17.39, Grad Norm: 0.0361 +[2025-02-23 03:29:34] (step=0321600) Train Loss: 0.3239, Train Steps/Sec: 14.44, Grad Norm: 0.0408 +[2025-02-23 03:29:39] (step=0321700) Train Loss: 0.3234, Train Steps/Sec: 17.42, Grad Norm: 0.0352 +[2025-02-23 03:29:45] (step=0321800) Train Loss: 0.3241, Train Steps/Sec: 16.73, Grad Norm: 0.0420 +[2025-02-23 03:29:51] (step=0321900) Train Loss: 0.3247, Train Steps/Sec: 17.48, Grad Norm: 0.0375 +[2025-02-23 03:29:57] (step=0322000) Train Loss: 0.3247, Train Steps/Sec: 17.51, Grad Norm: 0.0381 +[2025-02-23 03:30:03] (step=0322100) Train Loss: 0.3242, Train Steps/Sec: 16.68, Grad Norm: 0.0387 +[2025-02-23 03:30:09] (step=0322200) Train Loss: 0.3247, Train Steps/Sec: 16.77, Grad Norm: 0.0365 +[2025-02-23 03:30:15] (step=0322300) Train Loss: 0.3241, Train Steps/Sec: 15.27, Grad Norm: 0.0413 +[2025-02-23 03:30:22] (step=0322400) Train Loss: 0.3246, Train Steps/Sec: 15.98, Grad Norm: 0.0374 +[2025-02-23 03:30:28] (step=0322500) Train Loss: 0.3245, Train Steps/Sec: 16.73, Grad Norm: 0.0403 +[2025-02-23 03:30:33] (step=0322600) Train Loss: 0.3244, Train Steps/Sec: 17.51, Grad Norm: 0.0393 +[2025-02-23 03:30:40] (step=0322700) Train Loss: 0.3247, Train Steps/Sec: 14.87, Grad Norm: 0.0378 +[2025-02-23 03:30:47] (step=0322800) Train Loss: 0.3242, Train Steps/Sec: 13.85, Grad Norm: 0.0376 +[2025-02-23 03:30:54] (step=0322900) Train Loss: 0.3246, Train Steps/Sec: 15.79, Grad Norm: 0.0398 +[2025-02-23 03:30:59] (step=0323000) Train Loss: 0.3245, Train Steps/Sec: 17.18, Grad Norm: 0.0412 +[2025-02-23 03:31:05] (step=0323100) Train Loss: 0.3248, Train Steps/Sec: 17.18, Grad Norm: 0.0398 +[2025-02-23 03:31:11] (step=0323200) Train Loss: 0.3246, Train Steps/Sec: 17.24, Grad Norm: 0.0380 +[2025-02-23 03:31:17] (step=0323300) Train Loss: 0.3240, Train Steps/Sec: 17.25, Grad Norm: 0.0360 +[2025-02-23 03:31:23] (step=0323400) Train Loss: 0.3249, Train Steps/Sec: 17.30, Grad Norm: 0.0406 +[2025-02-23 03:31:28] (step=0323500) Train Loss: 0.3244, Train Steps/Sec: 17.27, Grad Norm: 0.0369 +[2025-02-23 03:31:34] (step=0323600) Train Loss: 0.3245, Train Steps/Sec: 17.23, Grad Norm: 0.0387 +[2025-02-23 03:31:40] (step=0323700) Train Loss: 0.3244, Train Steps/Sec: 16.51, Grad Norm: 0.0374 +[2025-02-23 03:31:46] (step=0323800) Train Loss: 0.3240, Train Steps/Sec: 17.24, Grad Norm: 0.0386 +[2025-02-23 03:31:52] (step=0323900) Train Loss: 0.3243, Train Steps/Sec: 17.25, Grad Norm: 0.0414 +[2025-02-23 03:31:58] (step=0324000) Train Loss: 0.3241, Train Steps/Sec: 17.22, Grad Norm: 0.0398 +[2025-02-23 03:32:05] (step=0324100) Train Loss: 0.3245, Train Steps/Sec: 13.76, Grad Norm: 0.0376 +[2025-02-23 03:32:11] (step=0324200) Train Loss: 0.3242, Train Steps/Sec: 16.44, Grad Norm: 0.0366 +[2025-02-23 03:32:18] (step=0324300) Train Loss: 0.3243, Train Steps/Sec: 15.06, Grad Norm: 0.0382 +[2025-02-23 03:32:24] (step=0324400) Train Loss: 0.3245, Train Steps/Sec: 15.73, Grad Norm: 0.0401 +[2025-02-23 03:32:30] (step=0324500) Train Loss: 0.3246, Train Steps/Sec: 16.54, Grad Norm: 0.0383 +[2025-02-23 03:32:36] (step=0324600) Train Loss: 0.3243, Train Steps/Sec: 16.55, Grad Norm: 0.0404 +[2025-02-23 03:32:43] (step=0324700) Train Loss: 0.3243, Train Steps/Sec: 14.64, Grad Norm: 0.0394 +[2025-02-23 03:32:49] (step=0324800) Train Loss: 0.3245, Train Steps/Sec: 15.80, Grad Norm: 0.0416 +[2025-02-23 03:32:55] (step=0324900) Train Loss: 0.3237, Train Steps/Sec: 17.20, Grad Norm: 0.0370 +[2025-02-23 03:33:01] (step=0325000) Train Loss: 0.3245, Train Steps/Sec: 17.20, Grad Norm: 0.0378 +[2025-02-23 03:33:07] (step=0325100) Train Loss: 0.3245, Train Steps/Sec: 17.17, Grad Norm: 0.0393 +[2025-02-23 03:33:13] (step=0325200) Train Loss: 0.3242, Train Steps/Sec: 17.22, Grad Norm: 0.0422 +[2025-02-23 03:33:20] (step=0325300) Train Loss: 0.3242, Train Steps/Sec: 14.36, Grad Norm: 0.0356 +[2025-02-23 03:33:25] (step=0325400) Train Loss: 0.3242, Train Steps/Sec: 17.20, Grad Norm: 0.0401 +[2025-02-23 03:33:31] (step=0325500) Train Loss: 0.3245, Train Steps/Sec: 17.18, Grad Norm: 0.0358 +[2025-02-23 03:33:37] (step=0325600) Train Loss: 0.3248, Train Steps/Sec: 17.18, Grad Norm: 0.0400 +[2025-02-23 03:33:43] (step=0325700) Train Loss: 0.3245, Train Steps/Sec: 16.46, Grad Norm: 0.0392 +[2025-02-23 03:33:49] (step=0325800) Train Loss: 0.3250, Train Steps/Sec: 17.16, Grad Norm: 0.0377 +[2025-02-23 03:33:55] (step=0325900) Train Loss: 0.3248, Train Steps/Sec: 17.17, Grad Norm: 0.0435 +[2025-02-23 03:34:01] (step=0326000) Train Loss: 0.3240, Train Steps/Sec: 17.09, Grad Norm: 0.0372 +[2025-02-23 03:34:07] (step=0326100) Train Loss: 0.3244, Train Steps/Sec: 16.47, Grad Norm: 0.0375 +[2025-02-23 03:34:13] (step=0326200) Train Loss: 0.3243, Train Steps/Sec: 16.52, Grad Norm: 0.0379 +[2025-02-23 03:34:19] (step=0326300) Train Loss: 0.3243, Train Steps/Sec: 15.69, Grad Norm: 0.0414 +[2025-02-23 03:34:26] (step=0326400) Train Loss: 0.3246, Train Steps/Sec: 15.14, Grad Norm: 0.0377 +[2025-02-23 03:34:32] (step=0326500) Train Loss: 0.3238, Train Steps/Sec: 16.51, Grad Norm: 0.0393 +[2025-02-23 03:34:40] (step=0326600) Train Loss: 0.3239, Train Steps/Sec: 12.45, Grad Norm: 0.0367 +[2025-02-23 03:34:46] (step=0326700) Train Loss: 0.3238, Train Steps/Sec: 16.53, Grad Norm: 0.0353 +[2025-02-23 03:34:52] (step=0326800) Train Loss: 0.3243, Train Steps/Sec: 15.79, Grad Norm: 0.0421 +[2025-02-23 03:34:58] (step=0326900) Train Loss: 0.3244, Train Steps/Sec: 17.22, Grad Norm: 0.0350 +[2025-02-23 03:35:04] (step=0327000) Train Loss: 0.3239, Train Steps/Sec: 17.16, Grad Norm: 0.0371 +[2025-02-23 03:35:10] (step=0327100) Train Loss: 0.3239, Train Steps/Sec: 17.19, Grad Norm: 0.0402 +[2025-02-23 03:35:15] (step=0327200) Train Loss: 0.3236, Train Steps/Sec: 17.16, Grad Norm: 0.0377 +[2025-02-23 03:35:21] (step=0327300) Train Loss: 0.3244, Train Steps/Sec: 17.19, Grad Norm: 0.0370 +[2025-02-23 03:35:27] (step=0327400) Train Loss: 0.3243, Train Steps/Sec: 17.24, Grad Norm: 0.0424 +[2025-02-23 03:35:33] (step=0327500) Train Loss: 0.3245, Train Steps/Sec: 17.43, Grad Norm: 0.0401 +[2025-02-23 03:35:39] (step=0327600) Train Loss: 0.3237, Train Steps/Sec: 16.72, Grad Norm: 0.0373 +[2025-02-23 03:35:45] (step=0327700) Train Loss: 0.3243, Train Steps/Sec: 17.42, Grad Norm: 0.0364 +[2025-02-23 03:35:51] (step=0327800) Train Loss: 0.3246, Train Steps/Sec: 14.41, Grad Norm: 0.0431 +[2025-02-23 03:35:57] (step=0327900) Train Loss: 0.3240, Train Steps/Sec: 17.32, Grad Norm: 0.0361 +[2025-02-23 03:36:03] (step=0328000) Train Loss: 0.3243, Train Steps/Sec: 17.36, Grad Norm: 0.0390 +[2025-02-23 03:36:09] (step=0328100) Train Loss: 0.3242, Train Steps/Sec: 16.59, Grad Norm: 0.0409 +[2025-02-23 03:36:15] (step=0328200) Train Loss: 0.3241, Train Steps/Sec: 16.68, Grad Norm: 0.0413 +[2025-02-23 03:36:22] (step=0328300) Train Loss: 0.3242, Train Steps/Sec: 15.21, Grad Norm: 0.0385 +[2025-02-23 03:36:28] (step=0328400) Train Loss: 0.3247, Train Steps/Sec: 15.91, Grad Norm: 0.0418 +[2025-02-23 03:36:34] (step=0328500) Train Loss: 0.3243, Train Steps/Sec: 16.01, Grad Norm: 0.0347 +[2025-02-23 03:36:41] (step=0328600) Train Loss: 0.3247, Train Steps/Sec: 14.73, Grad Norm: 0.0362 +[2025-02-23 03:36:47] (step=0328700) Train Loss: 0.3245, Train Steps/Sec: 15.91, Grad Norm: 0.0369 +[2025-02-23 03:36:53] (step=0328800) Train Loss: 0.3241, Train Steps/Sec: 17.27, Grad Norm: 0.0384 +[2025-02-23 03:36:59] (step=0328900) Train Loss: 0.3242, Train Steps/Sec: 17.22, Grad Norm: 0.0412 +[2025-02-23 03:37:05] (step=0329000) Train Loss: 0.3243, Train Steps/Sec: 17.25, Grad Norm: 0.0379 +[2025-02-23 03:37:12] (step=0329100) Train Loss: 0.3246, Train Steps/Sec: 14.38, Grad Norm: 0.0380 +[2025-02-23 03:37:17] (step=0329200) Train Loss: 0.3246, Train Steps/Sec: 17.37, Grad Norm: 0.0403 +[2025-02-23 03:37:23] (step=0329300) Train Loss: 0.3241, Train Steps/Sec: 17.39, Grad Norm: 0.0368 +[2025-02-23 03:37:29] (step=0329400) Train Loss: 0.3242, Train Steps/Sec: 17.37, Grad Norm: 0.0393 +[2025-02-23 03:37:35] (step=0329500) Train Loss: 0.3238, Train Steps/Sec: 16.70, Grad Norm: 0.0355 +[2025-02-23 03:37:41] (step=0329600) Train Loss: 0.3245, Train Steps/Sec: 17.38, Grad Norm: 0.0365 +[2025-02-23 03:37:46] (step=0329700) Train Loss: 0.3239, Train Steps/Sec: 17.36, Grad Norm: 0.0368 +[2025-02-23 03:37:52] (step=0329800) Train Loss: 0.3238, Train Steps/Sec: 17.15, Grad Norm: 0.0402 +[2025-02-23 03:37:58] (step=0329900) Train Loss: 0.3246, Train Steps/Sec: 17.17, Grad Norm: 0.0401 +[2025-02-23 03:38:04] (step=0330000) Train Loss: 0.3240, Train Steps/Sec: 17.27, Grad Norm: 0.0382 +[2025-02-23 03:38:10] (step=0330100) Train Loss: 0.3245, Train Steps/Sec: 16.57, Grad Norm: 0.0387 +[2025-02-23 03:38:16] (step=0330200) Train Loss: 0.3242, Train Steps/Sec: 16.49, Grad Norm: 0.0379 +[2025-02-23 03:38:24] (step=0330300) Train Loss: 0.3245, Train Steps/Sec: 12.89, Grad Norm: 0.0398 +[2025-02-23 03:38:30] (step=0330400) Train Loss: 0.3244, Train Steps/Sec: 15.33, Grad Norm: 0.0384 +[2025-02-23 03:38:37] (step=0330500) Train Loss: 0.3240, Train Steps/Sec: 14.28, Grad Norm: 0.0391 +[2025-02-23 03:38:43] (step=0330600) Train Loss: 0.3242, Train Steps/Sec: 17.44, Grad Norm: 0.0417 +[2025-02-23 03:38:49] (step=0330700) Train Loss: 0.3241, Train Steps/Sec: 15.86, Grad Norm: 0.0391 +[2025-02-23 03:38:55] (step=0330800) Train Loss: 0.3246, Train Steps/Sec: 17.33, Grad Norm: 0.0390 +[2025-02-23 03:39:01] (step=0330900) Train Loss: 0.3239, Train Steps/Sec: 17.41, Grad Norm: 0.0384 +[2025-02-23 03:39:07] (step=0331000) Train Loss: 0.3245, Train Steps/Sec: 17.33, Grad Norm: 0.0396 +[2025-02-23 03:39:12] (step=0331100) Train Loss: 0.3239, Train Steps/Sec: 17.36, Grad Norm: 0.0367 +[2025-02-23 03:39:18] (step=0331200) Train Loss: 0.3242, Train Steps/Sec: 17.18, Grad Norm: 0.0400 +[2025-02-23 03:39:24] (step=0331300) Train Loss: 0.3245, Train Steps/Sec: 17.14, Grad Norm: 0.0412 +[2025-02-23 03:39:30] (step=0331400) Train Loss: 0.3241, Train Steps/Sec: 17.13, Grad Norm: 0.0398 +[2025-02-23 03:39:36] (step=0331500) Train Loss: 0.3245, Train Steps/Sec: 16.46, Grad Norm: 0.0369 +[2025-02-23 03:39:43] (step=0331600) Train Loss: 0.3242, Train Steps/Sec: 14.31, Grad Norm: 0.0395 +[2025-02-23 03:39:49] (step=0331700) Train Loss: 0.3244, Train Steps/Sec: 17.16, Grad Norm: 0.0389 +[2025-02-23 03:39:54] (step=0331800) Train Loss: 0.3238, Train Steps/Sec: 17.18, Grad Norm: 0.0372 +[2025-02-23 03:40:00] (step=0331900) Train Loss: 0.3241, Train Steps/Sec: 17.16, Grad Norm: 0.0393 +[2025-02-23 03:40:06] (step=0332000) Train Loss: 0.3239, Train Steps/Sec: 17.21, Grad Norm: 0.0362 +[2025-02-23 03:40:12] (step=0332100) Train Loss: 0.3244, Train Steps/Sec: 16.61, Grad Norm: 0.0378 +[2025-02-23 03:40:18] (step=0332200) Train Loss: 0.3242, Train Steps/Sec: 16.69, Grad Norm: 0.0354 +[2025-02-23 03:40:24] (step=0332300) Train Loss: 0.3242, Train Steps/Sec: 15.89, Grad Norm: 0.0388 +[2025-02-23 03:40:31] (step=0332400) Train Loss: 0.3242, Train Steps/Sec: 14.67, Grad Norm: 0.0360 +[2025-02-23 03:40:38] (step=0332500) Train Loss: 0.3242, Train Steps/Sec: 14.27, Grad Norm: 0.0373 +[2025-02-23 03:40:44] (step=0332600) Train Loss: 0.3239, Train Steps/Sec: 16.08, Grad Norm: 0.0361 +[2025-02-23 03:40:50] (step=0332700) Train Loss: 0.3239, Train Steps/Sec: 17.42, Grad Norm: 0.0383 +[2025-02-23 03:40:57] (step=0332800) Train Loss: 0.3247, Train Steps/Sec: 14.36, Grad Norm: 0.0353 +[2025-02-23 03:41:03] (step=0332900) Train Loss: 0.3235, Train Steps/Sec: 17.16, Grad Norm: 0.0404 +[2025-02-23 03:41:09] (step=0333000) Train Loss: 0.3243, Train Steps/Sec: 17.19, Grad Norm: 0.0400 +[2025-02-23 03:41:15] (step=0333100) Train Loss: 0.3239, Train Steps/Sec: 17.25, Grad Norm: 0.0357 +[2025-02-23 03:41:20] (step=0333200) Train Loss: 0.3245, Train Steps/Sec: 17.23, Grad Norm: 0.0365 +[2025-02-23 03:41:26] (step=0333300) Train Loss: 0.3241, Train Steps/Sec: 17.23, Grad Norm: 0.0375 +[2025-02-23 03:41:32] (step=0333400) Train Loss: 0.3244, Train Steps/Sec: 16.52, Grad Norm: 0.0378 +[2025-02-23 03:41:38] (step=0333500) Train Loss: 0.3242, Train Steps/Sec: 17.23, Grad Norm: 0.0404 +[2025-02-23 03:41:44] (step=0333600) Train Loss: 0.3244, Train Steps/Sec: 17.27, Grad Norm: 0.0386 +[2025-02-23 03:41:50] (step=0333700) Train Loss: 0.3237, Train Steps/Sec: 17.25, Grad Norm: 0.0344 +[2025-02-23 03:41:55] (step=0333800) Train Loss: 0.3238, Train Steps/Sec: 17.29, Grad Norm: 0.0393 +[2025-02-23 03:42:01] (step=0333900) Train Loss: 0.3240, Train Steps/Sec: 17.28, Grad Norm: 0.0399 +[2025-02-23 03:42:07] (step=0334000) Train Loss: 0.3234, Train Steps/Sec: 17.16, Grad Norm: 0.0391 +[2025-02-23 03:42:14] (step=0334100) Train Loss: 0.3246, Train Steps/Sec: 13.93, Grad Norm: 0.0367 +[2025-02-23 03:42:20] (step=0334200) Train Loss: 0.3245, Train Steps/Sec: 16.56, Grad Norm: 0.0369 +[2025-02-23 03:42:27] (step=0334300) Train Loss: 0.3239, Train Steps/Sec: 14.55, Grad Norm: 0.0379 +[2025-02-23 03:42:34] (step=0334400) Train Loss: 0.3241, Train Steps/Sec: 14.02, Grad Norm: 0.0376 +[2025-02-23 03:42:41] (step=0334500) Train Loss: 0.3242, Train Steps/Sec: 15.72, Grad Norm: 0.0395 +[2025-02-23 03:42:47] (step=0334600) Train Loss: 0.3240, Train Steps/Sec: 16.43, Grad Norm: 0.0384 +[2025-02-23 03:42:53] (step=0334700) Train Loss: 0.3245, Train Steps/Sec: 17.19, Grad Norm: 0.0421 +[2025-02-23 03:42:58] (step=0334800) Train Loss: 0.3239, Train Steps/Sec: 17.26, Grad Norm: 0.0363 +[2025-02-23 03:43:04] (step=0334900) Train Loss: 0.3238, Train Steps/Sec: 17.30, Grad Norm: 0.0374 +[2025-02-23 03:43:10] (step=0335000) Train Loss: 0.3236, Train Steps/Sec: 17.30, Grad Norm: 0.0373 +[2025-02-23 03:43:16] (step=0335100) Train Loss: 0.3240, Train Steps/Sec: 17.23, Grad Norm: 0.0401 +[2025-02-23 03:43:22] (step=0335200) Train Loss: 0.3247, Train Steps/Sec: 17.20, Grad Norm: 0.0367 +[2025-02-23 03:43:29] (step=0335300) Train Loss: 0.3244, Train Steps/Sec: 14.38, Grad Norm: 0.0402 +[2025-02-23 03:43:35] (step=0335400) Train Loss: 0.3238, Train Steps/Sec: 16.54, Grad Norm: 0.0382 +[2025-02-23 03:43:40] (step=0335500) Train Loss: 0.3244, Train Steps/Sec: 17.27, Grad Norm: 0.0395 +[2025-02-23 03:43:46] (step=0335600) Train Loss: 0.3233, Train Steps/Sec: 17.30, Grad Norm: 0.0382 +[2025-02-23 03:43:52] (step=0335700) Train Loss: 0.3242, Train Steps/Sec: 17.33, Grad Norm: 0.0396 +[2025-02-23 03:43:58] (step=0335800) Train Loss: 0.3245, Train Steps/Sec: 17.35, Grad Norm: 0.0383 +[2025-02-23 03:44:03] (step=0335900) Train Loss: 0.3242, Train Steps/Sec: 17.39, Grad Norm: 0.0404 +[2025-02-23 03:44:09] (step=0336000) Train Loss: 0.3241, Train Steps/Sec: 17.38, Grad Norm: 0.0386 +[2025-02-23 03:44:15] (step=0336100) Train Loss: 0.3240, Train Steps/Sec: 16.57, Grad Norm: 0.0391 +[2025-02-23 03:44:21] (step=0336200) Train Loss: 0.3244, Train Steps/Sec: 17.39, Grad Norm: 0.0384 +[2025-02-23 03:44:28] (step=0336300) Train Loss: 0.3241, Train Steps/Sec: 13.65, Grad Norm: 0.0362 +[2025-02-23 03:44:35] (step=0336400) Train Loss: 0.3245, Train Steps/Sec: 14.17, Grad Norm: 0.0367 +[2025-02-23 03:44:42] (step=0336500) Train Loss: 0.3237, Train Steps/Sec: 15.25, Grad Norm: 0.0408 +[2025-02-23 03:44:49] (step=0336600) Train Loss: 0.3238, Train Steps/Sec: 14.45, Grad Norm: 0.0363 +[2025-02-23 03:44:55] (step=0336700) Train Loss: 0.3237, Train Steps/Sec: 17.44, Grad Norm: 0.0378 +[2025-02-23 03:45:00] (step=0336800) Train Loss: 0.3241, Train Steps/Sec: 17.40, Grad Norm: 0.0399 +[2025-02-23 03:45:06] (step=0336900) Train Loss: 0.3245, Train Steps/Sec: 17.41, Grad Norm: 0.0387 +[2025-02-23 03:45:12] (step=0337000) Train Loss: 0.3238, Train Steps/Sec: 17.38, Grad Norm: 0.0398 +[2025-02-23 03:45:18] (step=0337100) Train Loss: 0.3240, Train Steps/Sec: 17.44, Grad Norm: 0.0404 +[2025-02-23 03:45:23] (step=0337200) Train Loss: 0.3242, Train Steps/Sec: 17.37, Grad Norm: 0.0419 +[2025-02-23 03:45:29] (step=0337300) Train Loss: 0.3244, Train Steps/Sec: 17.22, Grad Norm: 0.0365 +[2025-02-23 03:45:35] (step=0337400) Train Loss: 0.3237, Train Steps/Sec: 16.51, Grad Norm: 0.0386 +[2025-02-23 03:45:41] (step=0337500) Train Loss: 0.3243, Train Steps/Sec: 17.24, Grad Norm: 0.0399 +[2025-02-23 03:45:47] (step=0337600) Train Loss: 0.3248, Train Steps/Sec: 17.38, Grad Norm: 0.0403 +[2025-02-23 03:45:52] (step=0337700) Train Loss: 0.3236, Train Steps/Sec: 17.44, Grad Norm: 0.0377 +[2025-02-23 03:45:59] (step=0337800) Train Loss: 0.3232, Train Steps/Sec: 14.40, Grad Norm: 0.0372 +[2025-02-23 03:46:05] (step=0337900) Train Loss: 0.3240, Train Steps/Sec: 17.31, Grad Norm: 0.0385 +[2025-02-23 03:46:11] (step=0338000) Train Loss: 0.3242, Train Steps/Sec: 17.35, Grad Norm: 0.0382 +[2025-02-23 03:46:17] (step=0338100) Train Loss: 0.3239, Train Steps/Sec: 17.39, Grad Norm: 0.0397 +[2025-02-23 03:46:23] (step=0338200) Train Loss: 0.3240, Train Steps/Sec: 15.91, Grad Norm: 0.0373 +[2025-02-23 03:46:30] (step=0338300) Train Loss: 0.3241, Train Steps/Sec: 13.63, Grad Norm: 0.0369 +[2025-02-23 03:46:38] (step=0338400) Train Loss: 0.3240, Train Steps/Sec: 13.52, Grad Norm: 0.0367 +[2025-02-23 03:46:44] (step=0338500) Train Loss: 0.3238, Train Steps/Sec: 16.62, Grad Norm: 0.0418 +[2025-02-23 03:46:50] (step=0338600) Train Loss: 0.3238, Train Steps/Sec: 17.34, Grad Norm: 0.0432 +[2025-02-23 03:46:55] (step=0338700) Train Loss: 0.3235, Train Steps/Sec: 17.29, Grad Norm: 0.0344 +[2025-02-23 03:47:01] (step=0338800) Train Loss: 0.3246, Train Steps/Sec: 17.31, Grad Norm: 0.0370 +[2025-02-23 03:47:07] (step=0338900) Train Loss: 0.3241, Train Steps/Sec: 17.41, Grad Norm: 0.0403 +[2025-02-23 03:47:13] (step=0339000) Train Loss: 0.3239, Train Steps/Sec: 17.33, Grad Norm: 0.0374 +[2025-02-23 03:47:20] (step=0339100) Train Loss: 0.3241, Train Steps/Sec: 14.45, Grad Norm: 0.0384 +[2025-02-23 03:47:25] (step=0339200) Train Loss: 0.3238, Train Steps/Sec: 17.37, Grad Norm: 0.0376 +[2025-02-23 03:47:31] (step=0339300) Train Loss: 0.3243, Train Steps/Sec: 16.65, Grad Norm: 0.0360 +[2025-02-23 03:47:37] (step=0339400) Train Loss: 0.3240, Train Steps/Sec: 17.32, Grad Norm: 0.0375 +[2025-02-23 03:47:43] (step=0339500) Train Loss: 0.3241, Train Steps/Sec: 17.33, Grad Norm: 0.0361 +[2025-02-23 03:47:49] (step=0339600) Train Loss: 0.3241, Train Steps/Sec: 17.30, Grad Norm: 0.0367 +[2025-02-23 03:47:54] (step=0339700) Train Loss: 0.3242, Train Steps/Sec: 17.32, Grad Norm: 0.0373 +[2025-02-23 03:48:00] (step=0339800) Train Loss: 0.3247, Train Steps/Sec: 17.27, Grad Norm: 0.0355 +[2025-02-23 03:48:06] (step=0339900) Train Loss: 0.3238, Train Steps/Sec: 17.21, Grad Norm: 0.0395 +[2025-02-23 03:48:12] (step=0340000) Train Loss: 0.3236, Train Steps/Sec: 17.17, Grad Norm: 0.0440 +[2025-02-23 03:48:18] (step=0340100) Train Loss: 0.3244, Train Steps/Sec: 17.31, Grad Norm: 0.0374 +[2025-02-23 03:48:25] (step=0340200) Train Loss: 0.3237, Train Steps/Sec: 14.15, Grad Norm: 0.0360 +[2025-02-23 03:48:32] (step=0340300) Train Loss: 0.3234, Train Steps/Sec: 12.89, Grad Norm: 0.0376 +[2025-02-23 03:48:40] (step=0340400) Train Loss: 0.3235, Train Steps/Sec: 13.49, Grad Norm: 0.0406 +[2025-02-23 03:48:46] (step=0340500) Train Loss: 0.3246, Train Steps/Sec: 16.53, Grad Norm: 0.0358 +[2025-02-23 03:48:52] (step=0340600) Train Loss: 0.3234, Train Steps/Sec: 17.24, Grad Norm: 0.0408 +[2025-02-23 03:48:58] (step=0340700) Train Loss: 0.3245, Train Steps/Sec: 17.28, Grad Norm: 0.0386 +[2025-02-23 03:49:03] (step=0340800) Train Loss: 0.3243, Train Steps/Sec: 17.28, Grad Norm: 0.0390 +[2025-02-23 03:49:09] (step=0340900) Train Loss: 0.3246, Train Steps/Sec: 17.37, Grad Norm: 0.0401 +[2025-02-23 03:49:15] (step=0341000) Train Loss: 0.3241, Train Steps/Sec: 17.42, Grad Norm: 0.0366 +[2025-02-23 03:49:21] (step=0341100) Train Loss: 0.3244, Train Steps/Sec: 17.32, Grad Norm: 0.0369 +[2025-02-23 03:49:26] (step=0341200) Train Loss: 0.3241, Train Steps/Sec: 17.33, Grad Norm: 0.0394 +[2025-02-23 03:49:32] (step=0341300) Train Loss: 0.3241, Train Steps/Sec: 16.62, Grad Norm: 0.0375 +[2025-02-23 03:49:38] (step=0341400) Train Loss: 0.3240, Train Steps/Sec: 17.37, Grad Norm: 0.0380 +[2025-02-23 03:49:44] (step=0341500) Train Loss: 0.3239, Train Steps/Sec: 17.31, Grad Norm: 0.0389 +[2025-02-23 03:49:51] (step=0341600) Train Loss: 0.3245, Train Steps/Sec: 14.52, Grad Norm: 0.0398 +[2025-02-23 03:49:57] (step=0341700) Train Loss: 0.3242, Train Steps/Sec: 17.28, Grad Norm: 0.0388 +[2025-02-23 03:50:02] (step=0341800) Train Loss: 0.3242, Train Steps/Sec: 17.23, Grad Norm: 0.0408 +[2025-02-23 03:50:08] (step=0341900) Train Loss: 0.3242, Train Steps/Sec: 17.29, Grad Norm: 0.0363 +[2025-02-23 03:50:14] (step=0342000) Train Loss: 0.3235, Train Steps/Sec: 17.27, Grad Norm: 0.0402 +[2025-02-23 03:50:20] (step=0342100) Train Loss: 0.3245, Train Steps/Sec: 16.60, Grad Norm: 0.0375 +[2025-02-23 03:50:27] (step=0342200) Train Loss: 0.3244, Train Steps/Sec: 14.18, Grad Norm: 0.0361 +[2025-02-23 03:50:34] (step=0342300) Train Loss: 0.3245, Train Steps/Sec: 15.13, Grad Norm: 0.0391 +[2025-02-23 03:50:40] (step=0342400) Train Loss: 0.3234, Train Steps/Sec: 15.07, Grad Norm: 0.0385 +[2025-02-23 03:50:47] (step=0342500) Train Loss: 0.3234, Train Steps/Sec: 15.79, Grad Norm: 0.0373 +[2025-02-23 03:50:52] (step=0342600) Train Loss: 0.3237, Train Steps/Sec: 17.22, Grad Norm: 0.0375 +[2025-02-23 03:50:58] (step=0342700) Train Loss: 0.3245, Train Steps/Sec: 17.23, Grad Norm: 0.0385 +[2025-02-23 03:51:05] (step=0342800) Train Loss: 0.3246, Train Steps/Sec: 14.41, Grad Norm: 0.0373 +[2025-02-23 03:51:11] (step=0342900) Train Loss: 0.3240, Train Steps/Sec: 17.19, Grad Norm: 0.0430 +[2025-02-23 03:51:17] (step=0343000) Train Loss: 0.3242, Train Steps/Sec: 17.17, Grad Norm: 0.0397 +[2025-02-23 03:51:23] (step=0343100) Train Loss: 0.3236, Train Steps/Sec: 17.18, Grad Norm: 0.0378 +[2025-02-23 03:51:29] (step=0343200) Train Loss: 0.3243, Train Steps/Sec: 16.46, Grad Norm: 0.0381 +[2025-02-23 03:51:35] (step=0343300) Train Loss: 0.3245, Train Steps/Sec: 17.19, Grad Norm: 0.0384 +[2025-02-23 03:51:40] (step=0343400) Train Loss: 0.3236, Train Steps/Sec: 17.25, Grad Norm: 0.0388 +[2025-02-23 03:51:46] (step=0343500) Train Loss: 0.3238, Train Steps/Sec: 17.28, Grad Norm: 0.0393 +[2025-02-23 03:51:52] (step=0343600) Train Loss: 0.3237, Train Steps/Sec: 17.29, Grad Norm: 0.0388 +[2025-02-23 03:51:58] (step=0343700) Train Loss: 0.3239, Train Steps/Sec: 17.26, Grad Norm: 0.0400 +[2025-02-23 03:52:04] (step=0343800) Train Loss: 0.3244, Train Steps/Sec: 17.25, Grad Norm: 0.0343 +[2025-02-23 03:52:09] (step=0343900) Train Loss: 0.3249, Train Steps/Sec: 17.28, Grad Norm: 0.0378 +[2025-02-23 03:52:15] (step=0344000) Train Loss: 0.3242, Train Steps/Sec: 17.18, Grad Norm: 0.0394 +[2025-02-23 03:52:23] (step=0344100) Train Loss: 0.3247, Train Steps/Sec: 12.14, Grad Norm: 0.0395 +[2025-02-23 03:52:29] (step=0344200) Train Loss: 0.3244, Train Steps/Sec: 16.40, Grad Norm: 0.0358 +[2025-02-23 03:52:36] (step=0344300) Train Loss: 0.3241, Train Steps/Sec: 14.61, Grad Norm: 0.0419 +[2025-02-23 03:52:43] (step=0344400) Train Loss: 0.3240, Train Steps/Sec: 15.20, Grad Norm: 0.0383 +[2025-02-23 03:52:49] (step=0344500) Train Loss: 0.3242, Train Steps/Sec: 15.93, Grad Norm: 0.0364 +[2025-02-23 03:52:55] (step=0344600) Train Loss: 0.3243, Train Steps/Sec: 17.26, Grad Norm: 0.0390 +[2025-02-23 03:53:01] (step=0344700) Train Loss: 0.3239, Train Steps/Sec: 17.24, Grad Norm: 0.0369 +[2025-02-23 03:53:07] (step=0344800) Train Loss: 0.3241, Train Steps/Sec: 17.21, Grad Norm: 0.0372 +[2025-02-23 03:53:12] (step=0344900) Train Loss: 0.3240, Train Steps/Sec: 17.25, Grad Norm: 0.0378 +[2025-02-23 03:53:18] (step=0345000) Train Loss: 0.3243, Train Steps/Sec: 17.23, Grad Norm: 0.0403 +[2025-02-23 03:53:24] (step=0345100) Train Loss: 0.3245, Train Steps/Sec: 17.25, Grad Norm: 0.0400 +[2025-02-23 03:53:30] (step=0345200) Train Loss: 0.3243, Train Steps/Sec: 16.45, Grad Norm: 0.0365 +[2025-02-23 03:53:37] (step=0345300) Train Loss: 0.3246, Train Steps/Sec: 14.29, Grad Norm: 0.0375 +[2025-02-23 03:53:43] (step=0345400) Train Loss: 0.3238, Train Steps/Sec: 17.34, Grad Norm: 0.0380 +[2025-02-23 03:53:49] (step=0345500) Train Loss: 0.3243, Train Steps/Sec: 17.35, Grad Norm: 0.0375 +[2025-02-23 03:53:54] (step=0345600) Train Loss: 0.3236, Train Steps/Sec: 17.21, Grad Norm: 0.0411 +[2025-02-23 03:54:00] (step=0345700) Train Loss: 0.3242, Train Steps/Sec: 17.21, Grad Norm: 0.0380 +[2025-02-23 03:54:06] (step=0345800) Train Loss: 0.3241, Train Steps/Sec: 17.14, Grad Norm: 0.0383 +[2025-02-23 03:54:12] (step=0345900) Train Loss: 0.3237, Train Steps/Sec: 17.13, Grad Norm: 0.0376 +[2025-02-23 03:54:18] (step=0346000) Train Loss: 0.3243, Train Steps/Sec: 16.35, Grad Norm: 0.0395 +[2025-02-23 03:54:25] (step=0346100) Train Loss: 0.3236, Train Steps/Sec: 14.49, Grad Norm: 0.0374 +[2025-02-23 03:54:32] (step=0346200) Train Loss: 0.3237, Train Steps/Sec: 14.94, Grad Norm: 0.0380 +[2025-02-23 03:54:38] (step=0346300) Train Loss: 0.3247, Train Steps/Sec: 15.75, Grad Norm: 0.0408 +[2025-02-23 03:54:45] (step=0346400) Train Loss: 0.3241, Train Steps/Sec: 15.03, Grad Norm: 0.0377 +[2025-02-23 03:54:51] (step=0346500) Train Loss: 0.3243, Train Steps/Sec: 15.75, Grad Norm: 0.0380 +[2025-02-23 03:54:58] (step=0346600) Train Loss: 0.3243, Train Steps/Sec: 14.34, Grad Norm: 0.0391 +[2025-02-23 03:55:04] (step=0346700) Train Loss: 0.3236, Train Steps/Sec: 17.25, Grad Norm: 0.0385 +[2025-02-23 03:55:10] (step=0346800) Train Loss: 0.3241, Train Steps/Sec: 17.23, Grad Norm: 0.0363 +[2025-02-23 03:55:15] (step=0346900) Train Loss: 0.3244, Train Steps/Sec: 17.28, Grad Norm: 0.0397 +[2025-02-23 03:55:21] (step=0347000) Train Loss: 0.3238, Train Steps/Sec: 17.22, Grad Norm: 0.0350 +[2025-02-23 03:55:27] (step=0347100) Train Loss: 0.3238, Train Steps/Sec: 16.60, Grad Norm: 0.0376 +[2025-02-23 03:55:33] (step=0347200) Train Loss: 0.3244, Train Steps/Sec: 17.21, Grad Norm: 0.0396 +[2025-02-23 03:55:39] (step=0347300) Train Loss: 0.3237, Train Steps/Sec: 17.04, Grad Norm: 0.0368 +[2025-02-23 03:55:45] (step=0347400) Train Loss: 0.3242, Train Steps/Sec: 17.03, Grad Norm: 0.0373 +[2025-02-23 03:55:51] (step=0347500) Train Loss: 0.3244, Train Steps/Sec: 17.03, Grad Norm: 0.0383 +[2025-02-23 03:55:56] (step=0347600) Train Loss: 0.3237, Train Steps/Sec: 16.99, Grad Norm: 0.0355 +[2025-02-23 03:56:02] (step=0347700) Train Loss: 0.3242, Train Steps/Sec: 17.04, Grad Norm: 0.0340 +[2025-02-23 03:56:09] (step=0347800) Train Loss: 0.3240, Train Steps/Sec: 14.27, Grad Norm: 0.0364 +[2025-02-23 03:56:15] (step=0347900) Train Loss: 0.3244, Train Steps/Sec: 16.38, Grad Norm: 0.0391 +[2025-02-23 03:56:22] (step=0348000) Train Loss: 0.3232, Train Steps/Sec: 15.20, Grad Norm: 0.0425 +[2025-02-23 03:56:28] (step=0348100) Train Loss: 0.3239, Train Steps/Sec: 17.19, Grad Norm: 0.0394 +[2025-02-23 03:56:34] (step=0348200) Train Loss: 0.3238, Train Steps/Sec: 15.13, Grad Norm: 0.0383 +[2025-02-23 03:56:41] (step=0348300) Train Loss: 0.3238, Train Steps/Sec: 15.78, Grad Norm: 0.0380 +[2025-02-23 03:56:47] (step=0348400) Train Loss: 0.3235, Train Steps/Sec: 15.08, Grad Norm: 0.0415 +[2025-02-23 03:56:54] (step=0348500) Train Loss: 0.3238, Train Steps/Sec: 15.84, Grad Norm: 0.0360 +[2025-02-23 03:57:00] (step=0348600) Train Loss: 0.3233, Train Steps/Sec: 17.29, Grad Norm: 0.0361 +[2025-02-23 03:57:05] (step=0348700) Train Loss: 0.3235, Train Steps/Sec: 17.25, Grad Norm: 0.0377 +[2025-02-23 03:57:11] (step=0348800) Train Loss: 0.3243, Train Steps/Sec: 17.19, Grad Norm: 0.0396 +[2025-02-23 03:57:17] (step=0348900) Train Loss: 0.3238, Train Steps/Sec: 17.15, Grad Norm: 0.0391 +[2025-02-23 03:57:23] (step=0349000) Train Loss: 0.3238, Train Steps/Sec: 16.39, Grad Norm: 0.0416 +[2025-02-23 03:57:30] (step=0349100) Train Loss: 0.3240, Train Steps/Sec: 14.40, Grad Norm: 0.0384 +[2025-02-23 03:57:36] (step=0349200) Train Loss: 0.3240, Train Steps/Sec: 17.24, Grad Norm: 0.0409 +[2025-02-23 03:57:42] (step=0349300) Train Loss: 0.3238, Train Steps/Sec: 17.20, Grad Norm: 0.0380 +[2025-02-23 03:57:47] (step=0349400) Train Loss: 0.3241, Train Steps/Sec: 17.32, Grad Norm: 0.0365 +[2025-02-23 03:57:53] (step=0349500) Train Loss: 0.3238, Train Steps/Sec: 17.27, Grad Norm: 0.0359 +[2025-02-23 03:57:59] (step=0349600) Train Loss: 0.3243, Train Steps/Sec: 17.24, Grad Norm: 0.0359 +[2025-02-23 03:58:05] (step=0349700) Train Loss: 0.3241, Train Steps/Sec: 17.23, Grad Norm: 0.0378 +[2025-02-23 03:58:11] (step=0349800) Train Loss: 0.3234, Train Steps/Sec: 17.27, Grad Norm: 0.0380 +[2025-02-23 03:58:17] (step=0349900) Train Loss: 0.3239, Train Steps/Sec: 15.21, Grad Norm: 0.0368 +[2025-02-23 03:58:24] (step=0350000) Train Loss: 0.3240, Train Steps/Sec: 15.84, Grad Norm: 0.0398 +[2025-02-23 03:58:25] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0350000.pt +[2025-02-23 03:58:31] (step=0350100) Train Loss: 0.3235, Train Steps/Sec: 13.14, Grad Norm: 0.0401 +[2025-02-23 03:58:37] (step=0350200) Train Loss: 0.3248, Train Steps/Sec: 16.27, Grad Norm: 0.0372 +[2025-02-23 03:58:45] (step=0350300) Train Loss: 0.3241, Train Steps/Sec: 13.10, Grad Norm: 0.0378 +[2025-02-23 03:58:52] (step=0350400) Train Loss: 0.3236, Train Steps/Sec: 14.95, Grad Norm: 0.0375 +[2025-02-23 03:58:58] (step=0350500) Train Loss: 0.3239, Train Steps/Sec: 16.35, Grad Norm: 0.0391 +[2025-02-23 03:59:04] (step=0350600) Train Loss: 0.3242, Train Steps/Sec: 16.35, Grad Norm: 0.0343 +[2025-02-23 03:59:10] (step=0350700) Train Loss: 0.3240, Train Steps/Sec: 17.03, Grad Norm: 0.0371 +[2025-02-23 03:59:16] (step=0350800) Train Loss: 0.3244, Train Steps/Sec: 17.20, Grad Norm: 0.0394 +[2025-02-23 03:59:21] (step=0350900) Train Loss: 0.3245, Train Steps/Sec: 17.28, Grad Norm: 0.0372 +[2025-02-23 03:59:27] (step=0351000) Train Loss: 0.3239, Train Steps/Sec: 16.60, Grad Norm: 0.0365 +[2025-02-23 03:59:33] (step=0351100) Train Loss: 0.3246, Train Steps/Sec: 17.30, Grad Norm: 0.0383 +[2025-02-23 03:59:39] (step=0351200) Train Loss: 0.3241, Train Steps/Sec: 17.28, Grad Norm: 0.0404 +[2025-02-23 03:59:45] (step=0351300) Train Loss: 0.3243, Train Steps/Sec: 17.27, Grad Norm: 0.0356 +[2025-02-23 03:59:50] (step=0351400) Train Loss: 0.3240, Train Steps/Sec: 17.33, Grad Norm: 0.0360 +[2025-02-23 03:59:56] (step=0351500) Train Loss: 0.3242, Train Steps/Sec: 17.22, Grad Norm: 0.0370 +[2025-02-23 04:00:03] (step=0351600) Train Loss: 0.3240, Train Steps/Sec: 14.20, Grad Norm: 0.0396 +[2025-02-23 04:00:09] (step=0351700) Train Loss: 0.3243, Train Steps/Sec: 16.98, Grad Norm: 0.0387 +[2025-02-23 04:00:15] (step=0351800) Train Loss: 0.3242, Train Steps/Sec: 16.32, Grad Norm: 0.0357 +[2025-02-23 04:00:22] (step=0351900) Train Loss: 0.3239, Train Steps/Sec: 14.49, Grad Norm: 0.0353 +[2025-02-23 04:00:28] (step=0352000) Train Loss: 0.3241, Train Steps/Sec: 16.27, Grad Norm: 0.0375 +[2025-02-23 04:00:35] (step=0352100) Train Loss: 0.3235, Train Steps/Sec: 16.28, Grad Norm: 0.0362 +[2025-02-23 04:00:41] (step=0352200) Train Loss: 0.3240, Train Steps/Sec: 16.25, Grad Norm: 0.0394 +[2025-02-23 04:00:47] (step=0352300) Train Loss: 0.3241, Train Steps/Sec: 15.75, Grad Norm: 0.0385 +[2025-02-23 04:00:54] (step=0352400) Train Loss: 0.3240, Train Steps/Sec: 14.99, Grad Norm: 0.0385 +[2025-02-23 04:01:00] (step=0352500) Train Loss: 0.3239, Train Steps/Sec: 16.35, Grad Norm: 0.0414 +[2025-02-23 04:01:06] (step=0352600) Train Loss: 0.3242, Train Steps/Sec: 16.40, Grad Norm: 0.0356 +[2025-02-23 04:01:12] (step=0352700) Train Loss: 0.3237, Train Steps/Sec: 17.09, Grad Norm: 0.0391 +[2025-02-23 04:01:19] (step=0352800) Train Loss: 0.3239, Train Steps/Sec: 14.11, Grad Norm: 0.0354 +[2025-02-23 04:01:25] (step=0352900) Train Loss: 0.3237, Train Steps/Sec: 16.42, Grad Norm: 0.0448 +[2025-02-23 04:01:31] (step=0353000) Train Loss: 0.3235, Train Steps/Sec: 17.14, Grad Norm: 0.0363 +[2025-02-23 04:01:37] (step=0353100) Train Loss: 0.3243, Train Steps/Sec: 17.14, Grad Norm: 0.0388 +[2025-02-23 04:01:43] (step=0353200) Train Loss: 0.3237, Train Steps/Sec: 17.14, Grad Norm: 0.0369 +[2025-02-23 04:01:48] (step=0353300) Train Loss: 0.3237, Train Steps/Sec: 17.21, Grad Norm: 0.0394 +[2025-02-23 04:01:54] (step=0353400) Train Loss: 0.3225, Train Steps/Sec: 17.23, Grad Norm: 0.0375 +[2025-02-23 04:02:00] (step=0353500) Train Loss: 0.3243, Train Steps/Sec: 17.26, Grad Norm: 0.0437 +[2025-02-23 04:02:06] (step=0353600) Train Loss: 0.3231, Train Steps/Sec: 17.29, Grad Norm: 0.0399 +[2025-02-23 04:02:11] (step=0353700) Train Loss: 0.3247, Train Steps/Sec: 17.29, Grad Norm: 0.0374 +[2025-02-23 04:02:18] (step=0353800) Train Loss: 0.3236, Train Steps/Sec: 15.14, Grad Norm: 0.0391 +[2025-02-23 04:02:24] (step=0353900) Train Loss: 0.3239, Train Steps/Sec: 16.48, Grad Norm: 0.0375 +[2025-02-23 04:02:31] (step=0354000) Train Loss: 0.3236, Train Steps/Sec: 15.77, Grad Norm: 0.0359 +[2025-02-23 04:02:38] (step=0354100) Train Loss: 0.3239, Train Steps/Sec: 14.07, Grad Norm: 0.0362 +[2025-02-23 04:02:44] (step=0354200) Train Loss: 0.3241, Train Steps/Sec: 16.40, Grad Norm: 0.0411 +[2025-02-23 04:02:50] (step=0354300) Train Loss: 0.3240, Train Steps/Sec: 15.77, Grad Norm: 0.0390 +[2025-02-23 04:02:57] (step=0354400) Train Loss: 0.3243, Train Steps/Sec: 15.11, Grad Norm: 0.0350 +[2025-02-23 04:03:03] (step=0354500) Train Loss: 0.3237, Train Steps/Sec: 16.54, Grad Norm: 0.0393 +[2025-02-23 04:03:09] (step=0354600) Train Loss: 0.3240, Train Steps/Sec: 16.56, Grad Norm: 0.0368 +[2025-02-23 04:03:15] (step=0354700) Train Loss: 0.3235, Train Steps/Sec: 17.25, Grad Norm: 0.0365 +[2025-02-23 04:03:20] (step=0354800) Train Loss: 0.3244, Train Steps/Sec: 17.25, Grad Norm: 0.0382 +[2025-02-23 04:03:26] (step=0354900) Train Loss: 0.3245, Train Steps/Sec: 16.50, Grad Norm: 0.0396 +[2025-02-23 04:03:32] (step=0355000) Train Loss: 0.3237, Train Steps/Sec: 17.20, Grad Norm: 0.0362 +[2025-02-23 04:03:38] (step=0355100) Train Loss: 0.3239, Train Steps/Sec: 17.18, Grad Norm: 0.0347 +[2025-02-23 04:03:44] (step=0355200) Train Loss: 0.3240, Train Steps/Sec: 17.22, Grad Norm: 0.0447 +[2025-02-23 04:03:51] (step=0355300) Train Loss: 0.3240, Train Steps/Sec: 14.22, Grad Norm: 0.0372 +[2025-02-23 04:03:57] (step=0355400) Train Loss: 0.3235, Train Steps/Sec: 17.32, Grad Norm: 0.0372 +[2025-02-23 04:04:02] (step=0355500) Train Loss: 0.3244, Train Steps/Sec: 17.26, Grad Norm: 0.0366 +[2025-02-23 04:04:08] (step=0355600) Train Loss: 0.3240, Train Steps/Sec: 17.21, Grad Norm: 0.0386 +[2025-02-23 04:04:15] (step=0355700) Train Loss: 0.3242, Train Steps/Sec: 15.80, Grad Norm: 0.0375 +[2025-02-23 04:04:21] (step=0355800) Train Loss: 0.3242, Train Steps/Sec: 15.18, Grad Norm: 0.0422 +[2025-02-23 04:04:27] (step=0355900) Train Loss: 0.3238, Train Steps/Sec: 16.44, Grad Norm: 0.0385 +[2025-02-23 04:04:33] (step=0356000) Train Loss: 0.3246, Train Steps/Sec: 16.57, Grad Norm: 0.0361 +[2025-02-23 04:04:39] (step=0356100) Train Loss: 0.3243, Train Steps/Sec: 17.28, Grad Norm: 0.0380 +[2025-02-23 04:04:45] (step=0356200) Train Loss: 0.3239, Train Steps/Sec: 16.41, Grad Norm: 0.0407 +[2025-02-23 04:04:52] (step=0356300) Train Loss: 0.3240, Train Steps/Sec: 15.79, Grad Norm: 0.0357 +[2025-02-23 04:04:58] (step=0356400) Train Loss: 0.3238, Train Steps/Sec: 15.05, Grad Norm: 0.0348 +[2025-02-23 04:05:04] (step=0356500) Train Loss: 0.3238, Train Steps/Sec: 16.44, Grad Norm: 0.0379 +[2025-02-23 04:05:12] (step=0356600) Train Loss: 0.3245, Train Steps/Sec: 13.68, Grad Norm: 0.0408 +[2025-02-23 04:05:17] (step=0356700) Train Loss: 0.3240, Train Steps/Sec: 17.15, Grad Norm: 0.0377 +[2025-02-23 04:05:24] (step=0356800) Train Loss: 0.3237, Train Steps/Sec: 16.49, Grad Norm: 0.0381 +[2025-02-23 04:05:29] (step=0356900) Train Loss: 0.3248, Train Steps/Sec: 17.18, Grad Norm: 0.0366 +[2025-02-23 04:05:35] (step=0357000) Train Loss: 0.3241, Train Steps/Sec: 17.17, Grad Norm: 0.0370 +[2025-02-23 04:05:41] (step=0357100) Train Loss: 0.3238, Train Steps/Sec: 17.43, Grad Norm: 0.0375 +[2025-02-23 04:05:47] (step=0357200) Train Loss: 0.3240, Train Steps/Sec: 17.40, Grad Norm: 0.0376 +[2025-02-23 04:05:52] (step=0357300) Train Loss: 0.3246, Train Steps/Sec: 17.38, Grad Norm: 0.0381 +[2025-02-23 04:05:58] (step=0357400) Train Loss: 0.3238, Train Steps/Sec: 17.35, Grad Norm: 0.0377 +[2025-02-23 04:06:04] (step=0357500) Train Loss: 0.3237, Train Steps/Sec: 17.26, Grad Norm: 0.0381 +[2025-02-23 04:06:10] (step=0357600) Train Loss: 0.3239, Train Steps/Sec: 17.28, Grad Norm: 0.0394 +[2025-02-23 04:06:17] (step=0357700) Train Loss: 0.3237, Train Steps/Sec: 14.70, Grad Norm: 0.0394 +[2025-02-23 04:06:24] (step=0357800) Train Loss: 0.3237, Train Steps/Sec: 13.71, Grad Norm: 0.0382 +[2025-02-23 04:06:30] (step=0357900) Train Loss: 0.3235, Train Steps/Sec: 15.87, Grad Norm: 0.0375 +[2025-02-23 04:06:36] (step=0358000) Train Loss: 0.3235, Train Steps/Sec: 17.37, Grad Norm: 0.0387 +[2025-02-23 04:06:42] (step=0358100) Train Loss: 0.3238, Train Steps/Sec: 17.38, Grad Norm: 0.0387 +[2025-02-23 04:06:48] (step=0358200) Train Loss: 0.3239, Train Steps/Sec: 16.55, Grad Norm: 0.0389 +[2025-02-23 04:06:54] (step=0358300) Train Loss: 0.3234, Train Steps/Sec: 15.94, Grad Norm: 0.0356 +[2025-02-23 04:07:01] (step=0358400) Train Loss: 0.3241, Train Steps/Sec: 15.22, Grad Norm: 0.0393 +[2025-02-23 04:07:07] (step=0358500) Train Loss: 0.3240, Train Steps/Sec: 16.61, Grad Norm: 0.0372 +[2025-02-23 04:07:13] (step=0358600) Train Loss: 0.3242, Train Steps/Sec: 16.49, Grad Norm: 0.0387 +[2025-02-23 04:07:18] (step=0358700) Train Loss: 0.3239, Train Steps/Sec: 17.23, Grad Norm: 0.0383 +[2025-02-23 04:07:25] (step=0358800) Train Loss: 0.3238, Train Steps/Sec: 16.52, Grad Norm: 0.0381 +[2025-02-23 04:07:30] (step=0358900) Train Loss: 0.3239, Train Steps/Sec: 17.22, Grad Norm: 0.0401 +[2025-02-23 04:07:36] (step=0359000) Train Loss: 0.3235, Train Steps/Sec: 17.39, Grad Norm: 0.0383 +[2025-02-23 04:07:43] (step=0359100) Train Loss: 0.3238, Train Steps/Sec: 14.29, Grad Norm: 0.0364 +[2025-02-23 04:07:49] (step=0359200) Train Loss: 0.3237, Train Steps/Sec: 17.35, Grad Norm: 0.0409 +[2025-02-23 04:07:55] (step=0359300) Train Loss: 0.3241, Train Steps/Sec: 17.27, Grad Norm: 0.0404 +[2025-02-23 04:08:00] (step=0359400) Train Loss: 0.3240, Train Steps/Sec: 17.27, Grad Norm: 0.0404 +[2025-02-23 04:08:06] (step=0359500) Train Loss: 0.3237, Train Steps/Sec: 17.25, Grad Norm: 0.0390 +[2025-02-23 04:08:13] (step=0359600) Train Loss: 0.3240, Train Steps/Sec: 15.86, Grad Norm: 0.0406 +[2025-02-23 04:08:19] (step=0359700) Train Loss: 0.3238, Train Steps/Sec: 15.27, Grad Norm: 0.0379 +[2025-02-23 04:08:25] (step=0359800) Train Loss: 0.3234, Train Steps/Sec: 15.89, Grad Norm: 0.0395 +[2025-02-23 04:08:31] (step=0359900) Train Loss: 0.3238, Train Steps/Sec: 17.47, Grad Norm: 0.0364 +[2025-02-23 04:08:37] (step=0360000) Train Loss: 0.3239, Train Steps/Sec: 17.39, Grad Norm: 0.0363 +[2025-02-23 04:08:43] (step=0360100) Train Loss: 0.3241, Train Steps/Sec: 17.41, Grad Norm: 0.0392 +[2025-02-23 04:08:49] (step=0360200) Train Loss: 0.3235, Train Steps/Sec: 16.60, Grad Norm: 0.0397 +[2025-02-23 04:08:56] (step=0360300) Train Loss: 0.3239, Train Steps/Sec: 13.37, Grad Norm: 0.0364 +[2025-02-23 04:09:03] (step=0360400) Train Loss: 0.3235, Train Steps/Sec: 15.09, Grad Norm: 0.0399 +[2025-02-23 04:09:09] (step=0360500) Train Loss: 0.3244, Train Steps/Sec: 16.52, Grad Norm: 0.0362 +[2025-02-23 04:09:15] (step=0360600) Train Loss: 0.3238, Train Steps/Sec: 16.53, Grad Norm: 0.0394 +[2025-02-23 04:09:21] (step=0360700) Train Loss: 0.3237, Train Steps/Sec: 16.57, Grad Norm: 0.0395 +[2025-02-23 04:09:27] (step=0360800) Train Loss: 0.3236, Train Steps/Sec: 17.25, Grad Norm: 0.0366 +[2025-02-23 04:09:33] (step=0360900) Train Loss: 0.3236, Train Steps/Sec: 17.21, Grad Norm: 0.0396 +[2025-02-23 04:09:38] (step=0361000) Train Loss: 0.3237, Train Steps/Sec: 17.22, Grad Norm: 0.0397 +[2025-02-23 04:09:44] (step=0361100) Train Loss: 0.3239, Train Steps/Sec: 17.28, Grad Norm: 0.0378 +[2025-02-23 04:09:50] (step=0361200) Train Loss: 0.3241, Train Steps/Sec: 17.27, Grad Norm: 0.0356 +[2025-02-23 04:09:56] (step=0361300) Train Loss: 0.3237, Train Steps/Sec: 17.28, Grad Norm: 0.0377 +[2025-02-23 04:10:02] (step=0361400) Train Loss: 0.3237, Train Steps/Sec: 17.23, Grad Norm: 0.0397 +[2025-02-23 04:10:08] (step=0361500) Train Loss: 0.3240, Train Steps/Sec: 16.54, Grad Norm: 0.0396 +[2025-02-23 04:10:15] (step=0361600) Train Loss: 0.3240, Train Steps/Sec: 13.33, Grad Norm: 0.0369 +[2025-02-23 04:10:21] (step=0361700) Train Loss: 0.3238, Train Steps/Sec: 16.50, Grad Norm: 0.0384 +[2025-02-23 04:10:27] (step=0361800) Train Loss: 0.3233, Train Steps/Sec: 15.79, Grad Norm: 0.0410 +[2025-02-23 04:10:33] (step=0361900) Train Loss: 0.3244, Train Steps/Sec: 17.28, Grad Norm: 0.0358 +[2025-02-23 04:10:39] (step=0362000) Train Loss: 0.3248, Train Steps/Sec: 17.27, Grad Norm: 0.0374 +[2025-02-23 04:10:45] (step=0362100) Train Loss: 0.3242, Train Steps/Sec: 17.24, Grad Norm: 0.0360 +[2025-02-23 04:10:51] (step=0362200) Train Loss: 0.3241, Train Steps/Sec: 16.42, Grad Norm: 0.0347 +[2025-02-23 04:10:57] (step=0362300) Train Loss: 0.3244, Train Steps/Sec: 15.78, Grad Norm: 0.0427 +[2025-02-23 04:11:04] (step=0362400) Train Loss: 0.3242, Train Steps/Sec: 15.05, Grad Norm: 0.0377 +[2025-02-23 04:11:10] (step=0362500) Train Loss: 0.3237, Train Steps/Sec: 16.52, Grad Norm: 0.0404 +[2025-02-23 04:11:16] (step=0362600) Train Loss: 0.3239, Train Steps/Sec: 16.54, Grad Norm: 0.0363 +[2025-02-23 04:11:22] (step=0362700) Train Loss: 0.3238, Train Steps/Sec: 16.42, Grad Norm: 0.0409 +[2025-02-23 04:11:29] (step=0362800) Train Loss: 0.3233, Train Steps/Sec: 14.40, Grad Norm: 0.0380 +[2025-02-23 04:11:35] (step=0362900) Train Loss: 0.3235, Train Steps/Sec: 17.19, Grad Norm: 0.0369 +[2025-02-23 04:11:41] (step=0363000) Train Loss: 0.3239, Train Steps/Sec: 17.17, Grad Norm: 0.0415 +[2025-02-23 04:11:47] (step=0363100) Train Loss: 0.3230, Train Steps/Sec: 17.16, Grad Norm: 0.0393 +[2025-02-23 04:11:52] (step=0363200) Train Loss: 0.3242, Train Steps/Sec: 17.15, Grad Norm: 0.0397 +[2025-02-23 04:11:58] (step=0363300) Train Loss: 0.3237, Train Steps/Sec: 17.20, Grad Norm: 0.0402 +[2025-02-23 04:12:04] (step=0363400) Train Loss: 0.3241, Train Steps/Sec: 17.15, Grad Norm: 0.0397 +[2025-02-23 04:12:11] (step=0363500) Train Loss: 0.3239, Train Steps/Sec: 15.10, Grad Norm: 0.0416 +[2025-02-23 04:12:17] (step=0363600) Train Loss: 0.3235, Train Steps/Sec: 15.79, Grad Norm: 0.0347 +[2025-02-23 04:12:23] (step=0363700) Train Loss: 0.3240, Train Steps/Sec: 15.70, Grad Norm: 0.0381 +[2025-02-23 04:12:29] (step=0363800) Train Loss: 0.3236, Train Steps/Sec: 17.23, Grad Norm: 0.0364 +[2025-02-23 04:12:35] (step=0363900) Train Loss: 0.3240, Train Steps/Sec: 17.15, Grad Norm: 0.0388 +[2025-02-23 04:12:41] (step=0364000) Train Loss: 0.3234, Train Steps/Sec: 17.14, Grad Norm: 0.0384 +[2025-02-23 04:12:48] (step=0364100) Train Loss: 0.3236, Train Steps/Sec: 14.21, Grad Norm: 0.0374 +[2025-02-23 04:12:54] (step=0364200) Train Loss: 0.3239, Train Steps/Sec: 16.35, Grad Norm: 0.0392 +[2025-02-23 04:13:00] (step=0364300) Train Loss: 0.3243, Train Steps/Sec: 15.75, Grad Norm: 0.0353 +[2025-02-23 04:13:07] (step=0364400) Train Loss: 0.3236, Train Steps/Sec: 15.69, Grad Norm: 0.0416 +[2025-02-23 04:13:13] (step=0364500) Train Loss: 0.3236, Train Steps/Sec: 15.73, Grad Norm: 0.0366 +[2025-02-23 04:13:19] (step=0364600) Train Loss: 0.3238, Train Steps/Sec: 15.88, Grad Norm: 0.0388 +[2025-02-23 04:13:25] (step=0364700) Train Loss: 0.3240, Train Steps/Sec: 17.26, Grad Norm: 0.0379 +[2025-02-23 04:13:31] (step=0364800) Train Loss: 0.3234, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 04:13:37] (step=0364900) Train Loss: 0.3235, Train Steps/Sec: 17.22, Grad Norm: 0.0366 +[2025-02-23 04:13:43] (step=0365000) Train Loss: 0.3238, Train Steps/Sec: 17.25, Grad Norm: 0.0365 +[2025-02-23 04:13:48] (step=0365100) Train Loss: 0.3236, Train Steps/Sec: 17.20, Grad Norm: 0.0380 +[2025-02-23 04:13:54] (step=0365200) Train Loss: 0.3239, Train Steps/Sec: 17.18, Grad Norm: 0.0377 +[2025-02-23 04:14:01] (step=0365300) Train Loss: 0.3234, Train Steps/Sec: 14.30, Grad Norm: 0.0380 +[2025-02-23 04:14:07] (step=0365400) Train Loss: 0.3240, Train Steps/Sec: 16.39, Grad Norm: 0.0373 +[2025-02-23 04:14:14] (step=0365500) Train Loss: 0.3237, Train Steps/Sec: 14.54, Grad Norm: 0.0389 +[2025-02-23 04:14:20] (step=0365600) Train Loss: 0.3230, Train Steps/Sec: 16.47, Grad Norm: 0.0356 +[2025-02-23 04:14:26] (step=0365700) Train Loss: 0.3237, Train Steps/Sec: 16.40, Grad Norm: 0.0379 +[2025-02-23 04:14:32] (step=0365800) Train Loss: 0.3234, Train Steps/Sec: 17.24, Grad Norm: 0.0381 +[2025-02-23 04:14:38] (step=0365900) Train Loss: 0.3241, Train Steps/Sec: 17.26, Grad Norm: 0.0375 +[2025-02-23 04:14:44] (step=0366000) Train Loss: 0.3236, Train Steps/Sec: 17.22, Grad Norm: 0.0384 +[2025-02-23 04:14:50] (step=0366100) Train Loss: 0.3236, Train Steps/Sec: 17.22, Grad Norm: 0.0399 +[2025-02-23 04:14:56] (step=0366200) Train Loss: 0.3240, Train Steps/Sec: 16.40, Grad Norm: 0.0438 +[2025-02-23 04:15:02] (step=0366300) Train Loss: 0.3240, Train Steps/Sec: 15.80, Grad Norm: 0.0423 +[2025-02-23 04:15:08] (step=0366400) Train Loss: 0.3240, Train Steps/Sec: 16.52, Grad Norm: 0.0363 +[2025-02-23 04:15:15] (step=0366500) Train Loss: 0.3238, Train Steps/Sec: 15.15, Grad Norm: 0.0390 +[2025-02-23 04:15:22] (step=0366600) Train Loss: 0.3242, Train Steps/Sec: 13.36, Grad Norm: 0.0396 +[2025-02-23 04:15:28] (step=0366700) Train Loss: 0.3237, Train Steps/Sec: 17.03, Grad Norm: 0.0375 +[2025-02-23 04:15:34] (step=0366800) Train Loss: 0.3241, Train Steps/Sec: 17.05, Grad Norm: 0.0373 +[2025-02-23 04:15:40] (step=0366900) Train Loss: 0.3237, Train Steps/Sec: 17.05, Grad Norm: 0.0387 +[2025-02-23 04:15:46] (step=0367000) Train Loss: 0.3239, Train Steps/Sec: 17.09, Grad Norm: 0.0362 +[2025-02-23 04:15:51] (step=0367100) Train Loss: 0.3239, Train Steps/Sec: 17.08, Grad Norm: 0.0413 +[2025-02-23 04:15:57] (step=0367200) Train Loss: 0.3243, Train Steps/Sec: 17.08, Grad Norm: 0.0398 +[2025-02-23 04:16:03] (step=0367300) Train Loss: 0.3237, Train Steps/Sec: 17.04, Grad Norm: 0.0370 +[2025-02-23 04:16:10] (step=0367400) Train Loss: 0.3243, Train Steps/Sec: 14.48, Grad Norm: 0.0378 +[2025-02-23 04:16:16] (step=0367500) Train Loss: 0.3237, Train Steps/Sec: 16.29, Grad Norm: 0.0370 +[2025-02-23 04:16:23] (step=0367600) Train Loss: 0.3239, Train Steps/Sec: 15.62, Grad Norm: 0.0391 +[2025-02-23 04:16:28] (step=0367700) Train Loss: 0.3239, Train Steps/Sec: 17.05, Grad Norm: 0.0410 +[2025-02-23 04:16:36] (step=0367800) Train Loss: 0.3245, Train Steps/Sec: 14.19, Grad Norm: 0.0356 +[2025-02-23 04:16:41] (step=0367900) Train Loss: 0.3236, Train Steps/Sec: 16.92, Grad Norm: 0.0391 +[2025-02-23 04:16:47] (step=0368000) Train Loss: 0.3244, Train Steps/Sec: 17.02, Grad Norm: 0.0370 +[2025-02-23 04:16:53] (step=0368100) Train Loss: 0.3235, Train Steps/Sec: 17.02, Grad Norm: 0.0401 +[2025-02-23 04:16:59] (step=0368200) Train Loss: 0.3239, Train Steps/Sec: 16.24, Grad Norm: 0.0338 +[2025-02-23 04:17:06] (step=0368300) Train Loss: 0.3237, Train Steps/Sec: 15.57, Grad Norm: 0.0383 +[2025-02-23 04:17:12] (step=0368400) Train Loss: 0.3239, Train Steps/Sec: 16.18, Grad Norm: 0.0353 +[2025-02-23 04:17:19] (step=0368500) Train Loss: 0.3236, Train Steps/Sec: 14.96, Grad Norm: 0.0378 +[2025-02-23 04:17:25] (step=0368600) Train Loss: 0.3236, Train Steps/Sec: 16.25, Grad Norm: 0.0378 +[2025-02-23 04:17:31] (step=0368700) Train Loss: 0.3240, Train Steps/Sec: 16.99, Grad Norm: 0.0372 +[2025-02-23 04:17:37] (step=0368800) Train Loss: 0.3242, Train Steps/Sec: 17.00, Grad Norm: 0.0344 +[2025-02-23 04:17:42] (step=0368900) Train Loss: 0.3241, Train Steps/Sec: 16.99, Grad Norm: 0.0377 +[2025-02-23 04:17:48] (step=0369000) Train Loss: 0.3240, Train Steps/Sec: 16.99, Grad Norm: 0.0389 +[2025-02-23 04:17:55] (step=0369100) Train Loss: 0.3240, Train Steps/Sec: 14.22, Grad Norm: 0.0400 +[2025-02-23 04:18:01] (step=0369200) Train Loss: 0.3238, Train Steps/Sec: 17.09, Grad Norm: 0.0363 +[2025-02-23 04:18:07] (step=0369300) Train Loss: 0.3235, Train Steps/Sec: 16.28, Grad Norm: 0.0404 +[2025-02-23 04:18:14] (step=0369400) Train Loss: 0.3232, Train Steps/Sec: 14.55, Grad Norm: 0.0368 +[2025-02-23 04:18:20] (step=0369500) Train Loss: 0.3235, Train Steps/Sec: 16.38, Grad Norm: 0.0367 +[2025-02-23 04:18:26] (step=0369600) Train Loss: 0.3240, Train Steps/Sec: 16.39, Grad Norm: 0.0393 +[2025-02-23 04:18:32] (step=0369700) Train Loss: 0.3234, Train Steps/Sec: 17.15, Grad Norm: 0.0379 +[2025-02-23 04:18:38] (step=0369800) Train Loss: 0.3239, Train Steps/Sec: 17.08, Grad Norm: 0.0396 +[2025-02-23 04:18:44] (step=0369900) Train Loss: 0.3238, Train Steps/Sec: 17.09, Grad Norm: 0.0376 +[2025-02-23 04:18:50] (step=0370000) Train Loss: 0.3242, Train Steps/Sec: 17.07, Grad Norm: 0.0399 +[2025-02-23 04:18:56] (step=0370100) Train Loss: 0.3236, Train Steps/Sec: 17.03, Grad Norm: 0.0381 +[2025-02-23 04:19:02] (step=0370200) Train Loss: 0.3239, Train Steps/Sec: 16.32, Grad Norm: 0.0391 +[2025-02-23 04:19:10] (step=0370300) Train Loss: 0.3244, Train Steps/Sec: 13.11, Grad Norm: 0.0398 +[2025-02-23 04:19:16] (step=0370400) Train Loss: 0.3240, Train Steps/Sec: 16.29, Grad Norm: 0.0344 +[2025-02-23 04:19:23] (step=0370500) Train Loss: 0.3237, Train Steps/Sec: 14.39, Grad Norm: 0.0375 +[2025-02-23 04:19:29] (step=0370600) Train Loss: 0.3235, Train Steps/Sec: 16.37, Grad Norm: 0.0362 +[2025-02-23 04:19:35] (step=0370700) Train Loss: 0.3236, Train Steps/Sec: 17.09, Grad Norm: 0.0386 +[2025-02-23 04:19:40] (step=0370800) Train Loss: 0.3238, Train Steps/Sec: 17.06, Grad Norm: 0.0393 +[2025-02-23 04:19:46] (step=0370900) Train Loss: 0.3240, Train Steps/Sec: 17.10, Grad Norm: 0.0386 +[2025-02-23 04:19:52] (step=0371000) Train Loss: 0.3246, Train Steps/Sec: 17.08, Grad Norm: 0.0358 +[2025-02-23 04:19:58] (step=0371100) Train Loss: 0.3243, Train Steps/Sec: 17.09, Grad Norm: 0.0383 +[2025-02-23 04:20:04] (step=0371200) Train Loss: 0.3239, Train Steps/Sec: 17.08, Grad Norm: 0.0367 +[2025-02-23 04:20:11] (step=0371300) Train Loss: 0.3237, Train Steps/Sec: 15.02, Grad Norm: 0.0378 +[2025-02-23 04:20:17] (step=0371400) Train Loss: 0.3244, Train Steps/Sec: 15.71, Grad Norm: 0.0398 +[2025-02-23 04:20:23] (step=0371500) Train Loss: 0.3240, Train Steps/Sec: 15.54, Grad Norm: 0.0388 +[2025-02-23 04:20:30] (step=0371600) Train Loss: 0.3240, Train Steps/Sec: 13.99, Grad Norm: 0.0390 +[2025-02-23 04:20:36] (step=0371700) Train Loss: 0.3237, Train Steps/Sec: 17.02, Grad Norm: 0.0377 +[2025-02-23 04:20:42] (step=0371800) Train Loss: 0.3236, Train Steps/Sec: 17.17, Grad Norm: 0.0382 +[2025-02-23 04:20:48] (step=0371900) Train Loss: 0.3240, Train Steps/Sec: 17.33, Grad Norm: 0.0415 +[2025-02-23 04:20:54] (step=0372000) Train Loss: 0.3238, Train Steps/Sec: 17.33, Grad Norm: 0.0348 +[2025-02-23 04:20:59] (step=0372100) Train Loss: 0.3235, Train Steps/Sec: 17.29, Grad Norm: 0.0374 +[2025-02-23 04:21:05] (step=0372200) Train Loss: 0.3236, Train Steps/Sec: 17.37, Grad Norm: 0.0375 +[2025-02-23 04:21:12] (step=0372300) Train Loss: 0.3231, Train Steps/Sec: 15.09, Grad Norm: 0.0366 +[2025-02-23 04:21:18] (step=0372400) Train Loss: 0.3234, Train Steps/Sec: 16.68, Grad Norm: 0.0369 +[2025-02-23 04:21:25] (step=0372500) Train Loss: 0.3237, Train Steps/Sec: 14.53, Grad Norm: 0.0375 +[2025-02-23 04:21:31] (step=0372600) Train Loss: 0.3231, Train Steps/Sec: 16.49, Grad Norm: 0.0384 +[2025-02-23 04:21:37] (step=0372700) Train Loss: 0.3234, Train Steps/Sec: 17.17, Grad Norm: 0.0412 +[2025-02-23 04:21:44] (step=0372800) Train Loss: 0.3241, Train Steps/Sec: 14.03, Grad Norm: 0.0378 +[2025-02-23 04:21:50] (step=0372900) Train Loss: 0.3231, Train Steps/Sec: 16.95, Grad Norm: 0.0385 +[2025-02-23 04:21:56] (step=0373000) Train Loss: 0.3238, Train Steps/Sec: 17.12, Grad Norm: 0.0354 +[2025-02-23 04:22:01] (step=0373100) Train Loss: 0.3233, Train Steps/Sec: 17.14, Grad Norm: 0.0391 +[2025-02-23 04:22:07] (step=0373200) Train Loss: 0.3239, Train Steps/Sec: 16.44, Grad Norm: 0.0362 +[2025-02-23 04:22:14] (step=0373300) Train Loss: 0.3240, Train Steps/Sec: 14.56, Grad Norm: 0.0373 +[2025-02-23 04:22:20] (step=0373400) Train Loss: 0.3239, Train Steps/Sec: 16.44, Grad Norm: 0.0372 +[2025-02-23 04:22:26] (step=0373500) Train Loss: 0.3239, Train Steps/Sec: 16.49, Grad Norm: 0.0354 +[2025-02-23 04:22:32] (step=0373600) Train Loss: 0.3240, Train Steps/Sec: 17.24, Grad Norm: 0.0350 +[2025-02-23 04:22:38] (step=0373700) Train Loss: 0.3242, Train Steps/Sec: 17.22, Grad Norm: 0.0400 +[2025-02-23 04:22:44] (step=0373800) Train Loss: 0.3236, Train Steps/Sec: 17.24, Grad Norm: 0.0382 +[2025-02-23 04:22:50] (step=0373900) Train Loss: 0.3242, Train Steps/Sec: 17.18, Grad Norm: 0.0383 +[2025-02-23 04:22:56] (step=0374000) Train Loss: 0.3235, Train Steps/Sec: 17.16, Grad Norm: 0.0374 +[2025-02-23 04:23:03] (step=0374100) Train Loss: 0.3237, Train Steps/Sec: 14.02, Grad Norm: 0.0374 +[2025-02-23 04:23:09] (step=0374200) Train Loss: 0.3236, Train Steps/Sec: 17.09, Grad Norm: 0.0394 +[2025-02-23 04:23:15] (step=0374300) Train Loss: 0.3240, Train Steps/Sec: 14.51, Grad Norm: 0.0383 +[2025-02-23 04:23:21] (step=0374400) Train Loss: 0.3243, Train Steps/Sec: 17.11, Grad Norm: 0.0402 +[2025-02-23 04:23:28] (step=0374500) Train Loss: 0.3241, Train Steps/Sec: 14.43, Grad Norm: 0.0364 +[2025-02-23 04:23:34] (step=0374600) Train Loss: 0.3238, Train Steps/Sec: 16.53, Grad Norm: 0.0367 +[2025-02-23 04:23:40] (step=0374700) Train Loss: 0.3236, Train Steps/Sec: 17.28, Grad Norm: 0.0362 +[2025-02-23 04:23:46] (step=0374800) Train Loss: 0.3237, Train Steps/Sec: 17.23, Grad Norm: 0.0401 +[2025-02-23 04:23:52] (step=0374900) Train Loss: 0.3236, Train Steps/Sec: 17.31, Grad Norm: 0.0377 +[2025-02-23 04:23:57] (step=0375000) Train Loss: 0.3238, Train Steps/Sec: 17.35, Grad Norm: 0.0379 +[2025-02-23 04:24:03] (step=0375100) Train Loss: 0.3238, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 04:24:10] (step=0375200) Train Loss: 0.3240, Train Steps/Sec: 14.61, Grad Norm: 0.0385 +[2025-02-23 04:24:17] (step=0375300) Train Loss: 0.3243, Train Steps/Sec: 14.93, Grad Norm: 0.0393 +[2025-02-23 04:24:24] (step=0375400) Train Loss: 0.3240, Train Steps/Sec: 14.14, Grad Norm: 0.0421 +[2025-02-23 04:24:30] (step=0375500) Train Loss: 0.3235, Train Steps/Sec: 16.96, Grad Norm: 0.0404 +[2025-02-23 04:24:36] (step=0375600) Train Loss: 0.3238, Train Steps/Sec: 16.99, Grad Norm: 0.0391 +[2025-02-23 04:24:41] (step=0375700) Train Loss: 0.3238, Train Steps/Sec: 17.04, Grad Norm: 0.0345 +[2025-02-23 04:24:47] (step=0375800) Train Loss: 0.3234, Train Steps/Sec: 17.06, Grad Norm: 0.0352 +[2025-02-23 04:24:53] (step=0375900) Train Loss: 0.3234, Train Steps/Sec: 17.07, Grad Norm: 0.0389 +[2025-02-23 04:24:59] (step=0376000) Train Loss: 0.3239, Train Steps/Sec: 17.12, Grad Norm: 0.0383 +[2025-02-23 04:25:05] (step=0376100) Train Loss: 0.3240, Train Steps/Sec: 17.10, Grad Norm: 0.0381 +[2025-02-23 04:25:11] (step=0376200) Train Loss: 0.3239, Train Steps/Sec: 17.19, Grad Norm: 0.0409 +[2025-02-23 04:25:18] (step=0376300) Train Loss: 0.3238, Train Steps/Sec: 14.45, Grad Norm: 0.0357 +[2025-02-23 04:25:23] (step=0376400) Train Loss: 0.3241, Train Steps/Sec: 17.12, Grad Norm: 0.0406 +[2025-02-23 04:25:30] (step=0376500) Train Loss: 0.3234, Train Steps/Sec: 14.37, Grad Norm: 0.0362 +[2025-02-23 04:25:38] (step=0376600) Train Loss: 0.3234, Train Steps/Sec: 13.58, Grad Norm: 0.0390 +[2025-02-23 04:25:44] (step=0376700) Train Loss: 0.3233, Train Steps/Sec: 17.28, Grad Norm: 0.0382 +[2025-02-23 04:25:49] (step=0376800) Train Loss: 0.3236, Train Steps/Sec: 17.25, Grad Norm: 0.0388 +[2025-02-23 04:25:55] (step=0376900) Train Loss: 0.3246, Train Steps/Sec: 17.27, Grad Norm: 0.0371 +[2025-02-23 04:26:01] (step=0377000) Train Loss: 0.3232, Train Steps/Sec: 17.26, Grad Norm: 0.0399 +[2025-02-23 04:26:08] (step=0377100) Train Loss: 0.3239, Train Steps/Sec: 15.16, Grad Norm: 0.0357 +[2025-02-23 04:26:14] (step=0377200) Train Loss: 0.3235, Train Steps/Sec: 15.96, Grad Norm: 0.0376 +[2025-02-23 04:26:20] (step=0377300) Train Loss: 0.3241, Train Steps/Sec: 16.58, Grad Norm: 0.0370 +[2025-02-23 04:26:26] (step=0377400) Train Loss: 0.3233, Train Steps/Sec: 16.57, Grad Norm: 0.0401 +[2025-02-23 04:26:32] (step=0377500) Train Loss: 0.3237, Train Steps/Sec: 17.29, Grad Norm: 0.0390 +[2025-02-23 04:26:37] (step=0377600) Train Loss: 0.3237, Train Steps/Sec: 17.24, Grad Norm: 0.0387 +[2025-02-23 04:26:43] (step=0377700) Train Loss: 0.3237, Train Steps/Sec: 17.20, Grad Norm: 0.0363 +[2025-02-23 04:26:49] (step=0377800) Train Loss: 0.3235, Train Steps/Sec: 17.20, Grad Norm: 0.0361 +[2025-02-23 04:26:56] (step=0377900) Train Loss: 0.3235, Train Steps/Sec: 13.98, Grad Norm: 0.0384 +[2025-02-23 04:27:02] (step=0378000) Train Loss: 0.3240, Train Steps/Sec: 17.18, Grad Norm: 0.0368 +[2025-02-23 04:27:08] (step=0378100) Train Loss: 0.3236, Train Steps/Sec: 17.27, Grad Norm: 0.0380 +[2025-02-23 04:27:14] (step=0378200) Train Loss: 0.3239, Train Steps/Sec: 17.30, Grad Norm: 0.0391 +[2025-02-23 04:27:20] (step=0378300) Train Loss: 0.3235, Train Steps/Sec: 14.67, Grad Norm: 0.0371 +[2025-02-23 04:27:26] (step=0378400) Train Loss: 0.3236, Train Steps/Sec: 17.30, Grad Norm: 0.0362 +[2025-02-23 04:27:33] (step=0378500) Train Loss: 0.3236, Train Steps/Sec: 14.52, Grad Norm: 0.0355 +[2025-02-23 04:27:39] (step=0378600) Train Loss: 0.3236, Train Steps/Sec: 16.47, Grad Norm: 0.0354 +[2025-02-23 04:27:45] (step=0378700) Train Loss: 0.3234, Train Steps/Sec: 17.25, Grad Norm: 0.0374 +[2025-02-23 04:27:51] (step=0378800) Train Loss: 0.3239, Train Steps/Sec: 17.27, Grad Norm: 0.0394 +[2025-02-23 04:27:57] (step=0378900) Train Loss: 0.3239, Train Steps/Sec: 17.32, Grad Norm: 0.0353 +[2025-02-23 04:28:02] (step=0379000) Train Loss: 0.3243, Train Steps/Sec: 17.29, Grad Norm: 0.0384 +[2025-02-23 04:28:11] (step=0379100) Train Loss: 0.3232, Train Steps/Sec: 12.00, Grad Norm: 0.0373 +[2025-02-23 04:28:17] (step=0379200) Train Loss: 0.3238, Train Steps/Sec: 17.34, Grad Norm: 0.0366 +[2025-02-23 04:28:23] (step=0379300) Train Loss: 0.3238, Train Steps/Sec: 15.93, Grad Norm: 0.0361 +[2025-02-23 04:28:29] (step=0379400) Train Loss: 0.3236, Train Steps/Sec: 17.33, Grad Norm: 0.0374 +[2025-02-23 04:28:34] (step=0379500) Train Loss: 0.3244, Train Steps/Sec: 17.29, Grad Norm: 0.0365 +[2025-02-23 04:28:40] (step=0379600) Train Loss: 0.3236, Train Steps/Sec: 17.30, Grad Norm: 0.0389 +[2025-02-23 04:28:46] (step=0379700) Train Loss: 0.3242, Train Steps/Sec: 17.24, Grad Norm: 0.0348 +[2025-02-23 04:28:52] (step=0379800) Train Loss: 0.3242, Train Steps/Sec: 17.25, Grad Norm: 0.0405 +[2025-02-23 04:28:58] (step=0379900) Train Loss: 0.3233, Train Steps/Sec: 17.26, Grad Norm: 0.0376 +[2025-02-23 04:29:03] (step=0380000) Train Loss: 0.3240, Train Steps/Sec: 17.28, Grad Norm: 0.0365 +[2025-02-23 04:29:09] (step=0380100) Train Loss: 0.3236, Train Steps/Sec: 17.30, Grad Norm: 0.0363 +[2025-02-23 04:29:15] (step=0380200) Train Loss: 0.3234, Train Steps/Sec: 16.53, Grad Norm: 0.0378 +[2025-02-23 04:29:22] (step=0380300) Train Loss: 0.3232, Train Steps/Sec: 15.23, Grad Norm: 0.0357 +[2025-02-23 04:29:29] (step=0380400) Train Loss: 0.3238, Train Steps/Sec: 14.34, Grad Norm: 0.0360 +[2025-02-23 04:29:36] (step=0380500) Train Loss: 0.3236, Train Steps/Sec: 14.49, Grad Norm: 0.0367 +[2025-02-23 04:29:42] (step=0380600) Train Loss: 0.3235, Train Steps/Sec: 16.63, Grad Norm: 0.0365 +[2025-02-23 04:29:47] (step=0380700) Train Loss: 0.3237, Train Steps/Sec: 17.37, Grad Norm: 0.0358 +[2025-02-23 04:29:53] (step=0380800) Train Loss: 0.3234, Train Steps/Sec: 17.35, Grad Norm: 0.0347 +[2025-02-23 04:29:59] (step=0380900) Train Loss: 0.3239, Train Steps/Sec: 17.30, Grad Norm: 0.0383 +[2025-02-23 04:30:06] (step=0381000) Train Loss: 0.3244, Train Steps/Sec: 15.14, Grad Norm: 0.0378 +[2025-02-23 04:30:12] (step=0381100) Train Loss: 0.3234, Train Steps/Sec: 15.87, Grad Norm: 0.0363 +[2025-02-23 04:30:18] (step=0381200) Train Loss: 0.3238, Train Steps/Sec: 15.91, Grad Norm: 0.0356 +[2025-02-23 04:30:24] (step=0381300) Train Loss: 0.3237, Train Steps/Sec: 17.32, Grad Norm: 0.0391 +[2025-02-23 04:30:30] (step=0381400) Train Loss: 0.3236, Train Steps/Sec: 17.31, Grad Norm: 0.0390 +[2025-02-23 04:30:35] (step=0381500) Train Loss: 0.3230, Train Steps/Sec: 17.37, Grad Norm: 0.0392 +[2025-02-23 04:30:42] (step=0381600) Train Loss: 0.3239, Train Steps/Sec: 14.23, Grad Norm: 0.0370 +[2025-02-23 04:30:48] (step=0381700) Train Loss: 0.3237, Train Steps/Sec: 17.21, Grad Norm: 0.0375 +[2025-02-23 04:30:54] (step=0381800) Train Loss: 0.3235, Train Steps/Sec: 17.28, Grad Norm: 0.0375 +[2025-02-23 04:31:00] (step=0381900) Train Loss: 0.3240, Train Steps/Sec: 17.26, Grad Norm: 0.0401 +[2025-02-23 04:31:06] (step=0382000) Train Loss: 0.3237, Train Steps/Sec: 17.30, Grad Norm: 0.0346 +[2025-02-23 04:31:11] (step=0382100) Train Loss: 0.3232, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 04:31:18] (step=0382200) Train Loss: 0.3236, Train Steps/Sec: 16.45, Grad Norm: 0.0397 +[2025-02-23 04:31:24] (step=0382300) Train Loss: 0.3232, Train Steps/Sec: 15.12, Grad Norm: 0.0368 +[2025-02-23 04:31:30] (step=0382400) Train Loss: 0.3246, Train Steps/Sec: 17.16, Grad Norm: 0.0346 +[2025-02-23 04:31:37] (step=0382500) Train Loss: 0.3234, Train Steps/Sec: 14.42, Grad Norm: 0.0376 +[2025-02-23 04:31:43] (step=0382600) Train Loss: 0.3240, Train Steps/Sec: 16.45, Grad Norm: 0.0383 +[2025-02-23 04:31:49] (step=0382700) Train Loss: 0.3243, Train Steps/Sec: 17.15, Grad Norm: 0.0370 +[2025-02-23 04:31:55] (step=0382800) Train Loss: 0.3233, Train Steps/Sec: 17.09, Grad Norm: 0.0366 +[2025-02-23 04:32:02] (step=0382900) Train Loss: 0.3238, Train Steps/Sec: 14.15, Grad Norm: 0.0371 +[2025-02-23 04:32:09] (step=0383000) Train Loss: 0.3232, Train Steps/Sec: 13.91, Grad Norm: 0.0385 +[2025-02-23 04:32:15] (step=0383100) Train Loss: 0.3239, Train Steps/Sec: 17.21, Grad Norm: 0.0382 +[2025-02-23 04:32:21] (step=0383200) Train Loss: 0.3238, Train Steps/Sec: 15.76, Grad Norm: 0.0378 +[2025-02-23 04:32:27] (step=0383300) Train Loss: 0.3243, Train Steps/Sec: 17.18, Grad Norm: 0.0360 +[2025-02-23 04:32:33] (step=0383400) Train Loss: 0.3238, Train Steps/Sec: 17.24, Grad Norm: 0.0395 +[2025-02-23 04:32:39] (step=0383500) Train Loss: 0.3241, Train Steps/Sec: 17.19, Grad Norm: 0.0362 +[2025-02-23 04:32:44] (step=0383600) Train Loss: 0.3239, Train Steps/Sec: 17.19, Grad Norm: 0.0360 +[2025-02-23 04:32:50] (step=0383700) Train Loss: 0.3232, Train Steps/Sec: 17.19, Grad Norm: 0.0392 +[2025-02-23 04:32:56] (step=0383800) Train Loss: 0.3235, Train Steps/Sec: 17.22, Grad Norm: 0.0364 +[2025-02-23 04:33:02] (step=0383900) Train Loss: 0.3230, Train Steps/Sec: 17.24, Grad Norm: 0.0400 +[2025-02-23 04:33:08] (step=0384000) Train Loss: 0.3239, Train Steps/Sec: 17.24, Grad Norm: 0.0372 +[2025-02-23 04:33:15] (step=0384100) Train Loss: 0.3227, Train Steps/Sec: 13.36, Grad Norm: 0.0376 +[2025-02-23 04:33:21] (step=0384200) Train Loss: 0.3236, Train Steps/Sec: 17.10, Grad Norm: 0.0381 +[2025-02-23 04:33:28] (step=0384300) Train Loss: 0.3230, Train Steps/Sec: 15.08, Grad Norm: 0.0363 +[2025-02-23 04:33:33] (step=0384400) Train Loss: 0.3235, Train Steps/Sec: 17.19, Grad Norm: 0.0385 +[2025-02-23 04:33:40] (step=0384500) Train Loss: 0.3236, Train Steps/Sec: 14.46, Grad Norm: 0.0363 +[2025-02-23 04:33:46] (step=0384600) Train Loss: 0.3235, Train Steps/Sec: 16.48, Grad Norm: 0.0353 +[2025-02-23 04:33:52] (step=0384700) Train Loss: 0.3238, Train Steps/Sec: 17.18, Grad Norm: 0.0363 +[2025-02-23 04:33:58] (step=0384800) Train Loss: 0.3234, Train Steps/Sec: 17.29, Grad Norm: 0.0352 +[2025-02-23 04:34:05] (step=0384900) Train Loss: 0.3236, Train Steps/Sec: 15.06, Grad Norm: 0.0367 +[2025-02-23 04:34:11] (step=0385000) Train Loss: 0.3234, Train Steps/Sec: 15.74, Grad Norm: 0.0366 +[2025-02-23 04:34:17] (step=0385100) Train Loss: 0.3235, Train Steps/Sec: 15.83, Grad Norm: 0.0397 +[2025-02-23 04:34:23] (step=0385200) Train Loss: 0.3235, Train Steps/Sec: 17.22, Grad Norm: 0.0345 +[2025-02-23 04:34:29] (step=0385300) Train Loss: 0.3237, Train Steps/Sec: 17.27, Grad Norm: 0.0372 +[2025-02-23 04:34:36] (step=0385400) Train Loss: 0.3238, Train Steps/Sec: 14.05, Grad Norm: 0.0352 +[2025-02-23 04:34:42] (step=0385500) Train Loss: 0.3233, Train Steps/Sec: 17.26, Grad Norm: 0.0374 +[2025-02-23 04:34:48] (step=0385600) Train Loss: 0.3241, Train Steps/Sec: 17.29, Grad Norm: 0.0406 +[2025-02-23 04:34:53] (step=0385700) Train Loss: 0.3240, Train Steps/Sec: 17.38, Grad Norm: 0.0348 +[2025-02-23 04:34:59] (step=0385800) Train Loss: 0.3235, Train Steps/Sec: 17.36, Grad Norm: 0.0367 +[2025-02-23 04:35:05] (step=0385900) Train Loss: 0.3232, Train Steps/Sec: 17.28, Grad Norm: 0.0371 +[2025-02-23 04:35:11] (step=0386000) Train Loss: 0.3244, Train Steps/Sec: 17.30, Grad Norm: 0.0384 +[2025-02-23 04:35:17] (step=0386100) Train Loss: 0.3238, Train Steps/Sec: 16.50, Grad Norm: 0.0360 +[2025-02-23 04:35:23] (step=0386200) Train Loss: 0.3233, Train Steps/Sec: 17.32, Grad Norm: 0.0379 +[2025-02-23 04:35:29] (step=0386300) Train Loss: 0.3240, Train Steps/Sec: 15.24, Grad Norm: 0.0350 +[2025-02-23 04:35:35] (step=0386400) Train Loss: 0.3239, Train Steps/Sec: 17.31, Grad Norm: 0.0367 +[2025-02-23 04:35:42] (step=0386500) Train Loss: 0.3238, Train Steps/Sec: 14.50, Grad Norm: 0.0399 +[2025-02-23 04:35:49] (step=0386600) Train Loss: 0.3240, Train Steps/Sec: 13.44, Grad Norm: 0.0377 +[2025-02-23 04:35:55] (step=0386700) Train Loss: 0.3236, Train Steps/Sec: 17.15, Grad Norm: 0.0368 +[2025-02-23 04:36:01] (step=0386800) Train Loss: 0.3240, Train Steps/Sec: 16.40, Grad Norm: 0.0349 +[2025-02-23 04:36:08] (step=0386900) Train Loss: 0.3231, Train Steps/Sec: 14.54, Grad Norm: 0.0386 +[2025-02-23 04:36:14] (step=0387000) Train Loss: 0.3232, Train Steps/Sec: 16.36, Grad Norm: 0.0360 +[2025-02-23 04:36:20] (step=0387100) Train Loss: 0.3235, Train Steps/Sec: 16.40, Grad Norm: 0.0399 +[2025-02-23 04:36:26] (step=0387200) Train Loss: 0.3237, Train Steps/Sec: 17.10, Grad Norm: 0.0437 +[2025-02-23 04:36:32] (step=0387300) Train Loss: 0.3239, Train Steps/Sec: 17.15, Grad Norm: 0.0370 +[2025-02-23 04:36:38] (step=0387400) Train Loss: 0.3239, Train Steps/Sec: 17.17, Grad Norm: 0.0363 +[2025-02-23 04:36:44] (step=0387500) Train Loss: 0.3243, Train Steps/Sec: 17.24, Grad Norm: 0.0364 +[2025-02-23 04:36:49] (step=0387600) Train Loss: 0.3239, Train Steps/Sec: 17.31, Grad Norm: 0.0386 +[2025-02-23 04:36:55] (step=0387700) Train Loss: 0.3236, Train Steps/Sec: 17.37, Grad Norm: 0.0386 +[2025-02-23 04:37:01] (step=0387800) Train Loss: 0.3232, Train Steps/Sec: 17.27, Grad Norm: 0.0337 +[2025-02-23 04:37:08] (step=0387900) Train Loss: 0.3233, Train Steps/Sec: 14.15, Grad Norm: 0.0377 +[2025-02-23 04:37:14] (step=0388000) Train Loss: 0.3232, Train Steps/Sec: 16.61, Grad Norm: 0.0362 +[2025-02-23 04:37:20] (step=0388100) Train Loss: 0.3232, Train Steps/Sec: 17.26, Grad Norm: 0.0388 +[2025-02-23 04:37:26] (step=0388200) Train Loss: 0.3235, Train Steps/Sec: 17.29, Grad Norm: 0.0428 +[2025-02-23 04:37:32] (step=0388300) Train Loss: 0.3231, Train Steps/Sec: 15.23, Grad Norm: 0.0348 +[2025-02-23 04:37:38] (step=0388400) Train Loss: 0.3237, Train Steps/Sec: 17.36, Grad Norm: 0.0381 +[2025-02-23 04:37:45] (step=0388500) Train Loss: 0.3233, Train Steps/Sec: 14.56, Grad Norm: 0.0362 +[2025-02-23 04:37:51] (step=0388600) Train Loss: 0.3238, Train Steps/Sec: 17.09, Grad Norm: 0.0393 +[2025-02-23 04:37:57] (step=0388700) Train Loss: 0.3236, Train Steps/Sec: 16.56, Grad Norm: 0.0359 +[2025-02-23 04:38:03] (step=0388800) Train Loss: 0.3239, Train Steps/Sec: 15.16, Grad Norm: 0.0352 +[2025-02-23 04:38:10] (step=0388900) Train Loss: 0.3233, Train Steps/Sec: 15.86, Grad Norm: 0.0381 +[2025-02-23 04:38:16] (step=0389000) Train Loss: 0.3233, Train Steps/Sec: 16.47, Grad Norm: 0.0354 +[2025-02-23 04:38:23] (step=0389100) Train Loss: 0.3234, Train Steps/Sec: 13.69, Grad Norm: 0.0362 +[2025-02-23 04:38:29] (step=0389200) Train Loss: 0.3238, Train Steps/Sec: 17.12, Grad Norm: 0.0350 +[2025-02-23 04:38:35] (step=0389300) Train Loss: 0.3236, Train Steps/Sec: 17.15, Grad Norm: 0.0374 +[2025-02-23 04:38:40] (step=0389400) Train Loss: 0.3236, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 04:38:46] (step=0389500) Train Loss: 0.3235, Train Steps/Sec: 17.35, Grad Norm: 0.0371 +[2025-02-23 04:38:52] (step=0389600) Train Loss: 0.3235, Train Steps/Sec: 17.31, Grad Norm: 0.0397 +[2025-02-23 04:38:58] (step=0389700) Train Loss: 0.3232, Train Steps/Sec: 17.28, Grad Norm: 0.0383 +[2025-02-23 04:39:04] (step=0389800) Train Loss: 0.3228, Train Steps/Sec: 17.30, Grad Norm: 0.0384 +[2025-02-23 04:39:09] (step=0389900) Train Loss: 0.3232, Train Steps/Sec: 17.30, Grad Norm: 0.0356 +[2025-02-23 04:39:15] (step=0390000) Train Loss: 0.3234, Train Steps/Sec: 16.54, Grad Norm: 0.0373 +[2025-02-23 04:39:21] (step=0390100) Train Loss: 0.3239, Train Steps/Sec: 17.28, Grad Norm: 0.0372 +[2025-02-23 04:39:27] (step=0390200) Train Loss: 0.3240, Train Steps/Sec: 17.22, Grad Norm: 0.0411 +[2025-02-23 04:39:34] (step=0390300) Train Loss: 0.3238, Train Steps/Sec: 15.13, Grad Norm: 0.0388 +[2025-02-23 04:39:41] (step=0390400) Train Loss: 0.3228, Train Steps/Sec: 14.15, Grad Norm: 0.0392 +[2025-02-23 04:39:48] (step=0390500) Train Loss: 0.3241, Train Steps/Sec: 14.49, Grad Norm: 0.0376 +[2025-02-23 04:39:53] (step=0390600) Train Loss: 0.3234, Train Steps/Sec: 17.24, Grad Norm: 0.0400 +[2025-02-23 04:40:00] (step=0390700) Train Loss: 0.3235, Train Steps/Sec: 14.55, Grad Norm: 0.0386 +[2025-02-23 04:40:07] (step=0390800) Train Loss: 0.3239, Train Steps/Sec: 15.80, Grad Norm: 0.0365 +[2025-02-23 04:40:13] (step=0390900) Train Loss: 0.3242, Train Steps/Sec: 16.44, Grad Norm: 0.0385 +[2025-02-23 04:40:19] (step=0391000) Train Loss: 0.3231, Train Steps/Sec: 16.57, Grad Norm: 0.0395 +[2025-02-23 04:40:24] (step=0391100) Train Loss: 0.3235, Train Steps/Sec: 17.31, Grad Norm: 0.0384 +[2025-02-23 04:40:30] (step=0391200) Train Loss: 0.3233, Train Steps/Sec: 17.31, Grad Norm: 0.0383 +[2025-02-23 04:40:36] (step=0391300) Train Loss: 0.3234, Train Steps/Sec: 17.19, Grad Norm: 0.0376 +[2025-02-23 04:40:42] (step=0391400) Train Loss: 0.3237, Train Steps/Sec: 17.14, Grad Norm: 0.0380 +[2025-02-23 04:40:48] (step=0391500) Train Loss: 0.3234, Train Steps/Sec: 17.16, Grad Norm: 0.0357 +[2025-02-23 04:40:55] (step=0391600) Train Loss: 0.3231, Train Steps/Sec: 14.08, Grad Norm: 0.0349 +[2025-02-23 04:41:01] (step=0391700) Train Loss: 0.3236, Train Steps/Sec: 17.24, Grad Norm: 0.0365 +[2025-02-23 04:41:06] (step=0391800) Train Loss: 0.3238, Train Steps/Sec: 17.24, Grad Norm: 0.0375 +[2025-02-23 04:41:12] (step=0391900) Train Loss: 0.3238, Train Steps/Sec: 16.54, Grad Norm: 0.0359 +[2025-02-23 04:41:18] (step=0392000) Train Loss: 0.3240, Train Steps/Sec: 17.22, Grad Norm: 0.0346 +[2025-02-23 04:41:24] (step=0392100) Train Loss: 0.3233, Train Steps/Sec: 17.21, Grad Norm: 0.0375 +[2025-02-23 04:41:30] (step=0392200) Train Loss: 0.3233, Train Steps/Sec: 17.19, Grad Norm: 0.0377 +[2025-02-23 04:41:37] (step=0392300) Train Loss: 0.3233, Train Steps/Sec: 15.12, Grad Norm: 0.0383 +[2025-02-23 04:41:42] (step=0392400) Train Loss: 0.3240, Train Steps/Sec: 17.22, Grad Norm: 0.0384 +[2025-02-23 04:41:49] (step=0392500) Train Loss: 0.3234, Train Steps/Sec: 14.50, Grad Norm: 0.0368 +[2025-02-23 04:41:55] (step=0392600) Train Loss: 0.3238, Train Steps/Sec: 17.28, Grad Norm: 0.0401 +[2025-02-23 04:42:02] (step=0392700) Train Loss: 0.3234, Train Steps/Sec: 14.09, Grad Norm: 0.0372 +[2025-02-23 04:42:08] (step=0392800) Train Loss: 0.3234, Train Steps/Sec: 16.45, Grad Norm: 0.0364 +[2025-02-23 04:42:16] (step=0392900) Train Loss: 0.3236, Train Steps/Sec: 13.29, Grad Norm: 0.0359 +[2025-02-23 04:42:22] (step=0393000) Train Loss: 0.3240, Train Steps/Sec: 17.12, Grad Norm: 0.0387 +[2025-02-23 04:42:27] (step=0393100) Train Loss: 0.3232, Train Steps/Sec: 17.21, Grad Norm: 0.0371 +[2025-02-23 04:42:33] (step=0393200) Train Loss: 0.3238, Train Steps/Sec: 17.26, Grad Norm: 0.0369 +[2025-02-23 04:42:39] (step=0393300) Train Loss: 0.3235, Train Steps/Sec: 17.25, Grad Norm: 0.0370 +[2025-02-23 04:42:45] (step=0393400) Train Loss: 0.3234, Train Steps/Sec: 17.23, Grad Norm: 0.0396 +[2025-02-23 04:42:51] (step=0393500) Train Loss: 0.3234, Train Steps/Sec: 17.19, Grad Norm: 0.0396 +[2025-02-23 04:42:56] (step=0393600) Train Loss: 0.3235, Train Steps/Sec: 17.14, Grad Norm: 0.0371 +[2025-02-23 04:43:02] (step=0393700) Train Loss: 0.3241, Train Steps/Sec: 17.15, Grad Norm: 0.0365 +[2025-02-23 04:43:08] (step=0393800) Train Loss: 0.3238, Train Steps/Sec: 17.12, Grad Norm: 0.0387 +[2025-02-23 04:43:14] (step=0393900) Train Loss: 0.3244, Train Steps/Sec: 16.49, Grad Norm: 0.0385 +[2025-02-23 04:43:20] (step=0394000) Train Loss: 0.3237, Train Steps/Sec: 17.21, Grad Norm: 0.0352 +[2025-02-23 04:43:27] (step=0394100) Train Loss: 0.3238, Train Steps/Sec: 14.24, Grad Norm: 0.0348 +[2025-02-23 04:43:33] (step=0394200) Train Loss: 0.3239, Train Steps/Sec: 17.20, Grad Norm: 0.0374 +[2025-02-23 04:43:39] (step=0394300) Train Loss: 0.3238, Train Steps/Sec: 15.15, Grad Norm: 0.0368 +[2025-02-23 04:43:45] (step=0394400) Train Loss: 0.3235, Train Steps/Sec: 17.29, Grad Norm: 0.0343 +[2025-02-23 04:43:52] (step=0394500) Train Loss: 0.3237, Train Steps/Sec: 14.53, Grad Norm: 0.0368 +[2025-02-23 04:43:58] (step=0394600) Train Loss: 0.3235, Train Steps/Sec: 15.77, Grad Norm: 0.0414 +[2025-02-23 04:44:05] (step=0394700) Train Loss: 0.3237, Train Steps/Sec: 14.69, Grad Norm: 0.0388 +[2025-02-23 04:44:11] (step=0394800) Train Loss: 0.3237, Train Steps/Sec: 16.57, Grad Norm: 0.0391 +[2025-02-23 04:44:17] (step=0394900) Train Loss: 0.3232, Train Steps/Sec: 16.59, Grad Norm: 0.0388 +[2025-02-23 04:44:23] (step=0395000) Train Loss: 0.3237, Train Steps/Sec: 17.28, Grad Norm: 0.0403 +[2025-02-23 04:44:29] (step=0395100) Train Loss: 0.3224, Train Steps/Sec: 17.23, Grad Norm: 0.0349 +[2025-02-23 04:44:35] (step=0395200) Train Loss: 0.3235, Train Steps/Sec: 17.23, Grad Norm: 0.0367 +[2025-02-23 04:44:41] (step=0395300) Train Loss: 0.3233, Train Steps/Sec: 17.16, Grad Norm: 0.0369 +[2025-02-23 04:44:48] (step=0395400) Train Loss: 0.3240, Train Steps/Sec: 14.27, Grad Norm: 0.0397 +[2025-02-23 04:44:53] (step=0395500) Train Loss: 0.3233, Train Steps/Sec: 17.27, Grad Norm: 0.0375 +[2025-02-23 04:44:59] (step=0395600) Train Loss: 0.3233, Train Steps/Sec: 17.25, Grad Norm: 0.0374 +[2025-02-23 04:45:05] (step=0395700) Train Loss: 0.3239, Train Steps/Sec: 17.27, Grad Norm: 0.0397 +[2025-02-23 04:45:11] (step=0395800) Train Loss: 0.3238, Train Steps/Sec: 16.63, Grad Norm: 0.0342 +[2025-02-23 04:45:17] (step=0395900) Train Loss: 0.3235, Train Steps/Sec: 17.32, Grad Norm: 0.0368 +[2025-02-23 04:45:23] (step=0396000) Train Loss: 0.3232, Train Steps/Sec: 17.38, Grad Norm: 0.0468 +[2025-02-23 04:45:28] (step=0396100) Train Loss: 0.3234, Train Steps/Sec: 17.39, Grad Norm: 0.0371 +[2025-02-23 04:45:34] (step=0396200) Train Loss: 0.3243, Train Steps/Sec: 17.38, Grad Norm: 0.0411 +[2025-02-23 04:45:40] (step=0396300) Train Loss: 0.3238, Train Steps/Sec: 15.88, Grad Norm: 0.0375 +[2025-02-23 04:45:46] (step=0396400) Train Loss: 0.3237, Train Steps/Sec: 16.64, Grad Norm: 0.0374 +[2025-02-23 04:45:53] (step=0396500) Train Loss: 0.3232, Train Steps/Sec: 15.89, Grad Norm: 0.0339 +[2025-02-23 04:46:01] (step=0396600) Train Loss: 0.3235, Train Steps/Sec: 12.10, Grad Norm: 0.0353 +[2025-02-23 04:46:07] (step=0396700) Train Loss: 0.3239, Train Steps/Sec: 15.89, Grad Norm: 0.0449 +[2025-02-23 04:46:13] (step=0396800) Train Loss: 0.3240, Train Steps/Sec: 15.95, Grad Norm: 0.0364 +[2025-02-23 04:46:19] (step=0396900) Train Loss: 0.3232, Train Steps/Sec: 17.42, Grad Norm: 0.0368 +[2025-02-23 04:46:25] (step=0397000) Train Loss: 0.3237, Train Steps/Sec: 17.33, Grad Norm: 0.0348 +[2025-02-23 04:46:31] (step=0397100) Train Loss: 0.3235, Train Steps/Sec: 17.25, Grad Norm: 0.0377 +[2025-02-23 04:46:37] (step=0397200) Train Loss: 0.3235, Train Steps/Sec: 17.25, Grad Norm: 0.0397 +[2025-02-23 04:46:42] (step=0397300) Train Loss: 0.3231, Train Steps/Sec: 17.12, Grad Norm: 0.0365 +[2025-02-23 04:46:48] (step=0397400) Train Loss: 0.3239, Train Steps/Sec: 17.11, Grad Norm: 0.0375 +[2025-02-23 04:46:54] (step=0397500) Train Loss: 0.3236, Train Steps/Sec: 17.10, Grad Norm: 0.0378 +[2025-02-23 04:47:00] (step=0397600) Train Loss: 0.3233, Train Steps/Sec: 17.11, Grad Norm: 0.0353 +[2025-02-23 04:47:06] (step=0397700) Train Loss: 0.3231, Train Steps/Sec: 17.07, Grad Norm: 0.0363 +[2025-02-23 04:47:12] (step=0397800) Train Loss: 0.3236, Train Steps/Sec: 16.35, Grad Norm: 0.0374 +[2025-02-23 04:47:19] (step=0397900) Train Loss: 0.3236, Train Steps/Sec: 14.25, Grad Norm: 0.0354 +[2025-02-23 04:47:25] (step=0398000) Train Loss: 0.3233, Train Steps/Sec: 17.32, Grad Norm: 0.0391 +[2025-02-23 04:47:30] (step=0398100) Train Loss: 0.3234, Train Steps/Sec: 17.37, Grad Norm: 0.0352 +[2025-02-23 04:47:36] (step=0398200) Train Loss: 0.3235, Train Steps/Sec: 17.33, Grad Norm: 0.0372 +[2025-02-23 04:47:43] (step=0398300) Train Loss: 0.3240, Train Steps/Sec: 15.81, Grad Norm: 0.0376 +[2025-02-23 04:47:49] (step=0398400) Train Loss: 0.3234, Train Steps/Sec: 16.57, Grad Norm: 0.0365 +[2025-02-23 04:47:56] (step=0398500) Train Loss: 0.3235, Train Steps/Sec: 13.96, Grad Norm: 0.0397 +[2025-02-23 04:48:02] (step=0398600) Train Loss: 0.3234, Train Steps/Sec: 15.16, Grad Norm: 0.0353 +[2025-02-23 04:48:09] (step=0398700) Train Loss: 0.3238, Train Steps/Sec: 15.82, Grad Norm: 0.0375 +[2025-02-23 04:48:15] (step=0398800) Train Loss: 0.3239, Train Steps/Sec: 16.54, Grad Norm: 0.0398 +[2025-02-23 04:48:21] (step=0398900) Train Loss: 0.3236, Train Steps/Sec: 17.28, Grad Norm: 0.0385 +[2025-02-23 04:48:26] (step=0399000) Train Loss: 0.3235, Train Steps/Sec: 17.28, Grad Norm: 0.0336 +[2025-02-23 04:48:33] (step=0399100) Train Loss: 0.3229, Train Steps/Sec: 14.20, Grad Norm: 0.0384 +[2025-02-23 04:48:39] (step=0399200) Train Loss: 0.3233, Train Steps/Sec: 17.02, Grad Norm: 0.0385 +[2025-02-23 04:48:45] (step=0399300) Train Loss: 0.3238, Train Steps/Sec: 17.02, Grad Norm: 0.0354 +[2025-02-23 04:48:51] (step=0399400) Train Loss: 0.3233, Train Steps/Sec: 16.96, Grad Norm: 0.0393 +[2025-02-23 04:48:57] (step=0399500) Train Loss: 0.3236, Train Steps/Sec: 17.00, Grad Norm: 0.0357 +[2025-02-23 04:49:03] (step=0399600) Train Loss: 0.3234, Train Steps/Sec: 17.02, Grad Norm: 0.0349 +[2025-02-23 04:49:09] (step=0399700) Train Loss: 0.3238, Train Steps/Sec: 16.34, Grad Norm: 0.0353 +[2025-02-23 04:49:15] (step=0399800) Train Loss: 0.3237, Train Steps/Sec: 16.97, Grad Norm: 0.0361 +[2025-02-23 04:49:21] (step=0399900) Train Loss: 0.3237, Train Steps/Sec: 16.98, Grad Norm: 0.0393 +[2025-02-23 04:49:27] (step=0400000) Train Loss: 0.3233, Train Steps/Sec: 17.00, Grad Norm: 0.0457 +[2025-02-23 04:49:28] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0400000.pt +[2025-02-23 04:49:33] (step=0400100) Train Loss: 0.3235, Train Steps/Sec: 14.49, Grad Norm: 0.0359 +[2025-02-23 04:49:39] (step=0400200) Train Loss: 0.3230, Train Steps/Sec: 16.97, Grad Norm: 0.0355 +[2025-02-23 04:49:46] (step=0400300) Train Loss: 0.3240, Train Steps/Sec: 15.57, Grad Norm: 0.0366 +[2025-02-23 04:49:53] (step=0400400) Train Loss: 0.3235, Train Steps/Sec: 13.18, Grad Norm: 0.0368 +[2025-02-23 04:50:01] (step=0400500) Train Loss: 0.3241, Train Steps/Sec: 13.47, Grad Norm: 0.0384 +[2025-02-23 04:50:07] (step=0400600) Train Loss: 0.3235, Train Steps/Sec: 15.04, Grad Norm: 0.0358 +[2025-02-23 04:50:14] (step=0400700) Train Loss: 0.3229, Train Steps/Sec: 15.76, Grad Norm: 0.0355 +[2025-02-23 04:50:20] (step=0400800) Train Loss: 0.3238, Train Steps/Sec: 17.14, Grad Norm: 0.0377 +[2025-02-23 04:50:25] (step=0400900) Train Loss: 0.3236, Train Steps/Sec: 17.30, Grad Norm: 0.0378 +[2025-02-23 04:50:31] (step=0401000) Train Loss: 0.3234, Train Steps/Sec: 17.34, Grad Norm: 0.0379 +[2025-02-23 04:50:37] (step=0401100) Train Loss: 0.3232, Train Steps/Sec: 17.35, Grad Norm: 0.0411 +[2025-02-23 04:50:43] (step=0401200) Train Loss: 0.3236, Train Steps/Sec: 17.35, Grad Norm: 0.0342 +[2025-02-23 04:50:49] (step=0401300) Train Loss: 0.3229, Train Steps/Sec: 17.28, Grad Norm: 0.0366 +[2025-02-23 04:50:54] (step=0401400) Train Loss: 0.3238, Train Steps/Sec: 17.29, Grad Norm: 0.0362 +[2025-02-23 04:51:00] (step=0401500) Train Loss: 0.3235, Train Steps/Sec: 17.36, Grad Norm: 0.0347 +[2025-02-23 04:51:07] (step=0401600) Train Loss: 0.3237, Train Steps/Sec: 13.81, Grad Norm: 0.0382 +[2025-02-23 04:51:13] (step=0401700) Train Loss: 0.3235, Train Steps/Sec: 17.33, Grad Norm: 0.0403 +[2025-02-23 04:51:19] (step=0401800) Train Loss: 0.3233, Train Steps/Sec: 17.40, Grad Norm: 0.0411 +[2025-02-23 04:51:25] (step=0401900) Train Loss: 0.3237, Train Steps/Sec: 17.45, Grad Norm: 0.0378 +[2025-02-23 04:51:30] (step=0402000) Train Loss: 0.3237, Train Steps/Sec: 17.40, Grad Norm: 0.0368 +[2025-02-23 04:51:36] (step=0402100) Train Loss: 0.3240, Train Steps/Sec: 17.39, Grad Norm: 0.0406 +[2025-02-23 04:51:42] (step=0402200) Train Loss: 0.3231, Train Steps/Sec: 17.38, Grad Norm: 0.0370 +[2025-02-23 04:51:48] (step=0402300) Train Loss: 0.3231, Train Steps/Sec: 15.89, Grad Norm: 0.0373 +[2025-02-23 04:51:55] (step=0402400) Train Loss: 0.3235, Train Steps/Sec: 14.57, Grad Norm: 0.0390 +[2025-02-23 04:52:02] (step=0402500) Train Loss: 0.3237, Train Steps/Sec: 14.59, Grad Norm: 0.0373 +[2025-02-23 04:52:08] (step=0402600) Train Loss: 0.3236, Train Steps/Sec: 15.12, Grad Norm: 0.0397 +[2025-02-23 04:52:15] (step=0402700) Train Loss: 0.3236, Train Steps/Sec: 15.88, Grad Norm: 0.0385 +[2025-02-23 04:52:21] (step=0402800) Train Loss: 0.3238, Train Steps/Sec: 17.18, Grad Norm: 0.0358 +[2025-02-23 04:52:28] (step=0402900) Train Loss: 0.3229, Train Steps/Sec: 14.28, Grad Norm: 0.0367 +[2025-02-23 04:52:33] (step=0403000) Train Loss: 0.3237, Train Steps/Sec: 17.19, Grad Norm: 0.0390 +[2025-02-23 04:52:39] (step=0403100) Train Loss: 0.3236, Train Steps/Sec: 17.19, Grad Norm: 0.0419 +[2025-02-23 04:52:45] (step=0403200) Train Loss: 0.3228, Train Steps/Sec: 17.21, Grad Norm: 0.0351 +[2025-02-23 04:52:51] (step=0403300) Train Loss: 0.3238, Train Steps/Sec: 17.35, Grad Norm: 0.0399 +[2025-02-23 04:52:57] (step=0403400) Train Loss: 0.3231, Train Steps/Sec: 17.36, Grad Norm: 0.0338 +[2025-02-23 04:53:02] (step=0403500) Train Loss: 0.3237, Train Steps/Sec: 17.37, Grad Norm: 0.0366 +[2025-02-23 04:53:08] (step=0403600) Train Loss: 0.3236, Train Steps/Sec: 16.64, Grad Norm: 0.0344 +[2025-02-23 04:53:14] (step=0403700) Train Loss: 0.3239, Train Steps/Sec: 17.15, Grad Norm: 0.0365 +[2025-02-23 04:53:20] (step=0403800) Train Loss: 0.3234, Train Steps/Sec: 17.24, Grad Norm: 0.0337 +[2025-02-23 04:53:26] (step=0403900) Train Loss: 0.3228, Train Steps/Sec: 17.29, Grad Norm: 0.0350 +[2025-02-23 04:53:32] (step=0404000) Train Loss: 0.3236, Train Steps/Sec: 17.27, Grad Norm: 0.0384 +[2025-02-23 04:53:39] (step=0404100) Train Loss: 0.3239, Train Steps/Sec: 14.08, Grad Norm: 0.0363 +[2025-02-23 04:53:44] (step=0404200) Train Loss: 0.3232, Train Steps/Sec: 17.23, Grad Norm: 0.0364 +[2025-02-23 04:53:51] (step=0404300) Train Loss: 0.3229, Train Steps/Sec: 14.55, Grad Norm: 0.0362 +[2025-02-23 04:53:58] (step=0404400) Train Loss: 0.3233, Train Steps/Sec: 14.66, Grad Norm: 0.0364 +[2025-02-23 04:54:05] (step=0404500) Train Loss: 0.3235, Train Steps/Sec: 15.14, Grad Norm: 0.0382 +[2025-02-23 04:54:11] (step=0404600) Train Loss: 0.3236, Train Steps/Sec: 15.22, Grad Norm: 0.0377 +[2025-02-23 04:54:17] (step=0404700) Train Loss: 0.3233, Train Steps/Sec: 16.57, Grad Norm: 0.0329 +[2025-02-23 04:54:23] (step=0404800) Train Loss: 0.3228, Train Steps/Sec: 17.25, Grad Norm: 0.0353 +[2025-02-23 04:54:29] (step=0404900) Train Loss: 0.3229, Train Steps/Sec: 17.17, Grad Norm: 0.0368 +[2025-02-23 04:54:35] (step=0405000) Train Loss: 0.3233, Train Steps/Sec: 17.26, Grad Norm: 0.0392 +[2025-02-23 04:54:41] (step=0405100) Train Loss: 0.3237, Train Steps/Sec: 17.33, Grad Norm: 0.0350 +[2025-02-23 04:54:46] (step=0405200) Train Loss: 0.3235, Train Steps/Sec: 17.29, Grad Norm: 0.0373 +[2025-02-23 04:54:52] (step=0405300) Train Loss: 0.3234, Train Steps/Sec: 17.21, Grad Norm: 0.0378 +[2025-02-23 04:54:59] (step=0405400) Train Loss: 0.3234, Train Steps/Sec: 14.40, Grad Norm: 0.0361 +[2025-02-23 04:55:05] (step=0405500) Train Loss: 0.3238, Train Steps/Sec: 16.67, Grad Norm: 0.0411 +[2025-02-23 04:55:11] (step=0405600) Train Loss: 0.3238, Train Steps/Sec: 17.39, Grad Norm: 0.0353 +[2025-02-23 04:55:17] (step=0405700) Train Loss: 0.3234, Train Steps/Sec: 17.35, Grad Norm: 0.0412 +[2025-02-23 04:55:22] (step=0405800) Train Loss: 0.3233, Train Steps/Sec: 17.38, Grad Norm: 0.0394 +[2025-02-23 04:55:28] (step=0405900) Train Loss: 0.3231, Train Steps/Sec: 17.29, Grad Norm: 0.0366 +[2025-02-23 04:55:34] (step=0406000) Train Loss: 0.3240, Train Steps/Sec: 17.27, Grad Norm: 0.0406 +[2025-02-23 04:55:40] (step=0406100) Train Loss: 0.3235, Train Steps/Sec: 17.34, Grad Norm: 0.0368 +[2025-02-23 04:55:46] (step=0406200) Train Loss: 0.3237, Train Steps/Sec: 17.37, Grad Norm: 0.0419 +[2025-02-23 04:55:53] (step=0406300) Train Loss: 0.3232, Train Steps/Sec: 14.02, Grad Norm: 0.0386 +[2025-02-23 04:55:59] (step=0406400) Train Loss: 0.3233, Train Steps/Sec: 15.30, Grad Norm: 0.0408 +[2025-02-23 04:56:06] (step=0406500) Train Loss: 0.3231, Train Steps/Sec: 14.60, Grad Norm: 0.0439 +[2025-02-23 04:56:14] (step=0406600) Train Loss: 0.3230, Train Steps/Sec: 13.23, Grad Norm: 0.0322 +[2025-02-23 04:56:20] (step=0406700) Train Loss: 0.3230, Train Steps/Sec: 16.43, Grad Norm: 0.0417 +[2025-02-23 04:56:25] (step=0406800) Train Loss: 0.3238, Train Steps/Sec: 17.28, Grad Norm: 0.0345 +[2025-02-23 04:56:31] (step=0406900) Train Loss: 0.3233, Train Steps/Sec: 17.30, Grad Norm: 0.0364 +[2025-02-23 04:56:37] (step=0407000) Train Loss: 0.3237, Train Steps/Sec: 17.29, Grad Norm: 0.0338 +[2025-02-23 04:56:43] (step=0407100) Train Loss: 0.3240, Train Steps/Sec: 17.33, Grad Norm: 0.0395 +[2025-02-23 04:56:49] (step=0407200) Train Loss: 0.3236, Train Steps/Sec: 17.37, Grad Norm: 0.0403 +[2025-02-23 04:56:54] (step=0407300) Train Loss: 0.3228, Train Steps/Sec: 17.40, Grad Norm: 0.0362 +[2025-02-23 04:57:00] (step=0407400) Train Loss: 0.3237, Train Steps/Sec: 17.44, Grad Norm: 0.0375 +[2025-02-23 04:57:06] (step=0407500) Train Loss: 0.3236, Train Steps/Sec: 16.64, Grad Norm: 0.0384 +[2025-02-23 04:57:12] (step=0407600) Train Loss: 0.3233, Train Steps/Sec: 17.37, Grad Norm: 0.0373 +[2025-02-23 04:57:18] (step=0407700) Train Loss: 0.3239, Train Steps/Sec: 17.38, Grad Norm: 0.0378 +[2025-02-23 04:57:23] (step=0407800) Train Loss: 0.3234, Train Steps/Sec: 17.26, Grad Norm: 0.0342 +[2025-02-23 04:57:30] (step=0407900) Train Loss: 0.3230, Train Steps/Sec: 14.35, Grad Norm: 0.0390 +[2025-02-23 04:57:36] (step=0408000) Train Loss: 0.3228, Train Steps/Sec: 17.28, Grad Norm: 0.0354 +[2025-02-23 04:57:42] (step=0408100) Train Loss: 0.3236, Train Steps/Sec: 17.25, Grad Norm: 0.0363 +[2025-02-23 04:57:48] (step=0408200) Train Loss: 0.3235, Train Steps/Sec: 15.80, Grad Norm: 0.0394 +[2025-02-23 04:57:55] (step=0408300) Train Loss: 0.3233, Train Steps/Sec: 14.11, Grad Norm: 0.0401 +[2025-02-23 04:58:02] (step=0408400) Train Loss: 0.3232, Train Steps/Sec: 15.81, Grad Norm: 0.0368 +[2025-02-23 04:58:08] (step=0408500) Train Loss: 0.3235, Train Steps/Sec: 15.21, Grad Norm: 0.0382 +[2025-02-23 04:58:15] (step=0408600) Train Loss: 0.3240, Train Steps/Sec: 15.78, Grad Norm: 0.0404 +[2025-02-23 04:58:21] (step=0408700) Train Loss: 0.3238, Train Steps/Sec: 16.49, Grad Norm: 0.0372 +[2025-02-23 04:58:26] (step=0408800) Train Loss: 0.3232, Train Steps/Sec: 17.28, Grad Norm: 0.0380 +[2025-02-23 04:58:32] (step=0408900) Train Loss: 0.3224, Train Steps/Sec: 17.28, Grad Norm: 0.0419 +[2025-02-23 04:58:38] (step=0409000) Train Loss: 0.3236, Train Steps/Sec: 17.32, Grad Norm: 0.0357 +[2025-02-23 04:58:45] (step=0409100) Train Loss: 0.3234, Train Steps/Sec: 14.28, Grad Norm: 0.0348 +[2025-02-23 04:58:51] (step=0409200) Train Loss: 0.3237, Train Steps/Sec: 17.29, Grad Norm: 0.0357 +[2025-02-23 04:58:57] (step=0409300) Train Loss: 0.3233, Train Steps/Sec: 17.21, Grad Norm: 0.0385 +[2025-02-23 04:59:03] (step=0409400) Train Loss: 0.3232, Train Steps/Sec: 16.51, Grad Norm: 0.0364 +[2025-02-23 04:59:08] (step=0409500) Train Loss: 0.3241, Train Steps/Sec: 17.30, Grad Norm: 0.0375 +[2025-02-23 04:59:14] (step=0409600) Train Loss: 0.3227, Train Steps/Sec: 17.28, Grad Norm: 0.0377 +[2025-02-23 04:59:20] (step=0409700) Train Loss: 0.3229, Train Steps/Sec: 17.22, Grad Norm: 0.0366 +[2025-02-23 04:59:26] (step=0409800) Train Loss: 0.3238, Train Steps/Sec: 17.22, Grad Norm: 0.0420 +[2025-02-23 04:59:32] (step=0409900) Train Loss: 0.3231, Train Steps/Sec: 17.18, Grad Norm: 0.0421 +[2025-02-23 04:59:38] (step=0410000) Train Loss: 0.3235, Train Steps/Sec: 17.13, Grad Norm: 0.0379 +[2025-02-23 04:59:43] (step=0410100) Train Loss: 0.3236, Train Steps/Sec: 17.21, Grad Norm: 0.0379 +[2025-02-23 04:59:50] (step=0410200) Train Loss: 0.3232, Train Steps/Sec: 14.64, Grad Norm: 0.0382 +[2025-02-23 04:59:57] (step=0410300) Train Loss: 0.3236, Train Steps/Sec: 15.03, Grad Norm: 0.0384 +[2025-02-23 05:00:05] (step=0410400) Train Loss: 0.3238, Train Steps/Sec: 12.74, Grad Norm: 0.0382 +[2025-02-23 05:00:11] (step=0410500) Train Loss: 0.3239, Train Steps/Sec: 15.64, Grad Norm: 0.0370 +[2025-02-23 05:00:17] (step=0410600) Train Loss: 0.3232, Train Steps/Sec: 15.64, Grad Norm: 0.0374 +[2025-02-23 05:00:24] (step=0410700) Train Loss: 0.3234, Train Steps/Sec: 16.40, Grad Norm: 0.0381 +[2025-02-23 05:00:29] (step=0410800) Train Loss: 0.3237, Train Steps/Sec: 17.12, Grad Norm: 0.0391 +[2025-02-23 05:00:35] (step=0410900) Train Loss: 0.3236, Train Steps/Sec: 17.08, Grad Norm: 0.0361 +[2025-02-23 05:00:41] (step=0411000) Train Loss: 0.3233, Train Steps/Sec: 17.10, Grad Norm: 0.0334 +[2025-02-23 05:00:47] (step=0411100) Train Loss: 0.3230, Train Steps/Sec: 16.98, Grad Norm: 0.0392 +[2025-02-23 05:00:53] (step=0411200) Train Loss: 0.3237, Train Steps/Sec: 17.13, Grad Norm: 0.0357 +[2025-02-23 05:00:59] (step=0411300) Train Loss: 0.3236, Train Steps/Sec: 17.12, Grad Norm: 0.0351 +[2025-02-23 05:01:05] (step=0411400) Train Loss: 0.3234, Train Steps/Sec: 16.43, Grad Norm: 0.0367 +[2025-02-23 05:01:11] (step=0411500) Train Loss: 0.3234, Train Steps/Sec: 17.11, Grad Norm: 0.0378 +[2025-02-23 05:01:18] (step=0411600) Train Loss: 0.3232, Train Steps/Sec: 14.16, Grad Norm: 0.0387 +[2025-02-23 05:01:24] (step=0411700) Train Loss: 0.3232, Train Steps/Sec: 17.08, Grad Norm: 0.0362 +[2025-02-23 05:01:29] (step=0411800) Train Loss: 0.3236, Train Steps/Sec: 17.10, Grad Norm: 0.0383 +[2025-02-23 05:01:35] (step=0411900) Train Loss: 0.3236, Train Steps/Sec: 17.07, Grad Norm: 0.0396 +[2025-02-23 05:01:41] (step=0412000) Train Loss: 0.3233, Train Steps/Sec: 17.10, Grad Norm: 0.0406 +[2025-02-23 05:01:48] (step=0412100) Train Loss: 0.3243, Train Steps/Sec: 15.69, Grad Norm: 0.0367 +[2025-02-23 05:01:54] (step=0412200) Train Loss: 0.3234, Train Steps/Sec: 15.21, Grad Norm: 0.0384 +[2025-02-23 05:02:01] (step=0412300) Train Loss: 0.3234, Train Steps/Sec: 15.11, Grad Norm: 0.0370 +[2025-02-23 05:02:07] (step=0412400) Train Loss: 0.3238, Train Steps/Sec: 15.78, Grad Norm: 0.0354 +[2025-02-23 05:02:13] (step=0412500) Train Loss: 0.3233, Train Steps/Sec: 15.66, Grad Norm: 0.0359 +[2025-02-23 05:02:20] (step=0412600) Train Loss: 0.3234, Train Steps/Sec: 15.69, Grad Norm: 0.0356 +[2025-02-23 05:02:26] (step=0412700) Train Loss: 0.3231, Train Steps/Sec: 16.44, Grad Norm: 0.0367 +[2025-02-23 05:02:32] (step=0412800) Train Loss: 0.3236, Train Steps/Sec: 17.26, Grad Norm: 0.0338 +[2025-02-23 05:02:39] (step=0412900) Train Loss: 0.3235, Train Steps/Sec: 14.23, Grad Norm: 0.0352 +[2025-02-23 05:02:45] (step=0413000) Train Loss: 0.3229, Train Steps/Sec: 17.14, Grad Norm: 0.0355 +[2025-02-23 05:02:50] (step=0413100) Train Loss: 0.3232, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 05:02:56] (step=0413200) Train Loss: 0.3233, Train Steps/Sec: 17.33, Grad Norm: 0.0377 +[2025-02-23 05:03:02] (step=0413300) Train Loss: 0.3237, Train Steps/Sec: 16.62, Grad Norm: 0.0402 +[2025-02-23 05:03:08] (step=0413400) Train Loss: 0.3237, Train Steps/Sec: 17.31, Grad Norm: 0.0350 +[2025-02-23 05:03:14] (step=0413500) Train Loss: 0.3233, Train Steps/Sec: 17.33, Grad Norm: 0.0385 +[2025-02-23 05:03:19] (step=0413600) Train Loss: 0.3230, Train Steps/Sec: 17.32, Grad Norm: 0.0360 +[2025-02-23 05:03:25] (step=0413700) Train Loss: 0.3236, Train Steps/Sec: 17.27, Grad Norm: 0.0370 +[2025-02-23 05:03:31] (step=0413800) Train Loss: 0.3238, Train Steps/Sec: 17.31, Grad Norm: 0.0369 +[2025-02-23 05:03:37] (step=0413900) Train Loss: 0.3234, Train Steps/Sec: 17.23, Grad Norm: 0.0356 +[2025-02-23 05:03:43] (step=0414000) Train Loss: 0.3232, Train Steps/Sec: 17.13, Grad Norm: 0.0388 +[2025-02-23 05:03:51] (step=0414100) Train Loss: 0.3238, Train Steps/Sec: 11.89, Grad Norm: 0.0374 +[2025-02-23 05:03:57] (step=0414200) Train Loss: 0.3231, Train Steps/Sec: 16.44, Grad Norm: 0.0398 +[2025-02-23 05:04:04] (step=0414300) Train Loss: 0.3237, Train Steps/Sec: 15.07, Grad Norm: 0.0378 +[2025-02-23 05:04:10] (step=0414400) Train Loss: 0.3236, Train Steps/Sec: 16.44, Grad Norm: 0.0341 +[2025-02-23 05:04:16] (step=0414500) Train Loss: 0.3235, Train Steps/Sec: 15.71, Grad Norm: 0.0379 +[2025-02-23 05:04:23] (step=0414600) Train Loss: 0.3231, Train Steps/Sec: 15.86, Grad Norm: 0.0354 +[2025-02-23 05:04:29] (step=0414700) Train Loss: 0.3233, Train Steps/Sec: 16.68, Grad Norm: 0.0354 +[2025-02-23 05:04:34] (step=0414800) Train Loss: 0.3234, Train Steps/Sec: 17.27, Grad Norm: 0.0342 +[2025-02-23 05:04:40] (step=0414900) Train Loss: 0.3230, Train Steps/Sec: 17.31, Grad Norm: 0.0419 +[2025-02-23 05:04:46] (step=0415000) Train Loss: 0.3240, Train Steps/Sec: 17.32, Grad Norm: 0.0384 +[2025-02-23 05:04:52] (step=0415100) Train Loss: 0.3233, Train Steps/Sec: 17.28, Grad Norm: 0.0357 +[2025-02-23 05:04:58] (step=0415200) Train Loss: 0.3228, Train Steps/Sec: 17.26, Grad Norm: 0.0367 +[2025-02-23 05:05:04] (step=0415300) Train Loss: 0.3235, Train Steps/Sec: 16.43, Grad Norm: 0.0380 +[2025-02-23 05:05:11] (step=0415400) Train Loss: 0.3235, Train Steps/Sec: 14.33, Grad Norm: 0.0362 +[2025-02-23 05:05:16] (step=0415500) Train Loss: 0.3233, Train Steps/Sec: 17.20, Grad Norm: 0.0411 +[2025-02-23 05:05:22] (step=0415600) Train Loss: 0.3231, Train Steps/Sec: 17.29, Grad Norm: 0.0404 +[2025-02-23 05:05:28] (step=0415700) Train Loss: 0.3236, Train Steps/Sec: 17.29, Grad Norm: 0.0380 +[2025-02-23 05:05:34] (step=0415800) Train Loss: 0.3230, Train Steps/Sec: 17.41, Grad Norm: 0.0348 +[2025-02-23 05:05:39] (step=0415900) Train Loss: 0.3235, Train Steps/Sec: 17.44, Grad Norm: 0.0386 +[2025-02-23 05:05:46] (step=0416000) Train Loss: 0.3240, Train Steps/Sec: 15.96, Grad Norm: 0.0366 +[2025-02-23 05:05:52] (step=0416100) Train Loss: 0.3243, Train Steps/Sec: 15.36, Grad Norm: 0.0380 +[2025-02-23 05:05:58] (step=0416200) Train Loss: 0.3231, Train Steps/Sec: 16.59, Grad Norm: 0.0369 +[2025-02-23 05:06:05] (step=0416300) Train Loss: 0.3234, Train Steps/Sec: 15.96, Grad Norm: 0.0380 +[2025-02-23 05:06:11] (step=0416400) Train Loss: 0.3235, Train Steps/Sec: 15.89, Grad Norm: 0.0348 +[2025-02-23 05:06:17] (step=0416500) Train Loss: 0.3233, Train Steps/Sec: 15.85, Grad Norm: 0.0385 +[2025-02-23 05:06:25] (step=0416600) Train Loss: 0.3230, Train Steps/Sec: 13.25, Grad Norm: 0.0387 +[2025-02-23 05:06:31] (step=0416700) Train Loss: 0.3236, Train Steps/Sec: 16.43, Grad Norm: 0.0342 +[2025-02-23 05:06:37] (step=0416800) Train Loss: 0.3237, Train Steps/Sec: 17.25, Grad Norm: 0.0338 +[2025-02-23 05:06:42] (step=0416900) Train Loss: 0.3239, Train Steps/Sec: 17.29, Grad Norm: 0.0386 +[2025-02-23 05:06:48] (step=0417000) Train Loss: 0.3236, Train Steps/Sec: 17.21, Grad Norm: 0.0383 +[2025-02-23 05:06:54] (step=0417100) Train Loss: 0.3231, Train Steps/Sec: 17.21, Grad Norm: 0.0366 +[2025-02-23 05:07:00] (step=0417200) Train Loss: 0.3230, Train Steps/Sec: 16.56, Grad Norm: 0.0368 +[2025-02-23 05:07:06] (step=0417300) Train Loss: 0.3231, Train Steps/Sec: 17.24, Grad Norm: 0.0368 +[2025-02-23 05:07:12] (step=0417400) Train Loss: 0.3234, Train Steps/Sec: 17.28, Grad Norm: 0.0390 +[2025-02-23 05:07:17] (step=0417500) Train Loss: 0.3236, Train Steps/Sec: 17.26, Grad Norm: 0.0380 +[2025-02-23 05:07:23] (step=0417600) Train Loss: 0.3239, Train Steps/Sec: 17.31, Grad Norm: 0.0343 +[2025-02-23 05:07:29] (step=0417700) Train Loss: 0.3232, Train Steps/Sec: 17.34, Grad Norm: 0.0367 +[2025-02-23 05:07:35] (step=0417800) Train Loss: 0.3238, Train Steps/Sec: 17.31, Grad Norm: 0.0370 +[2025-02-23 05:07:42] (step=0417900) Train Loss: 0.3231, Train Steps/Sec: 13.70, Grad Norm: 0.0390 +[2025-02-23 05:07:49] (step=0418000) Train Loss: 0.3241, Train Steps/Sec: 14.58, Grad Norm: 0.0401 +[2025-02-23 05:07:55] (step=0418100) Train Loss: 0.3230, Train Steps/Sec: 16.44, Grad Norm: 0.0371 +[2025-02-23 05:08:01] (step=0418200) Train Loss: 0.3236, Train Steps/Sec: 16.56, Grad Norm: 0.0387 +[2025-02-23 05:08:07] (step=0418300) Train Loss: 0.3238, Train Steps/Sec: 15.82, Grad Norm: 0.0403 +[2025-02-23 05:08:13] (step=0418400) Train Loss: 0.3229, Train Steps/Sec: 16.65, Grad Norm: 0.0401 +[2025-02-23 05:08:19] (step=0418500) Train Loss: 0.3232, Train Steps/Sec: 16.56, Grad Norm: 0.0377 +[2025-02-23 05:08:26] (step=0418600) Train Loss: 0.3238, Train Steps/Sec: 15.77, Grad Norm: 0.0398 +[2025-02-23 05:08:32] (step=0418700) Train Loss: 0.3234, Train Steps/Sec: 16.42, Grad Norm: 0.0389 +[2025-02-23 05:08:38] (step=0418800) Train Loss: 0.3236, Train Steps/Sec: 17.19, Grad Norm: 0.0365 +[2025-02-23 05:08:43] (step=0418900) Train Loss: 0.3233, Train Steps/Sec: 17.21, Grad Norm: 0.0393 +[2025-02-23 05:08:49] (step=0419000) Train Loss: 0.3234, Train Steps/Sec: 17.23, Grad Norm: 0.0381 +[2025-02-23 05:08:56] (step=0419100) Train Loss: 0.3235, Train Steps/Sec: 14.19, Grad Norm: 0.0344 +[2025-02-23 05:09:02] (step=0419200) Train Loss: 0.3238, Train Steps/Sec: 16.40, Grad Norm: 0.0371 +[2025-02-23 05:09:08] (step=0419300) Train Loss: 0.3232, Train Steps/Sec: 17.25, Grad Norm: 0.0379 +[2025-02-23 05:09:14] (step=0419400) Train Loss: 0.3240, Train Steps/Sec: 17.27, Grad Norm: 0.0379 +[2025-02-23 05:09:20] (step=0419500) Train Loss: 0.3229, Train Steps/Sec: 17.13, Grad Norm: 0.0386 +[2025-02-23 05:09:26] (step=0419600) Train Loss: 0.3232, Train Steps/Sec: 17.27, Grad Norm: 0.0406 +[2025-02-23 05:09:31] (step=0419700) Train Loss: 0.3232, Train Steps/Sec: 17.25, Grad Norm: 0.0354 +[2025-02-23 05:09:37] (step=0419800) Train Loss: 0.3235, Train Steps/Sec: 17.34, Grad Norm: 0.0375 +[2025-02-23 05:09:44] (step=0419900) Train Loss: 0.3232, Train Steps/Sec: 15.85, Grad Norm: 0.0361 +[2025-02-23 05:09:50] (step=0420000) Train Loss: 0.3233, Train Steps/Sec: 15.34, Grad Norm: 0.0372 +[2025-02-23 05:09:56] (step=0420100) Train Loss: 0.3236, Train Steps/Sec: 15.98, Grad Norm: 0.0376 +[2025-02-23 05:10:02] (step=0420200) Train Loss: 0.3232, Train Steps/Sec: 17.31, Grad Norm: 0.0363 +[2025-02-23 05:10:08] (step=0420300) Train Loss: 0.3235, Train Steps/Sec: 17.26, Grad Norm: 0.0366 +[2025-02-23 05:10:16] (step=0420400) Train Loss: 0.3232, Train Steps/Sec: 12.75, Grad Norm: 0.0385 +[2025-02-23 05:10:22] (step=0420500) Train Loss: 0.3233, Train Steps/Sec: 15.81, Grad Norm: 0.0375 +[2025-02-23 05:10:28] (step=0420600) Train Loss: 0.3234, Train Steps/Sec: 15.82, Grad Norm: 0.0345 +[2025-02-23 05:10:34] (step=0420700) Train Loss: 0.3235, Train Steps/Sec: 16.56, Grad Norm: 0.0386 +[2025-02-23 05:10:40] (step=0420800) Train Loss: 0.3236, Train Steps/Sec: 17.23, Grad Norm: 0.0403 +[2025-02-23 05:10:46] (step=0420900) Train Loss: 0.3238, Train Steps/Sec: 17.21, Grad Norm: 0.0343 +[2025-02-23 05:10:52] (step=0421000) Train Loss: 0.3231, Train Steps/Sec: 17.14, Grad Norm: 0.0382 +[2025-02-23 05:10:58] (step=0421100) Train Loss: 0.3233, Train Steps/Sec: 16.45, Grad Norm: 0.0348 +[2025-02-23 05:11:04] (step=0421200) Train Loss: 0.3232, Train Steps/Sec: 17.21, Grad Norm: 0.0401 +[2025-02-23 05:11:10] (step=0421300) Train Loss: 0.3232, Train Steps/Sec: 17.21, Grad Norm: 0.0403 +[2025-02-23 05:11:15] (step=0421400) Train Loss: 0.3235, Train Steps/Sec: 17.17, Grad Norm: 0.0366 +[2025-02-23 05:11:21] (step=0421500) Train Loss: 0.3238, Train Steps/Sec: 17.26, Grad Norm: 0.0369 +[2025-02-23 05:11:28] (step=0421600) Train Loss: 0.3231, Train Steps/Sec: 14.22, Grad Norm: 0.0342 +[2025-02-23 05:11:34] (step=0421700) Train Loss: 0.3234, Train Steps/Sec: 17.41, Grad Norm: 0.0368 +[2025-02-23 05:11:40] (step=0421800) Train Loss: 0.3235, Train Steps/Sec: 16.63, Grad Norm: 0.0366 +[2025-02-23 05:11:47] (step=0421900) Train Loss: 0.3230, Train Steps/Sec: 14.72, Grad Norm: 0.0352 +[2025-02-23 05:11:53] (step=0422000) Train Loss: 0.3232, Train Steps/Sec: 16.53, Grad Norm: 0.0336 +[2025-02-23 05:11:59] (step=0422100) Train Loss: 0.3237, Train Steps/Sec: 16.63, Grad Norm: 0.0407 +[2025-02-23 05:12:05] (step=0422200) Train Loss: 0.3231, Train Steps/Sec: 17.36, Grad Norm: 0.0370 +[2025-02-23 05:12:11] (step=0422300) Train Loss: 0.3233, Train Steps/Sec: 16.55, Grad Norm: 0.0393 +[2025-02-23 05:12:17] (step=0422400) Train Loss: 0.3234, Train Steps/Sec: 15.92, Grad Norm: 0.0403 +[2025-02-23 05:12:23] (step=0422500) Train Loss: 0.3237, Train Steps/Sec: 15.86, Grad Norm: 0.0363 +[2025-02-23 05:12:30] (step=0422600) Train Loss: 0.3230, Train Steps/Sec: 15.79, Grad Norm: 0.0373 +[2025-02-23 05:12:36] (step=0422700) Train Loss: 0.3236, Train Steps/Sec: 16.32, Grad Norm: 0.0378 +[2025-02-23 05:12:42] (step=0422800) Train Loss: 0.3235, Train Steps/Sec: 17.11, Grad Norm: 0.0368 +[2025-02-23 05:12:49] (step=0422900) Train Loss: 0.3226, Train Steps/Sec: 14.10, Grad Norm: 0.0355 +[2025-02-23 05:12:55] (step=0423000) Train Loss: 0.3235, Train Steps/Sec: 17.07, Grad Norm: 0.0333 +[2025-02-23 05:13:01] (step=0423100) Train Loss: 0.3234, Train Steps/Sec: 16.42, Grad Norm: 0.0364 +[2025-02-23 05:13:06] (step=0423200) Train Loss: 0.3233, Train Steps/Sec: 17.18, Grad Norm: 0.0382 +[2025-02-23 05:13:12] (step=0423300) Train Loss: 0.3235, Train Steps/Sec: 17.17, Grad Norm: 0.0373 +[2025-02-23 05:13:18] (step=0423400) Train Loss: 0.3233, Train Steps/Sec: 17.16, Grad Norm: 0.0333 +[2025-02-23 05:13:24] (step=0423500) Train Loss: 0.3231, Train Steps/Sec: 17.16, Grad Norm: 0.0372 +[2025-02-23 05:13:30] (step=0423600) Train Loss: 0.3237, Train Steps/Sec: 17.19, Grad Norm: 0.0367 +[2025-02-23 05:13:36] (step=0423700) Train Loss: 0.3231, Train Steps/Sec: 17.17, Grad Norm: 0.0342 +[2025-02-23 05:13:42] (step=0423800) Train Loss: 0.3236, Train Steps/Sec: 15.68, Grad Norm: 0.0359 +[2025-02-23 05:13:49] (step=0423900) Train Loss: 0.3237, Train Steps/Sec: 15.14, Grad Norm: 0.0384 +[2025-02-23 05:13:55] (step=0424000) Train Loss: 0.3233, Train Steps/Sec: 15.69, Grad Norm: 0.0413 +[2025-02-23 05:14:02] (step=0424100) Train Loss: 0.3231, Train Steps/Sec: 14.18, Grad Norm: 0.0385 +[2025-02-23 05:14:08] (step=0424200) Train Loss: 0.3236, Train Steps/Sec: 17.26, Grad Norm: 0.0367 +[2025-02-23 05:14:14] (step=0424300) Train Loss: 0.3236, Train Steps/Sec: 16.56, Grad Norm: 0.0364 +[2025-02-23 05:14:20] (step=0424400) Train Loss: 0.3238, Train Steps/Sec: 15.80, Grad Norm: 0.0349 +[2025-02-23 05:14:27] (step=0424500) Train Loss: 0.3235, Train Steps/Sec: 15.76, Grad Norm: 0.0351 +[2025-02-23 05:14:33] (step=0424600) Train Loss: 0.3228, Train Steps/Sec: 15.78, Grad Norm: 0.0373 +[2025-02-23 05:14:39] (step=0424700) Train Loss: 0.3229, Train Steps/Sec: 16.62, Grad Norm: 0.0370 +[2025-02-23 05:14:45] (step=0424800) Train Loss: 0.3231, Train Steps/Sec: 17.29, Grad Norm: 0.0371 +[2025-02-23 05:14:50] (step=0424900) Train Loss: 0.3235, Train Steps/Sec: 17.32, Grad Norm: 0.0374 +[2025-02-23 05:14:57] (step=0425000) Train Loss: 0.3232, Train Steps/Sec: 16.48, Grad Norm: 0.0378 +[2025-02-23 05:15:02] (step=0425100) Train Loss: 0.3232, Train Steps/Sec: 17.27, Grad Norm: 0.0342 +[2025-02-23 05:15:08] (step=0425200) Train Loss: 0.3234, Train Steps/Sec: 17.26, Grad Norm: 0.0426 +[2025-02-23 05:15:14] (step=0425300) Train Loss: 0.3240, Train Steps/Sec: 17.21, Grad Norm: 0.0360 +[2025-02-23 05:15:21] (step=0425400) Train Loss: 0.3232, Train Steps/Sec: 14.18, Grad Norm: 0.0369 +[2025-02-23 05:15:27] (step=0425500) Train Loss: 0.3237, Train Steps/Sec: 17.15, Grad Norm: 0.0357 +[2025-02-23 05:15:33] (step=0425600) Train Loss: 0.3239, Train Steps/Sec: 17.05, Grad Norm: 0.0395 +[2025-02-23 05:15:39] (step=0425700) Train Loss: 0.3233, Train Steps/Sec: 15.81, Grad Norm: 0.0371 +[2025-02-23 05:15:46] (step=0425800) Train Loss: 0.3226, Train Steps/Sec: 15.30, Grad Norm: 0.0339 +[2025-02-23 05:15:52] (step=0425900) Train Loss: 0.3232, Train Steps/Sec: 16.58, Grad Norm: 0.0390 +[2025-02-23 05:15:58] (step=0426000) Train Loss: 0.3234, Train Steps/Sec: 16.61, Grad Norm: 0.0352 +[2025-02-23 05:16:03] (step=0426100) Train Loss: 0.3233, Train Steps/Sec: 17.28, Grad Norm: 0.0359 +[2025-02-23 05:16:09] (step=0426200) Train Loss: 0.3228, Train Steps/Sec: 17.29, Grad Norm: 0.0404 +[2025-02-23 05:16:15] (step=0426300) Train Loss: 0.3231, Train Steps/Sec: 17.35, Grad Norm: 0.0358 +[2025-02-23 05:16:21] (step=0426400) Train Loss: 0.3233, Train Steps/Sec: 15.25, Grad Norm: 0.0365 +[2025-02-23 05:16:28] (step=0426500) Train Loss: 0.3240, Train Steps/Sec: 16.54, Grad Norm: 0.0389 +[2025-02-23 05:16:35] (step=0426600) Train Loss: 0.3234, Train Steps/Sec: 12.89, Grad Norm: 0.0365 +[2025-02-23 05:16:41] (step=0426700) Train Loss: 0.3234, Train Steps/Sec: 16.52, Grad Norm: 0.0353 +[2025-02-23 05:16:47] (step=0426800) Train Loss: 0.3236, Train Steps/Sec: 17.29, Grad Norm: 0.0383 +[2025-02-23 05:16:53] (step=0426900) Train Loss: 0.3228, Train Steps/Sec: 17.34, Grad Norm: 0.0339 +[2025-02-23 05:16:59] (step=0427000) Train Loss: 0.3233, Train Steps/Sec: 16.68, Grad Norm: 0.0376 +[2025-02-23 05:17:05] (step=0427100) Train Loss: 0.3229, Train Steps/Sec: 17.39, Grad Norm: 0.0391 +[2025-02-23 05:17:10] (step=0427200) Train Loss: 0.3244, Train Steps/Sec: 17.35, Grad Norm: 0.0357 +[2025-02-23 05:17:16] (step=0427300) Train Loss: 0.3233, Train Steps/Sec: 17.36, Grad Norm: 0.0334 +[2025-02-23 05:17:22] (step=0427400) Train Loss: 0.3232, Train Steps/Sec: 17.38, Grad Norm: 0.0363 +[2025-02-23 05:17:28] (step=0427500) Train Loss: 0.3230, Train Steps/Sec: 17.40, Grad Norm: 0.0360 +[2025-02-23 05:17:33] (step=0427600) Train Loss: 0.3235, Train Steps/Sec: 17.40, Grad Norm: 0.0372 +[2025-02-23 05:17:40] (step=0427700) Train Loss: 0.3228, Train Steps/Sec: 15.27, Grad Norm: 0.0410 +[2025-02-23 05:17:46] (step=0427800) Train Loss: 0.3231, Train Steps/Sec: 15.94, Grad Norm: 0.0380 +[2025-02-23 05:17:54] (step=0427900) Train Loss: 0.3233, Train Steps/Sec: 13.47, Grad Norm: 0.0370 +[2025-02-23 05:18:00] (step=0428000) Train Loss: 0.3235, Train Steps/Sec: 17.20, Grad Norm: 0.0356 +[2025-02-23 05:18:05] (step=0428100) Train Loss: 0.3225, Train Steps/Sec: 17.21, Grad Norm: 0.0375 +[2025-02-23 05:18:11] (step=0428200) Train Loss: 0.3232, Train Steps/Sec: 17.21, Grad Norm: 0.0367 +[2025-02-23 05:18:17] (step=0428300) Train Loss: 0.3232, Train Steps/Sec: 17.23, Grad Norm: 0.0380 +[2025-02-23 05:18:24] (step=0428400) Train Loss: 0.3236, Train Steps/Sec: 15.16, Grad Norm: 0.0404 +[2025-02-23 05:18:30] (step=0428500) Train Loss: 0.3233, Train Steps/Sec: 16.45, Grad Norm: 0.0397 +[2025-02-23 05:18:36] (step=0428600) Train Loss: 0.3230, Train Steps/Sec: 15.14, Grad Norm: 0.0368 +[2025-02-23 05:18:42] (step=0428700) Train Loss: 0.3230, Train Steps/Sec: 16.49, Grad Norm: 0.0351 +[2025-02-23 05:18:48] (step=0428800) Train Loss: 0.3226, Train Steps/Sec: 17.21, Grad Norm: 0.0346 +[2025-02-23 05:18:54] (step=0428900) Train Loss: 0.3233, Train Steps/Sec: 17.27, Grad Norm: 0.0391 +[2025-02-23 05:19:00] (step=0429000) Train Loss: 0.3230, Train Steps/Sec: 16.60, Grad Norm: 0.0382 +[2025-02-23 05:19:07] (step=0429100) Train Loss: 0.3242, Train Steps/Sec: 14.62, Grad Norm: 0.0408 +[2025-02-23 05:19:13] (step=0429200) Train Loss: 0.3238, Train Steps/Sec: 17.34, Grad Norm: 0.0355 +[2025-02-23 05:19:18] (step=0429300) Train Loss: 0.3235, Train Steps/Sec: 17.39, Grad Norm: 0.0371 +[2025-02-23 05:19:24] (step=0429400) Train Loss: 0.3234, Train Steps/Sec: 17.36, Grad Norm: 0.0382 +[2025-02-23 05:19:30] (step=0429500) Train Loss: 0.3235, Train Steps/Sec: 17.38, Grad Norm: 0.0382 +[2025-02-23 05:19:36] (step=0429600) Train Loss: 0.3229, Train Steps/Sec: 15.95, Grad Norm: 0.0360 +[2025-02-23 05:19:43] (step=0429700) Train Loss: 0.3236, Train Steps/Sec: 15.33, Grad Norm: 0.0404 +[2025-02-23 05:19:49] (step=0429800) Train Loss: 0.3231, Train Steps/Sec: 16.53, Grad Norm: 0.0386 +[2025-02-23 05:19:55] (step=0429900) Train Loss: 0.3230, Train Steps/Sec: 16.58, Grad Norm: 0.0384 +[2025-02-23 05:20:00] (step=0430000) Train Loss: 0.3232, Train Steps/Sec: 17.26, Grad Norm: 0.0406 +[2025-02-23 05:20:06] (step=0430100) Train Loss: 0.3226, Train Steps/Sec: 17.18, Grad Norm: 0.0357 +[2025-02-23 05:20:12] (step=0430200) Train Loss: 0.3228, Train Steps/Sec: 17.23, Grad Norm: 0.0393 +[2025-02-23 05:20:18] (step=0430300) Train Loss: 0.3233, Train Steps/Sec: 17.18, Grad Norm: 0.0382 +[2025-02-23 05:20:26] (step=0430400) Train Loss: 0.3229, Train Steps/Sec: 12.96, Grad Norm: 0.0400 +[2025-02-23 05:20:32] (step=0430500) Train Loss: 0.3233, Train Steps/Sec: 16.31, Grad Norm: 0.0360 +[2025-02-23 05:20:38] (step=0430600) Train Loss: 0.3231, Train Steps/Sec: 15.08, Grad Norm: 0.0359 +[2025-02-23 05:20:45] (step=0430700) Train Loss: 0.3237, Train Steps/Sec: 16.43, Grad Norm: 0.0390 +[2025-02-23 05:20:50] (step=0430800) Train Loss: 0.3231, Train Steps/Sec: 17.21, Grad Norm: 0.0398 +[2025-02-23 05:20:56] (step=0430900) Train Loss: 0.3230, Train Steps/Sec: 16.36, Grad Norm: 0.0396 +[2025-02-23 05:21:02] (step=0431000) Train Loss: 0.3232, Train Steps/Sec: 17.13, Grad Norm: 0.0359 +[2025-02-23 05:21:08] (step=0431100) Train Loss: 0.3228, Train Steps/Sec: 17.10, Grad Norm: 0.0360 +[2025-02-23 05:21:14] (step=0431200) Train Loss: 0.3230, Train Steps/Sec: 17.12, Grad Norm: 0.0366 +[2025-02-23 05:21:20] (step=0431300) Train Loss: 0.3234, Train Steps/Sec: 17.11, Grad Norm: 0.0350 +[2025-02-23 05:21:26] (step=0431400) Train Loss: 0.3234, Train Steps/Sec: 17.08, Grad Norm: 0.0379 +[2025-02-23 05:21:32] (step=0431500) Train Loss: 0.3236, Train Steps/Sec: 16.41, Grad Norm: 0.0407 +[2025-02-23 05:21:39] (step=0431600) Train Loss: 0.3235, Train Steps/Sec: 12.93, Grad Norm: 0.0371 +[2025-02-23 05:21:46] (step=0431700) Train Loss: 0.3233, Train Steps/Sec: 16.38, Grad Norm: 0.0374 +[2025-02-23 05:21:52] (step=0431800) Train Loss: 0.3229, Train Steps/Sec: 15.74, Grad Norm: 0.0369 +[2025-02-23 05:21:58] (step=0431900) Train Loss: 0.3236, Train Steps/Sec: 17.19, Grad Norm: 0.0381 +[2025-02-23 05:22:04] (step=0432000) Train Loss: 0.3232, Train Steps/Sec: 17.21, Grad Norm: 0.0385 +[2025-02-23 05:22:09] (step=0432100) Train Loss: 0.3227, Train Steps/Sec: 17.29, Grad Norm: 0.0390 +[2025-02-23 05:22:15] (step=0432200) Train Loss: 0.3233, Train Steps/Sec: 17.31, Grad Norm: 0.0361 +[2025-02-23 05:22:21] (step=0432300) Train Loss: 0.3230, Train Steps/Sec: 17.32, Grad Norm: 0.0394 +[2025-02-23 05:22:28] (step=0432400) Train Loss: 0.3234, Train Steps/Sec: 15.18, Grad Norm: 0.0358 +[2025-02-23 05:22:33] (step=0432500) Train Loss: 0.3233, Train Steps/Sec: 17.32, Grad Norm: 0.0377 +[2025-02-23 05:22:40] (step=0432600) Train Loss: 0.3233, Train Steps/Sec: 14.57, Grad Norm: 0.0376 +[2025-02-23 05:22:46] (step=0432700) Train Loss: 0.3232, Train Steps/Sec: 17.35, Grad Norm: 0.0370 +[2025-02-23 05:22:52] (step=0432800) Train Loss: 0.3235, Train Steps/Sec: 15.83, Grad Norm: 0.0353 +[2025-02-23 05:22:59] (step=0432900) Train Loss: 0.3239, Train Steps/Sec: 14.26, Grad Norm: 0.0365 +[2025-02-23 05:23:05] (step=0433000) Train Loss: 0.3232, Train Steps/Sec: 17.31, Grad Norm: 0.0358 +[2025-02-23 05:23:11] (step=0433100) Train Loss: 0.3232, Train Steps/Sec: 17.38, Grad Norm: 0.0389 +[2025-02-23 05:23:17] (step=0433200) Train Loss: 0.3232, Train Steps/Sec: 17.39, Grad Norm: 0.0386 +[2025-02-23 05:23:22] (step=0433300) Train Loss: 0.3232, Train Steps/Sec: 17.30, Grad Norm: 0.0389 +[2025-02-23 05:23:28] (step=0433400) Train Loss: 0.3232, Train Steps/Sec: 17.31, Grad Norm: 0.0351 +[2025-02-23 05:23:34] (step=0433500) Train Loss: 0.3233, Train Steps/Sec: 15.89, Grad Norm: 0.0342 +[2025-02-23 05:23:41] (step=0433600) Train Loss: 0.3237, Train Steps/Sec: 15.17, Grad Norm: 0.0332 +[2025-02-23 05:23:47] (step=0433700) Train Loss: 0.3227, Train Steps/Sec: 16.41, Grad Norm: 0.0379 +[2025-02-23 05:23:53] (step=0433800) Train Loss: 0.3234, Train Steps/Sec: 16.34, Grad Norm: 0.0355 +[2025-02-23 05:23:59] (step=0433900) Train Loss: 0.3229, Train Steps/Sec: 17.22, Grad Norm: 0.0457 +[2025-02-23 05:24:05] (step=0434000) Train Loss: 0.3235, Train Steps/Sec: 17.29, Grad Norm: 0.0349 +[2025-02-23 05:24:12] (step=0434100) Train Loss: 0.3233, Train Steps/Sec: 14.24, Grad Norm: 0.0361 +[2025-02-23 05:24:18] (step=0434200) Train Loss: 0.3240, Train Steps/Sec: 17.17, Grad Norm: 0.0364 +[2025-02-23 05:24:23] (step=0434300) Train Loss: 0.3227, Train Steps/Sec: 17.28, Grad Norm: 0.0380 +[2025-02-23 05:24:30] (step=0434400) Train Loss: 0.3226, Train Steps/Sec: 15.20, Grad Norm: 0.0378 +[2025-02-23 05:24:36] (step=0434500) Train Loss: 0.3236, Train Steps/Sec: 17.33, Grad Norm: 0.0378 +[2025-02-23 05:24:43] (step=0434600) Train Loss: 0.3236, Train Steps/Sec: 14.54, Grad Norm: 0.0353 +[2025-02-23 05:24:48] (step=0434700) Train Loss: 0.3227, Train Steps/Sec: 17.28, Grad Norm: 0.0349 +[2025-02-23 05:24:55] (step=0434800) Train Loss: 0.3233, Train Steps/Sec: 15.93, Grad Norm: 0.0348 +[2025-02-23 05:25:01] (step=0434900) Train Loss: 0.3231, Train Steps/Sec: 17.30, Grad Norm: 0.0419 +[2025-02-23 05:25:06] (step=0435000) Train Loss: 0.3232, Train Steps/Sec: 17.23, Grad Norm: 0.0350 +[2025-02-23 05:25:12] (step=0435100) Train Loss: 0.3237, Train Steps/Sec: 17.25, Grad Norm: 0.0397 +[2025-02-23 05:25:18] (step=0435200) Train Loss: 0.3228, Train Steps/Sec: 17.26, Grad Norm: 0.0356 +[2025-02-23 05:25:24] (step=0435300) Train Loss: 0.3236, Train Steps/Sec: 17.31, Grad Norm: 0.0387 +[2025-02-23 05:25:31] (step=0435400) Train Loss: 0.3228, Train Steps/Sec: 13.20, Grad Norm: 0.0374 +[2025-02-23 05:25:38] (step=0435500) Train Loss: 0.3230, Train Steps/Sec: 15.16, Grad Norm: 0.0358 +[2025-02-23 05:25:44] (step=0435600) Train Loss: 0.3238, Train Steps/Sec: 16.44, Grad Norm: 0.0353 +[2025-02-23 05:25:50] (step=0435700) Train Loss: 0.3229, Train Steps/Sec: 16.54, Grad Norm: 0.0352 +[2025-02-23 05:25:56] (step=0435800) Train Loss: 0.3231, Train Steps/Sec: 17.25, Grad Norm: 0.0398 +[2025-02-23 05:26:02] (step=0435900) Train Loss: 0.3233, Train Steps/Sec: 17.18, Grad Norm: 0.0352 +[2025-02-23 05:26:07] (step=0436000) Train Loss: 0.3230, Train Steps/Sec: 17.15, Grad Norm: 0.0366 +[2025-02-23 05:26:13] (step=0436100) Train Loss: 0.3235, Train Steps/Sec: 17.15, Grad Norm: 0.0376 +[2025-02-23 05:26:19] (step=0436200) Train Loss: 0.3237, Train Steps/Sec: 17.08, Grad Norm: 0.0378 +[2025-02-23 05:26:25] (step=0436300) Train Loss: 0.3236, Train Steps/Sec: 17.14, Grad Norm: 0.0376 +[2025-02-23 05:26:32] (step=0436400) Train Loss: 0.3235, Train Steps/Sec: 15.03, Grad Norm: 0.0359 +[2025-02-23 05:26:37] (step=0436500) Train Loss: 0.3235, Train Steps/Sec: 17.15, Grad Norm: 0.0379 +[2025-02-23 05:26:46] (step=0436600) Train Loss: 0.3236, Train Steps/Sec: 12.38, Grad Norm: 0.0364 +[2025-02-23 05:26:52] (step=0436700) Train Loss: 0.3228, Train Steps/Sec: 15.61, Grad Norm: 0.0370 +[2025-02-23 05:26:58] (step=0436800) Train Loss: 0.3231, Train Steps/Sec: 17.16, Grad Norm: 0.0378 +[2025-02-23 05:27:04] (step=0436900) Train Loss: 0.3235, Train Steps/Sec: 17.22, Grad Norm: 0.0350 +[2025-02-23 05:27:09] (step=0437000) Train Loss: 0.3231, Train Steps/Sec: 17.14, Grad Norm: 0.0354 +[2025-02-23 05:27:15] (step=0437100) Train Loss: 0.3232, Train Steps/Sec: 17.19, Grad Norm: 0.0372 +[2025-02-23 05:27:21] (step=0437200) Train Loss: 0.3238, Train Steps/Sec: 17.17, Grad Norm: 0.0364 +[2025-02-23 05:27:27] (step=0437300) Train Loss: 0.3227, Train Steps/Sec: 17.19, Grad Norm: 0.0377 +[2025-02-23 05:27:33] (step=0437400) Train Loss: 0.3234, Train Steps/Sec: 15.16, Grad Norm: 0.0360 +[2025-02-23 05:27:40] (step=0437500) Train Loss: 0.3233, Train Steps/Sec: 15.80, Grad Norm: 0.0343 +[2025-02-23 05:27:46] (step=0437600) Train Loss: 0.3228, Train Steps/Sec: 16.45, Grad Norm: 0.0384 +[2025-02-23 05:27:52] (step=0437700) Train Loss: 0.3232, Train Steps/Sec: 16.34, Grad Norm: 0.0341 +[2025-02-23 05:27:58] (step=0437800) Train Loss: 0.3226, Train Steps/Sec: 17.22, Grad Norm: 0.0349 +[2025-02-23 05:28:05] (step=0437900) Train Loss: 0.3234, Train Steps/Sec: 14.32, Grad Norm: 0.0358 +[2025-02-23 05:28:11] (step=0438000) Train Loss: 0.3229, Train Steps/Sec: 17.22, Grad Norm: 0.0397 +[2025-02-23 05:28:16] (step=0438100) Train Loss: 0.3230, Train Steps/Sec: 17.19, Grad Norm: 0.0372 +[2025-02-23 05:28:22] (step=0438200) Train Loss: 0.3233, Train Steps/Sec: 17.18, Grad Norm: 0.0382 +[2025-02-23 05:28:28] (step=0438300) Train Loss: 0.3232, Train Steps/Sec: 17.17, Grad Norm: 0.0373 +[2025-02-23 05:28:34] (step=0438400) Train Loss: 0.3235, Train Steps/Sec: 15.70, Grad Norm: 0.0369 +[2025-02-23 05:28:41] (step=0438500) Train Loss: 0.3227, Train Steps/Sec: 16.42, Grad Norm: 0.0384 +[2025-02-23 05:28:47] (step=0438600) Train Loss: 0.3225, Train Steps/Sec: 14.50, Grad Norm: 0.0369 +[2025-02-23 05:28:54] (step=0438700) Train Loss: 0.3229, Train Steps/Sec: 16.50, Grad Norm: 0.0384 +[2025-02-23 05:29:00] (step=0438800) Train Loss: 0.3233, Train Steps/Sec: 16.53, Grad Norm: 0.0361 +[2025-02-23 05:29:05] (step=0438900) Train Loss: 0.3230, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 05:29:11] (step=0439000) Train Loss: 0.3230, Train Steps/Sec: 17.27, Grad Norm: 0.0384 +[2025-02-23 05:29:17] (step=0439100) Train Loss: 0.3233, Train Steps/Sec: 17.21, Grad Norm: 0.0388 +[2025-02-23 05:29:24] (step=0439200) Train Loss: 0.3228, Train Steps/Sec: 14.50, Grad Norm: 0.0367 +[2025-02-23 05:29:30] (step=0439300) Train Loss: 0.3234, Train Steps/Sec: 15.77, Grad Norm: 0.0373 +[2025-02-23 05:29:37] (step=0439400) Train Loss: 0.3229, Train Steps/Sec: 15.28, Grad Norm: 0.0387 +[2025-02-23 05:29:43] (step=0439500) Train Loss: 0.3233, Train Steps/Sec: 16.44, Grad Norm: 0.0371 +[2025-02-23 05:29:49] (step=0439600) Train Loss: 0.3235, Train Steps/Sec: 16.49, Grad Norm: 0.0350 +[2025-02-23 05:29:55] (step=0439700) Train Loss: 0.3225, Train Steps/Sec: 17.23, Grad Norm: 0.0380 +[2025-02-23 05:30:01] (step=0439800) Train Loss: 0.3238, Train Steps/Sec: 17.20, Grad Norm: 0.0365 +[2025-02-23 05:30:06] (step=0439900) Train Loss: 0.3233, Train Steps/Sec: 17.27, Grad Norm: 0.0356 +[2025-02-23 05:30:12] (step=0440000) Train Loss: 0.3226, Train Steps/Sec: 17.27, Grad Norm: 0.0386 +[2025-02-23 05:30:18] (step=0440100) Train Loss: 0.3235, Train Steps/Sec: 17.24, Grad Norm: 0.0356 +[2025-02-23 05:30:24] (step=0440200) Train Loss: 0.3230, Train Steps/Sec: 17.18, Grad Norm: 0.0377 +[2025-02-23 05:30:30] (step=0440300) Train Loss: 0.3238, Train Steps/Sec: 17.24, Grad Norm: 0.0389 +[2025-02-23 05:30:37] (step=0440400) Train Loss: 0.3233, Train Steps/Sec: 13.46, Grad Norm: 0.0388 +[2025-02-23 05:30:43] (step=0440500) Train Loss: 0.3233, Train Steps/Sec: 16.36, Grad Norm: 0.0391 +[2025-02-23 05:30:50] (step=0440600) Train Loss: 0.3231, Train Steps/Sec: 14.45, Grad Norm: 0.0377 +[2025-02-23 05:30:56] (step=0440700) Train Loss: 0.3228, Train Steps/Sec: 16.44, Grad Norm: 0.0392 +[2025-02-23 05:31:02] (step=0440800) Train Loss: 0.3229, Train Steps/Sec: 16.51, Grad Norm: 0.0368 +[2025-02-23 05:31:08] (step=0440900) Train Loss: 0.3230, Train Steps/Sec: 17.25, Grad Norm: 0.0382 +[2025-02-23 05:31:14] (step=0441000) Train Loss: 0.3232, Train Steps/Sec: 17.25, Grad Norm: 0.0350 +[2025-02-23 05:31:20] (step=0441100) Train Loss: 0.3233, Train Steps/Sec: 17.28, Grad Norm: 0.0351 +[2025-02-23 05:31:25] (step=0441200) Train Loss: 0.3230, Train Steps/Sec: 17.29, Grad Norm: 0.0388 +[2025-02-23 05:31:32] (step=0441300) Train Loss: 0.3236, Train Steps/Sec: 15.26, Grad Norm: 0.0358 +[2025-02-23 05:31:38] (step=0441400) Train Loss: 0.3232, Train Steps/Sec: 15.97, Grad Norm: 0.0405 +[2025-02-23 05:31:44] (step=0441500) Train Loss: 0.3233, Train Steps/Sec: 15.93, Grad Norm: 0.0386 +[2025-02-23 05:31:50] (step=0441600) Train Loss: 0.3228, Train Steps/Sec: 17.26, Grad Norm: 0.0383 +[2025-02-23 05:31:57] (step=0441700) Train Loss: 0.3233, Train Steps/Sec: 14.53, Grad Norm: 0.0373 +[2025-02-23 05:32:03] (step=0441800) Train Loss: 0.3229, Train Steps/Sec: 17.35, Grad Norm: 0.0369 +[2025-02-23 05:32:09] (step=0441900) Train Loss: 0.3233, Train Steps/Sec: 17.38, Grad Norm: 0.0348 +[2025-02-23 05:32:14] (step=0442000) Train Loss: 0.3234, Train Steps/Sec: 17.41, Grad Norm: 0.0365 +[2025-02-23 05:32:20] (step=0442100) Train Loss: 0.3233, Train Steps/Sec: 17.45, Grad Norm: 0.0394 +[2025-02-23 05:32:26] (step=0442200) Train Loss: 0.3234, Train Steps/Sec: 17.44, Grad Norm: 0.0347 +[2025-02-23 05:32:32] (step=0442300) Train Loss: 0.3234, Train Steps/Sec: 17.38, Grad Norm: 0.0365 +[2025-02-23 05:32:38] (step=0442400) Train Loss: 0.3236, Train Steps/Sec: 15.88, Grad Norm: 0.0379 +[2025-02-23 05:32:44] (step=0442500) Train Loss: 0.3227, Train Steps/Sec: 16.61, Grad Norm: 0.0380 +[2025-02-23 05:32:50] (step=0442600) Train Loss: 0.3230, Train Steps/Sec: 15.18, Grad Norm: 0.0398 +[2025-02-23 05:32:57] (step=0442700) Train Loss: 0.3230, Train Steps/Sec: 15.95, Grad Norm: 0.0389 +[2025-02-23 05:33:03] (step=0442800) Train Loss: 0.3228, Train Steps/Sec: 16.66, Grad Norm: 0.0361 +[2025-02-23 05:33:10] (step=0442900) Train Loss: 0.3233, Train Steps/Sec: 14.51, Grad Norm: 0.0388 +[2025-02-23 05:33:15] (step=0443000) Train Loss: 0.3235, Train Steps/Sec: 17.24, Grad Norm: 0.0419 +[2025-02-23 05:33:21] (step=0443100) Train Loss: 0.3230, Train Steps/Sec: 17.28, Grad Norm: 0.0344 +[2025-02-23 05:33:28] (step=0443200) Train Loss: 0.3229, Train Steps/Sec: 15.86, Grad Norm: 0.0379 +[2025-02-23 05:33:34] (step=0443300) Train Loss: 0.3231, Train Steps/Sec: 15.39, Grad Norm: 0.0347 +[2025-02-23 05:33:40] (step=0443400) Train Loss: 0.3230, Train Steps/Sec: 16.61, Grad Norm: 0.0359 +[2025-02-23 05:33:46] (step=0443500) Train Loss: 0.3237, Train Steps/Sec: 16.66, Grad Norm: 0.0393 +[2025-02-23 05:33:52] (step=0443600) Train Loss: 0.3225, Train Steps/Sec: 17.40, Grad Norm: 0.0350 +[2025-02-23 05:33:58] (step=0443700) Train Loss: 0.3228, Train Steps/Sec: 17.32, Grad Norm: 0.0379 +[2025-02-23 05:34:03] (step=0443800) Train Loss: 0.3230, Train Steps/Sec: 17.32, Grad Norm: 0.0378 +[2025-02-23 05:34:09] (step=0443900) Train Loss: 0.3234, Train Steps/Sec: 17.36, Grad Norm: 0.0355 +[2025-02-23 05:34:15] (step=0444000) Train Loss: 0.3228, Train Steps/Sec: 17.38, Grad Norm: 0.0370 +[2025-02-23 05:34:21] (step=0444100) Train Loss: 0.3229, Train Steps/Sec: 17.30, Grad Norm: 0.0412 +[2025-02-23 05:34:28] (step=0444200) Train Loss: 0.3235, Train Steps/Sec: 14.61, Grad Norm: 0.0347 +[2025-02-23 05:34:33] (step=0444300) Train Loss: 0.3230, Train Steps/Sec: 17.35, Grad Norm: 0.0346 +[2025-02-23 05:34:40] (step=0444400) Train Loss: 0.3230, Train Steps/Sec: 15.91, Grad Norm: 0.0381 +[2025-02-23 05:34:46] (step=0444500) Train Loss: 0.3236, Train Steps/Sec: 15.92, Grad Norm: 0.0361 +[2025-02-23 05:34:52] (step=0444600) Train Loss: 0.3232, Train Steps/Sec: 15.26, Grad Norm: 0.0379 +[2025-02-23 05:34:58] (step=0444700) Train Loss: 0.3231, Train Steps/Sec: 16.64, Grad Norm: 0.0367 +[2025-02-23 05:35:04] (step=0444800) Train Loss: 0.3231, Train Steps/Sec: 16.64, Grad Norm: 0.0360 +[2025-02-23 05:35:10] (step=0444900) Train Loss: 0.3231, Train Steps/Sec: 17.45, Grad Norm: 0.0422 +[2025-02-23 05:35:16] (step=0445000) Train Loss: 0.3228, Train Steps/Sec: 17.45, Grad Norm: 0.0363 +[2025-02-23 05:35:22] (step=0445100) Train Loss: 0.3229, Train Steps/Sec: 17.40, Grad Norm: 0.0354 +[2025-02-23 05:35:28] (step=0445200) Train Loss: 0.3231, Train Steps/Sec: 14.81, Grad Norm: 0.0371 +[2025-02-23 05:35:34] (step=0445300) Train Loss: 0.3232, Train Steps/Sec: 16.64, Grad Norm: 0.0377 +[2025-02-23 05:35:42] (step=0445400) Train Loss: 0.3236, Train Steps/Sec: 13.44, Grad Norm: 0.0377 +[2025-02-23 05:35:48] (step=0445500) Train Loss: 0.3231, Train Steps/Sec: 17.27, Grad Norm: 0.0371 +[2025-02-23 05:35:53] (step=0445600) Train Loss: 0.3228, Train Steps/Sec: 17.30, Grad Norm: 0.0406 +[2025-02-23 05:35:59] (step=0445700) Train Loss: 0.3230, Train Steps/Sec: 17.32, Grad Norm: 0.0405 +[2025-02-23 05:36:05] (step=0445800) Train Loss: 0.3230, Train Steps/Sec: 17.32, Grad Norm: 0.0353 +[2025-02-23 05:36:11] (step=0445900) Train Loss: 0.3230, Train Steps/Sec: 17.42, Grad Norm: 0.0375 +[2025-02-23 05:36:16] (step=0446000) Train Loss: 0.3235, Train Steps/Sec: 17.37, Grad Norm: 0.0356 +[2025-02-23 05:36:22] (step=0446100) Train Loss: 0.3234, Train Steps/Sec: 17.37, Grad Norm: 0.0367 +[2025-02-23 05:36:28] (step=0446200) Train Loss: 0.3238, Train Steps/Sec: 17.34, Grad Norm: 0.0359 +[2025-02-23 05:36:34] (step=0446300) Train Loss: 0.3233, Train Steps/Sec: 17.39, Grad Norm: 0.0410 +[2025-02-23 05:36:40] (step=0446400) Train Loss: 0.3238, Train Steps/Sec: 15.96, Grad Norm: 0.0353 +[2025-02-23 05:36:46] (step=0446500) Train Loss: 0.3238, Train Steps/Sec: 15.94, Grad Norm: 0.0369 +[2025-02-23 05:36:53] (step=0446600) Train Loss: 0.3230, Train Steps/Sec: 15.80, Grad Norm: 0.0365 +[2025-02-23 05:37:00] (step=0446700) Train Loss: 0.3226, Train Steps/Sec: 13.97, Grad Norm: 0.0381 +[2025-02-23 05:37:06] (step=0446800) Train Loss: 0.3227, Train Steps/Sec: 16.48, Grad Norm: 0.0356 +[2025-02-23 05:37:12] (step=0446900) Train Loss: 0.3230, Train Steps/Sec: 17.19, Grad Norm: 0.0367 +[2025-02-23 05:37:17] (step=0447000) Train Loss: 0.3230, Train Steps/Sec: 17.25, Grad Norm: 0.0371 +[2025-02-23 05:37:24] (step=0447100) Train Loss: 0.3231, Train Steps/Sec: 15.27, Grad Norm: 0.0368 +[2025-02-23 05:37:30] (step=0447200) Train Loss: 0.3232, Train Steps/Sec: 15.89, Grad Norm: 0.0347 +[2025-02-23 05:37:36] (step=0447300) Train Loss: 0.3234, Train Steps/Sec: 16.51, Grad Norm: 0.0362 +[2025-02-23 05:37:42] (step=0447400) Train Loss: 0.3235, Train Steps/Sec: 16.47, Grad Norm: 0.0368 +[2025-02-23 05:37:48] (step=0447500) Train Loss: 0.3234, Train Steps/Sec: 17.29, Grad Norm: 0.0343 +[2025-02-23 05:37:54] (step=0447600) Train Loss: 0.3227, Train Steps/Sec: 17.34, Grad Norm: 0.0384 +[2025-02-23 05:38:00] (step=0447700) Train Loss: 0.3232, Train Steps/Sec: 17.31, Grad Norm: 0.0352 +[2025-02-23 05:38:06] (step=0447800) Train Loss: 0.3229, Train Steps/Sec: 17.31, Grad Norm: 0.0390 +[2025-02-23 05:38:12] (step=0447900) Train Loss: 0.3233, Train Steps/Sec: 14.43, Grad Norm: 0.0341 +[2025-02-23 05:38:18] (step=0448000) Train Loss: 0.3232, Train Steps/Sec: 17.32, Grad Norm: 0.0367 +[2025-02-23 05:38:24] (step=0448100) Train Loss: 0.3225, Train Steps/Sec: 17.30, Grad Norm: 0.0362 +[2025-02-23 05:38:30] (step=0448200) Train Loss: 0.3236, Train Steps/Sec: 17.30, Grad Norm: 0.0383 +[2025-02-23 05:38:36] (step=0448300) Train Loss: 0.3235, Train Steps/Sec: 17.31, Grad Norm: 0.0385 +[2025-02-23 05:38:42] (step=0448400) Train Loss: 0.3236, Train Steps/Sec: 15.79, Grad Norm: 0.0386 +[2025-02-23 05:38:48] (step=0448500) Train Loss: 0.3228, Train Steps/Sec: 15.83, Grad Norm: 0.0374 +[2025-02-23 05:38:55] (step=0448600) Train Loss: 0.3240, Train Steps/Sec: 15.83, Grad Norm: 0.0397 +[2025-02-23 05:39:01] (step=0448700) Train Loss: 0.3228, Train Steps/Sec: 15.81, Grad Norm: 0.0395 +[2025-02-23 05:39:07] (step=0448800) Train Loss: 0.3232, Train Steps/Sec: 16.62, Grad Norm: 0.0374 +[2025-02-23 05:39:13] (step=0448900) Train Loss: 0.3229, Train Steps/Sec: 17.36, Grad Norm: 0.0370 +[2025-02-23 05:39:18] (step=0449000) Train Loss: 0.3229, Train Steps/Sec: 17.25, Grad Norm: 0.0365 +[2025-02-23 05:39:25] (step=0449100) Train Loss: 0.3232, Train Steps/Sec: 14.73, Grad Norm: 0.0387 +[2025-02-23 05:39:33] (step=0449200) Train Loss: 0.3228, Train Steps/Sec: 13.47, Grad Norm: 0.0339 +[2025-02-23 05:39:39] (step=0449300) Train Loss: 0.3230, Train Steps/Sec: 16.63, Grad Norm: 0.0359 +[2025-02-23 05:39:44] (step=0449400) Train Loss: 0.3231, Train Steps/Sec: 17.30, Grad Norm: 0.0347 +[2025-02-23 05:39:50] (step=0449500) Train Loss: 0.3231, Train Steps/Sec: 17.30, Grad Norm: 0.0367 +[2025-02-23 05:39:56] (step=0449600) Train Loss: 0.3229, Train Steps/Sec: 17.33, Grad Norm: 0.0345 +[2025-02-23 05:40:02] (step=0449700) Train Loss: 0.3232, Train Steps/Sec: 17.38, Grad Norm: 0.0363 +[2025-02-23 05:40:08] (step=0449800) Train Loss: 0.3231, Train Steps/Sec: 17.42, Grad Norm: 0.0392 +[2025-02-23 05:40:13] (step=0449900) Train Loss: 0.3230, Train Steps/Sec: 17.37, Grad Norm: 0.0341 +[2025-02-23 05:40:19] (step=0450000) Train Loss: 0.3233, Train Steps/Sec: 17.39, Grad Norm: 0.0419 +[2025-02-23 05:40:20] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0450000.pt +[2025-02-23 05:40:26] (step=0450100) Train Loss: 0.3231, Train Steps/Sec: 14.79, Grad Norm: 0.0334 +[2025-02-23 05:40:32] (step=0450200) Train Loss: 0.3237, Train Steps/Sec: 17.37, Grad Norm: 0.0387 +[2025-02-23 05:40:37] (step=0450300) Train Loss: 0.3232, Train Steps/Sec: 17.30, Grad Norm: 0.0379 +[2025-02-23 05:40:45] (step=0450400) Train Loss: 0.3229, Train Steps/Sec: 12.86, Grad Norm: 0.0338 +[2025-02-23 05:40:51] (step=0450500) Train Loss: 0.3233, Train Steps/Sec: 16.54, Grad Norm: 0.0398 +[2025-02-23 05:40:58] (step=0450600) Train Loss: 0.3233, Train Steps/Sec: 15.73, Grad Norm: 0.0396 +[2025-02-23 05:41:04] (step=0450700) Train Loss: 0.3230, Train Steps/Sec: 15.82, Grad Norm: 0.0354 +[2025-02-23 05:41:10] (step=0450800) Train Loss: 0.3231, Train Steps/Sec: 16.61, Grad Norm: 0.0366 +[2025-02-23 05:41:16] (step=0450900) Train Loss: 0.3229, Train Steps/Sec: 17.33, Grad Norm: 0.0354 +[2025-02-23 05:41:22] (step=0451000) Train Loss: 0.3232, Train Steps/Sec: 15.87, Grad Norm: 0.0404 +[2025-02-23 05:41:28] (step=0451100) Train Loss: 0.3232, Train Steps/Sec: 15.29, Grad Norm: 0.0371 +[2025-02-23 05:41:35] (step=0451200) Train Loss: 0.3222, Train Steps/Sec: 16.61, Grad Norm: 0.0364 +[2025-02-23 05:41:41] (step=0451300) Train Loss: 0.3226, Train Steps/Sec: 16.62, Grad Norm: 0.0377 +[2025-02-23 05:41:46] (step=0451400) Train Loss: 0.3232, Train Steps/Sec: 17.30, Grad Norm: 0.0371 +[2025-02-23 05:41:52] (step=0451500) Train Loss: 0.3237, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 05:41:58] (step=0451600) Train Loss: 0.3232, Train Steps/Sec: 17.20, Grad Norm: 0.0382 +[2025-02-23 05:42:05] (step=0451700) Train Loss: 0.3226, Train Steps/Sec: 14.50, Grad Norm: 0.0397 +[2025-02-23 05:42:11] (step=0451800) Train Loss: 0.3232, Train Steps/Sec: 17.22, Grad Norm: 0.0361 +[2025-02-23 05:42:16] (step=0451900) Train Loss: 0.3234, Train Steps/Sec: 17.29, Grad Norm: 0.0432 +[2025-02-23 05:42:22] (step=0452000) Train Loss: 0.3231, Train Steps/Sec: 17.29, Grad Norm: 0.0349 +[2025-02-23 05:42:28] (step=0452100) Train Loss: 0.3235, Train Steps/Sec: 17.31, Grad Norm: 0.0333 +[2025-02-23 05:42:34] (step=0452200) Train Loss: 0.3228, Train Steps/Sec: 17.35, Grad Norm: 0.0358 +[2025-02-23 05:42:39] (step=0452300) Train Loss: 0.3234, Train Steps/Sec: 17.43, Grad Norm: 0.0428 +[2025-02-23 05:42:46] (step=0452400) Train Loss: 0.3237, Train Steps/Sec: 15.24, Grad Norm: 0.0343 +[2025-02-23 05:42:52] (step=0452500) Train Loss: 0.3231, Train Steps/Sec: 16.60, Grad Norm: 0.0375 +[2025-02-23 05:42:58] (step=0452600) Train Loss: 0.3234, Train Steps/Sec: 15.85, Grad Norm: 0.0344 +[2025-02-23 05:43:05] (step=0452700) Train Loss: 0.3236, Train Steps/Sec: 15.86, Grad Norm: 0.0388 +[2025-02-23 05:43:11] (step=0452800) Train Loss: 0.3231, Train Steps/Sec: 16.55, Grad Norm: 0.0365 +[2025-02-23 05:43:18] (step=0452900) Train Loss: 0.3234, Train Steps/Sec: 13.92, Grad Norm: 0.0328 +[2025-02-23 05:43:24] (step=0453000) Train Loss: 0.3225, Train Steps/Sec: 15.22, Grad Norm: 0.0385 +[2025-02-23 05:43:31] (step=0453100) Train Loss: 0.3228, Train Steps/Sec: 15.77, Grad Norm: 0.0342 +[2025-02-23 05:43:37] (step=0453200) Train Loss: 0.3234, Train Steps/Sec: 16.49, Grad Norm: 0.0376 +[2025-02-23 05:43:43] (step=0453300) Train Loss: 0.3230, Train Steps/Sec: 17.15, Grad Norm: 0.0396 +[2025-02-23 05:43:49] (step=0453400) Train Loss: 0.3233, Train Steps/Sec: 17.15, Grad Norm: 0.0362 +[2025-02-23 05:43:54] (step=0453500) Train Loss: 0.3232, Train Steps/Sec: 17.08, Grad Norm: 0.0358 +[2025-02-23 05:44:00] (step=0453600) Train Loss: 0.3230, Train Steps/Sec: 17.13, Grad Norm: 0.0396 +[2025-02-23 05:44:06] (step=0453700) Train Loss: 0.3229, Train Steps/Sec: 17.16, Grad Norm: 0.0390 +[2025-02-23 05:44:12] (step=0453800) Train Loss: 0.3227, Train Steps/Sec: 17.18, Grad Norm: 0.0392 +[2025-02-23 05:44:18] (step=0453900) Train Loss: 0.3236, Train Steps/Sec: 17.28, Grad Norm: 0.0362 +[2025-02-23 05:44:23] (step=0454000) Train Loss: 0.3227, Train Steps/Sec: 17.31, Grad Norm: 0.0368 +[2025-02-23 05:44:29] (step=0454100) Train Loss: 0.3232, Train Steps/Sec: 17.18, Grad Norm: 0.0380 +[2025-02-23 05:44:36] (step=0454200) Train Loss: 0.3226, Train Steps/Sec: 14.54, Grad Norm: 0.0392 +[2025-02-23 05:44:42] (step=0454300) Train Loss: 0.3235, Train Steps/Sec: 16.32, Grad Norm: 0.0363 +[2025-02-23 05:44:49] (step=0454400) Train Loss: 0.3233, Train Steps/Sec: 15.68, Grad Norm: 0.0375 +[2025-02-23 05:44:55] (step=0454500) Train Loss: 0.3230, Train Steps/Sec: 16.37, Grad Norm: 0.0354 +[2025-02-23 05:45:01] (step=0454600) Train Loss: 0.3234, Train Steps/Sec: 15.69, Grad Norm: 0.0366 +[2025-02-23 05:45:08] (step=0454700) Train Loss: 0.3231, Train Steps/Sec: 15.68, Grad Norm: 0.0355 +[2025-02-23 05:45:14] (step=0454800) Train Loss: 0.3231, Train Steps/Sec: 16.42, Grad Norm: 0.0365 +[2025-02-23 05:45:20] (step=0454900) Train Loss: 0.3232, Train Steps/Sec: 15.87, Grad Norm: 0.0362 +[2025-02-23 05:45:26] (step=0455000) Train Loss: 0.3226, Train Steps/Sec: 15.37, Grad Norm: 0.0373 +[2025-02-23 05:45:33] (step=0455100) Train Loss: 0.3231, Train Steps/Sec: 16.55, Grad Norm: 0.0386 +[2025-02-23 05:45:38] (step=0455200) Train Loss: 0.3231, Train Steps/Sec: 16.69, Grad Norm: 0.0369 +[2025-02-23 05:45:44] (step=0455300) Train Loss: 0.3232, Train Steps/Sec: 17.33, Grad Norm: 0.0347 +[2025-02-23 05:45:51] (step=0455400) Train Loss: 0.3228, Train Steps/Sec: 14.55, Grad Norm: 0.0390 +[2025-02-23 05:45:57] (step=0455500) Train Loss: 0.3227, Train Steps/Sec: 17.24, Grad Norm: 0.0369 +[2025-02-23 05:46:03] (step=0455600) Train Loss: 0.3229, Train Steps/Sec: 17.28, Grad Norm: 0.0371 +[2025-02-23 05:46:09] (step=0455700) Train Loss: 0.3233, Train Steps/Sec: 17.28, Grad Norm: 0.0382 +[2025-02-23 05:46:14] (step=0455800) Train Loss: 0.3236, Train Steps/Sec: 17.30, Grad Norm: 0.0357 +[2025-02-23 05:46:20] (step=0455900) Train Loss: 0.3232, Train Steps/Sec: 17.31, Grad Norm: 0.0374 +[2025-02-23 05:46:26] (step=0456000) Train Loss: 0.3232, Train Steps/Sec: 17.29, Grad Norm: 0.0374 +[2025-02-23 05:46:32] (step=0456100) Train Loss: 0.3223, Train Steps/Sec: 17.26, Grad Norm: 0.0367 +[2025-02-23 05:46:37] (step=0456200) Train Loss: 0.3230, Train Steps/Sec: 17.25, Grad Norm: 0.0397 +[2025-02-23 05:46:44] (step=0456300) Train Loss: 0.3231, Train Steps/Sec: 16.52, Grad Norm: 0.0355 +[2025-02-23 05:46:50] (step=0456400) Train Loss: 0.3227, Train Steps/Sec: 16.49, Grad Norm: 0.0369 +[2025-02-23 05:46:56] (step=0456500) Train Loss: 0.3229, Train Steps/Sec: 16.51, Grad Norm: 0.0435 +[2025-02-23 05:47:02] (step=0456600) Train Loss: 0.3228, Train Steps/Sec: 15.72, Grad Norm: 0.0336 +[2025-02-23 05:47:09] (step=0456700) Train Loss: 0.3226, Train Steps/Sec: 13.45, Grad Norm: 0.0389 +[2025-02-23 05:47:16] (step=0456800) Train Loss: 0.3226, Train Steps/Sec: 15.90, Grad Norm: 0.0348 +[2025-02-23 05:47:22] (step=0456900) Train Loss: 0.3227, Train Steps/Sec: 15.28, Grad Norm: 0.0381 +[2025-02-23 05:47:29] (step=0457000) Train Loss: 0.3237, Train Steps/Sec: 15.82, Grad Norm: 0.0352 +[2025-02-23 05:47:35] (step=0457100) Train Loss: 0.3237, Train Steps/Sec: 16.61, Grad Norm: 0.0382 +[2025-02-23 05:47:40] (step=0457200) Train Loss: 0.3227, Train Steps/Sec: 17.34, Grad Norm: 0.0352 +[2025-02-23 05:47:46] (step=0457300) Train Loss: 0.3234, Train Steps/Sec: 17.34, Grad Norm: 0.0370 +[2025-02-23 05:47:52] (step=0457400) Train Loss: 0.3233, Train Steps/Sec: 17.34, Grad Norm: 0.0386 +[2025-02-23 05:47:58] (step=0457500) Train Loss: 0.3228, Train Steps/Sec: 17.37, Grad Norm: 0.0366 +[2025-02-23 05:48:03] (step=0457600) Train Loss: 0.3235, Train Steps/Sec: 17.31, Grad Norm: 0.0381 +[2025-02-23 05:48:09] (step=0457700) Train Loss: 0.3226, Train Steps/Sec: 17.35, Grad Norm: 0.0381 +[2025-02-23 05:48:15] (step=0457800) Train Loss: 0.3234, Train Steps/Sec: 17.41, Grad Norm: 0.0400 +[2025-02-23 05:48:22] (step=0457900) Train Loss: 0.3236, Train Steps/Sec: 14.49, Grad Norm: 0.0361 +[2025-02-23 05:48:28] (step=0458000) Train Loss: 0.3229, Train Steps/Sec: 17.29, Grad Norm: 0.0445 +[2025-02-23 05:48:33] (step=0458100) Train Loss: 0.3233, Train Steps/Sec: 17.33, Grad Norm: 0.0342 +[2025-02-23 05:48:39] (step=0458200) Train Loss: 0.3231, Train Steps/Sec: 16.60, Grad Norm: 0.0403 +[2025-02-23 05:48:45] (step=0458300) Train Loss: 0.3229, Train Steps/Sec: 17.33, Grad Norm: 0.0380 +[2025-02-23 05:48:52] (step=0458400) Train Loss: 0.3230, Train Steps/Sec: 15.95, Grad Norm: 0.0339 +[2025-02-23 05:48:58] (step=0458500) Train Loss: 0.3226, Train Steps/Sec: 16.66, Grad Norm: 0.0354 +[2025-02-23 05:49:04] (step=0458600) Train Loss: 0.3231, Train Steps/Sec: 15.88, Grad Norm: 0.0373 +[2025-02-23 05:49:10] (step=0458700) Train Loss: 0.3236, Train Steps/Sec: 15.85, Grad Norm: 0.0349 +[2025-02-23 05:49:17] (step=0458800) Train Loss: 0.3236, Train Steps/Sec: 14.78, Grad Norm: 0.0390 +[2025-02-23 05:49:23] (step=0458900) Train Loss: 0.3230, Train Steps/Sec: 16.01, Grad Norm: 0.0382 +[2025-02-23 05:49:29] (step=0459000) Train Loss: 0.3231, Train Steps/Sec: 16.58, Grad Norm: 0.0342 +[2025-02-23 05:49:35] (step=0459100) Train Loss: 0.3231, Train Steps/Sec: 16.55, Grad Norm: 0.0363 +[2025-02-23 05:49:42] (step=0459200) Train Loss: 0.3235, Train Steps/Sec: 14.41, Grad Norm: 0.0363 +[2025-02-23 05:49:48] (step=0459300) Train Loss: 0.3227, Train Steps/Sec: 17.15, Grad Norm: 0.0385 +[2025-02-23 05:49:54] (step=0459400) Train Loss: 0.3230, Train Steps/Sec: 17.22, Grad Norm: 0.0357 +[2025-02-23 05:50:00] (step=0459500) Train Loss: 0.3232, Train Steps/Sec: 17.34, Grad Norm: 0.0361 +[2025-02-23 05:50:05] (step=0459600) Train Loss: 0.3227, Train Steps/Sec: 17.42, Grad Norm: 0.0377 +[2025-02-23 05:50:11] (step=0459700) Train Loss: 0.3232, Train Steps/Sec: 17.44, Grad Norm: 0.0373 +[2025-02-23 05:50:17] (step=0459800) Train Loss: 0.3237, Train Steps/Sec: 17.43, Grad Norm: 0.0360 +[2025-02-23 05:50:23] (step=0459900) Train Loss: 0.3227, Train Steps/Sec: 17.46, Grad Norm: 0.0367 +[2025-02-23 05:50:28] (step=0460000) Train Loss: 0.3238, Train Steps/Sec: 17.36, Grad Norm: 0.0355 +[2025-02-23 05:50:34] (step=0460100) Train Loss: 0.3228, Train Steps/Sec: 17.36, Grad Norm: 0.0400 +[2025-02-23 05:50:40] (step=0460200) Train Loss: 0.3233, Train Steps/Sec: 16.50, Grad Norm: 0.0356 +[2025-02-23 05:50:46] (step=0460300) Train Loss: 0.3227, Train Steps/Sec: 17.32, Grad Norm: 0.0392 +[2025-02-23 05:50:53] (step=0460400) Train Loss: 0.3233, Train Steps/Sec: 13.52, Grad Norm: 0.0356 +[2025-02-23 05:50:59] (step=0460500) Train Loss: 0.3227, Train Steps/Sec: 16.35, Grad Norm: 0.0373 +[2025-02-23 05:51:06] (step=0460600) Train Loss: 0.3234, Train Steps/Sec: 15.64, Grad Norm: 0.0365 +[2025-02-23 05:51:13] (step=0460700) Train Loss: 0.3231, Train Steps/Sec: 14.49, Grad Norm: 0.0379 +[2025-02-23 05:51:19] (step=0460800) Train Loss: 0.3231, Train Steps/Sec: 15.30, Grad Norm: 0.0363 +[2025-02-23 05:51:25] (step=0460900) Train Loss: 0.3230, Train Steps/Sec: 16.50, Grad Norm: 0.0380 +[2025-02-23 05:51:31] (step=0461000) Train Loss: 0.3230, Train Steps/Sec: 16.62, Grad Norm: 0.0379 +[2025-02-23 05:51:37] (step=0461100) Train Loss: 0.3229, Train Steps/Sec: 17.22, Grad Norm: 0.0397 +[2025-02-23 05:51:43] (step=0461200) Train Loss: 0.3235, Train Steps/Sec: 17.21, Grad Norm: 0.0387 +[2025-02-23 05:51:49] (step=0461300) Train Loss: 0.3230, Train Steps/Sec: 17.22, Grad Norm: 0.0362 +[2025-02-23 05:51:55] (step=0461400) Train Loss: 0.3230, Train Steps/Sec: 17.22, Grad Norm: 0.0371 +[2025-02-23 05:52:00] (step=0461500) Train Loss: 0.3228, Train Steps/Sec: 17.25, Grad Norm: 0.0368 +[2025-02-23 05:52:06] (step=0461600) Train Loss: 0.3227, Train Steps/Sec: 17.13, Grad Norm: 0.0373 +[2025-02-23 05:52:13] (step=0461700) Train Loss: 0.3228, Train Steps/Sec: 14.61, Grad Norm: 0.0370 +[2025-02-23 05:52:19] (step=0461800) Train Loss: 0.3229, Train Steps/Sec: 17.33, Grad Norm: 0.0358 +[2025-02-23 05:52:25] (step=0461900) Train Loss: 0.3228, Train Steps/Sec: 17.31, Grad Norm: 0.0348 +[2025-02-23 05:52:30] (step=0462000) Train Loss: 0.3226, Train Steps/Sec: 17.27, Grad Norm: 0.0398 +[2025-02-23 05:52:36] (step=0462100) Train Loss: 0.3228, Train Steps/Sec: 16.57, Grad Norm: 0.0348 +[2025-02-23 05:52:42] (step=0462200) Train Loss: 0.3235, Train Steps/Sec: 17.38, Grad Norm: 0.0377 +[2025-02-23 05:52:48] (step=0462300) Train Loss: 0.3234, Train Steps/Sec: 17.38, Grad Norm: 0.0357 +[2025-02-23 05:52:54] (step=0462400) Train Loss: 0.3225, Train Steps/Sec: 15.92, Grad Norm: 0.0352 +[2025-02-23 05:53:00] (step=0462500) Train Loss: 0.3233, Train Steps/Sec: 17.42, Grad Norm: 0.0373 +[2025-02-23 05:53:06] (step=0462600) Train Loss: 0.3230, Train Steps/Sec: 15.27, Grad Norm: 0.0368 +[2025-02-23 05:53:14] (step=0462700) Train Loss: 0.3233, Train Steps/Sec: 13.74, Grad Norm: 0.0371 +[2025-02-23 05:53:20] (step=0462800) Train Loss: 0.3228, Train Steps/Sec: 16.00, Grad Norm: 0.0346 +[2025-02-23 05:53:27] (step=0462900) Train Loss: 0.3227, Train Steps/Sec: 13.98, Grad Norm: 0.0352 +[2025-02-23 05:53:33] (step=0463000) Train Loss: 0.3227, Train Steps/Sec: 16.75, Grad Norm: 0.0397 +[2025-02-23 05:53:39] (step=0463100) Train Loss: 0.3236, Train Steps/Sec: 17.43, Grad Norm: 0.0379 +[2025-02-23 05:53:45] (step=0463200) Train Loss: 0.3230, Train Steps/Sec: 17.29, Grad Norm: 0.0368 +[2025-02-23 05:53:50] (step=0463300) Train Loss: 0.3230, Train Steps/Sec: 17.35, Grad Norm: 0.0387 +[2025-02-23 05:53:56] (step=0463400) Train Loss: 0.3226, Train Steps/Sec: 17.30, Grad Norm: 0.0357 +[2025-02-23 05:54:02] (step=0463500) Train Loss: 0.3231, Train Steps/Sec: 17.45, Grad Norm: 0.0357 +[2025-02-23 05:54:08] (step=0463600) Train Loss: 0.3228, Train Steps/Sec: 17.44, Grad Norm: 0.0383 +[2025-02-23 05:54:13] (step=0463700) Train Loss: 0.3230, Train Steps/Sec: 17.44, Grad Norm: 0.0368 +[2025-02-23 05:54:19] (step=0463800) Train Loss: 0.3236, Train Steps/Sec: 17.37, Grad Norm: 0.0341 +[2025-02-23 05:54:25] (step=0463900) Train Loss: 0.3234, Train Steps/Sec: 17.38, Grad Norm: 0.0335 +[2025-02-23 05:54:31] (step=0464000) Train Loss: 0.3228, Train Steps/Sec: 17.38, Grad Norm: 0.0395 +[2025-02-23 05:54:37] (step=0464100) Train Loss: 0.3230, Train Steps/Sec: 16.39, Grad Norm: 0.0351 +[2025-02-23 05:54:44] (step=0464200) Train Loss: 0.3227, Train Steps/Sec: 14.61, Grad Norm: 0.0373 +[2025-02-23 05:54:49] (step=0464300) Train Loss: 0.3230, Train Steps/Sec: 17.28, Grad Norm: 0.0365 +[2025-02-23 05:54:56] (step=0464400) Train Loss: 0.3230, Train Steps/Sec: 15.88, Grad Norm: 0.0382 +[2025-02-23 05:55:02] (step=0464500) Train Loss: 0.3231, Train Steps/Sec: 17.27, Grad Norm: 0.0348 +[2025-02-23 05:55:09] (step=0464600) Train Loss: 0.3230, Train Steps/Sec: 14.00, Grad Norm: 0.0382 +[2025-02-23 05:55:16] (step=0464700) Train Loss: 0.3233, Train Steps/Sec: 14.06, Grad Norm: 0.0347 +[2025-02-23 05:55:22] (step=0464800) Train Loss: 0.3227, Train Steps/Sec: 15.79, Grad Norm: 0.0367 +[2025-02-23 05:55:28] (step=0464900) Train Loss: 0.3232, Train Steps/Sec: 16.56, Grad Norm: 0.0348 +[2025-02-23 05:55:34] (step=0465000) Train Loss: 0.3227, Train Steps/Sec: 17.32, Grad Norm: 0.0334 +[2025-02-23 05:55:40] (step=0465100) Train Loss: 0.3231, Train Steps/Sec: 17.32, Grad Norm: 0.0354 +[2025-02-23 05:55:45] (step=0465200) Train Loss: 0.3230, Train Steps/Sec: 17.31, Grad Norm: 0.0373 +[2025-02-23 05:55:51] (step=0465300) Train Loss: 0.3232, Train Steps/Sec: 17.38, Grad Norm: 0.0376 +[2025-02-23 05:55:58] (step=0465400) Train Loss: 0.3223, Train Steps/Sec: 14.50, Grad Norm: 0.0387 +[2025-02-23 05:56:04] (step=0465500) Train Loss: 0.3228, Train Steps/Sec: 17.30, Grad Norm: 0.0373 +[2025-02-23 05:56:10] (step=0465600) Train Loss: 0.3228, Train Steps/Sec: 17.36, Grad Norm: 0.0384 +[2025-02-23 05:56:15] (step=0465700) Train Loss: 0.3230, Train Steps/Sec: 17.40, Grad Norm: 0.0388 +[2025-02-23 05:56:21] (step=0465800) Train Loss: 0.3231, Train Steps/Sec: 17.40, Grad Norm: 0.0334 +[2025-02-23 05:56:27] (step=0465900) Train Loss: 0.3229, Train Steps/Sec: 17.39, Grad Norm: 0.0391 +[2025-02-23 05:56:33] (step=0466000) Train Loss: 0.3231, Train Steps/Sec: 16.57, Grad Norm: 0.0361 +[2025-02-23 05:56:39] (step=0466100) Train Loss: 0.3237, Train Steps/Sec: 17.39, Grad Norm: 0.0361 +[2025-02-23 05:56:44] (step=0466200) Train Loss: 0.3231, Train Steps/Sec: 17.40, Grad Norm: 0.0360 +[2025-02-23 05:56:50] (step=0466300) Train Loss: 0.3232, Train Steps/Sec: 17.48, Grad Norm: 0.0362 +[2025-02-23 05:56:56] (step=0466400) Train Loss: 0.3237, Train Steps/Sec: 15.88, Grad Norm: 0.0377 +[2025-02-23 05:57:03] (step=0466500) Train Loss: 0.3240, Train Steps/Sec: 16.52, Grad Norm: 0.0354 +[2025-02-23 05:57:09] (step=0466600) Train Loss: 0.3232, Train Steps/Sec: 14.56, Grad Norm: 0.0373 +[2025-02-23 05:57:18] (step=0466700) Train Loss: 0.3230, Train Steps/Sec: 12.18, Grad Norm: 0.0394 +[2025-02-23 05:57:24] (step=0466800) Train Loss: 0.3225, Train Steps/Sec: 15.76, Grad Norm: 0.0384 +[2025-02-23 05:57:30] (step=0466900) Train Loss: 0.3233, Train Steps/Sec: 16.46, Grad Norm: 0.0381 +[2025-02-23 05:57:36] (step=0467000) Train Loss: 0.3235, Train Steps/Sec: 17.22, Grad Norm: 0.0380 +[2025-02-23 05:57:42] (step=0467100) Train Loss: 0.3235, Train Steps/Sec: 17.22, Grad Norm: 0.0355 +[2025-02-23 05:57:47] (step=0467200) Train Loss: 0.3228, Train Steps/Sec: 17.23, Grad Norm: 0.0349 +[2025-02-23 05:57:53] (step=0467300) Train Loss: 0.3230, Train Steps/Sec: 17.39, Grad Norm: 0.0357 +[2025-02-23 05:57:59] (step=0467400) Train Loss: 0.3223, Train Steps/Sec: 17.48, Grad Norm: 0.0364 +[2025-02-23 05:58:05] (step=0467500) Train Loss: 0.3230, Train Steps/Sec: 17.44, Grad Norm: 0.0378 +[2025-02-23 05:58:10] (step=0467600) Train Loss: 0.3229, Train Steps/Sec: 17.42, Grad Norm: 0.0384 +[2025-02-23 05:58:16] (step=0467700) Train Loss: 0.3233, Train Steps/Sec: 17.36, Grad Norm: 0.0387 +[2025-02-23 05:58:22] (step=0467800) Train Loss: 0.3227, Train Steps/Sec: 17.39, Grad Norm: 0.0359 +[2025-02-23 05:58:29] (step=0467900) Train Loss: 0.3232, Train Steps/Sec: 14.28, Grad Norm: 0.0382 +[2025-02-23 05:58:35] (step=0468000) Train Loss: 0.3231, Train Steps/Sec: 16.54, Grad Norm: 0.0359 +[2025-02-23 05:58:41] (step=0468100) Train Loss: 0.3223, Train Steps/Sec: 17.32, Grad Norm: 0.0368 +[2025-02-23 05:58:47] (step=0468200) Train Loss: 0.3230, Train Steps/Sec: 17.28, Grad Norm: 0.0384 +[2025-02-23 05:58:52] (step=0468300) Train Loss: 0.3234, Train Steps/Sec: 17.18, Grad Norm: 0.0395 +[2025-02-23 05:58:59] (step=0468400) Train Loss: 0.3237, Train Steps/Sec: 15.70, Grad Norm: 0.0343 +[2025-02-23 05:59:05] (step=0468500) Train Loss: 0.3233, Train Steps/Sec: 15.69, Grad Norm: 0.0369 +[2025-02-23 05:59:12] (step=0468600) Train Loss: 0.3225, Train Steps/Sec: 13.56, Grad Norm: 0.0410 +[2025-02-23 05:59:19] (step=0468700) Train Loss: 0.3229, Train Steps/Sec: 14.99, Grad Norm: 0.0409 +[2025-02-23 05:59:25] (step=0468800) Train Loss: 0.3229, Train Steps/Sec: 16.45, Grad Norm: 0.0352 +[2025-02-23 05:59:31] (step=0468900) Train Loss: 0.3227, Train Steps/Sec: 16.42, Grad Norm: 0.0368 +[2025-02-23 05:59:37] (step=0469000) Train Loss: 0.3231, Train Steps/Sec: 17.21, Grad Norm: 0.0339 +[2025-02-23 05:59:43] (step=0469100) Train Loss: 0.3232, Train Steps/Sec: 17.18, Grad Norm: 0.0374 +[2025-02-23 05:59:50] (step=0469200) Train Loss: 0.3228, Train Steps/Sec: 14.22, Grad Norm: 0.0343 +[2025-02-23 05:59:56] (step=0469300) Train Loss: 0.3232, Train Steps/Sec: 17.27, Grad Norm: 0.0362 +[2025-02-23 06:00:02] (step=0469400) Train Loss: 0.3226, Train Steps/Sec: 17.31, Grad Norm: 0.0360 +[2025-02-23 06:00:07] (step=0469500) Train Loss: 0.3230, Train Steps/Sec: 17.32, Grad Norm: 0.0382 +[2025-02-23 06:00:13] (step=0469600) Train Loss: 0.3232, Train Steps/Sec: 17.34, Grad Norm: 0.0345 +[2025-02-23 06:00:19] (step=0469700) Train Loss: 0.3229, Train Steps/Sec: 17.32, Grad Norm: 0.0349 +[2025-02-23 06:00:25] (step=0469800) Train Loss: 0.3232, Train Steps/Sec: 17.24, Grad Norm: 0.0362 +[2025-02-23 06:00:31] (step=0469900) Train Loss: 0.3234, Train Steps/Sec: 16.47, Grad Norm: 0.0359 +[2025-02-23 06:00:37] (step=0470000) Train Loss: 0.3229, Train Steps/Sec: 17.36, Grad Norm: 0.0362 +[2025-02-23 06:00:42] (step=0470100) Train Loss: 0.3224, Train Steps/Sec: 17.34, Grad Norm: 0.0377 +[2025-02-23 06:00:48] (step=0470200) Train Loss: 0.3228, Train Steps/Sec: 17.33, Grad Norm: 0.0388 +[2025-02-23 06:00:54] (step=0470300) Train Loss: 0.3232, Train Steps/Sec: 17.29, Grad Norm: 0.0367 +[2025-02-23 06:01:02] (step=0470400) Train Loss: 0.3227, Train Steps/Sec: 13.01, Grad Norm: 0.0402 +[2025-02-23 06:01:08] (step=0470500) Train Loss: 0.3234, Train Steps/Sec: 14.64, Grad Norm: 0.0357 +[2025-02-23 06:01:15] (step=0470600) Train Loss: 0.3226, Train Steps/Sec: 15.04, Grad Norm: 0.0359 +[2025-02-23 06:01:22] (step=0470700) Train Loss: 0.3231, Train Steps/Sec: 15.08, Grad Norm: 0.0406 +[2025-02-23 06:01:28] (step=0470800) Train Loss: 0.3233, Train Steps/Sec: 16.56, Grad Norm: 0.0368 +[2025-02-23 06:01:34] (step=0470900) Train Loss: 0.3226, Train Steps/Sec: 16.54, Grad Norm: 0.0378 +[2025-02-23 06:01:40] (step=0471000) Train Loss: 0.3227, Train Steps/Sec: 17.38, Grad Norm: 0.0338 +[2025-02-23 06:01:45] (step=0471100) Train Loss: 0.3232, Train Steps/Sec: 17.44, Grad Norm: 0.0347 +[2025-02-23 06:01:51] (step=0471200) Train Loss: 0.3229, Train Steps/Sec: 17.45, Grad Norm: 0.0348 +[2025-02-23 06:01:57] (step=0471300) Train Loss: 0.3228, Train Steps/Sec: 17.45, Grad Norm: 0.0344 +[2025-02-23 06:02:02] (step=0471400) Train Loss: 0.3229, Train Steps/Sec: 17.41, Grad Norm: 0.0391 +[2025-02-23 06:02:08] (step=0471500) Train Loss: 0.3226, Train Steps/Sec: 17.40, Grad Norm: 0.0370 +[2025-02-23 06:02:14] (step=0471600) Train Loss: 0.3229, Train Steps/Sec: 17.26, Grad Norm: 0.0371 +[2025-02-23 06:02:21] (step=0471700) Train Loss: 0.3224, Train Steps/Sec: 14.51, Grad Norm: 0.0387 +[2025-02-23 06:02:27] (step=0471800) Train Loss: 0.3228, Train Steps/Sec: 17.28, Grad Norm: 0.0358 +[2025-02-23 06:02:33] (step=0471900) Train Loss: 0.3231, Train Steps/Sec: 16.56, Grad Norm: 0.0342 +[2025-02-23 06:02:38] (step=0472000) Train Loss: 0.3231, Train Steps/Sec: 17.39, Grad Norm: 0.0360 +[2025-02-23 06:02:44] (step=0472100) Train Loss: 0.3228, Train Steps/Sec: 17.40, Grad Norm: 0.0385 +[2025-02-23 06:02:50] (step=0472200) Train Loss: 0.3225, Train Steps/Sec: 17.38, Grad Norm: 0.0362 +[2025-02-23 06:02:56] (step=0472300) Train Loss: 0.3236, Train Steps/Sec: 17.34, Grad Norm: 0.0390 +[2025-02-23 06:03:03] (step=0472400) Train Loss: 0.3227, Train Steps/Sec: 14.12, Grad Norm: 0.0351 +[2025-02-23 06:03:09] (step=0472500) Train Loss: 0.3229, Train Steps/Sec: 15.96, Grad Norm: 0.0350 +[2025-02-23 06:03:16] (step=0472600) Train Loss: 0.3229, Train Steps/Sec: 15.19, Grad Norm: 0.0361 +[2025-02-23 06:03:23] (step=0472700) Train Loss: 0.3229, Train Steps/Sec: 14.63, Grad Norm: 0.0400 +[2025-02-23 06:03:28] (step=0472800) Train Loss: 0.3229, Train Steps/Sec: 17.22, Grad Norm: 0.0354 +[2025-02-23 06:03:35] (step=0472900) Train Loss: 0.3232, Train Steps/Sec: 13.96, Grad Norm: 0.0353 +[2025-02-23 06:03:41] (step=0473000) Train Loss: 0.3224, Train Steps/Sec: 17.30, Grad Norm: 0.0359 +[2025-02-23 06:03:47] (step=0473100) Train Loss: 0.3236, Train Steps/Sec: 17.34, Grad Norm: 0.0381 +[2025-02-23 06:03:53] (step=0473200) Train Loss: 0.3225, Train Steps/Sec: 17.40, Grad Norm: 0.0374 +[2025-02-23 06:03:59] (step=0473300) Train Loss: 0.3234, Train Steps/Sec: 17.43, Grad Norm: 0.0382 +[2025-02-23 06:04:04] (step=0473400) Train Loss: 0.3227, Train Steps/Sec: 17.41, Grad Norm: 0.0384 +[2025-02-23 06:04:10] (step=0473500) Train Loss: 0.3232, Train Steps/Sec: 17.41, Grad Norm: 0.0365 +[2025-02-23 06:04:16] (step=0473600) Train Loss: 0.3223, Train Steps/Sec: 17.37, Grad Norm: 0.0361 +[2025-02-23 06:04:22] (step=0473700) Train Loss: 0.3228, Train Steps/Sec: 17.37, Grad Norm: 0.0360 +[2025-02-23 06:04:28] (step=0473800) Train Loss: 0.3234, Train Steps/Sec: 16.66, Grad Norm: 0.0385 +[2025-02-23 06:04:33] (step=0473900) Train Loss: 0.3235, Train Steps/Sec: 17.41, Grad Norm: 0.0375 +[2025-02-23 06:04:39] (step=0474000) Train Loss: 0.3227, Train Steps/Sec: 17.42, Grad Norm: 0.0355 +[2025-02-23 06:04:45] (step=0474100) Train Loss: 0.3226, Train Steps/Sec: 17.18, Grad Norm: 0.0400 +[2025-02-23 06:04:52] (step=0474200) Train Loss: 0.3229, Train Steps/Sec: 14.64, Grad Norm: 0.0353 +[2025-02-23 06:04:58] (step=0474300) Train Loss: 0.3226, Train Steps/Sec: 15.93, Grad Norm: 0.0369 +[2025-02-23 06:05:05] (step=0474400) Train Loss: 0.3229, Train Steps/Sec: 14.26, Grad Norm: 0.0353 +[2025-02-23 06:05:11] (step=0474500) Train Loss: 0.3220, Train Steps/Sec: 17.46, Grad Norm: 0.0391 +[2025-02-23 06:05:17] (step=0474600) Train Loss: 0.3230, Train Steps/Sec: 14.75, Grad Norm: 0.0349 +[2025-02-23 06:05:24] (step=0474700) Train Loss: 0.3237, Train Steps/Sec: 15.32, Grad Norm: 0.0355 +[2025-02-23 06:05:30] (step=0474800) Train Loss: 0.3229, Train Steps/Sec: 17.47, Grad Norm: 0.0339 +[2025-02-23 06:05:36] (step=0474900) Train Loss: 0.3231, Train Steps/Sec: 16.72, Grad Norm: 0.0361 +[2025-02-23 06:05:41] (step=0475000) Train Loss: 0.3233, Train Steps/Sec: 17.43, Grad Norm: 0.0392 +[2025-02-23 06:05:47] (step=0475100) Train Loss: 0.3231, Train Steps/Sec: 17.42, Grad Norm: 0.0357 +[2025-02-23 06:05:53] (step=0475200) Train Loss: 0.3234, Train Steps/Sec: 17.46, Grad Norm: 0.0359 +[2025-02-23 06:05:59] (step=0475300) Train Loss: 0.3230, Train Steps/Sec: 17.46, Grad Norm: 0.0356 +[2025-02-23 06:06:06] (step=0475400) Train Loss: 0.3233, Train Steps/Sec: 14.55, Grad Norm: 0.0337 +[2025-02-23 06:06:11] (step=0475500) Train Loss: 0.3228, Train Steps/Sec: 17.26, Grad Norm: 0.0403 +[2025-02-23 06:06:17] (step=0475600) Train Loss: 0.3231, Train Steps/Sec: 17.24, Grad Norm: 0.0386 +[2025-02-23 06:06:23] (step=0475700) Train Loss: 0.3223, Train Steps/Sec: 17.29, Grad Norm: 0.0350 +[2025-02-23 06:06:29] (step=0475800) Train Loss: 0.3228, Train Steps/Sec: 16.51, Grad Norm: 0.0365 +[2025-02-23 06:06:35] (step=0475900) Train Loss: 0.3226, Train Steps/Sec: 17.29, Grad Norm: 0.0359 +[2025-02-23 06:06:41] (step=0476000) Train Loss: 0.3228, Train Steps/Sec: 17.32, Grad Norm: 0.0361 +[2025-02-23 06:06:46] (step=0476100) Train Loss: 0.3235, Train Steps/Sec: 17.42, Grad Norm: 0.0380 +[2025-02-23 06:06:52] (step=0476200) Train Loss: 0.3227, Train Steps/Sec: 17.42, Grad Norm: 0.0347 +[2025-02-23 06:06:59] (step=0476300) Train Loss: 0.3226, Train Steps/Sec: 15.36, Grad Norm: 0.0411 +[2025-02-23 06:07:05] (step=0476400) Train Loss: 0.3230, Train Steps/Sec: 14.73, Grad Norm: 0.0369 +[2025-02-23 06:07:11] (step=0476500) Train Loss: 0.3233, Train Steps/Sec: 16.63, Grad Norm: 0.0381 +[2025-02-23 06:07:18] (step=0476600) Train Loss: 0.3237, Train Steps/Sec: 15.89, Grad Norm: 0.0368 +[2025-02-23 06:07:26] (step=0476700) Train Loss: 0.3223, Train Steps/Sec: 12.62, Grad Norm: 0.0356 +[2025-02-23 06:07:31] (step=0476800) Train Loss: 0.3230, Train Steps/Sec: 17.20, Grad Norm: 0.0369 +[2025-02-23 06:07:37] (step=0476900) Train Loss: 0.3225, Train Steps/Sec: 16.54, Grad Norm: 0.0350 +[2025-02-23 06:07:43] (step=0477000) Train Loss: 0.3226, Train Steps/Sec: 17.29, Grad Norm: 0.0368 +[2025-02-23 06:07:49] (step=0477100) Train Loss: 0.3232, Train Steps/Sec: 17.26, Grad Norm: 0.0379 +[2025-02-23 06:07:55] (step=0477200) Train Loss: 0.3227, Train Steps/Sec: 17.26, Grad Norm: 0.0356 +[2025-02-23 06:08:01] (step=0477300) Train Loss: 0.3228, Train Steps/Sec: 17.27, Grad Norm: 0.0383 +[2025-02-23 06:08:06] (step=0477400) Train Loss: 0.3227, Train Steps/Sec: 17.32, Grad Norm: 0.0377 +[2025-02-23 06:08:12] (step=0477500) Train Loss: 0.3224, Train Steps/Sec: 17.24, Grad Norm: 0.0356 +[2025-02-23 06:08:18] (step=0477600) Train Loss: 0.3227, Train Steps/Sec: 17.23, Grad Norm: 0.0389 +[2025-02-23 06:08:24] (step=0477700) Train Loss: 0.3230, Train Steps/Sec: 16.48, Grad Norm: 0.0360 +[2025-02-23 06:08:30] (step=0477800) Train Loss: 0.3232, Train Steps/Sec: 17.24, Grad Norm: 0.0381 +[2025-02-23 06:08:37] (step=0477900) Train Loss: 0.3227, Train Steps/Sec: 14.45, Grad Norm: 0.0371 +[2025-02-23 06:08:43] (step=0478000) Train Loss: 0.3234, Train Steps/Sec: 17.24, Grad Norm: 0.0366 +[2025-02-23 06:08:48] (step=0478100) Train Loss: 0.3232, Train Steps/Sec: 17.27, Grad Norm: 0.0360 +[2025-02-23 06:08:55] (step=0478200) Train Loss: 0.3228, Train Steps/Sec: 15.88, Grad Norm: 0.0387 +[2025-02-23 06:09:01] (step=0478300) Train Loss: 0.3229, Train Steps/Sec: 15.32, Grad Norm: 0.0340 +[2025-02-23 06:09:07] (step=0478400) Train Loss: 0.3229, Train Steps/Sec: 15.87, Grad Norm: 0.0338 +[2025-02-23 06:09:14] (step=0478500) Train Loss: 0.3228, Train Steps/Sec: 15.83, Grad Norm: 0.0371 +[2025-02-23 06:09:20] (step=0478600) Train Loss: 0.3232, Train Steps/Sec: 16.51, Grad Norm: 0.0363 +[2025-02-23 06:09:27] (step=0478700) Train Loss: 0.3227, Train Steps/Sec: 14.52, Grad Norm: 0.0395 +[2025-02-23 06:09:33] (step=0478800) Train Loss: 0.3231, Train Steps/Sec: 17.25, Grad Norm: 0.0370 +[2025-02-23 06:09:39] (step=0478900) Train Loss: 0.3230, Train Steps/Sec: 16.52, Grad Norm: 0.0385 +[2025-02-23 06:09:44] (step=0479000) Train Loss: 0.3233, Train Steps/Sec: 17.22, Grad Norm: 0.0376 +[2025-02-23 06:09:50] (step=0479100) Train Loss: 0.3231, Train Steps/Sec: 17.14, Grad Norm: 0.0389 +[2025-02-23 06:09:57] (step=0479200) Train Loss: 0.3228, Train Steps/Sec: 14.42, Grad Norm: 0.0384 +[2025-02-23 06:10:03] (step=0479300) Train Loss: 0.3231, Train Steps/Sec: 17.14, Grad Norm: 0.0353 +[2025-02-23 06:10:09] (step=0479400) Train Loss: 0.3229, Train Steps/Sec: 17.21, Grad Norm: 0.0376 +[2025-02-23 06:10:15] (step=0479500) Train Loss: 0.3229, Train Steps/Sec: 17.27, Grad Norm: 0.0381 +[2025-02-23 06:10:20] (step=0479600) Train Loss: 0.3226, Train Steps/Sec: 17.22, Grad Norm: 0.0365 +[2025-02-23 06:10:27] (step=0479700) Train Loss: 0.3231, Train Steps/Sec: 16.40, Grad Norm: 0.0366 +[2025-02-23 06:10:32] (step=0479800) Train Loss: 0.3235, Train Steps/Sec: 17.35, Grad Norm: 0.0375 +[2025-02-23 06:10:38] (step=0479900) Train Loss: 0.3224, Train Steps/Sec: 17.35, Grad Norm: 0.0373 +[2025-02-23 06:10:44] (step=0480000) Train Loss: 0.3225, Train Steps/Sec: 17.35, Grad Norm: 0.0372 +[2025-02-23 06:10:50] (step=0480100) Train Loss: 0.3232, Train Steps/Sec: 17.37, Grad Norm: 0.0343 +[2025-02-23 06:10:56] (step=0480200) Train Loss: 0.3227, Train Steps/Sec: 15.31, Grad Norm: 0.0349 +[2025-02-23 06:11:02] (step=0480300) Train Loss: 0.3226, Train Steps/Sec: 15.99, Grad Norm: 0.0382 +[2025-02-23 06:11:10] (step=0480400) Train Loss: 0.3232, Train Steps/Sec: 13.02, Grad Norm: 0.0369 +[2025-02-23 06:11:16] (step=0480500) Train Loss: 0.3224, Train Steps/Sec: 16.64, Grad Norm: 0.0334 +[2025-02-23 06:11:22] (step=0480600) Train Loss: 0.3227, Train Steps/Sec: 16.60, Grad Norm: 0.0358 +[2025-02-23 06:11:29] (step=0480700) Train Loss: 0.3231, Train Steps/Sec: 14.70, Grad Norm: 0.0393 +[2025-02-23 06:11:35] (step=0480800) Train Loss: 0.3229, Train Steps/Sec: 17.47, Grad Norm: 0.0365 +[2025-02-23 06:11:41] (step=0480900) Train Loss: 0.3232, Train Steps/Sec: 16.72, Grad Norm: 0.0368 +[2025-02-23 06:11:46] (step=0481000) Train Loss: 0.3231, Train Steps/Sec: 17.44, Grad Norm: 0.0344 +[2025-02-23 06:11:52] (step=0481100) Train Loss: 0.3225, Train Steps/Sec: 17.42, Grad Norm: 0.0386 +[2025-02-23 06:11:58] (step=0481200) Train Loss: 0.3235, Train Steps/Sec: 17.39, Grad Norm: 0.0385 +[2025-02-23 06:12:04] (step=0481300) Train Loss: 0.3228, Train Steps/Sec: 17.41, Grad Norm: 0.0349 +[2025-02-23 06:12:09] (step=0481400) Train Loss: 0.3227, Train Steps/Sec: 17.41, Grad Norm: 0.0342 +[2025-02-23 06:12:15] (step=0481500) Train Loss: 0.3230, Train Steps/Sec: 17.42, Grad Norm: 0.0409 +[2025-02-23 06:12:21] (step=0481600) Train Loss: 0.3226, Train Steps/Sec: 16.59, Grad Norm: 0.0359 +[2025-02-23 06:12:28] (step=0481700) Train Loss: 0.3229, Train Steps/Sec: 14.57, Grad Norm: 0.0357 +[2025-02-23 06:12:34] (step=0481800) Train Loss: 0.3223, Train Steps/Sec: 17.32, Grad Norm: 0.0363 +[2025-02-23 06:12:40] (step=0481900) Train Loss: 0.3228, Train Steps/Sec: 17.38, Grad Norm: 0.0370 +[2025-02-23 06:12:45] (step=0482000) Train Loss: 0.3231, Train Steps/Sec: 17.50, Grad Norm: 0.0362 +[2025-02-23 06:12:52] (step=0482100) Train Loss: 0.3230, Train Steps/Sec: 15.37, Grad Norm: 0.0365 +[2025-02-23 06:12:58] (step=0482200) Train Loss: 0.3228, Train Steps/Sec: 16.06, Grad Norm: 0.0387 +[2025-02-23 06:13:04] (step=0482300) Train Loss: 0.3232, Train Steps/Sec: 17.43, Grad Norm: 0.0367 +[2025-02-23 06:13:10] (step=0482400) Train Loss: 0.3220, Train Steps/Sec: 14.71, Grad Norm: 0.0367 +[2025-02-23 06:13:16] (step=0482500) Train Loss: 0.3232, Train Steps/Sec: 17.43, Grad Norm: 0.0383 +[2025-02-23 06:13:22] (step=0482600) Train Loss: 0.3228, Train Steps/Sec: 16.67, Grad Norm: 0.0350 +[2025-02-23 06:13:29] (step=0482700) Train Loss: 0.3229, Train Steps/Sec: 15.22, Grad Norm: 0.0409 +[2025-02-23 06:13:35] (step=0482800) Train Loss: 0.3226, Train Steps/Sec: 16.62, Grad Norm: 0.0334 +[2025-02-23 06:13:42] (step=0482900) Train Loss: 0.3228, Train Steps/Sec: 14.11, Grad Norm: 0.0374 +[2025-02-23 06:13:48] (step=0483000) Train Loss: 0.3227, Train Steps/Sec: 17.38, Grad Norm: 0.0382 +[2025-02-23 06:13:53] (step=0483100) Train Loss: 0.3226, Train Steps/Sec: 17.43, Grad Norm: 0.0396 +[2025-02-23 06:13:59] (step=0483200) Train Loss: 0.3225, Train Steps/Sec: 17.43, Grad Norm: 0.0363 +[2025-02-23 06:14:05] (step=0483300) Train Loss: 0.3228, Train Steps/Sec: 17.30, Grad Norm: 0.0363 +[2025-02-23 06:14:11] (step=0483400) Train Loss: 0.3233, Train Steps/Sec: 17.36, Grad Norm: 0.0352 +[2025-02-23 06:14:16] (step=0483500) Train Loss: 0.3230, Train Steps/Sec: 17.32, Grad Norm: 0.0375 +[2025-02-23 06:14:23] (step=0483600) Train Loss: 0.3231, Train Steps/Sec: 16.54, Grad Norm: 0.0371 +[2025-02-23 06:14:28] (step=0483700) Train Loss: 0.3230, Train Steps/Sec: 17.25, Grad Norm: 0.0374 +[2025-02-23 06:14:34] (step=0483800) Train Loss: 0.3232, Train Steps/Sec: 17.22, Grad Norm: 0.0372 +[2025-02-23 06:14:40] (step=0483900) Train Loss: 0.3222, Train Steps/Sec: 17.22, Grad Norm: 0.0365 +[2025-02-23 06:14:46] (step=0484000) Train Loss: 0.3226, Train Steps/Sec: 17.26, Grad Norm: 0.0347 +[2025-02-23 06:14:52] (step=0484100) Train Loss: 0.3225, Train Steps/Sec: 15.25, Grad Norm: 0.0360 +[2025-02-23 06:15:00] (step=0484200) Train Loss: 0.3225, Train Steps/Sec: 13.55, Grad Norm: 0.0352 +[2025-02-23 06:15:06] (step=0484300) Train Loss: 0.3224, Train Steps/Sec: 16.47, Grad Norm: 0.0376 +[2025-02-23 06:15:12] (step=0484400) Train Loss: 0.3228, Train Steps/Sec: 15.20, Grad Norm: 0.0358 +[2025-02-23 06:15:18] (step=0484500) Train Loss: 0.3229, Train Steps/Sec: 17.27, Grad Norm: 0.0366 +[2025-02-23 06:15:24] (step=0484600) Train Loss: 0.3221, Train Steps/Sec: 16.51, Grad Norm: 0.0381 +[2025-02-23 06:15:31] (step=0484700) Train Loss: 0.3226, Train Steps/Sec: 15.24, Grad Norm: 0.0368 +[2025-02-23 06:15:37] (step=0484800) Train Loss: 0.3226, Train Steps/Sec: 16.63, Grad Norm: 0.0410 +[2025-02-23 06:15:43] (step=0484900) Train Loss: 0.3228, Train Steps/Sec: 16.66, Grad Norm: 0.0384 +[2025-02-23 06:15:48] (step=0485000) Train Loss: 0.3228, Train Steps/Sec: 17.43, Grad Norm: 0.0360 +[2025-02-23 06:15:54] (step=0485100) Train Loss: 0.3229, Train Steps/Sec: 17.45, Grad Norm: 0.0380 +[2025-02-23 06:16:00] (step=0485200) Train Loss: 0.3227, Train Steps/Sec: 17.41, Grad Norm: 0.0404 +[2025-02-23 06:16:06] (step=0485300) Train Loss: 0.3223, Train Steps/Sec: 17.44, Grad Norm: 0.0375 +[2025-02-23 06:16:13] (step=0485400) Train Loss: 0.3229, Train Steps/Sec: 14.54, Grad Norm: 0.0382 +[2025-02-23 06:16:19] (step=0485500) Train Loss: 0.3226, Train Steps/Sec: 16.45, Grad Norm: 0.0365 +[2025-02-23 06:16:24] (step=0485600) Train Loss: 0.3233, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 06:16:30] (step=0485700) Train Loss: 0.3229, Train Steps/Sec: 17.23, Grad Norm: 0.0396 +[2025-02-23 06:16:36] (step=0485800) Train Loss: 0.3224, Train Steps/Sec: 17.25, Grad Norm: 0.0371 +[2025-02-23 06:16:42] (step=0485900) Train Loss: 0.3234, Train Steps/Sec: 17.23, Grad Norm: 0.0359 +[2025-02-23 06:16:48] (step=0486000) Train Loss: 0.3232, Train Steps/Sec: 15.24, Grad Norm: 0.0394 +[2025-02-23 06:16:55] (step=0486100) Train Loss: 0.3230, Train Steps/Sec: 15.90, Grad Norm: 0.0360 +[2025-02-23 06:17:01] (step=0486200) Train Loss: 0.3231, Train Steps/Sec: 16.50, Grad Norm: 0.0361 +[2025-02-23 06:17:07] (step=0486300) Train Loss: 0.3224, Train Steps/Sec: 16.57, Grad Norm: 0.0358 +[2025-02-23 06:17:13] (step=0486400) Train Loss: 0.3229, Train Steps/Sec: 16.51, Grad Norm: 0.0414 +[2025-02-23 06:17:19] (step=0486500) Train Loss: 0.3229, Train Steps/Sec: 16.47, Grad Norm: 0.0388 +[2025-02-23 06:17:25] (step=0486600) Train Loss: 0.3234, Train Steps/Sec: 16.54, Grad Norm: 0.0362 +[2025-02-23 06:17:33] (step=0486700) Train Loss: 0.3227, Train Steps/Sec: 12.94, Grad Norm: 0.0388 +[2025-02-23 06:17:39] (step=0486800) Train Loss: 0.3222, Train Steps/Sec: 16.51, Grad Norm: 0.0414 +[2025-02-23 06:17:45] (step=0486900) Train Loss: 0.3231, Train Steps/Sec: 16.53, Grad Norm: 0.0348 +[2025-02-23 06:17:51] (step=0487000) Train Loss: 0.3229, Train Steps/Sec: 17.23, Grad Norm: 0.0369 +[2025-02-23 06:17:56] (step=0487100) Train Loss: 0.3227, Train Steps/Sec: 17.30, Grad Norm: 0.0407 +[2025-02-23 06:18:02] (step=0487200) Train Loss: 0.3222, Train Steps/Sec: 17.39, Grad Norm: 0.0382 +[2025-02-23 06:18:08] (step=0487300) Train Loss: 0.3230, Train Steps/Sec: 17.40, Grad Norm: 0.0347 +[2025-02-23 06:18:14] (step=0487400) Train Loss: 0.3229, Train Steps/Sec: 17.43, Grad Norm: 0.0353 +[2025-02-23 06:18:20] (step=0487500) Train Loss: 0.3232, Train Steps/Sec: 16.60, Grad Norm: 0.0354 +[2025-02-23 06:18:25] (step=0487600) Train Loss: 0.3224, Train Steps/Sec: 17.36, Grad Norm: 0.0361 +[2025-02-23 06:18:31] (step=0487700) Train Loss: 0.3224, Train Steps/Sec: 17.40, Grad Norm: 0.0362 +[2025-02-23 06:18:37] (step=0487800) Train Loss: 0.3226, Train Steps/Sec: 17.43, Grad Norm: 0.0344 +[2025-02-23 06:18:44] (step=0487900) Train Loss: 0.3228, Train Steps/Sec: 14.04, Grad Norm: 0.0353 +[2025-02-23 06:18:51] (step=0488000) Train Loss: 0.3231, Train Steps/Sec: 14.64, Grad Norm: 0.0399 +[2025-02-23 06:18:57] (step=0488100) Train Loss: 0.3231, Train Steps/Sec: 17.33, Grad Norm: 0.0360 +[2025-02-23 06:19:03] (step=0488200) Train Loss: 0.3228, Train Steps/Sec: 16.55, Grad Norm: 0.0349 +[2025-02-23 06:19:09] (step=0488300) Train Loss: 0.3225, Train Steps/Sec: 16.67, Grad Norm: 0.0356 +[2025-02-23 06:19:15] (step=0488400) Train Loss: 0.3225, Train Steps/Sec: 16.64, Grad Norm: 0.0375 +[2025-02-23 06:19:21] (step=0488500) Train Loss: 0.3217, Train Steps/Sec: 16.58, Grad Norm: 0.0360 +[2025-02-23 06:19:27] (step=0488600) Train Loss: 0.3232, Train Steps/Sec: 16.61, Grad Norm: 0.0337 +[2025-02-23 06:19:33] (step=0488700) Train Loss: 0.3225, Train Steps/Sec: 15.82, Grad Norm: 0.0381 +[2025-02-23 06:19:39] (step=0488800) Train Loss: 0.3226, Train Steps/Sec: 15.84, Grad Norm: 0.0362 +[2025-02-23 06:19:45] (step=0488900) Train Loss: 0.3225, Train Steps/Sec: 16.63, Grad Norm: 0.0368 +[2025-02-23 06:19:51] (step=0489000) Train Loss: 0.3228, Train Steps/Sec: 17.33, Grad Norm: 0.0342 +[2025-02-23 06:19:57] (step=0489100) Train Loss: 0.3231, Train Steps/Sec: 17.41, Grad Norm: 0.0378 +[2025-02-23 06:20:04] (step=0489200) Train Loss: 0.3228, Train Steps/Sec: 14.49, Grad Norm: 0.0363 +[2025-02-23 06:20:10] (step=0489300) Train Loss: 0.3220, Train Steps/Sec: 17.28, Grad Norm: 0.0403 +[2025-02-23 06:20:16] (step=0489400) Train Loss: 0.3230, Train Steps/Sec: 16.53, Grad Norm: 0.0330 +[2025-02-23 06:20:21] (step=0489500) Train Loss: 0.3233, Train Steps/Sec: 17.30, Grad Norm: 0.0400 +[2025-02-23 06:20:27] (step=0489600) Train Loss: 0.3231, Train Steps/Sec: 17.27, Grad Norm: 0.0372 +[2025-02-23 06:20:33] (step=0489700) Train Loss: 0.3225, Train Steps/Sec: 17.34, Grad Norm: 0.0366 +[2025-02-23 06:20:39] (step=0489800) Train Loss: 0.3229, Train Steps/Sec: 17.35, Grad Norm: 0.0355 +[2025-02-23 06:20:45] (step=0489900) Train Loss: 0.3224, Train Steps/Sec: 15.29, Grad Norm: 0.0372 +[2025-02-23 06:20:52] (step=0490000) Train Loss: 0.3226, Train Steps/Sec: 15.94, Grad Norm: 0.0358 +[2025-02-23 06:20:58] (step=0490100) Train Loss: 0.3229, Train Steps/Sec: 16.53, Grad Norm: 0.0352 +[2025-02-23 06:21:04] (step=0490200) Train Loss: 0.3227, Train Steps/Sec: 16.50, Grad Norm: 0.0366 +[2025-02-23 06:21:10] (step=0490300) Train Loss: 0.3226, Train Steps/Sec: 17.33, Grad Norm: 0.0363 +[2025-02-23 06:21:17] (step=0490400) Train Loss: 0.3231, Train Steps/Sec: 13.94, Grad Norm: 0.0373 +[2025-02-23 06:21:23] (step=0490500) Train Loss: 0.3220, Train Steps/Sec: 16.50, Grad Norm: 0.0380 +[2025-02-23 06:21:29] (step=0490600) Train Loss: 0.3227, Train Steps/Sec: 16.48, Grad Norm: 0.0380 +[2025-02-23 06:21:35] (step=0490700) Train Loss: 0.3228, Train Steps/Sec: 15.76, Grad Norm: 0.0367 +[2025-02-23 06:21:42] (step=0490800) Train Loss: 0.3227, Train Steps/Sec: 15.75, Grad Norm: 0.0367 +[2025-02-23 06:21:48] (step=0490900) Train Loss: 0.3230, Train Steps/Sec: 16.53, Grad Norm: 0.0362 +[2025-02-23 06:21:53] (step=0491000) Train Loss: 0.3224, Train Steps/Sec: 17.40, Grad Norm: 0.0352 +[2025-02-23 06:21:59] (step=0491100) Train Loss: 0.3225, Train Steps/Sec: 17.40, Grad Norm: 0.0380 +[2025-02-23 06:22:05] (step=0491200) Train Loss: 0.3229, Train Steps/Sec: 17.29, Grad Norm: 0.0352 +[2025-02-23 06:22:11] (step=0491300) Train Loss: 0.3224, Train Steps/Sec: 17.30, Grad Norm: 0.0354 +[2025-02-23 06:22:17] (step=0491400) Train Loss: 0.3225, Train Steps/Sec: 16.53, Grad Norm: 0.0395 +[2025-02-23 06:22:22] (step=0491500) Train Loss: 0.3225, Train Steps/Sec: 17.34, Grad Norm: 0.0334 +[2025-02-23 06:22:28] (step=0491600) Train Loss: 0.3231, Train Steps/Sec: 17.36, Grad Norm: 0.0357 +[2025-02-23 06:22:35] (step=0491700) Train Loss: 0.3232, Train Steps/Sec: 14.57, Grad Norm: 0.0376 +[2025-02-23 06:22:41] (step=0491800) Train Loss: 0.3230, Train Steps/Sec: 16.51, Grad Norm: 0.0357 +[2025-02-23 06:22:48] (step=0491900) Train Loss: 0.3233, Train Steps/Sec: 14.67, Grad Norm: 0.0384 +[2025-02-23 06:22:54] (step=0492000) Train Loss: 0.3226, Train Steps/Sec: 17.33, Grad Norm: 0.0415 +[2025-02-23 06:23:00] (step=0492100) Train Loss: 0.3226, Train Steps/Sec: 15.87, Grad Norm: 0.0370 +[2025-02-23 06:23:06] (step=0492200) Train Loss: 0.3231, Train Steps/Sec: 17.23, Grad Norm: 0.0339 +[2025-02-23 06:23:12] (step=0492300) Train Loss: 0.3227, Train Steps/Sec: 17.27, Grad Norm: 0.0372 +[2025-02-23 06:23:17] (step=0492400) Train Loss: 0.3227, Train Steps/Sec: 17.25, Grad Norm: 0.0366 +[2025-02-23 06:23:23] (step=0492500) Train Loss: 0.3226, Train Steps/Sec: 16.46, Grad Norm: 0.0352 +[2025-02-23 06:23:30] (step=0492600) Train Loss: 0.3234, Train Steps/Sec: 16.52, Grad Norm: 0.0397 +[2025-02-23 06:23:36] (step=0492700) Train Loss: 0.3233, Train Steps/Sec: 15.81, Grad Norm: 0.0381 +[2025-02-23 06:23:42] (step=0492800) Train Loss: 0.3224, Train Steps/Sec: 15.84, Grad Norm: 0.0364 +[2025-02-23 06:23:49] (step=0492900) Train Loss: 0.3228, Train Steps/Sec: 14.52, Grad Norm: 0.0383 +[2025-02-23 06:23:55] (step=0493000) Train Loss: 0.3230, Train Steps/Sec: 16.55, Grad Norm: 0.0369 +[2025-02-23 06:24:01] (step=0493100) Train Loss: 0.3226, Train Steps/Sec: 17.39, Grad Norm: 0.0376 +[2025-02-23 06:24:07] (step=0493200) Train Loss: 0.3225, Train Steps/Sec: 17.35, Grad Norm: 0.0367 +[2025-02-23 06:24:13] (step=0493300) Train Loss: 0.3224, Train Steps/Sec: 16.52, Grad Norm: 0.0408 +[2025-02-23 06:24:18] (step=0493400) Train Loss: 0.3223, Train Steps/Sec: 17.31, Grad Norm: 0.0355 +[2025-02-23 06:24:24] (step=0493500) Train Loss: 0.3222, Train Steps/Sec: 17.38, Grad Norm: 0.0413 +[2025-02-23 06:24:30] (step=0493600) Train Loss: 0.3225, Train Steps/Sec: 17.33, Grad Norm: 0.0353 +[2025-02-23 06:24:36] (step=0493700) Train Loss: 0.3230, Train Steps/Sec: 17.32, Grad Norm: 0.0378 +[2025-02-23 06:24:42] (step=0493800) Train Loss: 0.3230, Train Steps/Sec: 15.25, Grad Norm: 0.0374 +[2025-02-23 06:24:49] (step=0493900) Train Loss: 0.3225, Train Steps/Sec: 15.94, Grad Norm: 0.0348 +[2025-02-23 06:24:55] (step=0494000) Train Loss: 0.3230, Train Steps/Sec: 16.57, Grad Norm: 0.0394 +[2025-02-23 06:25:01] (step=0494100) Train Loss: 0.3226, Train Steps/Sec: 16.58, Grad Norm: 0.0399 +[2025-02-23 06:25:08] (step=0494200) Train Loss: 0.3224, Train Steps/Sec: 14.45, Grad Norm: 0.0383 +[2025-02-23 06:25:13] (step=0494300) Train Loss: 0.3228, Train Steps/Sec: 17.29, Grad Norm: 0.0398 +[2025-02-23 06:25:19] (step=0494400) Train Loss: 0.3234, Train Steps/Sec: 17.25, Grad Norm: 0.0352 +[2025-02-23 06:25:25] (step=0494500) Train Loss: 0.3228, Train Steps/Sec: 15.86, Grad Norm: 0.0348 +[2025-02-23 06:25:32] (step=0494600) Train Loss: 0.3219, Train Steps/Sec: 16.61, Grad Norm: 0.0356 +[2025-02-23 06:25:38] (step=0494700) Train Loss: 0.3227, Train Steps/Sec: 15.75, Grad Norm: 0.0359 +[2025-02-23 06:25:44] (step=0494800) Train Loss: 0.3227, Train Steps/Sec: 15.76, Grad Norm: 0.0342 +[2025-02-23 06:25:50] (step=0494900) Train Loss: 0.3230, Train Steps/Sec: 17.27, Grad Norm: 0.0361 +[2025-02-23 06:25:56] (step=0495000) Train Loss: 0.3230, Train Steps/Sec: 16.54, Grad Norm: 0.0347 +[2025-02-23 06:26:02] (step=0495100) Train Loss: 0.3231, Train Steps/Sec: 17.31, Grad Norm: 0.0374 +[2025-02-23 06:26:08] (step=0495200) Train Loss: 0.3225, Train Steps/Sec: 17.35, Grad Norm: 0.0344 +[2025-02-23 06:26:14] (step=0495300) Train Loss: 0.3226, Train Steps/Sec: 16.52, Grad Norm: 0.0376 +[2025-02-23 06:26:21] (step=0495400) Train Loss: 0.3223, Train Steps/Sec: 14.61, Grad Norm: 0.0361 +[2025-02-23 06:26:26] (step=0495500) Train Loss: 0.3228, Train Steps/Sec: 17.29, Grad Norm: 0.0344 +[2025-02-23 06:26:32] (step=0495600) Train Loss: 0.3230, Train Steps/Sec: 17.36, Grad Norm: 0.0349 +[2025-02-23 06:26:38] (step=0495700) Train Loss: 0.3225, Train Steps/Sec: 15.89, Grad Norm: 0.0395 +[2025-02-23 06:26:45] (step=0495800) Train Loss: 0.3230, Train Steps/Sec: 15.32, Grad Norm: 0.0375 +[2025-02-23 06:26:51] (step=0495900) Train Loss: 0.3226, Train Steps/Sec: 17.29, Grad Norm: 0.0341 +[2025-02-23 06:26:57] (step=0496000) Train Loss: 0.3231, Train Steps/Sec: 15.76, Grad Norm: 0.0379 +[2025-02-23 06:27:03] (step=0496100) Train Loss: 0.3226, Train Steps/Sec: 17.19, Grad Norm: 0.0385 +[2025-02-23 06:27:09] (step=0496200) Train Loss: 0.3230, Train Steps/Sec: 17.24, Grad Norm: 0.0372 +[2025-02-23 06:27:14] (step=0496300) Train Loss: 0.3230, Train Steps/Sec: 17.23, Grad Norm: 0.0342 +[2025-02-23 06:27:20] (step=0496400) Train Loss: 0.3226, Train Steps/Sec: 17.24, Grad Norm: 0.0374 +[2025-02-23 06:27:27] (step=0496500) Train Loss: 0.3223, Train Steps/Sec: 15.75, Grad Norm: 0.0353 +[2025-02-23 06:27:33] (step=0496600) Train Loss: 0.3238, Train Steps/Sec: 16.54, Grad Norm: 0.0364 +[2025-02-23 06:27:40] (step=0496700) Train Loss: 0.3220, Train Steps/Sec: 13.38, Grad Norm: 0.0367 +[2025-02-23 06:27:46] (step=0496800) Train Loss: 0.3225, Train Steps/Sec: 15.69, Grad Norm: 0.0359 +[2025-02-23 06:27:52] (step=0496900) Train Loss: 0.3228, Train Steps/Sec: 17.24, Grad Norm: 0.0389 +[2025-02-23 06:27:58] (step=0497000) Train Loss: 0.3229, Train Steps/Sec: 16.52, Grad Norm: 0.0375 +[2025-02-23 06:28:04] (step=0497100) Train Loss: 0.3224, Train Steps/Sec: 17.26, Grad Norm: 0.0395 +[2025-02-23 06:28:10] (step=0497200) Train Loss: 0.3225, Train Steps/Sec: 16.44, Grad Norm: 0.0353 +[2025-02-23 06:28:16] (step=0497300) Train Loss: 0.3225, Train Steps/Sec: 17.32, Grad Norm: 0.0362 +[2025-02-23 06:28:22] (step=0497400) Train Loss: 0.3229, Train Steps/Sec: 17.32, Grad Norm: 0.0378 +[2025-02-23 06:28:28] (step=0497500) Train Loss: 0.3228, Train Steps/Sec: 17.31, Grad Norm: 0.0352 +[2025-02-23 06:28:33] (step=0497600) Train Loss: 0.3227, Train Steps/Sec: 17.32, Grad Norm: 0.0371 +[2025-02-23 06:28:40] (step=0497700) Train Loss: 0.3233, Train Steps/Sec: 15.33, Grad Norm: 0.0414 +[2025-02-23 06:28:46] (step=0497800) Train Loss: 0.3223, Train Steps/Sec: 15.87, Grad Norm: 0.0362 +[2025-02-23 06:28:53] (step=0497900) Train Loss: 0.3231, Train Steps/Sec: 14.09, Grad Norm: 0.0393 +[2025-02-23 06:28:59] (step=0498000) Train Loss: 0.3229, Train Steps/Sec: 16.30, Grad Norm: 0.0347 +[2025-02-23 06:29:05] (step=0498100) Train Loss: 0.3226, Train Steps/Sec: 17.15, Grad Norm: 0.0358 +[2025-02-23 06:29:11] (step=0498200) Train Loss: 0.3230, Train Steps/Sec: 17.24, Grad Norm: 0.0347 +[2025-02-23 06:29:17] (step=0498300) Train Loss: 0.3224, Train Steps/Sec: 17.26, Grad Norm: 0.0374 +[2025-02-23 06:29:23] (step=0498400) Train Loss: 0.3229, Train Steps/Sec: 17.26, Grad Norm: 0.0351 +[2025-02-23 06:29:29] (step=0498500) Train Loss: 0.3225, Train Steps/Sec: 15.77, Grad Norm: 0.0400 +[2025-02-23 06:29:35] (step=0498600) Train Loss: 0.3224, Train Steps/Sec: 16.55, Grad Norm: 0.0361 +[2025-02-23 06:29:41] (step=0498700) Train Loss: 0.3231, Train Steps/Sec: 15.72, Grad Norm: 0.0378 +[2025-02-23 06:29:48] (step=0498800) Train Loss: 0.3227, Train Steps/Sec: 15.71, Grad Norm: 0.0351 +[2025-02-23 06:29:54] (step=0498900) Train Loss: 0.3227, Train Steps/Sec: 17.22, Grad Norm: 0.0342 +[2025-02-23 06:30:00] (step=0499000) Train Loss: 0.3229, Train Steps/Sec: 16.44, Grad Norm: 0.0360 +[2025-02-23 06:30:05] (step=0499100) Train Loss: 0.3228, Train Steps/Sec: 17.16, Grad Norm: 0.0388 +[2025-02-23 06:30:13] (step=0499200) Train Loss: 0.3227, Train Steps/Sec: 13.77, Grad Norm: 0.0362 +[2025-02-23 06:30:19] (step=0499300) Train Loss: 0.3225, Train Steps/Sec: 17.21, Grad Norm: 0.0352 +[2025-02-23 06:30:24] (step=0499400) Train Loss: 0.3228, Train Steps/Sec: 17.24, Grad Norm: 0.0383 +[2025-02-23 06:30:30] (step=0499500) Train Loss: 0.3228, Train Steps/Sec: 17.25, Grad Norm: 0.0382 +[2025-02-23 06:30:36] (step=0499600) Train Loss: 0.3230, Train Steps/Sec: 15.79, Grad Norm: 0.0354 +[2025-02-23 06:30:43] (step=0499700) Train Loss: 0.3226, Train Steps/Sec: 15.26, Grad Norm: 0.0354 +[2025-02-23 06:30:49] (step=0499800) Train Loss: 0.3227, Train Steps/Sec: 16.43, Grad Norm: 0.0348 +[2025-02-23 06:30:55] (step=0499900) Train Loss: 0.3226, Train Steps/Sec: 16.24, Grad Norm: 0.0388 +[2025-02-23 06:31:01] (step=0500000) Train Loss: 0.3229, Train Steps/Sec: 16.92, Grad Norm: 0.0347 +[2025-02-23 06:31:02] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0500000.pt +[2025-02-23 06:31:08] (step=0500100) Train Loss: 0.3232, Train Steps/Sec: 14.84, Grad Norm: 0.0353 +[2025-02-23 06:31:14] (step=0500200) Train Loss: 0.3225, Train Steps/Sec: 16.98, Grad Norm: 0.0389 +[2025-02-23 06:31:20] (step=0500300) Train Loss: 0.3230, Train Steps/Sec: 17.01, Grad Norm: 0.0368 +[2025-02-23 06:31:26] (step=0500400) Train Loss: 0.3228, Train Steps/Sec: 15.53, Grad Norm: 0.0366 +[2025-02-23 06:31:33] (step=0500500) Train Loss: 0.3224, Train Steps/Sec: 14.17, Grad Norm: 0.0332 +[2025-02-23 06:31:39] (step=0500600) Train Loss: 0.3225, Train Steps/Sec: 16.33, Grad Norm: 0.0381 +[2025-02-23 06:31:45] (step=0500700) Train Loss: 0.3231, Train Steps/Sec: 16.38, Grad Norm: 0.0372 +[2025-02-23 06:31:52] (step=0500800) Train Loss: 0.3226, Train Steps/Sec: 15.80, Grad Norm: 0.0323 +[2025-02-23 06:31:58] (step=0500900) Train Loss: 0.3226, Train Steps/Sec: 17.25, Grad Norm: 0.0360 +[2025-02-23 06:32:04] (step=0501000) Train Loss: 0.3232, Train Steps/Sec: 16.47, Grad Norm: 0.0353 +[2025-02-23 06:32:09] (step=0501100) Train Loss: 0.3228, Train Steps/Sec: 17.20, Grad Norm: 0.0326 +[2025-02-23 06:32:16] (step=0501200) Train Loss: 0.3223, Train Steps/Sec: 16.40, Grad Norm: 0.0391 +[2025-02-23 06:32:21] (step=0501300) Train Loss: 0.3231, Train Steps/Sec: 17.25, Grad Norm: 0.0349 +[2025-02-23 06:32:27] (step=0501400) Train Loss: 0.3230, Train Steps/Sec: 17.26, Grad Norm: 0.0380 +[2025-02-23 06:32:33] (step=0501500) Train Loss: 0.3223, Train Steps/Sec: 16.59, Grad Norm: 0.0350 +[2025-02-23 06:32:40] (step=0501600) Train Loss: 0.3226, Train Steps/Sec: 15.30, Grad Norm: 0.0387 +[2025-02-23 06:32:47] (step=0501700) Train Loss: 0.3226, Train Steps/Sec: 13.76, Grad Norm: 0.0353 +[2025-02-23 06:32:53] (step=0501800) Train Loss: 0.3228, Train Steps/Sec: 16.52, Grad Norm: 0.0422 +[2025-02-23 06:32:59] (step=0501900) Train Loss: 0.3228, Train Steps/Sec: 16.49, Grad Norm: 0.0365 +[2025-02-23 06:33:05] (step=0502000) Train Loss: 0.3229, Train Steps/Sec: 17.28, Grad Norm: 0.0336 +[2025-02-23 06:33:11] (step=0502100) Train Loss: 0.3226, Train Steps/Sec: 17.24, Grad Norm: 0.0379 +[2025-02-23 06:33:16] (step=0502200) Train Loss: 0.3228, Train Steps/Sec: 17.26, Grad Norm: 0.0382 +[2025-02-23 06:33:22] (step=0502300) Train Loss: 0.3227, Train Steps/Sec: 17.25, Grad Norm: 0.0350 +[2025-02-23 06:33:28] (step=0502400) Train Loss: 0.3226, Train Steps/Sec: 17.21, Grad Norm: 0.0392 +[2025-02-23 06:33:34] (step=0502500) Train Loss: 0.3225, Train Steps/Sec: 15.74, Grad Norm: 0.0334 +[2025-02-23 06:33:40] (step=0502600) Train Loss: 0.3220, Train Steps/Sec: 16.61, Grad Norm: 0.0427 +[2025-02-23 06:33:47] (step=0502700) Train Loss: 0.3225, Train Steps/Sec: 15.85, Grad Norm: 0.0350 +[2025-02-23 06:33:53] (step=0502800) Train Loss: 0.3228, Train Steps/Sec: 15.86, Grad Norm: 0.0346 +[2025-02-23 06:33:59] (step=0502900) Train Loss: 0.3224, Train Steps/Sec: 17.27, Grad Norm: 0.0368 +[2025-02-23 06:34:06] (step=0503000) Train Loss: 0.3228, Train Steps/Sec: 13.95, Grad Norm: 0.0342 +[2025-02-23 06:34:12] (step=0503100) Train Loss: 0.3228, Train Steps/Sec: 16.46, Grad Norm: 0.0368 +[2025-02-23 06:34:18] (step=0503200) Train Loss: 0.3233, Train Steps/Sec: 17.15, Grad Norm: 0.0373 +[2025-02-23 06:34:24] (step=0503300) Train Loss: 0.3231, Train Steps/Sec: 17.08, Grad Norm: 0.0366 +[2025-02-23 06:34:30] (step=0503400) Train Loss: 0.3227, Train Steps/Sec: 17.06, Grad Norm: 0.0371 +[2025-02-23 06:34:36] (step=0503500) Train Loss: 0.3231, Train Steps/Sec: 15.66, Grad Norm: 0.0359 +[2025-02-23 06:34:43] (step=0503600) Train Loss: 0.3221, Train Steps/Sec: 15.17, Grad Norm: 0.0334 +[2025-02-23 06:34:49] (step=0503700) Train Loss: 0.3224, Train Steps/Sec: 16.35, Grad Norm: 0.0352 +[2025-02-23 06:34:55] (step=0503800) Train Loss: 0.3231, Train Steps/Sec: 16.27, Grad Norm: 0.0350 +[2025-02-23 06:35:01] (step=0503900) Train Loss: 0.3225, Train Steps/Sec: 17.04, Grad Norm: 0.0347 +[2025-02-23 06:35:07] (step=0504000) Train Loss: 0.3229, Train Steps/Sec: 17.03, Grad Norm: 0.0373 +[2025-02-23 06:35:13] (step=0504100) Train Loss: 0.3223, Train Steps/Sec: 17.05, Grad Norm: 0.0343 +[2025-02-23 06:35:20] (step=0504200) Train Loss: 0.3227, Train Steps/Sec: 14.34, Grad Norm: 0.0377 +[2025-02-23 06:35:25] (step=0504300) Train Loss: 0.3225, Train Steps/Sec: 17.06, Grad Norm: 0.0364 +[2025-02-23 06:35:31] (step=0504400) Train Loss: 0.3231, Train Steps/Sec: 17.36, Grad Norm: 0.0384 +[2025-02-23 06:35:37] (step=0504500) Train Loss: 0.3227, Train Steps/Sec: 15.79, Grad Norm: 0.0365 +[2025-02-23 06:35:44] (step=0504600) Train Loss: 0.3227, Train Steps/Sec: 16.54, Grad Norm: 0.0367 +[2025-02-23 06:35:50] (step=0504700) Train Loss: 0.3228, Train Steps/Sec: 15.76, Grad Norm: 0.0371 +[2025-02-23 06:35:56] (step=0504800) Train Loss: 0.3232, Train Steps/Sec: 15.82, Grad Norm: 0.0370 +[2025-02-23 06:36:02] (step=0504900) Train Loss: 0.3223, Train Steps/Sec: 17.24, Grad Norm: 0.0373 +[2025-02-23 06:36:08] (step=0505000) Train Loss: 0.3222, Train Steps/Sec: 16.52, Grad Norm: 0.0376 +[2025-02-23 06:36:14] (step=0505100) Train Loss: 0.3226, Train Steps/Sec: 16.52, Grad Norm: 0.0361 +[2025-02-23 06:36:20] (step=0505200) Train Loss: 0.3229, Train Steps/Sec: 17.29, Grad Norm: 0.0384 +[2025-02-23 06:36:26] (step=0505300) Train Loss: 0.3230, Train Steps/Sec: 17.23, Grad Norm: 0.0389 +[2025-02-23 06:36:32] (step=0505400) Train Loss: 0.3227, Train Steps/Sec: 16.53, Grad Norm: 0.0392 +[2025-02-23 06:36:40] (step=0505500) Train Loss: 0.3226, Train Steps/Sec: 12.34, Grad Norm: 0.0361 +[2025-02-23 06:36:46] (step=0505600) Train Loss: 0.3221, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 06:36:52] (step=0505700) Train Loss: 0.3229, Train Steps/Sec: 15.83, Grad Norm: 0.0395 +[2025-02-23 06:36:58] (step=0505800) Train Loss: 0.3226, Train Steps/Sec: 17.37, Grad Norm: 0.0356 +[2025-02-23 06:37:04] (step=0505900) Train Loss: 0.3234, Train Steps/Sec: 17.30, Grad Norm: 0.0375 +[2025-02-23 06:37:09] (step=0506000) Train Loss: 0.3225, Train Steps/Sec: 17.30, Grad Norm: 0.0373 +[2025-02-23 06:37:15] (step=0506100) Train Loss: 0.3226, Train Steps/Sec: 17.27, Grad Norm: 0.0349 +[2025-02-23 06:37:21] (step=0506200) Train Loss: 0.3228, Train Steps/Sec: 17.27, Grad Norm: 0.0410 +[2025-02-23 06:37:27] (step=0506300) Train Loss: 0.3228, Train Steps/Sec: 17.25, Grad Norm: 0.0363 +[2025-02-23 06:37:32] (step=0506400) Train Loss: 0.3223, Train Steps/Sec: 17.24, Grad Norm: 0.0406 +[2025-02-23 06:37:39] (step=0506500) Train Loss: 0.3230, Train Steps/Sec: 15.72, Grad Norm: 0.0358 +[2025-02-23 06:37:45] (step=0506600) Train Loss: 0.3228, Train Steps/Sec: 17.30, Grad Norm: 0.0358 +[2025-02-23 06:37:52] (step=0506700) Train Loss: 0.3226, Train Steps/Sec: 12.74, Grad Norm: 0.0358 +[2025-02-23 06:37:59] (step=0506800) Train Loss: 0.3234, Train Steps/Sec: 15.79, Grad Norm: 0.0341 +[2025-02-23 06:38:05] (step=0506900) Train Loss: 0.3228, Train Steps/Sec: 17.25, Grad Norm: 0.0352 +[2025-02-23 06:38:11] (step=0507000) Train Loss: 0.3234, Train Steps/Sec: 15.84, Grad Norm: 0.0382 +[2025-02-23 06:38:17] (step=0507100) Train Loss: 0.3229, Train Steps/Sec: 17.27, Grad Norm: 0.0350 +[2025-02-23 06:38:23] (step=0507200) Train Loss: 0.3232, Train Steps/Sec: 17.26, Grad Norm: 0.0382 +[2025-02-23 06:38:28] (step=0507300) Train Loss: 0.3231, Train Steps/Sec: 17.28, Grad Norm: 0.0340 +[2025-02-23 06:38:35] (step=0507400) Train Loss: 0.3221, Train Steps/Sec: 15.88, Grad Norm: 0.0363 +[2025-02-23 06:38:41] (step=0507500) Train Loss: 0.3223, Train Steps/Sec: 15.29, Grad Norm: 0.0328 +[2025-02-23 06:38:47] (step=0507600) Train Loss: 0.3226, Train Steps/Sec: 16.64, Grad Norm: 0.0372 +[2025-02-23 06:38:53] (step=0507700) Train Loss: 0.3224, Train Steps/Sec: 16.59, Grad Norm: 0.0368 +[2025-02-23 06:38:59] (step=0507800) Train Loss: 0.3224, Train Steps/Sec: 17.38, Grad Norm: 0.0370 +[2025-02-23 06:39:05] (step=0507900) Train Loss: 0.3227, Train Steps/Sec: 17.29, Grad Norm: 0.0362 +[2025-02-23 06:39:12] (step=0508000) Train Loss: 0.3227, Train Steps/Sec: 14.26, Grad Norm: 0.0411 +[2025-02-23 06:39:18] (step=0508100) Train Loss: 0.3227, Train Steps/Sec: 17.22, Grad Norm: 0.0390 +[2025-02-23 06:39:23] (step=0508200) Train Loss: 0.3230, Train Steps/Sec: 17.25, Grad Norm: 0.0378 +[2025-02-23 06:39:29] (step=0508300) Train Loss: 0.3229, Train Steps/Sec: 17.24, Grad Norm: 0.0395 +[2025-02-23 06:39:35] (step=0508400) Train Loss: 0.3225, Train Steps/Sec: 17.25, Grad Norm: 0.0384 +[2025-02-23 06:39:41] (step=0508500) Train Loss: 0.3227, Train Steps/Sec: 15.79, Grad Norm: 0.0386 +[2025-02-23 06:39:47] (step=0508600) Train Loss: 0.3225, Train Steps/Sec: 16.57, Grad Norm: 0.0341 +[2025-02-23 06:39:54] (step=0508700) Train Loss: 0.3230, Train Steps/Sec: 15.82, Grad Norm: 0.0408 +[2025-02-23 06:40:00] (step=0508800) Train Loss: 0.3222, Train Steps/Sec: 15.81, Grad Norm: 0.0374 +[2025-02-23 06:40:06] (step=0508900) Train Loss: 0.3232, Train Steps/Sec: 17.27, Grad Norm: 0.0368 +[2025-02-23 06:40:12] (step=0509000) Train Loss: 0.3230, Train Steps/Sec: 15.88, Grad Norm: 0.0366 +[2025-02-23 06:40:18] (step=0509100) Train Loss: 0.3226, Train Steps/Sec: 17.33, Grad Norm: 0.0385 +[2025-02-23 06:40:25] (step=0509200) Train Loss: 0.3223, Train Steps/Sec: 14.14, Grad Norm: 0.0357 +[2025-02-23 06:40:31] (step=0509300) Train Loss: 0.3226, Train Steps/Sec: 16.30, Grad Norm: 0.0368 +[2025-02-23 06:40:38] (step=0509400) Train Loss: 0.3223, Train Steps/Sec: 14.52, Grad Norm: 0.0365 +[2025-02-23 06:40:44] (step=0509500) Train Loss: 0.3226, Train Steps/Sec: 17.03, Grad Norm: 0.0352 +[2025-02-23 06:40:50] (step=0509600) Train Loss: 0.3227, Train Steps/Sec: 15.67, Grad Norm: 0.0399 +[2025-02-23 06:40:56] (step=0509700) Train Loss: 0.3227, Train Steps/Sec: 17.03, Grad Norm: 0.0344 +[2025-02-23 06:41:02] (step=0509800) Train Loss: 0.3226, Train Steps/Sec: 17.03, Grad Norm: 0.0365 +[2025-02-23 06:41:08] (step=0509900) Train Loss: 0.3233, Train Steps/Sec: 17.03, Grad Norm: 0.0366 +[2025-02-23 06:41:14] (step=0510000) Train Loss: 0.3229, Train Steps/Sec: 17.04, Grad Norm: 0.0380 +[2025-02-23 06:41:20] (step=0510100) Train Loss: 0.3229, Train Steps/Sec: 17.06, Grad Norm: 0.0382 +[2025-02-23 06:41:25] (step=0510200) Train Loss: 0.3228, Train Steps/Sec: 17.05, Grad Norm: 0.0355 +[2025-02-23 06:41:31] (step=0510300) Train Loss: 0.3228, Train Steps/Sec: 17.10, Grad Norm: 0.0350 +[2025-02-23 06:41:37] (step=0510400) Train Loss: 0.3221, Train Steps/Sec: 17.31, Grad Norm: 0.0373 +[2025-02-23 06:41:45] (step=0510500) Train Loss: 0.3225, Train Steps/Sec: 13.17, Grad Norm: 0.0345 +[2025-02-23 06:41:51] (step=0510600) Train Loss: 0.3230, Train Steps/Sec: 16.48, Grad Norm: 0.0372 +[2025-02-23 06:41:57] (step=0510700) Train Loss: 0.3232, Train Steps/Sec: 15.79, Grad Norm: 0.0395 +[2025-02-23 06:42:03] (step=0510800) Train Loss: 0.3225, Train Steps/Sec: 15.75, Grad Norm: 0.0411 +[2025-02-23 06:42:09] (step=0510900) Train Loss: 0.3224, Train Steps/Sec: 16.50, Grad Norm: 0.0398 +[2025-02-23 06:42:15] (step=0511000) Train Loss: 0.3229, Train Steps/Sec: 16.56, Grad Norm: 0.0346 +[2025-02-23 06:42:21] (step=0511100) Train Loss: 0.3224, Train Steps/Sec: 17.29, Grad Norm: 0.0369 +[2025-02-23 06:42:27] (step=0511200) Train Loss: 0.3226, Train Steps/Sec: 17.26, Grad Norm: 0.0381 +[2025-02-23 06:42:34] (step=0511300) Train Loss: 0.3233, Train Steps/Sec: 15.28, Grad Norm: 0.0375 +[2025-02-23 06:42:40] (step=0511400) Train Loss: 0.3225, Train Steps/Sec: 15.74, Grad Norm: 0.0364 +[2025-02-23 06:42:46] (step=0511500) Train Loss: 0.3227, Train Steps/Sec: 16.43, Grad Norm: 0.0404 +[2025-02-23 06:42:52] (step=0511600) Train Loss: 0.3227, Train Steps/Sec: 16.39, Grad Norm: 0.0356 +[2025-02-23 06:42:59] (step=0511700) Train Loss: 0.3231, Train Steps/Sec: 14.18, Grad Norm: 0.0378 +[2025-02-23 06:43:05] (step=0511800) Train Loss: 0.3221, Train Steps/Sec: 16.89, Grad Norm: 0.0360 +[2025-02-23 06:43:11] (step=0511900) Train Loss: 0.3228, Train Steps/Sec: 16.94, Grad Norm: 0.0384 +[2025-02-23 06:43:17] (step=0512000) Train Loss: 0.3232, Train Steps/Sec: 16.96, Grad Norm: 0.0357 +[2025-02-23 06:43:23] (step=0512100) Train Loss: 0.3224, Train Steps/Sec: 16.96, Grad Norm: 0.0372 +[2025-02-23 06:43:29] (step=0512200) Train Loss: 0.3225, Train Steps/Sec: 16.96, Grad Norm: 0.0387 +[2025-02-23 06:43:35] (step=0512300) Train Loss: 0.3229, Train Steps/Sec: 16.94, Grad Norm: 0.0349 +[2025-02-23 06:43:40] (step=0512400) Train Loss: 0.3222, Train Steps/Sec: 16.99, Grad Norm: 0.0393 +[2025-02-23 06:43:47] (step=0512500) Train Loss: 0.3232, Train Steps/Sec: 15.50, Grad Norm: 0.0371 +[2025-02-23 06:43:53] (step=0512600) Train Loss: 0.3230, Train Steps/Sec: 16.96, Grad Norm: 0.0339 +[2025-02-23 06:44:00] (step=0512700) Train Loss: 0.3227, Train Steps/Sec: 14.94, Grad Norm: 0.0380 +[2025-02-23 06:44:06] (step=0512800) Train Loss: 0.3225, Train Steps/Sec: 15.57, Grad Norm: 0.0374 +[2025-02-23 06:44:12] (step=0512900) Train Loss: 0.3223, Train Steps/Sec: 16.17, Grad Norm: 0.0371 +[2025-02-23 06:44:19] (step=0513000) Train Loss: 0.3221, Train Steps/Sec: 13.74, Grad Norm: 0.0410 +[2025-02-23 06:44:25] (step=0513100) Train Loss: 0.3228, Train Steps/Sec: 16.93, Grad Norm: 0.0354 +[2025-02-23 06:44:31] (step=0513200) Train Loss: 0.3223, Train Steps/Sec: 16.25, Grad Norm: 0.0340 +[2025-02-23 06:44:38] (step=0513300) Train Loss: 0.3230, Train Steps/Sec: 14.45, Grad Norm: 0.0354 +[2025-02-23 06:44:45] (step=0513400) Train Loss: 0.3225, Train Steps/Sec: 16.23, Grad Norm: 0.0346 +[2025-02-23 06:44:51] (step=0513500) Train Loss: 0.3231, Train Steps/Sec: 16.17, Grad Norm: 0.0373 +[2025-02-23 06:44:57] (step=0513600) Train Loss: 0.3227, Train Steps/Sec: 16.94, Grad Norm: 0.0380 +[2025-02-23 06:45:03] (step=0513700) Train Loss: 0.3220, Train Steps/Sec: 16.93, Grad Norm: 0.0368 +[2025-02-23 06:45:08] (step=0513800) Train Loss: 0.3226, Train Steps/Sec: 17.01, Grad Norm: 0.0410 +[2025-02-23 06:45:14] (step=0513900) Train Loss: 0.3217, Train Steps/Sec: 16.97, Grad Norm: 0.0370 +[2025-02-23 06:45:20] (step=0514000) Train Loss: 0.3235, Train Steps/Sec: 17.01, Grad Norm: 0.0387 +[2025-02-23 06:45:26] (step=0514100) Train Loss: 0.3231, Train Steps/Sec: 16.98, Grad Norm: 0.0351 +[2025-02-23 06:45:33] (step=0514200) Train Loss: 0.3230, Train Steps/Sec: 14.39, Grad Norm: 0.0383 +[2025-02-23 06:45:39] (step=0514300) Train Loss: 0.3226, Train Steps/Sec: 17.15, Grad Norm: 0.0342 +[2025-02-23 06:45:45] (step=0514400) Train Loss: 0.3229, Train Steps/Sec: 17.18, Grad Norm: 0.0347 +[2025-02-23 06:45:51] (step=0514500) Train Loss: 0.3225, Train Steps/Sec: 15.71, Grad Norm: 0.0379 +[2025-02-23 06:45:57] (step=0514600) Train Loss: 0.3222, Train Steps/Sec: 16.49, Grad Norm: 0.0360 +[2025-02-23 06:46:03] (step=0514700) Train Loss: 0.3223, Train Steps/Sec: 15.81, Grad Norm: 0.0384 +[2025-02-23 06:46:10] (step=0514800) Train Loss: 0.3225, Train Steps/Sec: 15.24, Grad Norm: 0.0400 +[2025-02-23 06:46:16] (step=0514900) Train Loss: 0.3222, Train Steps/Sec: 17.22, Grad Norm: 0.0363 +[2025-02-23 06:46:22] (step=0515000) Train Loss: 0.3222, Train Steps/Sec: 16.48, Grad Norm: 0.0369 +[2025-02-23 06:46:28] (step=0515100) Train Loss: 0.3232, Train Steps/Sec: 17.26, Grad Norm: 0.0363 +[2025-02-23 06:46:35] (step=0515200) Train Loss: 0.3227, Train Steps/Sec: 14.73, Grad Norm: 0.0361 +[2025-02-23 06:46:41] (step=0515300) Train Loss: 0.3227, Train Steps/Sec: 16.60, Grad Norm: 0.0386 +[2025-02-23 06:46:47] (step=0515400) Train Loss: 0.3224, Train Steps/Sec: 16.49, Grad Norm: 0.0398 +[2025-02-23 06:46:54] (step=0515500) Train Loss: 0.3228, Train Steps/Sec: 13.90, Grad Norm: 0.0331 +[2025-02-23 06:47:00] (step=0515600) Train Loss: 0.3225, Train Steps/Sec: 17.15, Grad Norm: 0.0357 +[2025-02-23 06:47:05] (step=0515700) Train Loss: 0.3229, Train Steps/Sec: 17.19, Grad Norm: 0.0348 +[2025-02-23 06:47:11] (step=0515800) Train Loss: 0.3221, Train Steps/Sec: 17.19, Grad Norm: 0.0387 +[2025-02-23 06:47:17] (step=0515900) Train Loss: 0.3227, Train Steps/Sec: 17.22, Grad Norm: 0.0391 +[2025-02-23 06:47:23] (step=0516000) Train Loss: 0.3225, Train Steps/Sec: 17.32, Grad Norm: 0.0399 +[2025-02-23 06:47:29] (step=0516100) Train Loss: 0.3227, Train Steps/Sec: 17.31, Grad Norm: 0.0383 +[2025-02-23 06:47:34] (step=0516200) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0382 +[2025-02-23 06:47:40] (step=0516300) Train Loss: 0.3229, Train Steps/Sec: 17.32, Grad Norm: 0.0343 +[2025-02-23 06:47:46] (step=0516400) Train Loss: 0.3228, Train Steps/Sec: 17.35, Grad Norm: 0.0362 +[2025-02-23 06:47:52] (step=0516500) Train Loss: 0.3222, Train Steps/Sec: 15.86, Grad Norm: 0.0350 +[2025-02-23 06:47:58] (step=0516600) Train Loss: 0.3229, Train Steps/Sec: 17.31, Grad Norm: 0.0391 +[2025-02-23 06:48:06] (step=0516700) Train Loss: 0.3228, Train Steps/Sec: 12.57, Grad Norm: 0.0369 +[2025-02-23 06:48:12] (step=0516800) Train Loss: 0.3231, Train Steps/Sec: 15.51, Grad Norm: 0.0384 +[2025-02-23 06:48:18] (step=0516900) Train Loss: 0.3224, Train Steps/Sec: 16.93, Grad Norm: 0.0357 +[2025-02-23 06:48:25] (step=0517000) Train Loss: 0.3226, Train Steps/Sec: 16.17, Grad Norm: 0.0381 +[2025-02-23 06:48:31] (step=0517100) Train Loss: 0.3223, Train Steps/Sec: 16.21, Grad Norm: 0.0345 +[2025-02-23 06:48:38] (step=0517200) Train Loss: 0.3229, Train Steps/Sec: 14.46, Grad Norm: 0.0364 +[2025-02-23 06:48:44] (step=0517300) Train Loss: 0.3225, Train Steps/Sec: 16.25, Grad Norm: 0.0343 +[2025-02-23 06:48:50] (step=0517400) Train Loss: 0.3225, Train Steps/Sec: 16.10, Grad Norm: 0.0385 +[2025-02-23 06:48:56] (step=0517500) Train Loss: 0.3228, Train Steps/Sec: 16.86, Grad Norm: 0.0339 +[2025-02-23 06:49:02] (step=0517600) Train Loss: 0.3225, Train Steps/Sec: 16.88, Grad Norm: 0.0385 +[2025-02-23 06:49:08] (step=0517700) Train Loss: 0.3228, Train Steps/Sec: 16.88, Grad Norm: 0.0350 +[2025-02-23 06:49:14] (step=0517800) Train Loss: 0.3227, Train Steps/Sec: 16.92, Grad Norm: 0.0352 +[2025-02-23 06:49:20] (step=0517900) Train Loss: 0.3226, Train Steps/Sec: 16.91, Grad Norm: 0.0391 +[2025-02-23 06:49:26] (step=0518000) Train Loss: 0.3231, Train Steps/Sec: 14.45, Grad Norm: 0.0333 +[2025-02-23 06:49:32] (step=0518100) Train Loss: 0.3227, Train Steps/Sec: 17.29, Grad Norm: 0.0364 +[2025-02-23 06:49:38] (step=0518200) Train Loss: 0.3231, Train Steps/Sec: 17.38, Grad Norm: 0.0335 +[2025-02-23 06:49:44] (step=0518300) Train Loss: 0.3226, Train Steps/Sec: 17.35, Grad Norm: 0.0364 +[2025-02-23 06:49:50] (step=0518400) Train Loss: 0.3226, Train Steps/Sec: 17.25, Grad Norm: 0.0331 +[2025-02-23 06:49:56] (step=0518500) Train Loss: 0.3228, Train Steps/Sec: 15.73, Grad Norm: 0.0419 +[2025-02-23 06:50:02] (step=0518600) Train Loss: 0.3223, Train Steps/Sec: 17.19, Grad Norm: 0.0374 +[2025-02-23 06:50:09] (step=0518700) Train Loss: 0.3224, Train Steps/Sec: 14.60, Grad Norm: 0.0355 +[2025-02-23 06:50:15] (step=0518800) Train Loss: 0.3223, Train Steps/Sec: 15.81, Grad Norm: 0.0369 +[2025-02-23 06:50:21] (step=0518900) Train Loss: 0.3229, Train Steps/Sec: 17.29, Grad Norm: 0.0394 +[2025-02-23 06:50:27] (step=0519000) Train Loss: 0.3226, Train Steps/Sec: 16.57, Grad Norm: 0.0383 +[2025-02-23 06:50:34] (step=0519100) Train Loss: 0.3227, Train Steps/Sec: 14.23, Grad Norm: 0.0376 +[2025-02-23 06:50:41] (step=0519200) Train Loss: 0.3223, Train Steps/Sec: 14.45, Grad Norm: 0.0348 +[2025-02-23 06:50:47] (step=0519300) Train Loss: 0.3223, Train Steps/Sec: 16.56, Grad Norm: 0.0341 +[2025-02-23 06:50:53] (step=0519400) Train Loss: 0.3229, Train Steps/Sec: 16.46, Grad Norm: 0.0412 +[2025-02-23 06:50:59] (step=0519500) Train Loss: 0.3228, Train Steps/Sec: 17.31, Grad Norm: 0.0361 +[2025-02-23 06:51:04] (step=0519600) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0348 +[2025-02-23 06:51:10] (step=0519700) Train Loss: 0.3221, Train Steps/Sec: 17.39, Grad Norm: 0.0356 +[2025-02-23 06:51:16] (step=0519800) Train Loss: 0.3225, Train Steps/Sec: 17.36, Grad Norm: 0.0359 +[2025-02-23 06:51:22] (step=0519900) Train Loss: 0.3229, Train Steps/Sec: 17.35, Grad Norm: 0.0394 +[2025-02-23 06:51:27] (step=0520000) Train Loss: 0.3230, Train Steps/Sec: 17.28, Grad Norm: 0.0359 +[2025-02-23 06:51:33] (step=0520100) Train Loss: 0.3220, Train Steps/Sec: 17.28, Grad Norm: 0.0373 +[2025-02-23 06:51:39] (step=0520200) Train Loss: 0.3231, Train Steps/Sec: 17.31, Grad Norm: 0.0351 +[2025-02-23 06:51:45] (step=0520300) Train Loss: 0.3229, Train Steps/Sec: 17.21, Grad Norm: 0.0383 +[2025-02-23 06:51:51] (step=0520400) Train Loss: 0.3234, Train Steps/Sec: 17.15, Grad Norm: 0.0389 +[2025-02-23 06:51:58] (step=0520500) Train Loss: 0.3221, Train Steps/Sec: 13.37, Grad Norm: 0.0365 +[2025-02-23 06:52:04] (step=0520600) Train Loss: 0.3225, Train Steps/Sec: 16.34, Grad Norm: 0.0363 +[2025-02-23 06:52:11] (step=0520700) Train Loss: 0.3221, Train Steps/Sec: 15.08, Grad Norm: 0.0377 +[2025-02-23 06:52:17] (step=0520800) Train Loss: 0.3230, Train Steps/Sec: 15.71, Grad Norm: 0.0366 +[2025-02-23 06:52:23] (step=0520900) Train Loss: 0.3225, Train Steps/Sec: 17.23, Grad Norm: 0.0381 +[2025-02-23 06:52:29] (step=0521000) Train Loss: 0.3225, Train Steps/Sec: 16.02, Grad Norm: 0.0365 +[2025-02-23 06:52:36] (step=0521100) Train Loss: 0.3224, Train Steps/Sec: 14.80, Grad Norm: 0.0345 +[2025-02-23 06:52:42] (step=0521200) Train Loss: 0.3223, Train Steps/Sec: 16.59, Grad Norm: 0.0387 +[2025-02-23 06:52:48] (step=0521300) Train Loss: 0.3226, Train Steps/Sec: 16.54, Grad Norm: 0.0370 +[2025-02-23 06:52:54] (step=0521400) Train Loss: 0.3229, Train Steps/Sec: 17.30, Grad Norm: 0.0362 +[2025-02-23 06:53:00] (step=0521500) Train Loss: 0.3224, Train Steps/Sec: 17.36, Grad Norm: 0.0359 +[2025-02-23 06:53:05] (step=0521600) Train Loss: 0.3228, Train Steps/Sec: 17.36, Grad Norm: 0.0407 +[2025-02-23 06:53:12] (step=0521700) Train Loss: 0.3225, Train Steps/Sec: 14.47, Grad Norm: 0.0342 +[2025-02-23 06:53:18] (step=0521800) Train Loss: 0.3220, Train Steps/Sec: 17.23, Grad Norm: 0.0382 +[2025-02-23 06:53:24] (step=0521900) Train Loss: 0.3229, Train Steps/Sec: 17.31, Grad Norm: 0.0400 +[2025-02-23 06:53:30] (step=0522000) Train Loss: 0.3228, Train Steps/Sec: 17.30, Grad Norm: 0.0370 +[2025-02-23 06:53:35] (step=0522100) Train Loss: 0.3222, Train Steps/Sec: 17.41, Grad Norm: 0.0372 +[2025-02-23 06:53:41] (step=0522200) Train Loss: 0.3227, Train Steps/Sec: 17.47, Grad Norm: 0.0354 +[2025-02-23 06:53:47] (step=0522300) Train Loss: 0.3223, Train Steps/Sec: 17.46, Grad Norm: 0.0389 +[2025-02-23 06:53:53] (step=0522400) Train Loss: 0.3225, Train Steps/Sec: 17.46, Grad Norm: 0.0402 +[2025-02-23 06:53:59] (step=0522500) Train Loss: 0.3226, Train Steps/Sec: 15.92, Grad Norm: 0.0380 +[2025-02-23 06:54:05] (step=0522600) Train Loss: 0.3226, Train Steps/Sec: 16.59, Grad Norm: 0.0338 +[2025-02-23 06:54:12] (step=0522700) Train Loss: 0.3233, Train Steps/Sec: 15.23, Grad Norm: 0.0351 +[2025-02-23 06:54:18] (step=0522800) Train Loss: 0.3226, Train Steps/Sec: 15.70, Grad Norm: 0.0339 +[2025-02-23 06:54:24] (step=0522900) Train Loss: 0.3230, Train Steps/Sec: 16.38, Grad Norm: 0.0371 +[2025-02-23 06:54:32] (step=0523000) Train Loss: 0.3229, Train Steps/Sec: 12.10, Grad Norm: 0.0350 +[2025-02-23 06:54:38] (step=0523100) Train Loss: 0.3227, Train Steps/Sec: 17.16, Grad Norm: 0.0375 +[2025-02-23 06:54:44] (step=0523200) Train Loss: 0.3224, Train Steps/Sec: 16.63, Grad Norm: 0.0386 +[2025-02-23 06:54:50] (step=0523300) Train Loss: 0.3230, Train Steps/Sec: 16.56, Grad Norm: 0.0356 +[2025-02-23 06:54:56] (step=0523400) Train Loss: 0.3222, Train Steps/Sec: 17.37, Grad Norm: 0.0377 +[2025-02-23 06:55:02] (step=0523500) Train Loss: 0.3225, Train Steps/Sec: 17.38, Grad Norm: 0.0349 +[2025-02-23 06:55:07] (step=0523600) Train Loss: 0.3227, Train Steps/Sec: 17.40, Grad Norm: 0.0364 +[2025-02-23 06:55:13] (step=0523700) Train Loss: 0.3225, Train Steps/Sec: 17.41, Grad Norm: 0.0363 +[2025-02-23 06:55:19] (step=0523800) Train Loss: 0.3226, Train Steps/Sec: 17.34, Grad Norm: 0.0358 +[2025-02-23 06:55:25] (step=0523900) Train Loss: 0.3222, Train Steps/Sec: 17.31, Grad Norm: 0.0370 +[2025-02-23 06:55:31] (step=0524000) Train Loss: 0.3222, Train Steps/Sec: 17.28, Grad Norm: 0.0372 +[2025-02-23 06:55:36] (step=0524100) Train Loss: 0.3222, Train Steps/Sec: 17.31, Grad Norm: 0.0393 +[2025-02-23 06:55:43] (step=0524200) Train Loss: 0.3223, Train Steps/Sec: 14.46, Grad Norm: 0.0351 +[2025-02-23 06:55:49] (step=0524300) Train Loss: 0.3227, Train Steps/Sec: 17.12, Grad Norm: 0.0420 +[2025-02-23 06:55:55] (step=0524400) Train Loss: 0.3229, Train Steps/Sec: 17.19, Grad Norm: 0.0383 +[2025-02-23 06:56:01] (step=0524500) Train Loss: 0.3228, Train Steps/Sec: 15.11, Grad Norm: 0.0387 +[2025-02-23 06:56:07] (step=0524600) Train Loss: 0.3225, Train Steps/Sec: 17.28, Grad Norm: 0.0352 +[2025-02-23 06:56:14] (step=0524700) Train Loss: 0.3223, Train Steps/Sec: 15.19, Grad Norm: 0.0349 +[2025-02-23 06:56:20] (step=0524800) Train Loss: 0.3224, Train Steps/Sec: 15.81, Grad Norm: 0.0354 +[2025-02-23 06:56:27] (step=0524900) Train Loss: 0.3223, Train Steps/Sec: 15.85, Grad Norm: 0.0381 +[2025-02-23 06:56:33] (step=0525000) Train Loss: 0.3228, Train Steps/Sec: 14.68, Grad Norm: 0.0347 +[2025-02-23 06:56:39] (step=0525100) Train Loss: 0.3226, Train Steps/Sec: 16.57, Grad Norm: 0.0374 +[2025-02-23 06:56:45] (step=0525200) Train Loss: 0.3231, Train Steps/Sec: 16.60, Grad Norm: 0.0354 +[2025-02-23 06:56:51] (step=0525300) Train Loss: 0.3228, Train Steps/Sec: 17.39, Grad Norm: 0.0373 +[2025-02-23 06:56:57] (step=0525400) Train Loss: 0.3224, Train Steps/Sec: 17.32, Grad Norm: 0.0336 +[2025-02-23 06:57:04] (step=0525500) Train Loss: 0.3227, Train Steps/Sec: 14.39, Grad Norm: 0.0382 +[2025-02-23 06:57:10] (step=0525600) Train Loss: 0.3226, Train Steps/Sec: 17.23, Grad Norm: 0.0385 +[2025-02-23 06:57:15] (step=0525700) Train Loss: 0.3221, Train Steps/Sec: 17.19, Grad Norm: 0.0367 +[2025-02-23 06:57:21] (step=0525800) Train Loss: 0.3224, Train Steps/Sec: 17.22, Grad Norm: 0.0361 +[2025-02-23 06:57:27] (step=0525900) Train Loss: 0.3224, Train Steps/Sec: 17.24, Grad Norm: 0.0410 +[2025-02-23 06:57:33] (step=0526000) Train Loss: 0.3223, Train Steps/Sec: 17.26, Grad Norm: 0.0395 +[2025-02-23 06:57:39] (step=0526100) Train Loss: 0.3233, Train Steps/Sec: 17.15, Grad Norm: 0.0379 +[2025-02-23 06:57:45] (step=0526200) Train Loss: 0.3221, Train Steps/Sec: 17.12, Grad Norm: 0.0385 +[2025-02-23 06:57:50] (step=0526300) Train Loss: 0.3230, Train Steps/Sec: 17.23, Grad Norm: 0.0344 +[2025-02-23 06:57:56] (step=0526400) Train Loss: 0.3226, Train Steps/Sec: 17.17, Grad Norm: 0.0378 +[2025-02-23 06:58:03] (step=0526500) Train Loss: 0.3220, Train Steps/Sec: 15.67, Grad Norm: 0.0389 +[2025-02-23 06:58:09] (step=0526600) Train Loss: 0.3230, Train Steps/Sec: 16.39, Grad Norm: 0.0329 +[2025-02-23 06:58:16] (step=0526700) Train Loss: 0.3228, Train Steps/Sec: 12.88, Grad Norm: 0.0356 +[2025-02-23 06:58:23] (step=0526800) Train Loss: 0.3222, Train Steps/Sec: 15.23, Grad Norm: 0.0330 +[2025-02-23 06:58:30] (step=0526900) Train Loss: 0.3224, Train Steps/Sec: 14.79, Grad Norm: 0.0413 +[2025-02-23 06:58:36] (step=0527000) Train Loss: 0.3225, Train Steps/Sec: 16.00, Grad Norm: 0.0389 +[2025-02-23 06:58:42] (step=0527100) Train Loss: 0.3223, Train Steps/Sec: 17.34, Grad Norm: 0.0369 +[2025-02-23 06:58:48] (step=0527200) Train Loss: 0.3227, Train Steps/Sec: 16.58, Grad Norm: 0.0390 +[2025-02-23 06:58:54] (step=0527300) Train Loss: 0.3227, Train Steps/Sec: 17.36, Grad Norm: 0.0390 +[2025-02-23 06:58:59] (step=0527400) Train Loss: 0.3226, Train Steps/Sec: 17.28, Grad Norm: 0.0358 +[2025-02-23 06:59:05] (step=0527500) Train Loss: 0.3226, Train Steps/Sec: 17.16, Grad Norm: 0.0374 +[2025-02-23 06:59:11] (step=0527600) Train Loss: 0.3223, Train Steps/Sec: 17.37, Grad Norm: 0.0383 +[2025-02-23 06:59:17] (step=0527700) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0334 +[2025-02-23 06:59:22] (step=0527800) Train Loss: 0.3222, Train Steps/Sec: 17.41, Grad Norm: 0.0358 +[2025-02-23 06:59:28] (step=0527900) Train Loss: 0.3225, Train Steps/Sec: 17.29, Grad Norm: 0.0390 +[2025-02-23 06:59:35] (step=0528000) Train Loss: 0.3226, Train Steps/Sec: 14.55, Grad Norm: 0.0360 +[2025-02-23 06:59:41] (step=0528100) Train Loss: 0.3224, Train Steps/Sec: 17.25, Grad Norm: 0.0346 +[2025-02-23 06:59:47] (step=0528200) Train Loss: 0.3224, Train Steps/Sec: 17.27, Grad Norm: 0.0389 +[2025-02-23 06:59:52] (step=0528300) Train Loss: 0.3230, Train Steps/Sec: 17.31, Grad Norm: 0.0344 +[2025-02-23 06:59:59] (step=0528400) Train Loss: 0.3225, Train Steps/Sec: 16.55, Grad Norm: 0.0360 +[2025-02-23 07:00:05] (step=0528500) Train Loss: 0.3231, Train Steps/Sec: 16.57, Grad Norm: 0.0419 +[2025-02-23 07:00:11] (step=0528600) Train Loss: 0.3224, Train Steps/Sec: 16.63, Grad Norm: 0.0381 +[2025-02-23 07:00:17] (step=0528700) Train Loss: 0.3219, Train Steps/Sec: 16.00, Grad Norm: 0.0349 +[2025-02-23 07:00:24] (step=0528800) Train Loss: 0.3222, Train Steps/Sec: 13.66, Grad Norm: 0.0377 +[2025-02-23 07:00:30] (step=0528900) Train Loss: 0.3222, Train Steps/Sec: 15.96, Grad Norm: 0.0373 +[2025-02-23 07:00:37] (step=0529000) Train Loss: 0.3225, Train Steps/Sec: 15.97, Grad Norm: 0.0348 +[2025-02-23 07:00:43] (step=0529100) Train Loss: 0.3225, Train Steps/Sec: 16.63, Grad Norm: 0.0372 +[2025-02-23 07:00:50] (step=0529200) Train Loss: 0.3219, Train Steps/Sec: 14.49, Grad Norm: 0.0374 +[2025-02-23 07:00:55] (step=0529300) Train Loss: 0.3224, Train Steps/Sec: 17.39, Grad Norm: 0.0388 +[2025-02-23 07:01:01] (step=0529400) Train Loss: 0.3229, Train Steps/Sec: 17.35, Grad Norm: 0.0362 +[2025-02-23 07:01:07] (step=0529500) Train Loss: 0.3229, Train Steps/Sec: 17.33, Grad Norm: 0.0345 +[2025-02-23 07:01:13] (step=0529600) Train Loss: 0.3227, Train Steps/Sec: 17.32, Grad Norm: 0.0386 +[2025-02-23 07:01:18] (step=0529700) Train Loss: 0.3228, Train Steps/Sec: 17.42, Grad Norm: 0.0362 +[2025-02-23 07:01:24] (step=0529800) Train Loss: 0.3225, Train Steps/Sec: 17.47, Grad Norm: 0.0369 +[2025-02-23 07:01:30] (step=0529900) Train Loss: 0.3225, Train Steps/Sec: 17.43, Grad Norm: 0.0337 +[2025-02-23 07:01:36] (step=0530000) Train Loss: 0.3223, Train Steps/Sec: 17.35, Grad Norm: 0.0360 +[2025-02-23 07:01:41] (step=0530100) Train Loss: 0.3226, Train Steps/Sec: 17.37, Grad Norm: 0.0339 +[2025-02-23 07:01:47] (step=0530200) Train Loss: 0.3227, Train Steps/Sec: 17.45, Grad Norm: 0.0377 +[2025-02-23 07:01:53] (step=0530300) Train Loss: 0.3224, Train Steps/Sec: 17.39, Grad Norm: 0.0377 +[2025-02-23 07:01:59] (step=0530400) Train Loss: 0.3225, Train Steps/Sec: 16.47, Grad Norm: 0.0350 +[2025-02-23 07:02:06] (step=0530500) Train Loss: 0.3226, Train Steps/Sec: 13.95, Grad Norm: 0.0345 +[2025-02-23 07:02:12] (step=0530600) Train Loss: 0.3223, Train Steps/Sec: 16.48, Grad Norm: 0.0363 +[2025-02-23 07:02:19] (step=0530700) Train Loss: 0.3226, Train Steps/Sec: 14.71, Grad Norm: 0.0366 +[2025-02-23 07:02:26] (step=0530800) Train Loss: 0.3220, Train Steps/Sec: 13.66, Grad Norm: 0.0369 +[2025-02-23 07:02:32] (step=0530900) Train Loss: 0.3225, Train Steps/Sec: 16.65, Grad Norm: 0.0349 +[2025-02-23 07:02:38] (step=0531000) Train Loss: 0.3230, Train Steps/Sec: 17.33, Grad Norm: 0.0358 +[2025-02-23 07:02:44] (step=0531100) Train Loss: 0.3226, Train Steps/Sec: 15.96, Grad Norm: 0.0374 +[2025-02-23 07:02:50] (step=0531200) Train Loss: 0.3221, Train Steps/Sec: 17.47, Grad Norm: 0.0342 +[2025-02-23 07:02:56] (step=0531300) Train Loss: 0.3223, Train Steps/Sec: 17.44, Grad Norm: 0.0358 +[2025-02-23 07:03:02] (step=0531400) Train Loss: 0.3224, Train Steps/Sec: 17.26, Grad Norm: 0.0336 +[2025-02-23 07:03:07] (step=0531500) Train Loss: 0.3225, Train Steps/Sec: 17.19, Grad Norm: 0.0384 +[2025-02-23 07:03:13] (step=0531600) Train Loss: 0.3226, Train Steps/Sec: 17.31, Grad Norm: 0.0358 +[2025-02-23 07:03:20] (step=0531700) Train Loss: 0.3225, Train Steps/Sec: 14.48, Grad Norm: 0.0352 +[2025-02-23 07:03:26] (step=0531800) Train Loss: 0.3224, Train Steps/Sec: 17.39, Grad Norm: 0.0354 +[2025-02-23 07:03:32] (step=0531900) Train Loss: 0.3231, Train Steps/Sec: 17.28, Grad Norm: 0.0379 +[2025-02-23 07:03:37] (step=0532000) Train Loss: 0.3226, Train Steps/Sec: 17.22, Grad Norm: 0.0380 +[2025-02-23 07:03:43] (step=0532100) Train Loss: 0.3222, Train Steps/Sec: 17.34, Grad Norm: 0.0368 +[2025-02-23 07:03:49] (step=0532200) Train Loss: 0.3227, Train Steps/Sec: 17.47, Grad Norm: 0.0380 +[2025-02-23 07:03:55] (step=0532300) Train Loss: 0.3227, Train Steps/Sec: 16.62, Grad Norm: 0.0350 +[2025-02-23 07:04:01] (step=0532400) Train Loss: 0.3222, Train Steps/Sec: 17.45, Grad Norm: 0.0391 +[2025-02-23 07:04:07] (step=0532500) Train Loss: 0.3224, Train Steps/Sec: 16.56, Grad Norm: 0.0342 +[2025-02-23 07:04:13] (step=0532600) Train Loss: 0.3229, Train Steps/Sec: 16.59, Grad Norm: 0.0339 +[2025-02-23 07:04:20] (step=0532700) Train Loss: 0.3222, Train Steps/Sec: 14.13, Grad Norm: 0.0356 +[2025-02-23 07:04:27] (step=0532800) Train Loss: 0.3229, Train Steps/Sec: 14.79, Grad Norm: 0.0391 +[2025-02-23 07:04:33] (step=0532900) Train Loss: 0.3225, Train Steps/Sec: 16.63, Grad Norm: 0.0385 +[2025-02-23 07:04:40] (step=0533000) Train Loss: 0.3224, Train Steps/Sec: 13.95, Grad Norm: 0.0356 +[2025-02-23 07:04:46] (step=0533100) Train Loss: 0.3226, Train Steps/Sec: 16.46, Grad Norm: 0.0345 +[2025-02-23 07:04:52] (step=0533200) Train Loss: 0.3220, Train Steps/Sec: 17.24, Grad Norm: 0.0345 +[2025-02-23 07:04:57] (step=0533300) Train Loss: 0.3225, Train Steps/Sec: 17.29, Grad Norm: 0.0382 +[2025-02-23 07:05:03] (step=0533400) Train Loss: 0.3219, Train Steps/Sec: 17.29, Grad Norm: 0.0386 +[2025-02-23 07:05:09] (step=0533500) Train Loss: 0.3229, Train Steps/Sec: 17.27, Grad Norm: 0.0378 +[2025-02-23 07:05:15] (step=0533600) Train Loss: 0.3231, Train Steps/Sec: 17.24, Grad Norm: 0.0384 +[2025-02-23 07:05:21] (step=0533700) Train Loss: 0.3226, Train Steps/Sec: 17.20, Grad Norm: 0.0364 +[2025-02-23 07:05:26] (step=0533800) Train Loss: 0.3222, Train Steps/Sec: 17.27, Grad Norm: 0.0335 +[2025-02-23 07:05:32] (step=0533900) Train Loss: 0.3225, Train Steps/Sec: 17.26, Grad Norm: 0.0396 +[2025-02-23 07:05:38] (step=0534000) Train Loss: 0.3224, Train Steps/Sec: 17.25, Grad Norm: 0.0360 +[2025-02-23 07:05:44] (step=0534100) Train Loss: 0.3229, Train Steps/Sec: 17.31, Grad Norm: 0.0346 +[2025-02-23 07:05:51] (step=0534200) Train Loss: 0.3222, Train Steps/Sec: 14.46, Grad Norm: 0.0346 +[2025-02-23 07:05:57] (step=0534300) Train Loss: 0.3224, Train Steps/Sec: 16.39, Grad Norm: 0.0412 +[2025-02-23 07:06:03] (step=0534400) Train Loss: 0.3227, Train Steps/Sec: 17.12, Grad Norm: 0.0340 +[2025-02-23 07:06:09] (step=0534500) Train Loss: 0.3224, Train Steps/Sec: 16.33, Grad Norm: 0.0348 +[2025-02-23 07:06:15] (step=0534600) Train Loss: 0.3224, Train Steps/Sec: 15.71, Grad Norm: 0.0370 +[2025-02-23 07:06:23] (step=0534700) Train Loss: 0.3228, Train Steps/Sec: 13.59, Grad Norm: 0.0331 +[2025-02-23 07:06:29] (step=0534800) Train Loss: 0.3221, Train Steps/Sec: 14.60, Grad Norm: 0.0375 +[2025-02-23 07:06:35] (step=0534900) Train Loss: 0.3225, Train Steps/Sec: 17.35, Grad Norm: 0.0378 +[2025-02-23 07:06:41] (step=0535000) Train Loss: 0.3225, Train Steps/Sec: 16.68, Grad Norm: 0.0371 +[2025-02-23 07:06:47] (step=0535100) Train Loss: 0.3218, Train Steps/Sec: 16.71, Grad Norm: 0.0355 +[2025-02-23 07:06:53] (step=0535200) Train Loss: 0.3225, Train Steps/Sec: 17.37, Grad Norm: 0.0368 +[2025-02-23 07:06:59] (step=0535300) Train Loss: 0.3227, Train Steps/Sec: 17.33, Grad Norm: 0.0360 +[2025-02-23 07:07:04] (step=0535400) Train Loss: 0.3230, Train Steps/Sec: 17.25, Grad Norm: 0.0405 +[2025-02-23 07:07:11] (step=0535500) Train Loss: 0.3221, Train Steps/Sec: 14.29, Grad Norm: 0.0333 +[2025-02-23 07:07:17] (step=0535600) Train Loss: 0.3226, Train Steps/Sec: 17.32, Grad Norm: 0.0367 +[2025-02-23 07:07:23] (step=0535700) Train Loss: 0.3226, Train Steps/Sec: 17.43, Grad Norm: 0.0367 +[2025-02-23 07:07:29] (step=0535800) Train Loss: 0.3227, Train Steps/Sec: 17.55, Grad Norm: 0.0411 +[2025-02-23 07:07:34] (step=0535900) Train Loss: 0.3227, Train Steps/Sec: 17.51, Grad Norm: 0.0344 +[2025-02-23 07:07:40] (step=0536000) Train Loss: 0.3223, Train Steps/Sec: 17.45, Grad Norm: 0.0371 +[2025-02-23 07:07:46] (step=0536100) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0379 +[2025-02-23 07:07:52] (step=0536200) Train Loss: 0.3227, Train Steps/Sec: 16.56, Grad Norm: 0.0358 +[2025-02-23 07:07:58] (step=0536300) Train Loss: 0.3227, Train Steps/Sec: 17.36, Grad Norm: 0.0357 +[2025-02-23 07:08:03] (step=0536400) Train Loss: 0.3226, Train Steps/Sec: 17.33, Grad Norm: 0.0404 +[2025-02-23 07:08:10] (step=0536500) Train Loss: 0.3222, Train Steps/Sec: 16.46, Grad Norm: 0.0348 +[2025-02-23 07:08:16] (step=0536600) Train Loss: 0.3225, Train Steps/Sec: 14.65, Grad Norm: 0.0345 +[2025-02-23 07:08:25] (step=0536700) Train Loss: 0.3227, Train Steps/Sec: 11.99, Grad Norm: 0.0348 +[2025-02-23 07:08:31] (step=0536800) Train Loss: 0.3226, Train Steps/Sec: 15.20, Grad Norm: 0.0403 +[2025-02-23 07:08:37] (step=0536900) Train Loss: 0.3228, Train Steps/Sec: 16.63, Grad Norm: 0.0372 +[2025-02-23 07:08:43] (step=0537000) Train Loss: 0.3226, Train Steps/Sec: 17.47, Grad Norm: 0.0355 +[2025-02-23 07:08:49] (step=0537100) Train Loss: 0.3226, Train Steps/Sec: 16.74, Grad Norm: 0.0365 +[2025-02-23 07:08:55] (step=0537200) Train Loss: 0.3226, Train Steps/Sec: 17.46, Grad Norm: 0.0391 +[2025-02-23 07:09:00] (step=0537300) Train Loss: 0.3227, Train Steps/Sec: 17.51, Grad Norm: 0.0363 +[2025-02-23 07:09:06] (step=0537400) Train Loss: 0.3227, Train Steps/Sec: 17.53, Grad Norm: 0.0417 +[2025-02-23 07:09:12] (step=0537500) Train Loss: 0.3224, Train Steps/Sec: 17.53, Grad Norm: 0.0365 +[2025-02-23 07:09:18] (step=0537600) Train Loss: 0.3224, Train Steps/Sec: 17.46, Grad Norm: 0.0382 +[2025-02-23 07:09:23] (step=0537700) Train Loss: 0.3228, Train Steps/Sec: 17.36, Grad Norm: 0.0403 +[2025-02-23 07:09:29] (step=0537800) Train Loss: 0.3229, Train Steps/Sec: 17.41, Grad Norm: 0.0342 +[2025-02-23 07:09:35] (step=0537900) Train Loss: 0.3224, Train Steps/Sec: 17.31, Grad Norm: 0.0428 +[2025-02-23 07:09:42] (step=0538000) Train Loss: 0.3226, Train Steps/Sec: 14.43, Grad Norm: 0.0379 +[2025-02-23 07:09:48] (step=0538100) Train Loss: 0.3228, Train Steps/Sec: 17.26, Grad Norm: 0.0341 +[2025-02-23 07:09:54] (step=0538200) Train Loss: 0.3226, Train Steps/Sec: 16.47, Grad Norm: 0.0356 +[2025-02-23 07:09:59] (step=0538300) Train Loss: 0.3227, Train Steps/Sec: 17.28, Grad Norm: 0.0351 +[2025-02-23 07:10:05] (step=0538400) Train Loss: 0.3224, Train Steps/Sec: 17.28, Grad Norm: 0.0368 +[2025-02-23 07:10:12] (step=0538500) Train Loss: 0.3225, Train Steps/Sec: 15.83, Grad Norm: 0.0371 +[2025-02-23 07:10:18] (step=0538600) Train Loss: 0.3220, Train Steps/Sec: 14.64, Grad Norm: 0.0336 +[2025-02-23 07:10:25] (step=0538700) Train Loss: 0.3221, Train Steps/Sec: 15.40, Grad Norm: 0.0355 +[2025-02-23 07:10:32] (step=0538800) Train Loss: 0.3221, Train Steps/Sec: 14.74, Grad Norm: 0.0365 +[2025-02-23 07:10:37] (step=0538900) Train Loss: 0.3224, Train Steps/Sec: 17.51, Grad Norm: 0.0388 +[2025-02-23 07:10:43] (step=0539000) Train Loss: 0.3227, Train Steps/Sec: 17.59, Grad Norm: 0.0374 +[2025-02-23 07:10:49] (step=0539100) Train Loss: 0.3226, Train Steps/Sec: 16.82, Grad Norm: 0.0374 +[2025-02-23 07:10:56] (step=0539200) Train Loss: 0.3224, Train Steps/Sec: 14.55, Grad Norm: 0.0424 +[2025-02-23 07:11:02] (step=0539300) Train Loss: 0.3220, Train Steps/Sec: 17.46, Grad Norm: 0.0371 +[2025-02-23 07:11:07] (step=0539400) Train Loss: 0.3225, Train Steps/Sec: 17.40, Grad Norm: 0.0328 +[2025-02-23 07:11:13] (step=0539500) Train Loss: 0.3232, Train Steps/Sec: 17.39, Grad Norm: 0.0382 +[2025-02-23 07:11:19] (step=0539600) Train Loss: 0.3226, Train Steps/Sec: 17.39, Grad Norm: 0.0388 +[2025-02-23 07:11:25] (step=0539700) Train Loss: 0.3226, Train Steps/Sec: 17.37, Grad Norm: 0.0335 +[2025-02-23 07:11:30] (step=0539800) Train Loss: 0.3226, Train Steps/Sec: 17.37, Grad Norm: 0.0372 +[2025-02-23 07:11:36] (step=0539900) Train Loss: 0.3223, Train Steps/Sec: 17.40, Grad Norm: 0.0367 +[2025-02-23 07:11:42] (step=0540000) Train Loss: 0.3224, Train Steps/Sec: 17.46, Grad Norm: 0.0359 +[2025-02-23 07:11:48] (step=0540100) Train Loss: 0.3223, Train Steps/Sec: 16.70, Grad Norm: 0.0402 +[2025-02-23 07:11:54] (step=0540200) Train Loss: 0.3227, Train Steps/Sec: 17.49, Grad Norm: 0.0350 +[2025-02-23 07:11:59] (step=0540300) Train Loss: 0.3230, Train Steps/Sec: 17.51, Grad Norm: 0.0397 +[2025-02-23 07:12:05] (step=0540400) Train Loss: 0.3222, Train Steps/Sec: 16.61, Grad Norm: 0.0357 +[2025-02-23 07:12:14] (step=0540500) Train Loss: 0.3223, Train Steps/Sec: 12.12, Grad Norm: 0.0365 +[2025-02-23 07:12:20] (step=0540600) Train Loss: 0.3224, Train Steps/Sec: 15.88, Grad Norm: 0.0358 +[2025-02-23 07:12:26] (step=0540700) Train Loss: 0.3223, Train Steps/Sec: 16.55, Grad Norm: 0.0361 +[2025-02-23 07:12:33] (step=0540800) Train Loss: 0.3219, Train Steps/Sec: 14.05, Grad Norm: 0.0345 +[2025-02-23 07:12:39] (step=0540900) Train Loss: 0.3223, Train Steps/Sec: 17.37, Grad Norm: 0.0366 +[2025-02-23 07:12:44] (step=0541000) Train Loss: 0.3225, Train Steps/Sec: 17.49, Grad Norm: 0.0375 +[2025-02-23 07:12:50] (step=0541100) Train Loss: 0.3220, Train Steps/Sec: 16.80, Grad Norm: 0.0411 +[2025-02-23 07:12:56] (step=0541200) Train Loss: 0.3225, Train Steps/Sec: 17.54, Grad Norm: 0.0363 +[2025-02-23 07:13:02] (step=0541300) Train Loss: 0.3225, Train Steps/Sec: 17.50, Grad Norm: 0.0362 +[2025-02-23 07:13:08] (step=0541400) Train Loss: 0.3222, Train Steps/Sec: 17.48, Grad Norm: 0.0442 +[2025-02-23 07:13:13] (step=0541500) Train Loss: 0.3229, Train Steps/Sec: 17.47, Grad Norm: 0.0350 +[2025-02-23 07:13:19] (step=0541600) Train Loss: 0.3224, Train Steps/Sec: 17.46, Grad Norm: 0.0369 +[2025-02-23 07:13:26] (step=0541700) Train Loss: 0.3226, Train Steps/Sec: 14.46, Grad Norm: 0.0352 +[2025-02-23 07:13:32] (step=0541800) Train Loss: 0.3222, Train Steps/Sec: 17.39, Grad Norm: 0.0341 +[2025-02-23 07:13:37] (step=0541900) Train Loss: 0.3230, Train Steps/Sec: 17.48, Grad Norm: 0.0380 +[2025-02-23 07:13:43] (step=0542000) Train Loss: 0.3228, Train Steps/Sec: 17.49, Grad Norm: 0.0384 +[2025-02-23 07:13:49] (step=0542100) Train Loss: 0.3231, Train Steps/Sec: 16.66, Grad Norm: 0.0333 +[2025-02-23 07:13:55] (step=0542200) Train Loss: 0.3218, Train Steps/Sec: 17.51, Grad Norm: 0.0361 +[2025-02-23 07:14:01] (step=0542300) Train Loss: 0.3228, Train Steps/Sec: 17.48, Grad Norm: 0.0373 +[2025-02-23 07:14:07] (step=0542400) Train Loss: 0.3226, Train Steps/Sec: 15.25, Grad Norm: 0.0388 +[2025-02-23 07:14:14] (step=0542500) Train Loss: 0.3227, Train Steps/Sec: 15.29, Grad Norm: 0.0356 +[2025-02-23 07:14:20] (step=0542600) Train Loss: 0.3230, Train Steps/Sec: 15.93, Grad Norm: 0.0381 +[2025-02-23 07:14:26] (step=0542700) Train Loss: 0.3225, Train Steps/Sec: 15.92, Grad Norm: 0.0381 +[2025-02-23 07:14:33] (step=0542800) Train Loss: 0.3224, Train Steps/Sec: 15.25, Grad Norm: 0.0344 +[2025-02-23 07:14:39] (step=0542900) Train Loss: 0.3227, Train Steps/Sec: 16.63, Grad Norm: 0.0375 +[2025-02-23 07:14:46] (step=0543000) Train Loss: 0.3220, Train Steps/Sec: 14.52, Grad Norm: 0.0362 +[2025-02-23 07:14:52] (step=0543100) Train Loss: 0.3219, Train Steps/Sec: 16.68, Grad Norm: 0.0366 +[2025-02-23 07:14:57] (step=0543200) Train Loss: 0.3224, Train Steps/Sec: 17.43, Grad Norm: 0.0328 +[2025-02-23 07:15:03] (step=0543300) Train Loss: 0.3222, Train Steps/Sec: 17.40, Grad Norm: 0.0365 +[2025-02-23 07:15:09] (step=0543400) Train Loss: 0.3225, Train Steps/Sec: 17.45, Grad Norm: 0.0375 +[2025-02-23 07:15:15] (step=0543500) Train Loss: 0.3227, Train Steps/Sec: 17.43, Grad Norm: 0.0381 +[2025-02-23 07:15:20] (step=0543600) Train Loss: 0.3222, Train Steps/Sec: 17.49, Grad Norm: 0.0363 +[2025-02-23 07:15:26] (step=0543700) Train Loss: 0.3229, Train Steps/Sec: 17.45, Grad Norm: 0.0375 +[2025-02-23 07:15:32] (step=0543800) Train Loss: 0.3227, Train Steps/Sec: 17.31, Grad Norm: 0.0397 +[2025-02-23 07:15:38] (step=0543900) Train Loss: 0.3225, Train Steps/Sec: 17.44, Grad Norm: 0.0385 +[2025-02-23 07:15:44] (step=0544000) Train Loss: 0.3223, Train Steps/Sec: 16.51, Grad Norm: 0.0381 +[2025-02-23 07:15:49] (step=0544100) Train Loss: 0.3226, Train Steps/Sec: 17.38, Grad Norm: 0.0375 +[2025-02-23 07:15:56] (step=0544200) Train Loss: 0.3226, Train Steps/Sec: 14.47, Grad Norm: 0.0339 +[2025-02-23 07:16:02] (step=0544300) Train Loss: 0.3222, Train Steps/Sec: 16.65, Grad Norm: 0.0362 +[2025-02-23 07:16:09] (step=0544400) Train Loss: 0.3225, Train Steps/Sec: 14.82, Grad Norm: 0.0388 +[2025-02-23 07:16:15] (step=0544500) Train Loss: 0.3227, Train Steps/Sec: 15.97, Grad Norm: 0.0350 +[2025-02-23 07:16:21] (step=0544600) Train Loss: 0.3231, Train Steps/Sec: 16.60, Grad Norm: 0.0388 +[2025-02-23 07:16:28] (step=0544700) Train Loss: 0.3234, Train Steps/Sec: 16.00, Grad Norm: 0.0374 +[2025-02-23 07:16:34] (step=0544800) Train Loss: 0.3227, Train Steps/Sec: 15.38, Grad Norm: 0.0345 +[2025-02-23 07:16:40] (step=0544900) Train Loss: 0.3222, Train Steps/Sec: 16.80, Grad Norm: 0.0381 +[2025-02-23 07:16:46] (step=0545000) Train Loss: 0.3222, Train Steps/Sec: 17.57, Grad Norm: 0.0354 +[2025-02-23 07:16:52] (step=0545100) Train Loss: 0.3228, Train Steps/Sec: 16.80, Grad Norm: 0.0327 +[2025-02-23 07:16:57] (step=0545200) Train Loss: 0.3222, Train Steps/Sec: 17.51, Grad Norm: 0.0358 +[2025-02-23 07:17:03] (step=0545300) Train Loss: 0.3220, Train Steps/Sec: 17.50, Grad Norm: 0.0379 +[2025-02-23 07:17:09] (step=0545400) Train Loss: 0.3228, Train Steps/Sec: 17.51, Grad Norm: 0.0367 +[2025-02-23 07:17:16] (step=0545500) Train Loss: 0.3225, Train Steps/Sec: 14.35, Grad Norm: 0.0346 +[2025-02-23 07:17:22] (step=0545600) Train Loss: 0.3225, Train Steps/Sec: 17.22, Grad Norm: 0.0347 +[2025-02-23 07:17:27] (step=0545700) Train Loss: 0.3229, Train Steps/Sec: 17.22, Grad Norm: 0.0352 +[2025-02-23 07:17:33] (step=0545800) Train Loss: 0.3224, Train Steps/Sec: 17.19, Grad Norm: 0.0375 +[2025-02-23 07:17:39] (step=0545900) Train Loss: 0.3218, Train Steps/Sec: 17.19, Grad Norm: 0.0354 +[2025-02-23 07:17:45] (step=0546000) Train Loss: 0.3219, Train Steps/Sec: 16.43, Grad Norm: 0.0343 +[2025-02-23 07:17:51] (step=0546100) Train Loss: 0.3226, Train Steps/Sec: 17.23, Grad Norm: 0.0372 +[2025-02-23 07:17:57] (step=0546200) Train Loss: 0.3225, Train Steps/Sec: 17.25, Grad Norm: 0.0391 +[2025-02-23 07:18:03] (step=0546300) Train Loss: 0.3227, Train Steps/Sec: 15.20, Grad Norm: 0.0360 +[2025-02-23 07:18:10] (step=0546400) Train Loss: 0.3231, Train Steps/Sec: 15.86, Grad Norm: 0.0346 +[2025-02-23 07:18:16] (step=0546500) Train Loss: 0.3220, Train Steps/Sec: 15.76, Grad Norm: 0.0394 +[2025-02-23 07:18:22] (step=0546600) Train Loss: 0.3225, Train Steps/Sec: 15.78, Grad Norm: 0.0368 +[2025-02-23 07:18:30] (step=0546700) Train Loss: 0.3226, Train Steps/Sec: 13.78, Grad Norm: 0.0383 +[2025-02-23 07:18:36] (step=0546800) Train Loss: 0.3223, Train Steps/Sec: 15.59, Grad Norm: 0.0379 +[2025-02-23 07:18:42] (step=0546900) Train Loss: 0.3226, Train Steps/Sec: 15.69, Grad Norm: 0.0359 +[2025-02-23 07:18:48] (step=0547000) Train Loss: 0.3230, Train Steps/Sec: 17.13, Grad Norm: 0.0389 +[2025-02-23 07:18:54] (step=0547100) Train Loss: 0.3228, Train Steps/Sec: 16.47, Grad Norm: 0.0391 +[2025-02-23 07:19:00] (step=0547200) Train Loss: 0.3222, Train Steps/Sec: 17.33, Grad Norm: 0.0350 +[2025-02-23 07:19:06] (step=0547300) Train Loss: 0.3225, Train Steps/Sec: 17.22, Grad Norm: 0.0363 +[2025-02-23 07:19:12] (step=0547400) Train Loss: 0.3233, Train Steps/Sec: 17.32, Grad Norm: 0.0364 +[2025-02-23 07:19:17] (step=0547500) Train Loss: 0.3226, Train Steps/Sec: 17.32, Grad Norm: 0.0361 +[2025-02-23 07:19:23] (step=0547600) Train Loss: 0.3224, Train Steps/Sec: 17.21, Grad Norm: 0.0373 +[2025-02-23 07:19:29] (step=0547700) Train Loss: 0.3219, Train Steps/Sec: 17.21, Grad Norm: 0.0341 +[2025-02-23 07:19:35] (step=0547800) Train Loss: 0.3228, Train Steps/Sec: 17.22, Grad Norm: 0.0364 +[2025-02-23 07:19:41] (step=0547900) Train Loss: 0.3220, Train Steps/Sec: 16.43, Grad Norm: 0.0397 +[2025-02-23 07:19:48] (step=0548000) Train Loss: 0.3225, Train Steps/Sec: 14.13, Grad Norm: 0.0355 +[2025-02-23 07:19:54] (step=0548100) Train Loss: 0.3224, Train Steps/Sec: 17.12, Grad Norm: 0.0363 +[2025-02-23 07:20:00] (step=0548200) Train Loss: 0.3227, Train Steps/Sec: 16.39, Grad Norm: 0.0361 +[2025-02-23 07:20:07] (step=0548300) Train Loss: 0.3221, Train Steps/Sec: 14.57, Grad Norm: 0.0358 +[2025-02-23 07:20:13] (step=0548400) Train Loss: 0.3216, Train Steps/Sec: 16.44, Grad Norm: 0.0349 +[2025-02-23 07:20:19] (step=0548500) Train Loss: 0.3227, Train Steps/Sec: 16.41, Grad Norm: 0.0346 +[2025-02-23 07:20:25] (step=0548600) Train Loss: 0.3226, Train Steps/Sec: 15.78, Grad Norm: 0.0377 +[2025-02-23 07:20:31] (step=0548700) Train Loss: 0.3223, Train Steps/Sec: 16.55, Grad Norm: 0.0373 +[2025-02-23 07:20:38] (step=0548800) Train Loss: 0.3225, Train Steps/Sec: 15.73, Grad Norm: 0.0356 +[2025-02-23 07:20:44] (step=0548900) Train Loss: 0.3224, Train Steps/Sec: 15.73, Grad Norm: 0.0346 +[2025-02-23 07:20:50] (step=0549000) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0400 +[2025-02-23 07:20:56] (step=0549100) Train Loss: 0.3228, Train Steps/Sec: 16.55, Grad Norm: 0.0368 +[2025-02-23 07:21:03] (step=0549200) Train Loss: 0.3224, Train Steps/Sec: 14.01, Grad Norm: 0.0362 +[2025-02-23 07:21:09] (step=0549300) Train Loss: 0.3222, Train Steps/Sec: 17.33, Grad Norm: 0.0353 +[2025-02-23 07:21:15] (step=0549400) Train Loss: 0.3224, Train Steps/Sec: 17.31, Grad Norm: 0.0412 +[2025-02-23 07:21:20] (step=0549500) Train Loss: 0.3229, Train Steps/Sec: 17.27, Grad Norm: 0.0347 +[2025-02-23 07:21:26] (step=0549600) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0343 +[2025-02-23 07:21:32] (step=0549700) Train Loss: 0.3224, Train Steps/Sec: 17.28, Grad Norm: 0.0349 +[2025-02-23 07:21:38] (step=0549800) Train Loss: 0.3227, Train Steps/Sec: 17.30, Grad Norm: 0.0378 +[2025-02-23 07:21:44] (step=0549900) Train Loss: 0.3221, Train Steps/Sec: 16.48, Grad Norm: 0.0358 +[2025-02-23 07:21:50] (step=0550000) Train Loss: 0.3224, Train Steps/Sec: 17.30, Grad Norm: 0.0366 +[2025-02-23 07:21:51] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0550000.pt +[2025-02-23 07:21:57] (step=0550100) Train Loss: 0.3224, Train Steps/Sec: 14.01, Grad Norm: 0.0376 +[2025-02-23 07:22:03] (step=0550200) Train Loss: 0.3222, Train Steps/Sec: 15.89, Grad Norm: 0.0341 +[2025-02-23 07:22:10] (step=0550300) Train Loss: 0.3223, Train Steps/Sec: 15.40, Grad Norm: 0.0348 +[2025-02-23 07:22:15] (step=0550400) Train Loss: 0.3225, Train Steps/Sec: 17.37, Grad Norm: 0.0350 +[2025-02-23 07:22:23] (step=0550500) Train Loss: 0.3218, Train Steps/Sec: 13.13, Grad Norm: 0.0359 +[2025-02-23 07:22:29] (step=0550600) Train Loss: 0.3219, Train Steps/Sec: 16.47, Grad Norm: 0.0355 +[2025-02-23 07:22:35] (step=0550700) Train Loss: 0.3224, Train Steps/Sec: 16.43, Grad Norm: 0.0354 +[2025-02-23 07:22:42] (step=0550800) Train Loss: 0.3221, Train Steps/Sec: 15.75, Grad Norm: 0.0410 +[2025-02-23 07:22:48] (step=0550900) Train Loss: 0.3225, Train Steps/Sec: 15.83, Grad Norm: 0.0375 +[2025-02-23 07:22:54] (step=0551000) Train Loss: 0.3225, Train Steps/Sec: 17.21, Grad Norm: 0.0359 +[2025-02-23 07:23:00] (step=0551100) Train Loss: 0.3222, Train Steps/Sec: 16.45, Grad Norm: 0.0381 +[2025-02-23 07:23:06] (step=0551200) Train Loss: 0.3226, Train Steps/Sec: 17.16, Grad Norm: 0.0359 +[2025-02-23 07:23:11] (step=0551300) Train Loss: 0.3223, Train Steps/Sec: 17.20, Grad Norm: 0.0356 +[2025-02-23 07:23:17] (step=0551400) Train Loss: 0.3225, Train Steps/Sec: 17.29, Grad Norm: 0.0354 +[2025-02-23 07:23:23] (step=0551500) Train Loss: 0.3227, Train Steps/Sec: 17.33, Grad Norm: 0.0374 +[2025-02-23 07:23:29] (step=0551600) Train Loss: 0.3224, Train Steps/Sec: 17.35, Grad Norm: 0.0319 +[2025-02-23 07:23:36] (step=0551700) Train Loss: 0.3228, Train Steps/Sec: 14.03, Grad Norm: 0.0387 +[2025-02-23 07:23:42] (step=0551800) Train Loss: 0.3225, Train Steps/Sec: 16.43, Grad Norm: 0.0381 +[2025-02-23 07:23:48] (step=0551900) Train Loss: 0.3225, Train Steps/Sec: 17.21, Grad Norm: 0.0382 +[2025-02-23 07:23:54] (step=0552000) Train Loss: 0.3223, Train Steps/Sec: 17.29, Grad Norm: 0.0369 +[2025-02-23 07:24:00] (step=0552100) Train Loss: 0.3221, Train Steps/Sec: 15.39, Grad Norm: 0.0391 +[2025-02-23 07:24:06] (step=0552200) Train Loss: 0.3221, Train Steps/Sec: 16.00, Grad Norm: 0.0325 +[2025-02-23 07:24:12] (step=0552300) Train Loss: 0.3223, Train Steps/Sec: 16.70, Grad Norm: 0.0354 +[2025-02-23 07:24:18] (step=0552400) Train Loss: 0.3224, Train Steps/Sec: 17.32, Grad Norm: 0.0369 +[2025-02-23 07:24:24] (step=0552500) Train Loss: 0.3222, Train Steps/Sec: 15.81, Grad Norm: 0.0372 +[2025-02-23 07:24:30] (step=0552600) Train Loss: 0.3223, Train Steps/Sec: 16.60, Grad Norm: 0.0367 +[2025-02-23 07:24:36] (step=0552700) Train Loss: 0.3224, Train Steps/Sec: 16.58, Grad Norm: 0.0351 +[2025-02-23 07:24:43] (step=0552800) Train Loss: 0.3222, Train Steps/Sec: 15.92, Grad Norm: 0.0380 +[2025-02-23 07:24:49] (step=0552900) Train Loss: 0.3222, Train Steps/Sec: 15.98, Grad Norm: 0.0360 +[2025-02-23 07:24:56] (step=0553000) Train Loss: 0.3222, Train Steps/Sec: 14.37, Grad Norm: 0.0342 +[2025-02-23 07:25:02] (step=0553100) Train Loss: 0.3222, Train Steps/Sec: 16.55, Grad Norm: 0.0356 +[2025-02-23 07:25:08] (step=0553200) Train Loss: 0.3226, Train Steps/Sec: 17.30, Grad Norm: 0.0399 +[2025-02-23 07:25:14] (step=0553300) Train Loss: 0.3220, Train Steps/Sec: 17.34, Grad Norm: 0.0375 +[2025-02-23 07:25:19] (step=0553400) Train Loss: 0.3222, Train Steps/Sec: 17.40, Grad Norm: 0.0377 +[2025-02-23 07:25:25] (step=0553500) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0359 +[2025-02-23 07:25:31] (step=0553600) Train Loss: 0.3223, Train Steps/Sec: 17.34, Grad Norm: 0.0358 +[2025-02-23 07:25:37] (step=0553700) Train Loss: 0.3228, Train Steps/Sec: 17.34, Grad Norm: 0.0352 +[2025-02-23 07:25:43] (step=0553800) Train Loss: 0.3225, Train Steps/Sec: 16.46, Grad Norm: 0.0386 +[2025-02-23 07:25:48] (step=0553900) Train Loss: 0.3228, Train Steps/Sec: 17.32, Grad Norm: 0.0413 +[2025-02-23 07:25:54] (step=0554000) Train Loss: 0.3222, Train Steps/Sec: 16.60, Grad Norm: 0.0329 +[2025-02-23 07:26:01] (step=0554100) Train Loss: 0.3231, Train Steps/Sec: 15.95, Grad Norm: 0.0376 +[2025-02-23 07:26:08] (step=0554200) Train Loss: 0.3219, Train Steps/Sec: 13.02, Grad Norm: 0.0371 +[2025-02-23 07:26:14] (step=0554300) Train Loss: 0.3222, Train Steps/Sec: 17.34, Grad Norm: 0.0349 +[2025-02-23 07:26:20] (step=0554400) Train Loss: 0.3229, Train Steps/Sec: 16.63, Grad Norm: 0.0362 +[2025-02-23 07:26:26] (step=0554500) Train Loss: 0.3222, Train Steps/Sec: 16.48, Grad Norm: 0.0379 +[2025-02-23 07:26:32] (step=0554600) Train Loss: 0.3216, Train Steps/Sec: 16.55, Grad Norm: 0.0366 +[2025-02-23 07:26:38] (step=0554700) Train Loss: 0.3224, Train Steps/Sec: 16.54, Grad Norm: 0.0379 +[2025-02-23 07:26:45] (step=0554800) Train Loss: 0.3228, Train Steps/Sec: 15.90, Grad Norm: 0.0354 +[2025-02-23 07:26:51] (step=0554900) Train Loss: 0.3220, Train Steps/Sec: 15.94, Grad Norm: 0.0392 +[2025-02-23 07:26:57] (step=0555000) Train Loss: 0.3224, Train Steps/Sec: 17.43, Grad Norm: 0.0381 +[2025-02-23 07:27:03] (step=0555100) Train Loss: 0.3224, Train Steps/Sec: 16.65, Grad Norm: 0.0342 +[2025-02-23 07:27:08] (step=0555200) Train Loss: 0.3220, Train Steps/Sec: 17.37, Grad Norm: 0.0352 +[2025-02-23 07:27:14] (step=0555300) Train Loss: 0.3225, Train Steps/Sec: 17.39, Grad Norm: 0.0349 +[2025-02-23 07:27:20] (step=0555400) Train Loss: 0.3224, Train Steps/Sec: 17.43, Grad Norm: 0.0386 +[2025-02-23 07:27:27] (step=0555500) Train Loss: 0.3229, Train Steps/Sec: 14.40, Grad Norm: 0.0376 +[2025-02-23 07:27:33] (step=0555600) Train Loss: 0.3229, Train Steps/Sec: 17.17, Grad Norm: 0.0402 +[2025-02-23 07:27:39] (step=0555700) Train Loss: 0.3223, Train Steps/Sec: 16.42, Grad Norm: 0.0383 +[2025-02-23 07:27:45] (step=0555800) Train Loss: 0.3220, Train Steps/Sec: 17.29, Grad Norm: 0.0342 +[2025-02-23 07:27:50] (step=0555900) Train Loss: 0.3229, Train Steps/Sec: 17.29, Grad Norm: 0.0422 +[2025-02-23 07:27:57] (step=0556000) Train Loss: 0.3225, Train Steps/Sec: 15.25, Grad Norm: 0.0353 +[2025-02-23 07:28:03] (step=0556100) Train Loss: 0.3224, Train Steps/Sec: 15.95, Grad Norm: 0.0353 +[2025-02-23 07:28:09] (step=0556200) Train Loss: 0.3227, Train Steps/Sec: 16.53, Grad Norm: 0.0379 +[2025-02-23 07:28:15] (step=0556300) Train Loss: 0.3227, Train Steps/Sec: 17.28, Grad Norm: 0.0354 +[2025-02-23 07:28:21] (step=0556400) Train Loss: 0.3221, Train Steps/Sec: 16.60, Grad Norm: 0.0356 +[2025-02-23 07:28:27] (step=0556500) Train Loss: 0.3222, Train Steps/Sec: 16.59, Grad Norm: 0.0378 +[2025-02-23 07:28:33] (step=0556600) Train Loss: 0.3219, Train Steps/Sec: 16.61, Grad Norm: 0.0396 +[2025-02-23 07:28:40] (step=0556700) Train Loss: 0.3221, Train Steps/Sec: 14.01, Grad Norm: 0.0363 +[2025-02-23 07:28:47] (step=0556800) Train Loss: 0.3227, Train Steps/Sec: 15.71, Grad Norm: 0.0325 +[2025-02-23 07:28:53] (step=0556900) Train Loss: 0.3222, Train Steps/Sec: 15.69, Grad Norm: 0.0414 +[2025-02-23 07:28:59] (step=0557000) Train Loss: 0.3226, Train Steps/Sec: 17.07, Grad Norm: 0.0409 +[2025-02-23 07:29:05] (step=0557100) Train Loss: 0.3223, Train Steps/Sec: 16.38, Grad Norm: 0.0365 +[2025-02-23 07:29:11] (step=0557200) Train Loss: 0.3222, Train Steps/Sec: 17.24, Grad Norm: 0.0350 +[2025-02-23 07:29:16] (step=0557300) Train Loss: 0.3224, Train Steps/Sec: 17.31, Grad Norm: 0.0349 +[2025-02-23 07:29:22] (step=0557400) Train Loss: 0.3224, Train Steps/Sec: 17.34, Grad Norm: 0.0363 +[2025-02-23 07:29:28] (step=0557500) Train Loss: 0.3225, Train Steps/Sec: 17.30, Grad Norm: 0.0354 +[2025-02-23 07:29:34] (step=0557600) Train Loss: 0.3227, Train Steps/Sec: 17.33, Grad Norm: 0.0353 +[2025-02-23 07:29:40] (step=0557700) Train Loss: 0.3226, Train Steps/Sec: 16.52, Grad Norm: 0.0388 +[2025-02-23 07:29:46] (step=0557800) Train Loss: 0.3225, Train Steps/Sec: 17.29, Grad Norm: 0.0362 +[2025-02-23 07:29:52] (step=0557900) Train Loss: 0.3223, Train Steps/Sec: 16.56, Grad Norm: 0.0392 +[2025-02-23 07:30:00] (step=0558000) Train Loss: 0.3220, Train Steps/Sec: 12.50, Grad Norm: 0.0346 +[2025-02-23 07:30:06] (step=0558100) Train Loss: 0.3227, Train Steps/Sec: 16.48, Grad Norm: 0.0350 +[2025-02-23 07:30:12] (step=0558200) Train Loss: 0.3222, Train Steps/Sec: 17.17, Grad Norm: 0.0421 +[2025-02-23 07:30:18] (step=0558300) Train Loss: 0.3219, Train Steps/Sec: 16.42, Grad Norm: 0.0354 +[2025-02-23 07:30:24] (step=0558400) Train Loss: 0.3223, Train Steps/Sec: 17.14, Grad Norm: 0.0397 +[2025-02-23 07:30:29] (step=0558500) Train Loss: 0.3221, Train Steps/Sec: 17.16, Grad Norm: 0.0351 +[2025-02-23 07:30:36] (step=0558600) Train Loss: 0.3224, Train Steps/Sec: 15.73, Grad Norm: 0.0347 +[2025-02-23 07:30:42] (step=0558700) Train Loss: 0.3222, Train Steps/Sec: 16.36, Grad Norm: 0.0368 +[2025-02-23 07:30:48] (step=0558800) Train Loss: 0.3220, Train Steps/Sec: 15.63, Grad Norm: 0.0353 +[2025-02-23 07:30:55] (step=0558900) Train Loss: 0.3222, Train Steps/Sec: 15.76, Grad Norm: 0.0343 +[2025-02-23 07:31:00] (step=0559000) Train Loss: 0.3226, Train Steps/Sec: 17.25, Grad Norm: 0.0388 +[2025-02-23 07:31:06] (step=0559100) Train Loss: 0.3225, Train Steps/Sec: 17.16, Grad Norm: 0.0350 +[2025-02-23 07:31:13] (step=0559200) Train Loss: 0.3220, Train Steps/Sec: 13.98, Grad Norm: 0.0379 +[2025-02-23 07:31:19] (step=0559300) Train Loss: 0.3221, Train Steps/Sec: 17.10, Grad Norm: 0.0381 +[2025-02-23 07:31:25] (step=0559400) Train Loss: 0.3224, Train Steps/Sec: 17.16, Grad Norm: 0.0384 +[2025-02-23 07:31:31] (step=0559500) Train Loss: 0.3224, Train Steps/Sec: 17.30, Grad Norm: 0.0408 +[2025-02-23 07:31:37] (step=0559600) Train Loss: 0.3227, Train Steps/Sec: 16.49, Grad Norm: 0.0371 +[2025-02-23 07:31:43] (step=0559700) Train Loss: 0.3222, Train Steps/Sec: 17.27, Grad Norm: 0.0350 +[2025-02-23 07:31:48] (step=0559800) Train Loss: 0.3225, Train Steps/Sec: 17.37, Grad Norm: 0.0382 +[2025-02-23 07:31:55] (step=0559900) Train Loss: 0.3223, Train Steps/Sec: 15.34, Grad Norm: 0.0384 +[2025-02-23 07:32:01] (step=0560000) Train Loss: 0.3222, Train Steps/Sec: 15.87, Grad Norm: 0.0379 +[2025-02-23 07:32:07] (step=0560100) Train Loss: 0.3224, Train Steps/Sec: 16.55, Grad Norm: 0.0354 +[2025-02-23 07:32:13] (step=0560200) Train Loss: 0.3221, Train Steps/Sec: 16.53, Grad Norm: 0.0419 +[2025-02-23 07:32:19] (step=0560300) Train Loss: 0.3221, Train Steps/Sec: 17.33, Grad Norm: 0.0386 +[2025-02-23 07:32:25] (step=0560400) Train Loss: 0.3225, Train Steps/Sec: 17.33, Grad Norm: 0.0375 +[2025-02-23 07:32:32] (step=0560500) Train Loss: 0.3218, Train Steps/Sec: 14.28, Grad Norm: 0.0379 +[2025-02-23 07:32:38] (step=0560600) Train Loss: 0.3226, Train Steps/Sec: 15.81, Grad Norm: 0.0366 +[2025-02-23 07:32:44] (step=0560700) Train Loss: 0.3226, Train Steps/Sec: 16.59, Grad Norm: 0.0373 +[2025-02-23 07:32:51] (step=0560800) Train Loss: 0.3225, Train Steps/Sec: 15.84, Grad Norm: 0.0328 +[2025-02-23 07:32:57] (step=0560900) Train Loss: 0.3222, Train Steps/Sec: 16.55, Grad Norm: 0.0351 +[2025-02-23 07:33:02] (step=0561000) Train Loss: 0.3224, Train Steps/Sec: 17.32, Grad Norm: 0.0361 +[2025-02-23 07:33:08] (step=0561100) Train Loss: 0.3223, Train Steps/Sec: 17.43, Grad Norm: 0.0354 +[2025-02-23 07:33:14] (step=0561200) Train Loss: 0.3225, Train Steps/Sec: 16.70, Grad Norm: 0.0371 +[2025-02-23 07:33:20] (step=0561300) Train Loss: 0.3219, Train Steps/Sec: 17.39, Grad Norm: 0.0399 +[2025-02-23 07:33:26] (step=0561400) Train Loss: 0.3230, Train Steps/Sec: 17.37, Grad Norm: 0.0335 +[2025-02-23 07:33:31] (step=0561500) Train Loss: 0.3224, Train Steps/Sec: 17.41, Grad Norm: 0.0370 +[2025-02-23 07:33:37] (step=0561600) Train Loss: 0.3221, Train Steps/Sec: 16.58, Grad Norm: 0.0377 +[2025-02-23 07:33:44] (step=0561700) Train Loss: 0.3226, Train Steps/Sec: 14.36, Grad Norm: 0.0335 +[2025-02-23 07:33:50] (step=0561800) Train Loss: 0.3223, Train Steps/Sec: 16.35, Grad Norm: 0.0337 +[2025-02-23 07:33:57] (step=0561900) Train Loss: 0.3218, Train Steps/Sec: 14.58, Grad Norm: 0.0397 +[2025-02-23 07:34:03] (step=0562000) Train Loss: 0.3222, Train Steps/Sec: 16.61, Grad Norm: 0.0352 +[2025-02-23 07:34:09] (step=0562100) Train Loss: 0.3226, Train Steps/Sec: 17.31, Grad Norm: 0.0356 +[2025-02-23 07:34:15] (step=0562200) Train Loss: 0.3225, Train Steps/Sec: 16.54, Grad Norm: 0.0348 +[2025-02-23 07:34:21] (step=0562300) Train Loss: 0.3227, Train Steps/Sec: 17.24, Grad Norm: 0.0394 +[2025-02-23 07:34:27] (step=0562400) Train Loss: 0.3228, Train Steps/Sec: 17.14, Grad Norm: 0.0387 +[2025-02-23 07:34:33] (step=0562500) Train Loss: 0.3219, Train Steps/Sec: 17.11, Grad Norm: 0.0365 +[2025-02-23 07:34:39] (step=0562600) Train Loss: 0.3224, Train Steps/Sec: 15.67, Grad Norm: 0.0380 +[2025-02-23 07:34:45] (step=0562700) Train Loss: 0.3229, Train Steps/Sec: 16.35, Grad Norm: 0.0346 +[2025-02-23 07:34:51] (step=0562800) Train Loss: 0.3218, Train Steps/Sec: 16.27, Grad Norm: 0.0376 +[2025-02-23 07:34:58] (step=0562900) Train Loss: 0.3228, Train Steps/Sec: 15.12, Grad Norm: 0.0361 +[2025-02-23 07:35:05] (step=0563000) Train Loss: 0.3228, Train Steps/Sec: 14.19, Grad Norm: 0.0364 +[2025-02-23 07:35:11] (step=0563100) Train Loss: 0.3224, Train Steps/Sec: 17.30, Grad Norm: 0.0385 +[2025-02-23 07:35:17] (step=0563200) Train Loss: 0.3228, Train Steps/Sec: 16.65, Grad Norm: 0.0343 +[2025-02-23 07:35:23] (step=0563300) Train Loss: 0.3229, Train Steps/Sec: 17.34, Grad Norm: 0.0358 +[2025-02-23 07:35:28] (step=0563400) Train Loss: 0.3223, Train Steps/Sec: 17.20, Grad Norm: 0.0386 +[2025-02-23 07:35:34] (step=0563500) Train Loss: 0.3227, Train Steps/Sec: 16.37, Grad Norm: 0.0353 +[2025-02-23 07:35:40] (step=0563600) Train Loss: 0.3227, Train Steps/Sec: 17.09, Grad Norm: 0.0370 +[2025-02-23 07:35:46] (step=0563700) Train Loss: 0.3221, Train Steps/Sec: 17.08, Grad Norm: 0.0370 +[2025-02-23 07:35:53] (step=0563800) Train Loss: 0.3220, Train Steps/Sec: 15.15, Grad Norm: 0.0365 +[2025-02-23 07:35:59] (step=0563900) Train Loss: 0.3220, Train Steps/Sec: 15.17, Grad Norm: 0.0376 +[2025-02-23 07:36:05] (step=0564000) Train Loss: 0.3220, Train Steps/Sec: 17.22, Grad Norm: 0.0349 +[2025-02-23 07:36:11] (step=0564100) Train Loss: 0.3227, Train Steps/Sec: 16.45, Grad Norm: 0.0357 +[2025-02-23 07:36:17] (step=0564200) Train Loss: 0.3225, Train Steps/Sec: 17.11, Grad Norm: 0.0374 +[2025-02-23 07:36:24] (step=0564300) Train Loss: 0.3216, Train Steps/Sec: 14.57, Grad Norm: 0.0354 +[2025-02-23 07:36:30] (step=0564400) Train Loss: 0.3222, Train Steps/Sec: 17.39, Grad Norm: 0.0365 +[2025-02-23 07:36:36] (step=0564500) Train Loss: 0.3217, Train Steps/Sec: 17.36, Grad Norm: 0.0351 +[2025-02-23 07:36:42] (step=0564600) Train Loss: 0.3223, Train Steps/Sec: 15.84, Grad Norm: 0.0356 +[2025-02-23 07:36:48] (step=0564700) Train Loss: 0.3220, Train Steps/Sec: 16.50, Grad Norm: 0.0367 +[2025-02-23 07:36:54] (step=0564800) Train Loss: 0.3221, Train Steps/Sec: 16.46, Grad Norm: 0.0364 +[2025-02-23 07:37:01] (step=0564900) Train Loss: 0.3229, Train Steps/Sec: 15.19, Grad Norm: 0.0382 +[2025-02-23 07:37:06] (step=0565000) Train Loss: 0.3225, Train Steps/Sec: 17.31, Grad Norm: 0.0357 +[2025-02-23 07:37:12] (step=0565100) Train Loss: 0.3219, Train Steps/Sec: 17.38, Grad Norm: 0.0361 +[2025-02-23 07:37:18] (step=0565200) Train Loss: 0.3221, Train Steps/Sec: 16.60, Grad Norm: 0.0443 +[2025-02-23 07:37:24] (step=0565300) Train Loss: 0.3218, Train Steps/Sec: 17.35, Grad Norm: 0.0348 +[2025-02-23 07:37:30] (step=0565400) Train Loss: 0.3223, Train Steps/Sec: 17.39, Grad Norm: 0.0351 +[2025-02-23 07:37:37] (step=0565500) Train Loss: 0.3219, Train Steps/Sec: 13.80, Grad Norm: 0.0327 +[2025-02-23 07:37:43] (step=0565600) Train Loss: 0.3220, Train Steps/Sec: 17.31, Grad Norm: 0.0352 +[2025-02-23 07:37:49] (step=0565700) Train Loss: 0.3228, Train Steps/Sec: 15.25, Grad Norm: 0.0343 +[2025-02-23 07:37:56] (step=0565800) Train Loss: 0.3223, Train Steps/Sec: 15.89, Grad Norm: 0.0399 +[2025-02-23 07:38:02] (step=0565900) Train Loss: 0.3220, Train Steps/Sec: 16.63, Grad Norm: 0.0373 +[2025-02-23 07:38:07] (step=0566000) Train Loss: 0.3225, Train Steps/Sec: 17.36, Grad Norm: 0.0412 +[2025-02-23 07:38:13] (step=0566100) Train Loss: 0.3220, Train Steps/Sec: 16.63, Grad Norm: 0.0349 +[2025-02-23 07:38:19] (step=0566200) Train Loss: 0.3232, Train Steps/Sec: 17.34, Grad Norm: 0.0365 +[2025-02-23 07:38:25] (step=0566300) Train Loss: 0.3223, Train Steps/Sec: 17.24, Grad Norm: 0.0349 +[2025-02-23 07:38:31] (step=0566400) Train Loss: 0.3222, Train Steps/Sec: 17.31, Grad Norm: 0.0358 +[2025-02-23 07:38:36] (step=0566500) Train Loss: 0.3227, Train Steps/Sec: 17.34, Grad Norm: 0.0404 +[2025-02-23 07:38:42] (step=0566600) Train Loss: 0.3224, Train Steps/Sec: 16.56, Grad Norm: 0.0362 +[2025-02-23 07:38:48] (step=0566700) Train Loss: 0.3223, Train Steps/Sec: 16.60, Grad Norm: 0.0339 +[2025-02-23 07:38:56] (step=0566800) Train Loss: 0.3222, Train Steps/Sec: 13.35, Grad Norm: 0.0367 +[2025-02-23 07:39:03] (step=0566900) Train Loss: 0.3225, Train Steps/Sec: 15.18, Grad Norm: 0.0373 +[2025-02-23 07:39:08] (step=0567000) Train Loss: 0.3226, Train Steps/Sec: 17.38, Grad Norm: 0.0338 +[2025-02-23 07:39:14] (step=0567100) Train Loss: 0.3226, Train Steps/Sec: 17.33, Grad Norm: 0.0382 +[2025-02-23 07:39:20] (step=0567200) Train Loss: 0.3224, Train Steps/Sec: 16.64, Grad Norm: 0.0349 +[2025-02-23 07:39:26] (step=0567300) Train Loss: 0.3226, Train Steps/Sec: 17.39, Grad Norm: 0.0390 +[2025-02-23 07:39:32] (step=0567400) Train Loss: 0.3221, Train Steps/Sec: 16.59, Grad Norm: 0.0331 +[2025-02-23 07:39:38] (step=0567500) Train Loss: 0.3220, Train Steps/Sec: 17.36, Grad Norm: 0.0357 +[2025-02-23 07:39:43] (step=0567600) Train Loss: 0.3222, Train Steps/Sec: 17.34, Grad Norm: 0.0367 +[2025-02-23 07:39:51] (step=0567700) Train Loss: 0.3223, Train Steps/Sec: 14.12, Grad Norm: 0.0369 +[2025-02-23 07:39:57] (step=0567800) Train Loss: 0.3222, Train Steps/Sec: 16.63, Grad Norm: 0.0360 +[2025-02-23 07:40:02] (step=0567900) Train Loss: 0.3216, Train Steps/Sec: 17.39, Grad Norm: 0.0396 +[2025-02-23 07:40:10] (step=0568000) Train Loss: 0.3224, Train Steps/Sec: 13.81, Grad Norm: 0.0379 +[2025-02-23 07:40:15] (step=0568100) Train Loss: 0.3229, Train Steps/Sec: 16.85, Grad Norm: 0.0361 +[2025-02-23 07:40:21] (step=0568200) Train Loss: 0.3222, Train Steps/Sec: 16.87, Grad Norm: 0.0403 +[2025-02-23 07:40:27] (step=0568300) Train Loss: 0.3218, Train Steps/Sec: 16.85, Grad Norm: 0.0337 +[2025-02-23 07:40:33] (step=0568400) Train Loss: 0.3233, Train Steps/Sec: 16.85, Grad Norm: 0.0358 +[2025-02-23 07:40:39] (step=0568500) Train Loss: 0.3223, Train Steps/Sec: 16.84, Grad Norm: 0.0360 +[2025-02-23 07:40:45] (step=0568600) Train Loss: 0.3216, Train Steps/Sec: 16.10, Grad Norm: 0.0401 +[2025-02-23 07:40:52] (step=0568700) Train Loss: 0.3219, Train Steps/Sec: 16.14, Grad Norm: 0.0375 +[2025-02-23 07:40:58] (step=0568800) Train Loss: 0.3224, Train Steps/Sec: 16.00, Grad Norm: 0.0367 +[2025-02-23 07:41:05] (step=0568900) Train Loss: 0.3224, Train Steps/Sec: 14.22, Grad Norm: 0.0353 +[2025-02-23 07:41:11] (step=0569000) Train Loss: 0.3223, Train Steps/Sec: 16.78, Grad Norm: 0.0373 +[2025-02-23 07:41:17] (step=0569100) Train Loss: 0.3222, Train Steps/Sec: 16.80, Grad Norm: 0.0383 +[2025-02-23 07:41:23] (step=0569200) Train Loss: 0.3221, Train Steps/Sec: 16.05, Grad Norm: 0.0358 +[2025-02-23 07:41:30] (step=0569300) Train Loss: 0.3226, Train Steps/Sec: 14.17, Grad Norm: 0.0338 +[2025-02-23 07:41:36] (step=0569400) Train Loss: 0.3224, Train Steps/Sec: 16.23, Grad Norm: 0.0392 +[2025-02-23 07:41:42] (step=0569500) Train Loss: 0.3218, Train Steps/Sec: 17.22, Grad Norm: 0.0356 +[2025-02-23 07:41:49] (step=0569600) Train Loss: 0.3224, Train Steps/Sec: 15.26, Grad Norm: 0.0377 +[2025-02-23 07:41:55] (step=0569700) Train Loss: 0.3220, Train Steps/Sec: 15.81, Grad Norm: 0.0347 +[2025-02-23 07:42:01] (step=0569800) Train Loss: 0.3219, Train Steps/Sec: 16.54, Grad Norm: 0.0372 +[2025-02-23 07:42:07] (step=0569900) Train Loss: 0.3225, Train Steps/Sec: 17.28, Grad Norm: 0.0346 +[2025-02-23 07:42:13] (step=0570000) Train Loss: 0.3221, Train Steps/Sec: 16.51, Grad Norm: 0.0356 +[2025-02-23 07:42:19] (step=0570100) Train Loss: 0.3224, Train Steps/Sec: 17.26, Grad Norm: 0.0364 +[2025-02-23 07:42:24] (step=0570200) Train Loss: 0.3228, Train Steps/Sec: 17.25, Grad Norm: 0.0397 +[2025-02-23 07:42:30] (step=0570300) Train Loss: 0.3228, Train Steps/Sec: 17.34, Grad Norm: 0.0351 +[2025-02-23 07:42:36] (step=0570400) Train Loss: 0.3220, Train Steps/Sec: 17.35, Grad Norm: 0.0355 +[2025-02-23 07:42:43] (step=0570500) Train Loss: 0.3221, Train Steps/Sec: 14.26, Grad Norm: 0.0400 +[2025-02-23 07:42:49] (step=0570600) Train Loss: 0.3221, Train Steps/Sec: 16.47, Grad Norm: 0.0342 +[2025-02-23 07:42:55] (step=0570700) Train Loss: 0.3223, Train Steps/Sec: 16.64, Grad Norm: 0.0383 +[2025-02-23 07:43:01] (step=0570800) Train Loss: 0.3227, Train Steps/Sec: 16.62, Grad Norm: 0.0353 +[2025-02-23 07:43:08] (step=0570900) Train Loss: 0.3223, Train Steps/Sec: 14.69, Grad Norm: 0.0356 +[2025-02-23 07:43:14] (step=0571000) Train Loss: 0.3229, Train Steps/Sec: 17.29, Grad Norm: 0.0350 +[2025-02-23 07:43:19] (step=0571100) Train Loss: 0.3224, Train Steps/Sec: 17.35, Grad Norm: 0.0363 +[2025-02-23 07:43:25] (step=0571200) Train Loss: 0.3220, Train Steps/Sec: 16.59, Grad Norm: 0.0363 +[2025-02-23 07:43:32] (step=0571300) Train Loss: 0.3221, Train Steps/Sec: 16.66, Grad Norm: 0.0338 +[2025-02-23 07:43:37] (step=0571400) Train Loss: 0.3223, Train Steps/Sec: 17.43, Grad Norm: 0.0377 +[2025-02-23 07:43:43] (step=0571500) Train Loss: 0.3227, Train Steps/Sec: 16.68, Grad Norm: 0.0389 +[2025-02-23 07:43:50] (step=0571600) Train Loss: 0.3225, Train Steps/Sec: 14.72, Grad Norm: 0.0330 +[2025-02-23 07:43:56] (step=0571700) Train Loss: 0.3221, Train Steps/Sec: 16.65, Grad Norm: 0.0406 +[2025-02-23 07:44:03] (step=0571800) Train Loss: 0.3220, Train Steps/Sec: 14.58, Grad Norm: 0.0368 +[2025-02-23 07:44:09] (step=0571900) Train Loss: 0.3220, Train Steps/Sec: 16.72, Grad Norm: 0.0402 +[2025-02-23 07:44:15] (step=0572000) Train Loss: 0.3225, Train Steps/Sec: 17.36, Grad Norm: 0.0351 +[2025-02-23 07:44:20] (step=0572100) Train Loss: 0.3223, Train Steps/Sec: 17.35, Grad Norm: 0.0369 +[2025-02-23 07:44:26] (step=0572200) Train Loss: 0.3216, Train Steps/Sec: 17.29, Grad Norm: 0.0365 +[2025-02-23 07:44:32] (step=0572300) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0378 +[2025-02-23 07:44:38] (step=0572400) Train Loss: 0.3216, Train Steps/Sec: 17.41, Grad Norm: 0.0365 +[2025-02-23 07:44:43] (step=0572500) Train Loss: 0.3224, Train Steps/Sec: 17.38, Grad Norm: 0.0353 +[2025-02-23 07:44:50] (step=0572600) Train Loss: 0.3224, Train Steps/Sec: 16.55, Grad Norm: 0.0353 +[2025-02-23 07:44:56] (step=0572700) Train Loss: 0.3220, Train Steps/Sec: 16.62, Grad Norm: 0.0359 +[2025-02-23 07:45:02] (step=0572800) Train Loss: 0.3225, Train Steps/Sec: 16.65, Grad Norm: 0.0381 +[2025-02-23 07:45:08] (step=0572900) Train Loss: 0.3219, Train Steps/Sec: 15.31, Grad Norm: 0.0377 +[2025-02-23 07:45:15] (step=0573000) Train Loss: 0.3226, Train Steps/Sec: 14.05, Grad Norm: 0.0339 +[2025-02-23 07:45:21] (step=0573100) Train Loss: 0.3220, Train Steps/Sec: 17.27, Grad Norm: 0.0427 +[2025-02-23 07:45:27] (step=0573200) Train Loss: 0.3219, Train Steps/Sec: 16.51, Grad Norm: 0.0356 +[2025-02-23 07:45:33] (step=0573300) Train Loss: 0.3226, Train Steps/Sec: 16.57, Grad Norm: 0.0348 +[2025-02-23 07:45:39] (step=0573400) Train Loss: 0.3223, Train Steps/Sec: 17.35, Grad Norm: 0.0394 +[2025-02-23 07:45:45] (step=0573500) Train Loss: 0.3222, Train Steps/Sec: 15.32, Grad Norm: 0.0346 +[2025-02-23 07:45:52] (step=0573600) Train Loss: 0.3224, Train Steps/Sec: 15.33, Grad Norm: 0.0388 +[2025-02-23 07:45:58] (step=0573700) Train Loss: 0.3221, Train Steps/Sec: 17.40, Grad Norm: 0.0368 +[2025-02-23 07:46:04] (step=0573800) Train Loss: 0.3219, Train Steps/Sec: 16.65, Grad Norm: 0.0346 +[2025-02-23 07:46:09] (step=0573900) Train Loss: 0.3224, Train Steps/Sec: 17.30, Grad Norm: 0.0353 +[2025-02-23 07:46:15] (step=0574000) Train Loss: 0.3222, Train Steps/Sec: 17.24, Grad Norm: 0.0396 +[2025-02-23 07:46:21] (step=0574100) Train Loss: 0.3222, Train Steps/Sec: 17.33, Grad Norm: 0.0342 +[2025-02-23 07:46:27] (step=0574200) Train Loss: 0.3223, Train Steps/Sec: 17.31, Grad Norm: 0.0396 +[2025-02-23 07:46:34] (step=0574300) Train Loss: 0.3222, Train Steps/Sec: 14.55, Grad Norm: 0.0373 +[2025-02-23 07:46:39] (step=0574400) Train Loss: 0.3219, Train Steps/Sec: 17.32, Grad Norm: 0.0344 +[2025-02-23 07:46:45] (step=0574500) Train Loss: 0.3222, Train Steps/Sec: 17.38, Grad Norm: 0.0362 +[2025-02-23 07:46:51] (step=0574600) Train Loss: 0.3220, Train Steps/Sec: 16.49, Grad Norm: 0.0379 +[2025-02-23 07:46:57] (step=0574700) Train Loss: 0.3222, Train Steps/Sec: 16.57, Grad Norm: 0.0384 +[2025-02-23 07:47:03] (step=0574800) Train Loss: 0.3224, Train Steps/Sec: 16.54, Grad Norm: 0.0376 +[2025-02-23 07:47:10] (step=0574900) Train Loss: 0.3221, Train Steps/Sec: 15.28, Grad Norm: 0.0377 +[2025-02-23 07:47:16] (step=0575000) Train Loss: 0.3227, Train Steps/Sec: 16.58, Grad Norm: 0.0352 +[2025-02-23 07:47:22] (step=0575100) Train Loss: 0.3220, Train Steps/Sec: 17.31, Grad Norm: 0.0391 +[2025-02-23 07:47:28] (step=0575200) Train Loss: 0.3223, Train Steps/Sec: 15.88, Grad Norm: 0.0374 +[2025-02-23 07:47:34] (step=0575300) Train Loss: 0.3222, Train Steps/Sec: 17.35, Grad Norm: 0.0387 +[2025-02-23 07:47:40] (step=0575400) Train Loss: 0.3225, Train Steps/Sec: 16.66, Grad Norm: 0.0352 +[2025-02-23 07:47:48] (step=0575500) Train Loss: 0.3221, Train Steps/Sec: 12.64, Grad Norm: 0.0382 +[2025-02-23 07:47:54] (step=0575600) Train Loss: 0.3220, Train Steps/Sec: 16.59, Grad Norm: 0.0353 +[2025-02-23 07:47:59] (step=0575700) Train Loss: 0.3218, Train Steps/Sec: 17.37, Grad Norm: 0.0348 +[2025-02-23 07:48:06] (step=0575800) Train Loss: 0.3220, Train Steps/Sec: 16.64, Grad Norm: 0.0346 +[2025-02-23 07:48:11] (step=0575900) Train Loss: 0.3231, Train Steps/Sec: 17.36, Grad Norm: 0.0377 +[2025-02-23 07:48:17] (step=0576000) Train Loss: 0.3221, Train Steps/Sec: 17.41, Grad Norm: 0.0370 +[2025-02-23 07:48:23] (step=0576100) Train Loss: 0.3221, Train Steps/Sec: 17.44, Grad Norm: 0.0387 +[2025-02-23 07:48:29] (step=0576200) Train Loss: 0.3224, Train Steps/Sec: 17.32, Grad Norm: 0.0356 +[2025-02-23 07:48:34] (step=0576300) Train Loss: 0.3225, Train Steps/Sec: 17.34, Grad Norm: 0.0341 +[2025-02-23 07:48:40] (step=0576400) Train Loss: 0.3225, Train Steps/Sec: 17.34, Grad Norm: 0.0388 +[2025-02-23 07:48:46] (step=0576500) Train Loss: 0.3228, Train Steps/Sec: 17.33, Grad Norm: 0.0343 +[2025-02-23 07:48:52] (step=0576600) Train Loss: 0.3228, Train Steps/Sec: 16.47, Grad Norm: 0.0389 +[2025-02-23 07:48:58] (step=0576700) Train Loss: 0.3225, Train Steps/Sec: 16.55, Grad Norm: 0.0340 +[2025-02-23 07:49:05] (step=0576800) Train Loss: 0.3226, Train Steps/Sec: 14.00, Grad Norm: 0.0401 +[2025-02-23 07:49:12] (step=0576900) Train Loss: 0.3225, Train Steps/Sec: 15.28, Grad Norm: 0.0337 +[2025-02-23 07:49:18] (step=0577000) Train Loss: 0.3226, Train Steps/Sec: 16.56, Grad Norm: 0.0368 +[2025-02-23 07:49:23] (step=0577100) Train Loss: 0.3222, Train Steps/Sec: 17.25, Grad Norm: 0.0380 +[2025-02-23 07:49:30] (step=0577200) Train Loss: 0.3225, Train Steps/Sec: 15.76, Grad Norm: 0.0358 +[2025-02-23 07:49:36] (step=0577300) Train Loss: 0.3221, Train Steps/Sec: 17.31, Grad Norm: 0.0360 +[2025-02-23 07:49:42] (step=0577400) Train Loss: 0.3224, Train Steps/Sec: 15.86, Grad Norm: 0.0357 +[2025-02-23 07:49:48] (step=0577500) Train Loss: 0.3222, Train Steps/Sec: 15.22, Grad Norm: 0.0392 +[2025-02-23 07:49:54] (step=0577600) Train Loss: 0.3225, Train Steps/Sec: 17.20, Grad Norm: 0.0372 +[2025-02-23 07:50:00] (step=0577700) Train Loss: 0.3221, Train Steps/Sec: 16.67, Grad Norm: 0.0369 +[2025-02-23 07:50:06] (step=0577800) Train Loss: 0.3225, Train Steps/Sec: 17.28, Grad Norm: 0.0369 +[2025-02-23 07:50:12] (step=0577900) Train Loss: 0.3225, Train Steps/Sec: 17.24, Grad Norm: 0.0362 +[2025-02-23 07:50:19] (step=0578000) Train Loss: 0.3218, Train Steps/Sec: 14.55, Grad Norm: 0.0364 +[2025-02-23 07:50:25] (step=0578100) Train Loss: 0.3224, Train Steps/Sec: 17.29, Grad Norm: 0.0352 +[2025-02-23 07:50:30] (step=0578200) Train Loss: 0.3220, Train Steps/Sec: 17.27, Grad Norm: 0.0351 +[2025-02-23 07:50:36] (step=0578300) Train Loss: 0.3223, Train Steps/Sec: 17.32, Grad Norm: 0.0349 +[2025-02-23 07:50:42] (step=0578400) Train Loss: 0.3228, Train Steps/Sec: 17.31, Grad Norm: 0.0336 +[2025-02-23 07:50:48] (step=0578500) Train Loss: 0.3217, Train Steps/Sec: 17.31, Grad Norm: 0.0387 +[2025-02-23 07:50:54] (step=0578600) Train Loss: 0.3226, Train Steps/Sec: 16.50, Grad Norm: 0.0351 +[2025-02-23 07:51:00] (step=0578700) Train Loss: 0.3222, Train Steps/Sec: 16.65, Grad Norm: 0.0376 +[2025-02-23 07:51:06] (step=0578800) Train Loss: 0.3224, Train Steps/Sec: 16.60, Grad Norm: 0.0342 +[2025-02-23 07:51:12] (step=0578900) Train Loss: 0.3218, Train Steps/Sec: 15.31, Grad Norm: 0.0373 +[2025-02-23 07:51:18] (step=0579000) Train Loss: 0.3220, Train Steps/Sec: 16.59, Grad Norm: 0.0377 +[2025-02-23 07:51:24] (step=0579100) Train Loss: 0.3221, Train Steps/Sec: 16.61, Grad Norm: 0.0371 +[2025-02-23 07:51:30] (step=0579200) Train Loss: 0.3222, Train Steps/Sec: 16.54, Grad Norm: 0.0366 +[2025-02-23 07:51:38] (step=0579300) Train Loss: 0.3221, Train Steps/Sec: 13.80, Grad Norm: 0.0342 +[2025-02-23 07:51:45] (step=0579400) Train Loss: 0.3219, Train Steps/Sec: 14.53, Grad Norm: 0.0369 +[2025-02-23 07:51:51] (step=0579500) Train Loss: 0.3216, Train Steps/Sec: 16.43, Grad Norm: 0.0351 +[2025-02-23 07:51:56] (step=0579600) Train Loss: 0.3226, Train Steps/Sec: 17.10, Grad Norm: 0.0361 +[2025-02-23 07:52:03] (step=0579700) Train Loss: 0.3224, Train Steps/Sec: 16.44, Grad Norm: 0.0346 +[2025-02-23 07:52:08] (step=0579800) Train Loss: 0.3228, Train Steps/Sec: 17.12, Grad Norm: 0.0374 +[2025-02-23 07:52:14] (step=0579900) Train Loss: 0.3221, Train Steps/Sec: 17.12, Grad Norm: 0.0340 +[2025-02-23 07:52:20] (step=0580000) Train Loss: 0.3223, Train Steps/Sec: 17.11, Grad Norm: 0.0353 +[2025-02-23 07:52:26] (step=0580100) Train Loss: 0.3220, Train Steps/Sec: 17.06, Grad Norm: 0.0354 +[2025-02-23 07:52:32] (step=0580200) Train Loss: 0.3223, Train Steps/Sec: 17.28, Grad Norm: 0.0356 +[2025-02-23 07:52:38] (step=0580300) Train Loss: 0.3225, Train Steps/Sec: 17.34, Grad Norm: 0.0356 +[2025-02-23 07:52:43] (step=0580400) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0352 +[2025-02-23 07:52:50] (step=0580500) Train Loss: 0.3226, Train Steps/Sec: 14.45, Grad Norm: 0.0377 +[2025-02-23 07:52:56] (step=0580600) Train Loss: 0.3224, Train Steps/Sec: 16.41, Grad Norm: 0.0363 +[2025-02-23 07:53:02] (step=0580700) Train Loss: 0.3217, Train Steps/Sec: 16.48, Grad Norm: 0.0327 +[2025-02-23 07:53:08] (step=0580800) Train Loss: 0.3222, Train Steps/Sec: 16.41, Grad Norm: 0.0382 +[2025-02-23 07:53:15] (step=0580900) Train Loss: 0.3223, Train Steps/Sec: 15.15, Grad Norm: 0.0394 +[2025-02-23 07:53:21] (step=0581000) Train Loss: 0.3223, Train Steps/Sec: 16.39, Grad Norm: 0.0335 +[2025-02-23 07:53:27] (step=0581100) Train Loss: 0.3223, Train Steps/Sec: 16.43, Grad Norm: 0.0370 +[2025-02-23 07:53:33] (step=0581200) Train Loss: 0.3222, Train Steps/Sec: 16.49, Grad Norm: 0.0364 +[2025-02-23 07:53:40] (step=0581300) Train Loss: 0.3223, Train Steps/Sec: 15.36, Grad Norm: 0.0343 +[2025-02-23 07:53:46] (step=0581400) Train Loss: 0.3222, Train Steps/Sec: 15.94, Grad Norm: 0.0359 +[2025-02-23 07:53:52] (step=0581500) Train Loss: 0.3221, Train Steps/Sec: 17.35, Grad Norm: 0.0340 +[2025-02-23 07:53:58] (step=0581600) Train Loss: 0.3223, Train Steps/Sec: 16.66, Grad Norm: 0.0335 +[2025-02-23 07:54:04] (step=0581700) Train Loss: 0.3220, Train Steps/Sec: 17.27, Grad Norm: 0.0389 +[2025-02-23 07:54:11] (step=0581800) Train Loss: 0.3224, Train Steps/Sec: 14.55, Grad Norm: 0.0331 +[2025-02-23 07:54:16] (step=0581900) Train Loss: 0.3229, Train Steps/Sec: 17.38, Grad Norm: 0.0377 +[2025-02-23 07:54:22] (step=0582000) Train Loss: 0.3226, Train Steps/Sec: 17.29, Grad Norm: 0.0343 +[2025-02-23 07:54:28] (step=0582100) Train Loss: 0.3223, Train Steps/Sec: 17.38, Grad Norm: 0.0362 +[2025-02-23 07:54:34] (step=0582200) Train Loss: 0.3223, Train Steps/Sec: 17.35, Grad Norm: 0.0383 +[2025-02-23 07:54:39] (step=0582300) Train Loss: 0.3228, Train Steps/Sec: 17.37, Grad Norm: 0.0360 +[2025-02-23 07:54:45] (step=0582400) Train Loss: 0.3224, Train Steps/Sec: 17.32, Grad Norm: 0.0381 +[2025-02-23 07:54:51] (step=0582500) Train Loss: 0.3224, Train Steps/Sec: 17.39, Grad Norm: 0.0346 +[2025-02-23 07:54:57] (step=0582600) Train Loss: 0.3219, Train Steps/Sec: 16.58, Grad Norm: 0.0364 +[2025-02-23 07:55:03] (step=0582700) Train Loss: 0.3220, Train Steps/Sec: 16.64, Grad Norm: 0.0358 +[2025-02-23 07:55:09] (step=0582800) Train Loss: 0.3219, Train Steps/Sec: 16.65, Grad Norm: 0.0405 +[2025-02-23 07:55:16] (step=0582900) Train Loss: 0.3224, Train Steps/Sec: 15.28, Grad Norm: 0.0400 +[2025-02-23 07:55:23] (step=0583000) Train Loss: 0.3223, Train Steps/Sec: 13.44, Grad Norm: 0.0368 +[2025-02-23 07:55:29] (step=0583100) Train Loss: 0.3222, Train Steps/Sec: 17.28, Grad Norm: 0.0353 +[2025-02-23 07:55:35] (step=0583200) Train Loss: 0.3216, Train Steps/Sec: 15.90, Grad Norm: 0.0350 +[2025-02-23 07:55:42] (step=0583300) Train Loss: 0.3226, Train Steps/Sec: 15.26, Grad Norm: 0.0352 +[2025-02-23 07:55:48] (step=0583400) Train Loss: 0.3223, Train Steps/Sec: 16.71, Grad Norm: 0.0373 +[2025-02-23 07:55:53] (step=0583500) Train Loss: 0.3216, Train Steps/Sec: 17.37, Grad Norm: 0.0331 +[2025-02-23 07:55:59] (step=0583600) Train Loss: 0.3222, Train Steps/Sec: 16.63, Grad Norm: 0.0366 +[2025-02-23 07:56:05] (step=0583700) Train Loss: 0.3226, Train Steps/Sec: 17.29, Grad Norm: 0.0396 +[2025-02-23 07:56:11] (step=0583800) Train Loss: 0.3223, Train Steps/Sec: 17.35, Grad Norm: 0.0341 +[2025-02-23 07:56:17] (step=0583900) Train Loss: 0.3226, Train Steps/Sec: 17.41, Grad Norm: 0.0372 +[2025-02-23 07:56:22] (step=0584000) Train Loss: 0.3226, Train Steps/Sec: 17.47, Grad Norm: 0.0372 +[2025-02-23 07:56:28] (step=0584100) Train Loss: 0.3226, Train Steps/Sec: 17.37, Grad Norm: 0.0377 +[2025-02-23 07:56:34] (step=0584200) Train Loss: 0.3221, Train Steps/Sec: 17.26, Grad Norm: 0.0352 +[2025-02-23 07:56:41] (step=0584300) Train Loss: 0.3225, Train Steps/Sec: 14.59, Grad Norm: 0.0354 +[2025-02-23 07:56:47] (step=0584400) Train Loss: 0.3221, Train Steps/Sec: 17.28, Grad Norm: 0.0375 +[2025-02-23 07:56:52] (step=0584500) Train Loss: 0.3222, Train Steps/Sec: 17.28, Grad Norm: 0.0341 +[2025-02-23 07:56:58] (step=0584600) Train Loss: 0.3225, Train Steps/Sec: 16.53, Grad Norm: 0.0406 +[2025-02-23 07:57:04] (step=0584700) Train Loss: 0.3224, Train Steps/Sec: 16.57, Grad Norm: 0.0346 +[2025-02-23 07:57:10] (step=0584800) Train Loss: 0.3226, Train Steps/Sec: 16.61, Grad Norm: 0.0360 +[2025-02-23 07:57:17] (step=0584900) Train Loss: 0.3230, Train Steps/Sec: 15.31, Grad Norm: 0.0384 +[2025-02-23 07:57:23] (step=0585000) Train Loss: 0.3227, Train Steps/Sec: 15.86, Grad Norm: 0.0356 +[2025-02-23 07:57:29] (step=0585100) Train Loss: 0.3226, Train Steps/Sec: 17.37, Grad Norm: 0.0397 +[2025-02-23 07:57:36] (step=0585200) Train Loss: 0.3221, Train Steps/Sec: 14.26, Grad Norm: 0.0350 +[2025-02-23 07:57:42] (step=0585300) Train Loss: 0.3221, Train Steps/Sec: 15.96, Grad Norm: 0.0382 +[2025-02-23 07:57:48] (step=0585400) Train Loss: 0.3224, Train Steps/Sec: 17.45, Grad Norm: 0.0396 +[2025-02-23 07:57:55] (step=0585500) Train Loss: 0.3221, Train Steps/Sec: 14.01, Grad Norm: 0.0335 +[2025-02-23 07:58:01] (step=0585600) Train Loss: 0.3230, Train Steps/Sec: 17.27, Grad Norm: 0.0342 +[2025-02-23 07:58:07] (step=0585700) Train Loss: 0.3222, Train Steps/Sec: 17.27, Grad Norm: 0.0360 +[2025-02-23 07:58:13] (step=0585800) Train Loss: 0.3228, Train Steps/Sec: 17.33, Grad Norm: 0.0366 +[2025-02-23 07:58:18] (step=0585900) Train Loss: 0.3216, Train Steps/Sec: 17.35, Grad Norm: 0.0388 +[2025-02-23 07:58:24] (step=0586000) Train Loss: 0.3224, Train Steps/Sec: 17.29, Grad Norm: 0.0373 +[2025-02-23 07:58:30] (step=0586100) Train Loss: 0.3226, Train Steps/Sec: 17.38, Grad Norm: 0.0347 +[2025-02-23 07:58:36] (step=0586200) Train Loss: 0.3228, Train Steps/Sec: 17.33, Grad Norm: 0.0378 +[2025-02-23 07:58:41] (step=0586300) Train Loss: 0.3224, Train Steps/Sec: 17.28, Grad Norm: 0.0372 +[2025-02-23 07:58:47] (step=0586400) Train Loss: 0.3226, Train Steps/Sec: 17.31, Grad Norm: 0.0391 +[2025-02-23 07:58:53] (step=0586500) Train Loss: 0.3227, Train Steps/Sec: 17.31, Grad Norm: 0.0341 +[2025-02-23 07:58:59] (step=0586600) Train Loss: 0.3226, Train Steps/Sec: 16.44, Grad Norm: 0.0404 +[2025-02-23 07:59:05] (step=0586700) Train Loss: 0.3228, Train Steps/Sec: 16.43, Grad Norm: 0.0373 +[2025-02-23 07:59:12] (step=0586800) Train Loss: 0.3223, Train Steps/Sec: 13.97, Grad Norm: 0.0342 +[2025-02-23 07:59:19] (step=0586900) Train Loss: 0.3224, Train Steps/Sec: 14.65, Grad Norm: 0.0371 +[2025-02-23 07:59:25] (step=0587000) Train Loss: 0.3218, Train Steps/Sec: 16.65, Grad Norm: 0.0365 +[2025-02-23 07:59:31] (step=0587100) Train Loss: 0.3220, Train Steps/Sec: 15.95, Grad Norm: 0.0379 +[2025-02-23 07:59:38] (step=0587200) Train Loss: 0.3228, Train Steps/Sec: 15.36, Grad Norm: 0.0368 +[2025-02-23 07:59:44] (step=0587300) Train Loss: 0.3221, Train Steps/Sec: 15.95, Grad Norm: 0.0350 +[2025-02-23 07:59:50] (step=0587400) Train Loss: 0.3223, Train Steps/Sec: 17.41, Grad Norm: 0.0377 +[2025-02-23 07:59:56] (step=0587500) Train Loss: 0.3226, Train Steps/Sec: 16.65, Grad Norm: 0.0358 +[2025-02-23 08:00:02] (step=0587600) Train Loss: 0.3220, Train Steps/Sec: 17.38, Grad Norm: 0.0346 +[2025-02-23 08:00:07] (step=0587700) Train Loss: 0.3223, Train Steps/Sec: 17.38, Grad Norm: 0.0378 +[2025-02-23 08:00:13] (step=0587800) Train Loss: 0.3227, Train Steps/Sec: 17.42, Grad Norm: 0.0360 +[2025-02-23 08:00:19] (step=0587900) Train Loss: 0.3225, Train Steps/Sec: 17.44, Grad Norm: 0.0399 +[2025-02-23 08:00:26] (step=0588000) Train Loss: 0.3216, Train Steps/Sec: 14.48, Grad Norm: 0.0387 +[2025-02-23 08:00:32] (step=0588100) Train Loss: 0.3225, Train Steps/Sec: 17.25, Grad Norm: 0.0348 +[2025-02-23 08:00:37] (step=0588200) Train Loss: 0.3221, Train Steps/Sec: 17.22, Grad Norm: 0.0347 +[2025-02-23 08:00:43] (step=0588300) Train Loss: 0.3224, Train Steps/Sec: 17.26, Grad Norm: 0.0362 +[2025-02-23 08:00:49] (step=0588400) Train Loss: 0.3218, Train Steps/Sec: 17.27, Grad Norm: 0.0384 +[2025-02-23 08:00:55] (step=0588500) Train Loss: 0.3224, Train Steps/Sec: 17.25, Grad Norm: 0.0385 +[2025-02-23 08:01:01] (step=0588600) Train Loss: 0.3227, Train Steps/Sec: 16.52, Grad Norm: 0.0345 +[2025-02-23 08:01:07] (step=0588700) Train Loss: 0.3223, Train Steps/Sec: 16.56, Grad Norm: 0.0371 +[2025-02-23 08:01:13] (step=0588800) Train Loss: 0.3220, Train Steps/Sec: 16.58, Grad Norm: 0.0366 +[2025-02-23 08:01:20] (step=0588900) Train Loss: 0.3223, Train Steps/Sec: 14.62, Grad Norm: 0.0326 +[2025-02-23 08:01:26] (step=0589000) Train Loss: 0.3222, Train Steps/Sec: 16.60, Grad Norm: 0.0386 +[2025-02-23 08:01:33] (step=0589100) Train Loss: 0.3224, Train Steps/Sec: 14.76, Grad Norm: 0.0374 +[2025-02-23 08:01:39] (step=0589200) Train Loss: 0.3219, Train Steps/Sec: 15.83, Grad Norm: 0.0406 +[2025-02-23 08:01:46] (step=0589300) Train Loss: 0.3219, Train Steps/Sec: 14.00, Grad Norm: 0.0348 +[2025-02-23 08:01:52] (step=0589400) Train Loss: 0.3221, Train Steps/Sec: 16.49, Grad Norm: 0.0372 +[2025-02-23 08:01:58] (step=0589500) Train Loss: 0.3222, Train Steps/Sec: 17.20, Grad Norm: 0.0368 +[2025-02-23 08:02:04] (step=0589600) Train Loss: 0.3223, Train Steps/Sec: 17.24, Grad Norm: 0.0369 +[2025-02-23 08:02:10] (step=0589700) Train Loss: 0.3223, Train Steps/Sec: 17.27, Grad Norm: 0.0374 +[2025-02-23 08:02:15] (step=0589800) Train Loss: 0.3222, Train Steps/Sec: 17.28, Grad Norm: 0.0354 +[2025-02-23 08:02:21] (step=0589900) Train Loss: 0.3221, Train Steps/Sec: 17.26, Grad Norm: 0.0378 +[2025-02-23 08:02:27] (step=0590000) Train Loss: 0.3223, Train Steps/Sec: 17.21, Grad Norm: 0.0358 +[2025-02-23 08:02:33] (step=0590100) Train Loss: 0.3223, Train Steps/Sec: 17.26, Grad Norm: 0.0379 +[2025-02-23 08:02:39] (step=0590200) Train Loss: 0.3224, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 08:02:44] (step=0590300) Train Loss: 0.3229, Train Steps/Sec: 17.28, Grad Norm: 0.0396 +[2025-02-23 08:02:50] (step=0590400) Train Loss: 0.3226, Train Steps/Sec: 17.29, Grad Norm: 0.0390 +[2025-02-23 08:02:57] (step=0590500) Train Loss: 0.3216, Train Steps/Sec: 14.51, Grad Norm: 0.0381 +[2025-02-23 08:03:03] (step=0590600) Train Loss: 0.3216, Train Steps/Sec: 16.50, Grad Norm: 0.0353 +[2025-02-23 08:03:09] (step=0590700) Train Loss: 0.3220, Train Steps/Sec: 16.64, Grad Norm: 0.0403 +[2025-02-23 08:03:15] (step=0590800) Train Loss: 0.3215, Train Steps/Sec: 15.92, Grad Norm: 0.0331 +[2025-02-23 08:03:22] (step=0590900) Train Loss: 0.3220, Train Steps/Sec: 15.30, Grad Norm: 0.0380 +[2025-02-23 08:03:28] (step=0591000) Train Loss: 0.3228, Train Steps/Sec: 15.34, Grad Norm: 0.0355 +[2025-02-23 08:03:35] (step=0591100) Train Loss: 0.3218, Train Steps/Sec: 14.80, Grad Norm: 0.0380 +[2025-02-23 08:03:41] (step=0591200) Train Loss: 0.3222, Train Steps/Sec: 17.41, Grad Norm: 0.0375 +[2025-02-23 08:03:47] (step=0591300) Train Loss: 0.3225, Train Steps/Sec: 15.84, Grad Norm: 0.0369 +[2025-02-23 08:03:53] (step=0591400) Train Loss: 0.3220, Train Steps/Sec: 17.31, Grad Norm: 0.0344 +[2025-02-23 08:03:59] (step=0591500) Train Loss: 0.3226, Train Steps/Sec: 17.39, Grad Norm: 0.0392 +[2025-02-23 08:04:05] (step=0591600) Train Loss: 0.3223, Train Steps/Sec: 17.29, Grad Norm: 0.0361 +[2025-02-23 08:04:10] (step=0591700) Train Loss: 0.3221, Train Steps/Sec: 17.18, Grad Norm: 0.0365 +[2025-02-23 08:04:17] (step=0591800) Train Loss: 0.3217, Train Steps/Sec: 14.54, Grad Norm: 0.0347 +[2025-02-23 08:04:23] (step=0591900) Train Loss: 0.3222, Train Steps/Sec: 17.36, Grad Norm: 0.0354 +[2025-02-23 08:04:29] (step=0592000) Train Loss: 0.3221, Train Steps/Sec: 17.28, Grad Norm: 0.0353 +[2025-02-23 08:04:35] (step=0592100) Train Loss: 0.3229, Train Steps/Sec: 17.27, Grad Norm: 0.0372 +[2025-02-23 08:04:40] (step=0592200) Train Loss: 0.3222, Train Steps/Sec: 17.27, Grad Norm: 0.0357 +[2025-02-23 08:04:46] (step=0592300) Train Loss: 0.3219, Train Steps/Sec: 17.40, Grad Norm: 0.0382 +[2025-02-23 08:04:52] (step=0592400) Train Loss: 0.3225, Train Steps/Sec: 17.47, Grad Norm: 0.0374 +[2025-02-23 08:04:58] (step=0592500) Train Loss: 0.3221, Train Steps/Sec: 17.46, Grad Norm: 0.0373 +[2025-02-23 08:05:03] (step=0592600) Train Loss: 0.3224, Train Steps/Sec: 17.44, Grad Norm: 0.0349 +[2025-02-23 08:05:10] (step=0592700) Train Loss: 0.3218, Train Steps/Sec: 15.90, Grad Norm: 0.0376 +[2025-02-23 08:05:16] (step=0592800) Train Loss: 0.3221, Train Steps/Sec: 15.95, Grad Norm: 0.0343 +[2025-02-23 08:05:22] (step=0592900) Train Loss: 0.3220, Train Steps/Sec: 16.68, Grad Norm: 0.0357 +[2025-02-23 08:05:31] (step=0593000) Train Loss: 0.3219, Train Steps/Sec: 11.55, Grad Norm: 0.0364 +[2025-02-23 08:05:37] (step=0593100) Train Loss: 0.3222, Train Steps/Sec: 16.54, Grad Norm: 0.0337 +[2025-02-23 08:05:42] (step=0593200) Train Loss: 0.3215, Train Steps/Sec: 17.38, Grad Norm: 0.0376 +[2025-02-23 08:05:49] (step=0593300) Train Loss: 0.3223, Train Steps/Sec: 15.86, Grad Norm: 0.0326 +[2025-02-23 08:05:54] (step=0593400) Train Loss: 0.3225, Train Steps/Sec: 17.30, Grad Norm: 0.0406 +[2025-02-23 08:06:00] (step=0593500) Train Loss: 0.3222, Train Steps/Sec: 17.29, Grad Norm: 0.0364 +[2025-02-23 08:06:06] (step=0593600) Train Loss: 0.3218, Train Steps/Sec: 17.35, Grad Norm: 0.0383 +[2025-02-23 08:06:12] (step=0593700) Train Loss: 0.3223, Train Steps/Sec: 17.29, Grad Norm: 0.0363 +[2025-02-23 08:06:18] (step=0593800) Train Loss: 0.3219, Train Steps/Sec: 17.36, Grad Norm: 0.0350 +[2025-02-23 08:06:23] (step=0593900) Train Loss: 0.3219, Train Steps/Sec: 17.40, Grad Norm: 0.0347 +[2025-02-23 08:06:29] (step=0594000) Train Loss: 0.3227, Train Steps/Sec: 17.37, Grad Norm: 0.0363 +[2025-02-23 08:06:35] (step=0594100) Train Loss: 0.3222, Train Steps/Sec: 17.35, Grad Norm: 0.0343 +[2025-02-23 08:06:41] (step=0594200) Train Loss: 0.3216, Train Steps/Sec: 17.31, Grad Norm: 0.0351 +[2025-02-23 08:06:47] (step=0594300) Train Loss: 0.3221, Train Steps/Sec: 14.55, Grad Norm: 0.0351 +[2025-02-23 08:06:53] (step=0594400) Train Loss: 0.3220, Train Steps/Sec: 17.36, Grad Norm: 0.0343 +[2025-02-23 08:06:59] (step=0594500) Train Loss: 0.3221, Train Steps/Sec: 17.40, Grad Norm: 0.0367 +[2025-02-23 08:07:05] (step=0594600) Train Loss: 0.3219, Train Steps/Sec: 17.44, Grad Norm: 0.0360 +[2025-02-23 08:07:11] (step=0594700) Train Loss: 0.3221, Train Steps/Sec: 15.17, Grad Norm: 0.0381 +[2025-02-23 08:07:17] (step=0594800) Train Loss: 0.3230, Train Steps/Sec: 16.46, Grad Norm: 0.0373 +[2025-02-23 08:07:24] (step=0594900) Train Loss: 0.3217, Train Steps/Sec: 15.25, Grad Norm: 0.0410 +[2025-02-23 08:07:32] (step=0595000) Train Loss: 0.3217, Train Steps/Sec: 13.06, Grad Norm: 0.0368 +[2025-02-23 08:07:37] (step=0595100) Train Loss: 0.3223, Train Steps/Sec: 17.24, Grad Norm: 0.0365 +[2025-02-23 08:07:43] (step=0595200) Train Loss: 0.3227, Train Steps/Sec: 17.22, Grad Norm: 0.0345 +[2025-02-23 08:07:50] (step=0595300) Train Loss: 0.3223, Train Steps/Sec: 15.76, Grad Norm: 0.0350 +[2025-02-23 08:07:55] (step=0595400) Train Loss: 0.3220, Train Steps/Sec: 17.20, Grad Norm: 0.0341 +[2025-02-23 08:08:02] (step=0595500) Train Loss: 0.3226, Train Steps/Sec: 14.29, Grad Norm: 0.0373 +[2025-02-23 08:08:08] (step=0595600) Train Loss: 0.3225, Train Steps/Sec: 17.26, Grad Norm: 0.0335 +[2025-02-23 08:08:14] (step=0595700) Train Loss: 0.3222, Train Steps/Sec: 17.26, Grad Norm: 0.0360 +[2025-02-23 08:08:20] (step=0595800) Train Loss: 0.3221, Train Steps/Sec: 17.30, Grad Norm: 0.0371 +[2025-02-23 08:08:26] (step=0595900) Train Loss: 0.3222, Train Steps/Sec: 17.35, Grad Norm: 0.0359 +[2025-02-23 08:08:31] (step=0596000) Train Loss: 0.3225, Train Steps/Sec: 17.29, Grad Norm: 0.0343 +[2025-02-23 08:08:37] (step=0596100) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0371 +[2025-02-23 08:08:43] (step=0596200) Train Loss: 0.3225, Train Steps/Sec: 17.27, Grad Norm: 0.0366 +[2025-02-23 08:08:49] (step=0596300) Train Loss: 0.3220, Train Steps/Sec: 17.35, Grad Norm: 0.0365 +[2025-02-23 08:08:54] (step=0596400) Train Loss: 0.3220, Train Steps/Sec: 17.34, Grad Norm: 0.0349 +[2025-02-23 08:09:00] (step=0596500) Train Loss: 0.3223, Train Steps/Sec: 17.25, Grad Norm: 0.0362 +[2025-02-23 08:09:06] (step=0596600) Train Loss: 0.3224, Train Steps/Sec: 17.28, Grad Norm: 0.0351 +[2025-02-23 08:09:13] (step=0596700) Train Loss: 0.3226, Train Steps/Sec: 15.04, Grad Norm: 0.0370 +[2025-02-23 08:09:20] (step=0596800) Train Loss: 0.3225, Train Steps/Sec: 13.01, Grad Norm: 0.0343 +[2025-02-23 08:09:27] (step=0596900) Train Loss: 0.3218, Train Steps/Sec: 14.70, Grad Norm: 0.0344 +[2025-02-23 08:09:34] (step=0597000) Train Loss: 0.3223, Train Steps/Sec: 14.58, Grad Norm: 0.0368 +[2025-02-23 08:09:40] (step=0597100) Train Loss: 0.3226, Train Steps/Sec: 17.37, Grad Norm: 0.0335 +[2025-02-23 08:09:46] (step=0597200) Train Loss: 0.3217, Train Steps/Sec: 16.65, Grad Norm: 0.0408 +[2025-02-23 08:09:52] (step=0597300) Train Loss: 0.3219, Train Steps/Sec: 16.38, Grad Norm: 0.0342 +[2025-02-23 08:09:58] (step=0597400) Train Loss: 0.3229, Train Steps/Sec: 17.19, Grad Norm: 0.0395 +[2025-02-23 08:10:04] (step=0597500) Train Loss: 0.3220, Train Steps/Sec: 17.24, Grad Norm: 0.0376 +[2025-02-23 08:10:09] (step=0597600) Train Loss: 0.3219, Train Steps/Sec: 17.26, Grad Norm: 0.0329 +[2025-02-23 08:10:15] (step=0597700) Train Loss: 0.3221, Train Steps/Sec: 17.31, Grad Norm: 0.0370 +[2025-02-23 08:10:21] (step=0597800) Train Loss: 0.3226, Train Steps/Sec: 17.29, Grad Norm: 0.0356 +[2025-02-23 08:10:27] (step=0597900) Train Loss: 0.3218, Train Steps/Sec: 17.28, Grad Norm: 0.0354 +[2025-02-23 08:10:34] (step=0598000) Train Loss: 0.3221, Train Steps/Sec: 14.47, Grad Norm: 0.0339 +[2025-02-23 08:10:39] (step=0598100) Train Loss: 0.3227, Train Steps/Sec: 17.29, Grad Norm: 0.0388 +[2025-02-23 08:10:45] (step=0598200) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0340 +[2025-02-23 08:10:51] (step=0598300) Train Loss: 0.3221, Train Steps/Sec: 17.31, Grad Norm: 0.0355 +[2025-02-23 08:10:57] (step=0598400) Train Loss: 0.3225, Train Steps/Sec: 17.31, Grad Norm: 0.0381 +[2025-02-23 08:11:03] (step=0598500) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0331 +[2025-02-23 08:11:09] (step=0598600) Train Loss: 0.3218, Train Steps/Sec: 16.61, Grad Norm: 0.0368 +[2025-02-23 08:11:15] (step=0598700) Train Loss: 0.3225, Train Steps/Sec: 15.93, Grad Norm: 0.0388 +[2025-02-23 08:11:21] (step=0598800) Train Loss: 0.3226, Train Steps/Sec: 15.29, Grad Norm: 0.0378 +[2025-02-23 08:11:28] (step=0598900) Train Loss: 0.3220, Train Steps/Sec: 14.26, Grad Norm: 0.0345 +[2025-02-23 08:11:35] (step=0599000) Train Loss: 0.3222, Train Steps/Sec: 15.21, Grad Norm: 0.0376 +[2025-02-23 08:11:41] (step=0599100) Train Loss: 0.3220, Train Steps/Sec: 16.63, Grad Norm: 0.0352 +[2025-02-23 08:11:47] (step=0599200) Train Loss: 0.3218, Train Steps/Sec: 17.14, Grad Norm: 0.0380 +[2025-02-23 08:11:54] (step=0599300) Train Loss: 0.3222, Train Steps/Sec: 13.90, Grad Norm: 0.0366 +[2025-02-23 08:12:00] (step=0599400) Train Loss: 0.3224, Train Steps/Sec: 17.21, Grad Norm: 0.0350 +[2025-02-23 08:12:06] (step=0599500) Train Loss: 0.3223, Train Steps/Sec: 17.23, Grad Norm: 0.0361 +[2025-02-23 08:12:11] (step=0599600) Train Loss: 0.3223, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 08:12:17] (step=0599700) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0391 +[2025-02-23 08:12:23] (step=0599800) Train Loss: 0.3220, Train Steps/Sec: 17.30, Grad Norm: 0.0371 +[2025-02-23 08:12:29] (step=0599900) Train Loss: 0.3221, Train Steps/Sec: 17.41, Grad Norm: 0.0371 +[2025-02-23 08:12:34] (step=0600000) Train Loss: 0.3221, Train Steps/Sec: 17.36, Grad Norm: 0.0361 +[2025-02-23 08:12:35] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0600000.pt +[2025-02-23 08:12:41] (step=0600100) Train Loss: 0.3228, Train Steps/Sec: 16.19, Grad Norm: 0.0352 +[2025-02-23 08:12:46] (step=0600200) Train Loss: 0.3220, Train Steps/Sec: 17.46, Grad Norm: 0.0356 +[2025-02-23 08:12:52] (step=0600300) Train Loss: 0.3221, Train Steps/Sec: 17.44, Grad Norm: 0.0341 +[2025-02-23 08:12:58] (step=0600400) Train Loss: 0.3222, Train Steps/Sec: 17.45, Grad Norm: 0.0388 +[2025-02-23 08:13:05] (step=0600500) Train Loss: 0.3221, Train Steps/Sec: 14.09, Grad Norm: 0.0369 +[2025-02-23 08:13:11] (step=0600600) Train Loss: 0.3221, Train Steps/Sec: 17.32, Grad Norm: 0.0345 +[2025-02-23 08:13:18] (step=0600700) Train Loss: 0.3224, Train Steps/Sec: 14.62, Grad Norm: 0.0371 +[2025-02-23 08:13:24] (step=0600800) Train Loss: 0.3222, Train Steps/Sec: 15.29, Grad Norm: 0.0365 +[2025-02-23 08:13:30] (step=0600900) Train Loss: 0.3220, Train Steps/Sec: 15.97, Grad Norm: 0.0340 +[2025-02-23 08:13:37] (step=0601000) Train Loss: 0.3222, Train Steps/Sec: 15.16, Grad Norm: 0.0369 +[2025-02-23 08:13:43] (step=0601100) Train Loss: 0.3218, Train Steps/Sec: 16.62, Grad Norm: 0.0381 +[2025-02-23 08:13:49] (step=0601200) Train Loss: 0.3221, Train Steps/Sec: 17.29, Grad Norm: 0.0393 +[2025-02-23 08:13:55] (step=0601300) Train Loss: 0.3227, Train Steps/Sec: 16.50, Grad Norm: 0.0368 +[2025-02-23 08:14:01] (step=0601400) Train Loss: 0.3228, Train Steps/Sec: 17.30, Grad Norm: 0.0371 +[2025-02-23 08:14:06] (step=0601500) Train Loss: 0.3220, Train Steps/Sec: 17.09, Grad Norm: 0.0337 +[2025-02-23 08:14:12] (step=0601600) Train Loss: 0.3223, Train Steps/Sec: 17.24, Grad Norm: 0.0376 +[2025-02-23 08:14:18] (step=0601700) Train Loss: 0.3222, Train Steps/Sec: 17.20, Grad Norm: 0.0357 +[2025-02-23 08:14:25] (step=0601800) Train Loss: 0.3217, Train Steps/Sec: 14.54, Grad Norm: 0.0367 +[2025-02-23 08:14:31] (step=0601900) Train Loss: 0.3225, Train Steps/Sec: 17.33, Grad Norm: 0.0374 +[2025-02-23 08:14:37] (step=0602000) Train Loss: 0.3226, Train Steps/Sec: 17.33, Grad Norm: 0.0363 +[2025-02-23 08:14:42] (step=0602100) Train Loss: 0.3222, Train Steps/Sec: 17.34, Grad Norm: 0.0381 +[2025-02-23 08:14:48] (step=0602200) Train Loss: 0.3221, Train Steps/Sec: 17.27, Grad Norm: 0.0368 +[2025-02-23 08:14:54] (step=0602300) Train Loss: 0.3219, Train Steps/Sec: 17.35, Grad Norm: 0.0352 +[2025-02-23 08:15:00] (step=0602400) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0357 +[2025-02-23 08:15:06] (step=0602500) Train Loss: 0.3221, Train Steps/Sec: 16.44, Grad Norm: 0.0384 +[2025-02-23 08:15:11] (step=0602600) Train Loss: 0.3228, Train Steps/Sec: 17.26, Grad Norm: 0.0381 +[2025-02-23 08:15:18] (step=0602700) Train Loss: 0.3221, Train Steps/Sec: 14.62, Grad Norm: 0.0349 +[2025-02-23 08:15:25] (step=0602800) Train Loss: 0.3226, Train Steps/Sec: 14.18, Grad Norm: 0.0356 +[2025-02-23 08:15:31] (step=0602900) Train Loss: 0.3219, Train Steps/Sec: 17.26, Grad Norm: 0.0377 +[2025-02-23 08:15:39] (step=0603000) Train Loss: 0.3220, Train Steps/Sec: 12.15, Grad Norm: 0.0350 +[2025-02-23 08:15:45] (step=0603100) Train Loss: 0.3220, Train Steps/Sec: 17.28, Grad Norm: 0.0361 +[2025-02-23 08:15:51] (step=0603200) Train Loss: 0.3220, Train Steps/Sec: 17.30, Grad Norm: 0.0395 +[2025-02-23 08:15:57] (step=0603300) Train Loss: 0.3220, Train Steps/Sec: 16.52, Grad Norm: 0.0419 +[2025-02-23 08:16:03] (step=0603400) Train Loss: 0.3227, Train Steps/Sec: 17.38, Grad Norm: 0.0377 +[2025-02-23 08:16:09] (step=0603500) Train Loss: 0.3221, Train Steps/Sec: 17.34, Grad Norm: 0.0404 +[2025-02-23 08:16:14] (step=0603600) Train Loss: 0.3221, Train Steps/Sec: 17.32, Grad Norm: 0.0335 +[2025-02-23 08:16:20] (step=0603700) Train Loss: 0.3219, Train Steps/Sec: 17.38, Grad Norm: 0.0422 +[2025-02-23 08:16:26] (step=0603800) Train Loss: 0.3231, Train Steps/Sec: 17.27, Grad Norm: 0.0414 +[2025-02-23 08:16:32] (step=0603900) Train Loss: 0.3219, Train Steps/Sec: 17.26, Grad Norm: 0.0360 +[2025-02-23 08:16:37] (step=0604000) Train Loss: 0.3221, Train Steps/Sec: 17.27, Grad Norm: 0.0362 +[2025-02-23 08:16:43] (step=0604100) Train Loss: 0.3220, Train Steps/Sec: 17.30, Grad Norm: 0.0379 +[2025-02-23 08:16:49] (step=0604200) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0378 +[2025-02-23 08:16:56] (step=0604300) Train Loss: 0.3223, Train Steps/Sec: 14.33, Grad Norm: 0.0375 +[2025-02-23 08:17:02] (step=0604400) Train Loss: 0.3224, Train Steps/Sec: 16.53, Grad Norm: 0.0351 +[2025-02-23 08:17:08] (step=0604500) Train Loss: 0.3225, Train Steps/Sec: 17.39, Grad Norm: 0.0336 +[2025-02-23 08:17:14] (step=0604600) Train Loss: 0.3218, Train Steps/Sec: 15.94, Grad Norm: 0.0361 +[2025-02-23 08:17:21] (step=0604700) Train Loss: 0.3227, Train Steps/Sec: 13.67, Grad Norm: 0.0371 +[2025-02-23 08:17:27] (step=0604800) Train Loss: 0.3215, Train Steps/Sec: 16.67, Grad Norm: 0.0373 +[2025-02-23 08:17:33] (step=0604900) Train Loss: 0.3220, Train Steps/Sec: 17.41, Grad Norm: 0.0356 +[2025-02-23 08:17:40] (step=0605000) Train Loss: 0.3222, Train Steps/Sec: 14.74, Grad Norm: 0.0399 +[2025-02-23 08:17:46] (step=0605100) Train Loss: 0.3225, Train Steps/Sec: 16.60, Grad Norm: 0.0366 +[2025-02-23 08:17:52] (step=0605200) Train Loss: 0.3221, Train Steps/Sec: 17.38, Grad Norm: 0.0368 +[2025-02-23 08:17:58] (step=0605300) Train Loss: 0.3225, Train Steps/Sec: 16.48, Grad Norm: 0.0354 +[2025-02-23 08:18:04] (step=0605400) Train Loss: 0.3221, Train Steps/Sec: 17.35, Grad Norm: 0.0333 +[2025-02-23 08:18:10] (step=0605500) Train Loss: 0.3227, Train Steps/Sec: 14.48, Grad Norm: 0.0344 +[2025-02-23 08:18:16] (step=0605600) Train Loss: 0.3221, Train Steps/Sec: 17.31, Grad Norm: 0.0379 +[2025-02-23 08:18:22] (step=0605700) Train Loss: 0.3216, Train Steps/Sec: 17.31, Grad Norm: 0.0361 +[2025-02-23 08:18:28] (step=0605800) Train Loss: 0.3224, Train Steps/Sec: 17.28, Grad Norm: 0.0358 +[2025-02-23 08:18:34] (step=0605900) Train Loss: 0.3222, Train Steps/Sec: 17.29, Grad Norm: 0.0406 +[2025-02-23 08:18:39] (step=0606000) Train Loss: 0.3223, Train Steps/Sec: 17.28, Grad Norm: 0.0380 +[2025-02-23 08:18:45] (step=0606100) Train Loss: 0.3219, Train Steps/Sec: 17.23, Grad Norm: 0.0374 +[2025-02-23 08:18:51] (step=0606200) Train Loss: 0.3226, Train Steps/Sec: 17.27, Grad Norm: 0.0359 +[2025-02-23 08:18:57] (step=0606300) Train Loss: 0.3218, Train Steps/Sec: 17.30, Grad Norm: 0.0350 +[2025-02-23 08:19:03] (step=0606400) Train Loss: 0.3222, Train Steps/Sec: 16.51, Grad Norm: 0.0386 +[2025-02-23 08:19:09] (step=0606500) Train Loss: 0.3220, Train Steps/Sec: 17.35, Grad Norm: 0.0370 +[2025-02-23 08:19:15] (step=0606600) Train Loss: 0.3219, Train Steps/Sec: 14.75, Grad Norm: 0.0355 +[2025-02-23 08:19:22] (step=0606700) Train Loss: 0.3215, Train Steps/Sec: 15.19, Grad Norm: 0.0370 +[2025-02-23 08:19:29] (step=0606800) Train Loss: 0.3226, Train Steps/Sec: 13.38, Grad Norm: 0.0366 +[2025-02-23 08:19:36] (step=0606900) Train Loss: 0.3220, Train Steps/Sec: 15.94, Grad Norm: 0.0386 +[2025-02-23 08:19:42] (step=0607000) Train Loss: 0.3223, Train Steps/Sec: 15.74, Grad Norm: 0.0397 +[2025-02-23 08:19:48] (step=0607100) Train Loss: 0.3223, Train Steps/Sec: 16.50, Grad Norm: 0.0372 +[2025-02-23 08:19:54] (step=0607200) Train Loss: 0.3219, Train Steps/Sec: 17.36, Grad Norm: 0.0390 +[2025-02-23 08:20:00] (step=0607300) Train Loss: 0.3219, Train Steps/Sec: 16.55, Grad Norm: 0.0346 +[2025-02-23 08:20:06] (step=0607400) Train Loss: 0.3224, Train Steps/Sec: 17.26, Grad Norm: 0.0370 +[2025-02-23 08:20:11] (step=0607500) Train Loss: 0.3223, Train Steps/Sec: 17.28, Grad Norm: 0.0352 +[2025-02-23 08:20:17] (step=0607600) Train Loss: 0.3220, Train Steps/Sec: 17.33, Grad Norm: 0.0355 +[2025-02-23 08:20:23] (step=0607700) Train Loss: 0.3220, Train Steps/Sec: 17.26, Grad Norm: 0.0365 +[2025-02-23 08:20:29] (step=0607800) Train Loss: 0.3218, Train Steps/Sec: 17.28, Grad Norm: 0.0334 +[2025-02-23 08:20:35] (step=0607900) Train Loss: 0.3227, Train Steps/Sec: 17.32, Grad Norm: 0.0355 +[2025-02-23 08:20:42] (step=0608000) Train Loss: 0.3222, Train Steps/Sec: 14.41, Grad Norm: 0.0375 +[2025-02-23 08:20:47] (step=0608100) Train Loss: 0.3218, Train Steps/Sec: 17.43, Grad Norm: 0.0350 +[2025-02-23 08:20:53] (step=0608200) Train Loss: 0.3217, Train Steps/Sec: 17.23, Grad Norm: 0.0361 +[2025-02-23 08:20:59] (step=0608300) Train Loss: 0.3222, Train Steps/Sec: 16.44, Grad Norm: 0.0374 +[2025-02-23 08:21:05] (step=0608400) Train Loss: 0.3223, Train Steps/Sec: 17.39, Grad Norm: 0.0367 +[2025-02-23 08:21:11] (step=0608500) Train Loss: 0.3225, Train Steps/Sec: 15.99, Grad Norm: 0.0355 +[2025-02-23 08:21:18] (step=0608600) Train Loss: 0.3223, Train Steps/Sec: 14.84, Grad Norm: 0.0391 +[2025-02-23 08:21:24] (step=0608700) Train Loss: 0.3218, Train Steps/Sec: 15.92, Grad Norm: 0.0353 +[2025-02-23 08:21:30] (step=0608800) Train Loss: 0.3221, Train Steps/Sec: 17.44, Grad Norm: 0.0366 +[2025-02-23 08:21:36] (step=0608900) Train Loss: 0.3224, Train Steps/Sec: 16.00, Grad Norm: 0.0357 +[2025-02-23 08:21:43] (step=0609000) Train Loss: 0.3225, Train Steps/Sec: 15.26, Grad Norm: 0.0357 +[2025-02-23 08:21:49] (step=0609100) Train Loss: 0.3225, Train Steps/Sec: 16.61, Grad Norm: 0.0361 +[2025-02-23 08:21:55] (step=0609200) Train Loss: 0.3221, Train Steps/Sec: 17.42, Grad Norm: 0.0363 +[2025-02-23 08:22:02] (step=0609300) Train Loss: 0.3220, Train Steps/Sec: 13.83, Grad Norm: 0.0367 +[2025-02-23 08:22:08] (step=0609400) Train Loss: 0.3220, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 08:22:13] (step=0609500) Train Loss: 0.3220, Train Steps/Sec: 17.32, Grad Norm: 0.0340 +[2025-02-23 08:22:19] (step=0609600) Train Loss: 0.3224, Train Steps/Sec: 17.27, Grad Norm: 0.0354 +[2025-02-23 08:22:25] (step=0609700) Train Loss: 0.3222, Train Steps/Sec: 17.13, Grad Norm: 0.0351 +[2025-02-23 08:22:31] (step=0609800) Train Loss: 0.3221, Train Steps/Sec: 17.06, Grad Norm: 0.0352 +[2025-02-23 08:22:37] (step=0609900) Train Loss: 0.3224, Train Steps/Sec: 17.07, Grad Norm: 0.0359 +[2025-02-23 08:22:43] (step=0610000) Train Loss: 0.3216, Train Steps/Sec: 17.05, Grad Norm: 0.0353 +[2025-02-23 08:22:48] (step=0610100) Train Loss: 0.3222, Train Steps/Sec: 17.22, Grad Norm: 0.0354 +[2025-02-23 08:22:54] (step=0610200) Train Loss: 0.3225, Train Steps/Sec: 17.27, Grad Norm: 0.0390 +[2025-02-23 08:23:00] (step=0610300) Train Loss: 0.3223, Train Steps/Sec: 16.49, Grad Norm: 0.0368 +[2025-02-23 08:23:06] (step=0610400) Train Loss: 0.3216, Train Steps/Sec: 17.26, Grad Norm: 0.0354 +[2025-02-23 08:23:14] (step=0610500) Train Loss: 0.3220, Train Steps/Sec: 12.20, Grad Norm: 0.0407 +[2025-02-23 08:23:20] (step=0610600) Train Loss: 0.3225, Train Steps/Sec: 16.52, Grad Norm: 0.0377 +[2025-02-23 08:23:27] (step=0610700) Train Loss: 0.3219, Train Steps/Sec: 15.75, Grad Norm: 0.0389 +[2025-02-23 08:23:33] (step=0610800) Train Loss: 0.3218, Train Steps/Sec: 16.57, Grad Norm: 0.0347 +[2025-02-23 08:23:39] (step=0610900) Train Loss: 0.3226, Train Steps/Sec: 16.53, Grad Norm: 0.0347 +[2025-02-23 08:23:45] (step=0611000) Train Loss: 0.3222, Train Steps/Sec: 15.16, Grad Norm: 0.0355 +[2025-02-23 08:23:51] (step=0611100) Train Loss: 0.3215, Train Steps/Sec: 16.45, Grad Norm: 0.0402 +[2025-02-23 08:23:57] (step=0611200) Train Loss: 0.3223, Train Steps/Sec: 17.24, Grad Norm: 0.0355 +[2025-02-23 08:24:03] (step=0611300) Train Loss: 0.3227, Train Steps/Sec: 17.27, Grad Norm: 0.0357 +[2025-02-23 08:24:09] (step=0611400) Train Loss: 0.3220, Train Steps/Sec: 16.50, Grad Norm: 0.0370 +[2025-02-23 08:24:15] (step=0611500) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0376 +[2025-02-23 08:24:21] (step=0611600) Train Loss: 0.3223, Train Steps/Sec: 17.43, Grad Norm: 0.0372 +[2025-02-23 08:24:26] (step=0611700) Train Loss: 0.3220, Train Steps/Sec: 17.35, Grad Norm: 0.0355 +[2025-02-23 08:24:33] (step=0611800) Train Loss: 0.3216, Train Steps/Sec: 14.49, Grad Norm: 0.0362 +[2025-02-23 08:24:39] (step=0611900) Train Loss: 0.3220, Train Steps/Sec: 17.21, Grad Norm: 0.0357 +[2025-02-23 08:24:45] (step=0612000) Train Loss: 0.3218, Train Steps/Sec: 17.31, Grad Norm: 0.0371 +[2025-02-23 08:24:51] (step=0612100) Train Loss: 0.3226, Train Steps/Sec: 17.20, Grad Norm: 0.0380 +[2025-02-23 08:24:57] (step=0612200) Train Loss: 0.3218, Train Steps/Sec: 16.28, Grad Norm: 0.0384 +[2025-02-23 08:25:03] (step=0612300) Train Loss: 0.3221, Train Steps/Sec: 17.06, Grad Norm: 0.0354 +[2025-02-23 08:25:09] (step=0612400) Train Loss: 0.3215, Train Steps/Sec: 15.67, Grad Norm: 0.0396 +[2025-02-23 08:25:16] (step=0612500) Train Loss: 0.3220, Train Steps/Sec: 14.49, Grad Norm: 0.0345 +[2025-02-23 08:25:22] (step=0612600) Train Loss: 0.3224, Train Steps/Sec: 17.03, Grad Norm: 0.0355 +[2025-02-23 08:25:28] (step=0612700) Train Loss: 0.3218, Train Steps/Sec: 17.01, Grad Norm: 0.0343 +[2025-02-23 08:25:34] (step=0612800) Train Loss: 0.3224, Train Steps/Sec: 14.98, Grad Norm: 0.0381 +[2025-02-23 08:25:40] (step=0612900) Train Loss: 0.3217, Train Steps/Sec: 16.27, Grad Norm: 0.0401 +[2025-02-23 08:25:48] (step=0613000) Train Loss: 0.3222, Train Steps/Sec: 12.90, Grad Norm: 0.0429 +[2025-02-23 08:25:54] (step=0613100) Train Loss: 0.3218, Train Steps/Sec: 16.44, Grad Norm: 0.0377 +[2025-02-23 08:26:00] (step=0613200) Train Loss: 0.3224, Train Steps/Sec: 17.39, Grad Norm: 0.0404 +[2025-02-23 08:26:06] (step=0613300) Train Loss: 0.3215, Train Steps/Sec: 16.58, Grad Norm: 0.0383 +[2025-02-23 08:26:12] (step=0613400) Train Loss: 0.3217, Train Steps/Sec: 17.32, Grad Norm: 0.0366 +[2025-02-23 08:26:18] (step=0613500) Train Loss: 0.3219, Train Steps/Sec: 17.29, Grad Norm: 0.0351 +[2025-02-23 08:26:23] (step=0613600) Train Loss: 0.3213, Train Steps/Sec: 17.27, Grad Norm: 0.0366 +[2025-02-23 08:26:29] (step=0613700) Train Loss: 0.3221, Train Steps/Sec: 17.29, Grad Norm: 0.0356 +[2025-02-23 08:26:35] (step=0613800) Train Loss: 0.3216, Train Steps/Sec: 17.27, Grad Norm: 0.0379 +[2025-02-23 08:26:41] (step=0613900) Train Loss: 0.3224, Train Steps/Sec: 17.27, Grad Norm: 0.0332 +[2025-02-23 08:26:47] (step=0614000) Train Loss: 0.3223, Train Steps/Sec: 17.34, Grad Norm: 0.0396 +[2025-02-23 08:26:52] (step=0614100) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0351 +[2025-02-23 08:26:58] (step=0614200) Train Loss: 0.3227, Train Steps/Sec: 16.64, Grad Norm: 0.0352 +[2025-02-23 08:27:06] (step=0614300) Train Loss: 0.3225, Train Steps/Sec: 13.43, Grad Norm: 0.0361 +[2025-02-23 08:27:12] (step=0614400) Train Loss: 0.3219, Train Steps/Sec: 15.29, Grad Norm: 0.0381 +[2025-02-23 08:27:18] (step=0614500) Train Loss: 0.3224, Train Steps/Sec: 16.65, Grad Norm: 0.0336 +[2025-02-23 08:27:24] (step=0614600) Train Loss: 0.3217, Train Steps/Sec: 17.15, Grad Norm: 0.0360 +[2025-02-23 08:27:31] (step=0614700) Train Loss: 0.3221, Train Steps/Sec: 15.78, Grad Norm: 0.0343 +[2025-02-23 08:27:37] (step=0614800) Train Loss: 0.3222, Train Steps/Sec: 16.42, Grad Norm: 0.0365 +[2025-02-23 08:27:43] (step=0614900) Train Loss: 0.3221, Train Steps/Sec: 16.49, Grad Norm: 0.0412 +[2025-02-23 08:27:49] (step=0615000) Train Loss: 0.3221, Train Steps/Sec: 15.12, Grad Norm: 0.0362 +[2025-02-23 08:27:55] (step=0615100) Train Loss: 0.3227, Train Steps/Sec: 16.40, Grad Norm: 0.0359 +[2025-02-23 08:28:01] (step=0615200) Train Loss: 0.3223, Train Steps/Sec: 17.15, Grad Norm: 0.0356 +[2025-02-23 08:28:07] (step=0615300) Train Loss: 0.3220, Train Steps/Sec: 17.19, Grad Norm: 0.0352 +[2025-02-23 08:28:13] (step=0615400) Train Loss: 0.3219, Train Steps/Sec: 16.41, Grad Norm: 0.0369 +[2025-02-23 08:28:20] (step=0615500) Train Loss: 0.3223, Train Steps/Sec: 14.48, Grad Norm: 0.0370 +[2025-02-23 08:28:26] (step=0615600) Train Loss: 0.3225, Train Steps/Sec: 17.09, Grad Norm: 0.0363 +[2025-02-23 08:28:32] (step=0615700) Train Loss: 0.3221, Train Steps/Sec: 17.12, Grad Norm: 0.0350 +[2025-02-23 08:28:38] (step=0615800) Train Loss: 0.3217, Train Steps/Sec: 17.32, Grad Norm: 0.0403 +[2025-02-23 08:28:43] (step=0615900) Train Loss: 0.3219, Train Steps/Sec: 17.32, Grad Norm: 0.0374 +[2025-02-23 08:28:49] (step=0616000) Train Loss: 0.3225, Train Steps/Sec: 17.33, Grad Norm: 0.0373 +[2025-02-23 08:28:55] (step=0616100) Train Loss: 0.3223, Train Steps/Sec: 16.58, Grad Norm: 0.0343 +[2025-02-23 08:29:01] (step=0616200) Train Loss: 0.3222, Train Steps/Sec: 17.39, Grad Norm: 0.0368 +[2025-02-23 08:29:07] (step=0616300) Train Loss: 0.3225, Train Steps/Sec: 15.26, Grad Norm: 0.0367 +[2025-02-23 08:29:14] (step=0616400) Train Loss: 0.3215, Train Steps/Sec: 15.30, Grad Norm: 0.0361 +[2025-02-23 08:29:20] (step=0616500) Train Loss: 0.3222, Train Steps/Sec: 17.36, Grad Norm: 0.0377 +[2025-02-23 08:29:25] (step=0616600) Train Loss: 0.3218, Train Steps/Sec: 17.35, Grad Norm: 0.0396 +[2025-02-23 08:29:32] (step=0616700) Train Loss: 0.3216, Train Steps/Sec: 16.62, Grad Norm: 0.0345 +[2025-02-23 08:29:39] (step=0616800) Train Loss: 0.3220, Train Steps/Sec: 13.36, Grad Norm: 0.0374 +[2025-02-23 08:29:45] (step=0616900) Train Loss: 0.3223, Train Steps/Sec: 16.46, Grad Norm: 0.0345 +[2025-02-23 08:29:52] (step=0617000) Train Loss: 0.3216, Train Steps/Sec: 15.14, Grad Norm: 0.0350 +[2025-02-23 08:29:58] (step=0617100) Train Loss: 0.3226, Train Steps/Sec: 16.50, Grad Norm: 0.0394 +[2025-02-23 08:30:04] (step=0617200) Train Loss: 0.3226, Train Steps/Sec: 17.24, Grad Norm: 0.0367 +[2025-02-23 08:30:09] (step=0617300) Train Loss: 0.3221, Train Steps/Sec: 17.27, Grad Norm: 0.0362 +[2025-02-23 08:30:15] (step=0617400) Train Loss: 0.3222, Train Steps/Sec: 16.45, Grad Norm: 0.0336 +[2025-02-23 08:30:21] (step=0617500) Train Loss: 0.3224, Train Steps/Sec: 17.20, Grad Norm: 0.0354 +[2025-02-23 08:30:27] (step=0617600) Train Loss: 0.3220, Train Steps/Sec: 17.28, Grad Norm: 0.0342 +[2025-02-23 08:30:33] (step=0617700) Train Loss: 0.3223, Train Steps/Sec: 17.28, Grad Norm: 0.0350 +[2025-02-23 08:30:39] (step=0617800) Train Loss: 0.3218, Train Steps/Sec: 17.31, Grad Norm: 0.0346 +[2025-02-23 08:30:44] (step=0617900) Train Loss: 0.3221, Train Steps/Sec: 17.35, Grad Norm: 0.0382 +[2025-02-23 08:30:51] (step=0618000) Train Loss: 0.3218, Train Steps/Sec: 14.47, Grad Norm: 0.0343 +[2025-02-23 08:30:57] (step=0618100) Train Loss: 0.3218, Train Steps/Sec: 16.42, Grad Norm: 0.0347 +[2025-02-23 08:31:04] (step=0618200) Train Loss: 0.3219, Train Steps/Sec: 15.80, Grad Norm: 0.0402 +[2025-02-23 08:31:11] (step=0618300) Train Loss: 0.3215, Train Steps/Sec: 14.69, Grad Norm: 0.0334 +[2025-02-23 08:31:16] (step=0618400) Train Loss: 0.3214, Train Steps/Sec: 17.36, Grad Norm: 0.0344 +[2025-02-23 08:31:22] (step=0618500) Train Loss: 0.3221, Train Steps/Sec: 17.34, Grad Norm: 0.0351 +[2025-02-23 08:31:28] (step=0618600) Train Loss: 0.3224, Train Steps/Sec: 16.66, Grad Norm: 0.0411 +[2025-02-23 08:31:34] (step=0618700) Train Loss: 0.3219, Train Steps/Sec: 17.33, Grad Norm: 0.0386 +[2025-02-23 08:31:40] (step=0618800) Train Loss: 0.3222, Train Steps/Sec: 15.89, Grad Norm: 0.0379 +[2025-02-23 08:31:46] (step=0618900) Train Loss: 0.3221, Train Steps/Sec: 16.63, Grad Norm: 0.0354 +[2025-02-23 08:31:53] (step=0619000) Train Loss: 0.3218, Train Steps/Sec: 15.21, Grad Norm: 0.0363 +[2025-02-23 08:31:59] (step=0619100) Train Loss: 0.3219, Train Steps/Sec: 16.55, Grad Norm: 0.0331 +[2025-02-23 08:32:05] (step=0619200) Train Loss: 0.3219, Train Steps/Sec: 17.33, Grad Norm: 0.0385 +[2025-02-23 08:32:11] (step=0619300) Train Loss: 0.3225, Train Steps/Sec: 14.45, Grad Norm: 0.0379 +[2025-02-23 08:32:18] (step=0619400) Train Loss: 0.3225, Train Steps/Sec: 16.48, Grad Norm: 0.0387 +[2025-02-23 08:32:23] (step=0619500) Train Loss: 0.3217, Train Steps/Sec: 17.29, Grad Norm: 0.0378 +[2025-02-23 08:32:29] (step=0619600) Train Loss: 0.3223, Train Steps/Sec: 17.29, Grad Norm: 0.0331 +[2025-02-23 08:32:35] (step=0619700) Train Loss: 0.3218, Train Steps/Sec: 17.26, Grad Norm: 0.0338 +[2025-02-23 08:32:41] (step=0619800) Train Loss: 0.3220, Train Steps/Sec: 17.23, Grad Norm: 0.0366 +[2025-02-23 08:32:46] (step=0619900) Train Loss: 0.3219, Train Steps/Sec: 17.21, Grad Norm: 0.0345 +[2025-02-23 08:32:53] (step=0620000) Train Loss: 0.3218, Train Steps/Sec: 16.47, Grad Norm: 0.0360 +[2025-02-23 08:32:58] (step=0620100) Train Loss: 0.3221, Train Steps/Sec: 17.34, Grad Norm: 0.0386 +[2025-02-23 08:33:05] (step=0620200) Train Loss: 0.3212, Train Steps/Sec: 15.33, Grad Norm: 0.0355 +[2025-02-23 08:33:11] (step=0620300) Train Loss: 0.3217, Train Steps/Sec: 15.33, Grad Norm: 0.0424 +[2025-02-23 08:33:17] (step=0620400) Train Loss: 0.3216, Train Steps/Sec: 17.40, Grad Norm: 0.0387 +[2025-02-23 08:33:24] (step=0620500) Train Loss: 0.3224, Train Steps/Sec: 14.62, Grad Norm: 0.0339 +[2025-02-23 08:33:30] (step=0620600) Train Loss: 0.3219, Train Steps/Sec: 16.59, Grad Norm: 0.0347 +[2025-02-23 08:33:36] (step=0620700) Train Loss: 0.3218, Train Steps/Sec: 16.63, Grad Norm: 0.0342 +[2025-02-23 08:33:42] (step=0620800) Train Loss: 0.3216, Train Steps/Sec: 16.58, Grad Norm: 0.0352 +[2025-02-23 08:33:48] (step=0620900) Train Loss: 0.3220, Train Steps/Sec: 16.62, Grad Norm: 0.0349 +[2025-02-23 08:33:55] (step=0621000) Train Loss: 0.3225, Train Steps/Sec: 15.27, Grad Norm: 0.0317 +[2025-02-23 08:34:01] (step=0621100) Train Loss: 0.3217, Train Steps/Sec: 16.62, Grad Norm: 0.0363 +[2025-02-23 08:34:06] (step=0621200) Train Loss: 0.3226, Train Steps/Sec: 17.34, Grad Norm: 0.0374 +[2025-02-23 08:34:12] (step=0621300) Train Loss: 0.3215, Train Steps/Sec: 17.37, Grad Norm: 0.0351 +[2025-02-23 08:34:18] (step=0621400) Train Loss: 0.3220, Train Steps/Sec: 16.48, Grad Norm: 0.0386 +[2025-02-23 08:34:24] (step=0621500) Train Loss: 0.3216, Train Steps/Sec: 17.20, Grad Norm: 0.0366 +[2025-02-23 08:34:30] (step=0621600) Train Loss: 0.3219, Train Steps/Sec: 17.22, Grad Norm: 0.0352 +[2025-02-23 08:34:36] (step=0621700) Train Loss: 0.3222, Train Steps/Sec: 17.28, Grad Norm: 0.0377 +[2025-02-23 08:34:43] (step=0621800) Train Loss: 0.3228, Train Steps/Sec: 14.24, Grad Norm: 0.0351 +[2025-02-23 08:34:48] (step=0621900) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0328 +[2025-02-23 08:34:55] (step=0622000) Train Loss: 0.3225, Train Steps/Sec: 16.47, Grad Norm: 0.0343 +[2025-02-23 08:35:01] (step=0622100) Train Loss: 0.3223, Train Steps/Sec: 15.87, Grad Norm: 0.0354 +[2025-02-23 08:35:08] (step=0622200) Train Loss: 0.3224, Train Steps/Sec: 14.75, Grad Norm: 0.0340 +[2025-02-23 08:35:13] (step=0622300) Train Loss: 0.3224, Train Steps/Sec: 17.37, Grad Norm: 0.0379 +[2025-02-23 08:35:19] (step=0622400) Train Loss: 0.3222, Train Steps/Sec: 17.40, Grad Norm: 0.0360 +[2025-02-23 08:35:25] (step=0622500) Train Loss: 0.3217, Train Steps/Sec: 16.70, Grad Norm: 0.0384 +[2025-02-23 08:35:31] (step=0622600) Train Loss: 0.3223, Train Steps/Sec: 17.40, Grad Norm: 0.0349 +[2025-02-23 08:35:37] (step=0622700) Train Loss: 0.3221, Train Steps/Sec: 17.39, Grad Norm: 0.0376 +[2025-02-23 08:35:43] (step=0622800) Train Loss: 0.3221, Train Steps/Sec: 15.89, Grad Norm: 0.0383 +[2025-02-23 08:35:49] (step=0622900) Train Loss: 0.3223, Train Steps/Sec: 16.64, Grad Norm: 0.0359 +[2025-02-23 08:35:57] (step=0623000) Train Loss: 0.3218, Train Steps/Sec: 12.96, Grad Norm: 0.0401 +[2025-02-23 08:36:03] (step=0623100) Train Loss: 0.3215, Train Steps/Sec: 16.48, Grad Norm: 0.0331 +[2025-02-23 08:36:08] (step=0623200) Train Loss: 0.3227, Train Steps/Sec: 17.35, Grad Norm: 0.0339 +[2025-02-23 08:36:14] (step=0623300) Train Loss: 0.3222, Train Steps/Sec: 17.37, Grad Norm: 0.0362 +[2025-02-23 08:36:20] (step=0623400) Train Loss: 0.3217, Train Steps/Sec: 16.55, Grad Norm: 0.0382 +[2025-02-23 08:36:26] (step=0623500) Train Loss: 0.3220, Train Steps/Sec: 17.36, Grad Norm: 0.0369 +[2025-02-23 08:36:32] (step=0623600) Train Loss: 0.3218, Train Steps/Sec: 17.30, Grad Norm: 0.0428 +[2025-02-23 08:36:38] (step=0623700) Train Loss: 0.3219, Train Steps/Sec: 17.37, Grad Norm: 0.0341 +[2025-02-23 08:36:43] (step=0623800) Train Loss: 0.3222, Train Steps/Sec: 17.36, Grad Norm: 0.0374 +[2025-02-23 08:36:49] (step=0623900) Train Loss: 0.3221, Train Steps/Sec: 16.57, Grad Norm: 0.0337 +[2025-02-23 08:36:55] (step=0624000) Train Loss: 0.3214, Train Steps/Sec: 16.63, Grad Norm: 0.0337 +[2025-02-23 08:37:02] (step=0624100) Train Loss: 0.3220, Train Steps/Sec: 15.94, Grad Norm: 0.0373 +[2025-02-23 08:37:08] (step=0624200) Train Loss: 0.3221, Train Steps/Sec: 15.93, Grad Norm: 0.0367 +[2025-02-23 08:37:15] (step=0624300) Train Loss: 0.3220, Train Steps/Sec: 14.53, Grad Norm: 0.0393 +[2025-02-23 08:37:21] (step=0624400) Train Loss: 0.3221, Train Steps/Sec: 16.55, Grad Norm: 0.0364 +[2025-02-23 08:37:27] (step=0624500) Train Loss: 0.3220, Train Steps/Sec: 17.34, Grad Norm: 0.0345 +[2025-02-23 08:37:32] (step=0624600) Train Loss: 0.3220, Train Steps/Sec: 17.18, Grad Norm: 0.0388 +[2025-02-23 08:37:39] (step=0624700) Train Loss: 0.3218, Train Steps/Sec: 16.34, Grad Norm: 0.0374 +[2025-02-23 08:37:45] (step=0624800) Train Loss: 0.3222, Train Steps/Sec: 16.23, Grad Norm: 0.0383 +[2025-02-23 08:37:51] (step=0624900) Train Loss: 0.3225, Train Steps/Sec: 16.36, Grad Norm: 0.0385 +[2025-02-23 08:37:58] (step=0625000) Train Loss: 0.3221, Train Steps/Sec: 15.05, Grad Norm: 0.0401 +[2025-02-23 08:38:04] (step=0625100) Train Loss: 0.3220, Train Steps/Sec: 16.39, Grad Norm: 0.0371 +[2025-02-23 08:38:09] (step=0625200) Train Loss: 0.3219, Train Steps/Sec: 17.19, Grad Norm: 0.0367 +[2025-02-23 08:38:15] (step=0625300) Train Loss: 0.3223, Train Steps/Sec: 17.18, Grad Norm: 0.0363 +[2025-02-23 08:38:21] (step=0625400) Train Loss: 0.3221, Train Steps/Sec: 16.49, Grad Norm: 0.0364 +[2025-02-23 08:38:28] (step=0625500) Train Loss: 0.3221, Train Steps/Sec: 15.89, Grad Norm: 0.0368 +[2025-02-23 08:38:34] (step=0625600) Train Loss: 0.3217, Train Steps/Sec: 15.40, Grad Norm: 0.0373 +[2025-02-23 08:38:40] (step=0625700) Train Loss: 0.3224, Train Steps/Sec: 17.12, Grad Norm: 0.0377 +[2025-02-23 08:38:46] (step=0625800) Train Loss: 0.3222, Train Steps/Sec: 16.36, Grad Norm: 0.0406 +[2025-02-23 08:38:52] (step=0625900) Train Loss: 0.3221, Train Steps/Sec: 17.05, Grad Norm: 0.0362 +[2025-02-23 08:38:58] (step=0626000) Train Loss: 0.3223, Train Steps/Sec: 15.75, Grad Norm: 0.0399 +[2025-02-23 08:39:05] (step=0626100) Train Loss: 0.3223, Train Steps/Sec: 14.62, Grad Norm: 0.0393 +[2025-02-23 08:39:11] (step=0626200) Train Loss: 0.3218, Train Steps/Sec: 17.11, Grad Norm: 0.0363 +[2025-02-23 08:39:17] (step=0626300) Train Loss: 0.3224, Train Steps/Sec: 17.21, Grad Norm: 0.0368 +[2025-02-23 08:39:23] (step=0626400) Train Loss: 0.3218, Train Steps/Sec: 16.66, Grad Norm: 0.0362 +[2025-02-23 08:39:29] (step=0626500) Train Loss: 0.3223, Train Steps/Sec: 17.40, Grad Norm: 0.0351 +[2025-02-23 08:39:34] (step=0626600) Train Loss: 0.3215, Train Steps/Sec: 17.40, Grad Norm: 0.0352 +[2025-02-23 08:39:40] (step=0626700) Train Loss: 0.3224, Train Steps/Sec: 17.33, Grad Norm: 0.0360 +[2025-02-23 08:39:48] (step=0626800) Train Loss: 0.3218, Train Steps/Sec: 13.20, Grad Norm: 0.0347 +[2025-02-23 08:39:54] (step=0626900) Train Loss: 0.3215, Train Steps/Sec: 16.42, Grad Norm: 0.0415 +[2025-02-23 08:40:00] (step=0627000) Train Loss: 0.3220, Train Steps/Sec: 15.08, Grad Norm: 0.0374 +[2025-02-23 08:40:06] (step=0627100) Train Loss: 0.3215, Train Steps/Sec: 16.40, Grad Norm: 0.0370 +[2025-02-23 08:40:12] (step=0627200) Train Loss: 0.3217, Train Steps/Sec: 17.22, Grad Norm: 0.0344 +[2025-02-23 08:40:18] (step=0627300) Train Loss: 0.3224, Train Steps/Sec: 17.32, Grad Norm: 0.0345 +[2025-02-23 08:40:24] (step=0627400) Train Loss: 0.3219, Train Steps/Sec: 16.54, Grad Norm: 0.0359 +[2025-02-23 08:40:30] (step=0627500) Train Loss: 0.3226, Train Steps/Sec: 17.31, Grad Norm: 0.0393 +[2025-02-23 08:40:36] (step=0627600) Train Loss: 0.3217, Train Steps/Sec: 17.28, Grad Norm: 0.0364 +[2025-02-23 08:40:41] (step=0627700) Train Loss: 0.3215, Train Steps/Sec: 17.13, Grad Norm: 0.0356 +[2025-02-23 08:40:48] (step=0627800) Train Loss: 0.3225, Train Steps/Sec: 16.35, Grad Norm: 0.0371 +[2025-02-23 08:40:54] (step=0627900) Train Loss: 0.3219, Train Steps/Sec: 16.27, Grad Norm: 0.0345 +[2025-02-23 08:41:00] (step=0628000) Train Loss: 0.3221, Train Steps/Sec: 15.15, Grad Norm: 0.0378 +[2025-02-23 08:41:08] (step=0628100) Train Loss: 0.3217, Train Steps/Sec: 13.48, Grad Norm: 0.0370 +[2025-02-23 08:41:14] (step=0628200) Train Loss: 0.3221, Train Steps/Sec: 17.22, Grad Norm: 0.0365 +[2025-02-23 08:41:20] (step=0628300) Train Loss: 0.3218, Train Steps/Sec: 16.52, Grad Norm: 0.0364 +[2025-02-23 08:41:25] (step=0628400) Train Loss: 0.3223, Train Steps/Sec: 17.26, Grad Norm: 0.0414 +[2025-02-23 08:41:31] (step=0628500) Train Loss: 0.3222, Train Steps/Sec: 17.20, Grad Norm: 0.0347 +[2025-02-23 08:41:37] (step=0628600) Train Loss: 0.3214, Train Steps/Sec: 17.19, Grad Norm: 0.0377 +[2025-02-23 08:41:43] (step=0628700) Train Loss: 0.3219, Train Steps/Sec: 17.16, Grad Norm: 0.0365 +[2025-02-23 08:41:49] (step=0628800) Train Loss: 0.3224, Train Steps/Sec: 15.76, Grad Norm: 0.0390 +[2025-02-23 08:41:55] (step=0628900) Train Loss: 0.3215, Train Steps/Sec: 16.37, Grad Norm: 0.0371 +[2025-02-23 08:42:02] (step=0629000) Train Loss: 0.3227, Train Steps/Sec: 15.85, Grad Norm: 0.0374 +[2025-02-23 08:42:08] (step=0629100) Train Loss: 0.3223, Train Steps/Sec: 15.82, Grad Norm: 0.0365 +[2025-02-23 08:42:14] (step=0629200) Train Loss: 0.3224, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 08:42:21] (step=0629300) Train Loss: 0.3228, Train Steps/Sec: 14.39, Grad Norm: 0.0374 +[2025-02-23 08:42:27] (step=0629400) Train Loss: 0.3221, Train Steps/Sec: 16.37, Grad Norm: 0.0372 +[2025-02-23 08:42:33] (step=0629500) Train Loss: 0.3225, Train Steps/Sec: 17.30, Grad Norm: 0.0351 +[2025-02-23 08:42:38] (step=0629600) Train Loss: 0.3219, Train Steps/Sec: 17.21, Grad Norm: 0.0351 +[2025-02-23 08:42:44] (step=0629700) Train Loss: 0.3222, Train Steps/Sec: 17.19, Grad Norm: 0.0407 +[2025-02-23 08:42:50] (step=0629800) Train Loss: 0.3220, Train Steps/Sec: 16.46, Grad Norm: 0.0371 +[2025-02-23 08:42:57] (step=0629900) Train Loss: 0.3218, Train Steps/Sec: 15.81, Grad Norm: 0.0391 +[2025-02-23 08:43:03] (step=0630000) Train Loss: 0.3216, Train Steps/Sec: 14.70, Grad Norm: 0.0406 +[2025-02-23 08:43:09] (step=0630100) Train Loss: 0.3225, Train Steps/Sec: 17.24, Grad Norm: 0.0382 +[2025-02-23 08:43:15] (step=0630200) Train Loss: 0.3214, Train Steps/Sec: 17.29, Grad Norm: 0.0350 +[2025-02-23 08:43:21] (step=0630300) Train Loss: 0.3218, Train Steps/Sec: 16.62, Grad Norm: 0.0363 +[2025-02-23 08:43:27] (step=0630400) Train Loss: 0.3222, Train Steps/Sec: 17.29, Grad Norm: 0.0362 +[2025-02-23 08:43:33] (step=0630500) Train Loss: 0.3216, Train Steps/Sec: 17.21, Grad Norm: 0.0384 +[2025-02-23 08:43:40] (step=0630600) Train Loss: 0.3221, Train Steps/Sec: 14.57, Grad Norm: 0.0397 +[2025-02-23 08:43:45] (step=0630700) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0367 +[2025-02-23 08:43:52] (step=0630800) Train Loss: 0.3218, Train Steps/Sec: 15.82, Grad Norm: 0.0353 +[2025-02-23 08:43:58] (step=0630900) Train Loss: 0.3219, Train Steps/Sec: 16.56, Grad Norm: 0.0363 +[2025-02-23 08:44:04] (step=0631000) Train Loss: 0.3215, Train Steps/Sec: 15.78, Grad Norm: 0.0348 +[2025-02-23 08:44:10] (step=0631100) Train Loss: 0.3220, Train Steps/Sec: 15.79, Grad Norm: 0.0351 +[2025-02-23 08:44:16] (step=0631200) Train Loss: 0.3220, Train Steps/Sec: 17.24, Grad Norm: 0.0381 +[2025-02-23 08:44:22] (step=0631300) Train Loss: 0.3219, Train Steps/Sec: 17.30, Grad Norm: 0.0354 +[2025-02-23 08:44:28] (step=0631400) Train Loss: 0.3226, Train Steps/Sec: 16.49, Grad Norm: 0.0363 +[2025-02-23 08:44:34] (step=0631500) Train Loss: 0.3219, Train Steps/Sec: 17.19, Grad Norm: 0.0368 +[2025-02-23 08:44:40] (step=0631600) Train Loss: 0.3217, Train Steps/Sec: 17.18, Grad Norm: 0.0418 +[2025-02-23 08:44:46] (step=0631700) Train Loss: 0.3217, Train Steps/Sec: 16.48, Grad Norm: 0.0330 +[2025-02-23 08:44:53] (step=0631800) Train Loss: 0.3219, Train Steps/Sec: 13.32, Grad Norm: 0.0371 +[2025-02-23 08:45:00] (step=0631900) Train Loss: 0.3220, Train Steps/Sec: 14.60, Grad Norm: 0.0366 +[2025-02-23 08:45:06] (step=0632000) Train Loss: 0.3224, Train Steps/Sec: 17.23, Grad Norm: 0.0374 +[2025-02-23 08:45:12] (step=0632100) Train Loss: 0.3219, Train Steps/Sec: 17.20, Grad Norm: 0.0403 +[2025-02-23 08:45:18] (step=0632200) Train Loss: 0.3218, Train Steps/Sec: 16.51, Grad Norm: 0.0339 +[2025-02-23 08:45:24] (step=0632300) Train Loss: 0.3222, Train Steps/Sec: 17.27, Grad Norm: 0.0348 +[2025-02-23 08:45:29] (step=0632400) Train Loss: 0.3221, Train Steps/Sec: 17.28, Grad Norm: 0.0351 +[2025-02-23 08:45:35] (step=0632500) Train Loss: 0.3220, Train Steps/Sec: 17.34, Grad Norm: 0.0375 +[2025-02-23 08:45:41] (step=0632600) Train Loss: 0.3224, Train Steps/Sec: 17.34, Grad Norm: 0.0360 +[2025-02-23 08:45:47] (step=0632700) Train Loss: 0.3219, Train Steps/Sec: 17.25, Grad Norm: 0.0377 +[2025-02-23 08:45:53] (step=0632800) Train Loss: 0.3219, Train Steps/Sec: 15.77, Grad Norm: 0.0348 +[2025-02-23 08:45:59] (step=0632900) Train Loss: 0.3223, Train Steps/Sec: 16.46, Grad Norm: 0.0349 +[2025-02-23 08:46:05] (step=0633000) Train Loss: 0.3225, Train Steps/Sec: 15.81, Grad Norm: 0.0404 +[2025-02-23 08:46:13] (step=0633100) Train Loss: 0.3218, Train Steps/Sec: 13.37, Grad Norm: 0.0367 +[2025-02-23 08:46:19] (step=0633200) Train Loss: 0.3214, Train Steps/Sec: 17.29, Grad Norm: 0.0381 +[2025-02-23 08:46:24] (step=0633300) Train Loss: 0.3213, Train Steps/Sec: 17.28, Grad Norm: 0.0402 +[2025-02-23 08:46:31] (step=0633400) Train Loss: 0.3222, Train Steps/Sec: 16.44, Grad Norm: 0.0355 +[2025-02-23 08:46:36] (step=0633500) Train Loss: 0.3217, Train Steps/Sec: 17.15, Grad Norm: 0.0358 +[2025-02-23 08:46:42] (step=0633600) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0343 +[2025-02-23 08:46:48] (step=0633700) Train Loss: 0.3223, Train Steps/Sec: 16.46, Grad Norm: 0.0380 +[2025-02-23 08:46:55] (step=0633800) Train Loss: 0.3220, Train Steps/Sec: 15.10, Grad Norm: 0.0394 +[2025-02-23 08:47:01] (step=0633900) Train Loss: 0.3219, Train Steps/Sec: 15.21, Grad Norm: 0.0372 +[2025-02-23 08:47:07] (step=0634000) Train Loss: 0.3223, Train Steps/Sec: 17.37, Grad Norm: 0.0409 +[2025-02-23 08:47:13] (step=0634100) Train Loss: 0.3221, Train Steps/Sec: 17.42, Grad Norm: 0.0350 +[2025-02-23 08:47:19] (step=0634200) Train Loss: 0.3221, Train Steps/Sec: 16.75, Grad Norm: 0.0375 +[2025-02-23 08:47:26] (step=0634300) Train Loss: 0.3216, Train Steps/Sec: 14.56, Grad Norm: 0.0356 +[2025-02-23 08:47:32] (step=0634400) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0358 +[2025-02-23 08:47:37] (step=0634500) Train Loss: 0.3215, Train Steps/Sec: 17.35, Grad Norm: 0.0423 +[2025-02-23 08:47:43] (step=0634600) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0360 +[2025-02-23 08:47:49] (step=0634700) Train Loss: 0.3217, Train Steps/Sec: 17.28, Grad Norm: 0.0361 +[2025-02-23 08:47:55] (step=0634800) Train Loss: 0.3216, Train Steps/Sec: 15.83, Grad Norm: 0.0393 +[2025-02-23 08:48:01] (step=0634900) Train Loss: 0.3220, Train Steps/Sec: 16.51, Grad Norm: 0.0340 +[2025-02-23 08:48:08] (step=0635000) Train Loss: 0.3220, Train Steps/Sec: 15.85, Grad Norm: 0.0371 +[2025-02-23 08:48:14] (step=0635100) Train Loss: 0.3223, Train Steps/Sec: 15.74, Grad Norm: 0.0368 +[2025-02-23 08:48:20] (step=0635200) Train Loss: 0.3222, Train Steps/Sec: 17.30, Grad Norm: 0.0370 +[2025-02-23 08:48:25] (step=0635300) Train Loss: 0.3221, Train Steps/Sec: 17.35, Grad Norm: 0.0370 +[2025-02-23 08:48:31] (step=0635400) Train Loss: 0.3216, Train Steps/Sec: 17.35, Grad Norm: 0.0421 +[2025-02-23 08:48:37] (step=0635500) Train Loss: 0.3220, Train Steps/Sec: 16.42, Grad Norm: 0.0377 +[2025-02-23 08:48:44] (step=0635600) Train Loss: 0.3218, Train Steps/Sec: 14.03, Grad Norm: 0.0374 +[2025-02-23 08:48:51] (step=0635700) Train Loss: 0.3219, Train Steps/Sec: 15.90, Grad Norm: 0.0361 +[2025-02-23 08:48:58] (step=0635800) Train Loss: 0.3219, Train Steps/Sec: 14.80, Grad Norm: 0.0373 +[2025-02-23 08:49:03] (step=0635900) Train Loss: 0.3216, Train Steps/Sec: 17.34, Grad Norm: 0.0410 +[2025-02-23 08:49:09] (step=0636000) Train Loss: 0.3220, Train Steps/Sec: 17.29, Grad Norm: 0.0387 +[2025-02-23 08:49:15] (step=0636100) Train Loss: 0.3219, Train Steps/Sec: 16.58, Grad Norm: 0.0395 +[2025-02-23 08:49:21] (step=0636200) Train Loss: 0.3226, Train Steps/Sec: 17.22, Grad Norm: 0.0404 +[2025-02-23 08:49:27] (step=0636300) Train Loss: 0.3215, Train Steps/Sec: 17.25, Grad Norm: 0.0374 +[2025-02-23 08:49:32] (step=0636400) Train Loss: 0.3222, Train Steps/Sec: 17.34, Grad Norm: 0.0364 +[2025-02-23 08:49:38] (step=0636500) Train Loss: 0.3220, Train Steps/Sec: 17.26, Grad Norm: 0.0378 +[2025-02-23 08:49:44] (step=0636600) Train Loss: 0.3221, Train Steps/Sec: 17.22, Grad Norm: 0.0362 +[2025-02-23 08:49:50] (step=0636700) Train Loss: 0.3221, Train Steps/Sec: 17.17, Grad Norm: 0.0360 +[2025-02-23 08:49:57] (step=0636800) Train Loss: 0.3225, Train Steps/Sec: 13.37, Grad Norm: 0.0380 +[2025-02-23 08:50:04] (step=0636900) Train Loss: 0.3221, Train Steps/Sec: 16.33, Grad Norm: 0.0341 +[2025-02-23 08:50:10] (step=0637000) Train Loss: 0.3220, Train Steps/Sec: 15.65, Grad Norm: 0.0346 +[2025-02-23 08:50:16] (step=0637100) Train Loss: 0.3221, Train Steps/Sec: 15.63, Grad Norm: 0.0350 +[2025-02-23 08:50:22] (step=0637200) Train Loss: 0.3224, Train Steps/Sec: 17.08, Grad Norm: 0.0383 +[2025-02-23 08:50:28] (step=0637300) Train Loss: 0.3215, Train Steps/Sec: 17.03, Grad Norm: 0.0390 +[2025-02-23 08:50:34] (step=0637400) Train Loss: 0.3217, Train Steps/Sec: 17.07, Grad Norm: 0.0357 +[2025-02-23 08:50:40] (step=0637500) Train Loss: 0.3218, Train Steps/Sec: 15.78, Grad Norm: 0.0361 +[2025-02-23 08:50:46] (step=0637600) Train Loss: 0.3213, Train Steps/Sec: 16.54, Grad Norm: 0.0362 +[2025-02-23 08:50:53] (step=0637700) Train Loss: 0.3219, Train Steps/Sec: 15.89, Grad Norm: 0.0336 +[2025-02-23 08:50:59] (step=0637800) Train Loss: 0.3222, Train Steps/Sec: 15.31, Grad Norm: 0.0382 +[2025-02-23 08:51:05] (step=0637900) Train Loss: 0.3223, Train Steps/Sec: 17.28, Grad Norm: 0.0399 +[2025-02-23 08:51:11] (step=0638000) Train Loss: 0.3215, Train Steps/Sec: 16.55, Grad Norm: 0.0363 +[2025-02-23 08:51:18] (step=0638100) Train Loss: 0.3215, Train Steps/Sec: 14.52, Grad Norm: 0.0372 +[2025-02-23 08:51:24] (step=0638200) Train Loss: 0.3220, Train Steps/Sec: 17.33, Grad Norm: 0.0409 +[2025-02-23 08:51:29] (step=0638300) Train Loss: 0.3216, Train Steps/Sec: 17.38, Grad Norm: 0.0355 +[2025-02-23 08:51:35] (step=0638400) Train Loss: 0.3221, Train Steps/Sec: 17.26, Grad Norm: 0.0361 +[2025-02-23 08:51:41] (step=0638500) Train Loss: 0.3223, Train Steps/Sec: 17.40, Grad Norm: 0.0365 +[2025-02-23 08:51:47] (step=0638600) Train Loss: 0.3219, Train Steps/Sec: 17.38, Grad Norm: 0.0353 +[2025-02-23 08:51:52] (step=0638700) Train Loss: 0.3221, Train Steps/Sec: 17.35, Grad Norm: 0.0359 +[2025-02-23 08:51:59] (step=0638800) Train Loss: 0.3221, Train Steps/Sec: 15.85, Grad Norm: 0.0353 +[2025-02-23 08:52:05] (step=0638900) Train Loss: 0.3219, Train Steps/Sec: 16.54, Grad Norm: 0.0368 +[2025-02-23 08:52:11] (step=0639000) Train Loss: 0.3217, Train Steps/Sec: 15.83, Grad Norm: 0.0326 +[2025-02-23 08:52:17] (step=0639100) Train Loss: 0.3217, Train Steps/Sec: 15.88, Grad Norm: 0.0375 +[2025-02-23 08:52:23] (step=0639200) Train Loss: 0.3220, Train Steps/Sec: 17.33, Grad Norm: 0.0329 +[2025-02-23 08:52:30] (step=0639300) Train Loss: 0.3221, Train Steps/Sec: 14.50, Grad Norm: 0.0347 +[2025-02-23 08:52:36] (step=0639400) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0350 +[2025-02-23 08:52:42] (step=0639500) Train Loss: 0.3214, Train Steps/Sec: 15.81, Grad Norm: 0.0367 +[2025-02-23 08:52:49] (step=0639600) Train Loss: 0.3220, Train Steps/Sec: 15.78, Grad Norm: 0.0347 +[2025-02-23 08:52:55] (step=0639700) Train Loss: 0.3220, Train Steps/Sec: 14.72, Grad Norm: 0.0348 +[2025-02-23 08:53:01] (step=0639800) Train Loss: 0.3222, Train Steps/Sec: 17.31, Grad Norm: 0.0372 +[2025-02-23 08:53:07] (step=0639900) Train Loss: 0.3220, Train Steps/Sec: 17.30, Grad Norm: 0.0379 +[2025-02-23 08:53:13] (step=0640000) Train Loss: 0.3219, Train Steps/Sec: 16.65, Grad Norm: 0.0350 +[2025-02-23 08:53:19] (step=0640100) Train Loss: 0.3222, Train Steps/Sec: 17.38, Grad Norm: 0.0357 +[2025-02-23 08:53:24] (step=0640200) Train Loss: 0.3213, Train Steps/Sec: 17.44, Grad Norm: 0.0368 +[2025-02-23 08:53:30] (step=0640300) Train Loss: 0.3225, Train Steps/Sec: 17.34, Grad Norm: 0.0355 +[2025-02-23 08:53:36] (step=0640400) Train Loss: 0.3219, Train Steps/Sec: 17.34, Grad Norm: 0.0337 +[2025-02-23 08:53:42] (step=0640500) Train Loss: 0.3221, Train Steps/Sec: 17.27, Grad Norm: 0.0388 +[2025-02-23 08:53:49] (step=0640600) Train Loss: 0.3216, Train Steps/Sec: 14.59, Grad Norm: 0.0397 +[2025-02-23 08:53:54] (step=0640700) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0361 +[2025-02-23 08:54:00] (step=0640800) Train Loss: 0.3224, Train Steps/Sec: 16.47, Grad Norm: 0.0350 +[2025-02-23 08:54:07] (step=0640900) Train Loss: 0.3221, Train Steps/Sec: 15.79, Grad Norm: 0.0354 +[2025-02-23 08:54:13] (step=0641000) Train Loss: 0.3220, Train Steps/Sec: 15.85, Grad Norm: 0.0450 +[2025-02-23 08:54:19] (step=0641100) Train Loss: 0.3215, Train Steps/Sec: 15.83, Grad Norm: 0.0397 +[2025-02-23 08:54:25] (step=0641200) Train Loss: 0.3217, Train Steps/Sec: 17.29, Grad Norm: 0.0372 +[2025-02-23 08:54:31] (step=0641300) Train Loss: 0.3218, Train Steps/Sec: 17.38, Grad Norm: 0.0376 +[2025-02-23 08:54:37] (step=0641400) Train Loss: 0.3215, Train Steps/Sec: 17.33, Grad Norm: 0.0360 +[2025-02-23 08:54:43] (step=0641500) Train Loss: 0.3226, Train Steps/Sec: 15.21, Grad Norm: 0.0372 +[2025-02-23 08:54:50] (step=0641600) Train Loss: 0.3216, Train Steps/Sec: 15.94, Grad Norm: 0.0325 +[2025-02-23 08:54:56] (step=0641700) Train Loss: 0.3223, Train Steps/Sec: 15.28, Grad Norm: 0.0403 +[2025-02-23 08:55:03] (step=0641800) Train Loss: 0.3219, Train Steps/Sec: 14.54, Grad Norm: 0.0365 +[2025-02-23 08:55:09] (step=0641900) Train Loss: 0.3219, Train Steps/Sec: 16.68, Grad Norm: 0.0367 +[2025-02-23 08:55:15] (step=0642000) Train Loss: 0.3214, Train Steps/Sec: 17.43, Grad Norm: 0.0356 +[2025-02-23 08:55:20] (step=0642100) Train Loss: 0.3218, Train Steps/Sec: 17.41, Grad Norm: 0.0369 +[2025-02-23 08:55:26] (step=0642200) Train Loss: 0.3215, Train Steps/Sec: 17.34, Grad Norm: 0.0351 +[2025-02-23 08:55:32] (step=0642300) Train Loss: 0.3219, Train Steps/Sec: 17.38, Grad Norm: 0.0345 +[2025-02-23 08:55:38] (step=0642400) Train Loss: 0.3227, Train Steps/Sec: 17.38, Grad Norm: 0.0352 +[2025-02-23 08:55:43] (step=0642500) Train Loss: 0.3215, Train Steps/Sec: 17.45, Grad Norm: 0.0400 +[2025-02-23 08:55:49] (step=0642600) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0373 +[2025-02-23 08:55:55] (step=0642700) Train Loss: 0.3220, Train Steps/Sec: 17.41, Grad Norm: 0.0376 +[2025-02-23 08:56:01] (step=0642800) Train Loss: 0.3216, Train Steps/Sec: 16.62, Grad Norm: 0.0373 +[2025-02-23 08:56:07] (step=0642900) Train Loss: 0.3218, Train Steps/Sec: 15.79, Grad Norm: 0.0347 +[2025-02-23 08:56:14] (step=0643000) Train Loss: 0.3218, Train Steps/Sec: 15.73, Grad Norm: 0.0341 +[2025-02-23 08:56:21] (step=0643100) Train Loss: 0.3223, Train Steps/Sec: 13.48, Grad Norm: 0.0355 +[2025-02-23 08:56:27] (step=0643200) Train Loss: 0.3214, Train Steps/Sec: 17.32, Grad Norm: 0.0333 +[2025-02-23 08:56:33] (step=0643300) Train Loss: 0.3221, Train Steps/Sec: 17.33, Grad Norm: 0.0365 +[2025-02-23 08:56:39] (step=0643400) Train Loss: 0.3219, Train Steps/Sec: 16.55, Grad Norm: 0.0354 +[2025-02-23 08:56:45] (step=0643500) Train Loss: 0.3227, Train Steps/Sec: 15.80, Grad Norm: 0.0388 +[2025-02-23 08:56:52] (step=0643600) Train Loss: 0.3213, Train Steps/Sec: 14.71, Grad Norm: 0.0375 +[2025-02-23 08:56:58] (step=0643700) Train Loss: 0.3219, Train Steps/Sec: 17.32, Grad Norm: 0.0363 +[2025-02-23 08:57:03] (step=0643800) Train Loss: 0.3219, Train Steps/Sec: 17.38, Grad Norm: 0.0359 +[2025-02-23 08:57:09] (step=0643900) Train Loss: 0.3212, Train Steps/Sec: 16.72, Grad Norm: 0.0377 +[2025-02-23 08:57:15] (step=0644000) Train Loss: 0.3221, Train Steps/Sec: 17.45, Grad Norm: 0.0391 +[2025-02-23 08:57:21] (step=0644100) Train Loss: 0.3221, Train Steps/Sec: 17.40, Grad Norm: 0.0354 +[2025-02-23 08:57:27] (step=0644200) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0381 +[2025-02-23 08:57:34] (step=0644300) Train Loss: 0.3225, Train Steps/Sec: 14.29, Grad Norm: 0.0364 +[2025-02-23 08:57:39] (step=0644400) Train Loss: 0.3216, Train Steps/Sec: 17.25, Grad Norm: 0.0353 +[2025-02-23 08:57:45] (step=0644500) Train Loss: 0.3224, Train Steps/Sec: 17.28, Grad Norm: 0.0399 +[2025-02-23 08:57:51] (step=0644600) Train Loss: 0.3226, Train Steps/Sec: 17.31, Grad Norm: 0.0376 +[2025-02-23 08:57:57] (step=0644700) Train Loss: 0.3220, Train Steps/Sec: 17.40, Grad Norm: 0.0410 +[2025-02-23 08:58:03] (step=0644800) Train Loss: 0.3216, Train Steps/Sec: 16.60, Grad Norm: 0.0371 +[2025-02-23 08:58:09] (step=0644900) Train Loss: 0.3220, Train Steps/Sec: 15.85, Grad Norm: 0.0367 +[2025-02-23 08:58:15] (step=0645000) Train Loss: 0.3218, Train Steps/Sec: 15.93, Grad Norm: 0.0381 +[2025-02-23 08:58:21] (step=0645100) Train Loss: 0.3214, Train Steps/Sec: 16.60, Grad Norm: 0.0352 +[2025-02-23 08:58:27] (step=0645200) Train Loss: 0.3220, Train Steps/Sec: 16.60, Grad Norm: 0.0395 +[2025-02-23 08:58:33] (step=0645300) Train Loss: 0.3224, Train Steps/Sec: 17.42, Grad Norm: 0.0373 +[2025-02-23 08:58:40] (step=0645400) Train Loss: 0.3220, Train Steps/Sec: 15.33, Grad Norm: 0.0343 +[2025-02-23 08:58:46] (step=0645500) Train Loss: 0.3220, Train Steps/Sec: 15.87, Grad Norm: 0.0374 +[2025-02-23 08:58:54] (step=0645600) Train Loss: 0.3219, Train Steps/Sec: 12.98, Grad Norm: 0.0361 +[2025-02-23 08:58:59] (step=0645700) Train Loss: 0.3218, Train Steps/Sec: 17.09, Grad Norm: 0.0373 +[2025-02-23 08:59:06] (step=0645800) Train Loss: 0.3216, Train Steps/Sec: 16.55, Grad Norm: 0.0356 +[2025-02-23 08:59:11] (step=0645900) Train Loss: 0.3220, Train Steps/Sec: 17.30, Grad Norm: 0.0371 +[2025-02-23 08:59:17] (step=0646000) Train Loss: 0.3221, Train Steps/Sec: 17.33, Grad Norm: 0.0416 +[2025-02-23 08:59:23] (step=0646100) Train Loss: 0.3218, Train Steps/Sec: 17.35, Grad Norm: 0.0361 +[2025-02-23 08:59:29] (step=0646200) Train Loss: 0.3223, Train Steps/Sec: 17.43, Grad Norm: 0.0367 +[2025-02-23 08:59:34] (step=0646300) Train Loss: 0.3217, Train Steps/Sec: 17.39, Grad Norm: 0.0349 +[2025-02-23 08:59:40] (step=0646400) Train Loss: 0.3220, Train Steps/Sec: 17.29, Grad Norm: 0.0356 +[2025-02-23 08:59:46] (step=0646500) Train Loss: 0.3221, Train Steps/Sec: 17.25, Grad Norm: 0.0340 +[2025-02-23 08:59:52] (step=0646600) Train Loss: 0.3221, Train Steps/Sec: 17.22, Grad Norm: 0.0396 +[2025-02-23 08:59:58] (step=0646700) Train Loss: 0.3219, Train Steps/Sec: 17.24, Grad Norm: 0.0354 +[2025-02-23 09:00:05] (step=0646800) Train Loss: 0.3219, Train Steps/Sec: 13.92, Grad Norm: 0.0372 +[2025-02-23 09:00:11] (step=0646900) Train Loss: 0.3223, Train Steps/Sec: 15.88, Grad Norm: 0.0383 +[2025-02-23 09:00:17] (step=0647000) Train Loss: 0.3217, Train Steps/Sec: 15.83, Grad Norm: 0.0415 +[2025-02-23 09:00:23] (step=0647100) Train Loss: 0.3224, Train Steps/Sec: 16.50, Grad Norm: 0.0346 +[2025-02-23 09:00:29] (step=0647200) Train Loss: 0.3216, Train Steps/Sec: 16.65, Grad Norm: 0.0335 +[2025-02-23 09:00:35] (step=0647300) Train Loss: 0.3219, Train Steps/Sec: 16.45, Grad Norm: 0.0370 +[2025-02-23 09:00:42] (step=0647400) Train Loss: 0.3218, Train Steps/Sec: 15.12, Grad Norm: 0.0410 +[2025-02-23 09:00:49] (step=0647500) Train Loss: 0.3218, Train Steps/Sec: 14.39, Grad Norm: 0.0373 +[2025-02-23 09:00:55] (step=0647600) Train Loss: 0.3215, Train Steps/Sec: 17.13, Grad Norm: 0.0376 +[2025-02-23 09:01:01] (step=0647700) Train Loss: 0.3216, Train Steps/Sec: 16.45, Grad Norm: 0.0407 +[2025-02-23 09:01:07] (step=0647800) Train Loss: 0.3218, Train Steps/Sec: 17.14, Grad Norm: 0.0372 +[2025-02-23 09:01:13] (step=0647900) Train Loss: 0.3224, Train Steps/Sec: 17.15, Grad Norm: 0.0358 +[2025-02-23 09:01:18] (step=0648000) Train Loss: 0.3216, Train Steps/Sec: 17.06, Grad Norm: 0.0376 +[2025-02-23 09:01:25] (step=0648100) Train Loss: 0.3223, Train Steps/Sec: 14.27, Grad Norm: 0.0384 +[2025-02-23 09:01:31] (step=0648200) Train Loss: 0.3217, Train Steps/Sec: 17.18, Grad Norm: 0.0357 +[2025-02-23 09:01:37] (step=0648300) Train Loss: 0.3218, Train Steps/Sec: 17.18, Grad Norm: 0.0342 +[2025-02-23 09:01:43] (step=0648400) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0336 +[2025-02-23 09:01:49] (step=0648500) Train Loss: 0.3218, Train Steps/Sec: 17.32, Grad Norm: 0.0392 +[2025-02-23 09:01:54] (step=0648600) Train Loss: 0.3215, Train Steps/Sec: 17.39, Grad Norm: 0.0381 +[2025-02-23 09:02:00] (step=0648700) Train Loss: 0.3220, Train Steps/Sec: 17.28, Grad Norm: 0.0355 +[2025-02-23 09:02:06] (step=0648800) Train Loss: 0.3211, Train Steps/Sec: 16.50, Grad Norm: 0.0366 +[2025-02-23 09:02:13] (step=0648900) Train Loss: 0.3217, Train Steps/Sec: 15.79, Grad Norm: 0.0372 +[2025-02-23 09:02:19] (step=0649000) Train Loss: 0.3213, Train Steps/Sec: 15.87, Grad Norm: 0.0390 +[2025-02-23 09:02:25] (step=0649100) Train Loss: 0.3225, Train Steps/Sec: 16.47, Grad Norm: 0.0376 +[2025-02-23 09:02:31] (step=0649200) Train Loss: 0.3217, Train Steps/Sec: 16.47, Grad Norm: 0.0366 +[2025-02-23 09:02:39] (step=0649300) Train Loss: 0.3222, Train Steps/Sec: 12.95, Grad Norm: 0.0359 +[2025-02-23 09:02:46] (step=0649400) Train Loss: 0.3219, Train Steps/Sec: 14.64, Grad Norm: 0.0327 +[2025-02-23 09:02:52] (step=0649500) Train Loss: 0.3222, Train Steps/Sec: 16.52, Grad Norm: 0.0373 +[2025-02-23 09:02:57] (step=0649600) Train Loss: 0.3216, Train Steps/Sec: 17.35, Grad Norm: 0.0367 +[2025-02-23 09:03:03] (step=0649700) Train Loss: 0.3220, Train Steps/Sec: 16.62, Grad Norm: 0.0334 +[2025-02-23 09:03:09] (step=0649800) Train Loss: 0.3224, Train Steps/Sec: 17.38, Grad Norm: 0.0405 +[2025-02-23 09:03:15] (step=0649900) Train Loss: 0.3224, Train Steps/Sec: 17.37, Grad Norm: 0.0357 +[2025-02-23 09:03:21] (step=0650000) Train Loss: 0.3221, Train Steps/Sec: 17.36, Grad Norm: 0.0356 +[2025-02-23 09:03:22] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0650000.pt +[2025-02-23 09:03:28] (step=0650100) Train Loss: 0.3226, Train Steps/Sec: 14.82, Grad Norm: 0.0367 +[2025-02-23 09:03:33] (step=0650200) Train Loss: 0.3219, Train Steps/Sec: 17.18, Grad Norm: 0.0397 +[2025-02-23 09:03:39] (step=0650300) Train Loss: 0.3209, Train Steps/Sec: 17.25, Grad Norm: 0.0346 +[2025-02-23 09:03:45] (step=0650400) Train Loss: 0.3220, Train Steps/Sec: 17.16, Grad Norm: 0.0367 +[2025-02-23 09:03:51] (step=0650500) Train Loss: 0.3218, Train Steps/Sec: 17.22, Grad Norm: 0.0361 +[2025-02-23 09:03:58] (step=0650600) Train Loss: 0.3218, Train Steps/Sec: 14.46, Grad Norm: 0.0378 +[2025-02-23 09:04:03] (step=0650700) Train Loss: 0.3221, Train Steps/Sec: 17.31, Grad Norm: 0.0338 +[2025-02-23 09:04:10] (step=0650800) Train Loss: 0.3215, Train Steps/Sec: 16.59, Grad Norm: 0.0374 +[2025-02-23 09:04:16] (step=0650900) Train Loss: 0.3218, Train Steps/Sec: 15.90, Grad Norm: 0.0386 +[2025-02-23 09:04:22] (step=0651000) Train Loss: 0.3220, Train Steps/Sec: 15.88, Grad Norm: 0.0340 +[2025-02-23 09:04:28] (step=0651100) Train Loss: 0.3215, Train Steps/Sec: 16.52, Grad Norm: 0.0382 +[2025-02-23 09:04:35] (step=0651200) Train Loss: 0.3215, Train Steps/Sec: 15.22, Grad Norm: 0.0336 +[2025-02-23 09:04:41] (step=0651300) Train Loss: 0.3215, Train Steps/Sec: 15.92, Grad Norm: 0.0390 +[2025-02-23 09:04:48] (step=0651400) Train Loss: 0.3216, Train Steps/Sec: 15.20, Grad Norm: 0.0374 +[2025-02-23 09:04:54] (step=0651500) Train Loss: 0.3224, Train Steps/Sec: 16.54, Grad Norm: 0.0349 +[2025-02-23 09:05:00] (step=0651600) Train Loss: 0.3220, Train Steps/Sec: 16.64, Grad Norm: 0.0374 +[2025-02-23 09:05:05] (step=0651700) Train Loss: 0.3223, Train Steps/Sec: 17.35, Grad Norm: 0.0379 +[2025-02-23 09:05:12] (step=0651800) Train Loss: 0.3216, Train Steps/Sec: 14.47, Grad Norm: 0.0395 +[2025-02-23 09:05:18] (step=0651900) Train Loss: 0.3215, Train Steps/Sec: 17.38, Grad Norm: 0.0386 +[2025-02-23 09:05:24] (step=0652000) Train Loss: 0.3219, Train Steps/Sec: 17.36, Grad Norm: 0.0395 +[2025-02-23 09:05:30] (step=0652100) Train Loss: 0.3220, Train Steps/Sec: 17.35, Grad Norm: 0.0352 +[2025-02-23 09:05:35] (step=0652200) Train Loss: 0.3221, Train Steps/Sec: 17.34, Grad Norm: 0.0394 +[2025-02-23 09:05:41] (step=0652300) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0408 +[2025-02-23 09:05:47] (step=0652400) Train Loss: 0.3212, Train Steps/Sec: 17.32, Grad Norm: 0.0396 +[2025-02-23 09:05:53] (step=0652500) Train Loss: 0.3222, Train Steps/Sec: 17.39, Grad Norm: 0.0357 +[2025-02-23 09:05:58] (step=0652600) Train Loss: 0.3221, Train Steps/Sec: 17.35, Grad Norm: 0.0359 +[2025-02-23 09:06:04] (step=0652700) Train Loss: 0.3215, Train Steps/Sec: 17.30, Grad Norm: 0.0354 +[2025-02-23 09:06:10] (step=0652800) Train Loss: 0.3220, Train Steps/Sec: 16.64, Grad Norm: 0.0404 +[2025-02-23 09:06:17] (step=0652900) Train Loss: 0.3221, Train Steps/Sec: 15.89, Grad Norm: 0.0373 +[2025-02-23 09:06:23] (step=0653000) Train Loss: 0.3220, Train Steps/Sec: 15.77, Grad Norm: 0.0404 +[2025-02-23 09:06:30] (step=0653100) Train Loss: 0.3217, Train Steps/Sec: 13.48, Grad Norm: 0.0365 +[2025-02-23 09:06:37] (step=0653200) Train Loss: 0.3217, Train Steps/Sec: 15.25, Grad Norm: 0.0382 +[2025-02-23 09:06:44] (step=0653300) Train Loss: 0.3216, Train Steps/Sec: 14.59, Grad Norm: 0.0390 +[2025-02-23 09:06:50] (step=0653400) Train Loss: 0.3220, Train Steps/Sec: 17.20, Grad Norm: 0.0391 +[2025-02-23 09:06:56] (step=0653500) Train Loss: 0.3221, Train Steps/Sec: 16.40, Grad Norm: 0.0382 +[2025-02-23 09:07:02] (step=0653600) Train Loss: 0.3225, Train Steps/Sec: 16.56, Grad Norm: 0.0352 +[2025-02-23 09:07:07] (step=0653700) Train Loss: 0.3220, Train Steps/Sec: 17.31, Grad Norm: 0.0365 +[2025-02-23 09:07:13] (step=0653800) Train Loss: 0.3218, Train Steps/Sec: 17.31, Grad Norm: 0.0350 +[2025-02-23 09:07:19] (step=0653900) Train Loss: 0.3224, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 09:07:25] (step=0654000) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 09:07:31] (step=0654100) Train Loss: 0.3213, Train Steps/Sec: 17.35, Grad Norm: 0.0354 +[2025-02-23 09:07:36] (step=0654200) Train Loss: 0.3215, Train Steps/Sec: 17.31, Grad Norm: 0.0372 +[2025-02-23 09:07:43] (step=0654300) Train Loss: 0.3217, Train Steps/Sec: 14.31, Grad Norm: 0.0340 +[2025-02-23 09:07:49] (step=0654400) Train Loss: 0.3220, Train Steps/Sec: 17.33, Grad Norm: 0.0359 +[2025-02-23 09:07:55] (step=0654500) Train Loss: 0.3221, Train Steps/Sec: 17.37, Grad Norm: 0.0363 +[2025-02-23 09:08:01] (step=0654600) Train Loss: 0.3218, Train Steps/Sec: 17.30, Grad Norm: 0.0388 +[2025-02-23 09:08:06] (step=0654700) Train Loss: 0.3217, Train Steps/Sec: 17.19, Grad Norm: 0.0376 +[2025-02-23 09:08:13] (step=0654800) Train Loss: 0.3217, Train Steps/Sec: 16.45, Grad Norm: 0.0376 +[2025-02-23 09:08:19] (step=0654900) Train Loss: 0.3217, Train Steps/Sec: 15.77, Grad Norm: 0.0339 +[2025-02-23 09:08:25] (step=0655000) Train Loss: 0.3221, Train Steps/Sec: 15.83, Grad Norm: 0.0373 +[2025-02-23 09:08:32] (step=0655100) Train Loss: 0.3219, Train Steps/Sec: 15.04, Grad Norm: 0.0354 +[2025-02-23 09:08:38] (step=0655200) Train Loss: 0.3217, Train Steps/Sec: 15.14, Grad Norm: 0.0352 +[2025-02-23 09:08:45] (step=0655300) Train Loss: 0.3216, Train Steps/Sec: 15.16, Grad Norm: 0.0356 +[2025-02-23 09:08:51] (step=0655400) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0364 +[2025-02-23 09:08:57] (step=0655500) Train Loss: 0.3218, Train Steps/Sec: 15.77, Grad Norm: 0.0397 +[2025-02-23 09:09:04] (step=0655600) Train Loss: 0.3214, Train Steps/Sec: 14.54, Grad Norm: 0.0359 +[2025-02-23 09:09:10] (step=0655700) Train Loss: 0.3221, Train Steps/Sec: 17.38, Grad Norm: 0.0368 +[2025-02-23 09:09:16] (step=0655800) Train Loss: 0.3216, Train Steps/Sec: 17.34, Grad Norm: 0.0369 +[2025-02-23 09:09:21] (step=0655900) Train Loss: 0.3220, Train Steps/Sec: 17.33, Grad Norm: 0.0347 +[2025-02-23 09:09:27] (step=0656000) Train Loss: 0.3215, Train Steps/Sec: 17.35, Grad Norm: 0.0378 +[2025-02-23 09:09:33] (step=0656100) Train Loss: 0.3215, Train Steps/Sec: 17.36, Grad Norm: 0.0338 +[2025-02-23 09:09:39] (step=0656200) Train Loss: 0.3220, Train Steps/Sec: 17.31, Grad Norm: 0.0347 +[2025-02-23 09:09:44] (step=0656300) Train Loss: 0.3211, Train Steps/Sec: 17.32, Grad Norm: 0.0351 +[2025-02-23 09:09:50] (step=0656400) Train Loss: 0.3218, Train Steps/Sec: 17.31, Grad Norm: 0.0373 +[2025-02-23 09:09:56] (step=0656500) Train Loss: 0.3221, Train Steps/Sec: 17.32, Grad Norm: 0.0408 +[2025-02-23 09:10:02] (step=0656600) Train Loss: 0.3217, Train Steps/Sec: 17.42, Grad Norm: 0.0333 +[2025-02-23 09:10:07] (step=0656700) Train Loss: 0.3220, Train Steps/Sec: 17.38, Grad Norm: 0.0330 +[2025-02-23 09:10:15] (step=0656800) Train Loss: 0.3222, Train Steps/Sec: 14.01, Grad Norm: 0.0359 +[2025-02-23 09:10:21] (step=0656900) Train Loss: 0.3219, Train Steps/Sec: 15.80, Grad Norm: 0.0371 +[2025-02-23 09:10:27] (step=0657000) Train Loss: 0.3214, Train Steps/Sec: 15.90, Grad Norm: 0.0381 +[2025-02-23 09:10:34] (step=0657100) Train Loss: 0.3217, Train Steps/Sec: 14.11, Grad Norm: 0.0389 +[2025-02-23 09:10:41] (step=0657200) Train Loss: 0.3220, Train Steps/Sec: 14.65, Grad Norm: 0.0388 +[2025-02-23 09:10:47] (step=0657300) Train Loss: 0.3211, Train Steps/Sec: 17.38, Grad Norm: 0.0363 +[2025-02-23 09:10:53] (step=0657400) Train Loss: 0.3221, Train Steps/Sec: 16.67, Grad Norm: 0.0372 +[2025-02-23 09:10:59] (step=0657500) Train Loss: 0.3224, Train Steps/Sec: 16.60, Grad Norm: 0.0354 +[2025-02-23 09:11:05] (step=0657600) Train Loss: 0.3225, Train Steps/Sec: 17.39, Grad Norm: 0.0373 +[2025-02-23 09:11:10] (step=0657700) Train Loss: 0.3218, Train Steps/Sec: 17.37, Grad Norm: 0.0341 +[2025-02-23 09:11:16] (step=0657800) Train Loss: 0.3221, Train Steps/Sec: 17.37, Grad Norm: 0.0386 +[2025-02-23 09:11:22] (step=0657900) Train Loss: 0.3221, Train Steps/Sec: 17.20, Grad Norm: 0.0372 +[2025-02-23 09:11:28] (step=0658000) Train Loss: 0.3220, Train Steps/Sec: 16.95, Grad Norm: 0.0399 +[2025-02-23 09:11:35] (step=0658100) Train Loss: 0.3212, Train Steps/Sec: 14.57, Grad Norm: 0.0366 +[2025-02-23 09:11:41] (step=0658200) Train Loss: 0.3223, Train Steps/Sec: 17.25, Grad Norm: 0.0355 +[2025-02-23 09:11:46] (step=0658300) Train Loss: 0.3214, Train Steps/Sec: 17.32, Grad Norm: 0.0359 +[2025-02-23 09:11:52] (step=0658400) Train Loss: 0.3221, Train Steps/Sec: 17.40, Grad Norm: 0.0339 +[2025-02-23 09:11:58] (step=0658500) Train Loss: 0.3224, Train Steps/Sec: 17.43, Grad Norm: 0.0377 +[2025-02-23 09:12:04] (step=0658600) Train Loss: 0.3224, Train Steps/Sec: 17.40, Grad Norm: 0.0349 +[2025-02-23 09:12:09] (step=0658700) Train Loss: 0.3218, Train Steps/Sec: 17.34, Grad Norm: 0.0367 +[2025-02-23 09:12:15] (step=0658800) Train Loss: 0.3219, Train Steps/Sec: 16.58, Grad Norm: 0.0367 +[2025-02-23 09:12:22] (step=0658900) Train Loss: 0.3216, Train Steps/Sec: 15.86, Grad Norm: 0.0366 +[2025-02-23 09:12:28] (step=0659000) Train Loss: 0.3214, Train Steps/Sec: 15.21, Grad Norm: 0.0342 +[2025-02-23 09:12:35] (step=0659100) Train Loss: 0.3213, Train Steps/Sec: 14.02, Grad Norm: 0.0342 +[2025-02-23 09:12:42] (step=0659200) Train Loss: 0.3219, Train Steps/Sec: 15.18, Grad Norm: 0.0363 +[2025-02-23 09:12:49] (step=0659300) Train Loss: 0.3214, Train Steps/Sec: 14.45, Grad Norm: 0.0380 +[2025-02-23 09:12:55] (step=0659400) Train Loss: 0.3221, Train Steps/Sec: 16.49, Grad Norm: 0.0334 +[2025-02-23 09:13:01] (step=0659500) Train Loss: 0.3220, Train Steps/Sec: 16.45, Grad Norm: 0.0388 +[2025-02-23 09:13:07] (step=0659600) Train Loss: 0.3213, Train Steps/Sec: 17.17, Grad Norm: 0.0361 +[2025-02-23 09:13:13] (step=0659700) Train Loss: 0.3213, Train Steps/Sec: 17.17, Grad Norm: 0.0375 +[2025-02-23 09:13:19] (step=0659800) Train Loss: 0.3217, Train Steps/Sec: 17.17, Grad Norm: 0.0348 +[2025-02-23 09:13:24] (step=0659900) Train Loss: 0.3225, Train Steps/Sec: 17.24, Grad Norm: 0.0336 +[2025-02-23 09:13:30] (step=0660000) Train Loss: 0.3223, Train Steps/Sec: 17.27, Grad Norm: 0.0359 +[2025-02-23 09:13:36] (step=0660100) Train Loss: 0.3217, Train Steps/Sec: 17.31, Grad Norm: 0.0388 +[2025-02-23 09:13:42] (step=0660200) Train Loss: 0.3216, Train Steps/Sec: 17.33, Grad Norm: 0.0353 +[2025-02-23 09:13:47] (step=0660300) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0390 +[2025-02-23 09:13:53] (step=0660400) Train Loss: 0.3223, Train Steps/Sec: 17.32, Grad Norm: 0.0365 +[2025-02-23 09:13:59] (step=0660500) Train Loss: 0.3214, Train Steps/Sec: 17.19, Grad Norm: 0.0383 +[2025-02-23 09:14:06] (step=0660600) Train Loss: 0.3222, Train Steps/Sec: 14.43, Grad Norm: 0.0401 +[2025-02-23 09:14:12] (step=0660700) Train Loss: 0.3218, Train Steps/Sec: 17.00, Grad Norm: 0.0379 +[2025-02-23 09:14:18] (step=0660800) Train Loss: 0.3215, Train Steps/Sec: 16.47, Grad Norm: 0.0383 +[2025-02-23 09:14:25] (step=0660900) Train Loss: 0.3216, Train Steps/Sec: 14.60, Grad Norm: 0.0422 +[2025-02-23 09:14:31] (step=0661000) Train Loss: 0.3214, Train Steps/Sec: 15.41, Grad Norm: 0.0358 +[2025-02-23 09:14:38] (step=0661100) Train Loss: 0.3217, Train Steps/Sec: 14.05, Grad Norm: 0.0358 +[2025-02-23 09:14:44] (step=0661200) Train Loss: 0.3222, Train Steps/Sec: 16.53, Grad Norm: 0.0383 +[2025-02-23 09:14:50] (step=0661300) Train Loss: 0.3219, Train Steps/Sec: 16.64, Grad Norm: 0.0371 +[2025-02-23 09:14:56] (step=0661400) Train Loss: 0.3220, Train Steps/Sec: 17.39, Grad Norm: 0.0373 +[2025-02-23 09:15:02] (step=0661500) Train Loss: 0.3209, Train Steps/Sec: 16.57, Grad Norm: 0.0333 +[2025-02-23 09:15:08] (step=0661600) Train Loss: 0.3222, Train Steps/Sec: 17.31, Grad Norm: 0.0374 +[2025-02-23 09:15:14] (step=0661700) Train Loss: 0.3217, Train Steps/Sec: 17.30, Grad Norm: 0.0346 +[2025-02-23 09:15:21] (step=0661800) Train Loss: 0.3222, Train Steps/Sec: 14.36, Grad Norm: 0.0355 +[2025-02-23 09:15:27] (step=0661900) Train Loss: 0.3220, Train Steps/Sec: 17.33, Grad Norm: 0.0385 +[2025-02-23 09:15:32] (step=0662000) Train Loss: 0.3215, Train Steps/Sec: 17.40, Grad Norm: 0.0335 +[2025-02-23 09:15:38] (step=0662100) Train Loss: 0.3217, Train Steps/Sec: 17.39, Grad Norm: 0.0396 +[2025-02-23 09:15:44] (step=0662200) Train Loss: 0.3219, Train Steps/Sec: 17.42, Grad Norm: 0.0367 +[2025-02-23 09:15:50] (step=0662300) Train Loss: 0.3212, Train Steps/Sec: 17.38, Grad Norm: 0.0366 +[2025-02-23 09:15:55] (step=0662400) Train Loss: 0.3224, Train Steps/Sec: 17.42, Grad Norm: 0.0374 +[2025-02-23 09:16:01] (step=0662500) Train Loss: 0.3219, Train Steps/Sec: 17.26, Grad Norm: 0.0363 +[2025-02-23 09:16:07] (step=0662600) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0353 +[2025-02-23 09:16:13] (step=0662700) Train Loss: 0.3221, Train Steps/Sec: 17.26, Grad Norm: 0.0367 +[2025-02-23 09:16:19] (step=0662800) Train Loss: 0.3223, Train Steps/Sec: 16.57, Grad Norm: 0.0377 +[2025-02-23 09:16:26] (step=0662900) Train Loss: 0.3217, Train Steps/Sec: 14.06, Grad Norm: 0.0341 +[2025-02-23 09:16:33] (step=0663000) Train Loss: 0.3224, Train Steps/Sec: 14.04, Grad Norm: 0.0336 +[2025-02-23 09:16:40] (step=0663100) Train Loss: 0.3217, Train Steps/Sec: 13.36, Grad Norm: 0.0378 +[2025-02-23 09:16:47] (step=0663200) Train Loss: 0.3218, Train Steps/Sec: 16.46, Grad Norm: 0.0387 +[2025-02-23 09:16:53] (step=0663300) Train Loss: 0.3216, Train Steps/Sec: 16.53, Grad Norm: 0.0364 +[2025-02-23 09:16:58] (step=0663400) Train Loss: 0.3217, Train Steps/Sec: 17.23, Grad Norm: 0.0364 +[2025-02-23 09:17:04] (step=0663500) Train Loss: 0.3220, Train Steps/Sec: 16.43, Grad Norm: 0.0383 +[2025-02-23 09:17:10] (step=0663600) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0362 +[2025-02-23 09:17:16] (step=0663700) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0360 +[2025-02-23 09:17:22] (step=0663800) Train Loss: 0.3214, Train Steps/Sec: 17.28, Grad Norm: 0.0407 +[2025-02-23 09:17:28] (step=0663900) Train Loss: 0.3218, Train Steps/Sec: 17.30, Grad Norm: 0.0352 +[2025-02-23 09:17:33] (step=0664000) Train Loss: 0.3222, Train Steps/Sec: 17.30, Grad Norm: 0.0382 +[2025-02-23 09:17:39] (step=0664100) Train Loss: 0.3219, Train Steps/Sec: 17.32, Grad Norm: 0.0362 +[2025-02-23 09:17:45] (step=0664200) Train Loss: 0.3216, Train Steps/Sec: 17.34, Grad Norm: 0.0365 +[2025-02-23 09:17:52] (step=0664300) Train Loss: 0.3220, Train Steps/Sec: 14.55, Grad Norm: 0.0334 +[2025-02-23 09:17:58] (step=0664400) Train Loss: 0.3216, Train Steps/Sec: 17.26, Grad Norm: 0.0347 +[2025-02-23 09:18:03] (step=0664500) Train Loss: 0.3215, Train Steps/Sec: 17.27, Grad Norm: 0.0396 +[2025-02-23 09:18:09] (step=0664600) Train Loss: 0.3215, Train Steps/Sec: 17.29, Grad Norm: 0.0397 +[2025-02-23 09:18:15] (step=0664700) Train Loss: 0.3220, Train Steps/Sec: 17.27, Grad Norm: 0.0361 +[2025-02-23 09:18:21] (step=0664800) Train Loss: 0.3216, Train Steps/Sec: 16.54, Grad Norm: 0.0352 +[2025-02-23 09:18:28] (step=0664900) Train Loss: 0.3223, Train Steps/Sec: 13.72, Grad Norm: 0.0421 +[2025-02-23 09:18:35] (step=0665000) Train Loss: 0.3216, Train Steps/Sec: 14.70, Grad Norm: 0.0382 +[2025-02-23 09:18:41] (step=0665100) Train Loss: 0.3220, Train Steps/Sec: 15.77, Grad Norm: 0.0352 +[2025-02-23 09:18:48] (step=0665200) Train Loss: 0.3218, Train Steps/Sec: 15.86, Grad Norm: 0.0339 +[2025-02-23 09:18:54] (step=0665300) Train Loss: 0.3225, Train Steps/Sec: 17.35, Grad Norm: 0.0403 +[2025-02-23 09:18:59] (step=0665400) Train Loss: 0.3219, Train Steps/Sec: 17.37, Grad Norm: 0.0383 +[2025-02-23 09:19:05] (step=0665500) Train Loss: 0.3216, Train Steps/Sec: 16.45, Grad Norm: 0.0395 +[2025-02-23 09:19:12] (step=0665600) Train Loss: 0.3217, Train Steps/Sec: 14.40, Grad Norm: 0.0370 +[2025-02-23 09:19:18] (step=0665700) Train Loss: 0.3220, Train Steps/Sec: 17.05, Grad Norm: 0.0374 +[2025-02-23 09:19:24] (step=0665800) Train Loss: 0.3220, Train Steps/Sec: 17.08, Grad Norm: 0.0365 +[2025-02-23 09:19:30] (step=0665900) Train Loss: 0.3221, Train Steps/Sec: 17.09, Grad Norm: 0.0375 +[2025-02-23 09:19:36] (step=0666000) Train Loss: 0.3221, Train Steps/Sec: 17.11, Grad Norm: 0.0366 +[2025-02-23 09:19:42] (step=0666100) Train Loss: 0.3216, Train Steps/Sec: 17.09, Grad Norm: 0.0362 +[2025-02-23 09:19:47] (step=0666200) Train Loss: 0.3216, Train Steps/Sec: 17.10, Grad Norm: 0.0367 +[2025-02-23 09:19:53] (step=0666300) Train Loss: 0.3218, Train Steps/Sec: 17.08, Grad Norm: 0.0347 +[2025-02-23 09:19:59] (step=0666400) Train Loss: 0.3216, Train Steps/Sec: 17.07, Grad Norm: 0.0386 +[2025-02-23 09:20:05] (step=0666500) Train Loss: 0.3218, Train Steps/Sec: 17.06, Grad Norm: 0.0384 +[2025-02-23 09:20:11] (step=0666600) Train Loss: 0.3221, Train Steps/Sec: 17.04, Grad Norm: 0.0371 +[2025-02-23 09:20:17] (step=0666700) Train Loss: 0.3220, Train Steps/Sec: 17.11, Grad Norm: 0.0376 +[2025-02-23 09:20:25] (step=0666800) Train Loss: 0.3218, Train Steps/Sec: 12.58, Grad Norm: 0.0364 +[2025-02-23 09:20:32] (step=0666900) Train Loss: 0.3219, Train Steps/Sec: 13.85, Grad Norm: 0.0386 +[2025-02-23 09:20:38] (step=0667000) Train Loss: 0.3216, Train Steps/Sec: 16.36, Grad Norm: 0.0387 +[2025-02-23 09:20:45] (step=0667100) Train Loss: 0.3218, Train Steps/Sec: 15.03, Grad Norm: 0.0408 +[2025-02-23 09:20:51] (step=0667200) Train Loss: 0.3217, Train Steps/Sec: 16.34, Grad Norm: 0.0338 +[2025-02-23 09:20:57] (step=0667300) Train Loss: 0.3215, Train Steps/Sec: 17.03, Grad Norm: 0.0375 +[2025-02-23 09:21:03] (step=0667400) Train Loss: 0.3225, Train Steps/Sec: 16.96, Grad Norm: 0.0395 +[2025-02-23 09:21:09] (step=0667500) Train Loss: 0.3220, Train Steps/Sec: 16.27, Grad Norm: 0.0389 +[2025-02-23 09:21:15] (step=0667600) Train Loss: 0.3216, Train Steps/Sec: 17.26, Grad Norm: 0.0337 +[2025-02-23 09:21:20] (step=0667700) Train Loss: 0.3217, Train Steps/Sec: 17.22, Grad Norm: 0.0356 +[2025-02-23 09:21:26] (step=0667800) Train Loss: 0.3216, Train Steps/Sec: 17.19, Grad Norm: 0.0353 +[2025-02-23 09:21:32] (step=0667900) Train Loss: 0.3219, Train Steps/Sec: 17.13, Grad Norm: 0.0353 +[2025-02-23 09:21:38] (step=0668000) Train Loss: 0.3217, Train Steps/Sec: 17.21, Grad Norm: 0.0351 +[2025-02-23 09:21:45] (step=0668100) Train Loss: 0.3227, Train Steps/Sec: 14.45, Grad Norm: 0.0357 +[2025-02-23 09:21:50] (step=0668200) Train Loss: 0.3220, Train Steps/Sec: 17.31, Grad Norm: 0.0379 +[2025-02-23 09:21:56] (step=0668300) Train Loss: 0.3219, Train Steps/Sec: 17.45, Grad Norm: 0.0331 +[2025-02-23 09:22:02] (step=0668400) Train Loss: 0.3219, Train Steps/Sec: 17.36, Grad Norm: 0.0376 +[2025-02-23 09:22:08] (step=0668500) Train Loss: 0.3219, Train Steps/Sec: 17.32, Grad Norm: 0.0431 +[2025-02-23 09:22:14] (step=0668600) Train Loss: 0.3218, Train Steps/Sec: 17.40, Grad Norm: 0.0355 +[2025-02-23 09:22:20] (step=0668700) Train Loss: 0.3225, Train Steps/Sec: 16.69, Grad Norm: 0.0365 +[2025-02-23 09:22:26] (step=0668800) Train Loss: 0.3215, Train Steps/Sec: 15.33, Grad Norm: 0.0361 +[2025-02-23 09:22:33] (step=0668900) Train Loss: 0.3220, Train Steps/Sec: 14.07, Grad Norm: 0.0358 +[2025-02-23 09:22:39] (step=0669000) Train Loss: 0.3216, Train Steps/Sec: 15.92, Grad Norm: 0.0424 +[2025-02-23 09:22:46] (step=0669100) Train Loss: 0.3220, Train Steps/Sec: 15.92, Grad Norm: 0.0347 +[2025-02-23 09:22:52] (step=0669200) Train Loss: 0.3216, Train Steps/Sec: 15.93, Grad Norm: 0.0359 +[2025-02-23 09:22:59] (step=0669300) Train Loss: 0.3216, Train Steps/Sec: 14.49, Grad Norm: 0.0360 +[2025-02-23 09:23:05] (step=0669400) Train Loss: 0.3221, Train Steps/Sec: 17.23, Grad Norm: 0.0349 +[2025-02-23 09:23:11] (step=0669500) Train Loss: 0.3215, Train Steps/Sec: 17.20, Grad Norm: 0.0391 +[2025-02-23 09:23:17] (step=0669600) Train Loss: 0.3214, Train Steps/Sec: 16.40, Grad Norm: 0.0361 +[2025-02-23 09:23:22] (step=0669700) Train Loss: 0.3221, Train Steps/Sec: 17.17, Grad Norm: 0.0339 +[2025-02-23 09:23:28] (step=0669800) Train Loss: 0.3218, Train Steps/Sec: 17.09, Grad Norm: 0.0378 +[2025-02-23 09:23:34] (step=0669900) Train Loss: 0.3218, Train Steps/Sec: 17.15, Grad Norm: 0.0379 +[2025-02-23 09:23:40] (step=0670000) Train Loss: 0.3219, Train Steps/Sec: 17.21, Grad Norm: 0.0341 +[2025-02-23 09:23:46] (step=0670100) Train Loss: 0.3220, Train Steps/Sec: 17.22, Grad Norm: 0.0354 +[2025-02-23 09:23:52] (step=0670200) Train Loss: 0.3212, Train Steps/Sec: 17.25, Grad Norm: 0.0350 +[2025-02-23 09:23:57] (step=0670300) Train Loss: 0.3217, Train Steps/Sec: 17.25, Grad Norm: 0.0361 +[2025-02-23 09:24:03] (step=0670400) Train Loss: 0.3219, Train Steps/Sec: 17.21, Grad Norm: 0.0381 +[2025-02-23 09:24:09] (step=0670500) Train Loss: 0.3222, Train Steps/Sec: 17.10, Grad Norm: 0.0346 +[2025-02-23 09:24:16] (step=0670600) Train Loss: 0.3221, Train Steps/Sec: 14.34, Grad Norm: 0.0355 +[2025-02-23 09:24:23] (step=0670700) Train Loss: 0.3216, Train Steps/Sec: 14.65, Grad Norm: 0.0348 +[2025-02-23 09:24:29] (step=0670800) Train Loss: 0.3221, Train Steps/Sec: 15.24, Grad Norm: 0.0366 +[2025-02-23 09:24:36] (step=0670900) Train Loss: 0.3217, Train Steps/Sec: 15.20, Grad Norm: 0.0372 +[2025-02-23 09:24:42] (step=0671000) Train Loss: 0.3216, Train Steps/Sec: 16.01, Grad Norm: 0.0384 +[2025-02-23 09:24:49] (step=0671100) Train Loss: 0.3225, Train Steps/Sec: 15.77, Grad Norm: 0.0388 +[2025-02-23 09:24:55] (step=0671200) Train Loss: 0.3220, Train Steps/Sec: 16.46, Grad Norm: 0.0368 +[2025-02-23 09:25:00] (step=0671300) Train Loss: 0.3214, Train Steps/Sec: 17.08, Grad Norm: 0.0373 +[2025-02-23 09:25:06] (step=0671400) Train Loss: 0.3217, Train Steps/Sec: 17.10, Grad Norm: 0.0333 +[2025-02-23 09:25:12] (step=0671500) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0398 +[2025-02-23 09:25:18] (step=0671600) Train Loss: 0.3214, Train Steps/Sec: 16.47, Grad Norm: 0.0356 +[2025-02-23 09:25:24] (step=0671700) Train Loss: 0.3218, Train Steps/Sec: 17.20, Grad Norm: 0.0431 +[2025-02-23 09:25:31] (step=0671800) Train Loss: 0.3217, Train Steps/Sec: 14.20, Grad Norm: 0.0383 +[2025-02-23 09:25:37] (step=0671900) Train Loss: 0.3221, Train Steps/Sec: 17.30, Grad Norm: 0.0366 +[2025-02-23 09:25:43] (step=0672000) Train Loss: 0.3217, Train Steps/Sec: 17.27, Grad Norm: 0.0374 +[2025-02-23 09:25:48] (step=0672100) Train Loss: 0.3221, Train Steps/Sec: 17.30, Grad Norm: 0.0380 +[2025-02-23 09:25:54] (step=0672200) Train Loss: 0.3216, Train Steps/Sec: 17.28, Grad Norm: 0.0351 +[2025-02-23 09:26:00] (step=0672300) Train Loss: 0.3213, Train Steps/Sec: 17.23, Grad Norm: 0.0361 +[2025-02-23 09:26:06] (step=0672400) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0345 +[2025-02-23 09:26:12] (step=0672500) Train Loss: 0.3220, Train Steps/Sec: 17.39, Grad Norm: 0.0350 +[2025-02-23 09:26:17] (step=0672600) Train Loss: 0.3221, Train Steps/Sec: 17.41, Grad Norm: 0.0365 +[2025-02-23 09:26:24] (step=0672700) Train Loss: 0.3216, Train Steps/Sec: 14.60, Grad Norm: 0.0394 +[2025-02-23 09:26:31] (step=0672800) Train Loss: 0.3216, Train Steps/Sec: 15.12, Grad Norm: 0.0421 +[2025-02-23 09:26:37] (step=0672900) Train Loss: 0.3218, Train Steps/Sec: 15.65, Grad Norm: 0.0335 +[2025-02-23 09:26:44] (step=0673000) Train Loss: 0.3212, Train Steps/Sec: 15.29, Grad Norm: 0.0412 +[2025-02-23 09:26:51] (step=0673100) Train Loss: 0.3219, Train Steps/Sec: 13.32, Grad Norm: 0.0352 +[2025-02-23 09:26:57] (step=0673200) Train Loss: 0.3223, Train Steps/Sec: 16.58, Grad Norm: 0.0364 +[2025-02-23 09:27:03] (step=0673300) Train Loss: 0.3222, Train Steps/Sec: 17.37, Grad Norm: 0.0358 +[2025-02-23 09:27:09] (step=0673400) Train Loss: 0.3219, Train Steps/Sec: 17.43, Grad Norm: 0.0351 +[2025-02-23 09:27:14] (step=0673500) Train Loss: 0.3221, Train Steps/Sec: 17.42, Grad Norm: 0.0341 +[2025-02-23 09:27:20] (step=0673600) Train Loss: 0.3214, Train Steps/Sec: 16.59, Grad Norm: 0.0381 +[2025-02-23 09:27:26] (step=0673700) Train Loss: 0.3218, Train Steps/Sec: 17.33, Grad Norm: 0.0360 +[2025-02-23 09:27:32] (step=0673800) Train Loss: 0.3211, Train Steps/Sec: 17.35, Grad Norm: 0.0418 +[2025-02-23 09:27:38] (step=0673900) Train Loss: 0.3218, Train Steps/Sec: 17.35, Grad Norm: 0.0376 +[2025-02-23 09:27:44] (step=0674000) Train Loss: 0.3221, Train Steps/Sec: 17.40, Grad Norm: 0.0348 +[2025-02-23 09:27:49] (step=0674100) Train Loss: 0.3216, Train Steps/Sec: 17.46, Grad Norm: 0.0352 +[2025-02-23 09:27:55] (step=0674200) Train Loss: 0.3219, Train Steps/Sec: 17.40, Grad Norm: 0.0378 +[2025-02-23 09:28:02] (step=0674300) Train Loss: 0.3213, Train Steps/Sec: 14.49, Grad Norm: 0.0382 +[2025-02-23 09:28:08] (step=0674400) Train Loss: 0.3208, Train Steps/Sec: 17.30, Grad Norm: 0.0360 +[2025-02-23 09:28:13] (step=0674500) Train Loss: 0.3218, Train Steps/Sec: 17.34, Grad Norm: 0.0382 +[2025-02-23 09:28:20] (step=0674600) Train Loss: 0.3215, Train Steps/Sec: 14.63, Grad Norm: 0.0370 +[2025-02-23 09:28:27] (step=0674700) Train Loss: 0.3214, Train Steps/Sec: 15.15, Grad Norm: 0.0352 +[2025-02-23 09:28:33] (step=0674800) Train Loss: 0.3218, Train Steps/Sec: 17.18, Grad Norm: 0.0358 +[2025-02-23 09:28:39] (step=0674900) Train Loss: 0.3221, Train Steps/Sec: 15.08, Grad Norm: 0.0382 +[2025-02-23 09:28:46] (step=0675000) Train Loss: 0.3218, Train Steps/Sec: 15.60, Grad Norm: 0.0352 +[2025-02-23 09:28:52] (step=0675100) Train Loss: 0.3219, Train Steps/Sec: 16.40, Grad Norm: 0.0351 +[2025-02-23 09:28:58] (step=0675200) Train Loss: 0.3217, Train Steps/Sec: 15.67, Grad Norm: 0.0362 +[2025-02-23 09:29:04] (step=0675300) Train Loss: 0.3221, Train Steps/Sec: 17.00, Grad Norm: 0.0391 +[2025-02-23 09:29:10] (step=0675400) Train Loss: 0.3223, Train Steps/Sec: 16.88, Grad Norm: 0.0400 +[2025-02-23 09:29:16] (step=0675500) Train Loss: 0.3219, Train Steps/Sec: 16.94, Grad Norm: 0.0381 +[2025-02-23 09:29:23] (step=0675600) Train Loss: 0.3220, Train Steps/Sec: 13.72, Grad Norm: 0.0376 +[2025-02-23 09:29:29] (step=0675700) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0343 +[2025-02-23 09:29:35] (step=0675800) Train Loss: 0.3218, Train Steps/Sec: 17.32, Grad Norm: 0.0383 +[2025-02-23 09:29:41] (step=0675900) Train Loss: 0.3223, Train Steps/Sec: 17.27, Grad Norm: 0.0365 +[2025-02-23 09:29:46] (step=0676000) Train Loss: 0.3217, Train Steps/Sec: 17.28, Grad Norm: 0.0325 +[2025-02-23 09:29:52] (step=0676100) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0382 +[2025-02-23 09:29:58] (step=0676200) Train Loss: 0.3218, Train Steps/Sec: 17.36, Grad Norm: 0.0337 +[2025-02-23 09:30:04] (step=0676300) Train Loss: 0.3213, Train Steps/Sec: 17.29, Grad Norm: 0.0389 +[2025-02-23 09:30:09] (step=0676400) Train Loss: 0.3218, Train Steps/Sec: 17.29, Grad Norm: 0.0364 +[2025-02-23 09:30:16] (step=0676500) Train Loss: 0.3216, Train Steps/Sec: 16.50, Grad Norm: 0.0364 +[2025-02-23 09:30:22] (step=0676600) Train Loss: 0.3220, Train Steps/Sec: 14.68, Grad Norm: 0.0409 +[2025-02-23 09:30:29] (step=0676700) Train Loss: 0.3221, Train Steps/Sec: 15.74, Grad Norm: 0.0409 +[2025-02-23 09:30:36] (step=0676800) Train Loss: 0.3221, Train Steps/Sec: 14.20, Grad Norm: 0.0377 +[2025-02-23 09:30:42] (step=0676900) Train Loss: 0.3217, Train Steps/Sec: 15.49, Grad Norm: 0.0365 +[2025-02-23 09:30:49] (step=0677000) Train Loss: 0.3221, Train Steps/Sec: 15.72, Grad Norm: 0.0357 +[2025-02-23 09:30:55] (step=0677100) Train Loss: 0.3217, Train Steps/Sec: 16.56, Grad Norm: 0.0343 +[2025-02-23 09:31:01] (step=0677200) Train Loss: 0.3221, Train Steps/Sec: 15.74, Grad Norm: 0.0398 +[2025-02-23 09:31:07] (step=0677300) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0337 +[2025-02-23 09:31:13] (step=0677400) Train Loss: 0.3219, Train Steps/Sec: 17.17, Grad Norm: 0.0356 +[2025-02-23 09:31:18] (step=0677500) Train Loss: 0.3218, Train Steps/Sec: 17.13, Grad Norm: 0.0366 +[2025-02-23 09:31:25] (step=0677600) Train Loss: 0.3216, Train Steps/Sec: 16.38, Grad Norm: 0.0370 +[2025-02-23 09:31:30] (step=0677700) Train Loss: 0.3222, Train Steps/Sec: 17.20, Grad Norm: 0.0358 +[2025-02-23 09:31:36] (step=0677800) Train Loss: 0.3216, Train Steps/Sec: 17.18, Grad Norm: 0.0352 +[2025-02-23 09:31:42] (step=0677900) Train Loss: 0.3220, Train Steps/Sec: 17.24, Grad Norm: 0.0386 +[2025-02-23 09:31:48] (step=0678000) Train Loss: 0.3214, Train Steps/Sec: 17.20, Grad Norm: 0.0382 +[2025-02-23 09:31:55] (step=0678100) Train Loss: 0.3221, Train Steps/Sec: 14.37, Grad Norm: 0.0391 +[2025-02-23 09:32:01] (step=0678200) Train Loss: 0.3218, Train Steps/Sec: 17.04, Grad Norm: 0.0370 +[2025-02-23 09:32:07] (step=0678300) Train Loss: 0.3220, Train Steps/Sec: 17.07, Grad Norm: 0.0354 +[2025-02-23 09:32:12] (step=0678400) Train Loss: 0.3216, Train Steps/Sec: 17.08, Grad Norm: 0.0361 +[2025-02-23 09:32:19] (step=0678500) Train Loss: 0.3220, Train Steps/Sec: 14.52, Grad Norm: 0.0358 +[2025-02-23 09:32:26] (step=0678600) Train Loss: 0.3218, Train Steps/Sec: 15.67, Grad Norm: 0.0419 +[2025-02-23 09:32:31] (step=0678700) Train Loss: 0.3221, Train Steps/Sec: 17.03, Grad Norm: 0.0334 +[2025-02-23 09:32:38] (step=0678800) Train Loss: 0.3216, Train Steps/Sec: 16.34, Grad Norm: 0.0385 +[2025-02-23 09:32:44] (step=0678900) Train Loss: 0.3218, Train Steps/Sec: 15.54, Grad Norm: 0.0361 +[2025-02-23 09:32:50] (step=0679000) Train Loss: 0.3222, Train Steps/Sec: 15.81, Grad Norm: 0.0372 +[2025-02-23 09:32:56] (step=0679100) Train Loss: 0.3216, Train Steps/Sec: 16.54, Grad Norm: 0.0385 +[2025-02-23 09:33:03] (step=0679200) Train Loss: 0.3216, Train Steps/Sec: 15.78, Grad Norm: 0.0351 +[2025-02-23 09:33:10] (step=0679300) Train Loss: 0.3222, Train Steps/Sec: 14.38, Grad Norm: 0.0352 +[2025-02-23 09:33:16] (step=0679400) Train Loss: 0.3220, Train Steps/Sec: 17.24, Grad Norm: 0.0357 +[2025-02-23 09:33:21] (step=0679500) Train Loss: 0.3218, Train Steps/Sec: 17.30, Grad Norm: 0.0358 +[2025-02-23 09:33:27] (step=0679600) Train Loss: 0.3213, Train Steps/Sec: 16.44, Grad Norm: 0.0394 +[2025-02-23 09:33:33] (step=0679700) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0382 +[2025-02-23 09:33:39] (step=0679800) Train Loss: 0.3212, Train Steps/Sec: 17.40, Grad Norm: 0.0347 +[2025-02-23 09:33:45] (step=0679900) Train Loss: 0.3222, Train Steps/Sec: 17.42, Grad Norm: 0.0389 +[2025-02-23 09:33:50] (step=0680000) Train Loss: 0.3216, Train Steps/Sec: 17.34, Grad Norm: 0.0350 +[2025-02-23 09:33:56] (step=0680100) Train Loss: 0.3218, Train Steps/Sec: 17.38, Grad Norm: 0.0369 +[2025-02-23 09:34:02] (step=0680200) Train Loss: 0.3221, Train Steps/Sec: 17.32, Grad Norm: 0.0336 +[2025-02-23 09:34:08] (step=0680300) Train Loss: 0.3216, Train Steps/Sec: 17.29, Grad Norm: 0.0362 +[2025-02-23 09:34:14] (step=0680400) Train Loss: 0.3217, Train Steps/Sec: 16.53, Grad Norm: 0.0360 +[2025-02-23 09:34:21] (step=0680500) Train Loss: 0.3226, Train Steps/Sec: 14.67, Grad Norm: 0.0368 +[2025-02-23 09:34:28] (step=0680600) Train Loss: 0.3218, Train Steps/Sec: 13.91, Grad Norm: 0.0350 +[2025-02-23 09:34:34] (step=0680700) Train Loss: 0.3216, Train Steps/Sec: 16.62, Grad Norm: 0.0332 +[2025-02-23 09:34:40] (step=0680800) Train Loss: 0.3221, Train Steps/Sec: 17.35, Grad Norm: 0.0368 +[2025-02-23 09:34:46] (step=0680900) Train Loss: 0.3224, Train Steps/Sec: 15.80, Grad Norm: 0.0394 +[2025-02-23 09:34:52] (step=0681000) Train Loss: 0.3215, Train Steps/Sec: 15.97, Grad Norm: 0.0359 +[2025-02-23 09:34:58] (step=0681100) Train Loss: 0.3221, Train Steps/Sec: 16.54, Grad Norm: 0.0381 +[2025-02-23 09:35:05] (step=0681200) Train Loss: 0.3216, Train Steps/Sec: 15.75, Grad Norm: 0.0317 +[2025-02-23 09:35:10] (step=0681300) Train Loss: 0.3221, Train Steps/Sec: 16.99, Grad Norm: 0.0341 +[2025-02-23 09:35:16] (step=0681400) Train Loss: 0.3222, Train Steps/Sec: 17.26, Grad Norm: 0.0356 +[2025-02-23 09:35:22] (step=0681500) Train Loss: 0.3219, Train Steps/Sec: 17.36, Grad Norm: 0.0355 +[2025-02-23 09:35:28] (step=0681600) Train Loss: 0.3218, Train Steps/Sec: 16.56, Grad Norm: 0.0357 +[2025-02-23 09:35:34] (step=0681700) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0372 +[2025-02-23 09:35:41] (step=0681800) Train Loss: 0.3216, Train Steps/Sec: 14.50, Grad Norm: 0.0354 +[2025-02-23 09:35:47] (step=0681900) Train Loss: 0.3218, Train Steps/Sec: 17.22, Grad Norm: 0.0352 +[2025-02-23 09:35:52] (step=0682000) Train Loss: 0.3217, Train Steps/Sec: 17.35, Grad Norm: 0.0356 +[2025-02-23 09:35:58] (step=0682100) Train Loss: 0.3216, Train Steps/Sec: 17.29, Grad Norm: 0.0353 +[2025-02-23 09:36:04] (step=0682200) Train Loss: 0.3222, Train Steps/Sec: 17.26, Grad Norm: 0.0416 +[2025-02-23 09:36:10] (step=0682300) Train Loss: 0.3222, Train Steps/Sec: 17.37, Grad Norm: 0.0346 +[2025-02-23 09:36:17] (step=0682400) Train Loss: 0.3218, Train Steps/Sec: 14.64, Grad Norm: 0.0377 +[2025-02-23 09:36:23] (step=0682500) Train Loss: 0.3219, Train Steps/Sec: 15.24, Grad Norm: 0.0349 +[2025-02-23 09:36:29] (step=0682600) Train Loss: 0.3219, Train Steps/Sec: 17.29, Grad Norm: 0.0345 +[2025-02-23 09:36:35] (step=0682700) Train Loss: 0.3219, Train Steps/Sec: 16.60, Grad Norm: 0.0421 +[2025-02-23 09:36:41] (step=0682800) Train Loss: 0.3222, Train Steps/Sec: 17.35, Grad Norm: 0.0411 +[2025-02-23 09:36:47] (step=0682900) Train Loss: 0.3210, Train Steps/Sec: 15.84, Grad Norm: 0.0362 +[2025-02-23 09:36:53] (step=0683000) Train Loss: 0.3219, Train Steps/Sec: 16.63, Grad Norm: 0.0374 +[2025-02-23 09:37:00] (step=0683100) Train Loss: 0.3219, Train Steps/Sec: 13.38, Grad Norm: 0.0400 +[2025-02-23 09:37:07] (step=0683200) Train Loss: 0.3215, Train Steps/Sec: 15.72, Grad Norm: 0.0381 +[2025-02-23 09:37:13] (step=0683300) Train Loss: 0.3217, Train Steps/Sec: 17.26, Grad Norm: 0.0388 +[2025-02-23 09:37:18] (step=0683400) Train Loss: 0.3215, Train Steps/Sec: 17.24, Grad Norm: 0.0390 +[2025-02-23 09:37:24] (step=0683500) Train Loss: 0.3217, Train Steps/Sec: 17.21, Grad Norm: 0.0389 +[2025-02-23 09:37:30] (step=0683600) Train Loss: 0.3221, Train Steps/Sec: 16.38, Grad Norm: 0.0377 +[2025-02-23 09:37:36] (step=0683700) Train Loss: 0.3216, Train Steps/Sec: 17.21, Grad Norm: 0.0357 +[2025-02-23 09:37:42] (step=0683800) Train Loss: 0.3219, Train Steps/Sec: 17.22, Grad Norm: 0.0341 +[2025-02-23 09:37:48] (step=0683900) Train Loss: 0.3213, Train Steps/Sec: 17.18, Grad Norm: 0.0350 +[2025-02-23 09:37:54] (step=0684000) Train Loss: 0.3219, Train Steps/Sec: 17.17, Grad Norm: 0.0367 +[2025-02-23 09:37:59] (step=0684100) Train Loss: 0.3222, Train Steps/Sec: 17.17, Grad Norm: 0.0367 +[2025-02-23 09:38:05] (step=0684200) Train Loss: 0.3222, Train Steps/Sec: 17.18, Grad Norm: 0.0382 +[2025-02-23 09:38:13] (step=0684300) Train Loss: 0.3220, Train Steps/Sec: 13.05, Grad Norm: 0.0386 +[2025-02-23 09:38:20] (step=0684400) Train Loss: 0.3216, Train Steps/Sec: 14.46, Grad Norm: 0.0333 +[2025-02-23 09:38:26] (step=0684500) Train Loss: 0.3220, Train Steps/Sec: 17.12, Grad Norm: 0.0396 +[2025-02-23 09:38:32] (step=0684600) Train Loss: 0.3221, Train Steps/Sec: 16.45, Grad Norm: 0.0382 +[2025-02-23 09:38:38] (step=0684700) Train Loss: 0.3216, Train Steps/Sec: 17.20, Grad Norm: 0.0349 +[2025-02-23 09:38:43] (step=0684800) Train Loss: 0.3212, Train Steps/Sec: 17.12, Grad Norm: 0.0354 +[2025-02-23 09:38:50] (step=0684900) Train Loss: 0.3223, Train Steps/Sec: 15.58, Grad Norm: 0.0372 +[2025-02-23 09:38:56] (step=0685000) Train Loss: 0.3222, Train Steps/Sec: 16.38, Grad Norm: 0.0375 +[2025-02-23 09:39:02] (step=0685100) Train Loss: 0.3217, Train Steps/Sec: 15.65, Grad Norm: 0.0370 +[2025-02-23 09:39:09] (step=0685200) Train Loss: 0.3214, Train Steps/Sec: 15.53, Grad Norm: 0.0330 +[2025-02-23 09:39:15] (step=0685300) Train Loss: 0.3217, Train Steps/Sec: 17.06, Grad Norm: 0.0348 +[2025-02-23 09:39:21] (step=0685400) Train Loss: 0.3219, Train Steps/Sec: 17.01, Grad Norm: 0.0388 +[2025-02-23 09:39:26] (step=0685500) Train Loss: 0.3218, Train Steps/Sec: 16.98, Grad Norm: 0.0371 +[2025-02-23 09:39:34] (step=0685600) Train Loss: 0.3217, Train Steps/Sec: 13.66, Grad Norm: 0.0360 +[2025-02-23 09:39:40] (step=0685700) Train Loss: 0.3217, Train Steps/Sec: 17.25, Grad Norm: 0.0373 +[2025-02-23 09:39:45] (step=0685800) Train Loss: 0.3220, Train Steps/Sec: 17.26, Grad Norm: 0.0360 +[2025-02-23 09:39:51] (step=0685900) Train Loss: 0.3211, Train Steps/Sec: 17.20, Grad Norm: 0.0374 +[2025-02-23 09:39:57] (step=0686000) Train Loss: 0.3219, Train Steps/Sec: 17.18, Grad Norm: 0.0376 +[2025-02-23 09:40:03] (step=0686100) Train Loss: 0.3216, Train Steps/Sec: 17.20, Grad Norm: 0.0393 +[2025-02-23 09:40:09] (step=0686200) Train Loss: 0.3219, Train Steps/Sec: 17.10, Grad Norm: 0.0337 +[2025-02-23 09:40:15] (step=0686300) Train Loss: 0.3218, Train Steps/Sec: 15.01, Grad Norm: 0.0383 +[2025-02-23 09:40:22] (step=0686400) Train Loss: 0.3221, Train Steps/Sec: 15.08, Grad Norm: 0.0379 +[2025-02-23 09:40:28] (step=0686500) Train Loss: 0.3221, Train Steps/Sec: 17.07, Grad Norm: 0.0371 +[2025-02-23 09:40:34] (step=0686600) Train Loss: 0.3216, Train Steps/Sec: 16.32, Grad Norm: 0.0385 +[2025-02-23 09:40:40] (step=0686700) Train Loss: 0.3216, Train Steps/Sec: 17.22, Grad Norm: 0.0368 +[2025-02-23 09:40:47] (step=0686800) Train Loss: 0.3215, Train Steps/Sec: 14.39, Grad Norm: 0.0341 +[2025-02-23 09:40:53] (step=0686900) Train Loss: 0.3215, Train Steps/Sec: 15.72, Grad Norm: 0.0364 +[2025-02-23 09:40:59] (step=0687000) Train Loss: 0.3214, Train Steps/Sec: 16.58, Grad Norm: 0.0374 +[2025-02-23 09:41:05] (step=0687100) Train Loss: 0.3212, Train Steps/Sec: 15.94, Grad Norm: 0.0372 +[2025-02-23 09:41:12] (step=0687200) Train Loss: 0.3221, Train Steps/Sec: 15.86, Grad Norm: 0.0351 +[2025-02-23 09:41:17] (step=0687300) Train Loss: 0.3216, Train Steps/Sec: 17.36, Grad Norm: 0.0372 +[2025-02-23 09:41:23] (step=0687400) Train Loss: 0.3220, Train Steps/Sec: 17.35, Grad Norm: 0.0373 +[2025-02-23 09:41:29] (step=0687500) Train Loss: 0.3216, Train Steps/Sec: 17.41, Grad Norm: 0.0397 +[2025-02-23 09:41:35] (step=0687600) Train Loss: 0.3219, Train Steps/Sec: 16.53, Grad Norm: 0.0342 +[2025-02-23 09:41:41] (step=0687700) Train Loss: 0.3219, Train Steps/Sec: 17.38, Grad Norm: 0.0369 +[2025-02-23 09:41:47] (step=0687800) Train Loss: 0.3219, Train Steps/Sec: 17.14, Grad Norm: 0.0412 +[2025-02-23 09:41:52] (step=0687900) Train Loss: 0.3219, Train Steps/Sec: 17.36, Grad Norm: 0.0414 +[2025-02-23 09:41:58] (step=0688000) Train Loss: 0.3215, Train Steps/Sec: 17.31, Grad Norm: 0.0356 +[2025-02-23 09:42:05] (step=0688100) Train Loss: 0.3215, Train Steps/Sec: 14.33, Grad Norm: 0.0387 +[2025-02-23 09:42:12] (step=0688200) Train Loss: 0.3217, Train Steps/Sec: 14.66, Grad Norm: 0.0367 +[2025-02-23 09:42:18] (step=0688300) Train Loss: 0.3219, Train Steps/Sec: 15.82, Grad Norm: 0.0364 +[2025-02-23 09:42:24] (step=0688400) Train Loss: 0.3219, Train Steps/Sec: 17.26, Grad Norm: 0.0351 +[2025-02-23 09:42:30] (step=0688500) Train Loss: 0.3214, Train Steps/Sec: 16.48, Grad Norm: 0.0351 +[2025-02-23 09:42:36] (step=0688600) Train Loss: 0.3220, Train Steps/Sec: 17.33, Grad Norm: 0.0363 +[2025-02-23 09:42:42] (step=0688700) Train Loss: 0.3217, Train Steps/Sec: 17.29, Grad Norm: 0.0384 +[2025-02-23 09:42:47] (step=0688800) Train Loss: 0.3219, Train Steps/Sec: 17.33, Grad Norm: 0.0384 +[2025-02-23 09:42:53] (step=0688900) Train Loss: 0.3217, Train Steps/Sec: 16.57, Grad Norm: 0.0339 +[2025-02-23 09:43:00] (step=0689000) Train Loss: 0.3221, Train Steps/Sec: 15.82, Grad Norm: 0.0341 +[2025-02-23 09:43:06] (step=0689100) Train Loss: 0.3217, Train Steps/Sec: 15.88, Grad Norm: 0.0416 +[2025-02-23 09:43:12] (step=0689200) Train Loss: 0.3213, Train Steps/Sec: 15.88, Grad Norm: 0.0361 +[2025-02-23 09:43:18] (step=0689300) Train Loss: 0.3217, Train Steps/Sec: 17.35, Grad Norm: 0.0369 +[2025-02-23 09:43:25] (step=0689400) Train Loss: 0.3215, Train Steps/Sec: 14.43, Grad Norm: 0.0371 +[2025-02-23 09:43:31] (step=0689500) Train Loss: 0.3216, Train Steps/Sec: 17.39, Grad Norm: 0.0343 +[2025-02-23 09:43:37] (step=0689600) Train Loss: 0.3214, Train Steps/Sec: 16.55, Grad Norm: 0.0357 +[2025-02-23 09:43:43] (step=0689700) Train Loss: 0.3221, Train Steps/Sec: 17.37, Grad Norm: 0.0403 +[2025-02-23 09:43:48] (step=0689800) Train Loss: 0.3218, Train Steps/Sec: 17.32, Grad Norm: 0.0358 +[2025-02-23 09:43:54] (step=0689900) Train Loss: 0.3222, Train Steps/Sec: 17.24, Grad Norm: 0.0380 +[2025-02-23 09:44:00] (step=0690000) Train Loss: 0.3218, Train Steps/Sec: 17.32, Grad Norm: 0.0454 +[2025-02-23 09:44:06] (step=0690100) Train Loss: 0.3216, Train Steps/Sec: 17.34, Grad Norm: 0.0374 +[2025-02-23 09:44:13] (step=0690200) Train Loss: 0.3213, Train Steps/Sec: 13.64, Grad Norm: 0.0366 +[2025-02-23 09:44:19] (step=0690300) Train Loss: 0.3220, Train Steps/Sec: 16.49, Grad Norm: 0.0376 +[2025-02-23 09:44:25] (step=0690400) Train Loss: 0.3220, Train Steps/Sec: 17.23, Grad Norm: 0.0393 +[2025-02-23 09:44:31] (step=0690500) Train Loss: 0.3221, Train Steps/Sec: 16.53, Grad Norm: 0.0377 +[2025-02-23 09:44:38] (step=0690600) Train Loss: 0.3220, Train Steps/Sec: 14.36, Grad Norm: 0.0414 +[2025-02-23 09:44:44] (step=0690700) Train Loss: 0.3219, Train Steps/Sec: 17.15, Grad Norm: 0.0339 +[2025-02-23 09:44:50] (step=0690800) Train Loss: 0.3220, Train Steps/Sec: 17.23, Grad Norm: 0.0364 +[2025-02-23 09:44:56] (step=0690900) Train Loss: 0.3219, Train Steps/Sec: 15.72, Grad Norm: 0.0370 +[2025-02-23 09:45:02] (step=0691000) Train Loss: 0.3210, Train Steps/Sec: 16.54, Grad Norm: 0.0367 +[2025-02-23 09:45:08] (step=0691100) Train Loss: 0.3222, Train Steps/Sec: 15.86, Grad Norm: 0.0374 +[2025-02-23 09:45:15] (step=0691200) Train Loss: 0.3212, Train Steps/Sec: 15.74, Grad Norm: 0.0330 +[2025-02-23 09:45:21] (step=0691300) Train Loss: 0.3216, Train Steps/Sec: 17.18, Grad Norm: 0.0350 +[2025-02-23 09:45:26] (step=0691400) Train Loss: 0.3217, Train Steps/Sec: 17.20, Grad Norm: 0.0439 +[2025-02-23 09:45:32] (step=0691500) Train Loss: 0.3217, Train Steps/Sec: 17.24, Grad Norm: 0.0411 +[2025-02-23 09:45:38] (step=0691600) Train Loss: 0.3218, Train Steps/Sec: 16.49, Grad Norm: 0.0376 +[2025-02-23 09:45:44] (step=0691700) Train Loss: 0.3218, Train Steps/Sec: 17.33, Grad Norm: 0.0343 +[2025-02-23 09:45:50] (step=0691800) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0338 +[2025-02-23 09:45:57] (step=0691900) Train Loss: 0.3223, Train Steps/Sec: 14.37, Grad Norm: 0.0363 +[2025-02-23 09:46:02] (step=0692000) Train Loss: 0.3219, Train Steps/Sec: 17.38, Grad Norm: 0.0368 +[2025-02-23 09:46:09] (step=0692100) Train Loss: 0.3215, Train Steps/Sec: 14.75, Grad Norm: 0.0377 +[2025-02-23 09:46:16] (step=0692200) Train Loss: 0.3213, Train Steps/Sec: 15.27, Grad Norm: 0.0384 +[2025-02-23 09:46:22] (step=0692300) Train Loss: 0.3220, Train Steps/Sec: 17.29, Grad Norm: 0.0339 +[2025-02-23 09:46:28] (step=0692400) Train Loss: 0.3228, Train Steps/Sec: 16.51, Grad Norm: 0.0368 +[2025-02-23 09:46:33] (step=0692500) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0344 +[2025-02-23 09:46:39] (step=0692600) Train Loss: 0.3211, Train Steps/Sec: 17.30, Grad Norm: 0.0369 +[2025-02-23 09:46:45] (step=0692700) Train Loss: 0.3222, Train Steps/Sec: 17.34, Grad Norm: 0.0369 +[2025-02-23 09:46:51] (step=0692800) Train Loss: 0.3214, Train Steps/Sec: 17.37, Grad Norm: 0.0362 +[2025-02-23 09:46:57] (step=0692900) Train Loss: 0.3222, Train Steps/Sec: 16.56, Grad Norm: 0.0455 +[2025-02-23 09:47:03] (step=0693000) Train Loss: 0.3221, Train Steps/Sec: 15.77, Grad Norm: 0.0389 +[2025-02-23 09:47:11] (step=0693100) Train Loss: 0.3223, Train Steps/Sec: 13.51, Grad Norm: 0.0340 +[2025-02-23 09:47:17] (step=0693200) Train Loss: 0.3219, Train Steps/Sec: 15.68, Grad Norm: 0.0388 +[2025-02-23 09:47:23] (step=0693300) Train Loss: 0.3218, Train Steps/Sec: 17.14, Grad Norm: 0.0353 +[2025-02-23 09:47:29] (step=0693400) Train Loss: 0.3222, Train Steps/Sec: 17.28, Grad Norm: 0.0353 +[2025-02-23 09:47:34] (step=0693500) Train Loss: 0.3218, Train Steps/Sec: 17.36, Grad Norm: 0.0381 +[2025-02-23 09:47:40] (step=0693600) Train Loss: 0.3220, Train Steps/Sec: 16.49, Grad Norm: 0.0359 +[2025-02-23 09:47:46] (step=0693700) Train Loss: 0.3220, Train Steps/Sec: 17.30, Grad Norm: 0.0414 +[2025-02-23 09:47:52] (step=0693800) Train Loss: 0.3216, Train Steps/Sec: 17.29, Grad Norm: 0.0347 +[2025-02-23 09:47:58] (step=0693900) Train Loss: 0.3218, Train Steps/Sec: 17.31, Grad Norm: 0.0399 +[2025-02-23 09:48:04] (step=0694000) Train Loss: 0.3214, Train Steps/Sec: 16.55, Grad Norm: 0.0370 +[2025-02-23 09:48:11] (step=0694100) Train Loss: 0.3225, Train Steps/Sec: 14.59, Grad Norm: 0.0397 +[2025-02-23 09:48:17] (step=0694200) Train Loss: 0.3223, Train Steps/Sec: 16.51, Grad Norm: 0.0346 +[2025-02-23 09:48:22] (step=0694300) Train Loss: 0.3215, Train Steps/Sec: 17.23, Grad Norm: 0.0364 +[2025-02-23 09:48:30] (step=0694400) Train Loss: 0.3217, Train Steps/Sec: 13.91, Grad Norm: 0.0353 +[2025-02-23 09:48:35] (step=0694500) Train Loss: 0.3215, Train Steps/Sec: 17.23, Grad Norm: 0.0370 +[2025-02-23 09:48:41] (step=0694600) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0359 +[2025-02-23 09:48:47] (step=0694700) Train Loss: 0.3218, Train Steps/Sec: 17.32, Grad Norm: 0.0390 +[2025-02-23 09:48:53] (step=0694800) Train Loss: 0.3219, Train Steps/Sec: 17.34, Grad Norm: 0.0354 +[2025-02-23 09:48:59] (step=0694900) Train Loss: 0.3216, Train Steps/Sec: 16.46, Grad Norm: 0.0413 +[2025-02-23 09:49:05] (step=0695000) Train Loss: 0.3219, Train Steps/Sec: 15.81, Grad Norm: 0.0362 +[2025-02-23 09:49:11] (step=0695100) Train Loss: 0.3219, Train Steps/Sec: 15.92, Grad Norm: 0.0361 +[2025-02-23 09:49:18] (step=0695200) Train Loss: 0.3221, Train Steps/Sec: 15.82, Grad Norm: 0.0335 +[2025-02-23 09:49:24] (step=0695300) Train Loss: 0.3213, Train Steps/Sec: 17.37, Grad Norm: 0.0387 +[2025-02-23 09:49:29] (step=0695400) Train Loss: 0.3215, Train Steps/Sec: 17.29, Grad Norm: 0.0350 +[2025-02-23 09:49:35] (step=0695500) Train Loss: 0.3216, Train Steps/Sec: 17.31, Grad Norm: 0.0327 +[2025-02-23 09:49:42] (step=0695600) Train Loss: 0.3214, Train Steps/Sec: 13.76, Grad Norm: 0.0372 +[2025-02-23 09:49:48] (step=0695700) Train Loss: 0.3221, Train Steps/Sec: 17.26, Grad Norm: 0.0392 +[2025-02-23 09:49:54] (step=0695800) Train Loss: 0.3213, Train Steps/Sec: 17.29, Grad Norm: 0.0353 +[2025-02-23 09:50:00] (step=0695900) Train Loss: 0.3217, Train Steps/Sec: 17.29, Grad Norm: 0.0356 +[2025-02-23 09:50:06] (step=0696000) Train Loss: 0.3217, Train Steps/Sec: 15.13, Grad Norm: 0.0362 +[2025-02-23 09:50:13] (step=0696100) Train Loss: 0.3216, Train Steps/Sec: 14.55, Grad Norm: 0.0362 +[2025-02-23 09:50:19] (step=0696200) Train Loss: 0.3215, Train Steps/Sec: 17.13, Grad Norm: 0.0374 +[2025-02-23 09:50:25] (step=0696300) Train Loss: 0.3218, Train Steps/Sec: 16.48, Grad Norm: 0.0385 +[2025-02-23 09:50:31] (step=0696400) Train Loss: 0.3218, Train Steps/Sec: 17.22, Grad Norm: 0.0382 +[2025-02-23 09:50:37] (step=0696500) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0381 +[2025-02-23 09:50:43] (step=0696600) Train Loss: 0.3218, Train Steps/Sec: 17.25, Grad Norm: 0.0353 +[2025-02-23 09:50:48] (step=0696700) Train Loss: 0.3216, Train Steps/Sec: 17.24, Grad Norm: 0.0344 +[2025-02-23 09:50:54] (step=0696800) Train Loss: 0.3218, Train Steps/Sec: 17.24, Grad Norm: 0.0391 +[2025-02-23 09:51:01] (step=0696900) Train Loss: 0.3216, Train Steps/Sec: 13.83, Grad Norm: 0.0354 +[2025-02-23 09:51:08] (step=0697000) Train Loss: 0.3215, Train Steps/Sec: 15.76, Grad Norm: 0.0367 +[2025-02-23 09:51:14] (step=0697100) Train Loss: 0.3215, Train Steps/Sec: 15.76, Grad Norm: 0.0347 +[2025-02-23 09:51:20] (step=0697200) Train Loss: 0.3221, Train Steps/Sec: 16.41, Grad Norm: 0.0365 +[2025-02-23 09:51:26] (step=0697300) Train Loss: 0.3220, Train Steps/Sec: 16.44, Grad Norm: 0.0364 +[2025-02-23 09:51:32] (step=0697400) Train Loss: 0.3214, Train Steps/Sec: 17.23, Grad Norm: 0.0376 +[2025-02-23 09:51:38] (step=0697500) Train Loss: 0.3211, Train Steps/Sec: 17.28, Grad Norm: 0.0372 +[2025-02-23 09:51:44] (step=0697600) Train Loss: 0.3217, Train Steps/Sec: 16.51, Grad Norm: 0.0348 +[2025-02-23 09:51:50] (step=0697700) Train Loss: 0.3215, Train Steps/Sec: 17.29, Grad Norm: 0.0364 +[2025-02-23 09:51:56] (step=0697800) Train Loss: 0.3214, Train Steps/Sec: 17.27, Grad Norm: 0.0383 +[2025-02-23 09:52:02] (step=0697900) Train Loss: 0.3211, Train Steps/Sec: 16.55, Grad Norm: 0.0393 +[2025-02-23 09:52:09] (step=0698000) Train Loss: 0.3217, Train Steps/Sec: 14.02, Grad Norm: 0.0410 +[2025-02-23 09:52:16] (step=0698100) Train Loss: 0.3218, Train Steps/Sec: 14.35, Grad Norm: 0.0371 +[2025-02-23 09:52:22] (step=0698200) Train Loss: 0.3218, Train Steps/Sec: 16.39, Grad Norm: 0.0393 +[2025-02-23 09:52:28] (step=0698300) Train Loss: 0.3217, Train Steps/Sec: 17.15, Grad Norm: 0.0387 +[2025-02-23 09:52:33] (step=0698400) Train Loss: 0.3214, Train Steps/Sec: 17.15, Grad Norm: 0.0367 +[2025-02-23 09:52:39] (step=0698500) Train Loss: 0.3219, Train Steps/Sec: 17.13, Grad Norm: 0.0366 +[2025-02-23 09:52:45] (step=0698600) Train Loss: 0.3220, Train Steps/Sec: 17.15, Grad Norm: 0.0373 +[2025-02-23 09:52:51] (step=0698700) Train Loss: 0.3221, Train Steps/Sec: 17.17, Grad Norm: 0.0379 +[2025-02-23 09:52:57] (step=0698800) Train Loss: 0.3217, Train Steps/Sec: 17.17, Grad Norm: 0.0365 +[2025-02-23 09:53:03] (step=0698900) Train Loss: 0.3218, Train Steps/Sec: 16.52, Grad Norm: 0.0349 +[2025-02-23 09:53:09] (step=0699000) Train Loss: 0.3210, Train Steps/Sec: 15.77, Grad Norm: 0.0355 +[2025-02-23 09:53:15] (step=0699100) Train Loss: 0.3219, Train Steps/Sec: 16.54, Grad Norm: 0.0362 +[2025-02-23 09:53:22] (step=0699200) Train Loss: 0.3212, Train Steps/Sec: 15.85, Grad Norm: 0.0373 +[2025-02-23 09:53:28] (step=0699300) Train Loss: 0.3219, Train Steps/Sec: 16.43, Grad Norm: 0.0349 +[2025-02-23 09:53:35] (step=0699400) Train Loss: 0.3213, Train Steps/Sec: 14.35, Grad Norm: 0.0383 +[2025-02-23 09:53:40] (step=0699500) Train Loss: 0.3221, Train Steps/Sec: 17.29, Grad Norm: 0.0335 +[2025-02-23 09:53:46] (step=0699600) Train Loss: 0.3216, Train Steps/Sec: 16.49, Grad Norm: 0.0371 +[2025-02-23 09:53:52] (step=0699700) Train Loss: 0.3215, Train Steps/Sec: 17.22, Grad Norm: 0.0358 +[2025-02-23 09:53:58] (step=0699800) Train Loss: 0.3215, Train Steps/Sec: 17.36, Grad Norm: 0.0340 +[2025-02-23 09:54:05] (step=0699900) Train Loss: 0.3215, Train Steps/Sec: 13.70, Grad Norm: 0.0393 +[2025-02-23 09:54:11] (step=0700000) Train Loss: 0.3215, Train Steps/Sec: 16.60, Grad Norm: 0.0359 +[2025-02-23 09:54:12] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0700000.pt +[2025-02-23 09:54:18] (step=0700100) Train Loss: 0.3220, Train Steps/Sec: 15.11, Grad Norm: 0.0412 +[2025-02-23 09:54:24] (step=0700200) Train Loss: 0.3222, Train Steps/Sec: 16.55, Grad Norm: 0.0348 +[2025-02-23 09:54:30] (step=0700300) Train Loss: 0.3218, Train Steps/Sec: 17.29, Grad Norm: 0.0373 +[2025-02-23 09:54:36] (step=0700400) Train Loss: 0.3217, Train Steps/Sec: 17.32, Grad Norm: 0.0366 +[2025-02-23 09:54:41] (step=0700500) Train Loss: 0.3216, Train Steps/Sec: 17.22, Grad Norm: 0.0388 +[2025-02-23 09:54:48] (step=0700600) Train Loss: 0.3213, Train Steps/Sec: 14.41, Grad Norm: 0.0389 +[2025-02-23 09:54:54] (step=0700700) Train Loss: 0.3221, Train Steps/Sec: 17.32, Grad Norm: 0.0382 +[2025-02-23 09:55:00] (step=0700800) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0390 +[2025-02-23 09:55:06] (step=0700900) Train Loss: 0.3221, Train Steps/Sec: 16.37, Grad Norm: 0.0403 +[2025-02-23 09:55:12] (step=0701000) Train Loss: 0.3218, Train Steps/Sec: 15.67, Grad Norm: 0.0370 +[2025-02-23 09:55:18] (step=0701100) Train Loss: 0.3217, Train Steps/Sec: 16.46, Grad Norm: 0.0360 +[2025-02-23 09:55:25] (step=0701200) Train Loss: 0.3215, Train Steps/Sec: 15.71, Grad Norm: 0.0395 +[2025-02-23 09:55:31] (step=0701300) Train Loss: 0.3218, Train Steps/Sec: 16.39, Grad Norm: 0.0376 +[2025-02-23 09:55:37] (step=0701400) Train Loss: 0.3215, Train Steps/Sec: 17.18, Grad Norm: 0.0380 +[2025-02-23 09:55:42] (step=0701500) Train Loss: 0.3217, Train Steps/Sec: 17.32, Grad Norm: 0.0389 +[2025-02-23 09:55:48] (step=0701600) Train Loss: 0.3216, Train Steps/Sec: 17.36, Grad Norm: 0.0346 +[2025-02-23 09:55:54] (step=0701700) Train Loss: 0.3215, Train Steps/Sec: 16.57, Grad Norm: 0.0363 +[2025-02-23 09:56:01] (step=0701800) Train Loss: 0.3221, Train Steps/Sec: 15.26, Grad Norm: 0.0355 +[2025-02-23 09:56:09] (step=0701900) Train Loss: 0.3210, Train Steps/Sec: 12.60, Grad Norm: 0.0385 +[2025-02-23 09:56:15] (step=0702000) Train Loss: 0.3219, Train Steps/Sec: 17.34, Grad Norm: 0.0414 +[2025-02-23 09:56:21] (step=0702100) Train Loss: 0.3212, Train Steps/Sec: 16.60, Grad Norm: 0.0372 +[2025-02-23 09:56:26] (step=0702200) Train Loss: 0.3216, Train Steps/Sec: 17.33, Grad Norm: 0.0388 +[2025-02-23 09:56:32] (step=0702300) Train Loss: 0.3216, Train Steps/Sec: 17.36, Grad Norm: 0.0411 +[2025-02-23 09:56:38] (step=0702400) Train Loss: 0.3219, Train Steps/Sec: 17.30, Grad Norm: 0.0407 +[2025-02-23 09:56:44] (step=0702500) Train Loss: 0.3216, Train Steps/Sec: 17.36, Grad Norm: 0.0378 +[2025-02-23 09:56:49] (step=0702600) Train Loss: 0.3217, Train Steps/Sec: 17.35, Grad Norm: 0.0375 +[2025-02-23 09:56:55] (step=0702700) Train Loss: 0.3215, Train Steps/Sec: 17.29, Grad Norm: 0.0361 +[2025-02-23 09:57:01] (step=0702800) Train Loss: 0.3217, Train Steps/Sec: 17.36, Grad Norm: 0.0326 +[2025-02-23 09:57:07] (step=0702900) Train Loss: 0.3219, Train Steps/Sec: 16.58, Grad Norm: 0.0383 +[2025-02-23 09:57:13] (step=0703000) Train Loss: 0.3220, Train Steps/Sec: 15.95, Grad Norm: 0.0363 +[2025-02-23 09:57:20] (step=0703100) Train Loss: 0.3221, Train Steps/Sec: 13.90, Grad Norm: 0.0385 +[2025-02-23 09:57:27] (step=0703200) Train Loss: 0.3212, Train Steps/Sec: 15.82, Grad Norm: 0.0358 +[2025-02-23 09:57:33] (step=0703300) Train Loss: 0.3219, Train Steps/Sec: 16.37, Grad Norm: 0.0358 +[2025-02-23 09:57:39] (step=0703400) Train Loss: 0.3209, Train Steps/Sec: 17.04, Grad Norm: 0.0357 +[2025-02-23 09:57:45] (step=0703500) Train Loss: 0.3217, Train Steps/Sec: 17.08, Grad Norm: 0.0401 +[2025-02-23 09:57:51] (step=0703600) Train Loss: 0.3220, Train Steps/Sec: 16.30, Grad Norm: 0.0389 +[2025-02-23 09:57:57] (step=0703700) Train Loss: 0.3219, Train Steps/Sec: 17.07, Grad Norm: 0.0360 +[2025-02-23 09:58:04] (step=0703800) Train Loss: 0.3221, Train Steps/Sec: 13.49, Grad Norm: 0.0386 +[2025-02-23 09:58:10] (step=0703900) Train Loss: 0.3217, Train Steps/Sec: 16.27, Grad Norm: 0.0368 +[2025-02-23 09:58:16] (step=0704000) Train Loss: 0.3213, Train Steps/Sec: 17.10, Grad Norm: 0.0372 +[2025-02-23 09:58:22] (step=0704100) Train Loss: 0.3217, Train Steps/Sec: 16.47, Grad Norm: 0.0386 +[2025-02-23 09:58:28] (step=0704200) Train Loss: 0.3213, Train Steps/Sec: 17.24, Grad Norm: 0.0370 +[2025-02-23 09:58:34] (step=0704300) Train Loss: 0.3220, Train Steps/Sec: 17.21, Grad Norm: 0.0366 +[2025-02-23 09:58:41] (step=0704400) Train Loss: 0.3221, Train Steps/Sec: 14.49, Grad Norm: 0.0383 +[2025-02-23 09:58:46] (step=0704500) Train Loss: 0.3215, Train Steps/Sec: 17.32, Grad Norm: 0.0379 +[2025-02-23 09:58:52] (step=0704600) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0362 +[2025-02-23 09:58:58] (step=0704700) Train Loss: 0.3219, Train Steps/Sec: 17.34, Grad Norm: 0.0374 +[2025-02-23 09:59:04] (step=0704800) Train Loss: 0.3214, Train Steps/Sec: 17.31, Grad Norm: 0.0342 +[2025-02-23 09:59:10] (step=0704900) Train Loss: 0.3217, Train Steps/Sec: 16.45, Grad Norm: 0.0372 +[2025-02-23 09:59:16] (step=0705000) Train Loss: 0.3216, Train Steps/Sec: 15.79, Grad Norm: 0.0350 +[2025-02-23 09:59:22] (step=0705100) Train Loss: 0.3214, Train Steps/Sec: 16.59, Grad Norm: 0.0397 +[2025-02-23 09:59:28] (step=0705200) Train Loss: 0.3218, Train Steps/Sec: 15.89, Grad Norm: 0.0419 +[2025-02-23 09:59:34] (step=0705300) Train Loss: 0.3222, Train Steps/Sec: 16.58, Grad Norm: 0.0365 +[2025-02-23 09:59:40] (step=0705400) Train Loss: 0.3216, Train Steps/Sec: 17.37, Grad Norm: 0.0334 +[2025-02-23 09:59:46] (step=0705500) Train Loss: 0.3218, Train Steps/Sec: 17.36, Grad Norm: 0.0374 +[2025-02-23 09:59:53] (step=0705600) Train Loss: 0.3219, Train Steps/Sec: 13.85, Grad Norm: 0.0346 +[2025-02-23 10:00:00] (step=0705700) Train Loss: 0.3216, Train Steps/Sec: 15.01, Grad Norm: 0.0392 +[2025-02-23 10:00:07] (step=0705800) Train Loss: 0.3217, Train Steps/Sec: 14.54, Grad Norm: 0.0355 +[2025-02-23 10:00:13] (step=0705900) Train Loss: 0.3218, Train Steps/Sec: 17.09, Grad Norm: 0.0376 +[2025-02-23 10:00:19] (step=0706000) Train Loss: 0.3218, Train Steps/Sec: 16.27, Grad Norm: 0.0416 +[2025-02-23 10:00:25] (step=0706100) Train Loss: 0.3217, Train Steps/Sec: 17.04, Grad Norm: 0.0354 +[2025-02-23 10:00:31] (step=0706200) Train Loss: 0.3215, Train Steps/Sec: 17.10, Grad Norm: 0.0369 +[2025-02-23 10:00:36] (step=0706300) Train Loss: 0.3216, Train Steps/Sec: 17.04, Grad Norm: 0.0374 +[2025-02-23 10:00:42] (step=0706400) Train Loss: 0.3215, Train Steps/Sec: 17.07, Grad Norm: 0.0355 +[2025-02-23 10:00:48] (step=0706500) Train Loss: 0.3221, Train Steps/Sec: 17.11, Grad Norm: 0.0376 +[2025-02-23 10:00:54] (step=0706600) Train Loss: 0.3210, Train Steps/Sec: 17.12, Grad Norm: 0.0400 +[2025-02-23 10:01:00] (step=0706700) Train Loss: 0.3217, Train Steps/Sec: 17.11, Grad Norm: 0.0343 +[2025-02-23 10:01:06] (step=0706800) Train Loss: 0.3211, Train Steps/Sec: 17.10, Grad Norm: 0.0368 +[2025-02-23 10:01:13] (step=0706900) Train Loss: 0.3217, Train Steps/Sec: 13.88, Grad Norm: 0.0417 +[2025-02-23 10:01:19] (step=0707000) Train Loss: 0.3217, Train Steps/Sec: 15.66, Grad Norm: 0.0332 +[2025-02-23 10:01:25] (step=0707100) Train Loss: 0.3212, Train Steps/Sec: 16.40, Grad Norm: 0.0388 +[2025-02-23 10:01:32] (step=0707200) Train Loss: 0.3208, Train Steps/Sec: 15.69, Grad Norm: 0.0401 +[2025-02-23 10:01:38] (step=0707300) Train Loss: 0.3211, Train Steps/Sec: 16.37, Grad Norm: 0.0339 +[2025-02-23 10:01:44] (step=0707400) Train Loss: 0.3216, Train Steps/Sec: 17.31, Grad Norm: 0.0379 +[2025-02-23 10:01:49] (step=0707500) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0377 +[2025-02-23 10:01:55] (step=0707600) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0370 +[2025-02-23 10:02:03] (step=0707700) Train Loss: 0.3213, Train Steps/Sec: 13.13, Grad Norm: 0.0346 +[2025-02-23 10:02:09] (step=0707800) Train Loss: 0.3218, Train Steps/Sec: 16.49, Grad Norm: 0.0400 +[2025-02-23 10:02:15] (step=0707900) Train Loss: 0.3220, Train Steps/Sec: 17.29, Grad Norm: 0.0410 +[2025-02-23 10:02:21] (step=0708000) Train Loss: 0.3220, Train Steps/Sec: 16.52, Grad Norm: 0.0337 +[2025-02-23 10:02:28] (step=0708100) Train Loss: 0.3216, Train Steps/Sec: 14.45, Grad Norm: 0.0360 +[2025-02-23 10:02:33] (step=0708200) Train Loss: 0.3220, Train Steps/Sec: 17.40, Grad Norm: 0.0340 +[2025-02-23 10:02:39] (step=0708300) Train Loss: 0.3217, Train Steps/Sec: 17.41, Grad Norm: 0.0380 +[2025-02-23 10:02:45] (step=0708400) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0377 +[2025-02-23 10:02:51] (step=0708500) Train Loss: 0.3217, Train Steps/Sec: 17.41, Grad Norm: 0.0356 +[2025-02-23 10:02:56] (step=0708600) Train Loss: 0.3223, Train Steps/Sec: 17.35, Grad Norm: 0.0400 +[2025-02-23 10:03:02] (step=0708700) Train Loss: 0.3216, Train Steps/Sec: 17.29, Grad Norm: 0.0363 +[2025-02-23 10:03:08] (step=0708800) Train Loss: 0.3222, Train Steps/Sec: 17.38, Grad Norm: 0.0415 +[2025-02-23 10:03:14] (step=0708900) Train Loss: 0.3221, Train Steps/Sec: 17.41, Grad Norm: 0.0362 +[2025-02-23 10:03:20] (step=0709000) Train Loss: 0.3214, Train Steps/Sec: 15.20, Grad Norm: 0.0382 +[2025-02-23 10:03:26] (step=0709100) Train Loss: 0.3215, Train Steps/Sec: 16.45, Grad Norm: 0.0349 +[2025-02-23 10:03:33] (step=0709200) Train Loss: 0.3218, Train Steps/Sec: 15.73, Grad Norm: 0.0387 +[2025-02-23 10:03:39] (step=0709300) Train Loss: 0.3215, Train Steps/Sec: 16.44, Grad Norm: 0.0373 +[2025-02-23 10:03:46] (step=0709400) Train Loss: 0.3211, Train Steps/Sec: 14.57, Grad Norm: 0.0395 +[2025-02-23 10:03:51] (step=0709500) Train Loss: 0.3208, Train Steps/Sec: 17.28, Grad Norm: 0.0340 +[2025-02-23 10:03:58] (step=0709600) Train Loss: 0.3212, Train Steps/Sec: 15.23, Grad Norm: 0.0348 +[2025-02-23 10:04:05] (step=0709700) Train Loss: 0.3212, Train Steps/Sec: 14.65, Grad Norm: 0.0439 +[2025-02-23 10:04:11] (step=0709800) Train Loss: 0.3214, Train Steps/Sec: 17.30, Grad Norm: 0.0383 +[2025-02-23 10:04:17] (step=0709900) Train Loss: 0.3214, Train Steps/Sec: 16.44, Grad Norm: 0.0352 +[2025-02-23 10:04:22] (step=0710000) Train Loss: 0.3218, Train Steps/Sec: 17.23, Grad Norm: 0.0361 +[2025-02-23 10:04:28] (step=0710100) Train Loss: 0.3214, Train Steps/Sec: 17.26, Grad Norm: 0.0389 +[2025-02-23 10:04:34] (step=0710200) Train Loss: 0.3222, Train Steps/Sec: 17.25, Grad Norm: 0.0332 +[2025-02-23 10:04:40] (step=0710300) Train Loss: 0.3211, Train Steps/Sec: 17.31, Grad Norm: 0.0376 +[2025-02-23 10:04:46] (step=0710400) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0386 +[2025-02-23 10:04:51] (step=0710500) Train Loss: 0.3217, Train Steps/Sec: 17.36, Grad Norm: 0.0386 +[2025-02-23 10:04:58] (step=0710600) Train Loss: 0.3219, Train Steps/Sec: 14.53, Grad Norm: 0.0347 +[2025-02-23 10:05:04] (step=0710700) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0361 +[2025-02-23 10:05:10] (step=0710800) Train Loss: 0.3223, Train Steps/Sec: 17.34, Grad Norm: 0.0368 +[2025-02-23 10:05:16] (step=0710900) Train Loss: 0.3221, Train Steps/Sec: 17.39, Grad Norm: 0.0343 +[2025-02-23 10:05:22] (step=0711000) Train Loss: 0.3215, Train Steps/Sec: 15.20, Grad Norm: 0.0372 +[2025-02-23 10:05:28] (step=0711100) Train Loss: 0.3225, Train Steps/Sec: 17.29, Grad Norm: 0.0369 +[2025-02-23 10:05:34] (step=0711200) Train Loss: 0.3215, Train Steps/Sec: 15.28, Grad Norm: 0.0391 +[2025-02-23 10:05:40] (step=0711300) Train Loss: 0.3215, Train Steps/Sec: 16.66, Grad Norm: 0.0365 +[2025-02-23 10:05:46] (step=0711400) Train Loss: 0.3219, Train Steps/Sec: 17.37, Grad Norm: 0.0375 +[2025-02-23 10:05:53] (step=0711500) Train Loss: 0.3218, Train Steps/Sec: 15.91, Grad Norm: 0.0337 +[2025-02-23 10:05:59] (step=0711600) Train Loss: 0.3220, Train Steps/Sec: 14.74, Grad Norm: 0.0374 +[2025-02-23 10:06:06] (step=0711700) Train Loss: 0.3216, Train Steps/Sec: 15.79, Grad Norm: 0.0357 +[2025-02-23 10:06:11] (step=0711800) Train Loss: 0.3222, Train Steps/Sec: 17.25, Grad Norm: 0.0372 +[2025-02-23 10:06:19] (step=0711900) Train Loss: 0.3219, Train Steps/Sec: 13.96, Grad Norm: 0.0326 +[2025-02-23 10:06:24] (step=0712000) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0373 +[2025-02-23 10:06:30] (step=0712100) Train Loss: 0.3221, Train Steps/Sec: 17.31, Grad Norm: 0.0370 +[2025-02-23 10:06:36] (step=0712200) Train Loss: 0.3218, Train Steps/Sec: 17.27, Grad Norm: 0.0344 +[2025-02-23 10:06:42] (step=0712300) Train Loss: 0.3214, Train Steps/Sec: 17.30, Grad Norm: 0.0357 +[2025-02-23 10:06:48] (step=0712400) Train Loss: 0.3216, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 10:06:53] (step=0712500) Train Loss: 0.3215, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 10:06:59] (step=0712600) Train Loss: 0.3222, Train Steps/Sec: 17.33, Grad Norm: 0.0380 +[2025-02-23 10:07:05] (step=0712700) Train Loss: 0.3219, Train Steps/Sec: 17.36, Grad Norm: 0.0341 +[2025-02-23 10:07:11] (step=0712800) Train Loss: 0.3216, Train Steps/Sec: 17.31, Grad Norm: 0.0352 +[2025-02-23 10:07:16] (step=0712900) Train Loss: 0.3220, Train Steps/Sec: 17.29, Grad Norm: 0.0416 +[2025-02-23 10:07:23] (step=0713000) Train Loss: 0.3218, Train Steps/Sec: 15.14, Grad Norm: 0.0329 +[2025-02-23 10:07:30] (step=0713100) Train Loss: 0.3218, Train Steps/Sec: 14.44, Grad Norm: 0.0366 +[2025-02-23 10:07:37] (step=0713200) Train Loss: 0.3215, Train Steps/Sec: 15.18, Grad Norm: 0.0365 +[2025-02-23 10:07:43] (step=0713300) Train Loss: 0.3215, Train Steps/Sec: 16.52, Grad Norm: 0.0382 +[2025-02-23 10:07:48] (step=0713400) Train Loss: 0.3225, Train Steps/Sec: 17.24, Grad Norm: 0.0336 +[2025-02-23 10:07:55] (step=0713500) Train Loss: 0.3222, Train Steps/Sec: 14.53, Grad Norm: 0.0362 +[2025-02-23 10:08:02] (step=0713600) Train Loss: 0.3217, Train Steps/Sec: 15.14, Grad Norm: 0.0406 +[2025-02-23 10:08:08] (step=0713700) Train Loss: 0.3218, Train Steps/Sec: 16.41, Grad Norm: 0.0366 +[2025-02-23 10:08:14] (step=0713800) Train Loss: 0.3220, Train Steps/Sec: 16.42, Grad Norm: 0.0398 +[2025-02-23 10:08:20] (step=0713900) Train Loss: 0.3214, Train Steps/Sec: 17.25, Grad Norm: 0.0370 +[2025-02-23 10:08:26] (step=0714000) Train Loss: 0.3218, Train Steps/Sec: 17.32, Grad Norm: 0.0391 +[2025-02-23 10:08:31] (step=0714100) Train Loss: 0.3213, Train Steps/Sec: 17.22, Grad Norm: 0.0325 +[2025-02-23 10:08:37] (step=0714200) Train Loss: 0.3216, Train Steps/Sec: 17.15, Grad Norm: 0.0371 +[2025-02-23 10:08:43] (step=0714300) Train Loss: 0.3215, Train Steps/Sec: 17.19, Grad Norm: 0.0369 +[2025-02-23 10:08:50] (step=0714400) Train Loss: 0.3217, Train Steps/Sec: 14.57, Grad Norm: 0.0357 +[2025-02-23 10:08:56] (step=0714500) Train Loss: 0.3215, Train Steps/Sec: 17.36, Grad Norm: 0.0376 +[2025-02-23 10:09:01] (step=0714600) Train Loss: 0.3215, Train Steps/Sec: 17.39, Grad Norm: 0.0354 +[2025-02-23 10:09:07] (step=0714700) Train Loss: 0.3217, Train Steps/Sec: 17.40, Grad Norm: 0.0384 +[2025-02-23 10:09:13] (step=0714800) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0347 +[2025-02-23 10:09:19] (step=0714900) Train Loss: 0.3213, Train Steps/Sec: 17.38, Grad Norm: 0.0393 +[2025-02-23 10:09:25] (step=0715000) Train Loss: 0.3217, Train Steps/Sec: 15.19, Grad Norm: 0.0402 +[2025-02-23 10:09:31] (step=0715100) Train Loss: 0.3216, Train Steps/Sec: 17.34, Grad Norm: 0.0362 +[2025-02-23 10:09:38] (step=0715200) Train Loss: 0.3214, Train Steps/Sec: 15.21, Grad Norm: 0.0349 +[2025-02-23 10:09:44] (step=0715300) Train Loss: 0.3213, Train Steps/Sec: 16.56, Grad Norm: 0.0421 +[2025-02-23 10:09:50] (step=0715400) Train Loss: 0.3222, Train Steps/Sec: 15.22, Grad Norm: 0.0379 +[2025-02-23 10:09:57] (step=0715500) Train Loss: 0.3217, Train Steps/Sec: 14.78, Grad Norm: 0.0382 +[2025-02-23 10:10:04] (step=0715600) Train Loss: 0.3215, Train Steps/Sec: 14.42, Grad Norm: 0.0375 +[2025-02-23 10:10:10] (step=0715700) Train Loss: 0.3216, Train Steps/Sec: 15.74, Grad Norm: 0.0370 +[2025-02-23 10:10:16] (step=0715800) Train Loss: 0.3215, Train Steps/Sec: 17.23, Grad Norm: 0.0361 +[2025-02-23 10:10:22] (step=0715900) Train Loss: 0.3217, Train Steps/Sec: 17.31, Grad Norm: 0.0382 +[2025-02-23 10:10:28] (step=0716000) Train Loss: 0.3221, Train Steps/Sec: 17.32, Grad Norm: 0.0352 +[2025-02-23 10:10:34] (step=0716100) Train Loss: 0.3216, Train Steps/Sec: 17.26, Grad Norm: 0.0366 +[2025-02-23 10:10:39] (step=0716200) Train Loss: 0.3213, Train Steps/Sec: 17.23, Grad Norm: 0.0357 +[2025-02-23 10:10:45] (step=0716300) Train Loss: 0.3211, Train Steps/Sec: 17.33, Grad Norm: 0.0390 +[2025-02-23 10:10:51] (step=0716400) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0355 +[2025-02-23 10:10:57] (step=0716500) Train Loss: 0.3218, Train Steps/Sec: 17.41, Grad Norm: 0.0369 +[2025-02-23 10:11:02] (step=0716600) Train Loss: 0.3216, Train Steps/Sec: 17.39, Grad Norm: 0.0409 +[2025-02-23 10:11:08] (step=0716700) Train Loss: 0.3216, Train Steps/Sec: 17.22, Grad Norm: 0.0399 +[2025-02-23 10:11:14] (step=0716800) Train Loss: 0.3214, Train Steps/Sec: 17.16, Grad Norm: 0.0341 +[2025-02-23 10:11:21] (step=0716900) Train Loss: 0.3213, Train Steps/Sec: 14.51, Grad Norm: 0.0385 +[2025-02-23 10:11:28] (step=0717000) Train Loss: 0.3224, Train Steps/Sec: 15.15, Grad Norm: 0.0383 +[2025-02-23 10:11:33] (step=0717100) Train Loss: 0.3217, Train Steps/Sec: 17.36, Grad Norm: 0.0342 +[2025-02-23 10:11:40] (step=0717200) Train Loss: 0.3224, Train Steps/Sec: 15.27, Grad Norm: 0.0399 +[2025-02-23 10:11:46] (step=0717300) Train Loss: 0.3217, Train Steps/Sec: 16.55, Grad Norm: 0.0351 +[2025-02-23 10:11:53] (step=0717400) Train Loss: 0.3217, Train Steps/Sec: 14.75, Grad Norm: 0.0366 +[2025-02-23 10:11:59] (step=0717500) Train Loss: 0.3213, Train Steps/Sec: 15.32, Grad Norm: 0.0361 +[2025-02-23 10:12:05] (step=0717600) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0340 +[2025-02-23 10:12:11] (step=0717700) Train Loss: 0.3217, Train Steps/Sec: 15.79, Grad Norm: 0.0348 +[2025-02-23 10:12:17] (step=0717800) Train Loss: 0.3208, Train Steps/Sec: 17.36, Grad Norm: 0.0346 +[2025-02-23 10:12:23] (step=0717900) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0381 +[2025-02-23 10:12:29] (step=0718000) Train Loss: 0.3214, Train Steps/Sec: 17.29, Grad Norm: 0.0373 +[2025-02-23 10:12:36] (step=0718100) Train Loss: 0.3216, Train Steps/Sec: 14.37, Grad Norm: 0.0347 +[2025-02-23 10:12:41] (step=0718200) Train Loss: 0.3217, Train Steps/Sec: 17.30, Grad Norm: 0.0385 +[2025-02-23 10:12:47] (step=0718300) Train Loss: 0.3216, Train Steps/Sec: 17.35, Grad Norm: 0.0385 +[2025-02-23 10:12:53] (step=0718400) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0393 +[2025-02-23 10:12:59] (step=0718500) Train Loss: 0.3215, Train Steps/Sec: 17.34, Grad Norm: 0.0369 +[2025-02-23 10:13:04] (step=0718600) Train Loss: 0.3219, Train Steps/Sec: 17.33, Grad Norm: 0.0362 +[2025-02-23 10:13:10] (step=0718700) Train Loss: 0.3213, Train Steps/Sec: 17.35, Grad Norm: 0.0393 +[2025-02-23 10:13:16] (step=0718800) Train Loss: 0.3219, Train Steps/Sec: 17.22, Grad Norm: 0.0374 +[2025-02-23 10:13:22] (step=0718900) Train Loss: 0.3220, Train Steps/Sec: 17.23, Grad Norm: 0.0389 +[2025-02-23 10:13:28] (step=0719000) Train Loss: 0.3206, Train Steps/Sec: 15.11, Grad Norm: 0.0413 +[2025-02-23 10:13:34] (step=0719100) Train Loss: 0.3212, Train Steps/Sec: 17.29, Grad Norm: 0.0360 +[2025-02-23 10:13:41] (step=0719200) Train Loss: 0.3215, Train Steps/Sec: 15.30, Grad Norm: 0.0371 +[2025-02-23 10:13:48] (step=0719300) Train Loss: 0.3222, Train Steps/Sec: 14.62, Grad Norm: 0.0340 +[2025-02-23 10:13:56] (step=0719400) Train Loss: 0.3216, Train Steps/Sec: 12.69, Grad Norm: 0.0362 +[2025-02-23 10:14:01] (step=0719500) Train Loss: 0.3222, Train Steps/Sec: 17.32, Grad Norm: 0.0348 +[2025-02-23 10:14:07] (step=0719600) Train Loss: 0.3217, Train Steps/Sec: 16.60, Grad Norm: 0.0338 +[2025-02-23 10:14:13] (step=0719700) Train Loss: 0.3219, Train Steps/Sec: 16.63, Grad Norm: 0.0367 +[2025-02-23 10:14:19] (step=0719800) Train Loss: 0.3213, Train Steps/Sec: 17.43, Grad Norm: 0.0360 +[2025-02-23 10:14:25] (step=0719900) Train Loss: 0.3218, Train Steps/Sec: 17.46, Grad Norm: 0.0343 +[2025-02-23 10:14:31] (step=0720000) Train Loss: 0.3216, Train Steps/Sec: 17.41, Grad Norm: 0.0362 +[2025-02-23 10:14:36] (step=0720100) Train Loss: 0.3222, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 10:14:42] (step=0720200) Train Loss: 0.3212, Train Steps/Sec: 17.31, Grad Norm: 0.0357 +[2025-02-23 10:14:48] (step=0720300) Train Loss: 0.3213, Train Steps/Sec: 17.40, Grad Norm: 0.0387 +[2025-02-23 10:14:54] (step=0720400) Train Loss: 0.3215, Train Steps/Sec: 17.41, Grad Norm: 0.0387 +[2025-02-23 10:14:59] (step=0720500) Train Loss: 0.3216, Train Steps/Sec: 17.37, Grad Norm: 0.0377 +[2025-02-23 10:15:06] (step=0720600) Train Loss: 0.3216, Train Steps/Sec: 14.43, Grad Norm: 0.0380 +[2025-02-23 10:15:12] (step=0720700) Train Loss: 0.3216, Train Steps/Sec: 17.38, Grad Norm: 0.0378 +[2025-02-23 10:15:18] (step=0720800) Train Loss: 0.3220, Train Steps/Sec: 17.32, Grad Norm: 0.0353 +[2025-02-23 10:15:24] (step=0720900) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0379 +[2025-02-23 10:15:30] (step=0721000) Train Loss: 0.3212, Train Steps/Sec: 15.17, Grad Norm: 0.0350 +[2025-02-23 10:15:36] (step=0721100) Train Loss: 0.3216, Train Steps/Sec: 17.36, Grad Norm: 0.0373 +[2025-02-23 10:15:42] (step=0721200) Train Loss: 0.3214, Train Steps/Sec: 15.91, Grad Norm: 0.0385 +[2025-02-23 10:15:49] (step=0721300) Train Loss: 0.3215, Train Steps/Sec: 14.64, Grad Norm: 0.0373 +[2025-02-23 10:15:56] (step=0721400) Train Loss: 0.3216, Train Steps/Sec: 15.36, Grad Norm: 0.0404 +[2025-02-23 10:16:01] (step=0721500) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0340 +[2025-02-23 10:16:07] (step=0721600) Train Loss: 0.3216, Train Steps/Sec: 16.51, Grad Norm: 0.0405 +[2025-02-23 10:16:13] (step=0721700) Train Loss: 0.3215, Train Steps/Sec: 16.52, Grad Norm: 0.0364 +[2025-02-23 10:16:19] (step=0721800) Train Loss: 0.3217, Train Steps/Sec: 17.25, Grad Norm: 0.0334 +[2025-02-23 10:16:26] (step=0721900) Train Loss: 0.3218, Train Steps/Sec: 14.41, Grad Norm: 0.0369 +[2025-02-23 10:16:32] (step=0722000) Train Loss: 0.3222, Train Steps/Sec: 17.30, Grad Norm: 0.0347 +[2025-02-23 10:16:38] (step=0722100) Train Loss: 0.3219, Train Steps/Sec: 17.34, Grad Norm: 0.0366 +[2025-02-23 10:16:44] (step=0722200) Train Loss: 0.3220, Train Steps/Sec: 17.39, Grad Norm: 0.0333 +[2025-02-23 10:16:49] (step=0722300) Train Loss: 0.3218, Train Steps/Sec: 17.41, Grad Norm: 0.0346 +[2025-02-23 10:16:55] (step=0722400) Train Loss: 0.3223, Train Steps/Sec: 17.42, Grad Norm: 0.0454 +[2025-02-23 10:17:01] (step=0722500) Train Loss: 0.3213, Train Steps/Sec: 17.40, Grad Norm: 0.0341 +[2025-02-23 10:17:06] (step=0722600) Train Loss: 0.3219, Train Steps/Sec: 17.43, Grad Norm: 0.0385 +[2025-02-23 10:17:12] (step=0722700) Train Loss: 0.3212, Train Steps/Sec: 17.46, Grad Norm: 0.0360 +[2025-02-23 10:17:18] (step=0722800) Train Loss: 0.3218, Train Steps/Sec: 17.38, Grad Norm: 0.0346 +[2025-02-23 10:17:24] (step=0722900) Train Loss: 0.3218, Train Steps/Sec: 17.34, Grad Norm: 0.0369 +[2025-02-23 10:17:30] (step=0723000) Train Loss: 0.3216, Train Steps/Sec: 15.18, Grad Norm: 0.0391 +[2025-02-23 10:17:37] (step=0723100) Train Loss: 0.3210, Train Steps/Sec: 14.32, Grad Norm: 0.0379 +[2025-02-23 10:17:45] (step=0723200) Train Loss: 0.3219, Train Steps/Sec: 13.42, Grad Norm: 0.0361 +[2025-02-23 10:17:52] (step=0723300) Train Loss: 0.3219, Train Steps/Sec: 14.03, Grad Norm: 0.0359 +[2025-02-23 10:17:58] (step=0723400) Train Loss: 0.3217, Train Steps/Sec: 17.12, Grad Norm: 0.0361 +[2025-02-23 10:18:04] (step=0723500) Train Loss: 0.3212, Train Steps/Sec: 16.36, Grad Norm: 0.0390 +[2025-02-23 10:18:10] (step=0723600) Train Loss: 0.3216, Train Steps/Sec: 17.14, Grad Norm: 0.0376 +[2025-02-23 10:18:16] (step=0723700) Train Loss: 0.3217, Train Steps/Sec: 16.36, Grad Norm: 0.0327 +[2025-02-23 10:18:22] (step=0723800) Train Loss: 0.3211, Train Steps/Sec: 17.12, Grad Norm: 0.0400 +[2025-02-23 10:18:27] (step=0723900) Train Loss: 0.3212, Train Steps/Sec: 17.19, Grad Norm: 0.0391 +[2025-02-23 10:18:33] (step=0724000) Train Loss: 0.3214, Train Steps/Sec: 17.20, Grad Norm: 0.0361 +[2025-02-23 10:18:39] (step=0724100) Train Loss: 0.3222, Train Steps/Sec: 17.20, Grad Norm: 0.0388 +[2025-02-23 10:18:45] (step=0724200) Train Loss: 0.3215, Train Steps/Sec: 17.21, Grad Norm: 0.0354 +[2025-02-23 10:18:51] (step=0724300) Train Loss: 0.3215, Train Steps/Sec: 17.10, Grad Norm: 0.0371 +[2025-02-23 10:18:58] (step=0724400) Train Loss: 0.3219, Train Steps/Sec: 14.35, Grad Norm: 0.0357 +[2025-02-23 10:19:04] (step=0724500) Train Loss: 0.3214, Train Steps/Sec: 17.23, Grad Norm: 0.0374 +[2025-02-23 10:19:09] (step=0724600) Train Loss: 0.3211, Train Steps/Sec: 17.12, Grad Norm: 0.0346 +[2025-02-23 10:19:15] (step=0724700) Train Loss: 0.3219, Train Steps/Sec: 17.23, Grad Norm: 0.0336 +[2025-02-23 10:19:21] (step=0724800) Train Loss: 0.3219, Train Steps/Sec: 17.23, Grad Norm: 0.0352 +[2025-02-23 10:19:27] (step=0724900) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0338 +[2025-02-23 10:19:33] (step=0725000) Train Loss: 0.3213, Train Steps/Sec: 15.23, Grad Norm: 0.0356 +[2025-02-23 10:19:39] (step=0725100) Train Loss: 0.3213, Train Steps/Sec: 16.60, Grad Norm: 0.0333 +[2025-02-23 10:19:47] (step=0725200) Train Loss: 0.3216, Train Steps/Sec: 13.59, Grad Norm: 0.0358 +[2025-02-23 10:19:53] (step=0725300) Train Loss: 0.3217, Train Steps/Sec: 15.41, Grad Norm: 0.0358 +[2025-02-23 10:19:59] (step=0725400) Train Loss: 0.3220, Train Steps/Sec: 16.61, Grad Norm: 0.0362 +[2025-02-23 10:20:05] (step=0725500) Train Loss: 0.3220, Train Steps/Sec: 16.60, Grad Norm: 0.0399 +[2025-02-23 10:20:12] (step=0725600) Train Loss: 0.3215, Train Steps/Sec: 14.34, Grad Norm: 0.0403 +[2025-02-23 10:20:18] (step=0725700) Train Loss: 0.3221, Train Steps/Sec: 16.39, Grad Norm: 0.0378 +[2025-02-23 10:20:24] (step=0725800) Train Loss: 0.3216, Train Steps/Sec: 17.19, Grad Norm: 0.0350 +[2025-02-23 10:20:30] (step=0725900) Train Loss: 0.3218, Train Steps/Sec: 17.14, Grad Norm: 0.0384 +[2025-02-23 10:20:36] (step=0726000) Train Loss: 0.3222, Train Steps/Sec: 17.20, Grad Norm: 0.0359 +[2025-02-23 10:20:42] (step=0726100) Train Loss: 0.3210, Train Steps/Sec: 17.20, Grad Norm: 0.0405 +[2025-02-23 10:20:47] (step=0726200) Train Loss: 0.3207, Train Steps/Sec: 17.19, Grad Norm: 0.0349 +[2025-02-23 10:20:53] (step=0726300) Train Loss: 0.3223, Train Steps/Sec: 17.26, Grad Norm: 0.0384 +[2025-02-23 10:20:59] (step=0726400) Train Loss: 0.3214, Train Steps/Sec: 17.40, Grad Norm: 0.0424 +[2025-02-23 10:21:05] (step=0726500) Train Loss: 0.3217, Train Steps/Sec: 17.31, Grad Norm: 0.0353 +[2025-02-23 10:21:11] (step=0726600) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0376 +[2025-02-23 10:21:16] (step=0726700) Train Loss: 0.3215, Train Steps/Sec: 17.24, Grad Norm: 0.0358 +[2025-02-23 10:21:22] (step=0726800) Train Loss: 0.3219, Train Steps/Sec: 17.26, Grad Norm: 0.0367 +[2025-02-23 10:21:29] (step=0726900) Train Loss: 0.3212, Train Steps/Sec: 14.56, Grad Norm: 0.0392 +[2025-02-23 10:21:36] (step=0727000) Train Loss: 0.3218, Train Steps/Sec: 15.19, Grad Norm: 0.0353 +[2025-02-23 10:21:42] (step=0727100) Train Loss: 0.3213, Train Steps/Sec: 15.23, Grad Norm: 0.0345 +[2025-02-23 10:21:50] (step=0727200) Train Loss: 0.3211, Train Steps/Sec: 13.30, Grad Norm: 0.0402 +[2025-02-23 10:21:55] (step=0727300) Train Loss: 0.3215, Train Steps/Sec: 17.38, Grad Norm: 0.0432 +[2025-02-23 10:22:02] (step=0727400) Train Loss: 0.3211, Train Steps/Sec: 15.82, Grad Norm: 0.0355 +[2025-02-23 10:22:08] (step=0727500) Train Loss: 0.3212, Train Steps/Sec: 17.28, Grad Norm: 0.0367 +[2025-02-23 10:22:13] (step=0727600) Train Loss: 0.3213, Train Steps/Sec: 17.28, Grad Norm: 0.0386 +[2025-02-23 10:22:19] (step=0727700) Train Loss: 0.3216, Train Steps/Sec: 16.55, Grad Norm: 0.0359 +[2025-02-23 10:22:25] (step=0727800) Train Loss: 0.3214, Train Steps/Sec: 17.46, Grad Norm: 0.0386 +[2025-02-23 10:22:31] (step=0727900) Train Loss: 0.3213, Train Steps/Sec: 17.40, Grad Norm: 0.0366 +[2025-02-23 10:22:37] (step=0728000) Train Loss: 0.3217, Train Steps/Sec: 17.41, Grad Norm: 0.0383 +[2025-02-23 10:22:44] (step=0728100) Train Loss: 0.3213, Train Steps/Sec: 14.49, Grad Norm: 0.0391 +[2025-02-23 10:22:49] (step=0728200) Train Loss: 0.3214, Train Steps/Sec: 17.20, Grad Norm: 0.0374 +[2025-02-23 10:22:55] (step=0728300) Train Loss: 0.3219, Train Steps/Sec: 17.23, Grad Norm: 0.0407 +[2025-02-23 10:23:01] (step=0728400) Train Loss: 0.3218, Train Steps/Sec: 17.38, Grad Norm: 0.0368 +[2025-02-23 10:23:07] (step=0728500) Train Loss: 0.3215, Train Steps/Sec: 17.32, Grad Norm: 0.0359 +[2025-02-23 10:23:12] (step=0728600) Train Loss: 0.3215, Train Steps/Sec: 17.30, Grad Norm: 0.0394 +[2025-02-23 10:23:18] (step=0728700) Train Loss: 0.3217, Train Steps/Sec: 17.32, Grad Norm: 0.0366 +[2025-02-23 10:23:24] (step=0728800) Train Loss: 0.3216, Train Steps/Sec: 17.37, Grad Norm: 0.0376 +[2025-02-23 10:23:30] (step=0728900) Train Loss: 0.3215, Train Steps/Sec: 17.36, Grad Norm: 0.0403 +[2025-02-23 10:23:37] (step=0729000) Train Loss: 0.3219, Train Steps/Sec: 14.02, Grad Norm: 0.0359 +[2025-02-23 10:23:43] (step=0729100) Train Loss: 0.3214, Train Steps/Sec: 15.81, Grad Norm: 0.0386 +[2025-02-23 10:23:51] (step=0729200) Train Loss: 0.3211, Train Steps/Sec: 13.50, Grad Norm: 0.0381 +[2025-02-23 10:23:57] (step=0729300) Train Loss: 0.3212, Train Steps/Sec: 16.46, Grad Norm: 0.0359 +[2025-02-23 10:24:04] (step=0729400) Train Loss: 0.3211, Train Steps/Sec: 13.93, Grad Norm: 0.0379 +[2025-02-23 10:24:10] (step=0729500) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0366 +[2025-02-23 10:24:15] (step=0729600) Train Loss: 0.3212, Train Steps/Sec: 17.30, Grad Norm: 0.0397 +[2025-02-23 10:24:21] (step=0729700) Train Loss: 0.3217, Train Steps/Sec: 16.56, Grad Norm: 0.0357 +[2025-02-23 10:24:27] (step=0729800) Train Loss: 0.3213, Train Steps/Sec: 17.34, Grad Norm: 0.0328 +[2025-02-23 10:24:33] (step=0729900) Train Loss: 0.3221, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 10:24:39] (step=0730000) Train Loss: 0.3221, Train Steps/Sec: 17.34, Grad Norm: 0.0338 +[2025-02-23 10:24:45] (step=0730100) Train Loss: 0.3220, Train Steps/Sec: 17.37, Grad Norm: 0.0376 +[2025-02-23 10:24:50] (step=0730200) Train Loss: 0.3212, Train Steps/Sec: 17.39, Grad Norm: 0.0399 +[2025-02-23 10:24:56] (step=0730300) Train Loss: 0.3218, Train Steps/Sec: 17.38, Grad Norm: 0.0400 +[2025-02-23 10:25:02] (step=0730400) Train Loss: 0.3218, Train Steps/Sec: 17.16, Grad Norm: 0.0362 +[2025-02-23 10:25:08] (step=0730500) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0374 +[2025-02-23 10:25:15] (step=0730600) Train Loss: 0.3215, Train Steps/Sec: 14.33, Grad Norm: 0.0386 +[2025-02-23 10:25:20] (step=0730700) Train Loss: 0.3220, Train Steps/Sec: 17.22, Grad Norm: 0.0349 +[2025-02-23 10:25:26] (step=0730800) Train Loss: 0.3212, Train Steps/Sec: 17.26, Grad Norm: 0.0358 +[2025-02-23 10:25:32] (step=0730900) Train Loss: 0.3218, Train Steps/Sec: 17.27, Grad Norm: 0.0398 +[2025-02-23 10:25:39] (step=0731000) Train Loss: 0.3221, Train Steps/Sec: 13.66, Grad Norm: 0.0340 +[2025-02-23 10:25:46] (step=0731100) Train Loss: 0.3218, Train Steps/Sec: 14.76, Grad Norm: 0.0381 +[2025-02-23 10:25:53] (step=0731200) Train Loss: 0.3216, Train Steps/Sec: 15.33, Grad Norm: 0.0370 +[2025-02-23 10:25:59] (step=0731300) Train Loss: 0.3221, Train Steps/Sec: 16.63, Grad Norm: 0.0369 +[2025-02-23 10:26:05] (step=0731400) Train Loss: 0.3209, Train Steps/Sec: 16.59, Grad Norm: 0.0401 +[2025-02-23 10:26:10] (step=0731500) Train Loss: 0.3213, Train Steps/Sec: 17.43, Grad Norm: 0.0384 +[2025-02-23 10:26:16] (step=0731600) Train Loss: 0.3222, Train Steps/Sec: 17.41, Grad Norm: 0.0432 +[2025-02-23 10:26:22] (step=0731700) Train Loss: 0.3218, Train Steps/Sec: 16.53, Grad Norm: 0.0325 +[2025-02-23 10:26:28] (step=0731800) Train Loss: 0.3210, Train Steps/Sec: 17.33, Grad Norm: 0.0378 +[2025-02-23 10:26:35] (step=0731900) Train Loss: 0.3214, Train Steps/Sec: 14.42, Grad Norm: 0.0359 +[2025-02-23 10:26:41] (step=0732000) Train Loss: 0.3218, Train Steps/Sec: 17.42, Grad Norm: 0.0382 +[2025-02-23 10:26:46] (step=0732100) Train Loss: 0.3212, Train Steps/Sec: 17.40, Grad Norm: 0.0390 +[2025-02-23 10:26:52] (step=0732200) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0356 +[2025-02-23 10:26:58] (step=0732300) Train Loss: 0.3213, Train Steps/Sec: 17.39, Grad Norm: 0.0335 +[2025-02-23 10:27:04] (step=0732400) Train Loss: 0.3219, Train Steps/Sec: 17.42, Grad Norm: 0.0367 +[2025-02-23 10:27:09] (step=0732500) Train Loss: 0.3217, Train Steps/Sec: 17.43, Grad Norm: 0.0369 +[2025-02-23 10:27:15] (step=0732600) Train Loss: 0.3218, Train Steps/Sec: 17.46, Grad Norm: 0.0354 +[2025-02-23 10:27:21] (step=0732700) Train Loss: 0.3211, Train Steps/Sec: 17.39, Grad Norm: 0.0338 +[2025-02-23 10:27:27] (step=0732800) Train Loss: 0.3208, Train Steps/Sec: 17.39, Grad Norm: 0.0381 +[2025-02-23 10:27:33] (step=0732900) Train Loss: 0.3217, Train Steps/Sec: 15.99, Grad Norm: 0.0446 +[2025-02-23 10:27:40] (step=0733000) Train Loss: 0.3212, Train Steps/Sec: 14.19, Grad Norm: 0.0341 +[2025-02-23 10:27:48] (step=0733100) Train Loss: 0.3226, Train Steps/Sec: 13.26, Grad Norm: 0.0353 +[2025-02-23 10:27:54] (step=0733200) Train Loss: 0.3212, Train Steps/Sec: 14.53, Grad Norm: 0.0336 +[2025-02-23 10:28:00] (step=0733300) Train Loss: 0.3213, Train Steps/Sec: 17.25, Grad Norm: 0.0368 +[2025-02-23 10:28:06] (step=0733400) Train Loss: 0.3212, Train Steps/Sec: 16.51, Grad Norm: 0.0362 +[2025-02-23 10:28:12] (step=0733500) Train Loss: 0.3217, Train Steps/Sec: 17.24, Grad Norm: 0.0385 +[2025-02-23 10:28:18] (step=0733600) Train Loss: 0.3218, Train Steps/Sec: 17.23, Grad Norm: 0.0338 +[2025-02-23 10:28:24] (step=0733700) Train Loss: 0.3220, Train Steps/Sec: 16.42, Grad Norm: 0.0338 +[2025-02-23 10:28:30] (step=0733800) Train Loss: 0.3212, Train Steps/Sec: 17.25, Grad Norm: 0.0355 +[2025-02-23 10:28:36] (step=0733900) Train Loss: 0.3210, Train Steps/Sec: 17.15, Grad Norm: 0.0356 +[2025-02-23 10:28:41] (step=0734000) Train Loss: 0.3215, Train Steps/Sec: 17.17, Grad Norm: 0.0351 +[2025-02-23 10:28:47] (step=0734100) Train Loss: 0.3208, Train Steps/Sec: 17.15, Grad Norm: 0.0327 +[2025-02-23 10:28:53] (step=0734200) Train Loss: 0.3214, Train Steps/Sec: 17.21, Grad Norm: 0.0374 +[2025-02-23 10:28:59] (step=0734300) Train Loss: 0.3214, Train Steps/Sec: 17.22, Grad Norm: 0.0402 +[2025-02-23 10:29:06] (step=0734400) Train Loss: 0.3216, Train Steps/Sec: 14.22, Grad Norm: 0.0340 +[2025-02-23 10:29:12] (step=0734500) Train Loss: 0.3218, Train Steps/Sec: 17.00, Grad Norm: 0.0369 +[2025-02-23 10:29:18] (step=0734600) Train Loss: 0.3210, Train Steps/Sec: 17.27, Grad Norm: 0.0367 +[2025-02-23 10:29:23] (step=0734700) Train Loss: 0.3215, Train Steps/Sec: 17.29, Grad Norm: 0.0361 +[2025-02-23 10:29:29] (step=0734800) Train Loss: 0.3216, Train Steps/Sec: 16.59, Grad Norm: 0.0385 +[2025-02-23 10:29:36] (step=0734900) Train Loss: 0.3213, Train Steps/Sec: 15.92, Grad Norm: 0.0354 +[2025-02-23 10:29:43] (step=0735000) Train Loss: 0.3212, Train Steps/Sec: 13.58, Grad Norm: 0.0422 +[2025-02-23 10:29:49] (step=0735100) Train Loss: 0.3214, Train Steps/Sec: 16.50, Grad Norm: 0.0366 +[2025-02-23 10:29:56] (step=0735200) Train Loss: 0.3224, Train Steps/Sec: 14.53, Grad Norm: 0.0366 +[2025-02-23 10:30:02] (step=0735300) Train Loss: 0.3216, Train Steps/Sec: 17.20, Grad Norm: 0.0357 +[2025-02-23 10:30:08] (step=0735400) Train Loss: 0.3213, Train Steps/Sec: 16.37, Grad Norm: 0.0379 +[2025-02-23 10:30:14] (step=0735500) Train Loss: 0.3216, Train Steps/Sec: 17.02, Grad Norm: 0.0396 +[2025-02-23 10:30:21] (step=0735600) Train Loss: 0.3217, Train Steps/Sec: 14.31, Grad Norm: 0.0345 +[2025-02-23 10:30:27] (step=0735700) Train Loss: 0.3213, Train Steps/Sec: 16.45, Grad Norm: 0.0395 +[2025-02-23 10:30:33] (step=0735800) Train Loss: 0.3211, Train Steps/Sec: 17.32, Grad Norm: 0.0367 +[2025-02-23 10:30:38] (step=0735900) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0389 +[2025-02-23 10:30:44] (step=0736000) Train Loss: 0.3216, Train Steps/Sec: 17.29, Grad Norm: 0.0369 +[2025-02-23 10:30:50] (step=0736100) Train Loss: 0.3222, Train Steps/Sec: 17.41, Grad Norm: 0.0333 +[2025-02-23 10:30:56] (step=0736200) Train Loss: 0.3220, Train Steps/Sec: 17.40, Grad Norm: 0.0381 +[2025-02-23 10:31:01] (step=0736300) Train Loss: 0.3216, Train Steps/Sec: 17.33, Grad Norm: 0.0355 +[2025-02-23 10:31:07] (step=0736400) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0372 +[2025-02-23 10:31:13] (step=0736500) Train Loss: 0.3212, Train Steps/Sec: 17.25, Grad Norm: 0.0401 +[2025-02-23 10:31:19] (step=0736600) Train Loss: 0.3216, Train Steps/Sec: 17.22, Grad Norm: 0.0406 +[2025-02-23 10:31:25] (step=0736700) Train Loss: 0.3216, Train Steps/Sec: 17.22, Grad Norm: 0.0356 +[2025-02-23 10:31:31] (step=0736800) Train Loss: 0.3216, Train Steps/Sec: 15.11, Grad Norm: 0.0340 +[2025-02-23 10:31:39] (step=0736900) Train Loss: 0.3210, Train Steps/Sec: 13.03, Grad Norm: 0.0404 +[2025-02-23 10:31:46] (step=0737000) Train Loss: 0.3216, Train Steps/Sec: 15.22, Grad Norm: 0.0389 +[2025-02-23 10:31:52] (step=0737100) Train Loss: 0.3218, Train Steps/Sec: 15.82, Grad Norm: 0.0344 +[2025-02-23 10:31:58] (step=0737200) Train Loss: 0.3215, Train Steps/Sec: 15.24, Grad Norm: 0.0350 +[2025-02-23 10:32:04] (step=0737300) Train Loss: 0.3215, Train Steps/Sec: 17.38, Grad Norm: 0.0360 +[2025-02-23 10:32:10] (step=0737400) Train Loss: 0.3217, Train Steps/Sec: 16.61, Grad Norm: 0.0397 +[2025-02-23 10:32:16] (step=0737500) Train Loss: 0.3218, Train Steps/Sec: 17.40, Grad Norm: 0.0378 +[2025-02-23 10:32:22] (step=0737600) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0386 +[2025-02-23 10:32:28] (step=0737700) Train Loss: 0.3215, Train Steps/Sec: 16.46, Grad Norm: 0.0341 +[2025-02-23 10:32:34] (step=0737800) Train Loss: 0.3217, Train Steps/Sec: 17.22, Grad Norm: 0.0350 +[2025-02-23 10:32:39] (step=0737900) Train Loss: 0.3214, Train Steps/Sec: 17.26, Grad Norm: 0.0359 +[2025-02-23 10:32:45] (step=0738000) Train Loss: 0.3219, Train Steps/Sec: 17.20, Grad Norm: 0.0346 +[2025-02-23 10:32:52] (step=0738100) Train Loss: 0.3219, Train Steps/Sec: 14.41, Grad Norm: 0.0388 +[2025-02-23 10:32:58] (step=0738200) Train Loss: 0.3211, Train Steps/Sec: 17.24, Grad Norm: 0.0383 +[2025-02-23 10:33:04] (step=0738300) Train Loss: 0.3219, Train Steps/Sec: 17.19, Grad Norm: 0.0323 +[2025-02-23 10:33:10] (step=0738400) Train Loss: 0.3220, Train Steps/Sec: 17.32, Grad Norm: 0.0337 +[2025-02-23 10:33:15] (step=0738500) Train Loss: 0.3220, Train Steps/Sec: 17.32, Grad Norm: 0.0376 +[2025-02-23 10:33:21] (step=0738600) Train Loss: 0.3215, Train Steps/Sec: 17.38, Grad Norm: 0.0400 +[2025-02-23 10:33:27] (step=0738700) Train Loss: 0.3213, Train Steps/Sec: 16.72, Grad Norm: 0.0337 +[2025-02-23 10:33:34] (step=0738800) Train Loss: 0.3217, Train Steps/Sec: 15.36, Grad Norm: 0.0384 +[2025-02-23 10:33:40] (step=0738900) Train Loss: 0.3213, Train Steps/Sec: 15.19, Grad Norm: 0.0360 +[2025-02-23 10:33:46] (step=0739000) Train Loss: 0.3219, Train Steps/Sec: 15.90, Grad Norm: 0.0361 +[2025-02-23 10:33:53] (step=0739100) Train Loss: 0.3213, Train Steps/Sec: 15.88, Grad Norm: 0.0372 +[2025-02-23 10:33:59] (step=0739200) Train Loss: 0.3219, Train Steps/Sec: 15.30, Grad Norm: 0.0359 +[2025-02-23 10:34:05] (step=0739300) Train Loss: 0.3217, Train Steps/Sec: 17.37, Grad Norm: 0.0350 +[2025-02-23 10:34:12] (step=0739400) Train Loss: 0.3213, Train Steps/Sec: 13.94, Grad Norm: 0.0381 +[2025-02-23 10:34:18] (step=0739500) Train Loss: 0.3215, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 10:34:24] (step=0739600) Train Loss: 0.3217, Train Steps/Sec: 17.39, Grad Norm: 0.0374 +[2025-02-23 10:34:30] (step=0739700) Train Loss: 0.3213, Train Steps/Sec: 16.55, Grad Norm: 0.0344 +[2025-02-23 10:34:36] (step=0739800) Train Loss: 0.3212, Train Steps/Sec: 17.36, Grad Norm: 0.0325 +[2025-02-23 10:34:41] (step=0739900) Train Loss: 0.3221, Train Steps/Sec: 17.40, Grad Norm: 0.0378 +[2025-02-23 10:34:47] (step=0740000) Train Loss: 0.3214, Train Steps/Sec: 17.38, Grad Norm: 0.0338 +[2025-02-23 10:34:53] (step=0740100) Train Loss: 0.3212, Train Steps/Sec: 17.36, Grad Norm: 0.0389 +[2025-02-23 10:34:59] (step=0740200) Train Loss: 0.3217, Train Steps/Sec: 17.40, Grad Norm: 0.0388 +[2025-02-23 10:35:04] (step=0740300) Train Loss: 0.3217, Train Steps/Sec: 17.39, Grad Norm: 0.0331 +[2025-02-23 10:35:10] (step=0740400) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0349 +[2025-02-23 10:35:16] (step=0740500) Train Loss: 0.3218, Train Steps/Sec: 17.29, Grad Norm: 0.0364 +[2025-02-23 10:35:23] (step=0740600) Train Loss: 0.3209, Train Steps/Sec: 14.46, Grad Norm: 0.0407 +[2025-02-23 10:35:29] (step=0740700) Train Loss: 0.3215, Train Steps/Sec: 15.29, Grad Norm: 0.0373 +[2025-02-23 10:35:36] (step=0740800) Train Loss: 0.3216, Train Steps/Sec: 15.24, Grad Norm: 0.0379 +[2025-02-23 10:35:42] (step=0740900) Train Loss: 0.3212, Train Steps/Sec: 16.52, Grad Norm: 0.0384 +[2025-02-23 10:35:49] (step=0741000) Train Loss: 0.3211, Train Steps/Sec: 15.17, Grad Norm: 0.0374 +[2025-02-23 10:35:55] (step=0741100) Train Loss: 0.3218, Train Steps/Sec: 16.47, Grad Norm: 0.0361 +[2025-02-23 10:36:01] (step=0741200) Train Loss: 0.3213, Train Steps/Sec: 15.80, Grad Norm: 0.0371 +[2025-02-23 10:36:07] (step=0741300) Train Loss: 0.3213, Train Steps/Sec: 16.50, Grad Norm: 0.0362 +[2025-02-23 10:36:13] (step=0741400) Train Loss: 0.3211, Train Steps/Sec: 16.43, Grad Norm: 0.0355 +[2025-02-23 10:36:19] (step=0741500) Train Loss: 0.3210, Train Steps/Sec: 17.27, Grad Norm: 0.0355 +[2025-02-23 10:36:25] (step=0741600) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0370 +[2025-02-23 10:36:31] (step=0741700) Train Loss: 0.3213, Train Steps/Sec: 16.43, Grad Norm: 0.0381 +[2025-02-23 10:36:37] (step=0741800) Train Loss: 0.3218, Train Steps/Sec: 17.25, Grad Norm: 0.0374 +[2025-02-23 10:36:43] (step=0741900) Train Loss: 0.3218, Train Steps/Sec: 14.51, Grad Norm: 0.0401 +[2025-02-23 10:36:49] (step=0742000) Train Loss: 0.3206, Train Steps/Sec: 17.26, Grad Norm: 0.0372 +[2025-02-23 10:36:55] (step=0742100) Train Loss: 0.3214, Train Steps/Sec: 17.31, Grad Norm: 0.0356 +[2025-02-23 10:37:01] (step=0742200) Train Loss: 0.3213, Train Steps/Sec: 17.30, Grad Norm: 0.0374 +[2025-02-23 10:37:07] (step=0742300) Train Loss: 0.3213, Train Steps/Sec: 17.25, Grad Norm: 0.0361 +[2025-02-23 10:37:12] (step=0742400) Train Loss: 0.3216, Train Steps/Sec: 17.25, Grad Norm: 0.0353 +[2025-02-23 10:37:18] (step=0742500) Train Loss: 0.3216, Train Steps/Sec: 17.25, Grad Norm: 0.0364 +[2025-02-23 10:37:24] (step=0742600) Train Loss: 0.3219, Train Steps/Sec: 15.95, Grad Norm: 0.0384 +[2025-02-23 10:37:31] (step=0742700) Train Loss: 0.3218, Train Steps/Sec: 15.94, Grad Norm: 0.0366 +[2025-02-23 10:37:37] (step=0742800) Train Loss: 0.3217, Train Steps/Sec: 15.16, Grad Norm: 0.0388 +[2025-02-23 10:37:43] (step=0742900) Train Loss: 0.3212, Train Steps/Sec: 17.25, Grad Norm: 0.0351 +[2025-02-23 10:37:50] (step=0743000) Train Loss: 0.3217, Train Steps/Sec: 15.26, Grad Norm: 0.0360 +[2025-02-23 10:37:57] (step=0743100) Train Loss: 0.3216, Train Steps/Sec: 13.97, Grad Norm: 0.0381 +[2025-02-23 10:38:03] (step=0743200) Train Loss: 0.3213, Train Steps/Sec: 15.79, Grad Norm: 0.0367 +[2025-02-23 10:38:09] (step=0743300) Train Loss: 0.3213, Train Steps/Sec: 16.55, Grad Norm: 0.0361 +[2025-02-23 10:38:15] (step=0743400) Train Loss: 0.3212, Train Steps/Sec: 16.52, Grad Norm: 0.0357 +[2025-02-23 10:38:21] (step=0743500) Train Loss: 0.3207, Train Steps/Sec: 17.25, Grad Norm: 0.0394 +[2025-02-23 10:38:27] (step=0743600) Train Loss: 0.3212, Train Steps/Sec: 17.31, Grad Norm: 0.0375 +[2025-02-23 10:38:33] (step=0743700) Train Loss: 0.3213, Train Steps/Sec: 16.47, Grad Norm: 0.0401 +[2025-02-23 10:38:39] (step=0743800) Train Loss: 0.3214, Train Steps/Sec: 17.33, Grad Norm: 0.0373 +[2025-02-23 10:38:44] (step=0743900) Train Loss: 0.3214, Train Steps/Sec: 17.32, Grad Norm: 0.0354 +[2025-02-23 10:38:50] (step=0744000) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0366 +[2025-02-23 10:38:56] (step=0744100) Train Loss: 0.3210, Train Steps/Sec: 17.25, Grad Norm: 0.0381 +[2025-02-23 10:39:02] (step=0744200) Train Loss: 0.3216, Train Steps/Sec: 17.20, Grad Norm: 0.0376 +[2025-02-23 10:39:08] (step=0744300) Train Loss: 0.3214, Train Steps/Sec: 17.23, Grad Norm: 0.0362 +[2025-02-23 10:39:15] (step=0744400) Train Loss: 0.3216, Train Steps/Sec: 14.37, Grad Norm: 0.0383 +[2025-02-23 10:39:20] (step=0744500) Train Loss: 0.3211, Train Steps/Sec: 17.19, Grad Norm: 0.0389 +[2025-02-23 10:39:27] (step=0744600) Train Loss: 0.3221, Train Steps/Sec: 15.24, Grad Norm: 0.0369 +[2025-02-23 10:39:34] (step=0744700) Train Loss: 0.3214, Train Steps/Sec: 15.17, Grad Norm: 0.0379 +[2025-02-23 10:39:40] (step=0744800) Train Loss: 0.3207, Train Steps/Sec: 16.48, Grad Norm: 0.0437 +[2025-02-23 10:39:46] (step=0744900) Train Loss: 0.3212, Train Steps/Sec: 16.49, Grad Norm: 0.0354 +[2025-02-23 10:39:52] (step=0745000) Train Loss: 0.3220, Train Steps/Sec: 15.79, Grad Norm: 0.0348 +[2025-02-23 10:39:58] (step=0745100) Train Loss: 0.3221, Train Steps/Sec: 16.39, Grad Norm: 0.0347 +[2025-02-23 10:40:05] (step=0745200) Train Loss: 0.3211, Train Steps/Sec: 15.68, Grad Norm: 0.0410 +[2025-02-23 10:40:11] (step=0745300) Train Loss: 0.3228, Train Steps/Sec: 16.37, Grad Norm: 0.0360 +[2025-02-23 10:40:17] (step=0745400) Train Loss: 0.3216, Train Steps/Sec: 16.38, Grad Norm: 0.0359 +[2025-02-23 10:40:23] (step=0745500) Train Loss: 0.3211, Train Steps/Sec: 17.11, Grad Norm: 0.0340 +[2025-02-23 10:40:30] (step=0745600) Train Loss: 0.3213, Train Steps/Sec: 14.40, Grad Norm: 0.0325 +[2025-02-23 10:40:36] (step=0745700) Train Loss: 0.3220, Train Steps/Sec: 16.38, Grad Norm: 0.0366 +[2025-02-23 10:40:42] (step=0745800) Train Loss: 0.3212, Train Steps/Sec: 17.26, Grad Norm: 0.0341 +[2025-02-23 10:40:47] (step=0745900) Train Loss: 0.3217, Train Steps/Sec: 17.26, Grad Norm: 0.0357 +[2025-02-23 10:40:53] (step=0746000) Train Loss: 0.3218, Train Steps/Sec: 17.31, Grad Norm: 0.0330 +[2025-02-23 10:40:59] (step=0746100) Train Loss: 0.3216, Train Steps/Sec: 17.37, Grad Norm: 0.0348 +[2025-02-23 10:41:05] (step=0746200) Train Loss: 0.3214, Train Steps/Sec: 17.35, Grad Norm: 0.0369 +[2025-02-23 10:41:10] (step=0746300) Train Loss: 0.3211, Train Steps/Sec: 17.38, Grad Norm: 0.0406 +[2025-02-23 10:41:16] (step=0746400) Train Loss: 0.3214, Train Steps/Sec: 17.42, Grad Norm: 0.0390 +[2025-02-23 10:41:22] (step=0746500) Train Loss: 0.3221, Train Steps/Sec: 15.99, Grad Norm: 0.0344 +[2025-02-23 10:41:29] (step=0746600) Train Loss: 0.3213, Train Steps/Sec: 15.32, Grad Norm: 0.0334 +[2025-02-23 10:41:35] (step=0746700) Train Loss: 0.3211, Train Steps/Sec: 15.85, Grad Norm: 0.0383 +[2025-02-23 10:41:41] (step=0746800) Train Loss: 0.3212, Train Steps/Sec: 16.59, Grad Norm: 0.0379 +[2025-02-23 10:41:48] (step=0746900) Train Loss: 0.3214, Train Steps/Sec: 14.31, Grad Norm: 0.0370 +[2025-02-23 10:41:55] (step=0747000) Train Loss: 0.3211, Train Steps/Sec: 15.80, Grad Norm: 0.0385 +[2025-02-23 10:42:01] (step=0747100) Train Loss: 0.3218, Train Steps/Sec: 16.45, Grad Norm: 0.0360 +[2025-02-23 10:42:07] (step=0747200) Train Loss: 0.3208, Train Steps/Sec: 15.79, Grad Norm: 0.0330 +[2025-02-23 10:42:13] (step=0747300) Train Loss: 0.3214, Train Steps/Sec: 16.50, Grad Norm: 0.0377 +[2025-02-23 10:42:19] (step=0747400) Train Loss: 0.3212, Train Steps/Sec: 16.47, Grad Norm: 0.0380 +[2025-02-23 10:42:25] (step=0747500) Train Loss: 0.3214, Train Steps/Sec: 17.12, Grad Norm: 0.0341 +[2025-02-23 10:42:31] (step=0747600) Train Loss: 0.3218, Train Steps/Sec: 17.11, Grad Norm: 0.0370 +[2025-02-23 10:42:37] (step=0747700) Train Loss: 0.3221, Train Steps/Sec: 16.35, Grad Norm: 0.0366 +[2025-02-23 10:42:43] (step=0747800) Train Loss: 0.3212, Train Steps/Sec: 17.12, Grad Norm: 0.0362 +[2025-02-23 10:42:49] (step=0747900) Train Loss: 0.3213, Train Steps/Sec: 17.11, Grad Norm: 0.0368 +[2025-02-23 10:42:54] (step=0748000) Train Loss: 0.3220, Train Steps/Sec: 17.24, Grad Norm: 0.0359 +[2025-02-23 10:43:01] (step=0748100) Train Loss: 0.3218, Train Steps/Sec: 14.37, Grad Norm: 0.0349 +[2025-02-23 10:43:07] (step=0748200) Train Loss: 0.3216, Train Steps/Sec: 17.14, Grad Norm: 0.0373 +[2025-02-23 10:43:13] (step=0748300) Train Loss: 0.3216, Train Steps/Sec: 17.27, Grad Norm: 0.0367 +[2025-02-23 10:43:19] (step=0748400) Train Loss: 0.3213, Train Steps/Sec: 17.24, Grad Norm: 0.0363 +[2025-02-23 10:43:26] (step=0748500) Train Loss: 0.3219, Train Steps/Sec: 14.68, Grad Norm: 0.0399 +[2025-02-23 10:43:32] (step=0748600) Train Loss: 0.3212, Train Steps/Sec: 15.89, Grad Norm: 0.0321 +[2025-02-23 10:43:38] (step=0748700) Train Loss: 0.3212, Train Steps/Sec: 16.46, Grad Norm: 0.0351 +[2025-02-23 10:43:44] (step=0748800) Train Loss: 0.3213, Train Steps/Sec: 16.50, Grad Norm: 0.0399 +[2025-02-23 10:43:50] (step=0748900) Train Loss: 0.3214, Train Steps/Sec: 17.28, Grad Norm: 0.0399 +[2025-02-23 10:43:56] (step=0749000) Train Loss: 0.3209, Train Steps/Sec: 15.98, Grad Norm: 0.0372 +[2025-02-23 10:44:02] (step=0749100) Train Loss: 0.3218, Train Steps/Sec: 16.55, Grad Norm: 0.0381 +[2025-02-23 10:44:08] (step=0749200) Train Loss: 0.3215, Train Steps/Sec: 15.88, Grad Norm: 0.0351 +[2025-02-23 10:44:14] (step=0749300) Train Loss: 0.3217, Train Steps/Sec: 16.61, Grad Norm: 0.0357 +[2025-02-23 10:44:22] (step=0749400) Train Loss: 0.3216, Train Steps/Sec: 14.07, Grad Norm: 0.0366 +[2025-02-23 10:44:27] (step=0749500) Train Loss: 0.3213, Train Steps/Sec: 17.28, Grad Norm: 0.0388 +[2025-02-23 10:44:33] (step=0749600) Train Loss: 0.3214, Train Steps/Sec: 17.27, Grad Norm: 0.0374 +[2025-02-23 10:44:39] (step=0749700) Train Loss: 0.3216, Train Steps/Sec: 16.54, Grad Norm: 0.0360 +[2025-02-23 10:44:45] (step=0749800) Train Loss: 0.3219, Train Steps/Sec: 17.35, Grad Norm: 0.0321 +[2025-02-23 10:44:51] (step=0749900) Train Loss: 0.3213, Train Steps/Sec: 17.29, Grad Norm: 0.0375 +[2025-02-23 10:44:57] (step=0750000) Train Loss: 0.3210, Train Steps/Sec: 17.31, Grad Norm: 0.0326 +[2025-02-23 10:44:57] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0750000.pt +[2025-02-23 10:45:03] (step=0750100) Train Loss: 0.3218, Train Steps/Sec: 14.99, Grad Norm: 0.0416 +[2025-02-23 10:45:09] (step=0750200) Train Loss: 0.3213, Train Steps/Sec: 17.30, Grad Norm: 0.0383 +[2025-02-23 10:45:15] (step=0750300) Train Loss: 0.3211, Train Steps/Sec: 17.27, Grad Norm: 0.0379 +[2025-02-23 10:45:21] (step=0750400) Train Loss: 0.3216, Train Steps/Sec: 15.93, Grad Norm: 0.0329 +[2025-02-23 10:45:28] (step=0750500) Train Loss: 0.3214, Train Steps/Sec: 15.25, Grad Norm: 0.0355 +[2025-02-23 10:45:34] (step=0750600) Train Loss: 0.3212, Train Steps/Sec: 14.74, Grad Norm: 0.0372 +[2025-02-23 10:45:41] (step=0750700) Train Loss: 0.3214, Train Steps/Sec: 14.78, Grad Norm: 0.0355 +[2025-02-23 10:45:47] (step=0750800) Train Loss: 0.3220, Train Steps/Sec: 17.15, Grad Norm: 0.0379 +[2025-02-23 10:45:53] (step=0750900) Train Loss: 0.3215, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 10:45:59] (step=0751000) Train Loss: 0.3214, Train Steps/Sec: 16.66, Grad Norm: 0.0346 +[2025-02-23 10:46:05] (step=0751100) Train Loss: 0.3214, Train Steps/Sec: 15.85, Grad Norm: 0.0384 +[2025-02-23 10:46:11] (step=0751200) Train Loss: 0.3212, Train Steps/Sec: 15.93, Grad Norm: 0.0405 +[2025-02-23 10:46:17] (step=0751300) Train Loss: 0.3214, Train Steps/Sec: 16.67, Grad Norm: 0.0350 +[2025-02-23 10:46:23] (step=0751400) Train Loss: 0.3214, Train Steps/Sec: 16.49, Grad Norm: 0.0373 +[2025-02-23 10:46:29] (step=0751500) Train Loss: 0.3215, Train Steps/Sec: 17.22, Grad Norm: 0.0333 +[2025-02-23 10:46:35] (step=0751600) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0336 +[2025-02-23 10:46:41] (step=0751700) Train Loss: 0.3217, Train Steps/Sec: 17.25, Grad Norm: 0.0379 +[2025-02-23 10:46:47] (step=0751800) Train Loss: 0.3214, Train Steps/Sec: 16.51, Grad Norm: 0.0382 +[2025-02-23 10:46:54] (step=0751900) Train Loss: 0.3210, Train Steps/Sec: 14.44, Grad Norm: 0.0394 +[2025-02-23 10:47:00] (step=0752000) Train Loss: 0.3213, Train Steps/Sec: 17.26, Grad Norm: 0.0374 +[2025-02-23 10:47:05] (step=0752100) Train Loss: 0.3213, Train Steps/Sec: 17.35, Grad Norm: 0.0350 +[2025-02-23 10:47:11] (step=0752200) Train Loss: 0.3216, Train Steps/Sec: 17.35, Grad Norm: 0.0380 +[2025-02-23 10:47:17] (step=0752300) Train Loss: 0.3217, Train Steps/Sec: 17.35, Grad Norm: 0.0354 +[2025-02-23 10:47:24] (step=0752400) Train Loss: 0.3209, Train Steps/Sec: 14.73, Grad Norm: 0.0421 +[2025-02-23 10:47:30] (step=0752500) Train Loss: 0.3212, Train Steps/Sec: 15.86, Grad Norm: 0.0403 +[2025-02-23 10:47:36] (step=0752600) Train Loss: 0.3213, Train Steps/Sec: 16.45, Grad Norm: 0.0369 +[2025-02-23 10:47:42] (step=0752700) Train Loss: 0.3215, Train Steps/Sec: 16.53, Grad Norm: 0.0372 +[2025-02-23 10:47:48] (step=0752800) Train Loss: 0.3213, Train Steps/Sec: 17.34, Grad Norm: 0.0349 +[2025-02-23 10:47:54] (step=0752900) Train Loss: 0.3214, Train Steps/Sec: 17.32, Grad Norm: 0.0385 +[2025-02-23 10:47:59] (step=0753000) Train Loss: 0.3216, Train Steps/Sec: 17.33, Grad Norm: 0.0394 +[2025-02-23 10:48:06] (step=0753100) Train Loss: 0.3213, Train Steps/Sec: 15.11, Grad Norm: 0.0362 +[2025-02-23 10:48:14] (step=0753200) Train Loss: 0.3215, Train Steps/Sec: 13.32, Grad Norm: 0.0356 +[2025-02-23 10:48:20] (step=0753300) Train Loss: 0.3211, Train Steps/Sec: 16.51, Grad Norm: 0.0371 +[2025-02-23 10:48:26] (step=0753400) Train Loss: 0.3217, Train Steps/Sec: 16.49, Grad Norm: 0.0340 +[2025-02-23 10:48:32] (step=0753500) Train Loss: 0.3217, Train Steps/Sec: 17.16, Grad Norm: 0.0365 +[2025-02-23 10:48:37] (step=0753600) Train Loss: 0.3216, Train Steps/Sec: 17.14, Grad Norm: 0.0372 +[2025-02-23 10:48:43] (step=0753700) Train Loss: 0.3216, Train Steps/Sec: 17.21, Grad Norm: 0.0370 +[2025-02-23 10:48:49] (step=0753800) Train Loss: 0.3215, Train Steps/Sec: 16.36, Grad Norm: 0.0376 +[2025-02-23 10:48:55] (step=0753900) Train Loss: 0.3221, Train Steps/Sec: 17.21, Grad Norm: 0.0333 +[2025-02-23 10:49:01] (step=0754000) Train Loss: 0.3213, Train Steps/Sec: 17.13, Grad Norm: 0.0367 +[2025-02-23 10:49:07] (step=0754100) Train Loss: 0.3211, Train Steps/Sec: 17.14, Grad Norm: 0.0384 +[2025-02-23 10:49:13] (step=0754200) Train Loss: 0.3208, Train Steps/Sec: 17.11, Grad Norm: 0.0395 +[2025-02-23 10:49:19] (step=0754300) Train Loss: 0.3215, Train Steps/Sec: 15.80, Grad Norm: 0.0351 +[2025-02-23 10:49:27] (step=0754400) Train Loss: 0.3211, Train Steps/Sec: 12.51, Grad Norm: 0.0361 +[2025-02-23 10:49:33] (step=0754500) Train Loss: 0.3215, Train Steps/Sec: 16.49, Grad Norm: 0.0347 +[2025-02-23 10:49:39] (step=0754600) Train Loss: 0.3212, Train Steps/Sec: 16.49, Grad Norm: 0.0393 +[2025-02-23 10:49:45] (step=0754700) Train Loss: 0.3212, Train Steps/Sec: 17.23, Grad Norm: 0.0377 +[2025-02-23 10:49:51] (step=0754800) Train Loss: 0.3210, Train Steps/Sec: 17.22, Grad Norm: 0.0358 +[2025-02-23 10:49:56] (step=0754900) Train Loss: 0.3212, Train Steps/Sec: 17.25, Grad Norm: 0.0350 +[2025-02-23 10:50:02] (step=0755000) Train Loss: 0.3221, Train Steps/Sec: 17.28, Grad Norm: 0.0353 +[2025-02-23 10:50:09] (step=0755100) Train Loss: 0.3211, Train Steps/Sec: 15.17, Grad Norm: 0.0343 +[2025-02-23 10:50:15] (step=0755200) Train Loss: 0.3219, Train Steps/Sec: 16.50, Grad Norm: 0.0396 +[2025-02-23 10:50:21] (step=0755300) Train Loss: 0.3216, Train Steps/Sec: 15.77, Grad Norm: 0.0338 +[2025-02-23 10:50:27] (step=0755400) Train Loss: 0.3212, Train Steps/Sec: 16.42, Grad Norm: 0.0366 +[2025-02-23 10:50:33] (step=0755500) Train Loss: 0.3212, Train Steps/Sec: 17.20, Grad Norm: 0.0362 +[2025-02-23 10:50:39] (step=0755600) Train Loss: 0.3210, Train Steps/Sec: 17.15, Grad Norm: 0.0393 +[2025-02-23 10:50:46] (step=0755700) Train Loss: 0.3214, Train Steps/Sec: 14.40, Grad Norm: 0.0343 +[2025-02-23 10:50:52] (step=0755800) Train Loss: 0.3211, Train Steps/Sec: 16.38, Grad Norm: 0.0357 +[2025-02-23 10:50:58] (step=0755900) Train Loss: 0.3216, Train Steps/Sec: 17.14, Grad Norm: 0.0356 +[2025-02-23 10:51:04] (step=0756000) Train Loss: 0.3210, Train Steps/Sec: 17.17, Grad Norm: 0.0387 +[2025-02-23 10:51:10] (step=0756100) Train Loss: 0.3219, Train Steps/Sec: 17.28, Grad Norm: 0.0348 +[2025-02-23 10:51:16] (step=0756200) Train Loss: 0.3213, Train Steps/Sec: 16.60, Grad Norm: 0.0347 +[2025-02-23 10:51:22] (step=0756300) Train Loss: 0.3209, Train Steps/Sec: 15.32, Grad Norm: 0.0349 +[2025-02-23 10:51:28] (step=0756400) Train Loss: 0.3224, Train Steps/Sec: 15.95, Grad Norm: 0.0379 +[2025-02-23 10:51:34] (step=0756500) Train Loss: 0.3211, Train Steps/Sec: 16.48, Grad Norm: 0.0339 +[2025-02-23 10:51:40] (step=0756600) Train Loss: 0.3216, Train Steps/Sec: 16.50, Grad Norm: 0.0351 +[2025-02-23 10:51:46] (step=0756700) Train Loss: 0.3217, Train Steps/Sec: 17.30, Grad Norm: 0.0340 +[2025-02-23 10:51:52] (step=0756800) Train Loss: 0.3211, Train Steps/Sec: 17.27, Grad Norm: 0.0372 +[2025-02-23 10:51:59] (step=0756900) Train Loss: 0.3209, Train Steps/Sec: 14.35, Grad Norm: 0.0405 +[2025-02-23 10:52:05] (step=0757000) Train Loss: 0.3217, Train Steps/Sec: 17.19, Grad Norm: 0.0328 +[2025-02-23 10:52:11] (step=0757100) Train Loss: 0.3217, Train Steps/Sec: 15.03, Grad Norm: 0.0339 +[2025-02-23 10:52:18] (step=0757200) Train Loss: 0.3214, Train Steps/Sec: 15.81, Grad Norm: 0.0323 +[2025-02-23 10:52:24] (step=0757300) Train Loss: 0.3211, Train Steps/Sec: 16.55, Grad Norm: 0.0417 +[2025-02-23 10:52:30] (step=0757400) Train Loss: 0.3214, Train Steps/Sec: 16.54, Grad Norm: 0.0365 +[2025-02-23 10:52:36] (step=0757500) Train Loss: 0.3213, Train Steps/Sec: 17.27, Grad Norm: 0.0349 +[2025-02-23 10:52:42] (step=0757600) Train Loss: 0.3209, Train Steps/Sec: 17.25, Grad Norm: 0.0357 +[2025-02-23 10:52:47] (step=0757700) Train Loss: 0.3215, Train Steps/Sec: 17.29, Grad Norm: 0.0356 +[2025-02-23 10:52:53] (step=0757800) Train Loss: 0.3215, Train Steps/Sec: 16.42, Grad Norm: 0.0370 +[2025-02-23 10:52:59] (step=0757900) Train Loss: 0.3214, Train Steps/Sec: 17.29, Grad Norm: 0.0327 +[2025-02-23 10:53:05] (step=0758000) Train Loss: 0.3212, Train Steps/Sec: 17.26, Grad Norm: 0.0402 +[2025-02-23 10:53:11] (step=0758100) Train Loss: 0.3212, Train Steps/Sec: 17.18, Grad Norm: 0.0371 +[2025-02-23 10:53:19] (step=0758200) Train Loss: 0.3214, Train Steps/Sec: 12.91, Grad Norm: 0.0336 +[2025-02-23 10:53:25] (step=0758300) Train Loss: 0.3216, Train Steps/Sec: 15.31, Grad Norm: 0.0386 +[2025-02-23 10:53:31] (step=0758400) Train Loss: 0.3211, Train Steps/Sec: 16.48, Grad Norm: 0.0329 +[2025-02-23 10:53:37] (step=0758500) Train Loss: 0.3216, Train Steps/Sec: 16.45, Grad Norm: 0.0386 +[2025-02-23 10:53:43] (step=0758600) Train Loss: 0.3216, Train Steps/Sec: 17.20, Grad Norm: 0.0368 +[2025-02-23 10:53:49] (step=0758700) Train Loss: 0.3215, Train Steps/Sec: 17.19, Grad Norm: 0.0364 +[2025-02-23 10:53:55] (step=0758800) Train Loss: 0.3215, Train Steps/Sec: 17.19, Grad Norm: 0.0375 +[2025-02-23 10:54:00] (step=0758900) Train Loss: 0.3212, Train Steps/Sec: 17.19, Grad Norm: 0.0395 +[2025-02-23 10:54:06] (step=0759000) Train Loss: 0.3215, Train Steps/Sec: 17.03, Grad Norm: 0.0373 +[2025-02-23 10:54:13] (step=0759100) Train Loss: 0.3212, Train Steps/Sec: 15.04, Grad Norm: 0.0358 +[2025-02-23 10:54:19] (step=0759200) Train Loss: 0.3215, Train Steps/Sec: 16.52, Grad Norm: 0.0422 +[2025-02-23 10:54:25] (step=0759300) Train Loss: 0.3214, Train Steps/Sec: 15.83, Grad Norm: 0.0392 +[2025-02-23 10:54:33] (step=0759400) Train Loss: 0.3214, Train Steps/Sec: 13.88, Grad Norm: 0.0411 +[2025-02-23 10:54:38] (step=0759500) Train Loss: 0.3216, Train Steps/Sec: 17.25, Grad Norm: 0.0403 +[2025-02-23 10:54:44] (step=0759600) Train Loss: 0.3215, Train Steps/Sec: 17.26, Grad Norm: 0.0361 +[2025-02-23 10:54:50] (step=0759700) Train Loss: 0.3215, Train Steps/Sec: 17.29, Grad Norm: 0.0336 +[2025-02-23 10:54:56] (step=0759800) Train Loss: 0.3219, Train Steps/Sec: 16.53, Grad Norm: 0.0376 +[2025-02-23 10:55:02] (step=0759900) Train Loss: 0.3214, Train Steps/Sec: 17.29, Grad Norm: 0.0368 +[2025-02-23 10:55:08] (step=0760000) Train Loss: 0.3217, Train Steps/Sec: 17.28, Grad Norm: 0.0351 +[2025-02-23 10:55:14] (step=0760100) Train Loss: 0.3214, Train Steps/Sec: 16.51, Grad Norm: 0.0365 +[2025-02-23 10:55:20] (step=0760200) Train Loss: 0.3219, Train Steps/Sec: 15.23, Grad Norm: 0.0379 +[2025-02-23 10:55:27] (step=0760300) Train Loss: 0.3217, Train Steps/Sec: 15.81, Grad Norm: 0.0415 +[2025-02-23 10:55:33] (step=0760400) Train Loss: 0.3211, Train Steps/Sec: 16.41, Grad Norm: 0.0352 +[2025-02-23 10:55:39] (step=0760500) Train Loss: 0.3212, Train Steps/Sec: 16.39, Grad Norm: 0.0409 +[2025-02-23 10:55:45] (step=0760600) Train Loss: 0.3213, Train Steps/Sec: 17.03, Grad Norm: 0.0391 +[2025-02-23 10:55:52] (step=0760700) Train Loss: 0.3211, Train Steps/Sec: 14.41, Grad Norm: 0.0385 +[2025-02-23 10:55:57] (step=0760800) Train Loss: 0.3218, Train Steps/Sec: 17.00, Grad Norm: 0.0372 +[2025-02-23 10:56:03] (step=0760900) Train Loss: 0.3212, Train Steps/Sec: 17.01, Grad Norm: 0.0369 +[2025-02-23 10:56:09] (step=0761000) Train Loss: 0.3214, Train Steps/Sec: 17.08, Grad Norm: 0.0354 +[2025-02-23 10:56:16] (step=0761100) Train Loss: 0.3214, Train Steps/Sec: 14.96, Grad Norm: 0.0426 +[2025-02-23 10:56:22] (step=0761200) Train Loss: 0.3219, Train Steps/Sec: 17.05, Grad Norm: 0.0379 +[2025-02-23 10:56:28] (step=0761300) Train Loss: 0.3215, Train Steps/Sec: 15.01, Grad Norm: 0.0364 +[2025-02-23 10:56:35] (step=0761400) Train Loss: 0.3220, Train Steps/Sec: 16.30, Grad Norm: 0.0396 +[2025-02-23 10:56:40] (step=0761500) Train Loss: 0.3215, Train Steps/Sec: 17.03, Grad Norm: 0.0324 +[2025-02-23 10:56:46] (step=0761600) Train Loss: 0.3221, Train Steps/Sec: 17.07, Grad Norm: 0.0417 +[2025-02-23 10:56:52] (step=0761700) Train Loss: 0.3214, Train Steps/Sec: 17.06, Grad Norm: 0.0392 +[2025-02-23 10:56:58] (step=0761800) Train Loss: 0.3215, Train Steps/Sec: 16.30, Grad Norm: 0.0355 +[2025-02-23 10:57:05] (step=0761900) Train Loss: 0.3215, Train Steps/Sec: 14.34, Grad Norm: 0.0373 +[2025-02-23 10:57:11] (step=0762000) Train Loss: 0.3215, Train Steps/Sec: 17.12, Grad Norm: 0.0385 +[2025-02-23 10:57:18] (step=0762100) Train Loss: 0.3208, Train Steps/Sec: 14.60, Grad Norm: 0.0335 +[2025-02-23 10:57:24] (step=0762200) Train Loss: 0.3219, Train Steps/Sec: 15.76, Grad Norm: 0.0369 +[2025-02-23 10:57:30] (step=0762300) Train Loss: 0.3219, Train Steps/Sec: 16.37, Grad Norm: 0.0360 +[2025-02-23 10:57:37] (step=0762400) Train Loss: 0.3211, Train Steps/Sec: 16.36, Grad Norm: 0.0370 +[2025-02-23 10:57:42] (step=0762500) Train Loss: 0.3212, Train Steps/Sec: 17.10, Grad Norm: 0.0347 +[2025-02-23 10:57:48] (step=0762600) Train Loss: 0.3216, Train Steps/Sec: 17.12, Grad Norm: 0.0370 +[2025-02-23 10:57:54] (step=0762700) Train Loss: 0.3222, Train Steps/Sec: 17.21, Grad Norm: 0.0358 +[2025-02-23 10:58:00] (step=0762800) Train Loss: 0.3212, Train Steps/Sec: 17.08, Grad Norm: 0.0350 +[2025-02-23 10:58:06] (step=0762900) Train Loss: 0.3212, Train Steps/Sec: 17.17, Grad Norm: 0.0399 +[2025-02-23 10:58:12] (step=0763000) Train Loss: 0.3218, Train Steps/Sec: 17.03, Grad Norm: 0.0362 +[2025-02-23 10:58:18] (step=0763100) Train Loss: 0.3213, Train Steps/Sec: 15.16, Grad Norm: 0.0363 +[2025-02-23 10:58:25] (step=0763200) Train Loss: 0.3217, Train Steps/Sec: 14.38, Grad Norm: 0.0389 +[2025-02-23 10:58:31] (step=0763300) Train Loss: 0.3212, Train Steps/Sec: 15.91, Grad Norm: 0.0384 +[2025-02-23 10:58:37] (step=0763400) Train Loss: 0.3219, Train Steps/Sec: 16.59, Grad Norm: 0.0361 +[2025-02-23 10:58:43] (step=0763500) Train Loss: 0.3217, Train Steps/Sec: 17.33, Grad Norm: 0.0362 +[2025-02-23 10:58:49] (step=0763600) Train Loss: 0.3219, Train Steps/Sec: 17.33, Grad Norm: 0.0392 +[2025-02-23 10:58:55] (step=0763700) Train Loss: 0.3215, Train Steps/Sec: 17.35, Grad Norm: 0.0348 +[2025-02-23 10:59:01] (step=0763800) Train Loss: 0.3214, Train Steps/Sec: 16.55, Grad Norm: 0.0380 +[2025-02-23 10:59:07] (step=0763900) Train Loss: 0.3215, Train Steps/Sec: 17.39, Grad Norm: 0.0363 +[2025-02-23 10:59:13] (step=0764000) Train Loss: 0.3216, Train Steps/Sec: 16.65, Grad Norm: 0.0321 +[2025-02-23 10:59:20] (step=0764100) Train Loss: 0.3217, Train Steps/Sec: 14.27, Grad Norm: 0.0380 +[2025-02-23 10:59:26] (step=0764200) Train Loss: 0.3215, Train Steps/Sec: 16.51, Grad Norm: 0.0363 +[2025-02-23 10:59:31] (step=0764300) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0351 +[2025-02-23 10:59:39] (step=0764400) Train Loss: 0.3214, Train Steps/Sec: 13.88, Grad Norm: 0.0363 +[2025-02-23 10:59:44] (step=0764500) Train Loss: 0.3212, Train Steps/Sec: 17.23, Grad Norm: 0.0364 +[2025-02-23 10:59:50] (step=0764600) Train Loss: 0.3220, Train Steps/Sec: 17.29, Grad Norm: 0.0398 +[2025-02-23 10:59:56] (step=0764700) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0380 +[2025-02-23 11:00:02] (step=0764800) Train Loss: 0.3212, Train Steps/Sec: 17.21, Grad Norm: 0.0412 +[2025-02-23 11:00:08] (step=0764900) Train Loss: 0.3219, Train Steps/Sec: 17.26, Grad Norm: 0.0336 +[2025-02-23 11:00:13] (step=0765000) Train Loss: 0.3212, Train Steps/Sec: 17.31, Grad Norm: 0.0366 +[2025-02-23 11:00:20] (step=0765100) Train Loss: 0.3215, Train Steps/Sec: 15.15, Grad Norm: 0.0371 +[2025-02-23 11:00:26] (step=0765200) Train Loss: 0.3215, Train Steps/Sec: 17.33, Grad Norm: 0.0383 +[2025-02-23 11:00:32] (step=0765300) Train Loss: 0.3218, Train Steps/Sec: 15.80, Grad Norm: 0.0357 +[2025-02-23 11:00:38] (step=0765400) Train Loss: 0.3218, Train Steps/Sec: 15.87, Grad Norm: 0.0332 +[2025-02-23 11:00:44] (step=0765500) Train Loss: 0.3215, Train Steps/Sec: 17.34, Grad Norm: 0.0383 +[2025-02-23 11:00:50] (step=0765600) Train Loss: 0.3214, Train Steps/Sec: 17.27, Grad Norm: 0.0355 +[2025-02-23 11:00:57] (step=0765700) Train Loss: 0.3211, Train Steps/Sec: 14.44, Grad Norm: 0.0334 +[2025-02-23 11:01:03] (step=0765800) Train Loss: 0.3216, Train Steps/Sec: 16.35, Grad Norm: 0.0400 +[2025-02-23 11:01:09] (step=0765900) Train Loss: 0.3216, Train Steps/Sec: 16.42, Grad Norm: 0.0341 +[2025-02-23 11:01:16] (step=0766000) Train Loss: 0.3212, Train Steps/Sec: 15.33, Grad Norm: 0.0376 +[2025-02-23 11:01:22] (step=0766100) Train Loss: 0.3210, Train Steps/Sec: 16.06, Grad Norm: 0.0362 +[2025-02-23 11:01:28] (step=0766200) Train Loss: 0.3217, Train Steps/Sec: 16.57, Grad Norm: 0.0403 +[2025-02-23 11:01:34] (step=0766300) Train Loss: 0.3211, Train Steps/Sec: 16.58, Grad Norm: 0.0352 +[2025-02-23 11:01:40] (step=0766400) Train Loss: 0.3210, Train Steps/Sec: 17.33, Grad Norm: 0.0340 +[2025-02-23 11:01:45] (step=0766500) Train Loss: 0.3215, Train Steps/Sec: 17.40, Grad Norm: 0.0351 +[2025-02-23 11:01:51] (step=0766600) Train Loss: 0.3214, Train Steps/Sec: 17.35, Grad Norm: 0.0384 +[2025-02-23 11:01:57] (step=0766700) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0351 +[2025-02-23 11:02:03] (step=0766800) Train Loss: 0.3214, Train Steps/Sec: 17.31, Grad Norm: 0.0387 +[2025-02-23 11:02:10] (step=0766900) Train Loss: 0.3215, Train Steps/Sec: 14.42, Grad Norm: 0.0380 +[2025-02-23 11:02:15] (step=0767000) Train Loss: 0.3213, Train Steps/Sec: 17.30, Grad Norm: 0.0358 +[2025-02-23 11:02:22] (step=0767100) Train Loss: 0.3213, Train Steps/Sec: 15.17, Grad Norm: 0.0389 +[2025-02-23 11:02:28] (step=0767200) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0339 +[2025-02-23 11:02:34] (step=0767300) Train Loss: 0.3211, Train Steps/Sec: 15.89, Grad Norm: 0.0334 +[2025-02-23 11:02:40] (step=0767400) Train Loss: 0.3217, Train Steps/Sec: 15.85, Grad Norm: 0.0388 +[2025-02-23 11:02:46] (step=0767500) Train Loss: 0.3213, Train Steps/Sec: 17.35, Grad Norm: 0.0366 +[2025-02-23 11:02:52] (step=0767600) Train Loss: 0.3212, Train Steps/Sec: 17.34, Grad Norm: 0.0403 +[2025-02-23 11:02:58] (step=0767700) Train Loss: 0.3212, Train Steps/Sec: 17.24, Grad Norm: 0.0448 +[2025-02-23 11:03:04] (step=0767800) Train Loss: 0.3211, Train Steps/Sec: 16.42, Grad Norm: 0.0372 +[2025-02-23 11:03:10] (step=0767900) Train Loss: 0.3211, Train Steps/Sec: 15.82, Grad Norm: 0.0390 +[2025-02-23 11:03:17] (step=0768000) Train Loss: 0.3209, Train Steps/Sec: 14.75, Grad Norm: 0.0364 +[2025-02-23 11:03:23] (step=0768100) Train Loss: 0.3214, Train Steps/Sec: 16.44, Grad Norm: 0.0362 +[2025-02-23 11:03:30] (step=0768200) Train Loss: 0.3217, Train Steps/Sec: 14.55, Grad Norm: 0.0384 +[2025-02-23 11:03:36] (step=0768300) Train Loss: 0.3217, Train Steps/Sec: 16.52, Grad Norm: 0.0338 +[2025-02-23 11:03:42] (step=0768400) Train Loss: 0.3216, Train Steps/Sec: 17.34, Grad Norm: 0.0348 +[2025-02-23 11:03:47] (step=0768500) Train Loss: 0.3216, Train Steps/Sec: 17.38, Grad Norm: 0.0353 +[2025-02-23 11:03:53] (step=0768600) Train Loss: 0.3215, Train Steps/Sec: 17.31, Grad Norm: 0.0367 +[2025-02-23 11:03:59] (step=0768700) Train Loss: 0.3215, Train Steps/Sec: 17.33, Grad Norm: 0.0355 +[2025-02-23 11:04:05] (step=0768800) Train Loss: 0.3211, Train Steps/Sec: 17.33, Grad Norm: 0.0369 +[2025-02-23 11:04:11] (step=0768900) Train Loss: 0.3207, Train Steps/Sec: 17.30, Grad Norm: 0.0359 +[2025-02-23 11:04:16] (step=0769000) Train Loss: 0.3214, Train Steps/Sec: 17.29, Grad Norm: 0.0347 +[2025-02-23 11:04:23] (step=0769100) Train Loss: 0.3216, Train Steps/Sec: 15.09, Grad Norm: 0.0332 +[2025-02-23 11:04:29] (step=0769200) Train Loss: 0.3217, Train Steps/Sec: 17.33, Grad Norm: 0.0345 +[2025-02-23 11:04:35] (step=0769300) Train Loss: 0.3214, Train Steps/Sec: 15.90, Grad Norm: 0.0327 +[2025-02-23 11:04:43] (step=0769400) Train Loss: 0.3214, Train Steps/Sec: 13.26, Grad Norm: 0.0325 +[2025-02-23 11:04:48] (step=0769500) Train Loss: 0.3221, Train Steps/Sec: 17.07, Grad Norm: 0.0333 +[2025-02-23 11:04:54] (step=0769600) Train Loss: 0.3207, Train Steps/Sec: 17.07, Grad Norm: 0.0372 +[2025-02-23 11:05:00] (step=0769700) Train Loss: 0.3217, Train Steps/Sec: 17.25, Grad Norm: 0.0370 +[2025-02-23 11:05:06] (step=0769800) Train Loss: 0.3216, Train Steps/Sec: 15.81, Grad Norm: 0.0333 +[2025-02-23 11:05:13] (step=0769900) Train Loss: 0.3208, Train Steps/Sec: 15.32, Grad Norm: 0.0356 +[2025-02-23 11:05:19] (step=0770000) Train Loss: 0.3215, Train Steps/Sec: 15.98, Grad Norm: 0.0367 +[2025-02-23 11:05:25] (step=0770100) Train Loss: 0.3213, Train Steps/Sec: 16.50, Grad Norm: 0.0380 +[2025-02-23 11:05:31] (step=0770200) Train Loss: 0.3216, Train Steps/Sec: 16.53, Grad Norm: 0.0393 +[2025-02-23 11:05:37] (step=0770300) Train Loss: 0.3214, Train Steps/Sec: 17.26, Grad Norm: 0.0353 +[2025-02-23 11:05:43] (step=0770400) Train Loss: 0.3209, Train Steps/Sec: 17.31, Grad Norm: 0.0362 +[2025-02-23 11:05:49] (step=0770500) Train Loss: 0.3213, Train Steps/Sec: 17.28, Grad Norm: 0.0366 +[2025-02-23 11:05:55] (step=0770600) Train Loss: 0.3215, Train Steps/Sec: 17.18, Grad Norm: 0.0350 +[2025-02-23 11:06:01] (step=0770700) Train Loss: 0.3214, Train Steps/Sec: 14.52, Grad Norm: 0.0363 +[2025-02-23 11:06:07] (step=0770800) Train Loss: 0.3213, Train Steps/Sec: 17.39, Grad Norm: 0.0353 +[2025-02-23 11:06:13] (step=0770900) Train Loss: 0.3215, Train Steps/Sec: 17.38, Grad Norm: 0.0346 +[2025-02-23 11:06:19] (step=0771000) Train Loss: 0.3215, Train Steps/Sec: 17.40, Grad Norm: 0.0367 +[2025-02-23 11:06:25] (step=0771100) Train Loss: 0.3213, Train Steps/Sec: 15.24, Grad Norm: 0.0368 +[2025-02-23 11:06:31] (step=0771200) Train Loss: 0.3214, Train Steps/Sec: 17.39, Grad Norm: 0.0386 +[2025-02-23 11:06:37] (step=0771300) Train Loss: 0.3217, Train Steps/Sec: 15.92, Grad Norm: 0.0347 +[2025-02-23 11:06:44] (step=0771400) Train Loss: 0.3216, Train Steps/Sec: 15.93, Grad Norm: 0.0355 +[2025-02-23 11:06:49] (step=0771500) Train Loss: 0.3216, Train Steps/Sec: 17.33, Grad Norm: 0.0343 +[2025-02-23 11:06:55] (step=0771600) Train Loss: 0.3212, Train Steps/Sec: 17.29, Grad Norm: 0.0371 +[2025-02-23 11:07:01] (step=0771700) Train Loss: 0.3215, Train Steps/Sec: 17.39, Grad Norm: 0.0391 +[2025-02-23 11:07:08] (step=0771800) Train Loss: 0.3219, Train Steps/Sec: 14.72, Grad Norm: 0.0364 +[2025-02-23 11:07:15] (step=0771900) Train Loss: 0.3216, Train Steps/Sec: 13.02, Grad Norm: 0.0352 +[2025-02-23 11:07:21] (step=0772000) Train Loss: 0.3215, Train Steps/Sec: 16.31, Grad Norm: 0.0351 +[2025-02-23 11:07:27] (step=0772100) Train Loss: 0.3209, Train Steps/Sec: 17.08, Grad Norm: 0.0349 +[2025-02-23 11:07:33] (step=0772200) Train Loss: 0.3219, Train Steps/Sec: 16.30, Grad Norm: 0.0365 +[2025-02-23 11:07:39] (step=0772300) Train Loss: 0.3215, Train Steps/Sec: 17.29, Grad Norm: 0.0339 +[2025-02-23 11:07:45] (step=0772400) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0346 +[2025-02-23 11:07:51] (step=0772500) Train Loss: 0.3221, Train Steps/Sec: 17.27, Grad Norm: 0.0366 +[2025-02-23 11:07:57] (step=0772600) Train Loss: 0.3218, Train Steps/Sec: 17.26, Grad Norm: 0.0390 +[2025-02-23 11:08:02] (step=0772700) Train Loss: 0.3217, Train Steps/Sec: 17.28, Grad Norm: 0.0381 +[2025-02-23 11:08:08] (step=0772800) Train Loss: 0.3216, Train Steps/Sec: 17.35, Grad Norm: 0.0373 +[2025-02-23 11:08:14] (step=0772900) Train Loss: 0.3218, Train Steps/Sec: 17.40, Grad Norm: 0.0335 +[2025-02-23 11:08:20] (step=0773000) Train Loss: 0.3214, Train Steps/Sec: 17.39, Grad Norm: 0.0371 +[2025-02-23 11:08:26] (step=0773100) Train Loss: 0.3210, Train Steps/Sec: 15.77, Grad Norm: 0.0387 +[2025-02-23 11:08:33] (step=0773200) Train Loss: 0.3216, Train Steps/Sec: 13.99, Grad Norm: 0.0331 +[2025-02-23 11:08:40] (step=0773300) Train Loss: 0.3214, Train Steps/Sec: 15.68, Grad Norm: 0.0345 +[2025-02-23 11:08:46] (step=0773400) Train Loss: 0.3218, Train Steps/Sec: 15.77, Grad Norm: 0.0352 +[2025-02-23 11:08:52] (step=0773500) Train Loss: 0.3208, Train Steps/Sec: 17.28, Grad Norm: 0.0340 +[2025-02-23 11:08:57] (step=0773600) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0331 +[2025-02-23 11:09:03] (step=0773700) Train Loss: 0.3215, Train Steps/Sec: 16.63, Grad Norm: 0.0351 +[2025-02-23 11:09:10] (step=0773800) Train Loss: 0.3212, Train Steps/Sec: 14.71, Grad Norm: 0.0392 +[2025-02-23 11:09:17] (step=0773900) Train Loss: 0.3215, Train Steps/Sec: 15.94, Grad Norm: 0.0395 +[2025-02-23 11:09:23] (step=0774000) Train Loss: 0.3215, Train Steps/Sec: 16.49, Grad Norm: 0.0362 +[2025-02-23 11:09:29] (step=0774100) Train Loss: 0.3217, Train Steps/Sec: 16.48, Grad Norm: 0.0327 +[2025-02-23 11:09:34] (step=0774200) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0383 +[2025-02-23 11:09:40] (step=0774300) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0368 +[2025-02-23 11:09:47] (step=0774400) Train Loss: 0.3217, Train Steps/Sec: 14.43, Grad Norm: 0.0349 +[2025-02-23 11:09:53] (step=0774500) Train Loss: 0.3216, Train Steps/Sec: 17.36, Grad Norm: 0.0367 +[2025-02-23 11:09:59] (step=0774600) Train Loss: 0.3217, Train Steps/Sec: 17.38, Grad Norm: 0.0364 +[2025-02-23 11:10:04] (step=0774700) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0350 +[2025-02-23 11:10:10] (step=0774800) Train Loss: 0.3216, Train Steps/Sec: 17.34, Grad Norm: 0.0382 +[2025-02-23 11:10:16] (step=0774900) Train Loss: 0.3212, Train Steps/Sec: 17.32, Grad Norm: 0.0367 +[2025-02-23 11:10:22] (step=0775000) Train Loss: 0.3206, Train Steps/Sec: 17.30, Grad Norm: 0.0354 +[2025-02-23 11:10:28] (step=0775100) Train Loss: 0.3216, Train Steps/Sec: 15.79, Grad Norm: 0.0337 +[2025-02-23 11:10:34] (step=0775200) Train Loss: 0.3218, Train Steps/Sec: 16.38, Grad Norm: 0.0380 +[2025-02-23 11:10:41] (step=0775300) Train Loss: 0.3212, Train Steps/Sec: 15.77, Grad Norm: 0.0371 +[2025-02-23 11:10:47] (step=0775400) Train Loss: 0.3214, Train Steps/Sec: 16.62, Grad Norm: 0.0386 +[2025-02-23 11:10:53] (step=0775500) Train Loss: 0.3216, Train Steps/Sec: 16.60, Grad Norm: 0.0352 +[2025-02-23 11:10:58] (step=0775600) Train Loss: 0.3215, Train Steps/Sec: 17.23, Grad Norm: 0.0349 +[2025-02-23 11:11:06] (step=0775700) Train Loss: 0.3211, Train Steps/Sec: 12.92, Grad Norm: 0.0343 +[2025-02-23 11:11:13] (step=0775800) Train Loss: 0.3217, Train Steps/Sec: 14.72, Grad Norm: 0.0380 +[2025-02-23 11:11:19] (step=0775900) Train Loss: 0.3214, Train Steps/Sec: 17.29, Grad Norm: 0.0405 +[2025-02-23 11:11:25] (step=0776000) Train Loss: 0.3212, Train Steps/Sec: 15.76, Grad Norm: 0.0436 +[2025-02-23 11:11:31] (step=0776100) Train Loss: 0.3223, Train Steps/Sec: 17.32, Grad Norm: 0.0350 +[2025-02-23 11:11:37] (step=0776200) Train Loss: 0.3212, Train Steps/Sec: 17.41, Grad Norm: 0.0366 +[2025-02-23 11:11:42] (step=0776300) Train Loss: 0.3215, Train Steps/Sec: 17.41, Grad Norm: 0.0349 +[2025-02-23 11:11:48] (step=0776400) Train Loss: 0.3211, Train Steps/Sec: 17.41, Grad Norm: 0.0413 +[2025-02-23 11:11:54] (step=0776500) Train Loss: 0.3216, Train Steps/Sec: 17.38, Grad Norm: 0.0368 +[2025-02-23 11:12:00] (step=0776600) Train Loss: 0.3220, Train Steps/Sec: 17.36, Grad Norm: 0.0348 +[2025-02-23 11:12:05] (step=0776700) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0348 +[2025-02-23 11:12:11] (step=0776800) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0366 +[2025-02-23 11:12:18] (step=0776900) Train Loss: 0.3210, Train Steps/Sec: 14.45, Grad Norm: 0.0359 +[2025-02-23 11:12:24] (step=0777000) Train Loss: 0.3219, Train Steps/Sec: 17.18, Grad Norm: 0.0380 +[2025-02-23 11:12:30] (step=0777100) Train Loss: 0.3213, Train Steps/Sec: 15.75, Grad Norm: 0.0356 +[2025-02-23 11:12:36] (step=0777200) Train Loss: 0.3214, Train Steps/Sec: 16.45, Grad Norm: 0.0341 +[2025-02-23 11:12:43] (step=0777300) Train Loss: 0.3216, Train Steps/Sec: 15.83, Grad Norm: 0.0360 +[2025-02-23 11:12:49] (step=0777400) Train Loss: 0.3211, Train Steps/Sec: 16.45, Grad Norm: 0.0368 +[2025-02-23 11:12:55] (step=0777500) Train Loss: 0.3214, Train Steps/Sec: 16.48, Grad Norm: 0.0338 +[2025-02-23 11:13:01] (step=0777600) Train Loss: 0.3209, Train Steps/Sec: 16.49, Grad Norm: 0.0339 +[2025-02-23 11:13:07] (step=0777700) Train Loss: 0.3218, Train Steps/Sec: 15.19, Grad Norm: 0.0378 +[2025-02-23 11:13:14] (step=0777800) Train Loss: 0.3212, Train Steps/Sec: 15.12, Grad Norm: 0.0360 +[2025-02-23 11:13:20] (step=0777900) Train Loss: 0.3214, Train Steps/Sec: 16.37, Grad Norm: 0.0361 +[2025-02-23 11:13:26] (step=0778000) Train Loss: 0.3221, Train Steps/Sec: 16.39, Grad Norm: 0.0341 +[2025-02-23 11:13:32] (step=0778100) Train Loss: 0.3212, Train Steps/Sec: 17.11, Grad Norm: 0.0400 +[2025-02-23 11:13:39] (step=0778200) Train Loss: 0.3210, Train Steps/Sec: 14.31, Grad Norm: 0.0343 +[2025-02-23 11:13:45] (step=0778300) Train Loss: 0.3216, Train Steps/Sec: 17.15, Grad Norm: 0.0370 +[2025-02-23 11:13:51] (step=0778400) Train Loss: 0.3211, Train Steps/Sec: 17.13, Grad Norm: 0.0358 +[2025-02-23 11:13:57] (step=0778500) Train Loss: 0.3211, Train Steps/Sec: 17.12, Grad Norm: 0.0375 +[2025-02-23 11:14:02] (step=0778600) Train Loss: 0.3215, Train Steps/Sec: 17.21, Grad Norm: 0.0366 +[2025-02-23 11:14:08] (step=0778700) Train Loss: 0.3210, Train Steps/Sec: 17.20, Grad Norm: 0.0351 +[2025-02-23 11:14:14] (step=0778800) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0387 +[2025-02-23 11:14:20] (step=0778900) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0363 +[2025-02-23 11:14:26] (step=0779000) Train Loss: 0.3212, Train Steps/Sec: 17.30, Grad Norm: 0.0369 +[2025-02-23 11:14:32] (step=0779100) Train Loss: 0.3214, Train Steps/Sec: 15.81, Grad Norm: 0.0359 +[2025-02-23 11:14:38] (step=0779200) Train Loss: 0.3218, Train Steps/Sec: 16.57, Grad Norm: 0.0366 +[2025-02-23 11:14:44] (step=0779300) Train Loss: 0.3218, Train Steps/Sec: 15.90, Grad Norm: 0.0368 +[2025-02-23 11:14:51] (step=0779400) Train Loss: 0.3212, Train Steps/Sec: 13.95, Grad Norm: 0.0391 +[2025-02-23 11:14:58] (step=0779500) Train Loss: 0.3213, Train Steps/Sec: 15.81, Grad Norm: 0.0379 +[2025-02-23 11:15:04] (step=0779600) Train Loss: 0.3211, Train Steps/Sec: 15.80, Grad Norm: 0.0361 +[2025-02-23 11:15:11] (step=0779700) Train Loss: 0.3212, Train Steps/Sec: 15.16, Grad Norm: 0.0373 +[2025-02-23 11:15:17] (step=0779800) Train Loss: 0.3217, Train Steps/Sec: 16.30, Grad Norm: 0.0351 +[2025-02-23 11:15:23] (step=0779900) Train Loss: 0.3215, Train Steps/Sec: 15.60, Grad Norm: 0.0346 +[2025-02-23 11:15:29] (step=0780000) Train Loss: 0.3215, Train Steps/Sec: 17.06, Grad Norm: 0.0392 +[2025-02-23 11:15:35] (step=0780100) Train Loss: 0.3218, Train Steps/Sec: 17.03, Grad Norm: 0.0395 +[2025-02-23 11:15:41] (step=0780200) Train Loss: 0.3207, Train Steps/Sec: 17.05, Grad Norm: 0.0404 +[2025-02-23 11:15:47] (step=0780300) Train Loss: 0.3211, Train Steps/Sec: 17.08, Grad Norm: 0.0341 +[2025-02-23 11:15:53] (step=0780400) Train Loss: 0.3217, Train Steps/Sec: 17.32, Grad Norm: 0.0332 +[2025-02-23 11:15:58] (step=0780500) Train Loss: 0.3210, Train Steps/Sec: 17.29, Grad Norm: 0.0385 +[2025-02-23 11:16:04] (step=0780600) Train Loss: 0.3211, Train Steps/Sec: 17.01, Grad Norm: 0.0354 +[2025-02-23 11:16:11] (step=0780700) Train Loss: 0.3215, Train Steps/Sec: 14.55, Grad Norm: 0.0366 +[2025-02-23 11:16:17] (step=0780800) Train Loss: 0.3212, Train Steps/Sec: 17.32, Grad Norm: 0.0358 +[2025-02-23 11:16:23] (step=0780900) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0346 +[2025-02-23 11:16:28] (step=0781000) Train Loss: 0.3214, Train Steps/Sec: 17.33, Grad Norm: 0.0397 +[2025-02-23 11:16:35] (step=0781100) Train Loss: 0.3220, Train Steps/Sec: 15.88, Grad Norm: 0.0332 +[2025-02-23 11:16:41] (step=0781200) Train Loss: 0.3209, Train Steps/Sec: 16.50, Grad Norm: 0.0356 +[2025-02-23 11:16:47] (step=0781300) Train Loss: 0.3217, Train Steps/Sec: 15.91, Grad Norm: 0.0360 +[2025-02-23 11:16:53] (step=0781400) Train Loss: 0.3210, Train Steps/Sec: 16.66, Grad Norm: 0.0379 +[2025-02-23 11:17:00] (step=0781500) Train Loss: 0.3209, Train Steps/Sec: 15.24, Grad Norm: 0.0380 +[2025-02-23 11:17:06] (step=0781600) Train Loss: 0.3211, Train Steps/Sec: 15.93, Grad Norm: 0.0379 +[2025-02-23 11:17:12] (step=0781700) Train Loss: 0.3212, Train Steps/Sec: 15.97, Grad Norm: 0.0394 +[2025-02-23 11:17:18] (step=0781800) Train Loss: 0.3213, Train Steps/Sec: 15.78, Grad Norm: 0.0393 +[2025-02-23 11:17:26] (step=0781900) Train Loss: 0.3216, Train Steps/Sec: 14.00, Grad Norm: 0.0380 +[2025-02-23 11:17:31] (step=0782000) Train Loss: 0.3216, Train Steps/Sec: 17.28, Grad Norm: 0.0349 +[2025-02-23 11:17:37] (step=0782100) Train Loss: 0.3218, Train Steps/Sec: 17.29, Grad Norm: 0.0398 +[2025-02-23 11:17:43] (step=0782200) Train Loss: 0.3217, Train Steps/Sec: 17.38, Grad Norm: 0.0358 +[2025-02-23 11:17:49] (step=0782300) Train Loss: 0.3216, Train Steps/Sec: 17.35, Grad Norm: 0.0335 +[2025-02-23 11:17:54] (step=0782400) Train Loss: 0.3217, Train Steps/Sec: 17.43, Grad Norm: 0.0366 +[2025-02-23 11:18:00] (step=0782500) Train Loss: 0.3218, Train Steps/Sec: 17.36, Grad Norm: 0.0389 +[2025-02-23 11:18:06] (step=0782600) Train Loss: 0.3213, Train Steps/Sec: 17.39, Grad Norm: 0.0367 +[2025-02-23 11:18:12] (step=0782700) Train Loss: 0.3210, Train Steps/Sec: 17.32, Grad Norm: 0.0350 +[2025-02-23 11:18:18] (step=0782800) Train Loss: 0.3213, Train Steps/Sec: 17.27, Grad Norm: 0.0361 +[2025-02-23 11:18:23] (step=0782900) Train Loss: 0.3211, Train Steps/Sec: 17.31, Grad Norm: 0.0411 +[2025-02-23 11:18:29] (step=0783000) Train Loss: 0.3211, Train Steps/Sec: 17.31, Grad Norm: 0.0387 +[2025-02-23 11:18:35] (step=0783100) Train Loss: 0.3218, Train Steps/Sec: 15.72, Grad Norm: 0.0378 +[2025-02-23 11:18:43] (step=0783200) Train Loss: 0.3218, Train Steps/Sec: 14.06, Grad Norm: 0.0365 +[2025-02-23 11:18:49] (step=0783300) Train Loss: 0.3209, Train Steps/Sec: 15.91, Grad Norm: 0.0324 +[2025-02-23 11:18:55] (step=0783400) Train Loss: 0.3217, Train Steps/Sec: 15.99, Grad Norm: 0.0370 +[2025-02-23 11:19:02] (step=0783500) Train Loss: 0.3212, Train Steps/Sec: 14.71, Grad Norm: 0.0389 +[2025-02-23 11:19:08] (step=0783600) Train Loss: 0.3217, Train Steps/Sec: 15.97, Grad Norm: 0.0381 +[2025-02-23 11:19:14] (step=0783700) Train Loss: 0.3217, Train Steps/Sec: 17.31, Grad Norm: 0.0346 +[2025-02-23 11:19:21] (step=0783800) Train Loss: 0.3216, Train Steps/Sec: 15.20, Grad Norm: 0.0372 +[2025-02-23 11:19:26] (step=0783900) Train Loss: 0.3216, Train Steps/Sec: 17.27, Grad Norm: 0.0370 +[2025-02-23 11:19:32] (step=0784000) Train Loss: 0.3208, Train Steps/Sec: 17.27, Grad Norm: 0.0344 +[2025-02-23 11:19:38] (step=0784100) Train Loss: 0.3219, Train Steps/Sec: 17.32, Grad Norm: 0.0381 +[2025-02-23 11:19:44] (step=0784200) Train Loss: 0.3213, Train Steps/Sec: 17.07, Grad Norm: 0.0385 +[2025-02-23 11:19:50] (step=0784300) Train Loss: 0.3212, Train Steps/Sec: 17.09, Grad Norm: 0.0383 +[2025-02-23 11:19:57] (step=0784400) Train Loss: 0.3213, Train Steps/Sec: 14.31, Grad Norm: 0.0351 +[2025-02-23 11:20:02] (step=0784500) Train Loss: 0.3207, Train Steps/Sec: 17.05, Grad Norm: 0.0399 +[2025-02-23 11:20:08] (step=0784600) Train Loss: 0.3217, Train Steps/Sec: 17.07, Grad Norm: 0.0371 +[2025-02-23 11:20:14] (step=0784700) Train Loss: 0.3212, Train Steps/Sec: 17.10, Grad Norm: 0.0367 +[2025-02-23 11:20:20] (step=0784800) Train Loss: 0.3212, Train Steps/Sec: 17.13, Grad Norm: 0.0347 +[2025-02-23 11:20:26] (step=0784900) Train Loss: 0.3212, Train Steps/Sec: 17.10, Grad Norm: 0.0327 +[2025-02-23 11:20:32] (step=0785000) Train Loss: 0.3220, Train Steps/Sec: 17.13, Grad Norm: 0.0346 +[2025-02-23 11:20:38] (step=0785100) Train Loss: 0.3209, Train Steps/Sec: 15.69, Grad Norm: 0.0366 +[2025-02-23 11:20:44] (step=0785200) Train Loss: 0.3216, Train Steps/Sec: 16.26, Grad Norm: 0.0396 +[2025-02-23 11:20:51] (step=0785300) Train Loss: 0.3209, Train Steps/Sec: 15.76, Grad Norm: 0.0350 +[2025-02-23 11:20:57] (step=0785400) Train Loss: 0.3219, Train Steps/Sec: 15.01, Grad Norm: 0.0355 +[2025-02-23 11:21:04] (step=0785500) Train Loss: 0.3214, Train Steps/Sec: 14.60, Grad Norm: 0.0344 +[2025-02-23 11:21:10] (step=0785600) Train Loss: 0.3216, Train Steps/Sec: 16.35, Grad Norm: 0.0382 +[2025-02-23 11:21:17] (step=0785700) Train Loss: 0.3213, Train Steps/Sec: 13.89, Grad Norm: 0.0395 +[2025-02-23 11:21:24] (step=0785800) Train Loss: 0.3214, Train Steps/Sec: 15.66, Grad Norm: 0.0357 +[2025-02-23 11:21:30] (step=0785900) Train Loss: 0.3212, Train Steps/Sec: 17.23, Grad Norm: 0.0348 +[2025-02-23 11:21:35] (step=0786000) Train Loss: 0.3218, Train Steps/Sec: 17.16, Grad Norm: 0.0329 +[2025-02-23 11:21:41] (step=0786100) Train Loss: 0.3216, Train Steps/Sec: 17.23, Grad Norm: 0.0315 +[2025-02-23 11:21:47] (step=0786200) Train Loss: 0.3215, Train Steps/Sec: 17.17, Grad Norm: 0.0351 +[2025-02-23 11:21:53] (step=0786300) Train Loss: 0.3215, Train Steps/Sec: 17.20, Grad Norm: 0.0343 +[2025-02-23 11:21:59] (step=0786400) Train Loss: 0.3212, Train Steps/Sec: 17.30, Grad Norm: 0.0342 +[2025-02-23 11:22:04] (step=0786500) Train Loss: 0.3208, Train Steps/Sec: 17.28, Grad Norm: 0.0364 +[2025-02-23 11:22:10] (step=0786600) Train Loss: 0.3213, Train Steps/Sec: 17.30, Grad Norm: 0.0362 +[2025-02-23 11:22:16] (step=0786700) Train Loss: 0.3219, Train Steps/Sec: 17.31, Grad Norm: 0.0334 +[2025-02-23 11:22:22] (step=0786800) Train Loss: 0.3219, Train Steps/Sec: 17.21, Grad Norm: 0.0369 +[2025-02-23 11:22:29] (step=0786900) Train Loss: 0.3218, Train Steps/Sec: 14.25, Grad Norm: 0.0361 +[2025-02-23 11:22:35] (step=0787000) Train Loss: 0.3213, Train Steps/Sec: 17.35, Grad Norm: 0.0364 +[2025-02-23 11:22:41] (step=0787100) Train Loss: 0.3212, Train Steps/Sec: 15.94, Grad Norm: 0.0360 +[2025-02-23 11:22:47] (step=0787200) Train Loss: 0.3216, Train Steps/Sec: 16.60, Grad Norm: 0.0328 +[2025-02-23 11:22:53] (step=0787300) Train Loss: 0.3215, Train Steps/Sec: 15.27, Grad Norm: 0.0357 +[2025-02-23 11:23:00] (step=0787400) Train Loss: 0.3214, Train Steps/Sec: 14.76, Grad Norm: 0.0355 +[2025-02-23 11:23:07] (step=0787500) Train Loss: 0.3216, Train Steps/Sec: 15.29, Grad Norm: 0.0343 +[2025-02-23 11:23:13] (step=0787600) Train Loss: 0.3218, Train Steps/Sec: 17.26, Grad Norm: 0.0412 +[2025-02-23 11:23:19] (step=0787700) Train Loss: 0.3213, Train Steps/Sec: 15.75, Grad Norm: 0.0353 +[2025-02-23 11:23:25] (step=0787800) Train Loss: 0.3213, Train Steps/Sec: 16.44, Grad Norm: 0.0395 +[2025-02-23 11:23:31] (step=0787900) Train Loss: 0.3207, Train Steps/Sec: 17.30, Grad Norm: 0.0350 +[2025-02-23 11:23:37] (step=0788000) Train Loss: 0.3211, Train Steps/Sec: 17.37, Grad Norm: 0.0374 +[2025-02-23 11:23:42] (step=0788100) Train Loss: 0.3214, Train Steps/Sec: 17.20, Grad Norm: 0.0377 +[2025-02-23 11:23:49] (step=0788200) Train Loss: 0.3216, Train Steps/Sec: 14.28, Grad Norm: 0.0367 +[2025-02-23 11:23:55] (step=0788300) Train Loss: 0.3215, Train Steps/Sec: 17.28, Grad Norm: 0.0372 +[2025-02-23 11:24:01] (step=0788400) Train Loss: 0.3217, Train Steps/Sec: 17.25, Grad Norm: 0.0343 +[2025-02-23 11:24:07] (step=0788500) Train Loss: 0.3215, Train Steps/Sec: 17.24, Grad Norm: 0.0386 +[2025-02-23 11:24:13] (step=0788600) Train Loss: 0.3216, Train Steps/Sec: 17.21, Grad Norm: 0.0376 +[2025-02-23 11:24:18] (step=0788700) Train Loss: 0.3221, Train Steps/Sec: 17.26, Grad Norm: 0.0353 +[2025-02-23 11:24:24] (step=0788800) Train Loss: 0.3210, Train Steps/Sec: 17.23, Grad Norm: 0.0397 +[2025-02-23 11:24:30] (step=0788900) Train Loss: 0.3216, Train Steps/Sec: 17.14, Grad Norm: 0.0373 +[2025-02-23 11:24:36] (step=0789000) Train Loss: 0.3216, Train Steps/Sec: 17.21, Grad Norm: 0.0356 +[2025-02-23 11:24:42] (step=0789100) Train Loss: 0.3210, Train Steps/Sec: 15.71, Grad Norm: 0.0376 +[2025-02-23 11:24:48] (step=0789200) Train Loss: 0.3216, Train Steps/Sec: 16.45, Grad Norm: 0.0355 +[2025-02-23 11:24:55] (step=0789300) Train Loss: 0.3211, Train Steps/Sec: 14.08, Grad Norm: 0.0395 +[2025-02-23 11:25:03] (step=0789400) Train Loss: 0.3210, Train Steps/Sec: 12.40, Grad Norm: 0.0362 +[2025-02-23 11:25:09] (step=0789500) Train Loss: 0.3219, Train Steps/Sec: 16.50, Grad Norm: 0.0368 +[2025-02-23 11:25:16] (step=0789600) Train Loss: 0.3216, Train Steps/Sec: 15.76, Grad Norm: 0.0349 +[2025-02-23 11:25:22] (step=0789700) Train Loss: 0.3215, Train Steps/Sec: 17.34, Grad Norm: 0.0372 +[2025-02-23 11:25:28] (step=0789800) Train Loss: 0.3216, Train Steps/Sec: 16.51, Grad Norm: 0.0346 +[2025-02-23 11:25:33] (step=0789900) Train Loss: 0.3212, Train Steps/Sec: 17.38, Grad Norm: 0.0391 +[2025-02-23 11:25:39] (step=0790000) Train Loss: 0.3213, Train Steps/Sec: 17.34, Grad Norm: 0.0351 +[2025-02-23 11:25:45] (step=0790100) Train Loss: 0.3220, Train Steps/Sec: 17.34, Grad Norm: 0.0338 +[2025-02-23 11:25:51] (step=0790200) Train Loss: 0.3213, Train Steps/Sec: 17.36, Grad Norm: 0.0344 +[2025-02-23 11:25:56] (step=0790300) Train Loss: 0.3209, Train Steps/Sec: 17.40, Grad Norm: 0.0344 +[2025-02-23 11:26:02] (step=0790400) Train Loss: 0.3210, Train Steps/Sec: 17.35, Grad Norm: 0.0342 +[2025-02-23 11:26:08] (step=0790500) Train Loss: 0.3214, Train Steps/Sec: 17.30, Grad Norm: 0.0401 +[2025-02-23 11:26:14] (step=0790600) Train Loss: 0.3209, Train Steps/Sec: 17.13, Grad Norm: 0.0331 +[2025-02-23 11:26:21] (step=0790700) Train Loss: 0.3213, Train Steps/Sec: 14.57, Grad Norm: 0.0358 +[2025-02-23 11:26:26] (step=0790800) Train Loss: 0.3209, Train Steps/Sec: 17.36, Grad Norm: 0.0370 +[2025-02-23 11:26:32] (step=0790900) Train Loss: 0.3215, Train Steps/Sec: 17.36, Grad Norm: 0.0370 +[2025-02-23 11:26:38] (step=0791000) Train Loss: 0.3218, Train Steps/Sec: 17.36, Grad Norm: 0.0361 +[2025-02-23 11:26:44] (step=0791100) Train Loss: 0.3210, Train Steps/Sec: 15.96, Grad Norm: 0.0380 +[2025-02-23 11:26:51] (step=0791200) Train Loss: 0.3211, Train Steps/Sec: 15.96, Grad Norm: 0.0365 +[2025-02-23 11:26:58] (step=0791300) Train Loss: 0.3213, Train Steps/Sec: 14.23, Grad Norm: 0.0333 +[2025-02-23 11:27:04] (step=0791400) Train Loss: 0.3213, Train Steps/Sec: 15.29, Grad Norm: 0.0341 +[2025-02-23 11:27:10] (step=0791500) Train Loss: 0.3213, Train Steps/Sec: 16.58, Grad Norm: 0.0347 +[2025-02-23 11:27:16] (step=0791600) Train Loss: 0.3215, Train Steps/Sec: 15.80, Grad Norm: 0.0385 +[2025-02-23 11:27:22] (step=0791700) Train Loss: 0.3213, Train Steps/Sec: 17.40, Grad Norm: 0.0311 +[2025-02-23 11:27:28] (step=0791800) Train Loss: 0.3209, Train Steps/Sec: 16.57, Grad Norm: 0.0378 +[2025-02-23 11:27:35] (step=0791900) Train Loss: 0.3215, Train Steps/Sec: 14.50, Grad Norm: 0.0372 +[2025-02-23 11:27:41] (step=0792000) Train Loss: 0.3207, Train Steps/Sec: 17.35, Grad Norm: 0.0351 +[2025-02-23 11:27:47] (step=0792100) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0333 +[2025-02-23 11:27:52] (step=0792200) Train Loss: 0.3213, Train Steps/Sec: 17.35, Grad Norm: 0.0333 +[2025-02-23 11:27:58] (step=0792300) Train Loss: 0.3220, Train Steps/Sec: 17.34, Grad Norm: 0.0345 +[2025-02-23 11:28:04] (step=0792400) Train Loss: 0.3209, Train Steps/Sec: 17.33, Grad Norm: 0.0355 +[2025-02-23 11:28:10] (step=0792500) Train Loss: 0.3209, Train Steps/Sec: 17.34, Grad Norm: 0.0359 +[2025-02-23 11:28:16] (step=0792600) Train Loss: 0.3215, Train Steps/Sec: 17.34, Grad Norm: 0.0360 +[2025-02-23 11:28:21] (step=0792700) Train Loss: 0.3218, Train Steps/Sec: 17.30, Grad Norm: 0.0375 +[2025-02-23 11:28:27] (step=0792800) Train Loss: 0.3208, Train Steps/Sec: 17.36, Grad Norm: 0.0363 +[2025-02-23 11:28:33] (step=0792900) Train Loss: 0.3216, Train Steps/Sec: 17.40, Grad Norm: 0.0333 +[2025-02-23 11:28:39] (step=0793000) Train Loss: 0.3216, Train Steps/Sec: 17.40, Grad Norm: 0.0362 +[2025-02-23 11:28:45] (step=0793100) Train Loss: 0.3214, Train Steps/Sec: 15.93, Grad Norm: 0.0391 +[2025-02-23 11:28:53] (step=0793200) Train Loss: 0.3219, Train Steps/Sec: 12.12, Grad Norm: 0.0371 +[2025-02-23 11:29:00] (step=0793300) Train Loss: 0.3214, Train Steps/Sec: 14.52, Grad Norm: 0.0389 +[2025-02-23 11:29:06] (step=0793400) Train Loss: 0.3210, Train Steps/Sec: 17.13, Grad Norm: 0.0366 +[2025-02-23 11:29:13] (step=0793500) Train Loss: 0.3213, Train Steps/Sec: 14.50, Grad Norm: 0.0354 +[2025-02-23 11:29:19] (step=0793600) Train Loss: 0.3207, Train Steps/Sec: 17.08, Grad Norm: 0.0334 +[2025-02-23 11:29:24] (step=0793700) Train Loss: 0.3215, Train Steps/Sec: 17.02, Grad Norm: 0.0422 +[2025-02-23 11:29:30] (step=0793800) Train Loss: 0.3216, Train Steps/Sec: 17.05, Grad Norm: 0.0355 +[2025-02-23 11:29:36] (step=0793900) Train Loss: 0.3214, Train Steps/Sec: 16.32, Grad Norm: 0.0367 +[2025-02-23 11:29:42] (step=0794000) Train Loss: 0.3209, Train Steps/Sec: 17.09, Grad Norm: 0.0350 +[2025-02-23 11:29:48] (step=0794100) Train Loss: 0.3215, Train Steps/Sec: 17.10, Grad Norm: 0.0359 +[2025-02-23 11:29:54] (step=0794200) Train Loss: 0.3215, Train Steps/Sec: 17.10, Grad Norm: 0.0374 +[2025-02-23 11:30:00] (step=0794300) Train Loss: 0.3220, Train Steps/Sec: 17.09, Grad Norm: 0.0344 +[2025-02-23 11:30:07] (step=0794400) Train Loss: 0.3209, Train Steps/Sec: 14.30, Grad Norm: 0.0367 +[2025-02-23 11:30:13] (step=0794500) Train Loss: 0.3211, Train Steps/Sec: 17.02, Grad Norm: 0.0383 +[2025-02-23 11:30:19] (step=0794600) Train Loss: 0.3213, Train Steps/Sec: 16.97, Grad Norm: 0.0391 +[2025-02-23 11:30:24] (step=0794700) Train Loss: 0.3212, Train Steps/Sec: 16.96, Grad Norm: 0.0368 +[2025-02-23 11:30:30] (step=0794800) Train Loss: 0.3215, Train Steps/Sec: 16.97, Grad Norm: 0.0358 +[2025-02-23 11:30:36] (step=0794900) Train Loss: 0.3212, Train Steps/Sec: 16.95, Grad Norm: 0.0353 +[2025-02-23 11:30:42] (step=0795000) Train Loss: 0.3214, Train Steps/Sec: 16.97, Grad Norm: 0.0336 +[2025-02-23 11:30:49] (step=0795100) Train Loss: 0.3214, Train Steps/Sec: 14.98, Grad Norm: 0.0362 +[2025-02-23 11:30:56] (step=0795200) Train Loss: 0.3215, Train Steps/Sec: 14.59, Grad Norm: 0.0377 +[2025-02-23 11:31:03] (step=0795300) Train Loss: 0.3212, Train Steps/Sec: 14.62, Grad Norm: 0.0368 +[2025-02-23 11:31:08] (step=0795400) Train Loss: 0.3210, Train Steps/Sec: 17.28, Grad Norm: 0.0335 +[2025-02-23 11:31:15] (step=0795500) Train Loss: 0.3209, Train Steps/Sec: 14.53, Grad Norm: 0.0376 +[2025-02-23 11:31:21] (step=0795600) Train Loss: 0.3219, Train Steps/Sec: 17.20, Grad Norm: 0.0368 +[2025-02-23 11:31:28] (step=0795700) Train Loss: 0.3221, Train Steps/Sec: 14.41, Grad Norm: 0.0412 +[2025-02-23 11:31:34] (step=0795800) Train Loss: 0.3209, Train Steps/Sec: 17.27, Grad Norm: 0.0414 +[2025-02-23 11:31:40] (step=0795900) Train Loss: 0.3210, Train Steps/Sec: 16.52, Grad Norm: 0.0326 +[2025-02-23 11:31:46] (step=0796000) Train Loss: 0.3215, Train Steps/Sec: 17.24, Grad Norm: 0.0352 +[2025-02-23 11:31:51] (step=0796100) Train Loss: 0.3216, Train Steps/Sec: 17.24, Grad Norm: 0.0359 +[2025-02-23 11:31:57] (step=0796200) Train Loss: 0.3212, Train Steps/Sec: 17.24, Grad Norm: 0.0341 +[2025-02-23 11:32:03] (step=0796300) Train Loss: 0.3210, Train Steps/Sec: 17.23, Grad Norm: 0.0330 +[2025-02-23 11:32:09] (step=0796400) Train Loss: 0.3212, Train Steps/Sec: 17.28, Grad Norm: 0.0362 +[2025-02-23 11:32:15] (step=0796500) Train Loss: 0.3210, Train Steps/Sec: 17.20, Grad Norm: 0.0331 +[2025-02-23 11:32:20] (step=0796600) Train Loss: 0.3211, Train Steps/Sec: 17.14, Grad Norm: 0.0385 +[2025-02-23 11:32:26] (step=0796700) Train Loss: 0.3206, Train Steps/Sec: 17.11, Grad Norm: 0.0371 +[2025-02-23 11:32:32] (step=0796800) Train Loss: 0.3209, Train Steps/Sec: 17.20, Grad Norm: 0.0335 +[2025-02-23 11:32:39] (step=0796900) Train Loss: 0.3215, Train Steps/Sec: 14.41, Grad Norm: 0.0344 +[2025-02-23 11:32:45] (step=0797000) Train Loss: 0.3209, Train Steps/Sec: 16.41, Grad Norm: 0.0344 +[2025-02-23 11:32:52] (step=0797100) Train Loss: 0.3210, Train Steps/Sec: 14.08, Grad Norm: 0.0390 +[2025-02-23 11:32:59] (step=0797200) Train Loss: 0.3217, Train Steps/Sec: 15.17, Grad Norm: 0.0372 +[2025-02-23 11:33:05] (step=0797300) Train Loss: 0.3212, Train Steps/Sec: 15.90, Grad Norm: 0.0387 +[2025-02-23 11:33:11] (step=0797400) Train Loss: 0.3220, Train Steps/Sec: 15.86, Grad Norm: 0.0368 +[2025-02-23 11:33:18] (step=0797500) Train Loss: 0.3206, Train Steps/Sec: 15.88, Grad Norm: 0.0394 +[2025-02-23 11:33:24] (step=0797600) Train Loss: 0.3211, Train Steps/Sec: 17.23, Grad Norm: 0.0380 +[2025-02-23 11:33:29] (step=0797700) Train Loss: 0.3221, Train Steps/Sec: 17.17, Grad Norm: 0.0356 +[2025-02-23 11:33:35] (step=0797800) Train Loss: 0.3211, Train Steps/Sec: 17.15, Grad Norm: 0.0375 +[2025-02-23 11:33:41] (step=0797900) Train Loss: 0.3212, Train Steps/Sec: 16.44, Grad Norm: 0.0341 +[2025-02-23 11:33:47] (step=0798000) Train Loss: 0.3215, Train Steps/Sec: 17.26, Grad Norm: 0.0375 +[2025-02-23 11:33:53] (step=0798100) Train Loss: 0.3214, Train Steps/Sec: 17.26, Grad Norm: 0.0341 +[2025-02-23 11:34:00] (step=0798200) Train Loss: 0.3207, Train Steps/Sec: 14.24, Grad Norm: 0.0339 +[2025-02-23 11:34:06] (step=0798300) Train Loss: 0.3211, Train Steps/Sec: 17.17, Grad Norm: 0.0363 +[2025-02-23 11:34:12] (step=0798400) Train Loss: 0.3213, Train Steps/Sec: 17.24, Grad Norm: 0.0374 +[2025-02-23 11:34:17] (step=0798500) Train Loss: 0.3216, Train Steps/Sec: 17.24, Grad Norm: 0.0360 +[2025-02-23 11:34:23] (step=0798600) Train Loss: 0.3215, Train Steps/Sec: 17.26, Grad Norm: 0.0400 +[2025-02-23 11:34:29] (step=0798700) Train Loss: 0.3209, Train Steps/Sec: 17.27, Grad Norm: 0.0361 +[2025-02-23 11:34:35] (step=0798800) Train Loss: 0.3217, Train Steps/Sec: 17.31, Grad Norm: 0.0331 +[2025-02-23 11:34:41] (step=0798900) Train Loss: 0.3219, Train Steps/Sec: 17.30, Grad Norm: 0.0360 +[2025-02-23 11:34:47] (step=0799000) Train Loss: 0.3211, Train Steps/Sec: 16.55, Grad Norm: 0.0350 +[2025-02-23 11:34:53] (step=0799100) Train Loss: 0.3215, Train Steps/Sec: 14.74, Grad Norm: 0.0383 +[2025-02-23 11:35:00] (step=0799200) Train Loss: 0.3218, Train Steps/Sec: 15.36, Grad Norm: 0.0345 +[2025-02-23 11:35:06] (step=0799300) Train Loss: 0.3214, Train Steps/Sec: 16.64, Grad Norm: 0.0371 +[2025-02-23 11:35:14] (step=0799400) Train Loss: 0.3217, Train Steps/Sec: 12.84, Grad Norm: 0.0346 +[2025-02-23 11:35:20] (step=0799500) Train Loss: 0.3210, Train Steps/Sec: 15.73, Grad Norm: 0.0390 +[2025-02-23 11:35:26] (step=0799600) Train Loss: 0.3208, Train Steps/Sec: 17.27, Grad Norm: 0.0415 +[2025-02-23 11:35:32] (step=0799700) Train Loss: 0.3215, Train Steps/Sec: 17.34, Grad Norm: 0.0323 +[2025-02-23 11:35:37] (step=0799800) Train Loss: 0.3216, Train Steps/Sec: 17.32, Grad Norm: 0.0383 +[2025-02-23 11:35:43] (step=0799900) Train Loss: 0.3218, Train Steps/Sec: 16.56, Grad Norm: 0.0349 +[2025-02-23 11:35:49] (step=0800000) Train Loss: 0.3211, Train Steps/Sec: 17.34, Grad Norm: 0.0399 +[2025-02-23 11:35:50] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0800000.pt +[2025-02-23 11:35:56] (step=0800100) Train Loss: 0.3211, Train Steps/Sec: 15.60, Grad Norm: 0.0351 +[2025-02-23 11:36:01] (step=0800200) Train Loss: 0.3214, Train Steps/Sec: 17.15, Grad Norm: 0.0394 +[2025-02-23 11:36:07] (step=0800300) Train Loss: 0.3211, Train Steps/Sec: 17.19, Grad Norm: 0.0328 +[2025-02-23 11:36:13] (step=0800400) Train Loss: 0.3209, Train Steps/Sec: 17.27, Grad Norm: 0.0340 +[2025-02-23 11:36:19] (step=0800500) Train Loss: 0.3217, Train Steps/Sec: 17.26, Grad Norm: 0.0347 +[2025-02-23 11:36:25] (step=0800600) Train Loss: 0.3216, Train Steps/Sec: 17.27, Grad Norm: 0.0381 +[2025-02-23 11:36:32] (step=0800700) Train Loss: 0.3214, Train Steps/Sec: 14.40, Grad Norm: 0.0381 +[2025-02-23 11:36:37] (step=0800800) Train Loss: 0.3217, Train Steps/Sec: 17.22, Grad Norm: 0.0387 +[2025-02-23 11:36:43] (step=0800900) Train Loss: 0.3211, Train Steps/Sec: 16.35, Grad Norm: 0.0337 +[2025-02-23 11:36:50] (step=0801000) Train Loss: 0.3211, Train Steps/Sec: 15.25, Grad Norm: 0.0378 +[2025-02-23 11:36:57] (step=0801100) Train Loss: 0.3212, Train Steps/Sec: 14.52, Grad Norm: 0.0375 +[2025-02-23 11:37:03] (step=0801200) Train Loss: 0.3215, Train Steps/Sec: 16.28, Grad Norm: 0.0401 +[2025-02-23 11:37:10] (step=0801300) Train Loss: 0.3213, Train Steps/Sec: 14.50, Grad Norm: 0.0354 +[2025-02-23 11:37:16] (step=0801400) Train Loss: 0.3207, Train Steps/Sec: 17.17, Grad Norm: 0.0368 +[2025-02-23 11:37:22] (step=0801500) Train Loss: 0.3209, Train Steps/Sec: 15.72, Grad Norm: 0.0336 +[2025-02-23 11:37:28] (step=0801600) Train Loss: 0.3208, Train Steps/Sec: 17.14, Grad Norm: 0.0349 +[2025-02-23 11:37:34] (step=0801700) Train Loss: 0.3215, Train Steps/Sec: 17.10, Grad Norm: 0.0374 +[2025-02-23 11:37:40] (step=0801800) Train Loss: 0.3215, Train Steps/Sec: 17.06, Grad Norm: 0.0365 +[2025-02-23 11:37:47] (step=0801900) Train Loss: 0.3211, Train Steps/Sec: 13.82, Grad Norm: 0.0354 +[2025-02-23 11:37:53] (step=0802000) Train Loss: 0.3217, Train Steps/Sec: 17.04, Grad Norm: 0.0343 +[2025-02-23 11:37:59] (step=0802100) Train Loss: 0.3214, Train Steps/Sec: 17.10, Grad Norm: 0.0340 +[2025-02-23 11:38:04] (step=0802200) Train Loss: 0.3217, Train Steps/Sec: 17.30, Grad Norm: 0.0355 +[2025-02-23 11:38:10] (step=0802300) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0355 +[2025-02-23 11:38:16] (step=0802400) Train Loss: 0.3218, Train Steps/Sec: 17.33, Grad Norm: 0.0368 +[2025-02-23 11:38:22] (step=0802500) Train Loss: 0.3211, Train Steps/Sec: 17.29, Grad Norm: 0.0372 +[2025-02-23 11:38:28] (step=0802600) Train Loss: 0.3209, Train Steps/Sec: 17.27, Grad Norm: 0.0397 +[2025-02-23 11:38:33] (step=0802700) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0352 +[2025-02-23 11:38:39] (step=0802800) Train Loss: 0.3212, Train Steps/Sec: 17.33, Grad Norm: 0.0361 +[2025-02-23 11:38:45] (step=0802900) Train Loss: 0.3214, Train Steps/Sec: 15.95, Grad Norm: 0.0376 +[2025-02-23 11:38:51] (step=0803000) Train Loss: 0.3212, Train Steps/Sec: 16.58, Grad Norm: 0.0335 +[2025-02-23 11:38:58] (step=0803100) Train Loss: 0.3218, Train Steps/Sec: 14.74, Grad Norm: 0.0377 +[2025-02-23 11:39:06] (step=0803200) Train Loss: 0.3220, Train Steps/Sec: 13.27, Grad Norm: 0.0366 +[2025-02-23 11:39:12] (step=0803300) Train Loss: 0.3219, Train Steps/Sec: 15.10, Grad Norm: 0.0385 +[2025-02-23 11:39:18] (step=0803400) Train Loss: 0.3211, Train Steps/Sec: 17.17, Grad Norm: 0.0382 +[2025-02-23 11:39:25] (step=0803500) Train Loss: 0.3209, Train Steps/Sec: 15.83, Grad Norm: 0.0343 +[2025-02-23 11:39:30] (step=0803600) Train Loss: 0.3218, Train Steps/Sec: 17.26, Grad Norm: 0.0339 +[2025-02-23 11:39:36] (step=0803700) Train Loss: 0.3207, Train Steps/Sec: 17.21, Grad Norm: 0.0347 +[2025-02-23 11:39:42] (step=0803800) Train Loss: 0.3221, Train Steps/Sec: 17.22, Grad Norm: 0.0386 +[2025-02-23 11:39:48] (step=0803900) Train Loss: 0.3215, Train Steps/Sec: 16.53, Grad Norm: 0.0366 +[2025-02-23 11:39:54] (step=0804000) Train Loss: 0.3212, Train Steps/Sec: 17.29, Grad Norm: 0.0357 +[2025-02-23 11:40:00] (step=0804100) Train Loss: 0.3218, Train Steps/Sec: 17.28, Grad Norm: 0.0362 +[2025-02-23 11:40:05] (step=0804200) Train Loss: 0.3224, Train Steps/Sec: 17.24, Grad Norm: 0.0355 +[2025-02-23 11:40:11] (step=0804300) Train Loss: 0.3209, Train Steps/Sec: 17.28, Grad Norm: 0.0391 +[2025-02-23 11:40:18] (step=0804400) Train Loss: 0.3213, Train Steps/Sec: 14.23, Grad Norm: 0.0362 +[2025-02-23 11:40:24] (step=0804500) Train Loss: 0.3213, Train Steps/Sec: 17.09, Grad Norm: 0.0353 +[2025-02-23 11:40:30] (step=0804600) Train Loss: 0.3211, Train Steps/Sec: 17.26, Grad Norm: 0.0334 +[2025-02-23 11:40:36] (step=0804700) Train Loss: 0.3209, Train Steps/Sec: 17.36, Grad Norm: 0.0365 +[2025-02-23 11:40:41] (step=0804800) Train Loss: 0.3212, Train Steps/Sec: 17.36, Grad Norm: 0.0355 +[2025-02-23 11:40:48] (step=0804900) Train Loss: 0.3216, Train Steps/Sec: 14.73, Grad Norm: 0.0357 +[2025-02-23 11:40:54] (step=0805000) Train Loss: 0.3214, Train Steps/Sec: 15.90, Grad Norm: 0.0346 +[2025-02-23 11:41:01] (step=0805100) Train Loss: 0.3212, Train Steps/Sec: 15.76, Grad Norm: 0.0418 +[2025-02-23 11:41:07] (step=0805200) Train Loss: 0.3214, Train Steps/Sec: 15.02, Grad Norm: 0.0416 +[2025-02-23 11:41:14] (step=0805300) Train Loss: 0.3212, Train Steps/Sec: 16.46, Grad Norm: 0.0331 +[2025-02-23 11:41:20] (step=0805400) Train Loss: 0.3216, Train Steps/Sec: 16.51, Grad Norm: 0.0352 +[2025-02-23 11:41:26] (step=0805500) Train Loss: 0.3210, Train Steps/Sec: 15.80, Grad Norm: 0.0364 +[2025-02-23 11:41:32] (step=0805600) Train Loss: 0.3215, Train Steps/Sec: 17.20, Grad Norm: 0.0332 +[2025-02-23 11:41:39] (step=0805700) Train Loss: 0.3211, Train Steps/Sec: 14.29, Grad Norm: 0.0388 +[2025-02-23 11:41:45] (step=0805800) Train Loss: 0.3218, Train Steps/Sec: 17.16, Grad Norm: 0.0363 +[2025-02-23 11:41:51] (step=0805900) Train Loss: 0.3210, Train Steps/Sec: 16.40, Grad Norm: 0.0352 +[2025-02-23 11:41:56] (step=0806000) Train Loss: 0.3212, Train Steps/Sec: 17.17, Grad Norm: 0.0348 +[2025-02-23 11:42:02] (step=0806100) Train Loss: 0.3213, Train Steps/Sec: 17.11, Grad Norm: 0.0356 +[2025-02-23 11:42:08] (step=0806200) Train Loss: 0.3215, Train Steps/Sec: 17.15, Grad Norm: 0.0370 +[2025-02-23 11:42:14] (step=0806300) Train Loss: 0.3216, Train Steps/Sec: 17.20, Grad Norm: 0.0396 +[2025-02-23 11:42:20] (step=0806400) Train Loss: 0.3213, Train Steps/Sec: 17.25, Grad Norm: 0.0387 +[2025-02-23 11:42:26] (step=0806500) Train Loss: 0.3212, Train Steps/Sec: 17.24, Grad Norm: 0.0346 +[2025-02-23 11:42:31] (step=0806600) Train Loss: 0.3216, Train Steps/Sec: 17.25, Grad Norm: 0.0382 +[2025-02-23 11:42:37] (step=0806700) Train Loss: 0.3217, Train Steps/Sec: 17.35, Grad Norm: 0.0343 +[2025-02-23 11:42:44] (step=0806800) Train Loss: 0.3212, Train Steps/Sec: 14.78, Grad Norm: 0.0337 +[2025-02-23 11:42:51] (step=0806900) Train Loss: 0.3213, Train Steps/Sec: 13.92, Grad Norm: 0.0346 +[2025-02-23 11:42:57] (step=0807000) Train Loss: 0.3214, Train Steps/Sec: 16.24, Grad Norm: 0.0323 +[2025-02-23 11:43:04] (step=0807100) Train Loss: 0.3208, Train Steps/Sec: 14.39, Grad Norm: 0.0334 +[2025-02-23 11:43:10] (step=0807200) Train Loss: 0.3209, Train Steps/Sec: 16.25, Grad Norm: 0.0334 +[2025-02-23 11:43:16] (step=0807300) Train Loss: 0.3212, Train Steps/Sec: 16.33, Grad Norm: 0.0350 +[2025-02-23 11:43:23] (step=0807400) Train Loss: 0.3213, Train Steps/Sec: 16.57, Grad Norm: 0.0349 +[2025-02-23 11:43:29] (step=0807500) Train Loss: 0.3208, Train Steps/Sec: 15.95, Grad Norm: 0.0358 +[2025-02-23 11:43:35] (step=0807600) Train Loss: 0.3214, Train Steps/Sec: 17.42, Grad Norm: 0.0358 +[2025-02-23 11:43:40] (step=0807700) Train Loss: 0.3210, Train Steps/Sec: 17.38, Grad Norm: 0.0346 +[2025-02-23 11:43:46] (step=0807800) Train Loss: 0.3216, Train Steps/Sec: 17.30, Grad Norm: 0.0362 +[2025-02-23 11:43:52] (step=0807900) Train Loss: 0.3214, Train Steps/Sec: 16.59, Grad Norm: 0.0360 +[2025-02-23 11:43:58] (step=0808000) Train Loss: 0.3215, Train Steps/Sec: 17.31, Grad Norm: 0.0344 +[2025-02-23 11:44:04] (step=0808100) Train Loss: 0.3214, Train Steps/Sec: 17.37, Grad Norm: 0.0374 +[2025-02-23 11:44:11] (step=0808200) Train Loss: 0.3207, Train Steps/Sec: 14.25, Grad Norm: 0.0346 +[2025-02-23 11:44:17] (step=0808300) Train Loss: 0.3212, Train Steps/Sec: 17.08, Grad Norm: 0.0346 +[2025-02-23 11:44:22] (step=0808400) Train Loss: 0.3207, Train Steps/Sec: 17.21, Grad Norm: 0.0347 +[2025-02-23 11:44:28] (step=0808500) Train Loss: 0.3216, Train Steps/Sec: 17.04, Grad Norm: 0.0336 +[2025-02-23 11:44:34] (step=0808600) Train Loss: 0.3215, Train Steps/Sec: 17.03, Grad Norm: 0.0331 +[2025-02-23 11:44:40] (step=0808700) Train Loss: 0.3214, Train Steps/Sec: 16.29, Grad Norm: 0.0371 +[2025-02-23 11:44:47] (step=0808800) Train Loss: 0.3213, Train Steps/Sec: 15.22, Grad Norm: 0.0369 +[2025-02-23 11:44:53] (step=0808900) Train Loss: 0.3217, Train Steps/Sec: 15.71, Grad Norm: 0.0349 +[2025-02-23 11:44:59] (step=0809000) Train Loss: 0.3205, Train Steps/Sec: 17.07, Grad Norm: 0.0379 +[2025-02-23 11:45:06] (step=0809100) Train Loss: 0.3209, Train Steps/Sec: 14.40, Grad Norm: 0.0386 +[2025-02-23 11:45:12] (step=0809200) Train Loss: 0.3213, Train Steps/Sec: 16.28, Grad Norm: 0.0343 +[2025-02-23 11:45:18] (step=0809300) Train Loss: 0.3213, Train Steps/Sec: 16.32, Grad Norm: 0.0339 +[2025-02-23 11:45:25] (step=0809400) Train Loss: 0.3221, Train Steps/Sec: 13.96, Grad Norm: 0.0373 +[2025-02-23 11:45:32] (step=0809500) Train Loss: 0.3213, Train Steps/Sec: 15.65, Grad Norm: 0.0345 +[2025-02-23 11:45:38] (step=0809600) Train Loss: 0.3213, Train Steps/Sec: 17.15, Grad Norm: 0.0335 +[2025-02-23 11:45:43] (step=0809700) Train Loss: 0.3211, Train Steps/Sec: 17.17, Grad Norm: 0.0393 +[2025-02-23 11:45:49] (step=0809800) Train Loss: 0.3212, Train Steps/Sec: 17.18, Grad Norm: 0.0361 +[2025-02-23 11:45:55] (step=0809900) Train Loss: 0.3208, Train Steps/Sec: 16.46, Grad Norm: 0.0392 +[2025-02-23 11:46:01] (step=0810000) Train Loss: 0.3213, Train Steps/Sec: 17.18, Grad Norm: 0.0365 +[2025-02-23 11:46:07] (step=0810100) Train Loss: 0.3213, Train Steps/Sec: 17.23, Grad Norm: 0.0354 +[2025-02-23 11:46:13] (step=0810200) Train Loss: 0.3213, Train Steps/Sec: 17.24, Grad Norm: 0.0380 +[2025-02-23 11:46:19] (step=0810300) Train Loss: 0.3214, Train Steps/Sec: 17.23, Grad Norm: 0.0316 +[2025-02-23 11:46:24] (step=0810400) Train Loss: 0.3217, Train Steps/Sec: 17.25, Grad Norm: 0.0381 +[2025-02-23 11:46:30] (step=0810500) Train Loss: 0.3217, Train Steps/Sec: 17.28, Grad Norm: 0.0361 +[2025-02-23 11:46:36] (step=0810600) Train Loss: 0.3217, Train Steps/Sec: 17.23, Grad Norm: 0.0376 +[2025-02-23 11:46:44] (step=0810700) Train Loss: 0.3212, Train Steps/Sec: 12.72, Grad Norm: 0.0357 +[2025-02-23 11:46:50] (step=0810800) Train Loss: 0.3214, Train Steps/Sec: 17.27, Grad Norm: 0.0381 +[2025-02-23 11:46:56] (step=0810900) Train Loss: 0.3219, Train Steps/Sec: 15.91, Grad Norm: 0.0342 +[2025-02-23 11:47:02] (step=0811000) Train Loss: 0.3212, Train Steps/Sec: 15.80, Grad Norm: 0.0349 +[2025-02-23 11:47:09] (step=0811100) Train Loss: 0.3209, Train Steps/Sec: 15.81, Grad Norm: 0.0358 +[2025-02-23 11:47:15] (step=0811200) Train Loss: 0.3209, Train Steps/Sec: 16.47, Grad Norm: 0.0371 +[2025-02-23 11:47:21] (step=0811300) Train Loss: 0.3216, Train Steps/Sec: 16.53, Grad Norm: 0.0342 +[2025-02-23 11:47:27] (step=0811400) Train Loss: 0.3216, Train Steps/Sec: 16.60, Grad Norm: 0.0365 +[2025-02-23 11:47:33] (step=0811500) Train Loss: 0.3214, Train Steps/Sec: 16.58, Grad Norm: 0.0373 +[2025-02-23 11:47:39] (step=0811600) Train Loss: 0.3213, Train Steps/Sec: 16.56, Grad Norm: 0.0368 +[2025-02-23 11:47:45] (step=0811700) Train Loss: 0.3208, Train Steps/Sec: 17.36, Grad Norm: 0.0328 +[2025-02-23 11:47:50] (step=0811800) Train Loss: 0.3211, Train Steps/Sec: 17.34, Grad Norm: 0.0368 +[2025-02-23 11:47:57] (step=0811900) Train Loss: 0.3214, Train Steps/Sec: 14.46, Grad Norm: 0.0358 +[2025-02-23 11:48:03] (step=0812000) Train Loss: 0.3207, Train Steps/Sec: 16.33, Grad Norm: 0.0370 +[2025-02-23 11:48:09] (step=0812100) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0364 +[2025-02-23 11:48:15] (step=0812200) Train Loss: 0.3206, Train Steps/Sec: 17.34, Grad Norm: 0.0331 +[2025-02-23 11:48:21] (step=0812300) Train Loss: 0.3210, Train Steps/Sec: 17.37, Grad Norm: 0.0370 +[2025-02-23 11:48:26] (step=0812400) Train Loss: 0.3211, Train Steps/Sec: 17.39, Grad Norm: 0.0373 +[2025-02-23 11:48:32] (step=0812500) Train Loss: 0.3213, Train Steps/Sec: 17.24, Grad Norm: 0.0376 +[2025-02-23 11:48:39] (step=0812600) Train Loss: 0.3216, Train Steps/Sec: 15.82, Grad Norm: 0.0339 +[2025-02-23 11:48:45] (step=0812700) Train Loss: 0.3213, Train Steps/Sec: 15.97, Grad Norm: 0.0370 +[2025-02-23 11:48:51] (step=0812800) Train Loss: 0.3214, Train Steps/Sec: 16.01, Grad Norm: 0.0370 +[2025-02-23 11:48:57] (step=0812900) Train Loss: 0.3217, Train Steps/Sec: 17.37, Grad Norm: 0.0376 +[2025-02-23 11:49:03] (step=0813000) Train Loss: 0.3216, Train Steps/Sec: 15.82, Grad Norm: 0.0359 +[2025-02-23 11:49:09] (step=0813100) Train Loss: 0.3219, Train Steps/Sec: 16.54, Grad Norm: 0.0343 +[2025-02-23 11:49:17] (step=0813200) Train Loss: 0.3215, Train Steps/Sec: 13.26, Grad Norm: 0.0364 +[2025-02-23 11:49:23] (step=0813300) Train Loss: 0.3213, Train Steps/Sec: 16.41, Grad Norm: 0.0368 +[2025-02-23 11:49:29] (step=0813400) Train Loss: 0.3212, Train Steps/Sec: 16.44, Grad Norm: 0.0349 +[2025-02-23 11:49:35] (step=0813500) Train Loss: 0.3210, Train Steps/Sec: 16.40, Grad Norm: 0.0381 +[2025-02-23 11:49:41] (step=0813600) Train Loss: 0.3212, Train Steps/Sec: 16.31, Grad Norm: 0.0351 +[2025-02-23 11:49:47] (step=0813700) Train Loss: 0.3211, Train Steps/Sec: 17.05, Grad Norm: 0.0337 +[2025-02-23 11:49:53] (step=0813800) Train Loss: 0.3214, Train Steps/Sec: 16.99, Grad Norm: 0.0372 +[2025-02-23 11:49:59] (step=0813900) Train Loss: 0.3213, Train Steps/Sec: 17.05, Grad Norm: 0.0360 +[2025-02-23 11:50:05] (step=0814000) Train Loss: 0.3214, Train Steps/Sec: 16.36, Grad Norm: 0.0366 +[2025-02-23 11:50:11] (step=0814100) Train Loss: 0.3202, Train Steps/Sec: 17.09, Grad Norm: 0.0346 +[2025-02-23 11:50:17] (step=0814200) Train Loss: 0.3213, Train Steps/Sec: 17.11, Grad Norm: 0.0345 +[2025-02-23 11:50:22] (step=0814300) Train Loss: 0.3215, Train Steps/Sec: 17.11, Grad Norm: 0.0361 +[2025-02-23 11:50:28] (step=0814400) Train Loss: 0.3210, Train Steps/Sec: 17.12, Grad Norm: 0.0384 +[2025-02-23 11:50:35] (step=0814500) Train Loss: 0.3212, Train Steps/Sec: 14.21, Grad Norm: 0.0385 +[2025-02-23 11:50:42] (step=0814600) Train Loss: 0.3211, Train Steps/Sec: 14.59, Grad Norm: 0.0370 +[2025-02-23 11:50:48] (step=0814700) Train Loss: 0.3214, Train Steps/Sec: 16.35, Grad Norm: 0.0326 +[2025-02-23 11:50:54] (step=0814800) Train Loss: 0.3215, Train Steps/Sec: 16.38, Grad Norm: 0.0347 +[2025-02-23 11:51:01] (step=0814900) Train Loss: 0.3218, Train Steps/Sec: 15.65, Grad Norm: 0.0353 +[2025-02-23 11:51:07] (step=0815000) Train Loss: 0.3218, Train Steps/Sec: 17.08, Grad Norm: 0.0342 +[2025-02-23 11:51:13] (step=0815100) Train Loss: 0.3214, Train Steps/Sec: 16.41, Grad Norm: 0.0348 +[2025-02-23 11:51:19] (step=0815200) Train Loss: 0.3216, Train Steps/Sec: 16.31, Grad Norm: 0.0325 +[2025-02-23 11:51:25] (step=0815300) Train Loss: 0.3213, Train Steps/Sec: 16.26, Grad Norm: 0.0366 +[2025-02-23 11:51:31] (step=0815400) Train Loss: 0.3214, Train Steps/Sec: 15.65, Grad Norm: 0.0385 +[2025-02-23 11:51:38] (step=0815500) Train Loss: 0.3207, Train Steps/Sec: 16.30, Grad Norm: 0.0355 +[2025-02-23 11:51:44] (step=0815600) Train Loss: 0.3212, Train Steps/Sec: 16.26, Grad Norm: 0.0372 +[2025-02-23 11:51:51] (step=0815700) Train Loss: 0.3211, Train Steps/Sec: 14.30, Grad Norm: 0.0380 +[2025-02-23 11:51:57] (step=0815800) Train Loss: 0.3211, Train Steps/Sec: 17.10, Grad Norm: 0.0369 +[2025-02-23 11:52:02] (step=0815900) Train Loss: 0.3208, Train Steps/Sec: 17.12, Grad Norm: 0.0338 +[2025-02-23 11:52:08] (step=0816000) Train Loss: 0.3212, Train Steps/Sec: 16.45, Grad Norm: 0.0341 +[2025-02-23 11:52:14] (step=0816100) Train Loss: 0.3212, Train Steps/Sec: 17.17, Grad Norm: 0.0368 +[2025-02-23 11:52:20] (step=0816200) Train Loss: 0.3218, Train Steps/Sec: 17.14, Grad Norm: 0.0369 +[2025-02-23 11:52:26] (step=0816300) Train Loss: 0.3207, Train Steps/Sec: 17.11, Grad Norm: 0.0363 +[2025-02-23 11:52:32] (step=0816400) Train Loss: 0.3215, Train Steps/Sec: 17.08, Grad Norm: 0.0365 +[2025-02-23 11:52:39] (step=0816500) Train Loss: 0.3214, Train Steps/Sec: 14.58, Grad Norm: 0.0334 +[2025-02-23 11:52:45] (step=0816600) Train Loss: 0.3213, Train Steps/Sec: 17.08, Grad Norm: 0.0382 +[2025-02-23 11:52:51] (step=0816700) Train Loss: 0.3211, Train Steps/Sec: 15.73, Grad Norm: 0.0339 +[2025-02-23 11:52:57] (step=0816800) Train Loss: 0.3212, Train Steps/Sec: 16.32, Grad Norm: 0.0422 +[2025-02-23 11:53:03] (step=0816900) Train Loss: 0.3214, Train Steps/Sec: 16.31, Grad Norm: 0.0336 +[2025-02-23 11:53:10] (step=0817000) Train Loss: 0.3216, Train Steps/Sec: 14.26, Grad Norm: 0.0341 +[2025-02-23 11:53:17] (step=0817100) Train Loss: 0.3215, Train Steps/Sec: 15.62, Grad Norm: 0.0328 +[2025-02-23 11:53:22] (step=0817200) Train Loss: 0.3211, Train Steps/Sec: 17.10, Grad Norm: 0.0381 +[2025-02-23 11:53:29] (step=0817300) Train Loss: 0.3207, Train Steps/Sec: 16.31, Grad Norm: 0.0360 +[2025-02-23 11:53:35] (step=0817400) Train Loss: 0.3208, Train Steps/Sec: 15.72, Grad Norm: 0.0359 +[2025-02-23 11:53:41] (step=0817500) Train Loss: 0.3215, Train Steps/Sec: 16.52, Grad Norm: 0.0336 +[2025-02-23 11:53:47] (step=0817600) Train Loss: 0.3216, Train Steps/Sec: 16.49, Grad Norm: 0.0344 +[2025-02-23 11:53:53] (step=0817700) Train Loss: 0.3215, Train Steps/Sec: 17.20, Grad Norm: 0.0330 +[2025-02-23 11:53:59] (step=0817800) Train Loss: 0.3210, Train Steps/Sec: 17.25, Grad Norm: 0.0387 +[2025-02-23 11:54:04] (step=0817900) Train Loss: 0.3210, Train Steps/Sec: 17.24, Grad Norm: 0.0361 +[2025-02-23 11:54:11] (step=0818000) Train Loss: 0.3212, Train Steps/Sec: 16.50, Grad Norm: 0.0352 +[2025-02-23 11:54:16] (step=0818100) Train Loss: 0.3215, Train Steps/Sec: 17.30, Grad Norm: 0.0393 +[2025-02-23 11:54:23] (step=0818200) Train Loss: 0.3209, Train Steps/Sec: 14.29, Grad Norm: 0.0352 +[2025-02-23 11:54:29] (step=0818300) Train Loss: 0.3217, Train Steps/Sec: 17.17, Grad Norm: 0.0344 +[2025-02-23 11:54:35] (step=0818400) Train Loss: 0.3211, Train Steps/Sec: 17.15, Grad Norm: 0.0361 +[2025-02-23 11:54:42] (step=0818500) Train Loss: 0.3212, Train Steps/Sec: 14.57, Grad Norm: 0.0333 +[2025-02-23 11:54:48] (step=0818600) Train Loss: 0.3214, Train Steps/Sec: 16.49, Grad Norm: 0.0326 +[2025-02-23 11:54:54] (step=0818700) Train Loss: 0.3208, Train Steps/Sec: 16.59, Grad Norm: 0.0341 +[2025-02-23 11:55:00] (step=0818800) Train Loss: 0.3213, Train Steps/Sec: 15.80, Grad Norm: 0.0368 +[2025-02-23 11:55:06] (step=0818900) Train Loss: 0.3211, Train Steps/Sec: 17.19, Grad Norm: 0.0365 +[2025-02-23 11:55:12] (step=0819000) Train Loss: 0.3212, Train Steps/Sec: 17.25, Grad Norm: 0.0346 +[2025-02-23 11:55:18] (step=0819100) Train Loss: 0.3212, Train Steps/Sec: 17.29, Grad Norm: 0.0363 +[2025-02-23 11:55:24] (step=0819200) Train Loss: 0.3211, Train Steps/Sec: 15.76, Grad Norm: 0.0336 +[2025-02-23 11:55:30] (step=0819300) Train Loss: 0.3222, Train Steps/Sec: 16.44, Grad Norm: 0.0354 +[2025-02-23 11:55:36] (step=0819400) Train Loss: 0.3215, Train Steps/Sec: 15.76, Grad Norm: 0.0372 +[2025-02-23 11:55:44] (step=0819500) Train Loss: 0.3212, Train Steps/Sec: 13.71, Grad Norm: 0.0326 +[2025-02-23 11:55:50] (step=0819600) Train Loss: 0.3217, Train Steps/Sec: 16.37, Grad Norm: 0.0360 +[2025-02-23 11:55:56] (step=0819700) Train Loss: 0.3211, Train Steps/Sec: 17.22, Grad Norm: 0.0375 +[2025-02-23 11:56:01] (step=0819800) Train Loss: 0.3214, Train Steps/Sec: 17.20, Grad Norm: 0.0362 +[2025-02-23 11:56:07] (step=0819900) Train Loss: 0.3207, Train Steps/Sec: 17.23, Grad Norm: 0.0329 +[2025-02-23 11:56:13] (step=0820000) Train Loss: 0.3211, Train Steps/Sec: 16.67, Grad Norm: 0.0361 +[2025-02-23 11:56:19] (step=0820100) Train Loss: 0.3215, Train Steps/Sec: 17.37, Grad Norm: 0.0324 +[2025-02-23 11:56:25] (step=0820200) Train Loss: 0.3213, Train Steps/Sec: 17.40, Grad Norm: 0.0390 +[2025-02-23 11:56:31] (step=0820300) Train Loss: 0.3213, Train Steps/Sec: 17.39, Grad Norm: 0.0337 +[2025-02-23 11:56:37] (step=0820400) Train Loss: 0.3210, Train Steps/Sec: 14.75, Grad Norm: 0.0374 +[2025-02-23 11:56:43] (step=0820500) Train Loss: 0.3204, Train Steps/Sec: 17.22, Grad Norm: 0.0328 +[2025-02-23 11:56:49] (step=0820600) Train Loss: 0.3212, Train Steps/Sec: 15.88, Grad Norm: 0.0328 +[2025-02-23 11:56:57] (step=0820700) Train Loss: 0.3214, Train Steps/Sec: 13.71, Grad Norm: 0.0346 +[2025-02-23 11:57:03] (step=0820800) Train Loss: 0.3210, Train Steps/Sec: 16.39, Grad Norm: 0.0362 +[2025-02-23 11:57:09] (step=0820900) Train Loss: 0.3213, Train Steps/Sec: 17.23, Grad Norm: 0.0366 +[2025-02-23 11:57:14] (step=0821000) Train Loss: 0.3217, Train Steps/Sec: 17.14, Grad Norm: 0.0399 +[2025-02-23 11:57:21] (step=0821100) Train Loss: 0.3208, Train Steps/Sec: 16.42, Grad Norm: 0.0333 +[2025-02-23 11:57:27] (step=0821200) Train Loss: 0.3216, Train Steps/Sec: 16.28, Grad Norm: 0.0341 +[2025-02-23 11:57:33] (step=0821300) Train Loss: 0.3212, Train Steps/Sec: 16.38, Grad Norm: 0.0401 +[2025-02-23 11:57:39] (step=0821400) Train Loss: 0.3211, Train Steps/Sec: 15.83, Grad Norm: 0.0357 +[2025-02-23 11:57:45] (step=0821500) Train Loss: 0.3210, Train Steps/Sec: 16.47, Grad Norm: 0.0335 +[2025-02-23 11:57:51] (step=0821600) Train Loss: 0.3215, Train Steps/Sec: 16.39, Grad Norm: 0.0360 +[2025-02-23 11:57:57] (step=0821700) Train Loss: 0.3210, Train Steps/Sec: 17.14, Grad Norm: 0.0365 +[2025-02-23 11:58:03] (step=0821800) Train Loss: 0.3218, Train Steps/Sec: 17.22, Grad Norm: 0.0348 +[2025-02-23 11:58:09] (step=0821900) Train Loss: 0.3212, Train Steps/Sec: 17.20, Grad Norm: 0.0357 +[2025-02-23 11:58:16] (step=0822000) Train Loss: 0.3211, Train Steps/Sec: 13.72, Grad Norm: 0.0352 +[2025-02-23 11:58:22] (step=0822100) Train Loss: 0.3213, Train Steps/Sec: 17.06, Grad Norm: 0.0347 +[2025-02-23 11:58:28] (step=0822200) Train Loss: 0.3209, Train Steps/Sec: 17.04, Grad Norm: 0.0351 +[2025-02-23 11:58:34] (step=0822300) Train Loss: 0.3211, Train Steps/Sec: 16.98, Grad Norm: 0.0346 +[2025-02-23 11:58:41] (step=0822400) Train Loss: 0.3215, Train Steps/Sec: 14.55, Grad Norm: 0.0361 +[2025-02-23 11:58:47] (step=0822500) Train Loss: 0.3212, Train Steps/Sec: 16.38, Grad Norm: 0.0341 +[2025-02-23 11:58:53] (step=0822600) Train Loss: 0.3211, Train Steps/Sec: 16.44, Grad Norm: 0.0387 +[2025-02-23 11:58:59] (step=0822700) Train Loss: 0.3211, Train Steps/Sec: 15.61, Grad Norm: 0.0380 +[2025-02-23 11:59:05] (step=0822800) Train Loss: 0.3215, Train Steps/Sec: 17.03, Grad Norm: 0.0417 +[2025-02-23 11:59:11] (step=0822900) Train Loss: 0.3212, Train Steps/Sec: 17.09, Grad Norm: 0.0333 +[2025-02-23 11:59:17] (step=0823000) Train Loss: 0.3216, Train Steps/Sec: 17.08, Grad Norm: 0.0377 +[2025-02-23 11:59:23] (step=0823100) Train Loss: 0.3208, Train Steps/Sec: 17.10, Grad Norm: 0.0339 +[2025-02-23 11:59:30] (step=0823200) Train Loss: 0.3214, Train Steps/Sec: 13.31, Grad Norm: 0.0348 +[2025-02-23 11:59:36] (step=0823300) Train Loss: 0.3212, Train Steps/Sec: 16.44, Grad Norm: 0.0362 +[2025-02-23 11:59:43] (step=0823400) Train Loss: 0.3209, Train Steps/Sec: 15.74, Grad Norm: 0.0345 +[2025-02-23 11:59:49] (step=0823500) Train Loss: 0.3213, Train Steps/Sec: 16.54, Grad Norm: 0.0370 +[2025-02-23 11:59:55] (step=0823600) Train Loss: 0.3210, Train Steps/Sec: 16.53, Grad Norm: 0.0358 +[2025-02-23 12:00:00] (step=0823700) Train Loss: 0.3214, Train Steps/Sec: 17.33, Grad Norm: 0.0346 +[2025-02-23 12:00:06] (step=0823800) Train Loss: 0.3215, Train Steps/Sec: 17.23, Grad Norm: 0.0334 +[2025-02-23 12:00:12] (step=0823900) Train Loss: 0.3211, Train Steps/Sec: 17.23, Grad Norm: 0.0330 +[2025-02-23 12:00:18] (step=0824000) Train Loss: 0.3213, Train Steps/Sec: 16.60, Grad Norm: 0.0377 +[2025-02-23 12:00:24] (step=0824100) Train Loss: 0.3212, Train Steps/Sec: 17.36, Grad Norm: 0.0413 +[2025-02-23 12:00:30] (step=0824200) Train Loss: 0.3214, Train Steps/Sec: 17.35, Grad Norm: 0.0351 +[2025-02-23 12:00:36] (step=0824300) Train Loss: 0.3210, Train Steps/Sec: 14.85, Grad Norm: 0.0352 +[2025-02-23 12:00:42] (step=0824400) Train Loss: 0.3212, Train Steps/Sec: 17.24, Grad Norm: 0.0401 +[2025-02-23 12:00:50] (step=0824500) Train Loss: 0.3215, Train Steps/Sec: 13.38, Grad Norm: 0.0346 +[2025-02-23 12:00:56] (step=0824600) Train Loss: 0.3210, Train Steps/Sec: 16.45, Grad Norm: 0.0330 +[2025-02-23 12:01:02] (step=0824700) Train Loss: 0.3213, Train Steps/Sec: 16.51, Grad Norm: 0.0358 +[2025-02-23 12:01:07] (step=0824800) Train Loss: 0.3216, Train Steps/Sec: 17.37, Grad Norm: 0.0347 +[2025-02-23 12:01:13] (step=0824900) Train Loss: 0.3212, Train Steps/Sec: 17.31, Grad Norm: 0.0364 +[2025-02-23 12:01:19] (step=0825000) Train Loss: 0.3218, Train Steps/Sec: 17.36, Grad Norm: 0.0365 +[2025-02-23 12:01:25] (step=0825100) Train Loss: 0.3217, Train Steps/Sec: 17.36, Grad Norm: 0.0362 +[2025-02-23 12:01:31] (step=0825200) Train Loss: 0.3210, Train Steps/Sec: 15.78, Grad Norm: 0.0342 +[2025-02-23 12:01:37] (step=0825300) Train Loss: 0.3212, Train Steps/Sec: 16.26, Grad Norm: 0.0345 +[2025-02-23 12:01:44] (step=0825400) Train Loss: 0.3214, Train Steps/Sec: 15.84, Grad Norm: 0.0341 +[2025-02-23 12:01:49] (step=0825500) Train Loss: 0.3215, Train Steps/Sec: 17.43, Grad Norm: 0.0336 +[2025-02-23 12:01:56] (step=0825600) Train Loss: 0.3214, Train Steps/Sec: 15.90, Grad Norm: 0.0336 +[2025-02-23 12:02:03] (step=0825700) Train Loss: 0.3211, Train Steps/Sec: 14.35, Grad Norm: 0.0341 +[2025-02-23 12:02:08] (step=0825800) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0342 +[2025-02-23 12:02:14] (step=0825900) Train Loss: 0.3216, Train Steps/Sec: 17.22, Grad Norm: 0.0362 +[2025-02-23 12:02:20] (step=0826000) Train Loss: 0.3218, Train Steps/Sec: 16.46, Grad Norm: 0.0357 +[2025-02-23 12:02:26] (step=0826100) Train Loss: 0.3205, Train Steps/Sec: 17.17, Grad Norm: 0.0343 +[2025-02-23 12:02:32] (step=0826200) Train Loss: 0.3211, Train Steps/Sec: 16.54, Grad Norm: 0.0379 +[2025-02-23 12:02:39] (step=0826300) Train Loss: 0.3215, Train Steps/Sec: 15.11, Grad Norm: 0.0398 +[2025-02-23 12:02:45] (step=0826400) Train Loss: 0.3210, Train Steps/Sec: 16.48, Grad Norm: 0.0370 +[2025-02-23 12:02:51] (step=0826500) Train Loss: 0.3208, Train Steps/Sec: 16.48, Grad Norm: 0.0450 +[2025-02-23 12:02:57] (step=0826600) Train Loss: 0.3208, Train Steps/Sec: 15.77, Grad Norm: 0.0358 +[2025-02-23 12:03:03] (step=0826700) Train Loss: 0.3213, Train Steps/Sec: 17.22, Grad Norm: 0.0347 +[2025-02-23 12:03:09] (step=0826800) Train Loss: 0.3215, Train Steps/Sec: 17.23, Grad Norm: 0.0354 +[2025-02-23 12:03:15] (step=0826900) Train Loss: 0.3215, Train Steps/Sec: 17.14, Grad Norm: 0.0377 +[2025-02-23 12:03:22] (step=0827000) Train Loss: 0.3205, Train Steps/Sec: 14.36, Grad Norm: 0.0369 +[2025-02-23 12:03:28] (step=0827100) Train Loss: 0.3217, Train Steps/Sec: 16.38, Grad Norm: 0.0367 +[2025-02-23 12:03:34] (step=0827200) Train Loss: 0.3216, Train Steps/Sec: 16.46, Grad Norm: 0.0432 +[2025-02-23 12:03:40] (step=0827300) Train Loss: 0.3209, Train Steps/Sec: 16.52, Grad Norm: 0.0379 +[2025-02-23 12:03:46] (step=0827400) Train Loss: 0.3209, Train Steps/Sec: 15.81, Grad Norm: 0.0410 +[2025-02-23 12:03:52] (step=0827500) Train Loss: 0.3213, Train Steps/Sec: 16.63, Grad Norm: 0.0345 +[2025-02-23 12:03:58] (step=0827600) Train Loss: 0.3212, Train Steps/Sec: 16.55, Grad Norm: 0.0361 +[2025-02-23 12:04:04] (step=0827700) Train Loss: 0.3212, Train Steps/Sec: 17.23, Grad Norm: 0.0327 +[2025-02-23 12:04:10] (step=0827800) Train Loss: 0.3219, Train Steps/Sec: 17.22, Grad Norm: 0.0357 +[2025-02-23 12:04:16] (step=0827900) Train Loss: 0.3211, Train Steps/Sec: 17.18, Grad Norm: 0.0332 +[2025-02-23 12:04:22] (step=0828000) Train Loss: 0.3209, Train Steps/Sec: 16.45, Grad Norm: 0.0351 +[2025-02-23 12:04:28] (step=0828100) Train Loss: 0.3214, Train Steps/Sec: 17.19, Grad Norm: 0.0358 +[2025-02-23 12:04:36] (step=0828200) Train Loss: 0.3205, Train Steps/Sec: 12.70, Grad Norm: 0.0360 +[2025-02-23 12:04:42] (step=0828300) Train Loss: 0.3212, Train Steps/Sec: 16.62, Grad Norm: 0.0356 +[2025-02-23 12:04:48] (step=0828400) Train Loss: 0.3211, Train Steps/Sec: 16.66, Grad Norm: 0.0360 +[2025-02-23 12:04:54] (step=0828500) Train Loss: 0.3210, Train Steps/Sec: 16.50, Grad Norm: 0.0389 +[2025-02-23 12:05:00] (step=0828600) Train Loss: 0.3210, Train Steps/Sec: 16.38, Grad Norm: 0.0334 +[2025-02-23 12:05:06] (step=0828700) Train Loss: 0.3216, Train Steps/Sec: 17.12, Grad Norm: 0.0331 +[2025-02-23 12:05:11] (step=0828800) Train Loss: 0.3210, Train Steps/Sec: 17.21, Grad Norm: 0.0387 +[2025-02-23 12:05:17] (step=0828900) Train Loss: 0.3211, Train Steps/Sec: 17.21, Grad Norm: 0.0383 +[2025-02-23 12:05:23] (step=0829000) Train Loss: 0.3214, Train Steps/Sec: 17.15, Grad Norm: 0.0350 +[2025-02-23 12:05:29] (step=0829100) Train Loss: 0.3214, Train Steps/Sec: 17.32, Grad Norm: 0.0383 +[2025-02-23 12:05:35] (step=0829200) Train Loss: 0.3216, Train Steps/Sec: 15.88, Grad Norm: 0.0340 +[2025-02-23 12:05:41] (step=0829300) Train Loss: 0.3208, Train Steps/Sec: 16.55, Grad Norm: 0.0356 +[2025-02-23 12:05:47] (step=0829400) Train Loss: 0.3208, Train Steps/Sec: 15.79, Grad Norm: 0.0347 +[2025-02-23 12:05:55] (step=0829500) Train Loss: 0.3208, Train Steps/Sec: 13.75, Grad Norm: 0.0373 +[2025-02-23 12:06:01] (step=0829600) Train Loss: 0.3210, Train Steps/Sec: 17.32, Grad Norm: 0.0372 +[2025-02-23 12:06:07] (step=0829700) Train Loss: 0.3209, Train Steps/Sec: 16.51, Grad Norm: 0.0355 +[2025-02-23 12:06:12] (step=0829800) Train Loss: 0.3215, Train Steps/Sec: 17.28, Grad Norm: 0.0366 +[2025-02-23 12:06:18] (step=0829900) Train Loss: 0.3212, Train Steps/Sec: 17.28, Grad Norm: 0.0340 +[2025-02-23 12:06:24] (step=0830000) Train Loss: 0.3210, Train Steps/Sec: 16.60, Grad Norm: 0.0340 +[2025-02-23 12:06:30] (step=0830100) Train Loss: 0.3206, Train Steps/Sec: 16.53, Grad Norm: 0.0370 +[2025-02-23 12:06:37] (step=0830200) Train Loss: 0.3210, Train Steps/Sec: 15.15, Grad Norm: 0.0338 +[2025-02-23 12:06:43] (step=0830300) Train Loss: 0.3211, Train Steps/Sec: 16.48, Grad Norm: 0.0358 +[2025-02-23 12:06:49] (step=0830400) Train Loss: 0.3207, Train Steps/Sec: 16.52, Grad Norm: 0.0351 +[2025-02-23 12:06:55] (step=0830500) Train Loss: 0.3209, Train Steps/Sec: 15.80, Grad Norm: 0.0361 +[2025-02-23 12:07:01] (step=0830600) Train Loss: 0.3208, Train Steps/Sec: 17.26, Grad Norm: 0.0348 +[2025-02-23 12:07:08] (step=0830700) Train Loss: 0.3208, Train Steps/Sec: 14.20, Grad Norm: 0.0341 +[2025-02-23 12:07:14] (step=0830800) Train Loss: 0.3213, Train Steps/Sec: 17.38, Grad Norm: 0.0321 +[2025-02-23 12:07:20] (step=0830900) Train Loss: 0.3211, Train Steps/Sec: 17.36, Grad Norm: 0.0390 +[2025-02-23 12:07:25] (step=0831000) Train Loss: 0.3210, Train Steps/Sec: 17.32, Grad Norm: 0.0378 +[2025-02-23 12:07:31] (step=0831100) Train Loss: 0.3208, Train Steps/Sec: 17.21, Grad Norm: 0.0394 +[2025-02-23 12:07:38] (step=0831200) Train Loss: 0.3216, Train Steps/Sec: 15.82, Grad Norm: 0.0333 +[2025-02-23 12:07:44] (step=0831300) Train Loss: 0.3208, Train Steps/Sec: 16.63, Grad Norm: 0.0343 +[2025-02-23 12:07:50] (step=0831400) Train Loss: 0.3211, Train Steps/Sec: 15.92, Grad Norm: 0.0370 +[2025-02-23 12:07:56] (step=0831500) Train Loss: 0.3211, Train Steps/Sec: 17.38, Grad Norm: 0.0339 +[2025-02-23 12:08:02] (step=0831600) Train Loss: 0.3218, Train Steps/Sec: 16.60, Grad Norm: 0.0349 +[2025-02-23 12:08:08] (step=0831700) Train Loss: 0.3214, Train Steps/Sec: 16.60, Grad Norm: 0.0385 +[2025-02-23 12:08:13] (step=0831800) Train Loss: 0.3211, Train Steps/Sec: 17.32, Grad Norm: 0.0360 +[2025-02-23 12:08:19] (step=0831900) Train Loss: 0.3212, Train Steps/Sec: 17.26, Grad Norm: 0.0360 +[2025-02-23 12:08:26] (step=0832000) Train Loss: 0.3210, Train Steps/Sec: 13.84, Grad Norm: 0.0356 +[2025-02-23 12:08:33] (step=0832100) Train Loss: 0.3208, Train Steps/Sec: 14.57, Grad Norm: 0.0400 +[2025-02-23 12:08:39] (step=0832200) Train Loss: 0.3213, Train Steps/Sec: 16.45, Grad Norm: 0.0335 +[2025-02-23 12:08:45] (step=0832300) Train Loss: 0.3215, Train Steps/Sec: 16.56, Grad Norm: 0.0419 +[2025-02-23 12:08:51] (step=0832400) Train Loss: 0.3210, Train Steps/Sec: 16.55, Grad Norm: 0.0357 +[2025-02-23 12:08:58] (step=0832500) Train Loss: 0.3212, Train Steps/Sec: 16.63, Grad Norm: 0.0373 +[2025-02-23 12:09:03] (step=0832600) Train Loss: 0.3209, Train Steps/Sec: 17.47, Grad Norm: 0.0362 +[2025-02-23 12:09:09] (step=0832700) Train Loss: 0.3215, Train Steps/Sec: 17.41, Grad Norm: 0.0344 +[2025-02-23 12:09:15] (step=0832800) Train Loss: 0.3216, Train Steps/Sec: 17.39, Grad Norm: 0.0365 +[2025-02-23 12:09:20] (step=0832900) Train Loss: 0.3215, Train Steps/Sec: 17.44, Grad Norm: 0.0339 +[2025-02-23 12:09:26] (step=0833000) Train Loss: 0.3211, Train Steps/Sec: 17.49, Grad Norm: 0.0340 +[2025-02-23 12:09:32] (step=0833100) Train Loss: 0.3206, Train Steps/Sec: 17.48, Grad Norm: 0.0348 +[2025-02-23 12:09:39] (step=0833200) Train Loss: 0.3213, Train Steps/Sec: 13.38, Grad Norm: 0.0346 +[2025-02-23 12:09:45] (step=0833300) Train Loss: 0.3218, Train Steps/Sec: 16.42, Grad Norm: 0.0350 +[2025-02-23 12:09:52] (step=0833400) Train Loss: 0.3209, Train Steps/Sec: 15.82, Grad Norm: 0.0369 +[2025-02-23 12:09:58] (step=0833500) Train Loss: 0.3212, Train Steps/Sec: 16.64, Grad Norm: 0.0351 +[2025-02-23 12:10:04] (step=0833600) Train Loss: 0.3212, Train Steps/Sec: 17.41, Grad Norm: 0.0361 +[2025-02-23 12:10:10] (step=0833700) Train Loss: 0.3213, Train Steps/Sec: 16.63, Grad Norm: 0.0358 +[2025-02-23 12:10:15] (step=0833800) Train Loss: 0.3214, Train Steps/Sec: 17.38, Grad Norm: 0.0391 +[2025-02-23 12:10:21] (step=0833900) Train Loss: 0.3211, Train Steps/Sec: 17.38, Grad Norm: 0.0375 +[2025-02-23 12:10:27] (step=0834000) Train Loss: 0.3212, Train Steps/Sec: 15.98, Grad Norm: 0.0340 +[2025-02-23 12:10:34] (step=0834100) Train Loss: 0.3210, Train Steps/Sec: 15.23, Grad Norm: 0.0328 +[2025-02-23 12:10:40] (step=0834200) Train Loss: 0.3217, Train Steps/Sec: 16.00, Grad Norm: 0.0334 +[2025-02-23 12:10:46] (step=0834300) Train Loss: 0.3213, Train Steps/Sec: 17.37, Grad Norm: 0.0355 +[2025-02-23 12:10:52] (step=0834400) Train Loss: 0.3214, Train Steps/Sec: 15.76, Grad Norm: 0.0353 +[2025-02-23 12:10:59] (step=0834500) Train Loss: 0.3211, Train Steps/Sec: 14.51, Grad Norm: 0.0337 +[2025-02-23 12:11:05] (step=0834600) Train Loss: 0.3207, Train Steps/Sec: 17.26, Grad Norm: 0.0392 +[2025-02-23 12:11:11] (step=0834700) Train Loss: 0.3216, Train Steps/Sec: 17.28, Grad Norm: 0.0382 +[2025-02-23 12:11:16] (step=0834800) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0360 +[2025-02-23 12:11:22] (step=0834900) Train Loss: 0.3214, Train Steps/Sec: 17.40, Grad Norm: 0.0336 +[2025-02-23 12:11:28] (step=0835000) Train Loss: 0.3215, Train Steps/Sec: 17.30, Grad Norm: 0.0351 +[2025-02-23 12:11:34] (step=0835100) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0410 +[2025-02-23 12:11:40] (step=0835200) Train Loss: 0.3215, Train Steps/Sec: 15.86, Grad Norm: 0.0383 +[2025-02-23 12:11:46] (step=0835300) Train Loss: 0.3217, Train Steps/Sec: 16.64, Grad Norm: 0.0400 +[2025-02-23 12:11:52] (step=0835400) Train Loss: 0.3212, Train Steps/Sec: 15.87, Grad Norm: 0.0353 +[2025-02-23 12:11:58] (step=0835500) Train Loss: 0.3212, Train Steps/Sec: 17.36, Grad Norm: 0.0337 +[2025-02-23 12:12:04] (step=0835600) Train Loss: 0.3215, Train Steps/Sec: 16.58, Grad Norm: 0.0391 +[2025-02-23 12:12:11] (step=0835700) Train Loss: 0.3208, Train Steps/Sec: 13.99, Grad Norm: 0.0379 +[2025-02-23 12:12:17] (step=0835800) Train Loss: 0.3214, Train Steps/Sec: 17.39, Grad Norm: 0.0332 +[2025-02-23 12:12:23] (step=0835900) Train Loss: 0.3207, Train Steps/Sec: 17.39, Grad Norm: 0.0379 +[2025-02-23 12:12:30] (step=0836000) Train Loss: 0.3218, Train Steps/Sec: 14.22, Grad Norm: 0.0356 +[2025-02-23 12:12:36] (step=0836100) Train Loss: 0.3210, Train Steps/Sec: 16.57, Grad Norm: 0.0338 +[2025-02-23 12:12:42] (step=0836200) Train Loss: 0.3212, Train Steps/Sec: 16.64, Grad Norm: 0.0334 +[2025-02-23 12:12:48] (step=0836300) Train Loss: 0.3212, Train Steps/Sec: 16.59, Grad Norm: 0.0333 +[2025-02-23 12:12:54] (step=0836400) Train Loss: 0.3213, Train Steps/Sec: 16.51, Grad Norm: 0.0381 +[2025-02-23 12:13:00] (step=0836500) Train Loss: 0.3212, Train Steps/Sec: 17.31, Grad Norm: 0.0350 +[2025-02-23 12:13:06] (step=0836600) Train Loss: 0.3217, Train Steps/Sec: 17.37, Grad Norm: 0.0342 +[2025-02-23 12:13:11] (step=0836700) Train Loss: 0.3209, Train Steps/Sec: 17.41, Grad Norm: 0.0361 +[2025-02-23 12:13:17] (step=0836800) Train Loss: 0.3208, Train Steps/Sec: 17.43, Grad Norm: 0.0370 +[2025-02-23 12:13:23] (step=0836900) Train Loss: 0.3211, Train Steps/Sec: 17.32, Grad Norm: 0.0351 +[2025-02-23 12:13:30] (step=0837000) Train Loss: 0.3208, Train Steps/Sec: 14.58, Grad Norm: 0.0327 +[2025-02-23 12:13:35] (step=0837100) Train Loss: 0.3220, Train Steps/Sec: 17.40, Grad Norm: 0.0354 +[2025-02-23 12:13:42] (step=0837200) Train Loss: 0.3209, Train Steps/Sec: 15.93, Grad Norm: 0.0369 +[2025-02-23 12:13:48] (step=0837300) Train Loss: 0.3215, Train Steps/Sec: 16.57, Grad Norm: 0.0372 +[2025-02-23 12:13:54] (step=0837400) Train Loss: 0.3211, Train Steps/Sec: 15.88, Grad Norm: 0.0369 +[2025-02-23 12:14:00] (step=0837500) Train Loss: 0.3218, Train Steps/Sec: 17.41, Grad Norm: 0.0365 +[2025-02-23 12:14:06] (step=0837600) Train Loss: 0.3212, Train Steps/Sec: 16.64, Grad Norm: 0.0369 +[2025-02-23 12:14:12] (step=0837700) Train Loss: 0.3215, Train Steps/Sec: 16.58, Grad Norm: 0.0366 +[2025-02-23 12:14:18] (step=0837800) Train Loss: 0.3208, Train Steps/Sec: 17.32, Grad Norm: 0.0369 +[2025-02-23 12:14:24] (step=0837900) Train Loss: 0.3215, Train Steps/Sec: 14.78, Grad Norm: 0.0350 +[2025-02-23 12:14:31] (step=0838000) Train Loss: 0.3209, Train Steps/Sec: 15.90, Grad Norm: 0.0356 +[2025-02-23 12:14:36] (step=0838100) Train Loss: 0.3217, Train Steps/Sec: 17.28, Grad Norm: 0.0350 +[2025-02-23 12:14:44] (step=0838200) Train Loss: 0.3209, Train Steps/Sec: 13.40, Grad Norm: 0.0349 +[2025-02-23 12:14:50] (step=0838300) Train Loss: 0.3213, Train Steps/Sec: 16.52, Grad Norm: 0.0333 +[2025-02-23 12:14:56] (step=0838400) Train Loss: 0.3214, Train Steps/Sec: 17.38, Grad Norm: 0.0364 +[2025-02-23 12:15:02] (step=0838500) Train Loss: 0.3208, Train Steps/Sec: 17.30, Grad Norm: 0.0347 +[2025-02-23 12:15:07] (step=0838600) Train Loss: 0.3204, Train Steps/Sec: 17.33, Grad Norm: 0.0368 +[2025-02-23 12:15:13] (step=0838700) Train Loss: 0.3207, Train Steps/Sec: 17.34, Grad Norm: 0.0384 +[2025-02-23 12:15:19] (step=0838800) Train Loss: 0.3216, Train Steps/Sec: 17.40, Grad Norm: 0.0386 +[2025-02-23 12:15:25] (step=0838900) Train Loss: 0.3210, Train Steps/Sec: 17.37, Grad Norm: 0.0328 +[2025-02-23 12:15:30] (step=0839000) Train Loss: 0.3217, Train Steps/Sec: 17.37, Grad Norm: 0.0329 +[2025-02-23 12:15:36] (step=0839100) Train Loss: 0.3214, Train Steps/Sec: 17.38, Grad Norm: 0.0335 +[2025-02-23 12:15:42] (step=0839200) Train Loss: 0.3213, Train Steps/Sec: 15.88, Grad Norm: 0.0354 +[2025-02-23 12:15:48] (step=0839300) Train Loss: 0.3212, Train Steps/Sec: 16.62, Grad Norm: 0.0364 +[2025-02-23 12:15:55] (step=0839400) Train Loss: 0.3213, Train Steps/Sec: 15.79, Grad Norm: 0.0345 +[2025-02-23 12:16:02] (step=0839500) Train Loss: 0.3215, Train Steps/Sec: 14.39, Grad Norm: 0.0370 +[2025-02-23 12:16:08] (step=0839600) Train Loss: 0.3207, Train Steps/Sec: 16.46, Grad Norm: 0.0408 +[2025-02-23 12:16:14] (step=0839700) Train Loss: 0.3211, Train Steps/Sec: 16.53, Grad Norm: 0.0379 +[2025-02-23 12:16:20] (step=0839800) Train Loss: 0.3209, Train Steps/Sec: 16.63, Grad Norm: 0.0355 +[2025-02-23 12:16:26] (step=0839900) Train Loss: 0.3216, Train Steps/Sec: 15.34, Grad Norm: 0.0340 +[2025-02-23 12:16:33] (step=0840000) Train Loss: 0.3210, Train Steps/Sec: 15.99, Grad Norm: 0.0341 +[2025-02-23 12:16:39] (step=0840100) Train Loss: 0.3212, Train Steps/Sec: 16.72, Grad Norm: 0.0360 +[2025-02-23 12:16:45] (step=0840200) Train Loss: 0.3213, Train Steps/Sec: 16.59, Grad Norm: 0.0336 +[2025-02-23 12:16:51] (step=0840300) Train Loss: 0.3215, Train Steps/Sec: 16.57, Grad Norm: 0.0327 +[2025-02-23 12:16:56] (step=0840400) Train Loss: 0.3211, Train Steps/Sec: 17.39, Grad Norm: 0.0338 +[2025-02-23 12:17:02] (step=0840500) Train Loss: 0.3220, Train Steps/Sec: 17.43, Grad Norm: 0.0362 +[2025-02-23 12:17:08] (step=0840600) Train Loss: 0.3212, Train Steps/Sec: 17.45, Grad Norm: 0.0356 +[2025-02-23 12:17:15] (step=0840700) Train Loss: 0.3208, Train Steps/Sec: 14.55, Grad Norm: 0.0325 +[2025-02-23 12:17:21] (step=0840800) Train Loss: 0.3211, Train Steps/Sec: 17.35, Grad Norm: 0.0345 +[2025-02-23 12:17:26] (step=0840900) Train Loss: 0.3215, Train Steps/Sec: 17.28, Grad Norm: 0.0329 +[2025-02-23 12:17:32] (step=0841000) Train Loss: 0.3211, Train Steps/Sec: 17.29, Grad Norm: 0.0335 +[2025-02-23 12:17:38] (step=0841100) Train Loss: 0.3209, Train Steps/Sec: 17.29, Grad Norm: 0.0329 +[2025-02-23 12:17:44] (step=0841200) Train Loss: 0.3214, Train Steps/Sec: 15.84, Grad Norm: 0.0378 +[2025-02-23 12:17:50] (step=0841300) Train Loss: 0.3215, Train Steps/Sec: 16.56, Grad Norm: 0.0352 +[2025-02-23 12:17:57] (step=0841400) Train Loss: 0.3213, Train Steps/Sec: 15.86, Grad Norm: 0.0354 +[2025-02-23 12:18:02] (step=0841500) Train Loss: 0.3210, Train Steps/Sec: 17.36, Grad Norm: 0.0332 +[2025-02-23 12:18:08] (step=0841600) Train Loss: 0.3210, Train Steps/Sec: 16.62, Grad Norm: 0.0404 +[2025-02-23 12:18:14] (step=0841700) Train Loss: 0.3212, Train Steps/Sec: 16.65, Grad Norm: 0.0363 +[2025-02-23 12:18:21] (step=0841800) Train Loss: 0.3213, Train Steps/Sec: 15.36, Grad Norm: 0.0384 +[2025-02-23 12:18:27] (step=0841900) Train Loss: 0.3209, Train Steps/Sec: 16.54, Grad Norm: 0.0367 +[2025-02-23 12:18:34] (step=0842000) Train Loss: 0.3208, Train Steps/Sec: 13.61, Grad Norm: 0.0391 +[2025-02-23 12:18:40] (step=0842100) Train Loss: 0.3208, Train Steps/Sec: 16.62, Grad Norm: 0.0331 +[2025-02-23 12:18:46] (step=0842200) Train Loss: 0.3208, Train Steps/Sec: 16.65, Grad Norm: 0.0339 +[2025-02-23 12:18:52] (step=0842300) Train Loss: 0.3205, Train Steps/Sec: 17.41, Grad Norm: 0.0365 +[2025-02-23 12:18:58] (step=0842400) Train Loss: 0.3216, Train Steps/Sec: 17.39, Grad Norm: 0.0369 +[2025-02-23 12:19:04] (step=0842500) Train Loss: 0.3210, Train Steps/Sec: 17.32, Grad Norm: 0.0361 +[2025-02-23 12:19:09] (step=0842600) Train Loss: 0.3204, Train Steps/Sec: 17.27, Grad Norm: 0.0383 +[2025-02-23 12:19:15] (step=0842700) Train Loss: 0.3215, Train Steps/Sec: 17.32, Grad Norm: 0.0357 +[2025-02-23 12:19:21] (step=0842800) Train Loss: 0.3210, Train Steps/Sec: 17.33, Grad Norm: 0.0353 +[2025-02-23 12:19:27] (step=0842900) Train Loss: 0.3215, Train Steps/Sec: 17.28, Grad Norm: 0.0353 +[2025-02-23 12:19:32] (step=0843000) Train Loss: 0.3213, Train Steps/Sec: 17.29, Grad Norm: 0.0337 +[2025-02-23 12:19:38] (step=0843100) Train Loss: 0.3214, Train Steps/Sec: 17.30, Grad Norm: 0.0359 +[2025-02-23 12:19:46] (step=0843200) Train Loss: 0.3213, Train Steps/Sec: 13.33, Grad Norm: 0.0362 +[2025-02-23 12:19:52] (step=0843300) Train Loss: 0.3212, Train Steps/Sec: 16.58, Grad Norm: 0.0393 +[2025-02-23 12:19:58] (step=0843400) Train Loss: 0.3212, Train Steps/Sec: 16.56, Grad Norm: 0.0336 +[2025-02-23 12:20:04] (step=0843500) Train Loss: 0.3209, Train Steps/Sec: 16.61, Grad Norm: 0.0329 +[2025-02-23 12:20:10] (step=0843600) Train Loss: 0.3210, Train Steps/Sec: 16.68, Grad Norm: 0.0364 +[2025-02-23 12:20:16] (step=0843700) Train Loss: 0.3210, Train Steps/Sec: 15.98, Grad Norm: 0.0364 +[2025-02-23 12:20:22] (step=0843800) Train Loss: 0.3212, Train Steps/Sec: 15.99, Grad Norm: 0.0328 +[2025-02-23 12:20:28] (step=0843900) Train Loss: 0.3214, Train Steps/Sec: 16.60, Grad Norm: 0.0383 +[2025-02-23 12:20:35] (step=0844000) Train Loss: 0.3212, Train Steps/Sec: 15.88, Grad Norm: 0.0353 +[2025-02-23 12:20:41] (step=0844100) Train Loss: 0.3206, Train Steps/Sec: 16.55, Grad Norm: 0.0365 +[2025-02-23 12:20:47] (step=0844200) Train Loss: 0.3213, Train Steps/Sec: 16.58, Grad Norm: 0.0326 +[2025-02-23 12:20:53] (step=0844300) Train Loss: 0.3211, Train Steps/Sec: 17.41, Grad Norm: 0.0352 +[2025-02-23 12:20:58] (step=0844400) Train Loss: 0.3208, Train Steps/Sec: 17.27, Grad Norm: 0.0358 +[2025-02-23 12:21:05] (step=0844500) Train Loss: 0.3213, Train Steps/Sec: 14.45, Grad Norm: 0.0379 +[2025-02-23 12:21:11] (step=0844600) Train Loss: 0.3204, Train Steps/Sec: 17.41, Grad Norm: 0.0321 +[2025-02-23 12:21:17] (step=0844700) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0379 +[2025-02-23 12:21:23] (step=0844800) Train Loss: 0.3217, Train Steps/Sec: 17.33, Grad Norm: 0.0362 +[2025-02-23 12:21:28] (step=0844900) Train Loss: 0.3204, Train Steps/Sec: 17.31, Grad Norm: 0.0378 +[2025-02-23 12:21:34] (step=0845000) Train Loss: 0.3211, Train Steps/Sec: 17.34, Grad Norm: 0.0356 +[2025-02-23 12:21:40] (step=0845100) Train Loss: 0.3209, Train Steps/Sec: 17.33, Grad Norm: 0.0371 +[2025-02-23 12:21:46] (step=0845200) Train Loss: 0.3214, Train Steps/Sec: 15.87, Grad Norm: 0.0343 +[2025-02-23 12:21:52] (step=0845300) Train Loss: 0.3215, Train Steps/Sec: 16.50, Grad Norm: 0.0360 +[2025-02-23 12:21:58] (step=0845400) Train Loss: 0.3213, Train Steps/Sec: 16.58, Grad Norm: 0.0360 +[2025-02-23 12:22:04] (step=0845500) Train Loss: 0.3209, Train Steps/Sec: 16.59, Grad Norm: 0.0318 +[2025-02-23 12:22:10] (step=0845600) Train Loss: 0.3206, Train Steps/Sec: 16.59, Grad Norm: 0.0355 +[2025-02-23 12:22:18] (step=0845700) Train Loss: 0.3215, Train Steps/Sec: 12.71, Grad Norm: 0.0380 +[2025-02-23 12:22:24] (step=0845800) Train Loss: 0.3211, Train Steps/Sec: 16.60, Grad Norm: 0.0358 +[2025-02-23 12:22:30] (step=0845900) Train Loss: 0.3210, Train Steps/Sec: 16.61, Grad Norm: 0.0334 +[2025-02-23 12:22:37] (step=0846000) Train Loss: 0.3214, Train Steps/Sec: 15.89, Grad Norm: 0.0372 +[2025-02-23 12:22:43] (step=0846100) Train Loss: 0.3212, Train Steps/Sec: 16.56, Grad Norm: 0.0337 +[2025-02-23 12:22:48] (step=0846200) Train Loss: 0.3215, Train Steps/Sec: 17.39, Grad Norm: 0.0348 +[2025-02-23 12:22:54] (step=0846300) Train Loss: 0.3210, Train Steps/Sec: 17.40, Grad Norm: 0.0367 +[2025-02-23 12:23:00] (step=0846400) Train Loss: 0.3214, Train Steps/Sec: 17.41, Grad Norm: 0.0339 +[2025-02-23 12:23:06] (step=0846500) Train Loss: 0.3216, Train Steps/Sec: 17.37, Grad Norm: 0.0408 +[2025-02-23 12:23:11] (step=0846600) Train Loss: 0.3212, Train Steps/Sec: 17.41, Grad Norm: 0.0330 +[2025-02-23 12:23:17] (step=0846700) Train Loss: 0.3213, Train Steps/Sec: 17.44, Grad Norm: 0.0369 +[2025-02-23 12:23:23] (step=0846800) Train Loss: 0.3218, Train Steps/Sec: 17.38, Grad Norm: 0.0366 +[2025-02-23 12:23:29] (step=0846900) Train Loss: 0.3209, Train Steps/Sec: 17.27, Grad Norm: 0.0350 +[2025-02-23 12:23:35] (step=0847000) Train Loss: 0.3212, Train Steps/Sec: 14.76, Grad Norm: 0.0383 +[2025-02-23 12:23:41] (step=0847100) Train Loss: 0.3210, Train Steps/Sec: 17.41, Grad Norm: 0.0351 +[2025-02-23 12:23:47] (step=0847200) Train Loss: 0.3208, Train Steps/Sec: 15.91, Grad Norm: 0.0395 +[2025-02-23 12:23:53] (step=0847300) Train Loss: 0.3211, Train Steps/Sec: 16.59, Grad Norm: 0.0375 +[2025-02-23 12:23:59] (step=0847400) Train Loss: 0.3210, Train Steps/Sec: 17.42, Grad Norm: 0.0343 +[2025-02-23 12:24:05] (step=0847500) Train Loss: 0.3209, Train Steps/Sec: 15.92, Grad Norm: 0.0339 +[2025-02-23 12:24:12] (step=0847600) Train Loss: 0.3213, Train Steps/Sec: 14.31, Grad Norm: 0.0345 +[2025-02-23 12:24:18] (step=0847700) Train Loss: 0.3211, Train Steps/Sec: 16.62, Grad Norm: 0.0340 +[2025-02-23 12:24:24] (step=0847800) Train Loss: 0.3206, Train Steps/Sec: 16.67, Grad Norm: 0.0346 +[2025-02-23 12:24:30] (step=0847900) Train Loss: 0.3217, Train Steps/Sec: 16.73, Grad Norm: 0.0340 +[2025-02-23 12:24:36] (step=0848000) Train Loss: 0.3216, Train Steps/Sec: 16.69, Grad Norm: 0.0350 +[2025-02-23 12:24:43] (step=0848100) Train Loss: 0.3210, Train Steps/Sec: 15.97, Grad Norm: 0.0353 +[2025-02-23 12:24:50] (step=0848200) Train Loss: 0.3205, Train Steps/Sec: 14.65, Grad Norm: 0.0357 +[2025-02-23 12:24:55] (step=0848300) Train Loss: 0.3217, Train Steps/Sec: 17.36, Grad Norm: 0.0344 +[2025-02-23 12:25:01] (step=0848400) Train Loss: 0.3210, Train Steps/Sec: 17.36, Grad Norm: 0.0330 +[2025-02-23 12:25:07] (step=0848500) Train Loss: 0.3208, Train Steps/Sec: 17.34, Grad Norm: 0.0321 +[2025-02-23 12:25:13] (step=0848600) Train Loss: 0.3218, Train Steps/Sec: 17.35, Grad Norm: 0.0378 +[2025-02-23 12:25:18] (step=0848700) Train Loss: 0.3217, Train Steps/Sec: 17.42, Grad Norm: 0.0338 +[2025-02-23 12:25:24] (step=0848800) Train Loss: 0.3220, Train Steps/Sec: 17.43, Grad Norm: 0.0407 +[2025-02-23 12:25:30] (step=0848900) Train Loss: 0.3214, Train Steps/Sec: 17.42, Grad Norm: 0.0346 +[2025-02-23 12:25:36] (step=0849000) Train Loss: 0.3207, Train Steps/Sec: 17.40, Grad Norm: 0.0337 +[2025-02-23 12:25:41] (step=0849100) Train Loss: 0.3213, Train Steps/Sec: 17.39, Grad Norm: 0.0331 +[2025-02-23 12:25:48] (step=0849200) Train Loss: 0.3209, Train Steps/Sec: 15.86, Grad Norm: 0.0359 +[2025-02-23 12:25:53] (step=0849300) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0353 +[2025-02-23 12:25:59] (step=0849400) Train Loss: 0.3213, Train Steps/Sec: 16.51, Grad Norm: 0.0335 +[2025-02-23 12:26:07] (step=0849500) Train Loss: 0.3212, Train Steps/Sec: 13.60, Grad Norm: 0.0412 +[2025-02-23 12:26:14] (step=0849600) Train Loss: 0.3214, Train Steps/Sec: 14.11, Grad Norm: 0.0346 +[2025-02-23 12:26:20] (step=0849700) Train Loss: 0.3205, Train Steps/Sec: 15.90, Grad Norm: 0.0344 +[2025-02-23 12:26:26] (step=0849800) Train Loss: 0.3213, Train Steps/Sec: 16.65, Grad Norm: 0.0377 +[2025-02-23 12:26:32] (step=0849900) Train Loss: 0.3206, Train Steps/Sec: 16.63, Grad Norm: 0.0350 +[2025-02-23 12:26:38] (step=0850000) Train Loss: 0.3215, Train Steps/Sec: 16.71, Grad Norm: 0.0366 +[2025-02-23 12:26:39] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0850000.pt +[2025-02-23 12:26:45] (step=0850100) Train Loss: 0.3206, Train Steps/Sec: 15.11, Grad Norm: 0.0389 +[2025-02-23 12:26:51] (step=0850200) Train Loss: 0.3214, Train Steps/Sec: 17.41, Grad Norm: 0.0336 +[2025-02-23 12:26:56] (step=0850300) Train Loss: 0.3215, Train Steps/Sec: 17.38, Grad Norm: 0.0352 +[2025-02-23 12:27:02] (step=0850400) Train Loss: 0.3214, Train Steps/Sec: 17.41, Grad Norm: 0.0428 +[2025-02-23 12:27:08] (step=0850500) Train Loss: 0.3208, Train Steps/Sec: 17.39, Grad Norm: 0.0345 +[2025-02-23 12:27:14] (step=0850600) Train Loss: 0.3211, Train Steps/Sec: 17.44, Grad Norm: 0.0354 +[2025-02-23 12:27:20] (step=0850700) Train Loss: 0.3210, Train Steps/Sec: 14.57, Grad Norm: 0.0379 +[2025-02-23 12:27:26] (step=0850800) Train Loss: 0.3209, Train Steps/Sec: 17.31, Grad Norm: 0.0339 +[2025-02-23 12:27:32] (step=0850900) Train Loss: 0.3210, Train Steps/Sec: 17.35, Grad Norm: 0.0387 +[2025-02-23 12:27:38] (step=0851000) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0337 +[2025-02-23 12:27:44] (step=0851100) Train Loss: 0.3214, Train Steps/Sec: 17.22, Grad Norm: 0.0343 +[2025-02-23 12:27:50] (step=0851200) Train Loss: 0.3207, Train Steps/Sec: 15.69, Grad Norm: 0.0378 +[2025-02-23 12:27:56] (step=0851300) Train Loss: 0.3212, Train Steps/Sec: 16.42, Grad Norm: 0.0335 +[2025-02-23 12:28:02] (step=0851400) Train Loss: 0.3214, Train Steps/Sec: 17.31, Grad Norm: 0.0330 +[2025-02-23 12:28:09] (step=0851500) Train Loss: 0.3209, Train Steps/Sec: 13.74, Grad Norm: 0.0345 +[2025-02-23 12:28:15] (step=0851600) Train Loss: 0.3209, Train Steps/Sec: 16.75, Grad Norm: 0.0387 +[2025-02-23 12:28:21] (step=0851700) Train Loss: 0.3215, Train Steps/Sec: 16.04, Grad Norm: 0.0354 +[2025-02-23 12:28:27] (step=0851800) Train Loss: 0.3213, Train Steps/Sec: 16.72, Grad Norm: 0.0398 +[2025-02-23 12:28:33] (step=0851900) Train Loss: 0.3210, Train Steps/Sec: 16.48, Grad Norm: 0.0371 +[2025-02-23 12:28:41] (step=0852000) Train Loss: 0.3214, Train Steps/Sec: 13.63, Grad Norm: 0.0347 +[2025-02-23 12:28:46] (step=0852100) Train Loss: 0.3211, Train Steps/Sec: 17.45, Grad Norm: 0.0377 +[2025-02-23 12:28:52] (step=0852200) Train Loss: 0.3214, Train Steps/Sec: 17.45, Grad Norm: 0.0345 +[2025-02-23 12:28:58] (step=0852300) Train Loss: 0.3212, Train Steps/Sec: 17.52, Grad Norm: 0.0379 +[2025-02-23 12:29:04] (step=0852400) Train Loss: 0.3213, Train Steps/Sec: 17.47, Grad Norm: 0.0375 +[2025-02-23 12:29:09] (step=0852500) Train Loss: 0.3212, Train Steps/Sec: 17.49, Grad Norm: 0.0363 +[2025-02-23 12:29:15] (step=0852600) Train Loss: 0.3207, Train Steps/Sec: 17.51, Grad Norm: 0.0371 +[2025-02-23 12:29:21] (step=0852700) Train Loss: 0.3210, Train Steps/Sec: 17.49, Grad Norm: 0.0369 +[2025-02-23 12:29:26] (step=0852800) Train Loss: 0.3210, Train Steps/Sec: 17.46, Grad Norm: 0.0335 +[2025-02-23 12:29:32] (step=0852900) Train Loss: 0.3214, Train Steps/Sec: 17.46, Grad Norm: 0.0352 +[2025-02-23 12:29:38] (step=0853000) Train Loss: 0.3213, Train Steps/Sec: 17.46, Grad Norm: 0.0406 +[2025-02-23 12:29:44] (step=0853100) Train Loss: 0.3211, Train Steps/Sec: 17.48, Grad Norm: 0.0381 +[2025-02-23 12:29:51] (step=0853200) Train Loss: 0.3215, Train Steps/Sec: 13.65, Grad Norm: 0.0389 +[2025-02-23 12:29:57] (step=0853300) Train Loss: 0.3211, Train Steps/Sec: 17.31, Grad Norm: 0.0362 +[2025-02-23 12:30:03] (step=0853400) Train Loss: 0.3214, Train Steps/Sec: 16.48, Grad Norm: 0.0361 +[2025-02-23 12:30:10] (step=0853500) Train Loss: 0.3214, Train Steps/Sec: 14.17, Grad Norm: 0.0363 +[2025-02-23 12:30:16] (step=0853600) Train Loss: 0.3210, Train Steps/Sec: 15.90, Grad Norm: 0.0371 +[2025-02-23 12:30:22] (step=0853700) Train Loss: 0.3215, Train Steps/Sec: 15.89, Grad Norm: 0.0358 +[2025-02-23 12:30:28] (step=0853800) Train Loss: 0.3213, Train Steps/Sec: 16.57, Grad Norm: 0.0377 +[2025-02-23 12:30:34] (step=0853900) Train Loss: 0.3205, Train Steps/Sec: 17.37, Grad Norm: 0.0409 +[2025-02-23 12:30:40] (step=0854000) Train Loss: 0.3209, Train Steps/Sec: 16.62, Grad Norm: 0.0351 +[2025-02-23 12:30:46] (step=0854100) Train Loss: 0.3217, Train Steps/Sec: 16.69, Grad Norm: 0.0390 +[2025-02-23 12:30:52] (step=0854200) Train Loss: 0.3213, Train Steps/Sec: 17.50, Grad Norm: 0.0350 +[2025-02-23 12:30:58] (step=0854300) Train Loss: 0.3214, Train Steps/Sec: 17.50, Grad Norm: 0.0369 +[2025-02-23 12:31:03] (step=0854400) Train Loss: 0.3211, Train Steps/Sec: 17.48, Grad Norm: 0.0341 +[2025-02-23 12:31:10] (step=0854500) Train Loss: 0.3211, Train Steps/Sec: 14.58, Grad Norm: 0.0344 +[2025-02-23 12:31:16] (step=0854600) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0373 +[2025-02-23 12:31:22] (step=0854700) Train Loss: 0.3210, Train Steps/Sec: 17.33, Grad Norm: 0.0354 +[2025-02-23 12:31:28] (step=0854800) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0397 +[2025-02-23 12:31:33] (step=0854900) Train Loss: 0.3208, Train Steps/Sec: 17.18, Grad Norm: 0.0346 +[2025-02-23 12:31:39] (step=0855000) Train Loss: 0.3211, Train Steps/Sec: 17.21, Grad Norm: 0.0361 +[2025-02-23 12:31:45] (step=0855100) Train Loss: 0.3211, Train Steps/Sec: 17.24, Grad Norm: 0.0383 +[2025-02-23 12:31:51] (step=0855200) Train Loss: 0.3206, Train Steps/Sec: 15.76, Grad Norm: 0.0335 +[2025-02-23 12:31:57] (step=0855300) Train Loss: 0.3210, Train Steps/Sec: 17.37, Grad Norm: 0.0367 +[2025-02-23 12:32:04] (step=0855400) Train Loss: 0.3208, Train Steps/Sec: 14.18, Grad Norm: 0.0338 +[2025-02-23 12:32:10] (step=0855500) Train Loss: 0.3210, Train Steps/Sec: 15.95, Grad Norm: 0.0383 +[2025-02-23 12:32:17] (step=0855600) Train Loss: 0.3219, Train Steps/Sec: 15.37, Grad Norm: 0.0376 +[2025-02-23 12:32:24] (step=0855700) Train Loss: 0.3209, Train Steps/Sec: 13.58, Grad Norm: 0.0372 +[2025-02-23 12:32:30] (step=0855800) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0355 +[2025-02-23 12:32:36] (step=0855900) Train Loss: 0.3213, Train Steps/Sec: 16.52, Grad Norm: 0.0349 +[2025-02-23 12:32:42] (step=0856000) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0382 +[2025-02-23 12:32:48] (step=0856100) Train Loss: 0.3209, Train Steps/Sec: 16.69, Grad Norm: 0.0386 +[2025-02-23 12:32:54] (step=0856200) Train Loss: 0.3209, Train Steps/Sec: 17.43, Grad Norm: 0.0336 +[2025-02-23 12:32:59] (step=0856300) Train Loss: 0.3211, Train Steps/Sec: 17.44, Grad Norm: 0.0355 +[2025-02-23 12:33:05] (step=0856400) Train Loss: 0.3211, Train Steps/Sec: 17.43, Grad Norm: 0.0376 +[2025-02-23 12:33:11] (step=0856500) Train Loss: 0.3205, Train Steps/Sec: 17.46, Grad Norm: 0.0372 +[2025-02-23 12:33:17] (step=0856600) Train Loss: 0.3214, Train Steps/Sec: 17.48, Grad Norm: 0.0371 +[2025-02-23 12:33:22] (step=0856700) Train Loss: 0.3217, Train Steps/Sec: 17.49, Grad Norm: 0.0395 +[2025-02-23 12:33:28] (step=0856800) Train Loss: 0.3215, Train Steps/Sec: 17.49, Grad Norm: 0.0399 +[2025-02-23 12:33:34] (step=0856900) Train Loss: 0.3213, Train Steps/Sec: 17.43, Grad Norm: 0.0345 +[2025-02-23 12:33:41] (step=0857000) Train Loss: 0.3211, Train Steps/Sec: 14.66, Grad Norm: 0.0336 +[2025-02-23 12:33:46] (step=0857100) Train Loss: 0.3205, Train Steps/Sec: 17.40, Grad Norm: 0.0359 +[2025-02-23 12:33:53] (step=0857200) Train Loss: 0.3209, Train Steps/Sec: 15.93, Grad Norm: 0.0359 +[2025-02-23 12:33:59] (step=0857300) Train Loss: 0.3215, Train Steps/Sec: 16.80, Grad Norm: 0.0375 +[2025-02-23 12:34:05] (step=0857400) Train Loss: 0.3213, Train Steps/Sec: 14.84, Grad Norm: 0.0361 +[2025-02-23 12:34:12] (step=0857500) Train Loss: 0.3209, Train Steps/Sec: 15.41, Grad Norm: 0.0335 +[2025-02-23 12:34:18] (step=0857600) Train Loss: 0.3208, Train Steps/Sec: 16.10, Grad Norm: 0.0378 +[2025-02-23 12:34:24] (step=0857700) Train Loss: 0.3214, Train Steps/Sec: 15.93, Grad Norm: 0.0355 +[2025-02-23 12:34:30] (step=0857800) Train Loss: 0.3207, Train Steps/Sec: 17.46, Grad Norm: 0.0326 +[2025-02-23 12:34:36] (step=0857900) Train Loss: 0.3212, Train Steps/Sec: 16.60, Grad Norm: 0.0330 +[2025-02-23 12:34:42] (step=0858000) Train Loss: 0.3212, Train Steps/Sec: 17.31, Grad Norm: 0.0361 +[2025-02-23 12:34:48] (step=0858100) Train Loss: 0.3212, Train Steps/Sec: 16.63, Grad Norm: 0.0366 +[2025-02-23 12:34:55] (step=0858200) Train Loss: 0.3212, Train Steps/Sec: 14.63, Grad Norm: 0.0381 +[2025-02-23 12:35:00] (step=0858300) Train Loss: 0.3212, Train Steps/Sec: 17.31, Grad Norm: 0.0423 +[2025-02-23 12:35:06] (step=0858400) Train Loss: 0.3211, Train Steps/Sec: 17.39, Grad Norm: 0.0360 +[2025-02-23 12:35:12] (step=0858500) Train Loss: 0.3216, Train Steps/Sec: 17.43, Grad Norm: 0.0376 +[2025-02-23 12:35:18] (step=0858600) Train Loss: 0.3213, Train Steps/Sec: 17.36, Grad Norm: 0.0358 +[2025-02-23 12:35:23] (step=0858700) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0335 +[2025-02-23 12:35:29] (step=0858800) Train Loss: 0.3213, Train Steps/Sec: 17.43, Grad Norm: 0.0382 +[2025-02-23 12:35:35] (step=0858900) Train Loss: 0.3213, Train Steps/Sec: 17.45, Grad Norm: 0.0397 +[2025-02-23 12:35:41] (step=0859000) Train Loss: 0.3215, Train Steps/Sec: 17.40, Grad Norm: 0.0373 +[2025-02-23 12:35:46] (step=0859100) Train Loss: 0.3211, Train Steps/Sec: 17.37, Grad Norm: 0.0345 +[2025-02-23 12:35:52] (step=0859200) Train Loss: 0.3212, Train Steps/Sec: 16.64, Grad Norm: 0.0351 +[2025-02-23 12:35:59] (step=0859300) Train Loss: 0.3207, Train Steps/Sec: 14.24, Grad Norm: 0.0360 +[2025-02-23 12:36:05] (step=0859400) Train Loss: 0.3213, Train Steps/Sec: 16.63, Grad Norm: 0.0351 +[2025-02-23 12:36:13] (step=0859500) Train Loss: 0.3216, Train Steps/Sec: 12.71, Grad Norm: 0.0372 +[2025-02-23 12:36:20] (step=0859600) Train Loss: 0.3209, Train Steps/Sec: 15.93, Grad Norm: 0.0376 +[2025-02-23 12:36:26] (step=0859700) Train Loss: 0.3211, Train Steps/Sec: 16.48, Grad Norm: 0.0384 +[2025-02-23 12:36:32] (step=0859800) Train Loss: 0.3214, Train Steps/Sec: 16.45, Grad Norm: 0.0378 +[2025-02-23 12:36:38] (step=0859900) Train Loss: 0.3208, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 12:36:43] (step=0860000) Train Loss: 0.3213, Train Steps/Sec: 17.24, Grad Norm: 0.0409 +[2025-02-23 12:36:49] (step=0860100) Train Loss: 0.3211, Train Steps/Sec: 16.57, Grad Norm: 0.0343 +[2025-02-23 12:36:55] (step=0860200) Train Loss: 0.3211, Train Steps/Sec: 17.30, Grad Norm: 0.0408 +[2025-02-23 12:37:01] (step=0860300) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0361 +[2025-02-23 12:37:07] (step=0860400) Train Loss: 0.3216, Train Steps/Sec: 17.39, Grad Norm: 0.0347 +[2025-02-23 12:37:12] (step=0860500) Train Loss: 0.3207, Train Steps/Sec: 17.43, Grad Norm: 0.0307 +[2025-02-23 12:37:18] (step=0860600) Train Loss: 0.3209, Train Steps/Sec: 17.30, Grad Norm: 0.0376 +[2025-02-23 12:37:25] (step=0860700) Train Loss: 0.3212, Train Steps/Sec: 14.51, Grad Norm: 0.0371 +[2025-02-23 12:37:31] (step=0860800) Train Loss: 0.3212, Train Steps/Sec: 17.10, Grad Norm: 0.0368 +[2025-02-23 12:37:37] (step=0860900) Train Loss: 0.3215, Train Steps/Sec: 17.16, Grad Norm: 0.0356 +[2025-02-23 12:37:43] (step=0861000) Train Loss: 0.3215, Train Steps/Sec: 17.20, Grad Norm: 0.0338 +[2025-02-23 12:37:48] (step=0861100) Train Loss: 0.3216, Train Steps/Sec: 17.10, Grad Norm: 0.0346 +[2025-02-23 12:37:55] (step=0861200) Train Loss: 0.3213, Train Steps/Sec: 15.80, Grad Norm: 0.0353 +[2025-02-23 12:38:02] (step=0861300) Train Loss: 0.3212, Train Steps/Sec: 14.59, Grad Norm: 0.0409 +[2025-02-23 12:38:08] (step=0861400) Train Loss: 0.3210, Train Steps/Sec: 15.67, Grad Norm: 0.0431 +[2025-02-23 12:38:15] (step=0861500) Train Loss: 0.3211, Train Steps/Sec: 15.16, Grad Norm: 0.0365 +[2025-02-23 12:38:21] (step=0861600) Train Loss: 0.3211, Train Steps/Sec: 15.69, Grad Norm: 0.0339 +[2025-02-23 12:38:27] (step=0861700) Train Loss: 0.3211, Train Steps/Sec: 16.46, Grad Norm: 0.0333 +[2025-02-23 12:38:33] (step=0861800) Train Loss: 0.3202, Train Steps/Sec: 16.39, Grad Norm: 0.0360 +[2025-02-23 12:38:39] (step=0861900) Train Loss: 0.3214, Train Steps/Sec: 17.24, Grad Norm: 0.0370 +[2025-02-23 12:38:46] (step=0862000) Train Loss: 0.3211, Train Steps/Sec: 14.37, Grad Norm: 0.0379 +[2025-02-23 12:38:52] (step=0862100) Train Loss: 0.3213, Train Steps/Sec: 16.53, Grad Norm: 0.0359 +[2025-02-23 12:38:58] (step=0862200) Train Loss: 0.3212, Train Steps/Sec: 17.28, Grad Norm: 0.0365 +[2025-02-23 12:39:04] (step=0862300) Train Loss: 0.3213, Train Steps/Sec: 17.29, Grad Norm: 0.0378 +[2025-02-23 12:39:09] (step=0862400) Train Loss: 0.3217, Train Steps/Sec: 17.30, Grad Norm: 0.0333 +[2025-02-23 12:39:15] (step=0862500) Train Loss: 0.3210, Train Steps/Sec: 17.26, Grad Norm: 0.0376 +[2025-02-23 12:39:21] (step=0862600) Train Loss: 0.3218, Train Steps/Sec: 17.19, Grad Norm: 0.0363 +[2025-02-23 12:39:27] (step=0862700) Train Loss: 0.3211, Train Steps/Sec: 17.24, Grad Norm: 0.0362 +[2025-02-23 12:39:33] (step=0862800) Train Loss: 0.3211, Train Steps/Sec: 17.37, Grad Norm: 0.0349 +[2025-02-23 12:39:38] (step=0862900) Train Loss: 0.3212, Train Steps/Sec: 17.29, Grad Norm: 0.0340 +[2025-02-23 12:39:44] (step=0863000) Train Loss: 0.3207, Train Steps/Sec: 17.30, Grad Norm: 0.0317 +[2025-02-23 12:39:50] (step=0863100) Train Loss: 0.3209, Train Steps/Sec: 17.30, Grad Norm: 0.0349 +[2025-02-23 12:39:58] (step=0863200) Train Loss: 0.3209, Train Steps/Sec: 11.73, Grad Norm: 0.0360 +[2025-02-23 12:40:04] (step=0863300) Train Loss: 0.3208, Train Steps/Sec: 16.40, Grad Norm: 0.0319 +[2025-02-23 12:40:11] (step=0863400) Train Loss: 0.3208, Train Steps/Sec: 15.84, Grad Norm: 0.0404 +[2025-02-23 12:40:17] (step=0863500) Train Loss: 0.3207, Train Steps/Sec: 15.17, Grad Norm: 0.0373 +[2025-02-23 12:40:23] (step=0863600) Train Loss: 0.3208, Train Steps/Sec: 16.50, Grad Norm: 0.0375 +[2025-02-23 12:40:30] (step=0863700) Train Loss: 0.3214, Train Steps/Sec: 15.76, Grad Norm: 0.0377 +[2025-02-23 12:40:36] (step=0863800) Train Loss: 0.3219, Train Steps/Sec: 17.24, Grad Norm: 0.0355 +[2025-02-23 12:40:41] (step=0863900) Train Loss: 0.3210, Train Steps/Sec: 17.23, Grad Norm: 0.0350 +[2025-02-23 12:40:47] (step=0864000) Train Loss: 0.3213, Train Steps/Sec: 17.24, Grad Norm: 0.0369 +[2025-02-23 12:40:53] (step=0864100) Train Loss: 0.3211, Train Steps/Sec: 16.56, Grad Norm: 0.0330 +[2025-02-23 12:40:59] (step=0864200) Train Loss: 0.3207, Train Steps/Sec: 17.27, Grad Norm: 0.0348 +[2025-02-23 12:41:05] (step=0864300) Train Loss: 0.3213, Train Steps/Sec: 17.29, Grad Norm: 0.0345 +[2025-02-23 12:41:11] (step=0864400) Train Loss: 0.3213, Train Steps/Sec: 17.34, Grad Norm: 0.0340 +[2025-02-23 12:41:18] (step=0864500) Train Loss: 0.3205, Train Steps/Sec: 14.30, Grad Norm: 0.0350 +[2025-02-23 12:41:23] (step=0864600) Train Loss: 0.3214, Train Steps/Sec: 17.34, Grad Norm: 0.0352 +[2025-02-23 12:41:29] (step=0864700) Train Loss: 0.3203, Train Steps/Sec: 17.38, Grad Norm: 0.0363 +[2025-02-23 12:41:35] (step=0864800) Train Loss: 0.3211, Train Steps/Sec: 17.30, Grad Norm: 0.0392 +[2025-02-23 12:41:41] (step=0864900) Train Loss: 0.3212, Train Steps/Sec: 17.28, Grad Norm: 0.0360 +[2025-02-23 12:41:46] (step=0865000) Train Loss: 0.3209, Train Steps/Sec: 17.29, Grad Norm: 0.0349 +[2025-02-23 12:41:53] (step=0865100) Train Loss: 0.3211, Train Steps/Sec: 15.90, Grad Norm: 0.0372 +[2025-02-23 12:41:59] (step=0865200) Train Loss: 0.3219, Train Steps/Sec: 15.29, Grad Norm: 0.0361 +[2025-02-23 12:42:06] (step=0865300) Train Loss: 0.3212, Train Steps/Sec: 15.16, Grad Norm: 0.0314 +[2025-02-23 12:42:12] (step=0865400) Train Loss: 0.3215, Train Steps/Sec: 16.43, Grad Norm: 0.0332 +[2025-02-23 12:42:19] (step=0865500) Train Loss: 0.3214, Train Steps/Sec: 15.11, Grad Norm: 0.0344 +[2025-02-23 12:42:25] (step=0865600) Train Loss: 0.3208, Train Steps/Sec: 16.58, Grad Norm: 0.0370 +[2025-02-23 12:42:32] (step=0865700) Train Loss: 0.3217, Train Steps/Sec: 13.31, Grad Norm: 0.0372 +[2025-02-23 12:42:38] (step=0865800) Train Loss: 0.3208, Train Steps/Sec: 17.39, Grad Norm: 0.0317 +[2025-02-23 12:42:44] (step=0865900) Train Loss: 0.3215, Train Steps/Sec: 17.41, Grad Norm: 0.0382 +[2025-02-23 12:42:49] (step=0866000) Train Loss: 0.3211, Train Steps/Sec: 17.35, Grad Norm: 0.0370 +[2025-02-23 12:42:55] (step=0866100) Train Loss: 0.3216, Train Steps/Sec: 16.56, Grad Norm: 0.0360 +[2025-02-23 12:43:01] (step=0866200) Train Loss: 0.3213, Train Steps/Sec: 17.29, Grad Norm: 0.0351 +[2025-02-23 12:43:07] (step=0866300) Train Loss: 0.3217, Train Steps/Sec: 17.34, Grad Norm: 0.0326 +[2025-02-23 12:43:13] (step=0866400) Train Loss: 0.3209, Train Steps/Sec: 17.23, Grad Norm: 0.0365 +[2025-02-23 12:43:19] (step=0866500) Train Loss: 0.3212, Train Steps/Sec: 17.24, Grad Norm: 0.0349 +[2025-02-23 12:43:24] (step=0866600) Train Loss: 0.3212, Train Steps/Sec: 17.23, Grad Norm: 0.0333 +[2025-02-23 12:43:30] (step=0866700) Train Loss: 0.3217, Train Steps/Sec: 17.22, Grad Norm: 0.0342 +[2025-02-23 12:43:36] (step=0866800) Train Loss: 0.3209, Train Steps/Sec: 17.22, Grad Norm: 0.0373 +[2025-02-23 12:43:42] (step=0866900) Train Loss: 0.3207, Train Steps/Sec: 17.24, Grad Norm: 0.0370 +[2025-02-23 12:43:49] (step=0867000) Train Loss: 0.3210, Train Steps/Sec: 13.93, Grad Norm: 0.0356 +[2025-02-23 12:43:56] (step=0867100) Train Loss: 0.3214, Train Steps/Sec: 15.28, Grad Norm: 0.0358 +[2025-02-23 12:44:02] (step=0867200) Train Loss: 0.3211, Train Steps/Sec: 15.90, Grad Norm: 0.0365 +[2025-02-23 12:44:08] (step=0867300) Train Loss: 0.3213, Train Steps/Sec: 15.93, Grad Norm: 0.0380 +[2025-02-23 12:44:14] (step=0867400) Train Loss: 0.3213, Train Steps/Sec: 15.83, Grad Norm: 0.0357 +[2025-02-23 12:44:21] (step=0867500) Train Loss: 0.3216, Train Steps/Sec: 15.92, Grad Norm: 0.0338 +[2025-02-23 12:44:27] (step=0867600) Train Loss: 0.3213, Train Steps/Sec: 16.65, Grad Norm: 0.0364 +[2025-02-23 12:44:33] (step=0867700) Train Loss: 0.3208, Train Steps/Sec: 16.41, Grad Norm: 0.0363 +[2025-02-23 12:44:39] (step=0867800) Train Loss: 0.3208, Train Steps/Sec: 16.42, Grad Norm: 0.0354 +[2025-02-23 12:44:45] (step=0867900) Train Loss: 0.3211, Train Steps/Sec: 17.17, Grad Norm: 0.0326 +[2025-02-23 12:44:51] (step=0868000) Train Loss: 0.3207, Train Steps/Sec: 17.11, Grad Norm: 0.0318 +[2025-02-23 12:44:57] (step=0868100) Train Loss: 0.3213, Train Steps/Sec: 16.54, Grad Norm: 0.0355 +[2025-02-23 12:45:04] (step=0868200) Train Loss: 0.3210, Train Steps/Sec: 14.35, Grad Norm: 0.0390 +[2025-02-23 12:45:09] (step=0868300) Train Loss: 0.3213, Train Steps/Sec: 17.13, Grad Norm: 0.0359 +[2025-02-23 12:45:15] (step=0868400) Train Loss: 0.3213, Train Steps/Sec: 17.12, Grad Norm: 0.0364 +[2025-02-23 12:45:21] (step=0868500) Train Loss: 0.3217, Train Steps/Sec: 17.20, Grad Norm: 0.0375 +[2025-02-23 12:45:27] (step=0868600) Train Loss: 0.3207, Train Steps/Sec: 17.21, Grad Norm: 0.0335 +[2025-02-23 12:45:33] (step=0868700) Train Loss: 0.3218, Train Steps/Sec: 17.19, Grad Norm: 0.0382 +[2025-02-23 12:45:39] (step=0868800) Train Loss: 0.3212, Train Steps/Sec: 17.29, Grad Norm: 0.0363 +[2025-02-23 12:45:44] (step=0868900) Train Loss: 0.3207, Train Steps/Sec: 17.33, Grad Norm: 0.0384 +[2025-02-23 12:45:51] (step=0869000) Train Loss: 0.3214, Train Steps/Sec: 14.75, Grad Norm: 0.0375 +[2025-02-23 12:45:57] (step=0869100) Train Loss: 0.3212, Train Steps/Sec: 17.39, Grad Norm: 0.0346 +[2025-02-23 12:46:03] (step=0869200) Train Loss: 0.3210, Train Steps/Sec: 15.37, Grad Norm: 0.0353 +[2025-02-23 12:46:09] (step=0869300) Train Loss: 0.3215, Train Steps/Sec: 16.61, Grad Norm: 0.0363 +[2025-02-23 12:46:16] (step=0869400) Train Loss: 0.3211, Train Steps/Sec: 15.86, Grad Norm: 0.0376 +[2025-02-23 12:46:23] (step=0869500) Train Loss: 0.3211, Train Steps/Sec: 13.28, Grad Norm: 0.0372 +[2025-02-23 12:46:30] (step=0869600) Train Loss: 0.3213, Train Steps/Sec: 15.76, Grad Norm: 0.0356 +[2025-02-23 12:46:35] (step=0869700) Train Loss: 0.3210, Train Steps/Sec: 17.23, Grad Norm: 0.0393 +[2025-02-23 12:46:41] (step=0869800) Train Loss: 0.3211, Train Steps/Sec: 16.56, Grad Norm: 0.0359 +[2025-02-23 12:46:47] (step=0869900) Train Loss: 0.3214, Train Steps/Sec: 17.24, Grad Norm: 0.0374 +[2025-02-23 12:46:53] (step=0870000) Train Loss: 0.3208, Train Steps/Sec: 17.22, Grad Norm: 0.0375 +[2025-02-23 12:46:59] (step=0870100) Train Loss: 0.3208, Train Steps/Sec: 16.51, Grad Norm: 0.0372 +[2025-02-23 12:47:05] (step=0870200) Train Loss: 0.3214, Train Steps/Sec: 17.24, Grad Norm: 0.0430 +[2025-02-23 12:47:11] (step=0870300) Train Loss: 0.3206, Train Steps/Sec: 17.27, Grad Norm: 0.0358 +[2025-02-23 12:47:16] (step=0870400) Train Loss: 0.3209, Train Steps/Sec: 17.31, Grad Norm: 0.0327 +[2025-02-23 12:47:22] (step=0870500) Train Loss: 0.3214, Train Steps/Sec: 17.36, Grad Norm: 0.0348 +[2025-02-23 12:47:28] (step=0870600) Train Loss: 0.3214, Train Steps/Sec: 17.25, Grad Norm: 0.0368 +[2025-02-23 12:47:35] (step=0870700) Train Loss: 0.3213, Train Steps/Sec: 14.32, Grad Norm: 0.0356 +[2025-02-23 12:47:41] (step=0870800) Train Loss: 0.3206, Train Steps/Sec: 16.91, Grad Norm: 0.0342 +[2025-02-23 12:47:47] (step=0870900) Train Loss: 0.3215, Train Steps/Sec: 16.32, Grad Norm: 0.0384 +[2025-02-23 12:47:54] (step=0871000) Train Loss: 0.3212, Train Steps/Sec: 15.10, Grad Norm: 0.0361 +[2025-02-23 12:48:00] (step=0871100) Train Loss: 0.3211, Train Steps/Sec: 17.17, Grad Norm: 0.0331 +[2025-02-23 12:48:06] (step=0871200) Train Loss: 0.3212, Train Steps/Sec: 15.20, Grad Norm: 0.0339 +[2025-02-23 12:48:12] (step=0871300) Train Loss: 0.3209, Train Steps/Sec: 15.68, Grad Norm: 0.0351 +[2025-02-23 12:48:19] (step=0871400) Train Loss: 0.3213, Train Steps/Sec: 16.39, Grad Norm: 0.0325 +[2025-02-23 12:48:25] (step=0871500) Train Loss: 0.3214, Train Steps/Sec: 15.07, Grad Norm: 0.0353 +[2025-02-23 12:48:31] (step=0871600) Train Loss: 0.3211, Train Steps/Sec: 17.24, Grad Norm: 0.0350 +[2025-02-23 12:48:37] (step=0871700) Train Loss: 0.3209, Train Steps/Sec: 16.46, Grad Norm: 0.0361 +[2025-02-23 12:48:43] (step=0871800) Train Loss: 0.3208, Train Steps/Sec: 16.43, Grad Norm: 0.0351 +[2025-02-23 12:48:49] (step=0871900) Train Loss: 0.3211, Train Steps/Sec: 17.20, Grad Norm: 0.0341 +[2025-02-23 12:48:56] (step=0872000) Train Loss: 0.3216, Train Steps/Sec: 14.05, Grad Norm: 0.0334 +[2025-02-23 12:49:02] (step=0872100) Train Loss: 0.3215, Train Steps/Sec: 16.20, Grad Norm: 0.0366 +[2025-02-23 12:49:08] (step=0872200) Train Loss: 0.3213, Train Steps/Sec: 17.08, Grad Norm: 0.0350 +[2025-02-23 12:49:14] (step=0872300) Train Loss: 0.3215, Train Steps/Sec: 17.18, Grad Norm: 0.0350 +[2025-02-23 12:49:20] (step=0872400) Train Loss: 0.3207, Train Steps/Sec: 17.18, Grad Norm: 0.0435 +[2025-02-23 12:49:26] (step=0872500) Train Loss: 0.3208, Train Steps/Sec: 17.17, Grad Norm: 0.0339 +[2025-02-23 12:49:31] (step=0872600) Train Loss: 0.3214, Train Steps/Sec: 17.16, Grad Norm: 0.0322 +[2025-02-23 12:49:37] (step=0872700) Train Loss: 0.3207, Train Steps/Sec: 17.15, Grad Norm: 0.0384 +[2025-02-23 12:49:43] (step=0872800) Train Loss: 0.3205, Train Steps/Sec: 17.16, Grad Norm: 0.0331 +[2025-02-23 12:49:50] (step=0872900) Train Loss: 0.3215, Train Steps/Sec: 14.52, Grad Norm: 0.0379 +[2025-02-23 12:49:56] (step=0873000) Train Loss: 0.3206, Train Steps/Sec: 17.35, Grad Norm: 0.0360 +[2025-02-23 12:50:02] (step=0873100) Train Loss: 0.3207, Train Steps/Sec: 15.82, Grad Norm: 0.0339 +[2025-02-23 12:50:09] (step=0873200) Train Loss: 0.3209, Train Steps/Sec: 13.89, Grad Norm: 0.0358 +[2025-02-23 12:50:16] (step=0873300) Train Loss: 0.3213, Train Steps/Sec: 15.66, Grad Norm: 0.0369 +[2025-02-23 12:50:22] (step=0873400) Train Loss: 0.3208, Train Steps/Sec: 16.47, Grad Norm: 0.0329 +[2025-02-23 12:50:28] (step=0873500) Train Loss: 0.3210, Train Steps/Sec: 15.23, Grad Norm: 0.0384 +[2025-02-23 12:50:34] (step=0873600) Train Loss: 0.3207, Train Steps/Sec: 17.36, Grad Norm: 0.0372 +[2025-02-23 12:50:40] (step=0873700) Train Loss: 0.3219, Train Steps/Sec: 16.59, Grad Norm: 0.0331 +[2025-02-23 12:50:46] (step=0873800) Train Loss: 0.3213, Train Steps/Sec: 16.56, Grad Norm: 0.0338 +[2025-02-23 12:50:52] (step=0873900) Train Loss: 0.3207, Train Steps/Sec: 17.27, Grad Norm: 0.0344 +[2025-02-23 12:50:58] (step=0874000) Train Loss: 0.3206, Train Steps/Sec: 17.14, Grad Norm: 0.0379 +[2025-02-23 12:51:04] (step=0874100) Train Loss: 0.3213, Train Steps/Sec: 16.45, Grad Norm: 0.0357 +[2025-02-23 12:51:10] (step=0874200) Train Loss: 0.3206, Train Steps/Sec: 17.31, Grad Norm: 0.0351 +[2025-02-23 12:51:15] (step=0874300) Train Loss: 0.3211, Train Steps/Sec: 17.34, Grad Norm: 0.0366 +[2025-02-23 12:51:21] (step=0874400) Train Loss: 0.3209, Train Steps/Sec: 17.35, Grad Norm: 0.0356 +[2025-02-23 12:51:28] (step=0874500) Train Loss: 0.3212, Train Steps/Sec: 14.29, Grad Norm: 0.0397 +[2025-02-23 12:51:34] (step=0874600) Train Loss: 0.3213, Train Steps/Sec: 17.27, Grad Norm: 0.0336 +[2025-02-23 12:51:40] (step=0874700) Train Loss: 0.3207, Train Steps/Sec: 17.25, Grad Norm: 0.0337 +[2025-02-23 12:51:46] (step=0874800) Train Loss: 0.3211, Train Steps/Sec: 16.58, Grad Norm: 0.0351 +[2025-02-23 12:51:52] (step=0874900) Train Loss: 0.3211, Train Steps/Sec: 15.34, Grad Norm: 0.0342 +[2025-02-23 12:51:58] (step=0875000) Train Loss: 0.3216, Train Steps/Sec: 17.35, Grad Norm: 0.0384 +[2025-02-23 12:52:04] (step=0875100) Train Loss: 0.3211, Train Steps/Sec: 15.97, Grad Norm: 0.0377 +[2025-02-23 12:52:11] (step=0875200) Train Loss: 0.3209, Train Steps/Sec: 15.97, Grad Norm: 0.0348 +[2025-02-23 12:52:17] (step=0875300) Train Loss: 0.3205, Train Steps/Sec: 16.52, Grad Norm: 0.0388 +[2025-02-23 12:52:23] (step=0875400) Train Loss: 0.3214, Train Steps/Sec: 16.53, Grad Norm: 0.0354 +[2025-02-23 12:52:29] (step=0875500) Train Loss: 0.3208, Train Steps/Sec: 15.84, Grad Norm: 0.0357 +[2025-02-23 12:52:35] (step=0875600) Train Loss: 0.3208, Train Steps/Sec: 16.58, Grad Norm: 0.0380 +[2025-02-23 12:52:42] (step=0875700) Train Loss: 0.3210, Train Steps/Sec: 15.26, Grad Norm: 0.0373 +[2025-02-23 12:52:48] (step=0875800) Train Loss: 0.3206, Train Steps/Sec: 14.68, Grad Norm: 0.0339 +[2025-02-23 12:52:54] (step=0875900) Train Loss: 0.3209, Train Steps/Sec: 17.03, Grad Norm: 0.0337 +[2025-02-23 12:53:00] (step=0876000) Train Loss: 0.3211, Train Steps/Sec: 17.00, Grad Norm: 0.0370 +[2025-02-23 12:53:06] (step=0876100) Train Loss: 0.3212, Train Steps/Sec: 16.31, Grad Norm: 0.0367 +[2025-02-23 12:53:12] (step=0876200) Train Loss: 0.3213, Train Steps/Sec: 17.02, Grad Norm: 0.0337 +[2025-02-23 12:53:18] (step=0876300) Train Loss: 0.3208, Train Steps/Sec: 17.00, Grad Norm: 0.0316 +[2025-02-23 12:53:24] (step=0876400) Train Loss: 0.3215, Train Steps/Sec: 17.00, Grad Norm: 0.0352 +[2025-02-23 12:53:30] (step=0876500) Train Loss: 0.3215, Train Steps/Sec: 16.98, Grad Norm: 0.0366 +[2025-02-23 12:53:36] (step=0876600) Train Loss: 0.3211, Train Steps/Sec: 16.99, Grad Norm: 0.0343 +[2025-02-23 12:53:42] (step=0876700) Train Loss: 0.3210, Train Steps/Sec: 16.97, Grad Norm: 0.0329 +[2025-02-23 12:53:49] (step=0876800) Train Loss: 0.3208, Train Steps/Sec: 14.46, Grad Norm: 0.0351 +[2025-02-23 12:53:54] (step=0876900) Train Loss: 0.3205, Train Steps/Sec: 17.04, Grad Norm: 0.0371 +[2025-02-23 12:54:02] (step=0877000) Train Loss: 0.3208, Train Steps/Sec: 13.28, Grad Norm: 0.0319 +[2025-02-23 12:54:08] (step=0877100) Train Loss: 0.3212, Train Steps/Sec: 16.33, Grad Norm: 0.0352 +[2025-02-23 12:54:14] (step=0877200) Train Loss: 0.3212, Train Steps/Sec: 16.35, Grad Norm: 0.0389 +[2025-02-23 12:54:20] (step=0877300) Train Loss: 0.3212, Train Steps/Sec: 16.27, Grad Norm: 0.0380 +[2025-02-23 12:54:27] (step=0877400) Train Loss: 0.3214, Train Steps/Sec: 15.55, Grad Norm: 0.0355 +[2025-02-23 12:54:33] (step=0877500) Train Loss: 0.3214, Train Steps/Sec: 15.65, Grad Norm: 0.0380 +[2025-02-23 12:54:39] (step=0877600) Train Loss: 0.3215, Train Steps/Sec: 17.03, Grad Norm: 0.0344 +[2025-02-23 12:54:45] (step=0877700) Train Loss: 0.3211, Train Steps/Sec: 16.35, Grad Norm: 0.0365 +[2025-02-23 12:54:51] (step=0877800) Train Loss: 0.3214, Train Steps/Sec: 16.25, Grad Norm: 0.0377 +[2025-02-23 12:54:57] (step=0877900) Train Loss: 0.3212, Train Steps/Sec: 17.11, Grad Norm: 0.0378 +[2025-02-23 12:55:03] (step=0878000) Train Loss: 0.3215, Train Steps/Sec: 17.18, Grad Norm: 0.0333 +[2025-02-23 12:55:09] (step=0878100) Train Loss: 0.3210, Train Steps/Sec: 16.42, Grad Norm: 0.0338 +[2025-02-23 12:55:15] (step=0878200) Train Loss: 0.3213, Train Steps/Sec: 17.17, Grad Norm: 0.0385 +[2025-02-23 12:55:22] (step=0878300) Train Loss: 0.3213, Train Steps/Sec: 14.29, Grad Norm: 0.0404 +[2025-02-23 12:55:28] (step=0878400) Train Loss: 0.3207, Train Steps/Sec: 17.16, Grad Norm: 0.0340 +[2025-02-23 12:55:34] (step=0878500) Train Loss: 0.3212, Train Steps/Sec: 17.24, Grad Norm: 0.0343 +[2025-02-23 12:55:39] (step=0878600) Train Loss: 0.3206, Train Steps/Sec: 17.28, Grad Norm: 0.0357 +[2025-02-23 12:55:46] (step=0878700) Train Loss: 0.3207, Train Steps/Sec: 15.29, Grad Norm: 0.0336 +[2025-02-23 12:55:52] (step=0878800) Train Loss: 0.3207, Train Steps/Sec: 16.50, Grad Norm: 0.0403 +[2025-02-23 12:55:58] (step=0878900) Train Loss: 0.3212, Train Steps/Sec: 17.27, Grad Norm: 0.0370 +[2025-02-23 12:56:04] (step=0879000) Train Loss: 0.3210, Train Steps/Sec: 16.52, Grad Norm: 0.0374 +[2025-02-23 12:56:10] (step=0879100) Train Loss: 0.3207, Train Steps/Sec: 16.62, Grad Norm: 0.0337 +[2025-02-23 12:56:16] (step=0879200) Train Loss: 0.3215, Train Steps/Sec: 16.63, Grad Norm: 0.0415 +[2025-02-23 12:56:22] (step=0879300) Train Loss: 0.3210, Train Steps/Sec: 16.54, Grad Norm: 0.0311 +[2025-02-23 12:56:28] (step=0879400) Train Loss: 0.3213, Train Steps/Sec: 15.79, Grad Norm: 0.0368 +[2025-02-23 12:56:36] (step=0879500) Train Loss: 0.3212, Train Steps/Sec: 13.43, Grad Norm: 0.0354 +[2025-02-23 12:56:41] (step=0879600) Train Loss: 0.3212, Train Steps/Sec: 17.18, Grad Norm: 0.0356 +[2025-02-23 12:56:48] (step=0879700) Train Loss: 0.3204, Train Steps/Sec: 16.48, Grad Norm: 0.0340 +[2025-02-23 12:56:54] (step=0879800) Train Loss: 0.3206, Train Steps/Sec: 16.39, Grad Norm: 0.0324 +[2025-02-23 12:56:59] (step=0879900) Train Loss: 0.3212, Train Steps/Sec: 17.21, Grad Norm: 0.0362 +[2025-02-23 12:57:05] (step=0880000) Train Loss: 0.3202, Train Steps/Sec: 17.25, Grad Norm: 0.0374 +[2025-02-23 12:57:11] (step=0880100) Train Loss: 0.3208, Train Steps/Sec: 16.56, Grad Norm: 0.0375 +[2025-02-23 12:57:17] (step=0880200) Train Loss: 0.3206, Train Steps/Sec: 17.30, Grad Norm: 0.0341 +[2025-02-23 12:57:23] (step=0880300) Train Loss: 0.3210, Train Steps/Sec: 17.32, Grad Norm: 0.0356 +[2025-02-23 12:57:29] (step=0880400) Train Loss: 0.3217, Train Steps/Sec: 17.31, Grad Norm: 0.0381 +[2025-02-23 12:57:34] (step=0880500) Train Loss: 0.3203, Train Steps/Sec: 17.28, Grad Norm: 0.0416 +[2025-02-23 12:57:41] (step=0880600) Train Loss: 0.3212, Train Steps/Sec: 16.48, Grad Norm: 0.0363 +[2025-02-23 12:57:47] (step=0880700) Train Loss: 0.3212, Train Steps/Sec: 15.24, Grad Norm: 0.0358 +[2025-02-23 12:57:54] (step=0880800) Train Loss: 0.3219, Train Steps/Sec: 14.40, Grad Norm: 0.0336 +[2025-02-23 12:58:00] (step=0880900) Train Loss: 0.3210, Train Steps/Sec: 15.81, Grad Norm: 0.0346 +[2025-02-23 12:58:06] (step=0881000) Train Loss: 0.3208, Train Steps/Sec: 16.60, Grad Norm: 0.0350 +[2025-02-23 12:58:12] (step=0881100) Train Loss: 0.3212, Train Steps/Sec: 17.30, Grad Norm: 0.0352 +[2025-02-23 12:58:18] (step=0881200) Train Loss: 0.3208, Train Steps/Sec: 16.55, Grad Norm: 0.0377 +[2025-02-23 12:58:25] (step=0881300) Train Loss: 0.3216, Train Steps/Sec: 15.82, Grad Norm: 0.0374 +[2025-02-23 12:58:31] (step=0881400) Train Loss: 0.3209, Train Steps/Sec: 16.45, Grad Norm: 0.0352 +[2025-02-23 12:58:37] (step=0881500) Train Loss: 0.3220, Train Steps/Sec: 16.47, Grad Norm: 0.0387 +[2025-02-23 12:58:43] (step=0881600) Train Loss: 0.3208, Train Steps/Sec: 16.49, Grad Norm: 0.0338 +[2025-02-23 12:58:49] (step=0881700) Train Loss: 0.3210, Train Steps/Sec: 16.58, Grad Norm: 0.0349 +[2025-02-23 12:58:55] (step=0881800) Train Loss: 0.3211, Train Steps/Sec: 16.50, Grad Norm: 0.0367 +[2025-02-23 12:59:01] (step=0881900) Train Loss: 0.3209, Train Steps/Sec: 17.29, Grad Norm: 0.0382 +[2025-02-23 12:59:08] (step=0882000) Train Loss: 0.3213, Train Steps/Sec: 14.25, Grad Norm: 0.0340 +[2025-02-23 12:59:14] (step=0882100) Train Loss: 0.3211, Train Steps/Sec: 16.55, Grad Norm: 0.0415 +[2025-02-23 12:59:19] (step=0882200) Train Loss: 0.3211, Train Steps/Sec: 17.22, Grad Norm: 0.0367 +[2025-02-23 12:59:25] (step=0882300) Train Loss: 0.3209, Train Steps/Sec: 17.35, Grad Norm: 0.0373 +[2025-02-23 12:59:31] (step=0882400) Train Loss: 0.3215, Train Steps/Sec: 17.41, Grad Norm: 0.0348 +[2025-02-23 12:59:37] (step=0882500) Train Loss: 0.3213, Train Steps/Sec: 17.39, Grad Norm: 0.0332 +[2025-02-23 12:59:43] (step=0882600) Train Loss: 0.3207, Train Steps/Sec: 15.29, Grad Norm: 0.0342 +[2025-02-23 12:59:49] (step=0882700) Train Loss: 0.3209, Train Steps/Sec: 16.52, Grad Norm: 0.0355 +[2025-02-23 12:59:55] (step=0882800) Train Loss: 0.3205, Train Steps/Sec: 16.56, Grad Norm: 0.0343 +[2025-02-23 13:00:01] (step=0882900) Train Loss: 0.3214, Train Steps/Sec: 16.51, Grad Norm: 0.0307 +[2025-02-23 13:00:07] (step=0883000) Train Loss: 0.3207, Train Steps/Sec: 16.59, Grad Norm: 0.0380 +[2025-02-23 13:00:13] (step=0883100) Train Loss: 0.3215, Train Steps/Sec: 17.39, Grad Norm: 0.0352 +[2025-02-23 13:00:19] (step=0883200) Train Loss: 0.3211, Train Steps/Sec: 17.33, Grad Norm: 0.0341 +[2025-02-23 13:00:27] (step=0883300) Train Loss: 0.3213, Train Steps/Sec: 12.89, Grad Norm: 0.0405 +[2025-02-23 13:00:33] (step=0883400) Train Loss: 0.3213, Train Steps/Sec: 16.58, Grad Norm: 0.0365 +[2025-02-23 13:00:39] (step=0883500) Train Loss: 0.3213, Train Steps/Sec: 16.58, Grad Norm: 0.0371 +[2025-02-23 13:00:45] (step=0883600) Train Loss: 0.3210, Train Steps/Sec: 16.63, Grad Norm: 0.0360 +[2025-02-23 13:00:51] (step=0883700) Train Loss: 0.3210, Train Steps/Sec: 16.66, Grad Norm: 0.0349 +[2025-02-23 13:00:57] (step=0883800) Train Loss: 0.3204, Train Steps/Sec: 16.57, Grad Norm: 0.0359 +[2025-02-23 13:01:03] (step=0883900) Train Loss: 0.3205, Train Steps/Sec: 17.33, Grad Norm: 0.0345 +[2025-02-23 13:01:08] (step=0884000) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0372 +[2025-02-23 13:01:14] (step=0884100) Train Loss: 0.3210, Train Steps/Sec: 16.59, Grad Norm: 0.0390 +[2025-02-23 13:01:20] (step=0884200) Train Loss: 0.3218, Train Steps/Sec: 17.31, Grad Norm: 0.0358 +[2025-02-23 13:01:26] (step=0884300) Train Loss: 0.3211, Train Steps/Sec: 17.33, Grad Norm: 0.0345 +[2025-02-23 13:01:32] (step=0884400) Train Loss: 0.3209, Train Steps/Sec: 17.28, Grad Norm: 0.0333 +[2025-02-23 13:01:39] (step=0884500) Train Loss: 0.3211, Train Steps/Sec: 13.84, Grad Norm: 0.0370 +[2025-02-23 13:01:46] (step=0884600) Train Loss: 0.3213, Train Steps/Sec: 15.16, Grad Norm: 0.0338 +[2025-02-23 13:01:51] (step=0884700) Train Loss: 0.3211, Train Steps/Sec: 17.15, Grad Norm: 0.0352 +[2025-02-23 13:01:58] (step=0884800) Train Loss: 0.3211, Train Steps/Sec: 15.77, Grad Norm: 0.0362 +[2025-02-23 13:02:04] (step=0884900) Train Loss: 0.3207, Train Steps/Sec: 16.45, Grad Norm: 0.0327 +[2025-02-23 13:02:10] (step=0885000) Train Loss: 0.3206, Train Steps/Sec: 17.13, Grad Norm: 0.0333 +[2025-02-23 13:02:16] (step=0885100) Train Loss: 0.3216, Train Steps/Sec: 17.09, Grad Norm: 0.0345 +[2025-02-23 13:02:22] (step=0885200) Train Loss: 0.3207, Train Steps/Sec: 16.35, Grad Norm: 0.0356 +[2025-02-23 13:02:28] (step=0885300) Train Loss: 0.3210, Train Steps/Sec: 15.63, Grad Norm: 0.0341 +[2025-02-23 13:02:34] (step=0885400) Train Loss: 0.3207, Train Steps/Sec: 16.33, Grad Norm: 0.0399 +[2025-02-23 13:02:40] (step=0885500) Train Loss: 0.3213, Train Steps/Sec: 16.35, Grad Norm: 0.0369 +[2025-02-23 13:02:46] (step=0885600) Train Loss: 0.3215, Train Steps/Sec: 16.40, Grad Norm: 0.0369 +[2025-02-23 13:02:53] (step=0885700) Train Loss: 0.3205, Train Steps/Sec: 16.43, Grad Norm: 0.0369 +[2025-02-23 13:03:00] (step=0885800) Train Loss: 0.3207, Train Steps/Sec: 13.79, Grad Norm: 0.0347 +[2025-02-23 13:03:06] (step=0885900) Train Loss: 0.3208, Train Steps/Sec: 17.22, Grad Norm: 0.0341 +[2025-02-23 13:03:11] (step=0886000) Train Loss: 0.3212, Train Steps/Sec: 17.26, Grad Norm: 0.0384 +[2025-02-23 13:03:17] (step=0886100) Train Loss: 0.3210, Train Steps/Sec: 16.42, Grad Norm: 0.0379 +[2025-02-23 13:03:23] (step=0886200) Train Loss: 0.3214, Train Steps/Sec: 17.36, Grad Norm: 0.0380 +[2025-02-23 13:03:29] (step=0886300) Train Loss: 0.3215, Train Steps/Sec: 17.40, Grad Norm: 0.0337 +[2025-02-23 13:03:35] (step=0886400) Train Loss: 0.3208, Train Steps/Sec: 17.34, Grad Norm: 0.0340 +[2025-02-23 13:03:41] (step=0886500) Train Loss: 0.3210, Train Steps/Sec: 15.29, Grad Norm: 0.0422 +[2025-02-23 13:03:47] (step=0886600) Train Loss: 0.3211, Train Steps/Sec: 16.56, Grad Norm: 0.0351 +[2025-02-23 13:03:53] (step=0886700) Train Loss: 0.3209, Train Steps/Sec: 16.58, Grad Norm: 0.0411 +[2025-02-23 13:03:59] (step=0886800) Train Loss: 0.3207, Train Steps/Sec: 16.52, Grad Norm: 0.0386 +[2025-02-23 13:04:05] (step=0886900) Train Loss: 0.3214, Train Steps/Sec: 16.60, Grad Norm: 0.0363 +[2025-02-23 13:04:12] (step=0887000) Train Loss: 0.3210, Train Steps/Sec: 14.35, Grad Norm: 0.0368 +[2025-02-23 13:04:18] (step=0887100) Train Loss: 0.3213, Train Steps/Sec: 17.19, Grad Norm: 0.0343 +[2025-02-23 13:04:24] (step=0887200) Train Loss: 0.3206, Train Steps/Sec: 16.48, Grad Norm: 0.0354 +[2025-02-23 13:04:31] (step=0887300) Train Loss: 0.3219, Train Steps/Sec: 15.84, Grad Norm: 0.0362 +[2025-02-23 13:04:37] (step=0887400) Train Loss: 0.3214, Train Steps/Sec: 16.47, Grad Norm: 0.0386 +[2025-02-23 13:04:43] (step=0887500) Train Loss: 0.3210, Train Steps/Sec: 16.53, Grad Norm: 0.0387 +[2025-02-23 13:04:49] (step=0887600) Train Loss: 0.3207, Train Steps/Sec: 16.67, Grad Norm: 0.0406 +[2025-02-23 13:04:55] (step=0887700) Train Loss: 0.3207, Train Steps/Sec: 16.67, Grad Norm: 0.0360 +[2025-02-23 13:05:01] (step=0887800) Train Loss: 0.3208, Train Steps/Sec: 16.50, Grad Norm: 0.0342 +[2025-02-23 13:05:07] (step=0887900) Train Loss: 0.3211, Train Steps/Sec: 17.16, Grad Norm: 0.0334 +[2025-02-23 13:05:12] (step=0888000) Train Loss: 0.3212, Train Steps/Sec: 17.17, Grad Norm: 0.0343 +[2025-02-23 13:05:19] (step=0888100) Train Loss: 0.3210, Train Steps/Sec: 16.26, Grad Norm: 0.0360 +[2025-02-23 13:05:24] (step=0888200) Train Loss: 0.3209, Train Steps/Sec: 17.07, Grad Norm: 0.0382 +[2025-02-23 13:05:31] (step=0888300) Train Loss: 0.3211, Train Steps/Sec: 14.39, Grad Norm: 0.0341 +[2025-02-23 13:05:38] (step=0888400) Train Loss: 0.3210, Train Steps/Sec: 16.48, Grad Norm: 0.0383 +[2025-02-23 13:05:44] (step=0888500) Train Loss: 0.3203, Train Steps/Sec: 15.21, Grad Norm: 0.0330 +[2025-02-23 13:05:50] (step=0888600) Train Loss: 0.3209, Train Steps/Sec: 17.21, Grad Norm: 0.0375 +[2025-02-23 13:05:56] (step=0888700) Train Loss: 0.3211, Train Steps/Sec: 15.85, Grad Norm: 0.0393 +[2025-02-23 13:06:02] (step=0888800) Train Loss: 0.3211, Train Steps/Sec: 16.57, Grad Norm: 0.0358 +[2025-02-23 13:06:08] (step=0888900) Train Loss: 0.3215, Train Steps/Sec: 17.35, Grad Norm: 0.0377 +[2025-02-23 13:06:14] (step=0889000) Train Loss: 0.3211, Train Steps/Sec: 17.36, Grad Norm: 0.0332 +[2025-02-23 13:06:20] (step=0889100) Train Loss: 0.3210, Train Steps/Sec: 16.56, Grad Norm: 0.0356 +[2025-02-23 13:06:26] (step=0889200) Train Loss: 0.3218, Train Steps/Sec: 17.36, Grad Norm: 0.0393 +[2025-02-23 13:06:32] (step=0889300) Train Loss: 0.3207, Train Steps/Sec: 15.85, Grad Norm: 0.0388 +[2025-02-23 13:06:38] (step=0889400) Train Loss: 0.3210, Train Steps/Sec: 16.52, Grad Norm: 0.0370 +[2025-02-23 13:06:46] (step=0889500) Train Loss: 0.3207, Train Steps/Sec: 13.03, Grad Norm: 0.0370 +[2025-02-23 13:06:52] (step=0889600) Train Loss: 0.3209, Train Steps/Sec: 16.28, Grad Norm: 0.0347 +[2025-02-23 13:06:58] (step=0889700) Train Loss: 0.3211, Train Steps/Sec: 16.31, Grad Norm: 0.0356 +[2025-02-23 13:07:04] (step=0889800) Train Loss: 0.3209, Train Steps/Sec: 16.27, Grad Norm: 0.0363 +[2025-02-23 13:07:10] (step=0889900) Train Loss: 0.3208, Train Steps/Sec: 16.96, Grad Norm: 0.0366 +[2025-02-23 13:07:16] (step=0890000) Train Loss: 0.3211, Train Steps/Sec: 17.04, Grad Norm: 0.0335 +[2025-02-23 13:07:22] (step=0890100) Train Loss: 0.3207, Train Steps/Sec: 16.27, Grad Norm: 0.0356 +[2025-02-23 13:07:28] (step=0890200) Train Loss: 0.3204, Train Steps/Sec: 16.99, Grad Norm: 0.0354 +[2025-02-23 13:07:34] (step=0890300) Train Loss: 0.3217, Train Steps/Sec: 16.98, Grad Norm: 0.0350 +[2025-02-23 13:07:40] (step=0890400) Train Loss: 0.3209, Train Steps/Sec: 15.02, Grad Norm: 0.0337 +[2025-02-23 13:07:47] (step=0890500) Train Loss: 0.3215, Train Steps/Sec: 16.32, Grad Norm: 0.0349 +[2025-02-23 13:07:53] (step=0890600) Train Loss: 0.3208, Train Steps/Sec: 16.40, Grad Norm: 0.0343 +[2025-02-23 13:07:59] (step=0890700) Train Loss: 0.3209, Train Steps/Sec: 16.26, Grad Norm: 0.0384 +[2025-02-23 13:08:06] (step=0890800) Train Loss: 0.3211, Train Steps/Sec: 13.81, Grad Norm: 0.0339 +[2025-02-23 13:08:12] (step=0890900) Train Loss: 0.3216, Train Steps/Sec: 17.00, Grad Norm: 0.0366 +[2025-02-23 13:08:18] (step=0891000) Train Loss: 0.3207, Train Steps/Sec: 16.99, Grad Norm: 0.0346 +[2025-02-23 13:08:24] (step=0891100) Train Loss: 0.3209, Train Steps/Sec: 16.23, Grad Norm: 0.0330 +[2025-02-23 13:08:30] (step=0891200) Train Loss: 0.3207, Train Steps/Sec: 16.99, Grad Norm: 0.0372 +[2025-02-23 13:08:36] (step=0891300) Train Loss: 0.3213, Train Steps/Sec: 15.62, Grad Norm: 0.0369 +[2025-02-23 13:08:42] (step=0891400) Train Loss: 0.3210, Train Steps/Sec: 16.30, Grad Norm: 0.0351 +[2025-02-23 13:08:49] (step=0891500) Train Loss: 0.3212, Train Steps/Sec: 16.30, Grad Norm: 0.0358 +[2025-02-23 13:08:55] (step=0891600) Train Loss: 0.3210, Train Steps/Sec: 16.35, Grad Norm: 0.0360 +[2025-02-23 13:09:01] (step=0891700) Train Loss: 0.3211, Train Steps/Sec: 16.38, Grad Norm: 0.0355 +[2025-02-23 13:09:07] (step=0891800) Train Loss: 0.3212, Train Steps/Sec: 16.49, Grad Norm: 0.0374 +[2025-02-23 13:09:13] (step=0891900) Train Loss: 0.3206, Train Steps/Sec: 17.32, Grad Norm: 0.0344 +[2025-02-23 13:09:19] (step=0892000) Train Loss: 0.3211, Train Steps/Sec: 14.53, Grad Norm: 0.0369 +[2025-02-23 13:09:26] (step=0892100) Train Loss: 0.3214, Train Steps/Sec: 16.52, Grad Norm: 0.0384 +[2025-02-23 13:09:31] (step=0892200) Train Loss: 0.3215, Train Steps/Sec: 17.27, Grad Norm: 0.0347 +[2025-02-23 13:09:38] (step=0892300) Train Loss: 0.3202, Train Steps/Sec: 15.89, Grad Norm: 0.0328 +[2025-02-23 13:09:44] (step=0892400) Train Loss: 0.3207, Train Steps/Sec: 15.93, Grad Norm: 0.0379 +[2025-02-23 13:09:50] (step=0892500) Train Loss: 0.3211, Train Steps/Sec: 17.43, Grad Norm: 0.0339 +[2025-02-23 13:09:56] (step=0892600) Train Loss: 0.3210, Train Steps/Sec: 16.02, Grad Norm: 0.0341 +[2025-02-23 13:10:02] (step=0892700) Train Loss: 0.3216, Train Steps/Sec: 16.68, Grad Norm: 0.0338 +[2025-02-23 13:10:08] (step=0892800) Train Loss: 0.3203, Train Steps/Sec: 17.40, Grad Norm: 0.0397 +[2025-02-23 13:10:13] (step=0892900) Train Loss: 0.3215, Train Steps/Sec: 17.42, Grad Norm: 0.0383 +[2025-02-23 13:10:19] (step=0893000) Train Loss: 0.3215, Train Steps/Sec: 16.56, Grad Norm: 0.0372 +[2025-02-23 13:10:25] (step=0893100) Train Loss: 0.3207, Train Steps/Sec: 17.37, Grad Norm: 0.0401 +[2025-02-23 13:10:31] (step=0893200) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0371 +[2025-02-23 13:10:38] (step=0893300) Train Loss: 0.3211, Train Steps/Sec: 13.45, Grad Norm: 0.0381 +[2025-02-23 13:10:44] (step=0893400) Train Loss: 0.3212, Train Steps/Sec: 16.44, Grad Norm: 0.0337 +[2025-02-23 13:10:51] (step=0893500) Train Loss: 0.3209, Train Steps/Sec: 16.38, Grad Norm: 0.0372 +[2025-02-23 13:10:57] (step=0893600) Train Loss: 0.3214, Train Steps/Sec: 16.49, Grad Norm: 0.0396 +[2025-02-23 13:11:03] (step=0893700) Train Loss: 0.3208, Train Steps/Sec: 16.53, Grad Norm: 0.0364 +[2025-02-23 13:11:09] (step=0893800) Train Loss: 0.3211, Train Steps/Sec: 16.48, Grad Norm: 0.0391 +[2025-02-23 13:11:15] (step=0893900) Train Loss: 0.3212, Train Steps/Sec: 17.29, Grad Norm: 0.0368 +[2025-02-23 13:11:20] (step=0894000) Train Loss: 0.3215, Train Steps/Sec: 17.26, Grad Norm: 0.0374 +[2025-02-23 13:11:26] (step=0894100) Train Loss: 0.3212, Train Steps/Sec: 16.42, Grad Norm: 0.0376 +[2025-02-23 13:11:32] (step=0894200) Train Loss: 0.3211, Train Steps/Sec: 17.19, Grad Norm: 0.0336 +[2025-02-23 13:11:39] (step=0894300) Train Loss: 0.3215, Train Steps/Sec: 14.66, Grad Norm: 0.0366 +[2025-02-23 13:11:45] (step=0894400) Train Loss: 0.3205, Train Steps/Sec: 17.30, Grad Norm: 0.0398 +[2025-02-23 13:11:52] (step=0894500) Train Loss: 0.3209, Train Steps/Sec: 13.48, Grad Norm: 0.0372 +[2025-02-23 13:11:58] (step=0894600) Train Loss: 0.3210, Train Steps/Sec: 17.26, Grad Norm: 0.0348 +[2025-02-23 13:12:04] (step=0894700) Train Loss: 0.3206, Train Steps/Sec: 16.54, Grad Norm: 0.0373 +[2025-02-23 13:12:10] (step=0894800) Train Loss: 0.3213, Train Steps/Sec: 17.27, Grad Norm: 0.0340 +[2025-02-23 13:12:16] (step=0894900) Train Loss: 0.3212, Train Steps/Sec: 17.30, Grad Norm: 0.0404 +[2025-02-23 13:12:22] (step=0895000) Train Loss: 0.3205, Train Steps/Sec: 16.52, Grad Norm: 0.0352 +[2025-02-23 13:12:28] (step=0895100) Train Loss: 0.3214, Train Steps/Sec: 17.33, Grad Norm: 0.0333 +[2025-02-23 13:12:33] (step=0895200) Train Loss: 0.3210, Train Steps/Sec: 17.16, Grad Norm: 0.0338 +[2025-02-23 13:12:40] (step=0895300) Train Loss: 0.3213, Train Steps/Sec: 15.80, Grad Norm: 0.0324 +[2025-02-23 13:12:46] (step=0895400) Train Loss: 0.3209, Train Steps/Sec: 16.42, Grad Norm: 0.0381 +[2025-02-23 13:12:52] (step=0895500) Train Loss: 0.3212, Train Steps/Sec: 16.48, Grad Norm: 0.0399 +[2025-02-23 13:12:58] (step=0895600) Train Loss: 0.3210, Train Steps/Sec: 16.56, Grad Norm: 0.0323 +[2025-02-23 13:13:04] (step=0895700) Train Loss: 0.3212, Train Steps/Sec: 16.45, Grad Norm: 0.0348 +[2025-02-23 13:13:11] (step=0895800) Train Loss: 0.3207, Train Steps/Sec: 13.96, Grad Norm: 0.0362 +[2025-02-23 13:13:17] (step=0895900) Train Loss: 0.3214, Train Steps/Sec: 17.28, Grad Norm: 0.0357 +[2025-02-23 13:13:23] (step=0896000) Train Loss: 0.3209, Train Steps/Sec: 17.27, Grad Norm: 0.0369 +[2025-02-23 13:13:28] (step=0896100) Train Loss: 0.3208, Train Steps/Sec: 17.31, Grad Norm: 0.0325 +[2025-02-23 13:13:35] (step=0896200) Train Loss: 0.3209, Train Steps/Sec: 15.19, Grad Norm: 0.0337 +[2025-02-23 13:13:41] (step=0896300) Train Loss: 0.3206, Train Steps/Sec: 15.96, Grad Norm: 0.0365 +[2025-02-23 13:13:47] (step=0896400) Train Loss: 0.3217, Train Steps/Sec: 16.65, Grad Norm: 0.0341 +[2025-02-23 13:13:53] (step=0896500) Train Loss: 0.3207, Train Steps/Sec: 16.63, Grad Norm: 0.0372 +[2025-02-23 13:13:59] (step=0896600) Train Loss: 0.3211, Train Steps/Sec: 16.70, Grad Norm: 0.0339 +[2025-02-23 13:14:05] (step=0896700) Train Loss: 0.3211, Train Steps/Sec: 17.38, Grad Norm: 0.0345 +[2025-02-23 13:14:11] (step=0896800) Train Loss: 0.3212, Train Steps/Sec: 17.44, Grad Norm: 0.0354 +[2025-02-23 13:14:17] (step=0896900) Train Loss: 0.3215, Train Steps/Sec: 16.64, Grad Norm: 0.0363 +[2025-02-23 13:14:24] (step=0897000) Train Loss: 0.3217, Train Steps/Sec: 14.46, Grad Norm: 0.0358 +[2025-02-23 13:14:30] (step=0897100) Train Loss: 0.3210, Train Steps/Sec: 17.24, Grad Norm: 0.0333 +[2025-02-23 13:14:35] (step=0897200) Train Loss: 0.3215, Train Steps/Sec: 17.26, Grad Norm: 0.0367 +[2025-02-23 13:14:41] (step=0897300) Train Loss: 0.3207, Train Steps/Sec: 16.58, Grad Norm: 0.0360 +[2025-02-23 13:14:48] (step=0897400) Train Loss: 0.3213, Train Steps/Sec: 15.78, Grad Norm: 0.0382 +[2025-02-23 13:14:54] (step=0897500) Train Loss: 0.3215, Train Steps/Sec: 16.62, Grad Norm: 0.0334 +[2025-02-23 13:15:00] (step=0897600) Train Loss: 0.3208, Train Steps/Sec: 16.57, Grad Norm: 0.0366 +[2025-02-23 13:15:06] (step=0897700) Train Loss: 0.3213, Train Steps/Sec: 16.58, Grad Norm: 0.0385 +[2025-02-23 13:15:12] (step=0897800) Train Loss: 0.3209, Train Steps/Sec: 16.55, Grad Norm: 0.0346 +[2025-02-23 13:15:18] (step=0897900) Train Loss: 0.3206, Train Steps/Sec: 17.27, Grad Norm: 0.0353 +[2025-02-23 13:15:23] (step=0898000) Train Loss: 0.3209, Train Steps/Sec: 17.32, Grad Norm: 0.0387 +[2025-02-23 13:15:30] (step=0898100) Train Loss: 0.3216, Train Steps/Sec: 16.54, Grad Norm: 0.0329 +[2025-02-23 13:15:36] (step=0898200) Train Loss: 0.3208, Train Steps/Sec: 14.63, Grad Norm: 0.0376 +[2025-02-23 13:15:43] (step=0898300) Train Loss: 0.3211, Train Steps/Sec: 14.44, Grad Norm: 0.0343 +[2025-02-23 13:15:50] (step=0898400) Train Loss: 0.3216, Train Steps/Sec: 15.89, Grad Norm: 0.0405 +[2025-02-23 13:15:55] (step=0898500) Train Loss: 0.3215, Train Steps/Sec: 17.31, Grad Norm: 0.0367 +[2025-02-23 13:16:01] (step=0898600) Train Loss: 0.3212, Train Steps/Sec: 16.62, Grad Norm: 0.0330 +[2025-02-23 13:16:07] (step=0898700) Train Loss: 0.3209, Train Steps/Sec: 17.33, Grad Norm: 0.0384 +[2025-02-23 13:16:13] (step=0898800) Train Loss: 0.3213, Train Steps/Sec: 17.36, Grad Norm: 0.0397 +[2025-02-23 13:16:19] (step=0898900) Train Loss: 0.3212, Train Steps/Sec: 16.53, Grad Norm: 0.0388 +[2025-02-23 13:16:25] (step=0899000) Train Loss: 0.3208, Train Steps/Sec: 17.32, Grad Norm: 0.0357 +[2025-02-23 13:16:30] (step=0899100) Train Loss: 0.3209, Train Steps/Sec: 17.35, Grad Norm: 0.0360 +[2025-02-23 13:16:36] (step=0899200) Train Loss: 0.3205, Train Steps/Sec: 17.40, Grad Norm: 0.0349 +[2025-02-23 13:16:42] (step=0899300) Train Loss: 0.3213, Train Steps/Sec: 16.65, Grad Norm: 0.0355 +[2025-02-23 13:16:48] (step=0899400) Train Loss: 0.3211, Train Steps/Sec: 16.60, Grad Norm: 0.0379 +[2025-02-23 13:16:56] (step=0899500) Train Loss: 0.3211, Train Steps/Sec: 13.37, Grad Norm: 0.0357 +[2025-02-23 13:17:02] (step=0899600) Train Loss: 0.3210, Train Steps/Sec: 16.59, Grad Norm: 0.0400 +[2025-02-23 13:17:08] (step=0899700) Train Loss: 0.3209, Train Steps/Sec: 16.55, Grad Norm: 0.0386 +[2025-02-23 13:17:14] (step=0899800) Train Loss: 0.3211, Train Steps/Sec: 16.52, Grad Norm: 0.0342 +[2025-02-23 13:17:20] (step=0899900) Train Loss: 0.3213, Train Steps/Sec: 17.31, Grad Norm: 0.0359 +[2025-02-23 13:17:25] (step=0900000) Train Loss: 0.3214, Train Steps/Sec: 17.36, Grad Norm: 0.0357 +[2025-02-23 13:17:26] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0900000.pt +[2025-02-23 13:17:33] (step=0900100) Train Loss: 0.3210, Train Steps/Sec: 13.70, Grad Norm: 0.0390 +[2025-02-23 13:17:39] (step=0900200) Train Loss: 0.3212, Train Steps/Sec: 15.86, Grad Norm: 0.0358 +[2025-02-23 13:17:45] (step=0900300) Train Loss: 0.3208, Train Steps/Sec: 16.61, Grad Norm: 0.0343 +[2025-02-23 13:17:51] (step=0900400) Train Loss: 0.3215, Train Steps/Sec: 16.63, Grad Norm: 0.0336 +[2025-02-23 13:17:57] (step=0900500) Train Loss: 0.3216, Train Steps/Sec: 16.67, Grad Norm: 0.0355 +[2025-02-23 13:18:03] (step=0900600) Train Loss: 0.3211, Train Steps/Sec: 17.36, Grad Norm: 0.0398 +[2025-02-23 13:18:09] (step=0900700) Train Loss: 0.3210, Train Steps/Sec: 17.26, Grad Norm: 0.0379 +[2025-02-23 13:18:16] (step=0900800) Train Loss: 0.3202, Train Steps/Sec: 13.99, Grad Norm: 0.0384 +[2025-02-23 13:18:22] (step=0900900) Train Loss: 0.3209, Train Steps/Sec: 17.35, Grad Norm: 0.0335 +[2025-02-23 13:18:27] (step=0901000) Train Loss: 0.3208, Train Steps/Sec: 17.28, Grad Norm: 0.0365 +[2025-02-23 13:18:33] (step=0901100) Train Loss: 0.3213, Train Steps/Sec: 17.24, Grad Norm: 0.0353 +[2025-02-23 13:18:39] (step=0901200) Train Loss: 0.3212, Train Steps/Sec: 17.28, Grad Norm: 0.0370 +[2025-02-23 13:18:45] (step=0901300) Train Loss: 0.3215, Train Steps/Sec: 16.60, Grad Norm: 0.0338 +[2025-02-23 13:18:51] (step=0901400) Train Loss: 0.3212, Train Steps/Sec: 16.53, Grad Norm: 0.0363 +[2025-02-23 13:18:57] (step=0901500) Train Loss: 0.3208, Train Steps/Sec: 16.54, Grad Norm: 0.0385 +[2025-02-23 13:19:03] (step=0901600) Train Loss: 0.3213, Train Steps/Sec: 15.84, Grad Norm: 0.0351 +[2025-02-23 13:19:09] (step=0901700) Train Loss: 0.3209, Train Steps/Sec: 16.59, Grad Norm: 0.0347 +[2025-02-23 13:19:15] (step=0901800) Train Loss: 0.3211, Train Steps/Sec: 17.34, Grad Norm: 0.0339 +[2025-02-23 13:19:21] (step=0901900) Train Loss: 0.3216, Train Steps/Sec: 16.55, Grad Norm: 0.0340 +[2025-02-23 13:19:28] (step=0902000) Train Loss: 0.3214, Train Steps/Sec: 13.97, Grad Norm: 0.0333 +[2025-02-23 13:19:35] (step=0902100) Train Loss: 0.3210, Train Steps/Sec: 15.22, Grad Norm: 0.0356 +[2025-02-23 13:19:41] (step=0902200) Train Loss: 0.3206, Train Steps/Sec: 16.51, Grad Norm: 0.0335 +[2025-02-23 13:19:47] (step=0902300) Train Loss: 0.3210, Train Steps/Sec: 15.89, Grad Norm: 0.0371 +[2025-02-23 13:19:53] (step=0902400) Train Loss: 0.3207, Train Steps/Sec: 17.23, Grad Norm: 0.0382 +[2025-02-23 13:19:59] (step=0902500) Train Loss: 0.3212, Train Steps/Sec: 16.50, Grad Norm: 0.0321 +[2025-02-23 13:20:05] (step=0902600) Train Loss: 0.3207, Train Steps/Sec: 17.24, Grad Norm: 0.0385 +[2025-02-23 13:20:11] (step=0902700) Train Loss: 0.3215, Train Steps/Sec: 16.35, Grad Norm: 0.0353 +[2025-02-23 13:20:17] (step=0902800) Train Loss: 0.3213, Train Steps/Sec: 17.27, Grad Norm: 0.0332 +[2025-02-23 13:20:23] (step=0902900) Train Loss: 0.3213, Train Steps/Sec: 17.25, Grad Norm: 0.0341 +[2025-02-23 13:20:28] (step=0903000) Train Loss: 0.3207, Train Steps/Sec: 17.20, Grad Norm: 0.0369 +[2025-02-23 13:20:34] (step=0903100) Train Loss: 0.3218, Train Steps/Sec: 17.18, Grad Norm: 0.0369 +[2025-02-23 13:20:40] (step=0903200) Train Loss: 0.3208, Train Steps/Sec: 17.06, Grad Norm: 0.0349 +[2025-02-23 13:20:47] (step=0903300) Train Loss: 0.3212, Train Steps/Sec: 13.89, Grad Norm: 0.0338 +[2025-02-23 13:20:53] (step=0903400) Train Loss: 0.3214, Train Steps/Sec: 16.48, Grad Norm: 0.0345 +[2025-02-23 13:20:59] (step=0903500) Train Loss: 0.3207, Train Steps/Sec: 16.53, Grad Norm: 0.0367 +[2025-02-23 13:21:06] (step=0903600) Train Loss: 0.3212, Train Steps/Sec: 15.90, Grad Norm: 0.0366 +[2025-02-23 13:21:12] (step=0903700) Train Loss: 0.3208, Train Steps/Sec: 16.65, Grad Norm: 0.0355 +[2025-02-23 13:21:18] (step=0903800) Train Loss: 0.3211, Train Steps/Sec: 17.41, Grad Norm: 0.0351 +[2025-02-23 13:21:24] (step=0903900) Train Loss: 0.3216, Train Steps/Sec: 16.61, Grad Norm: 0.0345 +[2025-02-23 13:21:30] (step=0904000) Train Loss: 0.3214, Train Steps/Sec: 15.87, Grad Norm: 0.0341 +[2025-02-23 13:21:36] (step=0904100) Train Loss: 0.3211, Train Steps/Sec: 15.90, Grad Norm: 0.0354 +[2025-02-23 13:21:42] (step=0904200) Train Loss: 0.3213, Train Steps/Sec: 15.89, Grad Norm: 0.0369 +[2025-02-23 13:21:48] (step=0904300) Train Loss: 0.3209, Train Steps/Sec: 16.63, Grad Norm: 0.0317 +[2025-02-23 13:21:54] (step=0904400) Train Loss: 0.3209, Train Steps/Sec: 16.64, Grad Norm: 0.0405 +[2025-02-23 13:22:02] (step=0904500) Train Loss: 0.3209, Train Steps/Sec: 14.17, Grad Norm: 0.0356 +[2025-02-23 13:22:07] (step=0904600) Train Loss: 0.3216, Train Steps/Sec: 17.12, Grad Norm: 0.0357 +[2025-02-23 13:22:13] (step=0904700) Train Loss: 0.3215, Train Steps/Sec: 16.41, Grad Norm: 0.0373 +[2025-02-23 13:22:19] (step=0904800) Train Loss: 0.3212, Train Steps/Sec: 17.20, Grad Norm: 0.0355 +[2025-02-23 13:22:25] (step=0904900) Train Loss: 0.3210, Train Steps/Sec: 17.15, Grad Norm: 0.0358 +[2025-02-23 13:22:31] (step=0905000) Train Loss: 0.3207, Train Steps/Sec: 17.21, Grad Norm: 0.0369 +[2025-02-23 13:22:37] (step=0905100) Train Loss: 0.3218, Train Steps/Sec: 17.30, Grad Norm: 0.0401 +[2025-02-23 13:22:42] (step=0905200) Train Loss: 0.3216, Train Steps/Sec: 17.33, Grad Norm: 0.0348 +[2025-02-23 13:22:49] (step=0905300) Train Loss: 0.3212, Train Steps/Sec: 16.56, Grad Norm: 0.0358 +[2025-02-23 13:22:55] (step=0905400) Train Loss: 0.3217, Train Steps/Sec: 16.54, Grad Norm: 0.0367 +[2025-02-23 13:23:01] (step=0905500) Train Loss: 0.3209, Train Steps/Sec: 16.54, Grad Norm: 0.0351 +[2025-02-23 13:23:07] (step=0905600) Train Loss: 0.3210, Train Steps/Sec: 15.87, Grad Norm: 0.0340 +[2025-02-23 13:23:13] (step=0905700) Train Loss: 0.3209, Train Steps/Sec: 16.48, Grad Norm: 0.0382 +[2025-02-23 13:23:20] (step=0905800) Train Loss: 0.3205, Train Steps/Sec: 14.20, Grad Norm: 0.0379 +[2025-02-23 13:23:26] (step=0905900) Train Loss: 0.3203, Train Steps/Sec: 15.70, Grad Norm: 0.0368 +[2025-02-23 13:23:33] (step=0906000) Train Loss: 0.3214, Train Steps/Sec: 15.22, Grad Norm: 0.0339 +[2025-02-23 13:23:39] (step=0906100) Train Loss: 0.3207, Train Steps/Sec: 17.23, Grad Norm: 0.0343 +[2025-02-23 13:23:45] (step=0906200) Train Loss: 0.3212, Train Steps/Sec: 15.85, Grad Norm: 0.0340 +[2025-02-23 13:23:51] (step=0906300) Train Loss: 0.3206, Train Steps/Sec: 16.69, Grad Norm: 0.0356 +[2025-02-23 13:23:57] (step=0906400) Train Loss: 0.3215, Train Steps/Sec: 16.66, Grad Norm: 0.0342 +[2025-02-23 13:24:03] (step=0906500) Train Loss: 0.3215, Train Steps/Sec: 17.36, Grad Norm: 0.0357 +[2025-02-23 13:24:09] (step=0906600) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0364 +[2025-02-23 13:24:15] (step=0906700) Train Loss: 0.3213, Train Steps/Sec: 16.43, Grad Norm: 0.0349 +[2025-02-23 13:24:20] (step=0906800) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0354 +[2025-02-23 13:24:26] (step=0906900) Train Loss: 0.3211, Train Steps/Sec: 17.35, Grad Norm: 0.0321 +[2025-02-23 13:24:33] (step=0907000) Train Loss: 0.3208, Train Steps/Sec: 14.28, Grad Norm: 0.0354 +[2025-02-23 13:24:39] (step=0907100) Train Loss: 0.3207, Train Steps/Sec: 17.07, Grad Norm: 0.0376 +[2025-02-23 13:24:45] (step=0907200) Train Loss: 0.3216, Train Steps/Sec: 17.10, Grad Norm: 0.0330 +[2025-02-23 13:24:51] (step=0907300) Train Loss: 0.3211, Train Steps/Sec: 16.28, Grad Norm: 0.0347 +[2025-02-23 13:24:57] (step=0907400) Train Loss: 0.3206, Train Steps/Sec: 16.29, Grad Norm: 0.0365 +[2025-02-23 13:25:03] (step=0907500) Train Loss: 0.3204, Train Steps/Sec: 16.29, Grad Norm: 0.0392 +[2025-02-23 13:25:10] (step=0907600) Train Loss: 0.3215, Train Steps/Sec: 15.67, Grad Norm: 0.0357 +[2025-02-23 13:25:16] (step=0907700) Train Loss: 0.3205, Train Steps/Sec: 16.42, Grad Norm: 0.0347 +[2025-02-23 13:25:22] (step=0907800) Train Loss: 0.3214, Train Steps/Sec: 17.11, Grad Norm: 0.0371 +[2025-02-23 13:25:28] (step=0907900) Train Loss: 0.3209, Train Steps/Sec: 14.83, Grad Norm: 0.0378 +[2025-02-23 13:25:35] (step=0908000) Train Loss: 0.3212, Train Steps/Sec: 16.40, Grad Norm: 0.0365 +[2025-02-23 13:25:40] (step=0908100) Train Loss: 0.3212, Train Steps/Sec: 17.08, Grad Norm: 0.0358 +[2025-02-23 13:25:47] (step=0908200) Train Loss: 0.3208, Train Steps/Sec: 14.96, Grad Norm: 0.0356 +[2025-02-23 13:25:54] (step=0908300) Train Loss: 0.3210, Train Steps/Sec: 13.65, Grad Norm: 0.0328 +[2025-02-23 13:26:00] (step=0908400) Train Loss: 0.3211, Train Steps/Sec: 17.13, Grad Norm: 0.0389 +[2025-02-23 13:26:06] (step=0908500) Train Loss: 0.3211, Train Steps/Sec: 17.17, Grad Norm: 0.0399 +[2025-02-23 13:26:12] (step=0908600) Train Loss: 0.3208, Train Steps/Sec: 16.46, Grad Norm: 0.0342 +[2025-02-23 13:26:18] (step=0908700) Train Loss: 0.3204, Train Steps/Sec: 17.31, Grad Norm: 0.0358 +[2025-02-23 13:26:24] (step=0908800) Train Loss: 0.3210, Train Steps/Sec: 17.30, Grad Norm: 0.0345 +[2025-02-23 13:26:30] (step=0908900) Train Loss: 0.3214, Train Steps/Sec: 17.31, Grad Norm: 0.0350 +[2025-02-23 13:26:35] (step=0909000) Train Loss: 0.3208, Train Steps/Sec: 17.42, Grad Norm: 0.0341 +[2025-02-23 13:26:41] (step=0909100) Train Loss: 0.3206, Train Steps/Sec: 17.40, Grad Norm: 0.0393 +[2025-02-23 13:26:47] (step=0909200) Train Loss: 0.3212, Train Steps/Sec: 17.34, Grad Norm: 0.0369 +[2025-02-23 13:26:53] (step=0909300) Train Loss: 0.3209, Train Steps/Sec: 16.52, Grad Norm: 0.0337 +[2025-02-23 13:26:59] (step=0909400) Train Loss: 0.3209, Train Steps/Sec: 16.58, Grad Norm: 0.0373 +[2025-02-23 13:27:06] (step=0909500) Train Loss: 0.3216, Train Steps/Sec: 13.38, Grad Norm: 0.0364 +[2025-02-23 13:27:13] (step=0909600) Train Loss: 0.3209, Train Steps/Sec: 15.73, Grad Norm: 0.0337 +[2025-02-23 13:27:19] (step=0909700) Train Loss: 0.3209, Train Steps/Sec: 16.58, Grad Norm: 0.0338 +[2025-02-23 13:27:25] (step=0909800) Train Loss: 0.3213, Train Steps/Sec: 16.52, Grad Norm: 0.0388 +[2025-02-23 13:27:32] (step=0909900) Train Loss: 0.3217, Train Steps/Sec: 14.56, Grad Norm: 0.0369 +[2025-02-23 13:27:38] (step=0910000) Train Loss: 0.3215, Train Steps/Sec: 17.09, Grad Norm: 0.0351 +[2025-02-23 13:27:44] (step=0910100) Train Loss: 0.3204, Train Steps/Sec: 16.33, Grad Norm: 0.0355 +[2025-02-23 13:27:50] (step=0910200) Train Loss: 0.3209, Train Steps/Sec: 15.77, Grad Norm: 0.0329 +[2025-02-23 13:27:56] (step=0910300) Train Loss: 0.3212, Train Steps/Sec: 16.46, Grad Norm: 0.0391 +[2025-02-23 13:28:02] (step=0910400) Train Loss: 0.3210, Train Steps/Sec: 17.17, Grad Norm: 0.0333 +[2025-02-23 13:28:08] (step=0910500) Train Loss: 0.3211, Train Steps/Sec: 17.16, Grad Norm: 0.0337 +[2025-02-23 13:28:14] (step=0910600) Train Loss: 0.3211, Train Steps/Sec: 16.29, Grad Norm: 0.0352 +[2025-02-23 13:28:20] (step=0910700) Train Loss: 0.3207, Train Steps/Sec: 16.93, Grad Norm: 0.0376 +[2025-02-23 13:28:27] (step=0910800) Train Loss: 0.3201, Train Steps/Sec: 14.10, Grad Norm: 0.0347 +[2025-02-23 13:28:33] (step=0910900) Train Loss: 0.3207, Train Steps/Sec: 17.30, Grad Norm: 0.0351 +[2025-02-23 13:28:38] (step=0911000) Train Loss: 0.3207, Train Steps/Sec: 17.07, Grad Norm: 0.0361 +[2025-02-23 13:28:44] (step=0911100) Train Loss: 0.3211, Train Steps/Sec: 17.07, Grad Norm: 0.0349 +[2025-02-23 13:28:50] (step=0911200) Train Loss: 0.3211, Train Steps/Sec: 17.05, Grad Norm: 0.0389 +[2025-02-23 13:28:56] (step=0911300) Train Loss: 0.3212, Train Steps/Sec: 16.27, Grad Norm: 0.0381 +[2025-02-23 13:29:02] (step=0911400) Train Loss: 0.3207, Train Steps/Sec: 16.32, Grad Norm: 0.0358 +[2025-02-23 13:29:09] (step=0911500) Train Loss: 0.3211, Train Steps/Sec: 16.26, Grad Norm: 0.0355 +[2025-02-23 13:29:15] (step=0911600) Train Loss: 0.3211, Train Steps/Sec: 15.66, Grad Norm: 0.0352 +[2025-02-23 13:29:21] (step=0911700) Train Loss: 0.3209, Train Steps/Sec: 15.69, Grad Norm: 0.0331 +[2025-02-23 13:29:28] (step=0911800) Train Loss: 0.3209, Train Steps/Sec: 15.14, Grad Norm: 0.0387 +[2025-02-23 13:29:34] (step=0911900) Train Loss: 0.3218, Train Steps/Sec: 16.38, Grad Norm: 0.0359 +[2025-02-23 13:29:41] (step=0912000) Train Loss: 0.3211, Train Steps/Sec: 14.17, Grad Norm: 0.0358 +[2025-02-23 13:29:47] (step=0912100) Train Loss: 0.3209, Train Steps/Sec: 15.85, Grad Norm: 0.0325 +[2025-02-23 13:29:54] (step=0912200) Train Loss: 0.3217, Train Steps/Sec: 15.82, Grad Norm: 0.0362 +[2025-02-23 13:30:00] (step=0912300) Train Loss: 0.3213, Train Steps/Sec: 17.26, Grad Norm: 0.0374 +[2025-02-23 13:30:05] (step=0912400) Train Loss: 0.3213, Train Steps/Sec: 17.26, Grad Norm: 0.0365 +[2025-02-23 13:30:11] (step=0912500) Train Loss: 0.3209, Train Steps/Sec: 16.51, Grad Norm: 0.0384 +[2025-02-23 13:30:17] (step=0912600) Train Loss: 0.3208, Train Steps/Sec: 17.29, Grad Norm: 0.0380 +[2025-02-23 13:30:23] (step=0912700) Train Loss: 0.3204, Train Steps/Sec: 17.30, Grad Norm: 0.0357 +[2025-02-23 13:30:29] (step=0912800) Train Loss: 0.3206, Train Steps/Sec: 17.34, Grad Norm: 0.0361 +[2025-02-23 13:30:35] (step=0912900) Train Loss: 0.3207, Train Steps/Sec: 17.39, Grad Norm: 0.0352 +[2025-02-23 13:30:40] (step=0913000) Train Loss: 0.3212, Train Steps/Sec: 17.25, Grad Norm: 0.0372 +[2025-02-23 13:30:46] (step=0913100) Train Loss: 0.3210, Train Steps/Sec: 17.33, Grad Norm: 0.0389 +[2025-02-23 13:30:52] (step=0913200) Train Loss: 0.3208, Train Steps/Sec: 17.21, Grad Norm: 0.0351 +[2025-02-23 13:30:59] (step=0913300) Train Loss: 0.3210, Train Steps/Sec: 13.48, Grad Norm: 0.0340 +[2025-02-23 13:31:05] (step=0913400) Train Loss: 0.3210, Train Steps/Sec: 16.40, Grad Norm: 0.0364 +[2025-02-23 13:31:12] (step=0913500) Train Loss: 0.3208, Train Steps/Sec: 16.48, Grad Norm: 0.0346 +[2025-02-23 13:31:18] (step=0913600) Train Loss: 0.3212, Train Steps/Sec: 15.85, Grad Norm: 0.0319 +[2025-02-23 13:31:24] (step=0913700) Train Loss: 0.3211, Train Steps/Sec: 15.87, Grad Norm: 0.0374 +[2025-02-23 13:31:31] (step=0913800) Train Loss: 0.3211, Train Steps/Sec: 15.30, Grad Norm: 0.0367 +[2025-02-23 13:31:37] (step=0913900) Train Loss: 0.3218, Train Steps/Sec: 16.52, Grad Norm: 0.0326 +[2025-02-23 13:31:43] (step=0914000) Train Loss: 0.3209, Train Steps/Sec: 15.88, Grad Norm: 0.0339 +[2025-02-23 13:31:49] (step=0914100) Train Loss: 0.3209, Train Steps/Sec: 17.30, Grad Norm: 0.0362 +[2025-02-23 13:31:55] (step=0914200) Train Loss: 0.3211, Train Steps/Sec: 15.87, Grad Norm: 0.0365 +[2025-02-23 13:32:01] (step=0914300) Train Loss: 0.3215, Train Steps/Sec: 17.30, Grad Norm: 0.0342 +[2025-02-23 13:32:07] (step=0914400) Train Loss: 0.3209, Train Steps/Sec: 17.32, Grad Norm: 0.0356 +[2025-02-23 13:32:14] (step=0914500) Train Loss: 0.3211, Train Steps/Sec: 13.65, Grad Norm: 0.0390 +[2025-02-23 13:32:20] (step=0914600) Train Loss: 0.3206, Train Steps/Sec: 17.23, Grad Norm: 0.0306 +[2025-02-23 13:32:26] (step=0914700) Train Loss: 0.3210, Train Steps/Sec: 17.36, Grad Norm: 0.0334 +[2025-02-23 13:32:31] (step=0914800) Train Loss: 0.3214, Train Steps/Sec: 17.33, Grad Norm: 0.0397 +[2025-02-23 13:32:37] (step=0914900) Train Loss: 0.3208, Train Steps/Sec: 17.34, Grad Norm: 0.0363 +[2025-02-23 13:32:43] (step=0915000) Train Loss: 0.3214, Train Steps/Sec: 17.36, Grad Norm: 0.0351 +[2025-02-23 13:32:49] (step=0915100) Train Loss: 0.3202, Train Steps/Sec: 17.42, Grad Norm: 0.0370 +[2025-02-23 13:32:54] (step=0915200) Train Loss: 0.3216, Train Steps/Sec: 17.37, Grad Norm: 0.0345 +[2025-02-23 13:33:00] (step=0915300) Train Loss: 0.3207, Train Steps/Sec: 16.60, Grad Norm: 0.0345 +[2025-02-23 13:33:06] (step=0915400) Train Loss: 0.3211, Train Steps/Sec: 16.56, Grad Norm: 0.0341 +[2025-02-23 13:33:12] (step=0915500) Train Loss: 0.3215, Train Steps/Sec: 16.53, Grad Norm: 0.0359 +[2025-02-23 13:33:19] (step=0915600) Train Loss: 0.3214, Train Steps/Sec: 15.20, Grad Norm: 0.0362 +[2025-02-23 13:33:26] (step=0915700) Train Loss: 0.3212, Train Steps/Sec: 14.70, Grad Norm: 0.0341 +[2025-02-23 13:33:33] (step=0915800) Train Loss: 0.3215, Train Steps/Sec: 14.19, Grad Norm: 0.0309 +[2025-02-23 13:33:39] (step=0915900) Train Loss: 0.3209, Train Steps/Sec: 15.75, Grad Norm: 0.0348 +[2025-02-23 13:33:45] (step=0916000) Train Loss: 0.3206, Train Steps/Sec: 16.48, Grad Norm: 0.0395 +[2025-02-23 13:33:51] (step=0916100) Train Loss: 0.3215, Train Steps/Sec: 16.57, Grad Norm: 0.0336 +[2025-02-23 13:33:57] (step=0916200) Train Loss: 0.3211, Train Steps/Sec: 16.57, Grad Norm: 0.0392 +[2025-02-23 13:34:03] (step=0916300) Train Loss: 0.3211, Train Steps/Sec: 17.26, Grad Norm: 0.0323 +[2025-02-23 13:34:09] (step=0916400) Train Loss: 0.3210, Train Steps/Sec: 16.50, Grad Norm: 0.0358 +[2025-02-23 13:34:15] (step=0916500) Train Loss: 0.3209, Train Steps/Sec: 17.33, Grad Norm: 0.0331 +[2025-02-23 13:34:21] (step=0916600) Train Loss: 0.3207, Train Steps/Sec: 17.27, Grad Norm: 0.0331 +[2025-02-23 13:34:27] (step=0916700) Train Loss: 0.3209, Train Steps/Sec: 17.32, Grad Norm: 0.0353 +[2025-02-23 13:34:32] (step=0916800) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0332 +[2025-02-23 13:34:38] (step=0916900) Train Loss: 0.3215, Train Steps/Sec: 17.33, Grad Norm: 0.0352 +[2025-02-23 13:34:45] (step=0917000) Train Loss: 0.3205, Train Steps/Sec: 14.14, Grad Norm: 0.0379 +[2025-02-23 13:34:51] (step=0917100) Train Loss: 0.3210, Train Steps/Sec: 17.22, Grad Norm: 0.0338 +[2025-02-23 13:34:57] (step=0917200) Train Loss: 0.3205, Train Steps/Sec: 17.30, Grad Norm: 0.0353 +[2025-02-23 13:35:03] (step=0917300) Train Loss: 0.3208, Train Steps/Sec: 16.51, Grad Norm: 0.0328 +[2025-02-23 13:35:09] (step=0917400) Train Loss: 0.3210, Train Steps/Sec: 16.54, Grad Norm: 0.0354 +[2025-02-23 13:35:15] (step=0917500) Train Loss: 0.3212, Train Steps/Sec: 16.52, Grad Norm: 0.0352 +[2025-02-23 13:35:22] (step=0917600) Train Loss: 0.3210, Train Steps/Sec: 15.31, Grad Norm: 0.0334 +[2025-02-23 13:35:28] (step=0917700) Train Loss: 0.3213, Train Steps/Sec: 15.36, Grad Norm: 0.0378 +[2025-02-23 13:35:34] (step=0917800) Train Loss: 0.3217, Train Steps/Sec: 16.75, Grad Norm: 0.0377 +[2025-02-23 13:35:40] (step=0917900) Train Loss: 0.3209, Train Steps/Sec: 15.97, Grad Norm: 0.0338 +[2025-02-23 13:35:47] (step=0918000) Train Loss: 0.3212, Train Steps/Sec: 16.02, Grad Norm: 0.0334 +[2025-02-23 13:35:52] (step=0918100) Train Loss: 0.3208, Train Steps/Sec: 17.45, Grad Norm: 0.0389 +[2025-02-23 13:35:58] (step=0918200) Train Loss: 0.3206, Train Steps/Sec: 16.67, Grad Norm: 0.0353 +[2025-02-23 13:36:05] (step=0918300) Train Loss: 0.3206, Train Steps/Sec: 13.86, Grad Norm: 0.0327 +[2025-02-23 13:36:11] (step=0918400) Train Loss: 0.3210, Train Steps/Sec: 17.29, Grad Norm: 0.0399 +[2025-02-23 13:36:17] (step=0918500) Train Loss: 0.3202, Train Steps/Sec: 17.35, Grad Norm: 0.0404 +[2025-02-23 13:36:23] (step=0918600) Train Loss: 0.3214, Train Steps/Sec: 17.45, Grad Norm: 0.0386 +[2025-02-23 13:36:28] (step=0918700) Train Loss: 0.3209, Train Steps/Sec: 17.44, Grad Norm: 0.0367 +[2025-02-23 13:36:34] (step=0918800) Train Loss: 0.3207, Train Steps/Sec: 17.44, Grad Norm: 0.0338 +[2025-02-23 13:36:40] (step=0918900) Train Loss: 0.3212, Train Steps/Sec: 17.43, Grad Norm: 0.0388 +[2025-02-23 13:36:46] (step=0919000) Train Loss: 0.3207, Train Steps/Sec: 17.47, Grad Norm: 0.0376 +[2025-02-23 13:36:51] (step=0919100) Train Loss: 0.3209, Train Steps/Sec: 17.39, Grad Norm: 0.0385 +[2025-02-23 13:36:57] (step=0919200) Train Loss: 0.3207, Train Steps/Sec: 17.41, Grad Norm: 0.0360 +[2025-02-23 13:37:03] (step=0919300) Train Loss: 0.3215, Train Steps/Sec: 16.63, Grad Norm: 0.0345 +[2025-02-23 13:37:09] (step=0919400) Train Loss: 0.3213, Train Steps/Sec: 16.60, Grad Norm: 0.0348 +[2025-02-23 13:37:17] (step=0919500) Train Loss: 0.3209, Train Steps/Sec: 13.51, Grad Norm: 0.0384 +[2025-02-23 13:37:23] (step=0919600) Train Loss: 0.3212, Train Steps/Sec: 14.66, Grad Norm: 0.0371 +[2025-02-23 13:37:29] (step=0919700) Train Loss: 0.3209, Train Steps/Sec: 17.40, Grad Norm: 0.0388 +[2025-02-23 13:37:35] (step=0919800) Train Loss: 0.3206, Train Steps/Sec: 15.95, Grad Norm: 0.0355 +[2025-02-23 13:37:42] (step=0919900) Train Loss: 0.3217, Train Steps/Sec: 15.96, Grad Norm: 0.0397 +[2025-02-23 13:37:48] (step=0920000) Train Loss: 0.3208, Train Steps/Sec: 16.67, Grad Norm: 0.0362 +[2025-02-23 13:37:53] (step=0920100) Train Loss: 0.3214, Train Steps/Sec: 17.39, Grad Norm: 0.0332 +[2025-02-23 13:37:59] (step=0920200) Train Loss: 0.3208, Train Steps/Sec: 16.70, Grad Norm: 0.0330 +[2025-02-23 13:38:05] (step=0920300) Train Loss: 0.3209, Train Steps/Sec: 16.65, Grad Norm: 0.0367 +[2025-02-23 13:38:11] (step=0920400) Train Loss: 0.3208, Train Steps/Sec: 17.43, Grad Norm: 0.0339 +[2025-02-23 13:38:17] (step=0920500) Train Loss: 0.3205, Train Steps/Sec: 17.38, Grad Norm: 0.0401 +[2025-02-23 13:38:23] (step=0920600) Train Loss: 0.3217, Train Steps/Sec: 17.41, Grad Norm: 0.0367 +[2025-02-23 13:38:28] (step=0920700) Train Loss: 0.3214, Train Steps/Sec: 17.45, Grad Norm: 0.0344 +[2025-02-23 13:38:35] (step=0920800) Train Loss: 0.3212, Train Steps/Sec: 14.40, Grad Norm: 0.0347 +[2025-02-23 13:38:41] (step=0920900) Train Loss: 0.3212, Train Steps/Sec: 17.32, Grad Norm: 0.0311 +[2025-02-23 13:38:47] (step=0921000) Train Loss: 0.3207, Train Steps/Sec: 17.29, Grad Norm: 0.0338 +[2025-02-23 13:38:53] (step=0921100) Train Loss: 0.3210, Train Steps/Sec: 17.34, Grad Norm: 0.0347 +[2025-02-23 13:38:58] (step=0921200) Train Loss: 0.3218, Train Steps/Sec: 17.46, Grad Norm: 0.0388 +[2025-02-23 13:39:04] (step=0921300) Train Loss: 0.3211, Train Steps/Sec: 17.43, Grad Norm: 0.0342 +[2025-02-23 13:39:10] (step=0921400) Train Loss: 0.3209, Train Steps/Sec: 15.91, Grad Norm: 0.0375 +[2025-02-23 13:39:17] (step=0921500) Train Loss: 0.3207, Train Steps/Sec: 14.67, Grad Norm: 0.0350 +[2025-02-23 13:39:23] (step=0921600) Train Loss: 0.3207, Train Steps/Sec: 16.71, Grad Norm: 0.0362 +[2025-02-23 13:39:30] (step=0921700) Train Loss: 0.3209, Train Steps/Sec: 15.98, Grad Norm: 0.0372 +[2025-02-23 13:39:36] (step=0921800) Train Loss: 0.3206, Train Steps/Sec: 15.37, Grad Norm: 0.0347 +[2025-02-23 13:39:42] (step=0921900) Train Loss: 0.3211, Train Steps/Sec: 15.96, Grad Norm: 0.0363 +[2025-02-23 13:39:49] (step=0922000) Train Loss: 0.3210, Train Steps/Sec: 14.41, Grad Norm: 0.0381 +[2025-02-23 13:39:55] (step=0922100) Train Loss: 0.3204, Train Steps/Sec: 17.26, Grad Norm: 0.0373 +[2025-02-23 13:40:01] (step=0922200) Train Loss: 0.3207, Train Steps/Sec: 15.93, Grad Norm: 0.0382 +[2025-02-23 13:40:07] (step=0922300) Train Loss: 0.3209, Train Steps/Sec: 17.44, Grad Norm: 0.0346 +[2025-02-23 13:40:13] (step=0922400) Train Loss: 0.3216, Train Steps/Sec: 17.41, Grad Norm: 0.0382 +[2025-02-23 13:40:19] (step=0922500) Train Loss: 0.3207, Train Steps/Sec: 17.43, Grad Norm: 0.0352 +[2025-02-23 13:40:24] (step=0922600) Train Loss: 0.3207, Train Steps/Sec: 17.48, Grad Norm: 0.0383 +[2025-02-23 13:40:30] (step=0922700) Train Loss: 0.3209, Train Steps/Sec: 17.41, Grad Norm: 0.0406 +[2025-02-23 13:40:36] (step=0922800) Train Loss: 0.3214, Train Steps/Sec: 17.37, Grad Norm: 0.0340 +[2025-02-23 13:40:42] (step=0922900) Train Loss: 0.3209, Train Steps/Sec: 17.36, Grad Norm: 0.0378 +[2025-02-23 13:40:47] (step=0923000) Train Loss: 0.3209, Train Steps/Sec: 17.40, Grad Norm: 0.0387 +[2025-02-23 13:40:53] (step=0923100) Train Loss: 0.3210, Train Steps/Sec: 17.46, Grad Norm: 0.0387 +[2025-02-23 13:40:59] (step=0923200) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0345 +[2025-02-23 13:41:06] (step=0923300) Train Loss: 0.3207, Train Steps/Sec: 14.42, Grad Norm: 0.0323 +[2025-02-23 13:41:12] (step=0923400) Train Loss: 0.3210, Train Steps/Sec: 15.11, Grad Norm: 0.0354 +[2025-02-23 13:41:19] (step=0923500) Train Loss: 0.3211, Train Steps/Sec: 14.59, Grad Norm: 0.0342 +[2025-02-23 13:41:25] (step=0923600) Train Loss: 0.3212, Train Steps/Sec: 17.22, Grad Norm: 0.0341 +[2025-02-23 13:41:32] (step=0923700) Train Loss: 0.3205, Train Steps/Sec: 15.18, Grad Norm: 0.0328 +[2025-02-23 13:41:38] (step=0923800) Train Loss: 0.3214, Train Steps/Sec: 15.87, Grad Norm: 0.0348 +[2025-02-23 13:41:44] (step=0923900) Train Loss: 0.3217, Train Steps/Sec: 15.89, Grad Norm: 0.0336 +[2025-02-23 13:41:50] (step=0924000) Train Loss: 0.3208, Train Steps/Sec: 17.29, Grad Norm: 0.0334 +[2025-02-23 13:41:56] (step=0924100) Train Loss: 0.3207, Train Steps/Sec: 17.34, Grad Norm: 0.0387 +[2025-02-23 13:42:02] (step=0924200) Train Loss: 0.3205, Train Steps/Sec: 15.93, Grad Norm: 0.0377 +[2025-02-23 13:42:08] (step=0924300) Train Loss: 0.3211, Train Steps/Sec: 17.39, Grad Norm: 0.0369 +[2025-02-23 13:42:14] (step=0924400) Train Loss: 0.3210, Train Steps/Sec: 17.43, Grad Norm: 0.0396 +[2025-02-23 13:42:20] (step=0924500) Train Loss: 0.3213, Train Steps/Sec: 14.64, Grad Norm: 0.0427 +[2025-02-23 13:42:26] (step=0924600) Train Loss: 0.3207, Train Steps/Sec: 17.12, Grad Norm: 0.0330 +[2025-02-23 13:42:32] (step=0924700) Train Loss: 0.3213, Train Steps/Sec: 17.21, Grad Norm: 0.0339 +[2025-02-23 13:42:38] (step=0924800) Train Loss: 0.3211, Train Steps/Sec: 17.21, Grad Norm: 0.0340 +[2025-02-23 13:42:44] (step=0924900) Train Loss: 0.3206, Train Steps/Sec: 17.41, Grad Norm: 0.0350 +[2025-02-23 13:42:49] (step=0925000) Train Loss: 0.3211, Train Steps/Sec: 17.37, Grad Norm: 0.0378 +[2025-02-23 13:42:55] (step=0925100) Train Loss: 0.3207, Train Steps/Sec: 17.35, Grad Norm: 0.0417 +[2025-02-23 13:43:01] (step=0925200) Train Loss: 0.3210, Train Steps/Sec: 17.29, Grad Norm: 0.0429 +[2025-02-23 13:43:07] (step=0925300) Train Loss: 0.3209, Train Steps/Sec: 17.20, Grad Norm: 0.0351 +[2025-02-23 13:43:14] (step=0925400) Train Loss: 0.3215, Train Steps/Sec: 13.55, Grad Norm: 0.0349 +[2025-02-23 13:43:20] (step=0925500) Train Loss: 0.3213, Train Steps/Sec: 16.49, Grad Norm: 0.0309 +[2025-02-23 13:43:26] (step=0925600) Train Loss: 0.3203, Train Steps/Sec: 17.34, Grad Norm: 0.0352 +[2025-02-23 13:43:33] (step=0925700) Train Loss: 0.3212, Train Steps/Sec: 14.55, Grad Norm: 0.0370 +[2025-02-23 13:43:40] (step=0925800) Train Loss: 0.3207, Train Steps/Sec: 13.53, Grad Norm: 0.0342 +[2025-02-23 13:43:46] (step=0925900) Train Loss: 0.3208, Train Steps/Sec: 16.62, Grad Norm: 0.0347 +[2025-02-23 13:43:52] (step=0926000) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0345 +[2025-02-23 13:43:58] (step=0926100) Train Loss: 0.3212, Train Steps/Sec: 16.67, Grad Norm: 0.0366 +[2025-02-23 13:44:04] (step=0926200) Train Loss: 0.3215, Train Steps/Sec: 16.67, Grad Norm: 0.0357 +[2025-02-23 13:44:10] (step=0926300) Train Loss: 0.3210, Train Steps/Sec: 17.46, Grad Norm: 0.0342 +[2025-02-23 13:44:15] (step=0926400) Train Loss: 0.3206, Train Steps/Sec: 17.41, Grad Norm: 0.0330 +[2025-02-23 13:44:21] (step=0926500) Train Loss: 0.3210, Train Steps/Sec: 17.43, Grad Norm: 0.0389 +[2025-02-23 13:44:27] (step=0926600) Train Loss: 0.3206, Train Steps/Sec: 17.46, Grad Norm: 0.0370 +[2025-02-23 13:44:33] (step=0926700) Train Loss: 0.3215, Train Steps/Sec: 17.45, Grad Norm: 0.0317 +[2025-02-23 13:44:38] (step=0926800) Train Loss: 0.3211, Train Steps/Sec: 17.41, Grad Norm: 0.0377 +[2025-02-23 13:44:44] (step=0926900) Train Loss: 0.3208, Train Steps/Sec: 17.42, Grad Norm: 0.0396 +[2025-02-23 13:44:51] (step=0927000) Train Loss: 0.3209, Train Steps/Sec: 14.22, Grad Norm: 0.0385 +[2025-02-23 13:44:57] (step=0927100) Train Loss: 0.3210, Train Steps/Sec: 17.34, Grad Norm: 0.0355 +[2025-02-23 13:45:03] (step=0927200) Train Loss: 0.3206, Train Steps/Sec: 17.32, Grad Norm: 0.0398 +[2025-02-23 13:45:09] (step=0927300) Train Loss: 0.3209, Train Steps/Sec: 16.65, Grad Norm: 0.0349 +[2025-02-23 13:45:16] (step=0927400) Train Loss: 0.3211, Train Steps/Sec: 14.12, Grad Norm: 0.0371 +[2025-02-23 13:45:22] (step=0927500) Train Loss: 0.3211, Train Steps/Sec: 16.55, Grad Norm: 0.0414 +[2025-02-23 13:45:28] (step=0927600) Train Loss: 0.3209, Train Steps/Sec: 16.62, Grad Norm: 0.0393 +[2025-02-23 13:45:34] (step=0927700) Train Loss: 0.3206, Train Steps/Sec: 15.30, Grad Norm: 0.0369 +[2025-02-23 13:45:41] (step=0927800) Train Loss: 0.3212, Train Steps/Sec: 16.03, Grad Norm: 0.0343 +[2025-02-23 13:45:47] (step=0927900) Train Loss: 0.3207, Train Steps/Sec: 16.66, Grad Norm: 0.0375 +[2025-02-23 13:45:52] (step=0928000) Train Loss: 0.3212, Train Steps/Sec: 17.42, Grad Norm: 0.0333 +[2025-02-23 13:45:58] (step=0928100) Train Loss: 0.3212, Train Steps/Sec: 16.58, Grad Norm: 0.0346 +[2025-02-23 13:46:04] (step=0928200) Train Loss: 0.3210, Train Steps/Sec: 17.37, Grad Norm: 0.0327 +[2025-02-23 13:46:11] (step=0928300) Train Loss: 0.3207, Train Steps/Sec: 13.71, Grad Norm: 0.0424 +[2025-02-23 13:46:17] (step=0928400) Train Loss: 0.3210, Train Steps/Sec: 17.29, Grad Norm: 0.0363 +[2025-02-23 13:46:23] (step=0928500) Train Loss: 0.3212, Train Steps/Sec: 17.30, Grad Norm: 0.0371 +[2025-02-23 13:46:29] (step=0928600) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0334 +[2025-02-23 13:46:35] (step=0928700) Train Loss: 0.3211, Train Steps/Sec: 17.23, Grad Norm: 0.0333 +[2025-02-23 13:46:40] (step=0928800) Train Loss: 0.3210, Train Steps/Sec: 17.28, Grad Norm: 0.0343 +[2025-02-23 13:46:46] (step=0928900) Train Loss: 0.3212, Train Steps/Sec: 17.29, Grad Norm: 0.0338 +[2025-02-23 13:46:52] (step=0929000) Train Loss: 0.3205, Train Steps/Sec: 17.32, Grad Norm: 0.0355 +[2025-02-23 13:46:58] (step=0929100) Train Loss: 0.3208, Train Steps/Sec: 17.40, Grad Norm: 0.0350 +[2025-02-23 13:47:03] (step=0929200) Train Loss: 0.3211, Train Steps/Sec: 17.32, Grad Norm: 0.0354 +[2025-02-23 13:47:10] (step=0929300) Train Loss: 0.3208, Train Steps/Sec: 15.23, Grad Norm: 0.0349 +[2025-02-23 13:47:16] (step=0929400) Train Loss: 0.3207, Train Steps/Sec: 15.89, Grad Norm: 0.0342 +[2025-02-23 13:47:24] (step=0929500) Train Loss: 0.3211, Train Steps/Sec: 13.76, Grad Norm: 0.0339 +[2025-02-23 13:47:30] (step=0929600) Train Loss: 0.3209, Train Steps/Sec: 15.90, Grad Norm: 0.0341 +[2025-02-23 13:47:36] (step=0929700) Train Loss: 0.3204, Train Steps/Sec: 15.27, Grad Norm: 0.0350 +[2025-02-23 13:47:42] (step=0929800) Train Loss: 0.3211, Train Steps/Sec: 16.68, Grad Norm: 0.0335 +[2025-02-23 13:47:48] (step=0929900) Train Loss: 0.3216, Train Steps/Sec: 16.67, Grad Norm: 0.0378 +[2025-02-23 13:47:54] (step=0930000) Train Loss: 0.3209, Train Steps/Sec: 16.64, Grad Norm: 0.0346 +[2025-02-23 13:48:00] (step=0930100) Train Loss: 0.3214, Train Steps/Sec: 17.44, Grad Norm: 0.0369 +[2025-02-23 13:48:06] (step=0930200) Train Loss: 0.3214, Train Steps/Sec: 17.46, Grad Norm: 0.0372 +[2025-02-23 13:48:12] (step=0930300) Train Loss: 0.3213, Train Steps/Sec: 16.69, Grad Norm: 0.0348 +[2025-02-23 13:48:18] (step=0930400) Train Loss: 0.3210, Train Steps/Sec: 17.42, Grad Norm: 0.0352 +[2025-02-23 13:48:23] (step=0930500) Train Loss: 0.3207, Train Steps/Sec: 17.44, Grad Norm: 0.0365 +[2025-02-23 13:48:29] (step=0930600) Train Loss: 0.3208, Train Steps/Sec: 17.44, Grad Norm: 0.0330 +[2025-02-23 13:48:35] (step=0930700) Train Loss: 0.3209, Train Steps/Sec: 17.35, Grad Norm: 0.0360 +[2025-02-23 13:48:42] (step=0930800) Train Loss: 0.3204, Train Steps/Sec: 14.34, Grad Norm: 0.0363 +[2025-02-23 13:48:48] (step=0930900) Train Loss: 0.3211, Train Steps/Sec: 17.46, Grad Norm: 0.0365 +[2025-02-23 13:48:53] (step=0931000) Train Loss: 0.3213, Train Steps/Sec: 17.40, Grad Norm: 0.0364 +[2025-02-23 13:48:59] (step=0931100) Train Loss: 0.3207, Train Steps/Sec: 17.48, Grad Norm: 0.0340 +[2025-02-23 13:49:05] (step=0931200) Train Loss: 0.3212, Train Steps/Sec: 16.03, Grad Norm: 0.0375 +[2025-02-23 13:49:12] (step=0931300) Train Loss: 0.3205, Train Steps/Sec: 16.12, Grad Norm: 0.0371 +[2025-02-23 13:49:18] (step=0931400) Train Loss: 0.3207, Train Steps/Sec: 15.91, Grad Norm: 0.0389 +[2025-02-23 13:49:24] (step=0931500) Train Loss: 0.3216, Train Steps/Sec: 15.94, Grad Norm: 0.0347 +[2025-02-23 13:49:30] (step=0931600) Train Loss: 0.3204, Train Steps/Sec: 16.79, Grad Norm: 0.0358 +[2025-02-23 13:49:37] (step=0931700) Train Loss: 0.3206, Train Steps/Sec: 15.34, Grad Norm: 0.0327 +[2025-02-23 13:49:43] (step=0931800) Train Loss: 0.3217, Train Steps/Sec: 16.74, Grad Norm: 0.0338 +[2025-02-23 13:49:49] (step=0931900) Train Loss: 0.3211, Train Steps/Sec: 16.66, Grad Norm: 0.0436 +[2025-02-23 13:49:56] (step=0932000) Train Loss: 0.3213, Train Steps/Sec: 13.81, Grad Norm: 0.0369 +[2025-02-23 13:50:02] (step=0932100) Train Loss: 0.3209, Train Steps/Sec: 17.29, Grad Norm: 0.0356 +[2025-02-23 13:50:07] (step=0932200) Train Loss: 0.3212, Train Steps/Sec: 17.15, Grad Norm: 0.0392 +[2025-02-23 13:50:13] (step=0932300) Train Loss: 0.3216, Train Steps/Sec: 16.47, Grad Norm: 0.0342 +[2025-02-23 13:50:19] (step=0932400) Train Loss: 0.3211, Train Steps/Sec: 17.19, Grad Norm: 0.0328 +[2025-02-23 13:50:25] (step=0932500) Train Loss: 0.3208, Train Steps/Sec: 17.07, Grad Norm: 0.0372 +[2025-02-23 13:50:31] (step=0932600) Train Loss: 0.3211, Train Steps/Sec: 17.07, Grad Norm: 0.0372 +[2025-02-23 13:50:37] (step=0932700) Train Loss: 0.3212, Train Steps/Sec: 17.11, Grad Norm: 0.0362 +[2025-02-23 13:50:43] (step=0932800) Train Loss: 0.3206, Train Steps/Sec: 17.14, Grad Norm: 0.0336 +[2025-02-23 13:50:49] (step=0932900) Train Loss: 0.3215, Train Steps/Sec: 17.20, Grad Norm: 0.0356 +[2025-02-23 13:50:54] (step=0933000) Train Loss: 0.3208, Train Steps/Sec: 17.20, Grad Norm: 0.0352 +[2025-02-23 13:51:00] (step=0933100) Train Loss: 0.3209, Train Steps/Sec: 16.33, Grad Norm: 0.0371 +[2025-02-23 13:51:07] (step=0933200) Train Loss: 0.3211, Train Steps/Sec: 15.14, Grad Norm: 0.0383 +[2025-02-23 13:51:14] (step=0933300) Train Loss: 0.3216, Train Steps/Sec: 14.14, Grad Norm: 0.0343 +[2025-02-23 13:51:21] (step=0933400) Train Loss: 0.3212, Train Steps/Sec: 15.10, Grad Norm: 0.0346 +[2025-02-23 13:51:27] (step=0933500) Train Loss: 0.3208, Train Steps/Sec: 15.84, Grad Norm: 0.0361 +[2025-02-23 13:51:33] (step=0933600) Train Loss: 0.3211, Train Steps/Sec: 16.59, Grad Norm: 0.0317 +[2025-02-23 13:51:39] (step=0933700) Train Loss: 0.3207, Train Steps/Sec: 15.78, Grad Norm: 0.0344 +[2025-02-23 13:51:45] (step=0933800) Train Loss: 0.3217, Train Steps/Sec: 16.58, Grad Norm: 0.0335 +[2025-02-23 13:51:52] (step=0933900) Train Loss: 0.3215, Train Steps/Sec: 15.85, Grad Norm: 0.0355 +[2025-02-23 13:51:58] (step=0934000) Train Loss: 0.3210, Train Steps/Sec: 17.30, Grad Norm: 0.0365 +[2025-02-23 13:52:03] (step=0934100) Train Loss: 0.3209, Train Steps/Sec: 17.25, Grad Norm: 0.0340 +[2025-02-23 13:52:09] (step=0934200) Train Loss: 0.3211, Train Steps/Sec: 17.44, Grad Norm: 0.0331 +[2025-02-23 13:52:15] (step=0934300) Train Loss: 0.3206, Train Steps/Sec: 16.66, Grad Norm: 0.0321 +[2025-02-23 13:52:21] (step=0934400) Train Loss: 0.3209, Train Steps/Sec: 17.38, Grad Norm: 0.0359 +[2025-02-23 13:52:28] (step=0934500) Train Loss: 0.3211, Train Steps/Sec: 14.23, Grad Norm: 0.0356 +[2025-02-23 13:52:34] (step=0934600) Train Loss: 0.3203, Train Steps/Sec: 17.29, Grad Norm: 0.0403 +[2025-02-23 13:52:39] (step=0934700) Train Loss: 0.3208, Train Steps/Sec: 17.44, Grad Norm: 0.0330 +[2025-02-23 13:52:45] (step=0934800) Train Loss: 0.3206, Train Steps/Sec: 17.48, Grad Norm: 0.0373 +[2025-02-23 13:52:51] (step=0934900) Train Loss: 0.3212, Train Steps/Sec: 17.48, Grad Norm: 0.0358 +[2025-02-23 13:52:57] (step=0935000) Train Loss: 0.3208, Train Steps/Sec: 17.47, Grad Norm: 0.0392 +[2025-02-23 13:53:03] (step=0935100) Train Loss: 0.3210, Train Steps/Sec: 14.82, Grad Norm: 0.0389 +[2025-02-23 13:53:09] (step=0935200) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0349 +[2025-02-23 13:53:15] (step=0935300) Train Loss: 0.3204, Train Steps/Sec: 17.48, Grad Norm: 0.0378 +[2025-02-23 13:53:21] (step=0935400) Train Loss: 0.3206, Train Steps/Sec: 15.32, Grad Norm: 0.0326 +[2025-02-23 13:53:28] (step=0935500) Train Loss: 0.3210, Train Steps/Sec: 15.99, Grad Norm: 0.0370 +[2025-02-23 13:53:34] (step=0935600) Train Loss: 0.3213, Train Steps/Sec: 16.76, Grad Norm: 0.0332 +[2025-02-23 13:53:40] (step=0935700) Train Loss: 0.3213, Train Steps/Sec: 15.97, Grad Norm: 0.0349 +[2025-02-23 13:53:47] (step=0935800) Train Loss: 0.3210, Train Steps/Sec: 13.75, Grad Norm: 0.0335 +[2025-02-23 13:53:53] (step=0935900) Train Loss: 0.3207, Train Steps/Sec: 15.81, Grad Norm: 0.0367 +[2025-02-23 13:53:59] (step=0936000) Train Loss: 0.3205, Train Steps/Sec: 17.37, Grad Norm: 0.0356 +[2025-02-23 13:54:05] (step=0936100) Train Loss: 0.3212, Train Steps/Sec: 17.40, Grad Norm: 0.0350 +[2025-02-23 13:54:11] (step=0936200) Train Loss: 0.3215, Train Steps/Sec: 17.43, Grad Norm: 0.0358 +[2025-02-23 13:54:17] (step=0936300) Train Loss: 0.3209, Train Steps/Sec: 16.66, Grad Norm: 0.0345 +[2025-02-23 13:54:22] (step=0936400) Train Loss: 0.3212, Train Steps/Sec: 17.39, Grad Norm: 0.0352 +[2025-02-23 13:54:28] (step=0936500) Train Loss: 0.3211, Train Steps/Sec: 17.40, Grad Norm: 0.0332 +[2025-02-23 13:54:34] (step=0936600) Train Loss: 0.3208, Train Steps/Sec: 17.36, Grad Norm: 0.0378 +[2025-02-23 13:54:40] (step=0936700) Train Loss: 0.3218, Train Steps/Sec: 17.31, Grad Norm: 0.0363 +[2025-02-23 13:54:45] (step=0936800) Train Loss: 0.3210, Train Steps/Sec: 17.34, Grad Norm: 0.0365 +[2025-02-23 13:54:51] (step=0936900) Train Loss: 0.3209, Train Steps/Sec: 17.39, Grad Norm: 0.0357 +[2025-02-23 13:54:58] (step=0937000) Train Loss: 0.3206, Train Steps/Sec: 13.78, Grad Norm: 0.0379 +[2025-02-23 13:55:05] (step=0937100) Train Loss: 0.3214, Train Steps/Sec: 15.07, Grad Norm: 0.0346 +[2025-02-23 13:55:11] (step=0937200) Train Loss: 0.3206, Train Steps/Sec: 17.21, Grad Norm: 0.0374 +[2025-02-23 13:55:17] (step=0937300) Train Loss: 0.3213, Train Steps/Sec: 16.55, Grad Norm: 0.0359 +[2025-02-23 13:55:24] (step=0937400) Train Loss: 0.3202, Train Steps/Sec: 15.14, Grad Norm: 0.0341 +[2025-02-23 13:55:30] (step=0937500) Train Loss: 0.3209, Train Steps/Sec: 15.91, Grad Norm: 0.0359 +[2025-02-23 13:55:36] (step=0937600) Train Loss: 0.3207, Train Steps/Sec: 17.41, Grad Norm: 0.0336 +[2025-02-23 13:55:42] (step=0937700) Train Loss: 0.3211, Train Steps/Sec: 16.00, Grad Norm: 0.0378 +[2025-02-23 13:55:48] (step=0937800) Train Loss: 0.3210, Train Steps/Sec: 15.99, Grad Norm: 0.0338 +[2025-02-23 13:55:54] (step=0937900) Train Loss: 0.3214, Train Steps/Sec: 16.68, Grad Norm: 0.0361 +[2025-02-23 13:56:00] (step=0938000) Train Loss: 0.3214, Train Steps/Sec: 17.47, Grad Norm: 0.0318 +[2025-02-23 13:56:06] (step=0938100) Train Loss: 0.3210, Train Steps/Sec: 17.44, Grad Norm: 0.0338 +[2025-02-23 13:56:11] (step=0938200) Train Loss: 0.3212, Train Steps/Sec: 17.32, Grad Norm: 0.0337 +[2025-02-23 13:56:19] (step=0938300) Train Loss: 0.3209, Train Steps/Sec: 13.81, Grad Norm: 0.0349 +[2025-02-23 13:56:24] (step=0938400) Train Loss: 0.3208, Train Steps/Sec: 17.32, Grad Norm: 0.0409 +[2025-02-23 13:56:30] (step=0938500) Train Loss: 0.3209, Train Steps/Sec: 17.27, Grad Norm: 0.0347 +[2025-02-23 13:56:36] (step=0938600) Train Loss: 0.3204, Train Steps/Sec: 17.29, Grad Norm: 0.0325 +[2025-02-23 13:56:42] (step=0938700) Train Loss: 0.3208, Train Steps/Sec: 17.35, Grad Norm: 0.0414 +[2025-02-23 13:56:47] (step=0938800) Train Loss: 0.3211, Train Steps/Sec: 17.38, Grad Norm: 0.0378 +[2025-02-23 13:56:53] (step=0938900) Train Loss: 0.3210, Train Steps/Sec: 17.41, Grad Norm: 0.0347 +[2025-02-23 13:57:00] (step=0939000) Train Loss: 0.3212, Train Steps/Sec: 14.74, Grad Norm: 0.0400 +[2025-02-23 13:57:06] (step=0939100) Train Loss: 0.3210, Train Steps/Sec: 17.43, Grad Norm: 0.0332 +[2025-02-23 13:57:11] (step=0939200) Train Loss: 0.3214, Train Steps/Sec: 17.42, Grad Norm: 0.0414 +[2025-02-23 13:57:18] (step=0939300) Train Loss: 0.3214, Train Steps/Sec: 15.96, Grad Norm: 0.0347 +[2025-02-23 13:57:24] (step=0939400) Train Loss: 0.3202, Train Steps/Sec: 15.26, Grad Norm: 0.0395 +[2025-02-23 13:57:30] (step=0939500) Train Loss: 0.3209, Train Steps/Sec: 17.32, Grad Norm: 0.0371 +[2025-02-23 13:57:37] (step=0939600) Train Loss: 0.3206, Train Steps/Sec: 13.68, Grad Norm: 0.0369 +[2025-02-23 13:57:44] (step=0939700) Train Loss: 0.3206, Train Steps/Sec: 15.85, Grad Norm: 0.0351 +[2025-02-23 13:57:50] (step=0939800) Train Loss: 0.3200, Train Steps/Sec: 15.94, Grad Norm: 0.0340 +[2025-02-23 13:57:56] (step=0939900) Train Loss: 0.3210, Train Steps/Sec: 16.63, Grad Norm: 0.0386 +[2025-02-23 13:58:02] (step=0940000) Train Loss: 0.3213, Train Steps/Sec: 17.44, Grad Norm: 0.0329 +[2025-02-23 13:58:07] (step=0940100) Train Loss: 0.3210, Train Steps/Sec: 17.43, Grad Norm: 0.0366 +[2025-02-23 13:58:13] (step=0940200) Train Loss: 0.3208, Train Steps/Sec: 17.42, Grad Norm: 0.0363 +[2025-02-23 13:58:19] (step=0940300) Train Loss: 0.3209, Train Steps/Sec: 16.69, Grad Norm: 0.0400 +[2025-02-23 13:58:25] (step=0940400) Train Loss: 0.3215, Train Steps/Sec: 17.44, Grad Norm: 0.0383 +[2025-02-23 13:58:31] (step=0940500) Train Loss: 0.3206, Train Steps/Sec: 17.47, Grad Norm: 0.0372 +[2025-02-23 13:58:36] (step=0940600) Train Loss: 0.3208, Train Steps/Sec: 17.47, Grad Norm: 0.0374 +[2025-02-23 13:58:42] (step=0940700) Train Loss: 0.3201, Train Steps/Sec: 17.43, Grad Norm: 0.0328 +[2025-02-23 13:58:49] (step=0940800) Train Loss: 0.3209, Train Steps/Sec: 14.20, Grad Norm: 0.0358 +[2025-02-23 13:58:56] (step=0940900) Train Loss: 0.3203, Train Steps/Sec: 15.74, Grad Norm: 0.0336 +[2025-02-23 13:59:02] (step=0941000) Train Loss: 0.3206, Train Steps/Sec: 15.90, Grad Norm: 0.0398 +[2025-02-23 13:59:08] (step=0941100) Train Loss: 0.3210, Train Steps/Sec: 17.36, Grad Norm: 0.0389 +[2025-02-23 13:59:14] (step=0941200) Train Loss: 0.3206, Train Steps/Sec: 16.71, Grad Norm: 0.0348 +[2025-02-23 13:59:20] (step=0941300) Train Loss: 0.3211, Train Steps/Sec: 16.71, Grad Norm: 0.0335 +[2025-02-23 13:59:26] (step=0941400) Train Loss: 0.3208, Train Steps/Sec: 15.35, Grad Norm: 0.0354 +[2025-02-23 13:59:32] (step=0941500) Train Loss: 0.3212, Train Steps/Sec: 17.41, Grad Norm: 0.0371 +[2025-02-23 13:59:38] (step=0941600) Train Loss: 0.3206, Train Steps/Sec: 16.59, Grad Norm: 0.0358 +[2025-02-23 13:59:44] (step=0941700) Train Loss: 0.3208, Train Steps/Sec: 15.91, Grad Norm: 0.0370 +[2025-02-23 13:59:50] (step=0941800) Train Loss: 0.3206, Train Steps/Sec: 15.91, Grad Norm: 0.0352 +[2025-02-23 13:59:56] (step=0941900) Train Loss: 0.3204, Train Steps/Sec: 17.34, Grad Norm: 0.0336 +[2025-02-23 14:00:02] (step=0942000) Train Loss: 0.3213, Train Steps/Sec: 16.54, Grad Norm: 0.0349 +[2025-02-23 14:00:09] (step=0942100) Train Loss: 0.3210, Train Steps/Sec: 14.33, Grad Norm: 0.0353 +[2025-02-23 14:00:15] (step=0942200) Train Loss: 0.3213, Train Steps/Sec: 17.38, Grad Norm: 0.0324 +[2025-02-23 14:00:21] (step=0942300) Train Loss: 0.3209, Train Steps/Sec: 16.65, Grad Norm: 0.0382 +[2025-02-23 14:00:27] (step=0942400) Train Loss: 0.3211, Train Steps/Sec: 17.39, Grad Norm: 0.0354 +[2025-02-23 14:00:32] (step=0942500) Train Loss: 0.3213, Train Steps/Sec: 17.38, Grad Norm: 0.0342 +[2025-02-23 14:00:38] (step=0942600) Train Loss: 0.3203, Train Steps/Sec: 17.41, Grad Norm: 0.0387 +[2025-02-23 14:00:44] (step=0942700) Train Loss: 0.3203, Train Steps/Sec: 17.43, Grad Norm: 0.0335 +[2025-02-23 14:00:50] (step=0942800) Train Loss: 0.3209, Train Steps/Sec: 17.42, Grad Norm: 0.0347 +[2025-02-23 14:00:57] (step=0942900) Train Loss: 0.3211, Train Steps/Sec: 14.66, Grad Norm: 0.0357 +[2025-02-23 14:01:02] (step=0943000) Train Loss: 0.3211, Train Steps/Sec: 17.39, Grad Norm: 0.0381 +[2025-02-23 14:01:08] (step=0943100) Train Loss: 0.3205, Train Steps/Sec: 17.39, Grad Norm: 0.0366 +[2025-02-23 14:01:14] (step=0943200) Train Loss: 0.3209, Train Steps/Sec: 16.04, Grad Norm: 0.0358 +[2025-02-23 14:01:22] (step=0943300) Train Loss: 0.3205, Train Steps/Sec: 13.74, Grad Norm: 0.0354 +[2025-02-23 14:01:28] (step=0943400) Train Loss: 0.3206, Train Steps/Sec: 15.82, Grad Norm: 0.0348 +[2025-02-23 14:01:34] (step=0943500) Train Loss: 0.3215, Train Steps/Sec: 17.35, Grad Norm: 0.0352 +[2025-02-23 14:01:40] (step=0943600) Train Loss: 0.3207, Train Steps/Sec: 16.59, Grad Norm: 0.0339 +[2025-02-23 14:01:46] (step=0943700) Train Loss: 0.3211, Train Steps/Sec: 15.20, Grad Norm: 0.0355 +[2025-02-23 14:01:52] (step=0943800) Train Loss: 0.3205, Train Steps/Sec: 16.67, Grad Norm: 0.0321 +[2025-02-23 14:01:58] (step=0943900) Train Loss: 0.3207, Train Steps/Sec: 17.41, Grad Norm: 0.0334 +[2025-02-23 14:02:04] (step=0944000) Train Loss: 0.3211, Train Steps/Sec: 16.64, Grad Norm: 0.0336 +[2025-02-23 14:02:10] (step=0944100) Train Loss: 0.3203, Train Steps/Sec: 17.43, Grad Norm: 0.0356 +[2025-02-23 14:02:16] (step=0944200) Train Loss: 0.3214, Train Steps/Sec: 17.35, Grad Norm: 0.0345 +[2025-02-23 14:02:22] (step=0944300) Train Loss: 0.3208, Train Steps/Sec: 16.66, Grad Norm: 0.0353 +[2025-02-23 14:02:27] (step=0944400) Train Loss: 0.3211, Train Steps/Sec: 17.40, Grad Norm: 0.0361 +[2025-02-23 14:02:33] (step=0944500) Train Loss: 0.3211, Train Steps/Sec: 17.29, Grad Norm: 0.0399 +[2025-02-23 14:02:40] (step=0944600) Train Loss: 0.3208, Train Steps/Sec: 14.29, Grad Norm: 0.0341 +[2025-02-23 14:02:46] (step=0944700) Train Loss: 0.3211, Train Steps/Sec: 17.40, Grad Norm: 0.0371 +[2025-02-23 14:02:52] (step=0944800) Train Loss: 0.3209, Train Steps/Sec: 15.23, Grad Norm: 0.0392 +[2025-02-23 14:02:58] (step=0944900) Train Loss: 0.3209, Train Steps/Sec: 16.62, Grad Norm: 0.0349 +[2025-02-23 14:03:04] (step=0945000) Train Loss: 0.3209, Train Steps/Sec: 17.46, Grad Norm: 0.0341 +[2025-02-23 14:03:10] (step=0945100) Train Loss: 0.3209, Train Steps/Sec: 16.73, Grad Norm: 0.0343 +[2025-02-23 14:03:16] (step=0945200) Train Loss: 0.3202, Train Steps/Sec: 16.75, Grad Norm: 0.0323 +[2025-02-23 14:03:22] (step=0945300) Train Loss: 0.3209, Train Steps/Sec: 16.73, Grad Norm: 0.0337 +[2025-02-23 14:03:28] (step=0945400) Train Loss: 0.3210, Train Steps/Sec: 15.92, Grad Norm: 0.0369 +[2025-02-23 14:03:34] (step=0945500) Train Loss: 0.3202, Train Steps/Sec: 17.45, Grad Norm: 0.0350 +[2025-02-23 14:03:40] (step=0945600) Train Loss: 0.3203, Train Steps/Sec: 15.96, Grad Norm: 0.0318 +[2025-02-23 14:03:47] (step=0945700) Train Loss: 0.3208, Train Steps/Sec: 15.87, Grad Norm: 0.0361 +[2025-02-23 14:03:54] (step=0945800) Train Loss: 0.3205, Train Steps/Sec: 13.58, Grad Norm: 0.0402 +[2025-02-23 14:04:00] (step=0945900) Train Loss: 0.3209, Train Steps/Sec: 17.14, Grad Norm: 0.0321 +[2025-02-23 14:04:06] (step=0946000) Train Loss: 0.3210, Train Steps/Sec: 16.54, Grad Norm: 0.0347 +[2025-02-23 14:04:12] (step=0946100) Train Loss: 0.3207, Train Steps/Sec: 17.34, Grad Norm: 0.0336 +[2025-02-23 14:04:17] (step=0946200) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0326 +[2025-02-23 14:04:23] (step=0946300) Train Loss: 0.3212, Train Steps/Sec: 16.61, Grad Norm: 0.0352 +[2025-02-23 14:04:29] (step=0946400) Train Loss: 0.3215, Train Steps/Sec: 17.31, Grad Norm: 0.0327 +[2025-02-23 14:04:35] (step=0946500) Train Loss: 0.3210, Train Steps/Sec: 17.39, Grad Norm: 0.0329 +[2025-02-23 14:04:41] (step=0946600) Train Loss: 0.3206, Train Steps/Sec: 17.42, Grad Norm: 0.0315 +[2025-02-23 14:04:46] (step=0946700) Train Loss: 0.3207, Train Steps/Sec: 17.41, Grad Norm: 0.0332 +[2025-02-23 14:04:53] (step=0946800) Train Loss: 0.3209, Train Steps/Sec: 14.72, Grad Norm: 0.0385 +[2025-02-23 14:04:59] (step=0946900) Train Loss: 0.3206, Train Steps/Sec: 17.43, Grad Norm: 0.0401 +[2025-02-23 14:05:05] (step=0947000) Train Loss: 0.3213, Train Steps/Sec: 17.37, Grad Norm: 0.0377 +[2025-02-23 14:05:12] (step=0947100) Train Loss: 0.3207, Train Steps/Sec: 13.52, Grad Norm: 0.0363 +[2025-02-23 14:05:18] (step=0947200) Train Loss: 0.3208, Train Steps/Sec: 16.55, Grad Norm: 0.0315 +[2025-02-23 14:05:24] (step=0947300) Train Loss: 0.3206, Train Steps/Sec: 17.22, Grad Norm: 0.0386 +[2025-02-23 14:05:30] (step=0947400) Train Loss: 0.3207, Train Steps/Sec: 15.79, Grad Norm: 0.0320 +[2025-02-23 14:05:36] (step=0947500) Train Loss: 0.3212, Train Steps/Sec: 17.32, Grad Norm: 0.0349 +[2025-02-23 14:05:42] (step=0947600) Train Loss: 0.3211, Train Steps/Sec: 15.78, Grad Norm: 0.0357 +[2025-02-23 14:05:49] (step=0947700) Train Loss: 0.3206, Train Steps/Sec: 15.83, Grad Norm: 0.0370 +[2025-02-23 14:05:55] (step=0947800) Train Loss: 0.3212, Train Steps/Sec: 16.50, Grad Norm: 0.0352 +[2025-02-23 14:06:01] (step=0947900) Train Loss: 0.3207, Train Steps/Sec: 17.34, Grad Norm: 0.0371 +[2025-02-23 14:06:07] (step=0948000) Train Loss: 0.3208, Train Steps/Sec: 16.58, Grad Norm: 0.0372 +[2025-02-23 14:06:12] (step=0948100) Train Loss: 0.3208, Train Steps/Sec: 17.36, Grad Norm: 0.0375 +[2025-02-23 14:06:18] (step=0948200) Train Loss: 0.3211, Train Steps/Sec: 17.37, Grad Norm: 0.0369 +[2025-02-23 14:06:25] (step=0948300) Train Loss: 0.3206, Train Steps/Sec: 13.66, Grad Norm: 0.0355 +[2025-02-23 14:06:31] (step=0948400) Train Loss: 0.3205, Train Steps/Sec: 17.25, Grad Norm: 0.0339 +[2025-02-23 14:06:37] (step=0948500) Train Loss: 0.3206, Train Steps/Sec: 17.28, Grad Norm: 0.0362 +[2025-02-23 14:06:43] (step=0948600) Train Loss: 0.3207, Train Steps/Sec: 17.17, Grad Norm: 0.0379 +[2025-02-23 14:06:50] (step=0948700) Train Loss: 0.3208, Train Steps/Sec: 15.09, Grad Norm: 0.0423 +[2025-02-23 14:06:56] (step=0948800) Train Loss: 0.3211, Train Steps/Sec: 16.44, Grad Norm: 0.0323 +[2025-02-23 14:07:01] (step=0948900) Train Loss: 0.3213, Train Steps/Sec: 17.18, Grad Norm: 0.0388 +[2025-02-23 14:07:07] (step=0949000) Train Loss: 0.3207, Train Steps/Sec: 16.61, Grad Norm: 0.0352 +[2025-02-23 14:07:13] (step=0949100) Train Loss: 0.3214, Train Steps/Sec: 16.61, Grad Norm: 0.0347 +[2025-02-23 14:07:20] (step=0949200) Train Loss: 0.3211, Train Steps/Sec: 16.55, Grad Norm: 0.0338 +[2025-02-23 14:07:25] (step=0949300) Train Loss: 0.3204, Train Steps/Sec: 17.31, Grad Norm: 0.0343 +[2025-02-23 14:07:32] (step=0949400) Train Loss: 0.3208, Train Steps/Sec: 15.84, Grad Norm: 0.0335 +[2025-02-23 14:07:37] (step=0949500) Train Loss: 0.3201, Train Steps/Sec: 17.28, Grad Norm: 0.0372 +[2025-02-23 14:07:45] (step=0949600) Train Loss: 0.3207, Train Steps/Sec: 13.18, Grad Norm: 0.0341 +[2025-02-23 14:07:51] (step=0949700) Train Loss: 0.3208, Train Steps/Sec: 15.86, Grad Norm: 0.0390 +[2025-02-23 14:07:57] (step=0949800) Train Loss: 0.3208, Train Steps/Sec: 16.60, Grad Norm: 0.0358 +[2025-02-23 14:08:03] (step=0949900) Train Loss: 0.3206, Train Steps/Sec: 17.34, Grad Norm: 0.0377 +[2025-02-23 14:08:09] (step=0950000) Train Loss: 0.3206, Train Steps/Sec: 16.60, Grad Norm: 0.0309 +[2025-02-23 14:08:10] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/0950000.pt +[2025-02-23 14:08:16] (step=0950100) Train Loss: 0.3211, Train Steps/Sec: 15.16, Grad Norm: 0.0352 +[2025-02-23 14:08:22] (step=0950200) Train Loss: 0.3208, Train Steps/Sec: 17.33, Grad Norm: 0.0333 +[2025-02-23 14:08:27] (step=0950300) Train Loss: 0.3209, Train Steps/Sec: 17.33, Grad Norm: 0.0464 +[2025-02-23 14:08:33] (step=0950400) Train Loss: 0.3215, Train Steps/Sec: 16.55, Grad Norm: 0.0370 +[2025-02-23 14:08:39] (step=0950500) Train Loss: 0.3210, Train Steps/Sec: 17.32, Grad Norm: 0.0335 +[2025-02-23 14:08:45] (step=0950600) Train Loss: 0.3211, Train Steps/Sec: 16.49, Grad Norm: 0.0364 +[2025-02-23 14:08:52] (step=0950700) Train Loss: 0.3206, Train Steps/Sec: 15.24, Grad Norm: 0.0356 +[2025-02-23 14:08:59] (step=0950800) Train Loss: 0.3207, Train Steps/Sec: 14.24, Grad Norm: 0.0340 +[2025-02-23 14:09:04] (step=0950900) Train Loss: 0.3209, Train Steps/Sec: 17.41, Grad Norm: 0.0373 +[2025-02-23 14:09:11] (step=0951000) Train Loss: 0.3204, Train Steps/Sec: 15.98, Grad Norm: 0.0388 +[2025-02-23 14:09:17] (step=0951100) Train Loss: 0.3210, Train Steps/Sec: 16.61, Grad Norm: 0.0380 +[2025-02-23 14:09:23] (step=0951200) Train Loss: 0.3207, Train Steps/Sec: 17.30, Grad Norm: 0.0354 +[2025-02-23 14:09:28] (step=0951300) Train Loss: 0.3204, Train Steps/Sec: 17.33, Grad Norm: 0.0355 +[2025-02-23 14:09:35] (step=0951400) Train Loss: 0.3216, Train Steps/Sec: 15.86, Grad Norm: 0.0341 +[2025-02-23 14:09:41] (step=0951500) Train Loss: 0.3214, Train Steps/Sec: 16.60, Grad Norm: 0.0341 +[2025-02-23 14:09:47] (step=0951600) Train Loss: 0.3210, Train Steps/Sec: 16.65, Grad Norm: 0.0381 +[2025-02-23 14:09:53] (step=0951700) Train Loss: 0.3209, Train Steps/Sec: 16.66, Grad Norm: 0.0367 +[2025-02-23 14:09:59] (step=0951800) Train Loss: 0.3206, Train Steps/Sec: 16.06, Grad Norm: 0.0325 +[2025-02-23 14:10:05] (step=0951900) Train Loss: 0.3207, Train Steps/Sec: 17.45, Grad Norm: 0.0338 +[2025-02-23 14:10:11] (step=0952000) Train Loss: 0.3210, Train Steps/Sec: 16.58, Grad Norm: 0.0369 +[2025-02-23 14:10:18] (step=0952100) Train Loss: 0.3205, Train Steps/Sec: 14.32, Grad Norm: 0.0361 +[2025-02-23 14:10:23] (step=0952200) Train Loss: 0.3209, Train Steps/Sec: 17.40, Grad Norm: 0.0378 +[2025-02-23 14:10:29] (step=0952300) Train Loss: 0.3205, Train Steps/Sec: 17.43, Grad Norm: 0.0368 +[2025-02-23 14:10:35] (step=0952400) Train Loss: 0.3203, Train Steps/Sec: 16.65, Grad Norm: 0.0343 +[2025-02-23 14:10:41] (step=0952500) Train Loss: 0.3210, Train Steps/Sec: 17.41, Grad Norm: 0.0438 +[2025-02-23 14:10:48] (step=0952600) Train Loss: 0.3209, Train Steps/Sec: 14.76, Grad Norm: 0.0331 +[2025-02-23 14:10:53] (step=0952700) Train Loss: 0.3207, Train Steps/Sec: 17.32, Grad Norm: 0.0381 +[2025-02-23 14:10:59] (step=0952800) Train Loss: 0.3211, Train Steps/Sec: 17.36, Grad Norm: 0.0380 +[2025-02-23 14:11:05] (step=0952900) Train Loss: 0.3212, Train Steps/Sec: 15.96, Grad Norm: 0.0353 +[2025-02-23 14:11:12] (step=0953000) Train Loss: 0.3208, Train Steps/Sec: 16.45, Grad Norm: 0.0334 +[2025-02-23 14:11:17] (step=0953100) Train Loss: 0.3213, Train Steps/Sec: 17.10, Grad Norm: 0.0333 +[2025-02-23 14:11:23] (step=0953200) Train Loss: 0.3210, Train Steps/Sec: 17.16, Grad Norm: 0.0370 +[2025-02-23 14:11:30] (step=0953300) Train Loss: 0.3209, Train Steps/Sec: 14.07, Grad Norm: 0.0319 +[2025-02-23 14:11:37] (step=0953400) Train Loss: 0.3210, Train Steps/Sec: 15.07, Grad Norm: 0.0349 +[2025-02-23 14:11:43] (step=0953500) Train Loss: 0.3205, Train Steps/Sec: 17.30, Grad Norm: 0.0339 +[2025-02-23 14:11:49] (step=0953600) Train Loss: 0.3212, Train Steps/Sec: 16.48, Grad Norm: 0.0325 +[2025-02-23 14:11:55] (step=0953700) Train Loss: 0.3208, Train Steps/Sec: 16.30, Grad Norm: 0.0344 +[2025-02-23 14:12:01] (step=0953800) Train Loss: 0.3206, Train Steps/Sec: 15.85, Grad Norm: 0.0357 +[2025-02-23 14:12:07] (step=0953900) Train Loss: 0.3208, Train Steps/Sec: 17.35, Grad Norm: 0.0349 +[2025-02-23 14:12:13] (step=0954000) Train Loss: 0.3209, Train Steps/Sec: 16.62, Grad Norm: 0.0376 +[2025-02-23 14:12:19] (step=0954100) Train Loss: 0.3210, Train Steps/Sec: 17.30, Grad Norm: 0.0344 +[2025-02-23 14:12:25] (step=0954200) Train Loss: 0.3213, Train Steps/Sec: 17.29, Grad Norm: 0.0330 +[2025-02-23 14:12:30] (step=0954300) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0352 +[2025-02-23 14:12:36] (step=0954400) Train Loss: 0.3211, Train Steps/Sec: 16.69, Grad Norm: 0.0319 +[2025-02-23 14:12:42] (step=0954500) Train Loss: 0.3215, Train Steps/Sec: 16.57, Grad Norm: 0.0391 +[2025-02-23 14:12:50] (step=0954600) Train Loss: 0.3211, Train Steps/Sec: 13.33, Grad Norm: 0.0320 +[2025-02-23 14:12:56] (step=0954700) Train Loss: 0.3209, Train Steps/Sec: 17.37, Grad Norm: 0.0328 +[2025-02-23 14:13:01] (step=0954800) Train Loss: 0.3204, Train Steps/Sec: 17.44, Grad Norm: 0.0403 +[2025-02-23 14:13:08] (step=0954900) Train Loss: 0.3214, Train Steps/Sec: 15.96, Grad Norm: 0.0350 +[2025-02-23 14:13:14] (step=0955000) Train Loss: 0.3208, Train Steps/Sec: 16.52, Grad Norm: 0.0343 +[2025-02-23 14:13:20] (step=0955100) Train Loss: 0.3215, Train Steps/Sec: 17.31, Grad Norm: 0.0380 +[2025-02-23 14:13:25] (step=0955200) Train Loss: 0.3207, Train Steps/Sec: 17.29, Grad Norm: 0.0337 +[2025-02-23 14:13:31] (step=0955300) Train Loss: 0.3207, Train Steps/Sec: 17.33, Grad Norm: 0.0388 +[2025-02-23 14:13:37] (step=0955400) Train Loss: 0.3206, Train Steps/Sec: 15.86, Grad Norm: 0.0348 +[2025-02-23 14:13:43] (step=0955500) Train Loss: 0.3215, Train Steps/Sec: 16.56, Grad Norm: 0.0325 +[2025-02-23 14:13:49] (step=0955600) Train Loss: 0.3220, Train Steps/Sec: 16.46, Grad Norm: 0.0384 +[2025-02-23 14:13:56] (step=0955700) Train Loss: 0.3208, Train Steps/Sec: 16.50, Grad Norm: 0.0328 +[2025-02-23 14:14:03] (step=0955800) Train Loss: 0.3213, Train Steps/Sec: 13.22, Grad Norm: 0.0382 +[2025-02-23 14:14:09] (step=0955900) Train Loss: 0.3206, Train Steps/Sec: 17.20, Grad Norm: 0.0329 +[2025-02-23 14:14:15] (step=0956000) Train Loss: 0.3215, Train Steps/Sec: 16.51, Grad Norm: 0.0317 +[2025-02-23 14:14:21] (step=0956100) Train Loss: 0.3207, Train Steps/Sec: 17.33, Grad Norm: 0.0353 +[2025-02-23 14:14:27] (step=0956200) Train Loss: 0.3203, Train Steps/Sec: 17.39, Grad Norm: 0.0333 +[2025-02-23 14:14:32] (step=0956300) Train Loss: 0.3205, Train Steps/Sec: 17.40, Grad Norm: 0.0360 +[2025-02-23 14:14:38] (step=0956400) Train Loss: 0.3213, Train Steps/Sec: 16.62, Grad Norm: 0.0309 +[2025-02-23 14:14:45] (step=0956500) Train Loss: 0.3213, Train Steps/Sec: 14.67, Grad Norm: 0.0352 +[2025-02-23 14:14:51] (step=0956600) Train Loss: 0.3206, Train Steps/Sec: 17.34, Grad Norm: 0.0357 +[2025-02-23 14:14:57] (step=0956700) Train Loss: 0.3206, Train Steps/Sec: 17.32, Grad Norm: 0.0336 +[2025-02-23 14:15:03] (step=0956800) Train Loss: 0.3212, Train Steps/Sec: 15.95, Grad Norm: 0.0355 +[2025-02-23 14:15:09] (step=0956900) Train Loss: 0.3210, Train Steps/Sec: 16.63, Grad Norm: 0.0393 +[2025-02-23 14:15:15] (step=0957000) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0368 +[2025-02-23 14:15:22] (step=0957100) Train Loss: 0.3204, Train Steps/Sec: 14.26, Grad Norm: 0.0331 +[2025-02-23 14:15:27] (step=0957200) Train Loss: 0.3206, Train Steps/Sec: 17.42, Grad Norm: 0.0333 +[2025-02-23 14:15:33] (step=0957300) Train Loss: 0.3205, Train Steps/Sec: 16.62, Grad Norm: 0.0367 +[2025-02-23 14:15:39] (step=0957400) Train Loss: 0.3213, Train Steps/Sec: 16.65, Grad Norm: 0.0362 +[2025-02-23 14:15:46] (step=0957500) Train Loss: 0.3212, Train Steps/Sec: 16.65, Grad Norm: 0.0316 +[2025-02-23 14:15:52] (step=0957600) Train Loss: 0.3205, Train Steps/Sec: 16.65, Grad Norm: 0.0333 +[2025-02-23 14:15:58] (step=0957700) Train Loss: 0.3208, Train Steps/Sec: 16.52, Grad Norm: 0.0318 +[2025-02-23 14:16:04] (step=0957800) Train Loss: 0.3207, Train Steps/Sec: 16.63, Grad Norm: 0.0330 +[2025-02-23 14:16:10] (step=0957900) Train Loss: 0.3209, Train Steps/Sec: 16.56, Grad Norm: 0.0344 +[2025-02-23 14:16:16] (step=0958000) Train Loss: 0.3209, Train Steps/Sec: 16.69, Grad Norm: 0.0347 +[2025-02-23 14:16:21] (step=0958100) Train Loss: 0.3210, Train Steps/Sec: 17.53, Grad Norm: 0.0335 +[2025-02-23 14:16:27] (step=0958200) Train Loss: 0.3210, Train Steps/Sec: 17.49, Grad Norm: 0.0330 +[2025-02-23 14:16:34] (step=0958300) Train Loss: 0.3209, Train Steps/Sec: 14.42, Grad Norm: 0.0375 +[2025-02-23 14:16:41] (step=0958400) Train Loss: 0.3204, Train Steps/Sec: 15.24, Grad Norm: 0.0382 +[2025-02-23 14:16:47] (step=0958500) Train Loss: 0.3204, Train Steps/Sec: 15.82, Grad Norm: 0.0327 +[2025-02-23 14:16:53] (step=0958600) Train Loss: 0.3208, Train Steps/Sec: 17.41, Grad Norm: 0.0356 +[2025-02-23 14:16:58] (step=0958700) Train Loss: 0.3209, Train Steps/Sec: 17.40, Grad Norm: 0.0375 +[2025-02-23 14:17:05] (step=0958800) Train Loss: 0.3202, Train Steps/Sec: 16.00, Grad Norm: 0.0360 +[2025-02-23 14:17:11] (step=0958900) Train Loss: 0.3213, Train Steps/Sec: 16.54, Grad Norm: 0.0344 +[2025-02-23 14:17:16] (step=0959000) Train Loss: 0.3209, Train Steps/Sec: 17.41, Grad Norm: 0.0354 +[2025-02-23 14:17:22] (step=0959100) Train Loss: 0.3208, Train Steps/Sec: 17.33, Grad Norm: 0.0355 +[2025-02-23 14:17:28] (step=0959200) Train Loss: 0.3209, Train Steps/Sec: 17.37, Grad Norm: 0.0303 +[2025-02-23 14:17:34] (step=0959300) Train Loss: 0.3209, Train Steps/Sec: 16.58, Grad Norm: 0.0361 +[2025-02-23 14:17:40] (step=0959400) Train Loss: 0.3216, Train Steps/Sec: 17.38, Grad Norm: 0.0348 +[2025-02-23 14:17:46] (step=0959500) Train Loss: 0.3206, Train Steps/Sec: 15.72, Grad Norm: 0.0345 +[2025-02-23 14:17:53] (step=0959600) Train Loss: 0.3209, Train Steps/Sec: 14.00, Grad Norm: 0.0361 +[2025-02-23 14:17:59] (step=0959700) Train Loss: 0.3205, Train Steps/Sec: 16.61, Grad Norm: 0.0385 +[2025-02-23 14:18:05] (step=0959800) Train Loss: 0.3203, Train Steps/Sec: 16.59, Grad Norm: 0.0373 +[2025-02-23 14:18:11] (step=0959900) Train Loss: 0.3211, Train Steps/Sec: 16.69, Grad Norm: 0.0412 +[2025-02-23 14:18:17] (step=0960000) Train Loss: 0.3206, Train Steps/Sec: 16.67, Grad Norm: 0.0388 +[2025-02-23 14:18:23] (step=0960100) Train Loss: 0.3213, Train Steps/Sec: 17.43, Grad Norm: 0.0328 +[2025-02-23 14:18:29] (step=0960200) Train Loss: 0.3206, Train Steps/Sec: 17.38, Grad Norm: 0.0323 +[2025-02-23 14:18:35] (step=0960300) Train Loss: 0.3206, Train Steps/Sec: 16.61, Grad Norm: 0.0352 +[2025-02-23 14:18:42] (step=0960400) Train Loss: 0.3201, Train Steps/Sec: 14.78, Grad Norm: 0.0348 +[2025-02-23 14:18:47] (step=0960500) Train Loss: 0.3214, Train Steps/Sec: 17.48, Grad Norm: 0.0337 +[2025-02-23 14:18:53] (step=0960600) Train Loss: 0.3206, Train Steps/Sec: 17.47, Grad Norm: 0.0363 +[2025-02-23 14:18:59] (step=0960700) Train Loss: 0.3211, Train Steps/Sec: 16.05, Grad Norm: 0.0336 +[2025-02-23 14:19:06] (step=0960800) Train Loss: 0.3211, Train Steps/Sec: 13.97, Grad Norm: 0.0346 +[2025-02-23 14:19:12] (step=0960900) Train Loss: 0.3206, Train Steps/Sec: 17.39, Grad Norm: 0.0363 +[2025-02-23 14:19:18] (step=0961000) Train Loss: 0.3207, Train Steps/Sec: 17.35, Grad Norm: 0.0339 +[2025-02-23 14:19:24] (step=0961100) Train Loss: 0.3208, Train Steps/Sec: 17.42, Grad Norm: 0.0381 +[2025-02-23 14:19:30] (step=0961200) Train Loss: 0.3206, Train Steps/Sec: 16.69, Grad Norm: 0.0350 +[2025-02-23 14:19:35] (step=0961300) Train Loss: 0.3212, Train Steps/Sec: 17.44, Grad Norm: 0.0342 +[2025-02-23 14:19:41] (step=0961400) Train Loss: 0.3208, Train Steps/Sec: 17.45, Grad Norm: 0.0375 +[2025-02-23 14:19:47] (step=0961500) Train Loss: 0.3209, Train Steps/Sec: 15.86, Grad Norm: 0.0383 +[2025-02-23 14:19:53] (step=0961600) Train Loss: 0.3204, Train Steps/Sec: 16.60, Grad Norm: 0.0331 +[2025-02-23 14:20:00] (step=0961700) Train Loss: 0.3213, Train Steps/Sec: 16.53, Grad Norm: 0.0362 +[2025-02-23 14:20:06] (step=0961800) Train Loss: 0.3212, Train Steps/Sec: 16.65, Grad Norm: 0.0354 +[2025-02-23 14:20:11] (step=0961900) Train Loss: 0.3212, Train Steps/Sec: 16.73, Grad Norm: 0.0370 +[2025-02-23 14:20:18] (step=0962000) Train Loss: 0.3216, Train Steps/Sec: 16.62, Grad Norm: 0.0359 +[2025-02-23 14:20:24] (step=0962100) Train Loss: 0.3209, Train Steps/Sec: 14.57, Grad Norm: 0.0419 +[2025-02-23 14:20:30] (step=0962200) Train Loss: 0.3211, Train Steps/Sec: 17.38, Grad Norm: 0.0328 +[2025-02-23 14:20:37] (step=0962300) Train Loss: 0.3208, Train Steps/Sec: 15.30, Grad Norm: 0.0358 +[2025-02-23 14:20:43] (step=0962400) Train Loss: 0.3208, Train Steps/Sec: 15.96, Grad Norm: 0.0327 +[2025-02-23 14:20:49] (step=0962500) Train Loss: 0.3211, Train Steps/Sec: 17.41, Grad Norm: 0.0407 +[2025-02-23 14:20:55] (step=0962600) Train Loss: 0.3204, Train Steps/Sec: 16.72, Grad Norm: 0.0341 +[2025-02-23 14:21:01] (step=0962700) Train Loss: 0.3209, Train Steps/Sec: 16.71, Grad Norm: 0.0358 +[2025-02-23 14:21:07] (step=0962800) Train Loss: 0.3215, Train Steps/Sec: 16.65, Grad Norm: 0.0336 +[2025-02-23 14:21:12] (step=0962900) Train Loss: 0.3214, Train Steps/Sec: 17.42, Grad Norm: 0.0372 +[2025-02-23 14:21:18] (step=0963000) Train Loss: 0.3213, Train Steps/Sec: 17.43, Grad Norm: 0.0354 +[2025-02-23 14:21:24] (step=0963100) Train Loss: 0.3204, Train Steps/Sec: 17.32, Grad Norm: 0.0347 +[2025-02-23 14:21:30] (step=0963200) Train Loss: 0.3211, Train Steps/Sec: 16.44, Grad Norm: 0.0406 +[2025-02-23 14:21:37] (step=0963300) Train Loss: 0.3213, Train Steps/Sec: 14.40, Grad Norm: 0.0334 +[2025-02-23 14:21:43] (step=0963400) Train Loss: 0.3211, Train Steps/Sec: 17.25, Grad Norm: 0.0360 +[2025-02-23 14:21:49] (step=0963500) Train Loss: 0.3205, Train Steps/Sec: 15.66, Grad Norm: 0.0343 +[2025-02-23 14:21:55] (step=0963600) Train Loss: 0.3211, Train Steps/Sec: 16.52, Grad Norm: 0.0372 +[2025-02-23 14:22:01] (step=0963700) Train Loss: 0.3204, Train Steps/Sec: 16.41, Grad Norm: 0.0372 +[2025-02-23 14:22:07] (step=0963800) Train Loss: 0.3216, Train Steps/Sec: 16.44, Grad Norm: 0.0353 +[2025-02-23 14:22:13] (step=0963900) Train Loss: 0.3203, Train Steps/Sec: 16.42, Grad Norm: 0.0343 +[2025-02-23 14:22:20] (step=0964000) Train Loss: 0.3210, Train Steps/Sec: 16.54, Grad Norm: 0.0369 +[2025-02-23 14:22:25] (step=0964100) Train Loss: 0.3210, Train Steps/Sec: 17.35, Grad Norm: 0.0342 +[2025-02-23 14:22:31] (step=0964200) Train Loss: 0.3203, Train Steps/Sec: 16.61, Grad Norm: 0.0382 +[2025-02-23 14:22:38] (step=0964300) Train Loss: 0.3219, Train Steps/Sec: 15.39, Grad Norm: 0.0407 +[2025-02-23 14:22:44] (step=0964400) Train Loss: 0.3204, Train Steps/Sec: 16.67, Grad Norm: 0.0383 +[2025-02-23 14:22:50] (step=0964500) Train Loss: 0.3211, Train Steps/Sec: 17.25, Grad Norm: 0.0343 +[2025-02-23 14:22:57] (step=0964600) Train Loss: 0.3207, Train Steps/Sec: 13.29, Grad Norm: 0.0368 +[2025-02-23 14:23:03] (step=0964700) Train Loss: 0.3205, Train Steps/Sec: 16.64, Grad Norm: 0.0323 +[2025-02-23 14:23:09] (step=0964800) Train Loss: 0.3209, Train Steps/Sec: 17.41, Grad Norm: 0.0369 +[2025-02-23 14:23:15] (step=0964900) Train Loss: 0.3211, Train Steps/Sec: 17.39, Grad Norm: 0.0353 +[2025-02-23 14:23:20] (step=0965000) Train Loss: 0.3206, Train Steps/Sec: 17.40, Grad Norm: 0.0330 +[2025-02-23 14:23:26] (step=0965100) Train Loss: 0.3204, Train Steps/Sec: 16.68, Grad Norm: 0.0337 +[2025-02-23 14:23:32] (step=0965200) Train Loss: 0.3209, Train Steps/Sec: 17.41, Grad Norm: 0.0336 +[2025-02-23 14:23:38] (step=0965300) Train Loss: 0.3209, Train Steps/Sec: 17.39, Grad Norm: 0.0363 +[2025-02-23 14:23:44] (step=0965400) Train Loss: 0.3212, Train Steps/Sec: 17.40, Grad Norm: 0.0366 +[2025-02-23 14:23:50] (step=0965500) Train Loss: 0.3213, Train Steps/Sec: 16.57, Grad Norm: 0.0345 +[2025-02-23 14:23:56] (step=0965600) Train Loss: 0.3206, Train Steps/Sec: 16.67, Grad Norm: 0.0364 +[2025-02-23 14:24:02] (step=0965700) Train Loss: 0.3210, Train Steps/Sec: 16.54, Grad Norm: 0.0378 +[2025-02-23 14:24:09] (step=0965800) Train Loss: 0.3204, Train Steps/Sec: 13.68, Grad Norm: 0.0372 +[2025-02-23 14:24:15] (step=0965900) Train Loss: 0.3208, Train Steps/Sec: 16.59, Grad Norm: 0.0391 +[2025-02-23 14:24:21] (step=0966000) Train Loss: 0.3207, Train Steps/Sec: 16.65, Grad Norm: 0.0345 +[2025-02-23 14:24:27] (step=0966100) Train Loss: 0.3210, Train Steps/Sec: 17.43, Grad Norm: 0.0350 +[2025-02-23 14:24:34] (step=0966200) Train Loss: 0.3208, Train Steps/Sec: 14.79, Grad Norm: 0.0329 +[2025-02-23 14:24:39] (step=0966300) Train Loss: 0.3206, Train Steps/Sec: 17.42, Grad Norm: 0.0387 +[2025-02-23 14:24:45] (step=0966400) Train Loss: 0.3208, Train Steps/Sec: 16.63, Grad Norm: 0.0331 +[2025-02-23 14:24:51] (step=0966500) Train Loss: 0.3210, Train Steps/Sec: 16.68, Grad Norm: 0.0382 +[2025-02-23 14:24:57] (step=0966600) Train Loss: 0.3210, Train Steps/Sec: 16.66, Grad Norm: 0.0327 +[2025-02-23 14:25:03] (step=0966700) Train Loss: 0.3210, Train Steps/Sec: 16.59, Grad Norm: 0.0361 +[2025-02-23 14:25:09] (step=0966800) Train Loss: 0.3203, Train Steps/Sec: 17.32, Grad Norm: 0.0339 +[2025-02-23 14:25:15] (step=0966900) Train Loss: 0.3206, Train Steps/Sec: 17.29, Grad Norm: 0.0348 +[2025-02-23 14:25:21] (step=0967000) Train Loss: 0.3208, Train Steps/Sec: 17.16, Grad Norm: 0.0349 +[2025-02-23 14:25:28] (step=0967100) Train Loss: 0.3202, Train Steps/Sec: 13.80, Grad Norm: 0.0390 +[2025-02-23 14:25:34] (step=0967200) Train Loss: 0.3203, Train Steps/Sec: 17.46, Grad Norm: 0.0329 +[2025-02-23 14:25:39] (step=0967300) Train Loss: 0.3206, Train Steps/Sec: 17.50, Grad Norm: 0.0334 +[2025-02-23 14:25:45] (step=0967400) Train Loss: 0.3206, Train Steps/Sec: 17.40, Grad Norm: 0.0345 +[2025-02-23 14:25:51] (step=0967500) Train Loss: 0.3207, Train Steps/Sec: 16.56, Grad Norm: 0.0355 +[2025-02-23 14:25:57] (step=0967600) Train Loss: 0.3208, Train Steps/Sec: 16.66, Grad Norm: 0.0379 +[2025-02-23 14:26:03] (step=0967700) Train Loss: 0.3215, Train Steps/Sec: 16.64, Grad Norm: 0.0338 +[2025-02-23 14:26:09] (step=0967800) Train Loss: 0.3217, Train Steps/Sec: 16.72, Grad Norm: 0.0349 +[2025-02-23 14:26:15] (step=0967900) Train Loss: 0.3210, Train Steps/Sec: 16.72, Grad Norm: 0.0342 +[2025-02-23 14:26:21] (step=0968000) Train Loss: 0.3208, Train Steps/Sec: 16.69, Grad Norm: 0.0330 +[2025-02-23 14:26:27] (step=0968100) Train Loss: 0.3210, Train Steps/Sec: 16.74, Grad Norm: 0.0352 +[2025-02-23 14:26:34] (step=0968200) Train Loss: 0.3206, Train Steps/Sec: 15.40, Grad Norm: 0.0339 +[2025-02-23 14:26:41] (step=0968300) Train Loss: 0.3212, Train Steps/Sec: 14.56, Grad Norm: 0.0367 +[2025-02-23 14:26:47] (step=0968400) Train Loss: 0.3212, Train Steps/Sec: 16.70, Grad Norm: 0.0332 +[2025-02-23 14:26:53] (step=0968500) Train Loss: 0.3204, Train Steps/Sec: 16.06, Grad Norm: 0.0368 +[2025-02-23 14:26:59] (step=0968600) Train Loss: 0.3206, Train Steps/Sec: 16.69, Grad Norm: 0.0316 +[2025-02-23 14:27:05] (step=0968700) Train Loss: 0.3201, Train Steps/Sec: 17.35, Grad Norm: 0.0350 +[2025-02-23 14:27:10] (step=0968800) Train Loss: 0.3205, Train Steps/Sec: 17.32, Grad Norm: 0.0338 +[2025-02-23 14:27:16] (step=0968900) Train Loss: 0.3212, Train Steps/Sec: 17.39, Grad Norm: 0.0356 +[2025-02-23 14:27:22] (step=0969000) Train Loss: 0.3211, Train Steps/Sec: 16.64, Grad Norm: 0.0354 +[2025-02-23 14:27:28] (step=0969100) Train Loss: 0.3208, Train Steps/Sec: 17.41, Grad Norm: 0.0400 +[2025-02-23 14:27:34] (step=0969200) Train Loss: 0.3206, Train Steps/Sec: 17.44, Grad Norm: 0.0333 +[2025-02-23 14:27:39] (step=0969300) Train Loss: 0.3202, Train Steps/Sec: 17.39, Grad Norm: 0.0389 +[2025-02-23 14:27:45] (step=0969400) Train Loss: 0.3210, Train Steps/Sec: 17.41, Grad Norm: 0.0349 +[2025-02-23 14:27:51] (step=0969500) Train Loss: 0.3207, Train Steps/Sec: 15.80, Grad Norm: 0.0363 +[2025-02-23 14:27:59] (step=0969600) Train Loss: 0.3216, Train Steps/Sec: 13.90, Grad Norm: 0.0369 +[2025-02-23 14:28:05] (step=0969700) Train Loss: 0.3202, Train Steps/Sec: 16.56, Grad Norm: 0.0322 +[2025-02-23 14:28:11] (step=0969800) Train Loss: 0.3207, Train Steps/Sec: 16.58, Grad Norm: 0.0360 +[2025-02-23 14:28:17] (step=0969900) Train Loss: 0.3208, Train Steps/Sec: 16.68, Grad Norm: 0.0353 +[2025-02-23 14:28:23] (step=0970000) Train Loss: 0.3207, Train Steps/Sec: 16.58, Grad Norm: 0.0360 +[2025-02-23 14:28:30] (step=0970100) Train Loss: 0.3209, Train Steps/Sec: 14.50, Grad Norm: 0.0348 +[2025-02-23 14:28:35] (step=0970200) Train Loss: 0.3212, Train Steps/Sec: 17.26, Grad Norm: 0.0343 +[2025-02-23 14:28:41] (step=0970300) Train Loss: 0.3204, Train Steps/Sec: 17.25, Grad Norm: 0.0380 +[2025-02-23 14:28:47] (step=0970400) Train Loss: 0.3213, Train Steps/Sec: 15.79, Grad Norm: 0.0331 +[2025-02-23 14:28:54] (step=0970500) Train Loss: 0.3210, Train Steps/Sec: 16.40, Grad Norm: 0.0367 +[2025-02-23 14:29:00] (step=0970600) Train Loss: 0.3206, Train Steps/Sec: 16.44, Grad Norm: 0.0347 +[2025-02-23 14:29:05] (step=0970700) Train Loss: 0.3207, Train Steps/Sec: 17.17, Grad Norm: 0.0374 +[2025-02-23 14:29:12] (step=0970800) Train Loss: 0.3203, Train Steps/Sec: 14.33, Grad Norm: 0.0324 +[2025-02-23 14:29:18] (step=0970900) Train Loss: 0.3214, Train Steps/Sec: 17.30, Grad Norm: 0.0323 +[2025-02-23 14:29:24] (step=0971000) Train Loss: 0.3208, Train Steps/Sec: 16.53, Grad Norm: 0.0360 +[2025-02-23 14:29:30] (step=0971100) Train Loss: 0.3207, Train Steps/Sec: 17.31, Grad Norm: 0.0332 +[2025-02-23 14:29:36] (step=0971200) Train Loss: 0.3210, Train Steps/Sec: 17.32, Grad Norm: 0.0355 +[2025-02-23 14:29:42] (step=0971300) Train Loss: 0.3212, Train Steps/Sec: 17.23, Grad Norm: 0.0393 +[2025-02-23 14:29:47] (step=0971400) Train Loss: 0.3207, Train Steps/Sec: 17.19, Grad Norm: 0.0341 +[2025-02-23 14:29:54] (step=0971500) Train Loss: 0.3208, Train Steps/Sec: 15.84, Grad Norm: 0.0345 +[2025-02-23 14:30:00] (step=0971600) Train Loss: 0.3210, Train Steps/Sec: 16.57, Grad Norm: 0.0343 +[2025-02-23 14:30:06] (step=0971700) Train Loss: 0.3200, Train Steps/Sec: 16.47, Grad Norm: 0.0332 +[2025-02-23 14:30:12] (step=0971800) Train Loss: 0.3205, Train Steps/Sec: 16.57, Grad Norm: 0.0363 +[2025-02-23 14:30:18] (step=0971900) Train Loss: 0.3203, Train Steps/Sec: 16.57, Grad Norm: 0.0413 +[2025-02-23 14:30:24] (step=0972000) Train Loss: 0.3212, Train Steps/Sec: 15.73, Grad Norm: 0.0363 +[2025-02-23 14:30:32] (step=0972100) Train Loss: 0.3209, Train Steps/Sec: 12.61, Grad Norm: 0.0342 +[2025-02-23 14:30:38] (step=0972200) Train Loss: 0.3209, Train Steps/Sec: 17.35, Grad Norm: 0.0373 +[2025-02-23 14:30:44] (step=0972300) Train Loss: 0.3206, Train Steps/Sec: 17.35, Grad Norm: 0.0359 +[2025-02-23 14:30:50] (step=0972400) Train Loss: 0.3205, Train Steps/Sec: 15.34, Grad Norm: 0.0335 +[2025-02-23 14:30:56] (step=0972500) Train Loss: 0.3205, Train Steps/Sec: 16.64, Grad Norm: 0.0339 +[2025-02-23 14:31:02] (step=0972600) Train Loss: 0.3213, Train Steps/Sec: 17.32, Grad Norm: 0.0311 +[2025-02-23 14:31:08] (step=0972700) Train Loss: 0.3207, Train Steps/Sec: 17.37, Grad Norm: 0.0343 +[2025-02-23 14:31:14] (step=0972800) Train Loss: 0.3203, Train Steps/Sec: 17.35, Grad Norm: 0.0388 +[2025-02-23 14:31:20] (step=0972900) Train Loss: 0.3202, Train Steps/Sec: 16.60, Grad Norm: 0.0377 +[2025-02-23 14:31:25] (step=0973000) Train Loss: 0.3206, Train Steps/Sec: 17.36, Grad Norm: 0.0364 +[2025-02-23 14:31:31] (step=0973100) Train Loss: 0.3205, Train Steps/Sec: 17.31, Grad Norm: 0.0339 +[2025-02-23 14:31:37] (step=0973200) Train Loss: 0.3208, Train Steps/Sec: 17.30, Grad Norm: 0.0354 +[2025-02-23 14:31:44] (step=0973300) Train Loss: 0.3209, Train Steps/Sec: 14.36, Grad Norm: 0.0350 +[2025-02-23 14:31:50] (step=0973400) Train Loss: 0.3212, Train Steps/Sec: 17.13, Grad Norm: 0.0327 +[2025-02-23 14:31:56] (step=0973500) Train Loss: 0.3204, Train Steps/Sec: 15.67, Grad Norm: 0.0350 +[2025-02-23 14:32:02] (step=0973600) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0375 +[2025-02-23 14:32:08] (step=0973700) Train Loss: 0.3212, Train Steps/Sec: 15.90, Grad Norm: 0.0371 +[2025-02-23 14:32:14] (step=0973800) Train Loss: 0.3206, Train Steps/Sec: 16.65, Grad Norm: 0.0373 +[2025-02-23 14:32:20] (step=0973900) Train Loss: 0.3212, Train Steps/Sec: 16.70, Grad Norm: 0.0401 +[2025-02-23 14:32:27] (step=0974000) Train Loss: 0.3211, Train Steps/Sec: 14.13, Grad Norm: 0.0358 +[2025-02-23 14:32:33] (step=0974100) Train Loss: 0.3211, Train Steps/Sec: 17.44, Grad Norm: 0.0351 +[2025-02-23 14:32:39] (step=0974200) Train Loss: 0.3208, Train Steps/Sec: 17.42, Grad Norm: 0.0345 +[2025-02-23 14:32:45] (step=0974300) Train Loss: 0.3214, Train Steps/Sec: 16.00, Grad Norm: 0.0365 +[2025-02-23 14:32:51] (step=0974400) Train Loss: 0.3210, Train Steps/Sec: 16.66, Grad Norm: 0.0362 +[2025-02-23 14:32:57] (step=0974500) Train Loss: 0.3212, Train Steps/Sec: 16.53, Grad Norm: 0.0378 +[2025-02-23 14:33:04] (step=0974600) Train Loss: 0.3210, Train Steps/Sec: 14.44, Grad Norm: 0.0354 +[2025-02-23 14:33:10] (step=0974700) Train Loss: 0.3207, Train Steps/Sec: 17.29, Grad Norm: 0.0371 +[2025-02-23 14:33:16] (step=0974800) Train Loss: 0.3205, Train Steps/Sec: 16.63, Grad Norm: 0.0373 +[2025-02-23 14:33:22] (step=0974900) Train Loss: 0.3209, Train Steps/Sec: 17.42, Grad Norm: 0.0386 +[2025-02-23 14:33:27] (step=0975000) Train Loss: 0.3204, Train Steps/Sec: 17.37, Grad Norm: 0.0332 +[2025-02-23 14:33:33] (step=0975100) Train Loss: 0.3213, Train Steps/Sec: 17.43, Grad Norm: 0.0370 +[2025-02-23 14:33:39] (step=0975200) Train Loss: 0.3209, Train Steps/Sec: 17.40, Grad Norm: 0.0349 +[2025-02-23 14:33:45] (step=0975300) Train Loss: 0.3209, Train Steps/Sec: 17.39, Grad Norm: 0.0355 +[2025-02-23 14:33:50] (step=0975400) Train Loss: 0.3208, Train Steps/Sec: 17.33, Grad Norm: 0.0367 +[2025-02-23 14:33:57] (step=0975500) Train Loss: 0.3208, Train Steps/Sec: 15.70, Grad Norm: 0.0396 +[2025-02-23 14:34:02] (step=0975600) Train Loss: 0.3204, Train Steps/Sec: 17.39, Grad Norm: 0.0355 +[2025-02-23 14:34:08] (step=0975700) Train Loss: 0.3210, Train Steps/Sec: 16.70, Grad Norm: 0.0324 +[2025-02-23 14:34:16] (step=0975800) Train Loss: 0.3207, Train Steps/Sec: 13.49, Grad Norm: 0.0329 +[2025-02-23 14:34:23] (step=0975900) Train Loss: 0.3207, Train Steps/Sec: 14.72, Grad Norm: 0.0389 +[2025-02-23 14:34:29] (step=0976000) Train Loss: 0.3206, Train Steps/Sec: 15.96, Grad Norm: 0.0361 +[2025-02-23 14:34:35] (step=0976100) Train Loss: 0.3209, Train Steps/Sec: 17.46, Grad Norm: 0.0386 +[2025-02-23 14:34:40] (step=0976200) Train Loss: 0.3204, Train Steps/Sec: 17.47, Grad Norm: 0.0369 +[2025-02-23 14:34:47] (step=0976300) Train Loss: 0.3209, Train Steps/Sec: 16.04, Grad Norm: 0.0348 +[2025-02-23 14:34:53] (step=0976400) Train Loss: 0.3206, Train Steps/Sec: 16.76, Grad Norm: 0.0361 +[2025-02-23 14:34:59] (step=0976500) Train Loss: 0.3205, Train Steps/Sec: 16.71, Grad Norm: 0.0353 +[2025-02-23 14:35:04] (step=0976600) Train Loss: 0.3206, Train Steps/Sec: 17.38, Grad Norm: 0.0342 +[2025-02-23 14:35:10] (step=0976700) Train Loss: 0.3204, Train Steps/Sec: 17.36, Grad Norm: 0.0334 +[2025-02-23 14:35:16] (step=0976800) Train Loss: 0.3212, Train Steps/Sec: 16.58, Grad Norm: 0.0334 +[2025-02-23 14:35:22] (step=0976900) Train Loss: 0.3213, Train Steps/Sec: 17.34, Grad Norm: 0.0354 +[2025-02-23 14:35:28] (step=0977000) Train Loss: 0.3211, Train Steps/Sec: 17.01, Grad Norm: 0.0353 +[2025-02-23 14:35:35] (step=0977100) Train Loss: 0.3213, Train Steps/Sec: 14.40, Grad Norm: 0.0336 +[2025-02-23 14:35:41] (step=0977200) Train Loss: 0.3208, Train Steps/Sec: 17.10, Grad Norm: 0.0355 +[2025-02-23 14:35:46] (step=0977300) Train Loss: 0.3207, Train Steps/Sec: 17.09, Grad Norm: 0.0375 +[2025-02-23 14:35:52] (step=0977400) Train Loss: 0.3208, Train Steps/Sec: 17.07, Grad Norm: 0.0347 +[2025-02-23 14:35:59] (step=0977500) Train Loss: 0.3209, Train Steps/Sec: 15.64, Grad Norm: 0.0386 +[2025-02-23 14:36:05] (step=0977600) Train Loss: 0.3205, Train Steps/Sec: 17.19, Grad Norm: 0.0326 +[2025-02-23 14:36:11] (step=0977700) Train Loss: 0.3209, Train Steps/Sec: 16.59, Grad Norm: 0.0344 +[2025-02-23 14:36:17] (step=0977800) Train Loss: 0.3212, Train Steps/Sec: 15.86, Grad Norm: 0.0364 +[2025-02-23 14:36:24] (step=0977900) Train Loss: 0.3211, Train Steps/Sec: 14.11, Grad Norm: 0.0357 +[2025-02-23 14:36:30] (step=0978000) Train Loss: 0.3210, Train Steps/Sec: 16.62, Grad Norm: 0.0316 +[2025-02-23 14:36:36] (step=0978100) Train Loss: 0.3206, Train Steps/Sec: 17.41, Grad Norm: 0.0326 +[2025-02-23 14:36:42] (step=0978200) Train Loss: 0.3205, Train Steps/Sec: 15.99, Grad Norm: 0.0349 +[2025-02-23 14:36:49] (step=0978300) Train Loss: 0.3209, Train Steps/Sec: 13.94, Grad Norm: 0.0340 +[2025-02-23 14:36:55] (step=0978400) Train Loss: 0.3206, Train Steps/Sec: 16.64, Grad Norm: 0.0359 +[2025-02-23 14:37:01] (step=0978500) Train Loss: 0.3208, Train Steps/Sec: 17.10, Grad Norm: 0.0350 +[2025-02-23 14:37:07] (step=0978600) Train Loss: 0.3211, Train Steps/Sec: 16.98, Grad Norm: 0.0322 +[2025-02-23 14:37:13] (step=0978700) Train Loss: 0.3214, Train Steps/Sec: 16.97, Grad Norm: 0.0388 +[2025-02-23 14:37:19] (step=0978800) Train Loss: 0.3200, Train Steps/Sec: 16.26, Grad Norm: 0.0347 +[2025-02-23 14:37:25] (step=0978900) Train Loss: 0.3208, Train Steps/Sec: 17.14, Grad Norm: 0.0351 +[2025-02-23 14:37:31] (step=0979000) Train Loss: 0.3207, Train Steps/Sec: 17.36, Grad Norm: 0.0368 +[2025-02-23 14:37:36] (step=0979100) Train Loss: 0.3205, Train Steps/Sec: 17.36, Grad Norm: 0.0395 +[2025-02-23 14:37:42] (step=0979200) Train Loss: 0.3212, Train Steps/Sec: 17.40, Grad Norm: 0.0357 +[2025-02-23 14:37:48] (step=0979300) Train Loss: 0.3210, Train Steps/Sec: 17.44, Grad Norm: 0.0397 +[2025-02-23 14:37:54] (step=0979400) Train Loss: 0.3203, Train Steps/Sec: 17.46, Grad Norm: 0.0363 +[2025-02-23 14:38:00] (step=0979500) Train Loss: 0.3214, Train Steps/Sec: 15.91, Grad Norm: 0.0386 +[2025-02-23 14:38:07] (step=0979600) Train Loss: 0.3203, Train Steps/Sec: 14.31, Grad Norm: 0.0336 +[2025-02-23 14:38:13] (step=0979700) Train Loss: 0.3217, Train Steps/Sec: 15.90, Grad Norm: 0.0377 +[2025-02-23 14:38:20] (step=0979800) Train Loss: 0.3207, Train Steps/Sec: 14.68, Grad Norm: 0.0325 +[2025-02-23 14:38:26] (step=0979900) Train Loss: 0.3210, Train Steps/Sec: 15.98, Grad Norm: 0.0334 +[2025-02-23 14:38:32] (step=0980000) Train Loss: 0.3206, Train Steps/Sec: 16.70, Grad Norm: 0.0323 +[2025-02-23 14:38:38] (step=0980100) Train Loss: 0.3206, Train Steps/Sec: 17.41, Grad Norm: 0.0328 +[2025-02-23 14:38:44] (step=0980200) Train Loss: 0.3206, Train Steps/Sec: 15.92, Grad Norm: 0.0353 +[2025-02-23 14:38:50] (step=0980300) Train Loss: 0.3204, Train Steps/Sec: 16.74, Grad Norm: 0.0374 +[2025-02-23 14:38:56] (step=0980400) Train Loss: 0.3206, Train Steps/Sec: 17.45, Grad Norm: 0.0324 +[2025-02-23 14:39:02] (step=0980500) Train Loss: 0.3211, Train Steps/Sec: 16.61, Grad Norm: 0.0338 +[2025-02-23 14:39:08] (step=0980600) Train Loss: 0.3200, Train Steps/Sec: 17.40, Grad Norm: 0.0347 +[2025-02-23 14:39:14] (step=0980700) Train Loss: 0.3202, Train Steps/Sec: 16.61, Grad Norm: 0.0339 +[2025-02-23 14:39:21] (step=0980800) Train Loss: 0.3212, Train Steps/Sec: 14.45, Grad Norm: 0.0339 +[2025-02-23 14:39:26] (step=0980900) Train Loss: 0.3204, Train Steps/Sec: 17.31, Grad Norm: 0.0365 +[2025-02-23 14:39:32] (step=0981000) Train Loss: 0.3207, Train Steps/Sec: 17.38, Grad Norm: 0.0357 +[2025-02-23 14:39:38] (step=0981100) Train Loss: 0.3205, Train Steps/Sec: 17.47, Grad Norm: 0.0336 +[2025-02-23 14:39:44] (step=0981200) Train Loss: 0.3206, Train Steps/Sec: 17.48, Grad Norm: 0.0364 +[2025-02-23 14:39:49] (step=0981300) Train Loss: 0.3218, Train Steps/Sec: 17.41, Grad Norm: 0.0319 +[2025-02-23 14:39:55] (step=0981400) Train Loss: 0.3202, Train Steps/Sec: 17.41, Grad Norm: 0.0386 +[2025-02-23 14:40:01] (step=0981500) Train Loss: 0.3208, Train Steps/Sec: 15.88, Grad Norm: 0.0345 +[2025-02-23 14:40:07] (step=0981600) Train Loss: 0.3209, Train Steps/Sec: 17.40, Grad Norm: 0.0333 +[2025-02-23 14:40:13] (step=0981700) Train Loss: 0.3205, Train Steps/Sec: 16.67, Grad Norm: 0.0351 +[2025-02-23 14:40:20] (step=0981800) Train Loss: 0.3207, Train Steps/Sec: 14.11, Grad Norm: 0.0395 +[2025-02-23 14:40:26] (step=0981900) Train Loss: 0.3206, Train Steps/Sec: 16.74, Grad Norm: 0.0337 +[2025-02-23 14:40:32] (step=0982000) Train Loss: 0.3205, Train Steps/Sec: 17.41, Grad Norm: 0.0411 +[2025-02-23 14:40:40] (step=0982100) Train Loss: 0.3213, Train Steps/Sec: 13.08, Grad Norm: 0.0351 +[2025-02-23 14:40:46] (step=0982200) Train Loss: 0.3208, Train Steps/Sec: 16.63, Grad Norm: 0.0362 +[2025-02-23 14:40:51] (step=0982300) Train Loss: 0.3198, Train Steps/Sec: 17.36, Grad Norm: 0.0362 +[2025-02-23 14:40:57] (step=0982400) Train Loss: 0.3209, Train Steps/Sec: 17.41, Grad Norm: 0.0363 +[2025-02-23 14:41:03] (step=0982500) Train Loss: 0.3214, Train Steps/Sec: 16.66, Grad Norm: 0.0326 +[2025-02-23 14:41:09] (step=0982600) Train Loss: 0.3212, Train Steps/Sec: 17.32, Grad Norm: 0.0391 +[2025-02-23 14:41:15] (step=0982700) Train Loss: 0.3205, Train Steps/Sec: 16.60, Grad Norm: 0.0322 +[2025-02-23 14:41:21] (step=0982800) Train Loss: 0.3209, Train Steps/Sec: 17.45, Grad Norm: 0.0359 +[2025-02-23 14:41:26] (step=0982900) Train Loss: 0.3204, Train Steps/Sec: 17.46, Grad Norm: 0.0377 +[2025-02-23 14:41:32] (step=0983000) Train Loss: 0.3209, Train Steps/Sec: 17.50, Grad Norm: 0.0356 +[2025-02-23 14:41:38] (step=0983100) Train Loss: 0.3212, Train Steps/Sec: 17.48, Grad Norm: 0.0360 +[2025-02-23 14:41:44] (step=0983200) Train Loss: 0.3205, Train Steps/Sec: 17.46, Grad Norm: 0.0324 +[2025-02-23 14:41:50] (step=0983300) Train Loss: 0.3210, Train Steps/Sec: 14.41, Grad Norm: 0.0397 +[2025-02-23 14:41:56] (step=0983400) Train Loss: 0.3208, Train Steps/Sec: 17.30, Grad Norm: 0.0341 +[2025-02-23 14:42:03] (step=0983500) Train Loss: 0.3209, Train Steps/Sec: 15.79, Grad Norm: 0.0393 +[2025-02-23 14:42:08] (step=0983600) Train Loss: 0.3198, Train Steps/Sec: 17.42, Grad Norm: 0.0381 +[2025-02-23 14:42:15] (step=0983700) Train Loss: 0.3209, Train Steps/Sec: 14.15, Grad Norm: 0.0355 +[2025-02-23 14:42:22] (step=0983800) Train Loss: 0.3207, Train Steps/Sec: 15.88, Grad Norm: 0.0328 +[2025-02-23 14:42:28] (step=0983900) Train Loss: 0.3209, Train Steps/Sec: 16.66, Grad Norm: 0.0342 +[2025-02-23 14:42:34] (step=0984000) Train Loss: 0.3214, Train Steps/Sec: 16.52, Grad Norm: 0.0353 +[2025-02-23 14:42:40] (step=0984100) Train Loss: 0.3205, Train Steps/Sec: 15.79, Grad Norm: 0.0373 +[2025-02-23 14:42:46] (step=0984200) Train Loss: 0.3213, Train Steps/Sec: 16.54, Grad Norm: 0.0356 +[2025-02-23 14:42:52] (step=0984300) Train Loss: 0.3207, Train Steps/Sec: 17.26, Grad Norm: 0.0368 +[2025-02-23 14:42:58] (step=0984400) Train Loss: 0.3201, Train Steps/Sec: 17.36, Grad Norm: 0.0360 +[2025-02-23 14:43:04] (step=0984500) Train Loss: 0.3210, Train Steps/Sec: 16.61, Grad Norm: 0.0359 +[2025-02-23 14:43:11] (step=0984600) Train Loss: 0.3209, Train Steps/Sec: 13.86, Grad Norm: 0.0347 +[2025-02-23 14:43:17] (step=0984700) Train Loss: 0.3208, Train Steps/Sec: 17.32, Grad Norm: 0.0353 +[2025-02-23 14:43:23] (step=0984800) Train Loss: 0.3208, Train Steps/Sec: 17.31, Grad Norm: 0.0393 +[2025-02-23 14:43:28] (step=0984900) Train Loss: 0.3206, Train Steps/Sec: 17.40, Grad Norm: 0.0371 +[2025-02-23 14:43:34] (step=0985000) Train Loss: 0.3209, Train Steps/Sec: 17.36, Grad Norm: 0.0368 +[2025-02-23 14:43:40] (step=0985100) Train Loss: 0.3205, Train Steps/Sec: 17.38, Grad Norm: 0.0348 +[2025-02-23 14:43:46] (step=0985200) Train Loss: 0.3208, Train Steps/Sec: 17.36, Grad Norm: 0.0347 +[2025-02-23 14:43:51] (step=0985300) Train Loss: 0.3209, Train Steps/Sec: 17.38, Grad Norm: 0.0369 +[2025-02-23 14:43:57] (step=0985400) Train Loss: 0.3206, Train Steps/Sec: 17.43, Grad Norm: 0.0353 +[2025-02-23 14:44:03] (step=0985500) Train Loss: 0.3207, Train Steps/Sec: 15.84, Grad Norm: 0.0367 +[2025-02-23 14:44:09] (step=0985600) Train Loss: 0.3212, Train Steps/Sec: 17.32, Grad Norm: 0.0336 +[2025-02-23 14:44:16] (step=0985700) Train Loss: 0.3207, Train Steps/Sec: 14.19, Grad Norm: 0.0417 +[2025-02-23 14:44:24] (step=0985800) Train Loss: 0.3212, Train Steps/Sec: 13.52, Grad Norm: 0.0413 +[2025-02-23 14:44:30] (step=0985900) Train Loss: 0.3211, Train Steps/Sec: 16.68, Grad Norm: 0.0341 +[2025-02-23 14:44:36] (step=0986000) Train Loss: 0.3209, Train Steps/Sec: 16.00, Grad Norm: 0.0317 +[2025-02-23 14:44:42] (step=0986100) Train Loss: 0.3203, Train Steps/Sec: 16.03, Grad Norm: 0.0371 +[2025-02-23 14:44:48] (step=0986200) Train Loss: 0.3211, Train Steps/Sec: 17.38, Grad Norm: 0.0351 +[2025-02-23 14:44:54] (step=0986300) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0339 +[2025-02-23 14:44:59] (step=0986400) Train Loss: 0.3206, Train Steps/Sec: 17.51, Grad Norm: 0.0438 +[2025-02-23 14:45:05] (step=0986500) Train Loss: 0.3212, Train Steps/Sec: 16.80, Grad Norm: 0.0358 +[2025-02-23 14:45:11] (step=0986600) Train Loss: 0.3207, Train Steps/Sec: 16.62, Grad Norm: 0.0361 +[2025-02-23 14:45:17] (step=0986700) Train Loss: 0.3209, Train Steps/Sec: 17.42, Grad Norm: 0.0330 +[2025-02-23 14:45:23] (step=0986800) Train Loss: 0.3209, Train Steps/Sec: 17.39, Grad Norm: 0.0393 +[2025-02-23 14:45:28] (step=0986900) Train Loss: 0.3203, Train Steps/Sec: 17.53, Grad Norm: 0.0363 +[2025-02-23 14:45:34] (step=0987000) Train Loss: 0.3205, Train Steps/Sec: 17.54, Grad Norm: 0.0319 +[2025-02-23 14:45:41] (step=0987100) Train Loss: 0.3211, Train Steps/Sec: 14.46, Grad Norm: 0.0344 +[2025-02-23 14:45:47] (step=0987200) Train Loss: 0.3203, Train Steps/Sec: 17.44, Grad Norm: 0.0380 +[2025-02-23 14:45:53] (step=0987300) Train Loss: 0.3210, Train Steps/Sec: 17.43, Grad Norm: 0.0379 +[2025-02-23 14:45:58] (step=0987400) Train Loss: 0.3209, Train Steps/Sec: 17.53, Grad Norm: 0.0344 +[2025-02-23 14:46:05] (step=0987500) Train Loss: 0.3210, Train Steps/Sec: 15.96, Grad Norm: 0.0364 +[2025-02-23 14:46:11] (step=0987600) Train Loss: 0.3216, Train Steps/Sec: 14.77, Grad Norm: 0.0362 +[2025-02-23 14:46:17] (step=0987700) Train Loss: 0.3207, Train Steps/Sec: 16.73, Grad Norm: 0.0343 +[2025-02-23 14:46:24] (step=0987800) Train Loss: 0.3209, Train Steps/Sec: 15.98, Grad Norm: 0.0319 +[2025-02-23 14:46:30] (step=0987900) Train Loss: 0.3206, Train Steps/Sec: 15.35, Grad Norm: 0.0323 +[2025-02-23 14:46:36] (step=0988000) Train Loss: 0.3205, Train Steps/Sec: 17.47, Grad Norm: 0.0357 +[2025-02-23 14:46:42] (step=0988100) Train Loss: 0.3208, Train Steps/Sec: 16.06, Grad Norm: 0.0338 +[2025-02-23 14:46:48] (step=0988200) Train Loss: 0.3215, Train Steps/Sec: 17.45, Grad Norm: 0.0319 +[2025-02-23 14:46:55] (step=0988300) Train Loss: 0.3205, Train Steps/Sec: 14.50, Grad Norm: 0.0329 +[2025-02-23 14:47:01] (step=0988400) Train Loss: 0.3207, Train Steps/Sec: 17.04, Grad Norm: 0.0333 +[2025-02-23 14:47:07] (step=0988500) Train Loss: 0.3205, Train Steps/Sec: 15.65, Grad Norm: 0.0381 +[2025-02-23 14:47:13] (step=0988600) Train Loss: 0.3200, Train Steps/Sec: 17.12, Grad Norm: 0.0357 +[2025-02-23 14:47:19] (step=0988700) Train Loss: 0.3211, Train Steps/Sec: 17.08, Grad Norm: 0.0361 +[2025-02-23 14:47:24] (step=0988800) Train Loss: 0.3206, Train Steps/Sec: 17.15, Grad Norm: 0.0349 +[2025-02-23 14:47:30] (step=0988900) Train Loss: 0.3210, Train Steps/Sec: 17.14, Grad Norm: 0.0389 +[2025-02-23 14:47:36] (step=0989000) Train Loss: 0.3207, Train Steps/Sec: 17.15, Grad Norm: 0.0371 +[2025-02-23 14:47:42] (step=0989100) Train Loss: 0.3208, Train Steps/Sec: 17.16, Grad Norm: 0.0337 +[2025-02-23 14:47:48] (step=0989200) Train Loss: 0.3210, Train Steps/Sec: 17.18, Grad Norm: 0.0344 +[2025-02-23 14:47:54] (step=0989300) Train Loss: 0.3209, Train Steps/Sec: 17.13, Grad Norm: 0.0342 +[2025-02-23 14:47:59] (step=0989400) Train Loss: 0.3205, Train Steps/Sec: 17.13, Grad Norm: 0.0342 +[2025-02-23 14:48:06] (step=0989500) Train Loss: 0.3210, Train Steps/Sec: 14.50, Grad Norm: 0.0358 +[2025-02-23 14:48:14] (step=0989600) Train Loss: 0.3200, Train Steps/Sec: 13.33, Grad Norm: 0.0386 +[2025-02-23 14:48:20] (step=0989700) Train Loss: 0.3209, Train Steps/Sec: 16.39, Grad Norm: 0.0336 +[2025-02-23 14:48:27] (step=0989800) Train Loss: 0.3204, Train Steps/Sec: 15.04, Grad Norm: 0.0388 +[2025-02-23 14:48:33] (step=0989900) Train Loss: 0.3209, Train Steps/Sec: 15.68, Grad Norm: 0.0325 +[2025-02-23 14:48:39] (step=0990000) Train Loss: 0.3212, Train Steps/Sec: 16.38, Grad Norm: 0.0342 +[2025-02-23 14:48:45] (step=0990100) Train Loss: 0.3201, Train Steps/Sec: 16.40, Grad Norm: 0.0314 +[2025-02-23 14:48:51] (step=0990200) Train Loss: 0.3210, Train Steps/Sec: 17.15, Grad Norm: 0.0349 +[2025-02-23 14:48:57] (step=0990300) Train Loss: 0.3207, Train Steps/Sec: 17.44, Grad Norm: 0.0340 +[2025-02-23 14:49:02] (step=0990400) Train Loss: 0.3202, Train Steps/Sec: 17.40, Grad Norm: 0.0340 +[2025-02-23 14:49:09] (step=0990500) Train Loss: 0.3207, Train Steps/Sec: 15.89, Grad Norm: 0.0366 +[2025-02-23 14:49:15] (step=0990600) Train Loss: 0.3209, Train Steps/Sec: 17.36, Grad Norm: 0.0329 +[2025-02-23 14:49:20] (step=0990700) Train Loss: 0.3203, Train Steps/Sec: 17.39, Grad Norm: 0.0359 +[2025-02-23 14:49:27] (step=0990800) Train Loss: 0.3203, Train Steps/Sec: 14.47, Grad Norm: 0.0338 +[2025-02-23 14:49:33] (step=0990900) Train Loss: 0.3207, Train Steps/Sec: 17.33, Grad Norm: 0.0310 +[2025-02-23 14:49:39] (step=0991000) Train Loss: 0.3209, Train Steps/Sec: 17.36, Grad Norm: 0.0393 +[2025-02-23 14:49:45] (step=0991100) Train Loss: 0.3206, Train Steps/Sec: 17.36, Grad Norm: 0.0348 +[2025-02-23 14:49:50] (step=0991200) Train Loss: 0.3203, Train Steps/Sec: 17.25, Grad Norm: 0.0358 +[2025-02-23 14:49:56] (step=0991300) Train Loss: 0.3207, Train Steps/Sec: 17.37, Grad Norm: 0.0343 +[2025-02-23 14:50:02] (step=0991400) Train Loss: 0.3207, Train Steps/Sec: 17.41, Grad Norm: 0.0324 +[2025-02-23 14:50:09] (step=0991500) Train Loss: 0.3210, Train Steps/Sec: 14.07, Grad Norm: 0.0334 +[2025-02-23 14:50:15] (step=0991600) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0334 +[2025-02-23 14:50:21] (step=0991700) Train Loss: 0.3208, Train Steps/Sec: 16.68, Grad Norm: 0.0396 +[2025-02-23 14:50:27] (step=0991800) Train Loss: 0.3207, Train Steps/Sec: 14.67, Grad Norm: 0.0379 +[2025-02-23 14:50:33] (step=0991900) Train Loss: 0.3207, Train Steps/Sec: 16.70, Grad Norm: 0.0336 +[2025-02-23 14:50:39] (step=0992000) Train Loss: 0.3206, Train Steps/Sec: 16.65, Grad Norm: 0.0351 +[2025-02-23 14:50:47] (step=0992100) Train Loss: 0.3211, Train Steps/Sec: 14.03, Grad Norm: 0.0349 +[2025-02-23 14:50:52] (step=0992200) Train Loss: 0.3216, Train Steps/Sec: 17.39, Grad Norm: 0.0320 +[2025-02-23 14:50:58] (step=0992300) Train Loss: 0.3204, Train Steps/Sec: 17.32, Grad Norm: 0.0373 +[2025-02-23 14:51:04] (step=0992400) Train Loss: 0.3200, Train Steps/Sec: 16.53, Grad Norm: 0.0351 +[2025-02-23 14:51:10] (step=0992500) Train Loss: 0.3204, Train Steps/Sec: 16.68, Grad Norm: 0.0310 +[2025-02-23 14:51:16] (step=0992600) Train Loss: 0.3207, Train Steps/Sec: 17.39, Grad Norm: 0.0367 +[2025-02-23 14:51:22] (step=0992700) Train Loss: 0.3208, Train Steps/Sec: 17.41, Grad Norm: 0.0366 +[2025-02-23 14:51:27] (step=0992800) Train Loss: 0.3205, Train Steps/Sec: 17.47, Grad Norm: 0.0388 +[2025-02-23 14:51:33] (step=0992900) Train Loss: 0.3211, Train Steps/Sec: 17.53, Grad Norm: 0.0355 +[2025-02-23 14:51:39] (step=0993000) Train Loss: 0.3199, Train Steps/Sec: 17.45, Grad Norm: 0.0348 +[2025-02-23 14:51:45] (step=0993100) Train Loss: 0.3213, Train Steps/Sec: 17.45, Grad Norm: 0.0334 +[2025-02-23 14:51:50] (step=0993200) Train Loss: 0.3212, Train Steps/Sec: 17.35, Grad Norm: 0.0355 +[2025-02-23 14:51:57] (step=0993300) Train Loss: 0.3206, Train Steps/Sec: 14.63, Grad Norm: 0.0335 +[2025-02-23 14:52:04] (step=0993400) Train Loss: 0.3206, Train Steps/Sec: 15.11, Grad Norm: 0.0358 +[2025-02-23 14:52:10] (step=0993500) Train Loss: 0.3206, Train Steps/Sec: 15.04, Grad Norm: 0.0343 +[2025-02-23 14:52:16] (step=0993600) Train Loss: 0.3208, Train Steps/Sec: 17.27, Grad Norm: 0.0325 +[2025-02-23 14:52:23] (step=0993700) Train Loss: 0.3213, Train Steps/Sec: 15.89, Grad Norm: 0.0371 +[2025-02-23 14:52:29] (step=0993800) Train Loss: 0.3203, Train Steps/Sec: 15.23, Grad Norm: 0.0374 +[2025-02-23 14:52:35] (step=0993900) Train Loss: 0.3210, Train Steps/Sec: 15.95, Grad Norm: 0.0360 +[2025-02-23 14:52:41] (step=0994000) Train Loss: 0.3213, Train Steps/Sec: 17.33, Grad Norm: 0.0407 +[2025-02-23 14:52:47] (step=0994100) Train Loss: 0.3208, Train Steps/Sec: 16.63, Grad Norm: 0.0367 +[2025-02-23 14:52:53] (step=0994200) Train Loss: 0.3208, Train Steps/Sec: 17.43, Grad Norm: 0.0352 +[2025-02-23 14:52:59] (step=0994300) Train Loss: 0.3214, Train Steps/Sec: 17.33, Grad Norm: 0.0351 +[2025-02-23 14:53:05] (step=0994400) Train Loss: 0.3206, Train Steps/Sec: 16.49, Grad Norm: 0.0342 +[2025-02-23 14:53:11] (step=0994500) Train Loss: 0.3213, Train Steps/Sec: 16.53, Grad Norm: 0.0339 +[2025-02-23 14:53:18] (step=0994600) Train Loss: 0.3208, Train Steps/Sec: 14.38, Grad Norm: 0.0356 +[2025-02-23 14:53:24] (step=0994700) Train Loss: 0.3205, Train Steps/Sec: 17.26, Grad Norm: 0.0345 +[2025-02-23 14:53:29] (step=0994800) Train Loss: 0.3208, Train Steps/Sec: 17.30, Grad Norm: 0.0395 +[2025-02-23 14:53:35] (step=0994900) Train Loss: 0.3207, Train Steps/Sec: 17.34, Grad Norm: 0.0350 +[2025-02-23 14:53:41] (step=0995000) Train Loss: 0.3207, Train Steps/Sec: 17.42, Grad Norm: 0.0351 +[2025-02-23 14:53:47] (step=0995100) Train Loss: 0.3211, Train Steps/Sec: 17.42, Grad Norm: 0.0353 +[2025-02-23 14:53:52] (step=0995200) Train Loss: 0.3213, Train Steps/Sec: 17.34, Grad Norm: 0.0376 +[2025-02-23 14:53:58] (step=0995300) Train Loss: 0.3205, Train Steps/Sec: 17.32, Grad Norm: 0.0390 +[2025-02-23 14:54:05] (step=0995400) Train Loss: 0.3210, Train Steps/Sec: 14.58, Grad Norm: 0.0359 +[2025-02-23 14:54:11] (step=0995500) Train Loss: 0.3206, Train Steps/Sec: 16.45, Grad Norm: 0.0346 +[2025-02-23 14:54:17] (step=0995600) Train Loss: 0.3210, Train Steps/Sec: 16.42, Grad Norm: 0.0335 +[2025-02-23 14:54:24] (step=0995700) Train Loss: 0.3214, Train Steps/Sec: 15.25, Grad Norm: 0.0344 +[2025-02-23 14:54:31] (step=0995800) Train Loss: 0.3212, Train Steps/Sec: 12.94, Grad Norm: 0.0428 +[2025-02-23 14:54:37] (step=0995900) Train Loss: 0.3210, Train Steps/Sec: 16.59, Grad Norm: 0.0336 +[2025-02-23 14:54:43] (step=0996000) Train Loss: 0.3212, Train Steps/Sec: 17.37, Grad Norm: 0.0327 +[2025-02-23 14:54:49] (step=0996100) Train Loss: 0.3208, Train Steps/Sec: 16.68, Grad Norm: 0.0321 +[2025-02-23 14:54:55] (step=0996200) Train Loss: 0.3203, Train Steps/Sec: 17.37, Grad Norm: 0.0341 +[2025-02-23 14:55:01] (step=0996300) Train Loss: 0.3207, Train Steps/Sec: 16.58, Grad Norm: 0.0327 +[2025-02-23 14:55:07] (step=0996400) Train Loss: 0.3210, Train Steps/Sec: 17.34, Grad Norm: 0.0336 +[2025-02-23 14:55:13] (step=0996500) Train Loss: 0.3205, Train Steps/Sec: 16.59, Grad Norm: 0.0318 +[2025-02-23 14:55:19] (step=0996600) Train Loss: 0.3210, Train Steps/Sec: 17.28, Grad Norm: 0.0366 +[2025-02-23 14:55:24] (step=0996700) Train Loss: 0.3204, Train Steps/Sec: 17.24, Grad Norm: 0.0355 +[2025-02-23 14:55:30] (step=0996800) Train Loss: 0.3211, Train Steps/Sec: 17.32, Grad Norm: 0.0393 +[2025-02-23 14:55:36] (step=0996900) Train Loss: 0.3204, Train Steps/Sec: 17.22, Grad Norm: 0.0365 +[2025-02-23 14:55:42] (step=0997000) Train Loss: 0.3213, Train Steps/Sec: 17.20, Grad Norm: 0.0337 +[2025-02-23 14:55:49] (step=0997100) Train Loss: 0.3206, Train Steps/Sec: 14.52, Grad Norm: 0.0326 +[2025-02-23 14:55:54] (step=0997200) Train Loss: 0.3215, Train Steps/Sec: 17.49, Grad Norm: 0.0331 +[2025-02-23 14:56:01] (step=0997300) Train Loss: 0.3206, Train Steps/Sec: 14.71, Grad Norm: 0.0323 +[2025-02-23 14:56:07] (step=0997400) Train Loss: 0.3210, Train Steps/Sec: 17.40, Grad Norm: 0.0357 +[2025-02-23 14:56:13] (step=0997500) Train Loss: 0.3202, Train Steps/Sec: 15.78, Grad Norm: 0.0380 +[2025-02-23 14:56:20] (step=0997600) Train Loss: 0.3207, Train Steps/Sec: 15.90, Grad Norm: 0.0322 +[2025-02-23 14:56:26] (step=0997700) Train Loss: 0.3205, Train Steps/Sec: 16.58, Grad Norm: 0.0344 +[2025-02-23 14:56:32] (step=0997800) Train Loss: 0.3208, Train Steps/Sec: 15.26, Grad Norm: 0.0335 +[2025-02-23 14:56:38] (step=0997900) Train Loss: 0.3208, Train Steps/Sec: 16.60, Grad Norm: 0.0341 +[2025-02-23 14:56:44] (step=0998000) Train Loss: 0.3209, Train Steps/Sec: 17.46, Grad Norm: 0.0338 +[2025-02-23 14:56:50] (step=0998100) Train Loss: 0.3207, Train Steps/Sec: 16.73, Grad Norm: 0.0332 +[2025-02-23 14:56:56] (step=0998200) Train Loss: 0.3206, Train Steps/Sec: 17.31, Grad Norm: 0.0348 +[2025-02-23 14:57:03] (step=0998300) Train Loss: 0.3204, Train Steps/Sec: 13.94, Grad Norm: 0.0329 +[2025-02-23 14:57:09] (step=0998400) Train Loss: 0.3204, Train Steps/Sec: 17.23, Grad Norm: 0.0366 +[2025-02-23 14:57:15] (step=0998500) Train Loss: 0.3205, Train Steps/Sec: 16.60, Grad Norm: 0.0366 +[2025-02-23 14:57:20] (step=0998600) Train Loss: 0.3208, Train Steps/Sec: 17.38, Grad Norm: 0.0383 +[2025-02-23 14:57:26] (step=0998700) Train Loss: 0.3207, Train Steps/Sec: 17.34, Grad Norm: 0.0370 +[2025-02-23 14:57:32] (step=0998800) Train Loss: 0.3209, Train Steps/Sec: 17.34, Grad Norm: 0.0326 +[2025-02-23 14:57:38] (step=0998900) Train Loss: 0.3214, Train Steps/Sec: 17.29, Grad Norm: 0.0339 +[2025-02-23 14:57:44] (step=0999000) Train Loss: 0.3212, Train Steps/Sec: 17.37, Grad Norm: 0.0386 +[2025-02-23 14:57:49] (step=0999100) Train Loss: 0.3198, Train Steps/Sec: 17.41, Grad Norm: 0.0374 +[2025-02-23 14:57:55] (step=0999200) Train Loss: 0.3208, Train Steps/Sec: 16.55, Grad Norm: 0.0330 +[2025-02-23 14:58:02] (step=0999300) Train Loss: 0.3210, Train Steps/Sec: 15.28, Grad Norm: 0.0379 +[2025-02-23 14:58:08] (step=0999400) Train Loss: 0.3207, Train Steps/Sec: 17.43, Grad Norm: 0.0374 +[2025-02-23 14:58:14] (step=0999500) Train Loss: 0.3204, Train Steps/Sec: 16.56, Grad Norm: 0.0396 +[2025-02-23 14:58:21] (step=0999600) Train Loss: 0.3210, Train Steps/Sec: 13.23, Grad Norm: 0.0377 +[2025-02-23 14:58:28] (step=0999700) Train Loss: 0.3205, Train Steps/Sec: 15.92, Grad Norm: 0.0339 +[2025-02-23 14:58:34] (step=0999800) Train Loss: 0.3203, Train Steps/Sec: 15.91, Grad Norm: 0.0353 +[2025-02-23 14:58:40] (step=0999900) Train Loss: 0.3201, Train Steps/Sec: 16.65, Grad Norm: 0.0338 +[2025-02-23 14:58:46] (step=1000000) Train Loss: 0.3201, Train Steps/Sec: 17.31, Grad Norm: 0.0385 +[2025-02-23 14:58:47] Saved checkpoint to ../logs/flow/flow_s_1000kx1024_vavae/checkpoints/1000000.pt