Upload summary.json with huggingface_hub
summary.json +1 -0
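For context, a commit like this is normally produced with the huggingface_hub Python client. A minimal sketch, assuming a placeholder repo id and an already-configured access token (neither is given in this commit):

```python
from huggingface_hub import HfApi

# Minimal sketch of how a results file like this is typically pushed.
# The repo_id below is a placeholder, not taken from this commit; authentication
# (a token from `huggingface-cli login` or the HF_TOKEN env var) is assumed.
api = HfApi()
api.upload_file(
    path_or_fileobj="summary.json",   # local summary written by the training run
    path_in_repo="summary.json",      # destination path inside the repo, as in this commit
    repo_id="your-username/your-repo",  # placeholder repo id
    commit_message="Upload summary.json with huggingface_hub",
)
```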
summary.json
ADDED
@@ -0,0 +1 @@
+{"train_loss": [], "val_loss": [], "val_pp": [], "val_acc": [], "args": {"config_format": "base", "experiment_name": null, "seed": 0, "data_seed": 1337, "eval_interval": 200, "full_eval_at": [], "eval_batches": 32, "device": "cuda:0", "distributed_backend": "nccl", "log_interval": 50, "results_base_folder": "./exps", "permanent_ckpt_interval": 0, "latest_ckpt_interval": 20000, "resume_from": "exps/UNTIED-800M-NoQuantizer@16:NoQuantizer@16-c4_c4_llama_nlayers16_nhead16_lr7.5e-05_sched_cos_warmup30517_decay_linear_0.1_iter305175_bs32x2_ws8_seed0_data_seed1337/ckpts/latest", "resume_from_swa": null, "auto_resume": true, "wandb": true, "wandb_project": "llm-baselines", "wandb_run_prefix": "UNTIED-800M-NoQuantizer@16:NoQuantizer@16-c4", "eval_seq_prefix": "none", "log_dynamics": false, "dynamics_logger_cfg": "./src/logger/rotational_logger.yaml", "scheduler": "cos", "cos_inf_steps": 0, "iterations": 305175, "warmup_steps": 30517, "lr": 7.5e-05, "wsd_final_lr_scale": 0.0, "wsd_fract_decay": 0.1, "decay_type": "linear", "opt": "adamw", "batch_size": 32, "acc_steps": 2, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "weight_average": false, "wa_interval": 5, "wa_horizon": 500, "wa_dtype": "float32", "wa_use_temp_dir": false, "wa_sweep_horizon": false, "max_num_wa_sweeps": 5, "exponential_moving_average": false, "ema_interval": 10, "ema_decay": 0.95, "ema_after_warmup": false, "datasets_dir": "./datasets/", "dataset": "c4", "tokenizer": "gpt2", "vocab_size": 50304, "data_in_ram": false, "model": "llama", "parallel_block": false, "use_pretrained": "none", "from_dense": false, "init_std": 0.02, "dropout": 0.0, "n_head": 16, "n_layer": 16, "sequence_length": 512, "n_embd": 2048, "multiple_of": 256, "rmsnorm_eps": 1e-05, "dtype": "bfloat16", "bias": false, "compile": true, "mlp_dim_exp_factor": 1.0, "w_quant": "NoQuantizer", "w_quant_kwargs": {}, "a_quant": "NoQuantizer", "a_quant_kwargs": {}, "world_size": 8}}
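The added file is the (so far empty) run summary for the UNTIED-800M LLaMA-style C4 baseline: the metric lists (train_loss, val_loss, val_pp, val_acc) contain no entries yet, and args records the training configuration (16 layers, 16 heads, n_embd 2048, sequence length 512, bfloat16, AdamW with a cosine schedule at lr 7.5e-05 for 305175 iterations). A small sketch of how such a summary can be fetched and inspected, again assuming a placeholder repo id:

```python
import json
from huggingface_hub import hf_hub_download

# Placeholder repo id; substitute the repository this commit belongs to.
path = hf_hub_download(repo_id="your-username/your-repo", filename="summary.json")

with open(path) as f:
    summary = json.load(f)

args = summary["args"]
print(args["model"], args["n_layer"], args["n_head"], args["n_embd"])  # llama 16 16 2048
print("lr:", args["lr"], "iterations:", args["iterations"])            # 7.5e-05 305175
print("train_loss points logged:", len(summary["train_loss"]))         # 0 so far
```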