Commit a95851e (verified) · committed by pietrolesci · 1 parent: d884905

Upload folder using huggingface_hub
README.md ADDED
@@ -0,0 +1,86 @@
+ ## Experiment Configuration
+ ```yaml
+ callbacks:
+   grad_accum:
+     _target_: src.callbacks.gradient_accumulation.GradientAccumulationScheduler
+     scheduling:
+       0: 16
+   grad_norm:
+     _target_: src.callbacks.grad_norm.GradNorm
+     check_clipping: false
+     group_separator: /
+     histogram_freq: null
+     log_weight_distribution: false
+     norm_type: 2
+     only_total: true
+   lr_monitor:
+     _target_: src.callbacks.lr_monitor.SimpleLearningRateMonitor
+   model_checkpoint:
+     _target_: src.callbacks.model_checkpoint.ModelCheckpoint
+     dirpath: .checkpoints
+     enable_version_counter: false
+     every_n_train_steps: 2000
+     filename: '{step}'
+     save_initial_checkpoint: true
+     save_last: link
+     save_top_k: -1
+     verbose: true
+   speed_monitor:
+     _target_: src.callbacks.speed_monitor.SpeedMonitor
+ data:
+   batch_size: 8
+   drop_last: false
+   eval_batch_size: 16
+   multiprocessing_context: null
+   num_workers: 8
+   persistent_workers: false
+   pin_memory: true
+   prefetch_factor: 2
+   shuffle: true
+ dataset: minipile
+ loggers:
+   tensorboard:
+     _target_: src.loggers.TensorBoardLogger
+     name: ''
+     save_dir: ./
+     version: ./
+ model: smol_llama-81M-tied
+ optim:
+   lr: 0.0006
+   num_warmup_steps: 2000
+   optim_kwargs:
+     betas:
+     - 0.9
+     - 0.95
+     eps: 1.0e-08
+     fused: true
+   optim_name: adamw
+   scheduler_kwargs:
+     min_lr_ratio: 0.01
+     num_decay_steps: 2000
+     num_stable_steps: 46000
+   scheduler_name: warmup_stable_decay
+   weight_decay: 0.1
+ out_parent_folder: model_train
+ resume_from_checkpoint: null
+ run_folder: minipile/smol_llama-81M-tied_bpe128000minipile_2024-11-01T13-04-36
+ save_initial_checkpoint: true
+ seed: 42
+ tok_name: bpe128000minipile
+ tok_path: /home/pl487/rdd/outputs/tokenizers/bpe128000minipile
+ torch_compile: true
+ train_data_path: /home/pl487/rdd/data/minipile/bpe128000minipile/train
+ trainer:
+   accelerator: gpu
+   deterministic: false
+   enable_progress_bar: true
+   fast_dev_run: false
+   gradient_clip_algorithm: norm
+   gradient_clip_val: 1.0
+   limit_val_batches: 500
+   log_every_n_steps: 1
+   max_steps: 50000
+   precision: bf16-true
+   val_check_interval: 2000
+ val_data_path: /home/pl487/rdd/data/minipile/bpe128000minipile/validation
+ ```
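Two derived quantities are worth noting from the config above. With `data.batch_size: 8` and the gradient-accumulation schedule `0: 16`, each optimizer step sees an effective batch of 8 × 16 = 128 sequences per device. The `scheduler_kwargs` also partition the run exactly: 2000 warmup + 46000 stable + 2000 decay = 50000 steps, matching `trainer.max_steps`. The sketch below shows the learning-rate multiplier such a `warmup_stable_decay` schedule implies; the linear warmup and linear decay shapes are assumptions, since the actual `src.optim` scheduler code is not part of this commit.

```python
# Hypothetical sketch of the warmup-stable-decay multiplier applied to the
# base lr (6e-4). The decay shape is assumed linear; the repo's scheduler
# may differ (e.g. cosine or 1-sqrt decay).
def wsd_multiplier(
    step: int,
    num_warmup_steps: int = 2_000,
    num_stable_steps: int = 46_000,
    num_decay_steps: int = 2_000,
    min_lr_ratio: float = 0.01,
) -> float:
    if step < num_warmup_steps:                     # linear warmup to 1.0
        return step / num_warmup_steps
    if step < num_warmup_steps + num_stable_steps:  # hold at base lr
        return 1.0
    decay_step = step - num_warmup_steps - num_stable_steps
    frac = min(decay_step / num_decay_steps, 1.0)   # anneal to 1% of base lr
    return 1.0 - frac * (1.0 - min_lr_ratio)
```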
events.out.tfevents.1730466280.dev-gpu-pl487.915577.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5da0ee1e8278db36177e0317c6a0b0e27f361c717301dc94aa555b1b7afd4c4
+ size 85055516
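The event file itself is tracked with Git LFS, so the diff records only the three-line pointer: spec version, sha256 object id, and size in bytes (85,055,516, about 85 MB). If you ever need to check a pointer by hand, a minimal parse looks like this (the filename is the one added in this commit; the helper is mine, not part of the repo):

```python
# Hypothetical helper: parse a Git LFS pointer file ("key value" per line).
from pathlib import Path

def read_lfs_pointer(path: str) -> dict[str, str]:
    lines = Path(path).read_text().splitlines()
    return dict(line.split(" ", 1) for line in lines if line)

# read_lfs_pointer("events.out.tfevents.1730466280.dev-gpu-pl487.915577.0")
# -> {"version": "https://git-lfs.github.com/spec/v1", "oid": "sha256:e5da...", "size": "85055516"}
```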
hparams.yaml ADDED
@@ -0,0 +1,83 @@
+ loggers:
+   tensorboard:
+     _target_: src.loggers.TensorBoardLogger
+     save_dir: ./
+     name: ''
+     version: ./
+ callbacks:
+   lr_monitor:
+     _target_: src.callbacks.lr_monitor.SimpleLearningRateMonitor
+   grad_norm:
+     _target_: src.callbacks.grad_norm.GradNorm
+     norm_type: 2
+     group_separator: /
+     histogram_freq: null
+     check_clipping: false
+     log_weight_distribution: false
+     only_total: true
+   speed_monitor:
+     _target_: src.callbacks.speed_monitor.SpeedMonitor
+   grad_accum:
+     _target_: src.callbacks.gradient_accumulation.GradientAccumulationScheduler
+     scheduling:
+       0: 16
+   model_checkpoint:
+     _target_: src.callbacks.model_checkpoint.ModelCheckpoint
+     dirpath: .checkpoints
+     filename: '{step}'
+     enable_version_counter: false
+     every_n_train_steps: 2000
+     save_top_k: -1
+     save_last: link
+     verbose: true
+     save_initial_checkpoint: true
+ tok_path: /home/pl487/rdd/outputs/tokenizers/bpe128000minipile
+ run_folder: minipile/smol_llama-81M-tied_bpe128000minipile_2024-11-01T13-04-36
+ out_parent_folder: model_train
+ tok_name: bpe128000minipile
+ dataset: minipile
+ train_data_path: /home/pl487/rdd/data/minipile/bpe128000minipile/train
+ val_data_path: /home/pl487/rdd/data/minipile/bpe128000minipile/validation
+ model: smol_llama-81M-tied
+ resume_from_checkpoint: null
+ save_initial_checkpoint: true
+ seed: 42
+ torch_compile: true
+ data:
+   batch_size: 8
+   eval_batch_size: 16
+   shuffle: true
+   drop_last: false
+   num_workers: 8
+   pin_memory: true
+   persistent_workers: false
+   prefetch_factor: 2
+   multiprocessing_context: null
+ optim:
+   optim_name: adamw
+   lr: 0.0006
+   weight_decay: 0.1
+   optim_kwargs:
+     fused: true
+     eps: 1.0e-08
+     betas:
+     - 0.9
+     - 0.95
+   scheduler_name: warmup_stable_decay
+   num_warmup_steps: 2000
+   scheduler_kwargs:
+     num_stable_steps: 46000
+     num_decay_steps: 2000
+     min_lr_ratio: 0.01
+ trainer:
+   accelerator: gpu
+   precision: bf16-true
+   deterministic: false
+   log_every_n_steps: 1
+   enable_progress_bar: true
+   fast_dev_run: false
+   gradient_clip_val: 1.0
+   gradient_clip_algorithm: norm
+   val_check_interval: 2000
+   max_steps: 50000
+   limit_val_batches: 500
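`hparams.yaml` repeats the README configuration with keys in declaration order rather than alphabetized; the values are identical. The `_target_` keys follow the Hydra convention of naming the class to construct, with sibling keys passed as keyword arguments. Assuming the project resolves them with Hydra's `instantiate` (a guess; the training script is not part of this commit), the callbacks would be built roughly as:

```python
# Sketch under the assumption that hparams.yaml is consumed via Hydra/OmegaConf
# and that the `src` package is importable from the repo root.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load("hparams.yaml")
# Each `_target_` dotted path is imported and called with its sibling keys,
# e.g. src.callbacks.model_checkpoint.ModelCheckpoint(dirpath=".checkpoints", ...)
callbacks = [instantiate(cb) for cb in cfg.callbacks.values()]
```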
tb_logs.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d095fadd44eae44039c3ff22ef96c875d01dcf17e59aa54420c4307b5b7c5baa
+ size 7801331
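`tb_logs.parquet` (also LFS-tracked; 7,801,331 bytes, about 7.8 MB) appears to be a tabular export of the TensorBoard scalars from the run above. Its schema is not visible from the pointer alone, so inspect it before relying on any column names:

```python
# Minimal sketch: load the exported logs and discover the actual schema.
# Column names are unknown from the pointer alone -- check df.columns first.
import pandas as pd

df = pd.read_parquet("tb_logs.parquet")  # requires pyarrow or fastparquet
print(df.columns.tolist())
print(df.head())
```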