mfajcik committed on
Commit 39f7b34 · verified · 1 Parent(s): c84a321

Update README.md

Files changed (1): README.md +114 -1
README.md CHANGED
@@ -10,7 +10,120 @@ datasets:
  Training Dataset: Semant Search Summarization Dataset
  Training configuration
  ```yaml
- tbd.
+ max_seq_len: 18000
+ global_seed: 42
+
+ # Run Name
+ run_name: csmpt_summarization_d01 # If left blank, will be read from env var $COMPOSER_RUN_NAME
+
+ # Model
+ model:
+   name: hf_causal_lm
+   pretrained_model_name_or_path: BUT-FIT/csmpt7b
+   init_device: cpu
+   pretrained: true
+   trust_remote_code: true
+   config_overrides:
+     max_seq_len: ${max_seq_len}
+     attn_config:
+       attn_impl: flash
+       alibi: true
+     resid_pdrop: 0.1
+   tokenizer_name: BUT-FIT/csmpt7b
+
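+ # "&hf_dataset" defines a YAML anchor; both loaders below merge it via "<<: *hf_dataset".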
+ dataset: &hf_dataset
+   hf_name: BUT-FIT/CzechRAGSummarization
+   decoder_only_format: true
+   max_seq_len: ${max_seq_len}
+
+ # Tokenizer
+ tokenizer:
+   name: BUT-FIT/csmpt7b
+   kwargs:
+     model_max_length: ${max_seq_len}
+
+ # Dataloaders
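+ # The finetuning loader builds prompt/target pairs from the HF dataset; in llm-foundry,
+ # prompt tokens are masked out of the loss.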
+ train_loader:
+   name: finetuning
+   dataset:
+     <<: *hf_dataset
+     split: train
+     shuffle: true
+     shuffle_seed: ${global_seed}
+   drop_last: true
+   num_workers: 8
+
+ eval_loader:
+   name: finetuning
+   dataset:
+     <<: *hf_dataset
+     split: validation
+     shuffle: false
+     shuffle_seed: ${global_seed}
+   drop_last: false
+   num_workers: 2
+
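+ # "Nba" durations below are Composer time units counted in batches (optimizer steps).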
+ # Optimization
+ scheduler:
+   name: cosine_with_warmup
+   t_warmup: 50ba
+   alpha_f: 0.1
+
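+ # decoupled_lionw is llm-foundry's Lion optimizer with decoupled weight decay.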
+ optimizer:
+   name: decoupled_lionw
+   lr: 1.0e-5
+   betas:
+     - 0.9
+     - 0.95
+   weight_decay: 0.0
+
+ algorithms:
+   gradient_clipping:
+     clipping_type: norm
+     clipping_threshold: 5.0
+
+ max_duration: 4500ba
+ eval_interval: 200ba
+ eval_first: false
+ eval_subset_num_batches: -1
+ global_train_batch_size: 64
+
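+ # Composer derives gradient accumulation from
+ # global_train_batch_size / (num_gpus * device_train_microbatch_size).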
+ # System
+ seed: ${global_seed}
+ device_eval_batch_size: 2
+ device_train_microbatch_size: 1
+ # device_train_microbatch_size: auto
+ precision: amp_bf16
+
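+ # FULL_SHARD shards parameters, gradients, and optimizer state across ranks (ZeRO-3-style).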
+ # FSDP
+ fsdp_config:
+   sharding_strategy: FULL_SHARD
+   mixed_precision: PURE
+   activation_checkpointing: true
+   activation_checkpointing_reentrant: false
+   activation_cpu_offload: false
+   limit_all_gathers: true
+   verbose: false
+   backward_prefetch: BACKWARD_PRE
+
+ # Logging
+ progress_bar: true
+ log_to_console: true
+ console_log_interval: 1ba
+
+ callbacks:
+   speed_monitor:
+     window_size: 10
+   lr_monitor: {}
+   memory_monitor: {}
+   runtime_estimator: {}
+
+ save_num_checkpoints_to_keep: 10 # Important, this cleans up checkpoints saved to DISK
+ save_interval: 200ba
+ save_folder: ./{run_name}/checkpoints
  ```
 
  Example:
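
Configs in this shape are consumed by llm-foundry's training entry point through OmegaConf, which expands the `${max_seq_len}` and `${global_seed}` interpolations at load time. A minimal sketch of inspecting such a config from Python (the local file name is hypothetical; the keys come from the YAML above):

```python
from omegaconf import OmegaConf

# Hypothetical local copy of the training config shown above.
cfg = OmegaConf.load("csmpt_summarization_d01.yaml")
OmegaConf.resolve(cfg)  # expand ${max_seq_len} and ${global_seed}

# The merge key (<<: *hf_dataset) is applied by the YAML parser itself, so
# both loaders already carry the shared dataset settings at this point.
print(cfg.model.config_overrides.max_seq_len)  # 18000
print(cfg.train_loader.dataset.hf_name)        # BUT-FIT/CzechRAGSummarization
print(cfg.optimizer.name)                      # decoupled_lionw
```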