pretrain-core-0

- README.md +23 -0
- scripts/pretrain_core_model_0.yaml +3 -3

README.md
CHANGED
@@ -53,6 +53,29 @@ time python -B prepare_core_datasets.py
 ```
 
 ```
+i=0, min_len=0, max_len=1073741824, block_size=1025, chunk_size=16400000, len(dataset)=5146620, len(dataset) * block_size=5275285500
+Total number of tokens in the optimized dataset '../core-data-0-0-1073741824-1025-16000' is 5275285500
+
+i=1, min_len=1025, max_len=2049, block_size=2049, chunk_size=16392000, len(dataset)=309838, len(dataset) * block_size=634858062
+Total number of tokens in the optimized dataset '../core-data-1-1025-2049-2049-8000' is 634858062
+
+i=2, min_len=2049, max_len=4097, block_size=4097, chunk_size=16388000, len(dataset)=113843, len(dataset) * block_size=466414771
+Total number of tokens in the optimized dataset '../core-data-2-2049-4097-4097-4000' is 466414771
+
+i=3, min_len=4097, max_len=8193, block_size=8193, chunk_size=16386000, len(dataset)=56713, len(dataset) * block_size=464649609
+Total number of tokens in the optimized dataset '../core-data-3-4097-8193-8193-2000' is 464649609
+
+i=4, min_len=8193, max_len=16385, block_size=16385, chunk_size=16385000, len(dataset)=37406, len(dataset) * block_size=612897310
+Total number of tokens in the optimized dataset '../core-data-4-8193-16385-16385-1000' is 612897310
+
+i=5, min_len=16385, max_len=32769, block_size=32769, chunk_size=16384500, len(dataset)=12737, len(dataset) * block_size=417378753
+Total number of tokens in the optimized dataset '../core-data-5-16385-32769-32769-500' is 417378753
+
+i=6, min_len=32769, max_len=65537, block_size=65537, chunk_size=16384250, len(dataset)=2824, len(dataset) * block_size=185076488
+Total number of tokens in the optimized dataset '../core-data-6-32769-65537-65537-250' is 185076488
+
+i=7, min_len=65537, max_len=131073, block_size=131073, chunk_size=16384125, len(dataset)=634, len(dataset) * block_size=83100282
+Total number of tokens in the optimized dataset '../core-data-7-65537-131073-131073-125' is 83100282
 ```
 
 ```bash
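The eight buckets in the log above follow a regular doubling scheme: for bucket `i`, `block_size` is `2**(10+i) + 1` (1025 up to 131073), the number of items per chunk halves from 16000 down to 125, and `chunk_size` is their product, so every bucket packs roughly 16.4M tokens per chunk. The output directory name encodes `i-min_len-max_len-block_size-items`. Below is a minimal sketch that reproduces the printed parameters; the actual prepare_core_datasets.py is not part of this diff, so the derivation is an assumption checked against every logged line:

```python
# Illustrative reconstruction of the bucket parameters printed above.
# Assumption: only the logged values are known; prepare_core_datasets.py
# itself is not shown in this commit.
for i in range(8):
    block_size = 2 ** (10 + i) + 1                # 1025, 2049, ..., 131073
    min_len = 0 if i == 0 else 2 ** (9 + i) + 1   # bucket 0 takes all lengths
    max_len = 2 ** 30 if i == 0 else block_size   # for i > 0, max_len == block_size
    items_per_chunk = 16000 // 2 ** i             # 16000, 8000, ..., 125
    chunk_size = block_size * items_per_chunk     # ~16.4M tokens per chunk
    out_dir = f"../core-data-{i}-{min_len}-{max_len}-{block_size}-{items_per_chunk}"
    print(f"i={i}, min_len={min_len}, max_len={max_len}, "
          f"block_size={block_size}, chunk_size={chunk_size} -> {out_dir}")
```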
scripts/pretrain_core_model_0.yaml
CHANGED
@@ -46,7 +46,7 @@ data:
   class_path: LitData
 
   init_args:
-    data_path: "../core-data-0-0-1073741824-
+    data_path: "../core-data-0-0-1073741824-1025-16000/"
     num_workers: 32
 
 # Training-related arguments. See ``litgpt.args.TrainArgs`` for details
@@ -65,13 +65,13 @@ train:
   micro_batch_size: 8
 
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
-  lr_warmup_steps:
+  lr_warmup_steps: 2000
 
   # Number of epochs to train on (type: Optional[int], default: null)
   epochs:
 
   # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
-  max_tokens:
+  max_tokens: 5275285500
 
   # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
   max_steps:
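The three config changes line up with the README log: `data_path` now points at the bucket-0 output directory, `lr_warmup_steps` is pinned to its documented default of 2000, and `max_tokens` is set to 5275285500, which is exactly `len(dataset) * block_size` for bucket 0, i.e. one full pass over the dataset the config streams (the "one epoch" reading is an inference, not stated in the commit). A quick check with the values from the log:

```python
# max_tokens in the config equals one full pass over bucket 0:
# 5,146,620 packed sequences of 1,025 tokens each (values from the README log).
len_dataset = 5_146_620
block_size = 1_025
assert len_dataset * block_size == 5_275_285_500  # matches max_tokens
```

With `class_path: LitData` under `data:`, this config is presumably consumed by `litgpt pretrain`, which streams the optimized chunks with `num_workers: 32`.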