Safetensors
patchtst
abao committed on
Commit
5a7805b
·
verified ·
1 Parent(s): 7eb100a

Upload 3 files

Browse files
Files changed (3) hide show
  1. config.json +59 -0
  2. model.safetensors +3 -0
  3. training_info.json +392 -0
config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu",
3
+ "architectures": [
4
+ "PatchTSTForPretraining"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bias": true,
8
+ "channel_attention": true,
9
+ "channel_consistent_masking": false,
10
+ "context_length": 512,
11
+ "channel_rope": true,
12
+ "d_model": 512,
13
+ "distribution_output": "student_t",
14
+ "do_mask_input": true,
15
+ "dropout": 0.0,
16
+ "ff_dropout": 0.0,
17
+ "ffn_dim": 512,
18
+ "head_dropout": 0.0,
19
+ "huber_delta": 1.0,
20
+ "init_std": 0.02,
21
+ "loss": "mse",
22
+ "mask_type": "random",
23
+ "mask_value": 0,
24
+ "max_wavelength": 500,
25
+ "mode": "pretrain",
26
+ "model_type": "patchtst",
27
+ "norm_eps": 1e-05,
28
+ "norm_type": "rmsnorm",
29
+ "num_attention_heads": 8,
30
+ "num_forecast_mask_patches": 3,
31
+ "num_hidden_layers": 8,
32
+ "num_input_channels": 1,
33
+ "num_parallel_samples": 100,
34
+ "num_poly_feats": 256,
35
+ "num_rff": 496,
36
+ "num_targets": 1,
37
+ "output_range": null,
38
+ "patch_length": 16,
39
+ "patch_stride": 16,
40
+ "path_dropout": 0.0,
41
+ "poly_feat_degree": 2,
42
+ "pooling_type": "max",
43
+ "positional_dropout": 0.0,
44
+ "positional_encoding_type": "sincos",
45
+ "pre_norm": true,
46
+ "prediction_length": 128,
47
+ "pretrained_encoder_path": null,
48
+ "random_mask_ratio": 0.5,
49
+ "rff_trainable": true,
50
+ "rope_percent": 0.75,
51
+ "scaling": "std",
52
+ "share_embedding": true,
53
+ "share_projection": true,
54
+ "torch_dtype": "float32",
55
+ "transformers_version": "4.40.1",
56
+ "unmasked_channel_indices": null,
57
+ "use_cls_token": false,
58
+ "use_dynamics_embedding": false
59
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddea56eca94b35fd64d983e0e4eaf02dd1ab5a0967859182531d25f21c4be168
3
+ size 84188936
training_info.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "mode": "pretrain",
4
+ "context_length": 512,
5
+ "prediction_length": 128,
6
+ "distribution_output": "student_t",
7
+ "loss": "mse",
8
+ "huber_delta": 1.0,
9
+ "patch_length": 16,
10
+ "patch_stride": 16,
11
+ "num_hidden_layers": 8,
12
+ "d_model": 512,
13
+ "num_attention_heads": 8,
14
+ "channel_attention": true,
15
+ "ffn_dim": 512,
16
+ "norm_type": "rmsnorm",
17
+ "norm_eps": 1e-05,
18
+ "attention_dropout": 0.0,
19
+ "positional_dropout": 0.0,
20
+ "path_dropout": 0.0,
21
+ "ff_dropout": 0.0,
22
+ "bias": true,
23
+ "activation_function": "gelu",
24
+ "pre_norm": true,
25
+ "use_cls_token": false,
26
+ "init_std": 0.02,
27
+ "scaling": "std",
28
+ "do_mask_input": null,
29
+ "mask_type": "random",
30
+ "random_mask_ratio": 0.5,
31
+ "num_forecast_mask_patches": 3,
32
+ "channel_consistent_masking": false,
33
+ "unmasked_channel_indices": null,
34
+ "mask_value": 0,
35
+ "pooling_type": "max",
36
+ "head_dropout": 0.0,
37
+ "num_parallel_samples": 100,
38
+ "max_wavelength": 500,
39
+ "rope_percent": 0.75,
40
+ "pretrained_encoder_path": null,
41
+ "use_dynamics_embedding": false,
42
+ "num_poly_feats": 256,
43
+ "poly_feat_degree": 2,
44
+ "rff_trainable": true,
45
+ "num_rff": 496
46
+ },
47
+ "train_config": {
48
+ "seed": 99,
49
+ "max_steps": 200000,
50
+ "save_steps": 50000,
51
+ "log_steps": 1000,
52
+ "per_device_train_batch_size": 1024,
53
+ "gradient_accumulation_steps": 1,
54
+ "max_grad_norm": 1.0,
55
+ "dataloader_num_workers": 16,
56
+ "dataloader_prefetch_factor": 2,
57
+ "tf32": false,
58
+ "torch_compile": true,
59
+ "optim": "adamw_torch_fused",
60
+ "learning_rate": 0.001,
61
+ "lr_scheduler_type": "cosine",
62
+ "warmup_ratio": 0.1,
63
+ "weight_decay": 0.0,
64
+ "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
65
+ "ddp_backend": "nccl",
66
+ "ddp_find_unused_parameters": false,
67
+ "remove_unused_columns": false
68
+ },
69
+ "all_config": {
70
+ "run_name": "mlm_stand_chattn_noembed",
71
+ "wandb": {
72
+ "log": true,
73
+ "project_name": "dystformer",
74
+ "entity": "gilpinlab",
75
+ "group_name": "fine-tuning",
76
+ "resume": false,
77
+ "tags": null
78
+ },
79
+ "patchtst": {
80
+ "mode": "pretrain",
81
+ "context_length": 512,
82
+ "prediction_length": 128,
83
+ "distribution_output": "student_t",
84
+ "loss": "mse",
85
+ "huber_delta": 1.0,
86
+ "patch_length": 16,
87
+ "patch_stride": 16,
88
+ "num_hidden_layers": 8,
89
+ "d_model": 512,
90
+ "num_attention_heads": 8,
91
+ "channel_attention": true,
92
+ "ffn_dim": 512,
93
+ "norm_type": "rmsnorm",
94
+ "norm_eps": 1e-05,
95
+ "attention_dropout": 0.0,
96
+ "positional_dropout": 0.0,
97
+ "path_dropout": 0.0,
98
+ "ff_dropout": 0.0,
99
+ "bias": true,
100
+ "activation_function": "gelu",
101
+ "pre_norm": true,
102
+ "use_cls_token": false,
103
+ "init_std": 0.02,
104
+ "scaling": "std",
105
+ "do_mask_input": null,
106
+ "mask_type": "random",
107
+ "random_mask_ratio": 0.5,
108
+ "num_forecast_mask_patches": 3,
109
+ "channel_consistent_masking": false,
110
+ "unmasked_channel_indices": null,
111
+ "mask_value": 0,
112
+ "pooling_type": "max",
113
+ "head_dropout": 0.0,
114
+ "num_parallel_samples": 100,
115
+ "max_wavelength": 500,
116
+ "rope_percent": 0.75,
117
+ "pretrained_encoder_path": null,
118
+ "use_dynamics_embedding": false,
119
+ "num_poly_feats": 256,
120
+ "poly_feat_degree": 2,
121
+ "rff_trainable": true,
122
+ "num_rff": 496
123
+ },
124
+ "chronos": {
125
+ "model_id": "amazon/chronos-t5-mini",
126
+ "model_type": "seq2seq",
127
+ "random_init": false,
128
+ "tie_embeddings": true,
129
+ "context_length": 512,
130
+ "prediction_length": 64,
131
+ "num_samples": 20,
132
+ "n_tokens": 4096,
133
+ "n_special_tokens": 2,
134
+ "pad_token_id": 0,
135
+ "eos_token_id": 1,
136
+ "use_eos_token": true,
137
+ "tokenizer_class": "MeanScaleUniformBins",
138
+ "tokenizer_kwargs": {
139
+ "low_limit": -15.0,
140
+ "high_limit": 15.0
141
+ },
142
+ "temperature": 1.0,
143
+ "top_k": 50,
144
+ "top_p": 1.0
145
+ },
146
+ "train": {
147
+ "seed": 99,
148
+ "max_steps": 200000,
149
+ "save_steps": 50000,
150
+ "log_steps": 1000,
151
+ "per_device_train_batch_size": 1024,
152
+ "gradient_accumulation_steps": 1,
153
+ "max_grad_norm": 1.0,
154
+ "dataloader_num_workers": 16,
155
+ "dataloader_prefetch_factor": 2,
156
+ "tf32": false,
157
+ "torch_compile": true,
158
+ "optim": "adamw_torch_fused",
159
+ "learning_rate": 0.001,
160
+ "lr_scheduler_type": "cosine",
161
+ "warmup_ratio": 0.1,
162
+ "weight_decay": 0.0,
163
+ "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
164
+ "ddp_backend": "nccl",
165
+ "ddp_find_unused_parameters": false,
166
+ "remove_unused_columns": false
167
+ },
168
+ "scheduler": {
169
+ "enabled": false,
170
+ "schedule_value_name": "noise_scale",
171
+ "schedule_name": "cosine",
172
+ "epoch_stop": 0.5,
173
+ "init_value": 1.0,
174
+ "final_value": 0.0,
175
+ "eps": 0.008,
176
+ "num_steps": 4,
177
+ "decay_rate": 8.0
178
+ },
179
+ "eval": {
180
+ "mode": "predict",
181
+ "data_path": "/stor/work/AMDG_Gilpin_Summer2024/data/test/",
182
+ "checkpoint_path": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints",
183
+ "device": "cuda:7",
184
+ "torch_dtype": "float32",
185
+ "batch_size": 32,
186
+ "num_systems": 10,
187
+ "sliding_context": false,
188
+ "metric_names": [
189
+ "mse",
190
+ "mae",
191
+ "smape",
192
+ "r2_score",
193
+ "spearman"
194
+ ],
195
+ "forecast_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/forecasts",
196
+ "labels_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/labels",
197
+ "completions_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/completions",
198
+ "patch_input_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/patch_input",
199
+ "timestep_masks_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/timestep_masks",
200
+ "metrics_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/metrics",
201
+ "metrics_fname": "metrics.json",
202
+ "overwrite": false,
203
+ "seed": 42,
204
+ "parallel_sample_reduction": "mean",
205
+ "limit_prediction_length": true,
206
+ "prediction_length": 64,
207
+ "num_test_instances": 1,
208
+ "window_style": "sampled",
209
+ "window_stride": 1,
210
+ "split_coords": false,
211
+ "verbose": false,
212
+ "use_channel_sampler": false,
213
+ "channel_sampler": {
214
+ "num_channels": 3,
215
+ "num_samples": 2
216
+ }
217
+ },
218
+ "run_metrics": {
219
+ "wandb_run_id": null,
220
+ "plot_dir": "figs",
221
+ "save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/run_metrics",
222
+ "save_fname": "metrics.json"
223
+ },
224
+ "train_data_dirs": [
225
+ "/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train",
226
+ "/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train_z5_z10",
227
+ "/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train",
228
+ "/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train_z5_z10"
229
+ ],
230
+ "extra_train_data_paths": null,
231
+ "probability": null,
232
+ "shuffle_buffer_length": 100000,
233
+ "min_past": 60,
234
+ "max_missing_prop": 0.9,
235
+ "fixed_dim": 3,
236
+ "augmentations": {
237
+ "augmentation_rate": 0.2,
238
+ "probabilities": [
239
+ 0.3333333333333333,
240
+ 0.3333333333333333,
241
+ 0.3333333333333333,
242
+ 0.0,
243
+ 0.0
244
+ ],
245
+ "dim_range": [
246
+ 3,
247
+ 8
248
+ ],
249
+ "lag_range": [
250
+ 1,
251
+ 10
252
+ ],
253
+ "phase_surrogate_cutoff": 1.0,
254
+ "mode_range": [
255
+ 5,
256
+ 15
257
+ ],
258
+ "max_wavenumber": 10.0,
259
+ "max_amp": 10.0
260
+ },
261
+ "sampling": {
262
+ "data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/",
263
+ "sys_class": "continuous_no_delay",
264
+ "test_split": 0.3,
265
+ "split_prefix": null,
266
+ "pairs_rseed": 123,
267
+ "rseed": 999,
268
+ "num_points": 4096,
269
+ "num_periods": 40,
270
+ "num_periods_min": 20,
271
+ "num_periods_max": 60,
272
+ "num_ics": 1,
273
+ "num_param_perturbations": 4,
274
+ "param_scale": 0.5,
275
+ "split_coords": false,
276
+ "standardize": false,
277
+ "verbose": false,
278
+ "multiprocessing": true,
279
+ "debug_system": null,
280
+ "silence_integration_errors": false,
281
+ "save_params": true,
282
+ "save_traj_stats": false,
283
+ "ignore_probability": 0.0,
284
+ "sign_match_probability": 0.5,
285
+ "atol": 1e-10,
286
+ "rtol": 1e-09,
287
+ "reference_traj": {
288
+ "length": 4096,
289
+ "transient": 0.5,
290
+ "n_periods": 40,
291
+ "atol": 1e-07,
292
+ "rtol": 1e-06
293
+ }
294
+ },
295
+ "validator": {
296
+ "enable": true,
297
+ "verbose": false,
298
+ "transient_time_frac": 0.05,
299
+ "plot_save_dir": null,
300
+ "save_failed_trajs": false
301
+ },
302
+ "events": {
303
+ "max_duration": 300,
304
+ "instability_threshold": 10000.0,
305
+ "min_step": 1e-10,
306
+ "verbose": true
307
+ },
308
+ "skew": {
309
+ "num_pairs": 5000,
310
+ "normalization_strategy": "flow_rms",
311
+ "randomize_driver_indices": true,
312
+ "transform_scales": true,
313
+ "train_nonskew_path": null,
314
+ "test_nonskew_path": null,
315
+ "coupling_map_type": "additive",
316
+ "coupling_map": {
317
+ "transform_scales": false,
318
+ "randomize_driver_indices": true,
319
+ "normalization_strategy": "flow_rms",
320
+ "random_seed": 0
321
+ }
322
+ },
323
+ "analysis": {
324
+ "data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data",
325
+ "split": "copy/final_skew40/train",
326
+ "num_samples": 1,
327
+ "one_dim_target": false,
328
+ "save_dir": "outputs",
329
+ "plots_dir": "figures",
330
+ "compute_quantile_limits": false,
331
+ "compute_max_lyapunov_exponents": false,
332
+ "filter_ensemble": true,
333
+ "filter_json_fname": "failed_samples",
334
+ "verbose": true,
335
+ "attractor_tests": [
336
+ "check_zero_one_test"
337
+ ],
338
+ "check_not_transient": {
339
+ "max_transient_prop": 0.2,
340
+ "atol": 0.001
341
+ },
342
+ "check_stationarity": {
343
+ "p_value": 0.05
344
+ },
345
+ "check_boundedness": {
346
+ "threshold": 10000.0,
347
+ "max_zscore": 5,
348
+ "eps": 1e-10
349
+ },
350
+ "check_zero_one_test": {
351
+ "threshold": 0.2,
352
+ "strategy": "score"
353
+ }
354
+ }
355
+ },
356
+ "job_info": {
357
+ "cuda_available": true,
358
+ "device_count": 4,
359
+ "device_names": {
360
+ "0": "AMD Instinct MI100",
361
+ "1": "AMD Instinct MI100",
362
+ "2": "AMD Instinct MI100",
363
+ "3": "AMD Instinct MI100"
364
+ },
365
+ "mem_info": {
366
+ "0": [
367
+ 4627103744,
368
+ 34342961152
369
+ ],
370
+ "1": [
371
+ 4617928704,
372
+ 34342961152
373
+ ],
374
+ "2": [
375
+ 4626317312,
376
+ 34342961152
377
+ ],
378
+ "3": [
379
+ 4628414464,
380
+ 34342961152
381
+ ]
382
+ },
383
+ "torchelastic_launched": true,
384
+ "world_size": 4,
385
+ "python_version": "3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]",
386
+ "torch_version": "2.2.2+rocm5.7",
387
+ "numpy_version": "1.26.4",
388
+ "gluonts_version": "0.15.1",
389
+ "transformers_version": "4.40.1",
390
+ "accelerate_version": "0.34.2"
391
+ }
392
+ }