DatPySci commited on
Commit
93bdafb
·
verified ·
1 Parent(s): 18944b0

Delete files models/OLMo-1B/wandb/ with huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. models/OLMo-1B/wandb/wandb/debug-internal.log +0 -13
  2. models/OLMo-1B/wandb/wandb/debug.log +0 -26
  3. models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/config.yaml +0 -575
  4. models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/output.log +0 -144
  5. models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/requirements.txt +0 -271
  6. models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/wandb-metadata.json +0 -103
  7. models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/wandb-summary.json +0 -1
  8. models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/logs/debug-internal.log +0 -11
  9. models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/logs/debug.log +0 -23
  10. models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/run-8qjie2ty.wandb +0 -0
  11. models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/config.yaml +0 -575
  12. models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/output.log +0 -241
  13. models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/requirements.txt +0 -271
  14. models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/wandb-metadata.json +0 -103
  15. models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/wandb-summary.json +0 -1
  16. models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/logs/debug-internal.log +0 -11
  17. models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/logs/debug.log +0 -23
  18. models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/run-kgru6t23.wandb +0 -0
  19. models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/config.yaml +0 -575
  20. models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/output.log +0 -241
  21. models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/requirements.txt +0 -271
  22. models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/wandb-metadata.json +0 -103
  23. models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/wandb-summary.json +0 -1
  24. models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/logs/debug-internal.log +0 -11
  25. models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/logs/debug.log +0 -23
  26. models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/run-c0ptjfpp.wandb +0 -0
  27. models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/config.yaml +0 -579
  28. models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/output.log +0 -71
  29. models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/requirements.txt +0 -271
  30. models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/wandb-metadata.json +0 -103
  31. models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/wandb-summary.json +0 -1
  32. models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/logs/debug-internal.log +0 -12
  33. models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/logs/debug.log +0 -26
  34. models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/run-i0ugjt0v.wandb +0 -0
  35. models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/config.yaml +0 -579
  36. models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/output.log +0 -78
  37. models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/requirements.txt +0 -271
  38. models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/wandb-metadata.json +0 -103
  39. models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/wandb-summary.json +0 -1
  40. models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/logs/debug-internal.log +0 -12
  41. models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/logs/debug.log +0 -26
  42. models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/run-z9xp3525.wandb +0 -0
  43. models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/config.yaml +0 -575
  44. models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/output.log +0 -35
  45. models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/requirements.txt +0 -271
  46. models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/wandb-metadata.json +0 -103
  47. models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/wandb-summary.json +0 -1
  48. models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/logs/debug-internal.log +0 -11
  49. models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/logs/debug.log +0 -23
  50. models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/run-2hopup81.wandb +0 -0
models/OLMo-1B/wandb/wandb/debug-internal.log DELETED
@@ -1,13 +0,0 @@
1
- {"time":"2025-12-19T08:35:08.828028876+01:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
- {"time":"2025-12-19T08:35:09.083424379+01:00","level":"INFO","msg":"stream: created new stream","id":"zvk9qxz6"}
3
- {"time":"2025-12-19T08:35:09.084141654+01:00","level":"INFO","msg":"handler: started","stream_id":"zvk9qxz6"}
4
- {"time":"2025-12-19T08:35:09.086154012+01:00","level":"INFO","msg":"stream: started","id":"zvk9qxz6"}
5
- {"time":"2025-12-19T08:35:09.086168743+01:00","level":"INFO","msg":"writer: started","stream_id":"zvk9qxz6"}
6
- {"time":"2025-12-19T08:35:09.086173112+01:00","level":"INFO","msg":"sender: started","stream_id":"zvk9qxz6"}
7
- {"time":"2025-12-20T01:19:49.769458459+01:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/marksmans/olmo-debug/zvk9qxz6/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: read udp 127.0.0.1:51827->127.0.0.53:53: i/o timeout"}
8
- {"time":"2025-12-20T06:02:10.195866407+01:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
- {"time":"2025-12-20T06:02:10.390657483+01:00","level":"INFO","msg":"handler: operation stats","stats":{}}
10
- {"time":"2025-12-20T06:02:10.434932054+01:00","level":"INFO","msg":"stream: closing","id":"zvk9qxz6"}
11
- {"time":"2025-12-20T06:02:10.445057599+01:00","level":"INFO","msg":"handler: closed","stream_id":"zvk9qxz6"}
12
- {"time":"2025-12-20T06:02:10.457659306+01:00","level":"INFO","msg":"sender: closed","stream_id":"zvk9qxz6"}
13
- {"time":"2025-12-20T06:02:10.464136678+01:00","level":"INFO","msg":"stream: closed","id":"zvk9qxz6"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-12-19 08:35:08,585 INFO MainThread:264281 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
- 2025-12-19 08:35:08,587 INFO MainThread:264281 [wandb_setup.py:_flush():80] Configure stats pid to 264281
3
- 2025-12-19 08:35:08,587 INFO MainThread:264281 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
- 2025-12-19 08:35:08,588 INFO MainThread:264281 [wandb_setup.py:_flush():80] Loading settings from /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/wandb/settings
5
- 2025-12-19 08:35:08,589 INFO MainThread:264281 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
- 2025-12-19 08:35:08,589 INFO MainThread:264281 [wandb_init.py:setup_run_log_directory():714] Logging user logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251219_083508-zvk9qxz6/logs/debug.log
7
- 2025-12-19 08:35:08,590 INFO MainThread:264281 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251219_083508-zvk9qxz6/logs/debug-internal.log
8
- 2025-12-19 08:35:08,591 INFO MainThread:264281 [wandb_init.py:init():841] calling init triggers
9
- 2025-12-19 08:35:08,591 INFO MainThread:264281 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
- config: {'run_name': 'OLMo-1B-as_fm3_omi2', 'seed': 6198, 'epoch': None, 'dry_run': False, 'model': {'d_model': 2048, 'n_heads': 16, 'n_kv_heads': None, 'clip_qkv': None, 'n_layers': 16, 'mlp_ratio': 8, 'mlp_hidden_size': None, 'activation_type': 'swiglu', 'block_type': 'sequential', 'block_group_size': 1, 'alibi': False, 'alibi_bias_max': 8.0, 'rope': True, 'rope_full_precision': True, 'rope_theta': 10000, 'flash_attention': False, 'attention_dropout': 0.0, 'multi_query_attention': False, 'attention_layer_norm': False, 'residual_dropout': 0.0, 'embedding_dropout': 0.0, 'embedding_layer_norm': False, 'layer_norm_type': 'default', 'layer_norm_with_affine': False, 'layer_norm_eps': 1e-05, 'attention_layer_norm_with_affine': False, 'max_sequence_length': 2048, 'include_bias': False, 'bias_for_layer_norm': False, 'scale_logits': False, 'vocab_size': 32000, 'embedding_size': 32000, 'weight_tying': True, 'eos_token_id': 0, 'pad_token_id': 1, 'init_device': 'cuda', 'init_fn': 'normal', 'init_std': 0.02, 'init_cutoff_factor': 3.0, 'precision': 'amp_bf16', 'scale_emb_init': False, 'emb_init_std': None, 'norm_after': False}, 'optimizer': {'name': 'adamw', 'learning_rate': 0.0005, 'weight_decay': 0.1, 'betas': (0.9, 0.95), 'eps': 1e-08, 'no_decay_norm_and_bias': None, 'selective_updates': False, 'decay_norm_and_bias': True, 'decay_embeddings': True, 'metrics_log_interval': 10, 'record_update_metrics': False}, 'scheduler': {'name': 'cosine_with_warmup', 'units': 'steps', 't_warmup': 2000, 't_max': None, 'alpha_f': 0.1, 'grad_clip_warmup_steps': None, 'grad_clip_warmup_factor': None, 'warmup_min_lr': 0.0}, 'data': {'paths': ['data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct1-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/tinygsm-tokenized/00016_00000_doc_shuffled.ds'], 'memmap_dtype': 'uint16', 'datasets': None, 'label_mask_paths': None, 'pad_direction': 'right', 'generate_attention_mask': False, 'generate_doc_lengths': False, 'num_workers': 32, 'drop_last': True, 'pin_memory': True, 'prefetch_factor': 8, 'persistent_workers': True, 'timeout': 0, 'seed': None, 'instance_filter': None, 'custom_dataset': None}, 'restore_dataloader': True, 'fast_forward_batches': None, 'evaluators': [], 'eval_interval': 5000, 'tokenizer': {'identifier': 'meta-llama/Llama-2-7b-hf', 'truncate_direction': 'right'}, 'save_folder': 'checkpoints/OLMo-1B-as_fm3_omi2', 'remote_save_folder': None, 'canceled_check_interval': 6000, 'save_interval': 3000, 'save_interval_unsharded': 3000, 'save_interval_ephemeral': None, 'save_num_checkpoints_to_keep': -1, 'save_num_unsharded_checkpoints_to_keep': -1, 'save_overwrite': True, 'force_save_unsharded': False, 'no_pre_train_checkpoint': False, 'load_path': 'checkpoints/OLMo-1B-as_fm3_omi2/step51000-unsharded', 'load_path_sharded_checkpointer': None, 'try_load_latest_save': False, 'reset_optimizer_state': False, 'reset_trainer_state': False, 'sharded_checkpointer': 'torch_legacy', 'new_style_checkpoints': None, 'max_duration': '1ep', 'global_train_batch_size': 512, 'device_train_batch_size': 128, 'device_train_microbatch_size': 16, 'device_eval_batch_size': 16, 'eval_subset_num_batches': -1, 'eval_on_load': False, 'device_train_grad_accum': 8, 'max_grad_norm': 1.0, 'max_grad_norm_ratio': None, 'precision': 'amp_bf16', 'speed_monitor': {'window_size': 20, 'gpu_flops_available': None}, 'console_log_interval': 1, 'gen1_gc_interval': 1, 'compile': None, 'distributed_strategy': 'fsdp', 'fsdp': {'use_orig_params': True, 'sharding_strategy': <ShardingStrategy.FULL_SHARD: 1>, 'wrapping_strategy': None, 'precision': 'mixed', 'hybrid_sharding_num_model_replicas': None}, 'ddp': {'grad_sync_mode': 'batch', 'find_unused_params': False}, 'single': {'device': 'auto'}, 'softmax_auxiliary_loss': False, 'auxiliary_loss_multiplier': 0.0001, 'time_limit': None, 'extra_steps_after_cancel': 10, 'early_stopping_factor': None, 'save_data_indices': True, 'python_profiling': False, 'torch_profiling': False, 'stop_at': None, 'stop_after': None, 'activation_checkpointing': None, 'fused_loss': None, 'hf_datasets_cache_dir': None, 'module_outputs_save_steps': None, '_wandb': {}}
11
- 2025-12-19 08:35:08,592 INFO MainThread:264281 [wandb_init.py:init():889] starting backend
12
- 2025-12-19 08:35:08,822 INFO MainThread:264281 [wandb_init.py:init():892] sending inform_init request
13
- 2025-12-19 08:35:08,826 INFO MainThread:264281 [wandb_init.py:init():900] backend started and connected
14
- 2025-12-19 08:35:08,829 INFO MainThread:264281 [wandb_init.py:init():970] updated telemetry
15
- 2025-12-19 08:35:08,830 INFO MainThread:264281 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
- 2025-12-19 08:35:09,331 INFO MainThread:264281 [wandb_init.py:init():1041] starting run threads in backend
17
- 2025-12-19 08:35:09,390 INFO MainThread:264281 [wandb_run.py:_console_start():2521] atexit reg
18
- 2025-12-19 08:35:09,391 INFO MainThread:264281 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
- 2025-12-19 08:35:09,391 INFO MainThread:264281 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
- 2025-12-19 08:35:09,392 INFO MainThread:264281 [wandb_run.py:_redirect():2461] Redirects installed.
21
- 2025-12-19 08:35:09,394 INFO MainThread:264281 [wandb_init.py:init():1081] run started, returning control to user process
22
- 2025-12-20 06:02:08,253 INFO MainThread:264281 [wandb_run.py:_finish():2287] finishing run marksmans/olmo-debug/zvk9qxz6
23
- 2025-12-20 06:02:08,293 INFO MainThread:264281 [wandb_run.py:_atexit_cleanup():2486] got exitcode: 0
24
- 2025-12-20 06:02:08,312 INFO MainThread:264281 [wandb_run.py:_restore():2468] restore
25
- 2025-12-20 06:02:08,315 INFO MainThread:264281 [wandb_run.py:_restore():2474] restore done
26
- 2025-12-20 06:02:10,416 INFO MainThread:264281 [wandb_run.py:_footer_sync_info():3862] logging synced files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/config.yaml DELETED
@@ -1,575 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.23.1
4
- e:
5
- gat9u7yjnpjadjdd0jcwog0os0b9blu3:
6
- args:
7
- - pretraining/configs/RL-1B.yaml
8
- codePath: OLMo/scripts/train.py
9
- codePathLocal: OLMo/scripts/train.py
10
- cpu_count: 112
11
- cpu_count_logical: 224
12
- cudaVersion: "13.0"
13
- disk:
14
- /:
15
- total: "2055141851136"
16
- used: "49254445056"
17
18
- executable: /opt/conda/bin/python
19
- gpu: NVIDIA H100 80GB HBM3
20
- gpu_count: 1
21
- gpu_nvidia:
22
- - architecture: Hopper
23
- cudaCores: 16896
24
- memoryTotal: "85520809984"
25
- name: NVIDIA H100 80GB HBM3
26
- uuid: GPU-4c999b2a-2578-9e62-0539-4b826d85fda8
27
- host: serv-3342
28
- memory:
29
- total: "2164176814080"
30
- os: Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35
31
- program: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py
32
- python: CPython 3.11.11
33
- root: checkpoints/OLMo-1B-as_fm3_omi2/wandb
34
- slurm:
35
- cluster_name: pegasus
36
- conf: /etc/slurm/slurm.conf
37
- cpu_bind: quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000
38
- cpu_bind_list: 0x000000000001FE00000000000000000000000001FE00000000000000
39
- cpu_bind_type: 'mask_cpu:'
40
- cpu_bind_verbose: quiet
41
- cpus_on_node: "16"
42
- cpus_per_task: "16"
43
- distribution: cyclic
44
- gpus: "1"
45
- gpus_on_node: "1"
46
- gtids: "0"
47
- job_cpus_per_node: "16"
48
- job_end_time: "1765734524"
49
- job_gid: "8000"
50
- job_group: iml
51
- job_id: "2383756"
52
- job_name: bash
53
- job_nodelist: serv-3342
54
- job_num_nodes: "1"
55
- job_partition: H100
56
- job_qos: normal
57
- job_start_time: "1765720124"
58
- job_uid: "13262"
59
- job_user: nguyen
60
- jobid: "2383756"
61
- launch_node_ipaddr: 192.168.33.114
62
- localid: "0"
63
- mem_per_cpu: "16384"
64
- mpi_type: pmix
65
- nnodes: "1"
66
- nodeid: "0"
67
- nodelist: serv-3342
68
- nprocs: "1"
69
- ntasks: "1"
70
- oom_kill_step: "0"
71
- pmix_mapping_serv: (vector,(0,1,1))
72
- pmixp_abort_agent_port: "33735"
73
- prio_process: "1"
74
- procid: "0"
75
- pty_port: "45219"
76
- pty_win_col: "156"
77
- pty_win_row: "41"
78
- srun_comm_host: 192.168.33.114
79
- srun_comm_port: "35153"
80
- step_gpus: "5"
81
- step_id: "0"
82
- step_launcher_port: "35153"
83
- step_nodelist: serv-3342
84
- step_num_nodes: "1"
85
- step_num_tasks: "1"
86
- step_tasks_per_node: "1"
87
- stepid: "0"
88
- submit_dir: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain
89
- submit_host: login1
90
- task_pid: "3684902"
91
- tasks_per_node: "1"
92
- topology_addr: serv-3342
93
- topology_addr_pattern: node
94
- tres_bind: gres/gpu:per_task:1
95
- tres_per_task: cpu=16,gres/gpu=1
96
- umask: "0022"
97
- startedAt: "2025-12-14T14:30:41.667717Z"
98
- writerId: gat9u7yjnpjadjdd0jcwog0os0b9blu3
99
- m: []
100
- python_version: 3.11.11
101
- t:
102
- "1":
103
- - 1
104
- - 5
105
- - 11
106
- - 41
107
- - 49
108
- - 51
109
- - 53
110
- "2":
111
- - 1
112
- - 5
113
- - 11
114
- - 41
115
- - 49
116
- - 51
117
- - 53
118
- "3":
119
- - 13
120
- - 15
121
- - 16
122
- "4": 3.11.11
123
- "5": 0.23.1
124
- "6": 4.57.3
125
- "12": 0.23.1
126
- "13": linux-x86_64
127
- activation_checkpointing:
128
- value: null
129
- auxiliary_loss_multiplier:
130
- value: 0.0001
131
- canceled_check_interval:
132
- value: 6000
133
- compile:
134
- value: null
135
- console_log_interval:
136
- value: 1
137
- data:
138
- value:
139
- custom_dataset: null
140
- datasets: null
141
- drop_last: true
142
- generate_attention_mask: false
143
- generate_doc_lengths: false
144
- instance_filter: null
145
- label_mask_paths: null
146
- memmap_dtype: uint16
147
- num_workers: 32
148
- pad_direction: right
149
- paths:
150
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds
151
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds
152
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds
153
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds
154
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds
155
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds
156
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds
157
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds
158
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds
159
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds
160
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds
161
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds
162
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds
163
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds
164
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds
165
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds
166
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds
167
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds
168
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds
169
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds
170
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds
171
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds
172
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds
173
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds
174
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds
175
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds
176
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds
177
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds
178
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds
179
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds
180
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds
181
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds
182
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds
183
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds
184
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds
185
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds
186
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds
187
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds
188
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds
189
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds
190
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds
191
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds
192
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds
193
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds
194
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds
195
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds
196
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds
197
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds
198
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds
199
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds
200
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds
201
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds
202
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds
203
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds
204
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds
205
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds
206
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds
207
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds
208
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds
209
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds
210
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds
211
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds
212
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds
213
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds
214
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds
215
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds
216
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds
217
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds
218
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds
219
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds
220
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds
221
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds
222
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds
223
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds
224
- - data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds
225
- - data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds
226
- - data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds
227
- - data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds
228
- - data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds
229
- - data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds
230
- - data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds
231
- - data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds
232
- - data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds
233
- - data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds
234
- - data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds
235
- - data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds
236
- - data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds
237
- - data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds
238
- - data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds
239
- - data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds
240
- - data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds
241
- - data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds
242
- - data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds
243
- - data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds
244
- - data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds
245
- - data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds
246
- - data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds
247
- - data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds
248
- - data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds
249
- - data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds
250
- - data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds
251
- - data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds
252
- - data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds
253
- - data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds
254
- - data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds
255
- - data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds
256
- - data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds
257
- - data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds
258
- - data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds
259
- - data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds
260
- - data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds
261
- - data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds
262
- - data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds
263
- - data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds
264
- - data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds
265
- - data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds
266
- - data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds
267
- - data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds
268
- - data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds
269
- - data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds
270
- - data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds
271
- - data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds
272
- - data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds
273
- - data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds
274
- - data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds
275
- - data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds
276
- - data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds
277
- - data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds
278
- - data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds
279
- - data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds
280
- - data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds
281
- - data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds
282
- - data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds
283
- - data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds
284
- - data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds
285
- - data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds
286
- - data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds
287
- - data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds
288
- - data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds
289
- - data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds
290
- - data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds
291
- - data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds
292
- - data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds
293
- - data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds
294
- - data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds
295
- - data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds
296
- - data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds
297
- - data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds
298
- - data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds
299
- - data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds
300
- - data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds
301
- - data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds
302
- - data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds
303
- - data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds
304
- - data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds
305
- - data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds
306
- - data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds
307
- - data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds
308
- - data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds
309
- - data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds
310
- - data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds
311
- - data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds
312
- - data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds
313
- - data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds
314
- - data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds
315
- - data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds
316
- - data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds
317
- - data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds
318
- - data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds
319
- - data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds
320
- - data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds
321
- - data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds
322
- - data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds
323
- - data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds
324
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds
325
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds
326
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds
327
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds
328
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds
329
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds
330
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds
331
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds
332
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds
333
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds
334
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds
335
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds
336
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds
337
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds
338
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds
339
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds
340
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds
341
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds
342
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds
343
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds
344
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds
345
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds
346
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds
347
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds
348
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds
349
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds
350
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds
351
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds
352
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds
353
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds
354
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds
355
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds
356
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds
357
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds
358
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds
359
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds
360
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds
361
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds
362
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds
363
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds
364
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds
365
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds
366
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds
367
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds
368
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds
369
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds
370
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds
371
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds
372
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds
373
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds
374
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds
375
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds
376
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds
377
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds
378
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds
379
- persistent_workers: true
380
- pin_memory: true
381
- prefetch_factor: 8
382
- seed: null
383
- timeout: 0
384
- ddp:
385
- value:
386
- find_unused_params: false
387
- grad_sync_mode: batch
388
- device_eval_batch_size:
389
- value: 16
390
- device_train_batch_size:
391
- value: 512
392
- device_train_grad_accum:
393
- value: 32
394
- device_train_microbatch_size:
395
- value: 16
396
- distributed_strategy:
397
- value: fsdp
398
- dry_run:
399
- value: false
400
- early_stopping_factor:
401
- value: null
402
- epoch:
403
- value: null
404
- eval_interval:
405
- value: 5000
406
- eval_on_load:
407
- value: false
408
- eval_subset_num_batches:
409
- value: -1
410
- evaluators:
411
- value: []
412
- extra_steps_after_cancel:
413
- value: 10
414
- fast_forward_batches:
415
- value: null
416
- force_save_unsharded:
417
- value: false
418
- fsdp:
419
- value:
420
- hybrid_sharding_num_model_replicas: null
421
- precision: mixed
422
- sharding_strategy: FULL_SHARD
423
- use_orig_params: true
424
- wrapping_strategy: null
425
- fused_loss:
426
- value: null
427
- gen1_gc_interval:
428
- value: 1
429
- global_train_batch_size:
430
- value: 512
431
- hf_datasets_cache_dir:
432
- value: null
433
- load_path:
434
- value: null
435
- load_path_sharded_checkpointer:
436
- value: null
437
- max_duration:
438
- value: 1ep
439
- max_grad_norm:
440
- value: 1
441
- max_grad_norm_ratio:
442
- value: null
443
- model:
444
- value:
445
- activation_type: swiglu
446
- alibi: false
447
- alibi_bias_max: 8
448
- attention_dropout: 0
449
- attention_layer_norm: false
450
- attention_layer_norm_with_affine: false
451
- bias_for_layer_norm: false
452
- block_group_size: 1
453
- block_type: sequential
454
- clip_qkv: null
455
- d_model: 2048
456
- emb_init_std: null
457
- embedding_dropout: 0
458
- embedding_layer_norm: false
459
- embedding_size: 32000
460
- eos_token_id: 0
461
- flash_attention: false
462
- include_bias: false
463
- init_cutoff_factor: 3
464
- init_device: cuda
465
- init_fn: normal
466
- init_std: 0.02
467
- layer_norm_eps: 1e-05
468
- layer_norm_type: default
469
- layer_norm_with_affine: false
470
- max_sequence_length: 2048
471
- mlp_hidden_size: null
472
- mlp_ratio: 8
473
- multi_query_attention: false
474
- n_heads: 16
475
- n_kv_heads: null
476
- n_layers: 16
477
- norm_after: false
478
- pad_token_id: 1
479
- precision: amp_bf16
480
- residual_dropout: 0
481
- rope: true
482
- rope_full_precision: true
483
- rope_theta: 10000
484
- scale_emb_init: false
485
- scale_logits: false
486
- vocab_size: 32000
487
- weight_tying: true
488
- module_outputs_save_steps:
489
- value: null
490
- new_style_checkpoints:
491
- value: null
492
- no_pre_train_checkpoint:
493
- value: false
494
- optimizer:
495
- value:
496
- betas:
497
- - 0.9
498
- - 0.95
499
- decay_embeddings: true
500
- decay_norm_and_bias: true
501
- eps: 1e-08
502
- learning_rate: 0.0005
503
- metrics_log_interval: 10
504
- name: adamw
505
- no_decay_norm_and_bias: null
506
- record_update_metrics: false
507
- selective_updates: false
508
- weight_decay: 0.1
509
- precision:
510
- value: amp_bf16
511
- python_profiling:
512
- value: false
513
- remote_save_folder:
514
- value: null
515
- reset_optimizer_state:
516
- value: false
517
- reset_trainer_state:
518
- value: false
519
- restore_dataloader:
520
- value: true
521
- run_name:
522
- value: OLMo-1B-as_fm3_omi2
523
- save_data_indices:
524
- value: true
525
- save_folder:
526
- value: checkpoints/OLMo-1B-as_fm3_omi2
527
- save_interval:
528
- value: 3000
529
- save_interval_ephemeral:
530
- value: null
531
- save_interval_unsharded:
532
- value: 3000
533
- save_num_checkpoints_to_keep:
534
- value: -1
535
- save_num_unsharded_checkpoints_to_keep:
536
- value: -1
537
- save_overwrite:
538
- value: true
539
- scheduler:
540
- value:
541
- alpha_f: 0.1
542
- grad_clip_warmup_factor: null
543
- grad_clip_warmup_steps: null
544
- name: cosine_with_warmup
545
- t_max: null
546
- t_warmup: 2000
547
- units: steps
548
- warmup_min_lr: 0
549
- seed:
550
- value: 6198
551
- sharded_checkpointer:
552
- value: torch_legacy
553
- single:
554
- value:
555
- device: auto
556
- softmax_auxiliary_loss:
557
- value: false
558
- speed_monitor:
559
- value:
560
- gpu_flops_available: null
561
- window_size: 20
562
- stop_after:
563
- value: null
564
- stop_at:
565
- value: null
566
- time_limit:
567
- value: null
568
- tokenizer:
569
- value:
570
- identifier: meta-llama/Llama-2-7b-hf
571
- truncate_direction: right
572
- torch_profiling:
573
- value: false
574
- try_load_latest_save:
575
- value: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/output.log DELETED
@@ -1,144 +0,0 @@
1
- Traceback (most recent call last):
2
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 436, in <module>
3
- main(cfg)
4
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 132, in main
5
- train_loader = build_train_dataloader(cfg)
6
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^
7
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/__init__.py", line 156, in build_train_dataloader
8
- dataset = IterableDataset(
9
- ^^^^^^^^^^^^^^^^
10
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/iterable_dataset.py", line 57, in __init__
11
- if self.drop_last and len(self.dataset) % self.world_size != 0: # type: ignore[arg-type]
12
- ^^^^^^^^^^^^^^^^^
13
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/memmap_dataset.py", line 176, in __len__
14
- self._num_instances = self.offsets[-1][1]
15
- ^^^^^^^^^^^^
16
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/memmap_dataset.py", line 138, in offsets
17
- path, length = future.result()
18
- ^^^^^^^^^^^^^^^
19
- File "/opt/conda/lib/python3.11/concurrent/futures/_base.py", line 449, in result
20
- return self.__get_result()
21
- ^^^^^^^^^^^^^^^^^^^
22
- File "/opt/conda/lib/python3.11/concurrent/futures/_base.py", line 401, in __get_result
23
- raise self._exception
24
- File "/opt/conda/lib/python3.11/concurrent/futures/thread.py", line 58, in run
25
- result = self.fn(*self.args, **self.kwargs)
26
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
27
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/memmap_dataset.py", line 172, in _get_file_length
28
- return path, file_size(path) // (item_size * self._chunk_size)
29
- ^^^^^^^^^^^^^^^
30
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/util.py", line 350, in file_size
31
- return os.stat(path).st_size
32
- ^^^^^^^^^^^^^
33
- FileNotFoundError: [Errno 2] No such file or directory: 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds'
34
- [2025-12-14 15:30:44] CRITICAL [olmo.util:168, rank=0] Uncaught FileNotFoundError: [Errno 2] No such file or directory: 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds'
35
- ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
36
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py:436 │
37
- │ │
38
- │ 433 │ │ log.info("Device is CPU. Updating config...") │
39
- │ 434 │ │ cfg.model.init_device = "cpu" │
40
- │ 435 │ │ cfg.distributed_strategy = "single" # type: ignore │
41
- │ ❱ 436 │ main(cfg) │
42
- │ 437  │
43
- │ │
44
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py:132 │
45
- │ │
46
- │ 129 │ seed_all(cfg.seed) │
47
- │ 130 │  │
48
- │ 131 │ # Construct data loader. │
49
- │ ❱ 132 │ train_loader = build_train_dataloader(cfg) │
50
- │ 133 │  │
51
- │ 134 │ # Construct evaluators. │
52
- │ 135 │ evaluators = build_evaluators(cfg, device) │
53
- │ │
54
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/__init__.p │
55
- │ │
56
- │ 153 │ │ │ ) │
57
- │ 154 │ │ else: │
58
- │ 155 │ │ │ work_dir.mkdir(exist_ok=True, parents=True) │
59
- │ ❱ 156 │ dataset = IterableDataset( │
60
- │ 157 │ │ dataset, # type: ignore │
61
- │ 158 │ │ train_config.global_train_batch_size, │
62
- │ 159 │ │ seed=seed, │
63
- │ │
64
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/iterable_d │
65
- │ │
66
- │  54 │ │ self.world_size = world_size if world_size is not None else get_world_size() │
67
- │  55 │ │ # If the dataset length is evenly divisible by # of replicas, then there │
68
- │  56 │ │ # is no need to drop any data, since the dataset will be split equally. │
69
- │ ❱  57 │ │ if self.drop_last and len(self.dataset) % self.world_size != 0: # type: ignore[ar │
70
- │  58 │ │ │ # Split to nearest available length that is evenly divisible by world size. │
71
- │  59 │ │ │ # This is to ensure each rank receives the same amount of data. │
72
- │  60 │ │ │ num_samples = math.ceil( │
73
- │ │
74
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/memmap_dat │
75
- │ │
76
- │ 173 │  │
77
- │ 174 │ def __len__(self) -> int: │
78
- │ 175 │ │ if self._num_instances is None: │
79
- │ ❱ 176 │ │ │ self._num_instances = self.offsets[-1][1] │
80
- │ 177 │ │ return self._num_instances │
81
- │ 178 │  │
82
- │ 179 │ def __getitem__(self, index: int) -> Dict[str, Any]: │
83
- │ │
84
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/memmap_dat │
85
- │ │
86
- │ 135 │ │ │ │ │ │ mask_path_futures.append(executor.submit(self._get_file_length, ma │
87
- │ 136 │ │ │ │  │
88
- │ 137 │ │ │ │ for future in concurrent.futures.as_completed(path_futures): │
89
- │ ❱ 138 │ │ │ │ │ path, length = future.result() │
90
- │ 139 │ │ │ │ │ path_to_length[path] = length │
91
- │ 140 │ │ │ │  │
92
- │ 141 │ │ │ │ for future in concurrent.futures.as_completed(mask_path_futures): │
93
- │ │
94
- │ /opt/conda/lib/python3.11/concurrent/futures/_base.py:449 in result │
95
- │ │
96
- │ 446 │ │ │ │ if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: │
97
- │ 447 │ │ │ │ │ raise CancelledError() │
98
- │ 448 │ │ │ │ elif self._state == FINISHED: │
99
- │ ❱ 449 │ │ │ │ │ return self.__get_result() │
100
- │ 450 │ │ │ │  │
101
- │ 451 │ │ │ │ self._condition.wait(timeout) │
102
- │ 452  │
103
- │ │
104
- │ /opt/conda/lib/python3.11/concurrent/futures/_base.py:401 in __get_result │
105
- │ │
106
- │ 398 │ def __get_result(self): │
107
- │ 399 │ │ if self._exception: │
108
- │ 400 │ │ │ try: │
109
- │ ❱ 401 │ │ │ │ raise self._exception │
110
- │ 402 │ │ │ finally: │
111
- │ 403 │ │ │ │ # Break a reference cycle with the exception in self._exception │
112
- │ 404 │ │ │ │ self = None │
113
- │ │
114
- │ /opt/conda/lib/python3.11/concurrent/futures/thread.py:58 in run │
115
- │ │
116
- │  55 │ │ │ return │
117
- │  56 │ │  │
118
- │  57 │ │ try: │
119
- │ ❱  58 │ │ │ result = self.fn(*self.args, **self.kwargs) │
120
- │  59 │ │ except BaseException as exc: │
121
- │  60 │ │ │ self.future.set_exception(exc) │
122
- │  61 │ │ │ # Break a reference cycle with the exception 'exc' │
123
- │ │
124
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/memmap_dat │
125
- │ │
126
- │ 169 │ def _get_file_length(self, path, dtype=None) -> Tuple[PathOrStr, int]: │
127
- │ 170 │ │ dtype = dtype or self.dtype │
128
- │ 171 │ │ item_size = dtype(0).itemsize │
129
- │ ❱ 172 │ │ return path, file_size(path) // (item_size * self._chunk_size) │
130
- │ 173 │  │
131
- │ 174 │ def __len__(self) -> int: │
132
- │ 175 │ │ if self._num_instances is None: │
133
- │ │
134
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/util.py:350 in │
135
- │ │
136
- │ 347 │ │ else: │
137
- │ 348 │ │ │ raise NotImplementedError(f"file size not implemented for '{parsed.scheme}' fi │
138
- │ 349 │ else: │
139
- │ ❱ 350 │ │ return os.stat(path).st_size │
140
- │ 351  │
141
- │ 352  │
142
- │ 353 def upload(source: PathOrStr, target: str, save_overwrite: bool = False): │
143
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
144
- FileNotFoundError: [Errno 2] No such file or directory: 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/requirements.txt DELETED
@@ -1,271 +0,0 @@
1
- scikit-learn==1.8.0
2
- joblib==1.5.2
3
- threadpoolctl==3.6.0
4
- torchmetrics==1.8.2
5
- lightning-utilities==0.15.2
6
- wandb==0.23.1
7
- GitPython==3.1.45
8
- gitdb==4.0.12
9
- smmap==5.0.2
10
- datasets==4.4.1
11
- pandas==2.3.3
12
- multiprocess==0.70.18
13
- pyarrow==22.0.0
14
- tzdata==2025.3
15
- xxhash==3.6.0
16
- ai2-olmo==0.6.0
17
- ai2-olmo-core==2.4.0
18
- cached_path==1.8.0
19
- google-cloud-storage==2.19.0
20
- google-cloud-core==2.5.0
21
- boto3==1.42.9
22
- google-api-core==2.28.1
23
- google-auth==2.43.0
24
- s3transfer==0.16.0
25
- botocore==1.42.9
26
- google-resumable-media==2.8.0
27
- pyasn1_modules==0.4.2
28
- rich==13.9.4
29
- rsa==4.9.1
30
- bettermap==1.3.1
31
- google-crc32c==1.7.1
32
- jmespath==1.0.1
33
- numpy==1.26.4
34
- omegaconf==2.3.0
35
- proto-plus==1.26.1
36
- pyasn1==0.6.1
37
- python-dateutil==2.9.0.post0
38
- antlr4-python3-runtime==4.9.3
39
- zstandard==0.23.0
40
- zipp==3.21.0
41
- yarl==1.22.0
42
- xgrammar==0.1.18
43
- xformers==0.0.29.post2
44
- wrapt==2.0.1
45
- wheel==0.45.1
46
- websockets==15.0.1
47
- wcwidth==0.2.13
48
- watchfiles==1.1.1
49
- vllm==0.8.5.post1
50
- uvloop==0.22.1
51
- uvicorn==0.38.0
52
- urllib3==2.3.0
53
- typing-inspection==0.4.2
54
- typing_extensions==4.15.0
55
- types-dataclasses==0.6.6
56
- typer==0.20.0
57
- truststore==0.10.0
58
- triton==3.2.0
59
- transformers==4.57.3
60
- traitlets==5.14.3
61
- tqdm==4.67.1
62
- torchvision==0.21.0+cu124
63
- torchelastic==0.2.2
64
- torchaudio==2.6.0+cu124
65
- torch==2.6.0+cu124
66
- tokenizers==0.22.1
67
- tiktoken==0.12.0
68
- sympy==1.13.1
69
- starlette==0.50.0
70
- stack_data==0.6.3
71
- soupsieve==2.5
72
- sortedcontainers==2.4.0
73
- sniffio==1.3.1
74
- six==1.17.0
75
- shellingham==1.5.4
76
- setuptools==75.8.0
77
- sentry-sdk==2.47.0
78
- sentencepiece==0.2.1
79
- scipy==1.16.3
80
- safetensors==0.7.0
81
- ruamel.yaml.clib==0.2.8
82
- ruamel.yaml==0.18.10
83
- rpds-py==0.22.3
84
- rignore==0.7.6
85
- rich-toolkit==0.17.0
86
- requests==2.32.3
87
- regex==2025.11.3
88
- referencing==0.36.2
89
- ray==2.52.1
90
- pyzmq==27.1.0
91
- pytz==2024.2
92
- python-multipart==0.0.20
93
- python-json-logger==4.0.0
94
- python-etcd==0.4.5
95
- python-dotenv==1.2.1
96
- Pygments==2.19.1
97
- pydantic-extra-types==2.10.6
98
- pydantic_core==2.41.5
99
- pydantic==2.12.5
100
- pycparser==2.22
101
- pycountry==24.6.1
102
- pycosat==0.6.6
103
- py-cpuinfo==9.0.0
104
- pure_eval==0.2.3
105
- ptyprocess==0.7.0
106
- psutil==6.1.1
107
- protobuf==4.25.8
108
- propcache==0.4.1
109
- prompt_toolkit==3.0.50
110
- prometheus-fastapi-instrumentator==7.1.0
111
- prometheus_client==0.23.1
112
- pluggy==1.5.0
113
- platformdirs==4.3.6
114
- pkgutil_resolve_name==1.3.10
115
- pkginfo==1.12.0
116
- pip==24.3.1
117
- pillow==11.0.0
118
- pickleshare==0.7.5
119
- pexpect==4.9.0
120
- partial-json-parser==0.2.1.1.post7
121
- parso==0.8.4
122
- packaging==24.2
123
- outlines_core==0.1.26
124
- outlines==0.1.11
125
- optree==0.14.0
126
- opentelemetry-semantic-conventions-ai==0.4.13
127
- opentelemetry-semantic-conventions==0.47b0
128
- opentelemetry-sdk==1.26.0
129
- opentelemetry-proto==1.26.0
130
- opentelemetry-exporter-otlp-proto-http==1.26.0
131
- opentelemetry-exporter-otlp-proto-grpc==1.26.0
132
- opentelemetry-exporter-otlp-proto-common==1.26.0
133
- opentelemetry-exporter-otlp==1.26.0
134
- opentelemetry-api==1.26.0
135
- opencv-python-headless==4.12.0.88
136
- openai==2.11.0
137
- nvidia-nvtx-cu12==12.4.127
138
- nvidia-nvjitlink-cu12==12.4.127
139
- nvidia-nccl-cu12==2.21.5
140
- nvidia-cusparselt-cu12==0.6.2
141
- nvidia-cusparse-cu12==12.3.1.170
142
- nvidia-cusolver-cu12==11.6.1.9
143
- nvidia-curand-cu12==10.3.5.147
144
- nvidia-cufft-cu12==11.2.1.3
145
- nvidia-cudnn-cu12==9.1.0.70
146
- nvidia-cuda-runtime-cu12==12.4.127
147
- nvidia-cuda-nvrtc-cu12==12.4.127
148
- nvidia-cuda-cupti-cu12==12.4.127
149
- nvidia-cublas-cu12==12.4.5.8
150
- numba==0.61.2
151
- ninja==1.11.1.3
152
- networkx==3.4.2
153
- nest-asyncio==1.6.0
154
- multidict==6.7.0
155
- msgspec==0.20.0
156
- msgpack==1.1.2
157
- mpmath==1.3.0
158
- more-itertools==10.6.0
159
- mistral_common==1.8.6
160
- menuinst==2.2.0
161
- mdurl==0.1.2
162
- matplotlib-inline==0.1.7
163
- math-verify==0.8.0
164
- markdown-it-py==4.0.0
165
- lm-format-enforcer==0.10.12
166
- llvmlite==0.44.0
167
- llguidance==0.7.30
168
- lintrunner==0.12.7
169
- lief==0.14.1
170
- libmambapy==2.0.5
171
- libarchive-c==5.1
172
- latex2sympy2_extended==1.10.2
173
- lark==1.2.2
174
- jsonschema-specifications==2024.10.1
175
- jsonschema==4.23.0
176
- jsonpointer==3.0.0
177
- jsonpatch==1.33
178
- jiter==0.12.0
179
- Jinja2==3.1.5
180
- jedi==0.19.2
181
- ipython==8.31.0
182
- interegular==0.3.3
183
- importlib_resources==6.5.2
184
- importlib_metadata==8.0.0
185
- idna==3.10
186
- hypothesis==6.124.7
187
- hyperframe==6.0.1
188
- huggingface-hub==0.36.0
189
- httpx==0.28.1
190
- httptools==0.7.1
191
- httpcore==1.0.9
192
- hpack==4.0.0
193
- hf-xet==1.2.0
194
- h2==4.1.0
195
- h11==0.16.0
196
- grpcio==1.76.0
197
- googleapis-common-protos==1.72.0
198
- gguf==0.17.1
199
- fsspec==2024.12.0
200
- frozenlist==1.8.0
201
- frozendict==2.4.6
202
- filelock==3.17.0
203
- fastrlock==0.8.3
204
- fastar==0.8.0
205
- fastapi-cloud-cli==0.6.0
206
- fastapi-cli==0.0.16
207
- fastapi==0.124.4
208
- expecttest==0.3.0
209
- executing==2.1.0
210
- exceptiongroup==1.2.2
211
- email-validator==2.3.0
212
- einops==0.8.1
213
- dnspython==2.7.0
214
- distro==1.9.0
215
- diskcache==5.6.3
216
- dill==0.4.0
217
- depyf==0.18.0
218
- Deprecated==1.3.1
219
- decorator==5.1.1
220
- cupy-cuda12x==13.6.0
221
- conda_package_streaming==0.11.0
222
- conda-package-handling==2.4.0
223
- conda-libmamba-solver==25.1.1
224
- conda_index==0.5.0
225
- conda-build==25.1.1
226
- conda==25.1.0
227
- compressed-tensors==0.9.3
228
- colorama==0.4.6
229
- cmake==3.31.4
230
- cloudpickle==3.1.2
231
- click==8.1.8
232
- charset-normalizer==3.4.1
233
- chardet==5.2.0
234
- cffi==1.17.1
235
- certifi==2024.12.14
236
- cachetools==6.2.3
237
- boltons==24.0.0
238
- blake3==1.0.8
239
- beautifulsoup4==4.12.3
240
- attrs==25.1.0
241
- astunparse==1.6.3
242
- asttokens==3.0.0
243
- astor==0.8.1
244
- archspec==0.2.5
245
- anyio==4.12.0
246
- annotated-types==0.7.0
247
- annotated-doc==0.0.4
248
- airportsdata==20250909
249
- aiosignal==1.4.0
250
- aiohttp==3.13.2
251
- aiohappyeyeballs==2.6.1
252
- PyYAML==6.0.2
253
- PySocks==1.7.1
254
- MarkupSafe==3.0.2
255
- Brotli==1.1.0
256
- zipp==3.19.2
257
- wheel==0.43.0
258
- typing_extensions==4.12.2
259
- typeguard==4.3.0
260
- tomli==2.0.1
261
- platformdirs==4.2.2
262
- packaging==24.2
263
- more-itertools==10.3.0
264
- jaraco.text==3.12.1
265
- jaraco.functools==4.0.1
266
- jaraco.context==5.3.0
267
- jaraco.collections==5.1.0
268
- inflect==7.3.1
269
- importlib_metadata==8.0.0
270
- backports.tarfile==1.2.0
271
- autocommand==2.2.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/wandb-metadata.json DELETED
@@ -1,103 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35",
3
- "python": "CPython 3.11.11",
4
- "startedAt": "2025-12-14T14:30:41.667717Z",
5
- "args": [
6
- "pretraining/configs/RL-1B.yaml"
7
- ],
8
- "program": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py",
9
- "codePath": "OLMo/scripts/train.py",
10
- "codePathLocal": "OLMo/scripts/train.py",
11
- "email": "[email protected]",
12
- "root": "checkpoints/OLMo-1B-as_fm3_omi2/wandb",
13
- "host": "serv-3342",
14
- "executable": "/opt/conda/bin/python",
15
- "cpu_count": 112,
16
- "cpu_count_logical": 224,
17
- "gpu": "NVIDIA H100 80GB HBM3",
18
- "gpu_count": 1,
19
- "disk": {
20
- "/": {
21
- "total": "2055141851136",
22
- "used": "49254445056"
23
- }
24
- },
25
- "memory": {
26
- "total": "2164176814080"
27
- },
28
- "gpu_nvidia": [
29
- {
30
- "name": "NVIDIA H100 80GB HBM3",
31
- "memoryTotal": "85520809984",
32
- "cudaCores": 16896,
33
- "architecture": "Hopper",
34
- "uuid": "GPU-4c999b2a-2578-9e62-0539-4b826d85fda8"
35
- }
36
- ],
37
- "cudaVersion": "13.0",
38
- "slurm": {
39
- "cluster_name": "pegasus",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpu_bind": "quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000",
42
- "cpu_bind_list": "0x000000000001FE00000000000000000000000001FE00000000000000",
43
- "cpu_bind_type": "mask_cpu:",
44
- "cpu_bind_verbose": "quiet",
45
- "cpus_on_node": "16",
46
- "cpus_per_task": "16",
47
- "distribution": "cyclic",
48
- "gpus": "1",
49
- "gpus_on_node": "1",
50
- "gtids": "0",
51
- "job_cpus_per_node": "16",
52
- "job_end_time": "1765734524",
53
- "job_gid": "8000",
54
- "job_group": "iml",
55
- "job_id": "2383756",
56
- "job_name": "bash",
57
- "job_nodelist": "serv-3342",
58
- "job_num_nodes": "1",
59
- "job_partition": "H100",
60
- "job_qos": "normal",
61
- "job_start_time": "1765720124",
62
- "job_uid": "13262",
63
- "job_user": "nguyen",
64
- "jobid": "2383756",
65
- "launch_node_ipaddr": "192.168.33.114",
66
- "localid": "0",
67
- "mem_per_cpu": "16384",
68
- "mpi_type": "pmix",
69
- "nnodes": "1",
70
- "nodeid": "0",
71
- "nodelist": "serv-3342",
72
- "nprocs": "1",
73
- "ntasks": "1",
74
- "oom_kill_step": "0",
75
- "pmix_mapping_serv": "(vector,(0,1,1))",
76
- "pmixp_abort_agent_port": "33735",
77
- "prio_process": "1",
78
- "procid": "0",
79
- "pty_port": "45219",
80
- "pty_win_col": "156",
81
- "pty_win_row": "41",
82
- "srun_comm_host": "192.168.33.114",
83
- "srun_comm_port": "35153",
84
- "step_gpus": "5",
85
- "step_id": "0",
86
- "step_launcher_port": "35153",
87
- "step_nodelist": "serv-3342",
88
- "step_num_nodes": "1",
89
- "step_num_tasks": "1",
90
- "step_tasks_per_node": "1",
91
- "stepid": "0",
92
- "submit_dir": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain",
93
- "submit_host": "login1",
94
- "task_pid": "3684902",
95
- "tasks_per_node": "1",
96
- "topology_addr": "serv-3342",
97
- "topology_addr_pattern": "node",
98
- "tres_bind": "gres/gpu:per_task:1",
99
- "tres_per_task": "cpu=16,gres/gpu=1",
100
- "umask": "0022"
101
- },
102
- "writerId": "gat9u7yjnpjadjdd0jcwog0os0b9blu3"
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":2},"_runtime":2}
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/logs/debug-internal.log DELETED
@@ -1,11 +0,0 @@
1
- {"time":"2025-12-14T15:30:42.008201805+01:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
- {"time":"2025-12-14T15:30:42.27964618+01:00","level":"INFO","msg":"stream: created new stream","id":"8qjie2ty"}
3
- {"time":"2025-12-14T15:30:42.280781062+01:00","level":"INFO","msg":"handler: started","stream_id":"8qjie2ty"}
4
- {"time":"2025-12-14T15:30:42.299825499+01:00","level":"INFO","msg":"stream: started","id":"8qjie2ty"}
5
- {"time":"2025-12-14T15:30:42.300011321+01:00","level":"INFO","msg":"writer: started","stream_id":"8qjie2ty"}
6
- {"time":"2025-12-14T15:30:42.30003616+01:00","level":"INFO","msg":"sender: started","stream_id":"8qjie2ty"}
7
- {"time":"2025-12-14T15:30:45.045287757+01:00","level":"INFO","msg":"stream: closing","id":"8qjie2ty"}
8
- {"time":"2025-12-14T15:30:45.781741134+01:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
- {"time":"2025-12-14T15:30:45.993844409+01:00","level":"INFO","msg":"handler: closed","stream_id":"8qjie2ty"}
10
- {"time":"2025-12-14T15:30:46.002152318+01:00","level":"INFO","msg":"sender: closed","stream_id":"8qjie2ty"}
11
- {"time":"2025-12-14T15:30:46.010867789+01:00","level":"INFO","msg":"stream: closed","id":"8qjie2ty"}
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/logs/debug.log DELETED
@@ -1,23 +0,0 @@
1
- 2025-12-14 15:30:41,744 INFO MainThread:3737058 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
- 2025-12-14 15:30:41,746 INFO MainThread:3737058 [wandb_setup.py:_flush():80] Configure stats pid to 3737058
3
- 2025-12-14 15:30:41,748 INFO MainThread:3737058 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
- 2025-12-14 15:30:41,749 INFO MainThread:3737058 [wandb_setup.py:_flush():80] Loading settings from /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/wandb/settings
5
- 2025-12-14 15:30:41,750 INFO MainThread:3737058 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
- 2025-12-14 15:30:41,751 INFO MainThread:3737058 [wandb_init.py:setup_run_log_directory():714] Logging user logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_153041-8qjie2ty/logs/debug.log
7
- 2025-12-14 15:30:41,752 INFO MainThread:3737058 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_153041-8qjie2ty/logs/debug-internal.log
8
- 2025-12-14 15:30:41,753 INFO MainThread:3737058 [wandb_init.py:init():841] calling init triggers
9
- 2025-12-14 15:30:41,754 INFO MainThread:3737058 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
- config: {'run_name': 'OLMo-1B-as_fm3_omi2', 'seed': 6198, 'epoch': None, 'dry_run': False, 'model': {'d_model': 2048, 'n_heads': 16, 'n_kv_heads': None, 'clip_qkv': None, 'n_layers': 16, 'mlp_ratio': 8, 'mlp_hidden_size': None, 'activation_type': 'swiglu', 'block_type': 'sequential', 'block_group_size': 1, 'alibi': False, 'alibi_bias_max': 8.0, 'rope': True, 'rope_full_precision': True, 'rope_theta': 10000, 'flash_attention': False, 'attention_dropout': 0.0, 'multi_query_attention': False, 'attention_layer_norm': False, 'residual_dropout': 0.0, 'embedding_dropout': 0.0, 'embedding_layer_norm': False, 'layer_norm_type': 'default', 'layer_norm_with_affine': False, 'layer_norm_eps': 1e-05, 'attention_layer_norm_with_affine': False, 'max_sequence_length': 2048, 'include_bias': False, 'bias_for_layer_norm': False, 'scale_logits': False, 'vocab_size': 32000, 'embedding_size': 32000, 'weight_tying': True, 'eos_token_id': 0, 'pad_token_id': 1, 'init_device': 'cuda', 'init_fn': 'normal', 'init_std': 0.02, 'init_cutoff_factor': 3.0, 'precision': 'amp_bf16', 'scale_emb_init': False, 'emb_init_std': None, 'norm_after': False}, 'optimizer': {'name': 'adamw', 'learning_rate': 0.0005, 'weight_decay': 0.1, 'betas': (0.9, 0.95), 'eps': 1e-08, 'no_decay_norm_and_bias': None, 'selective_updates': False, 'decay_norm_and_bias': True, 'decay_embeddings': True, 'metrics_log_interval': 10, 'record_update_metrics': False}, 'scheduler': {'name': 'cosine_with_warmup', 'units': 'steps', 't_warmup': 2000, 't_max': None, 'alpha_f': 0.1, 'grad_clip_warmup_steps': None, 'grad_clip_warmup_factor': None, 'warmup_min_lr': 0.0}, 'data': {'paths': ['data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds'], 'memmap_dtype': 'uint16', 'datasets': None, 'label_mask_paths': None, 'pad_direction': 'right', 'generate_attention_mask': False, 'generate_doc_lengths': False, 'num_workers': 32, 'drop_last': True, 'pin_memory': True, 'prefetch_factor': 8, 'persistent_workers': True, 'timeout': 0, 'seed': None, 'instance_filter': None, 'custom_dataset': None}, 'restore_dataloader': True, 'fast_forward_batches': None, 'evaluators': [], 'eval_interval': 5000, 'tokenizer': {'identifier': 'meta-llama/Llama-2-7b-hf', 'truncate_direction': 'right'}, 'save_folder': 'checkpoints/OLMo-1B-as_fm3_omi2', 'remote_save_folder': None, 'canceled_check_interval': 6000, 'save_interval': 3000, 'save_interval_unsharded': 3000, 'save_interval_ephemeral': None, 'save_num_checkpoints_to_keep': -1, 'save_num_unsharded_checkpoints_to_keep': -1, 'save_overwrite': True, 'force_save_unsharded': False, 'no_pre_train_checkpoint': False, 'load_path': None, 'load_path_sharded_checkpointer': None, 'try_load_latest_save': False, 'reset_optimizer_state': False, 'reset_trainer_state': False, 'sharded_checkpointer': 'torch_legacy', 'new_style_checkpoints': None, 'max_duration': '1ep', 'global_train_batch_size': 512, 'device_train_batch_size': 512, 'device_train_microbatch_size': 16, 'device_eval_batch_size': 16, 'eval_subset_num_batches': -1, 'eval_on_load': False, 'device_train_grad_accum': 32, 'max_grad_norm': 1.0, 'max_grad_norm_ratio': None, 'precision': 'amp_bf16', 'speed_monitor': {'window_size': 20, 'gpu_flops_available': None}, 'console_log_interval': 1, 'gen1_gc_interval': 1, 'compile': None, 'distributed_strategy': 'fsdp', 'fsdp': {'use_orig_params': True, 'sharding_strategy': <ShardingStrategy.FULL_SHARD: 1>, 'wrapping_strategy': None, 'precision': 'mixed', 'hybrid_sharding_num_model_replicas': None}, 'ddp': {'grad_sync_mode': 'batch', 'find_unused_params': False}, 'single': {'device': 'auto'}, 'softmax_auxiliary_loss': False, 'auxiliary_loss_multiplier': 0.0001, 'time_limit': None, 'extra_steps_after_cancel': 10, 'early_stopping_factor': None, 'save_data_indices': True, 'python_profiling': False, 'torch_profiling': False, 'stop_at': None, 'stop_after': None, 'activation_checkpointing': None, 'fused_loss': None, 'hf_datasets_cache_dir': None, 'module_outputs_save_steps': None, '_wandb': {}}
11
- 2025-12-14 15:30:41,755 INFO MainThread:3737058 [wandb_init.py:init():889] starting backend
12
- 2025-12-14 15:30:42,000 INFO MainThread:3737058 [wandb_init.py:init():892] sending inform_init request
13
- 2025-12-14 15:30:42,006 INFO MainThread:3737058 [wandb_init.py:init():900] backend started and connected
14
- 2025-12-14 15:30:42,009 INFO MainThread:3737058 [wandb_init.py:init():970] updated telemetry
15
- 2025-12-14 15:30:42,010 INFO MainThread:3737058 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
- 2025-12-14 15:30:42,684 INFO MainThread:3737058 [wandb_init.py:init():1041] starting run threads in backend
17
- 2025-12-14 15:30:42,776 INFO MainThread:3737058 [wandb_run.py:_console_start():2521] atexit reg
18
- 2025-12-14 15:30:42,778 INFO MainThread:3737058 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
- 2025-12-14 15:30:42,780 INFO MainThread:3737058 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
- 2025-12-14 15:30:42,781 INFO MainThread:3737058 [wandb_run.py:_redirect():2461] Redirects installed.
21
- 2025-12-14 15:30:42,783 INFO MainThread:3737058 [wandb_init.py:init():1081] run started, returning control to user process
22
- 2025-12-14 15:30:45,045 INFO wandb-AsyncioManager-main:3737058 [service_client.py:_forward_responses():80] Reached EOF.
23
- 2025-12-14 15:30:45,049 INFO wandb-AsyncioManager-main:3737058 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153041-8qjie2ty/run-8qjie2ty.wandb DELETED
Binary file (45.5 kB)
 
models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/config.yaml DELETED
@@ -1,575 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.23.1
4
- e:
5
- yq3dhquxezgxgixc3uqa3megunlrc2ta:
6
- args:
7
- - pretraining/configs/RL-1B.yaml
8
- codePath: OLMo/scripts/train.py
9
- codePathLocal: OLMo/scripts/train.py
10
- cpu_count: 112
11
- cpu_count_logical: 224
12
- cudaVersion: "13.0"
13
- disk:
14
- /:
15
- total: "2055141851136"
16
- used: "49254449152"
17
18
- executable: /opt/conda/bin/python
19
- gpu: NVIDIA H100 80GB HBM3
20
- gpu_count: 1
21
- gpu_nvidia:
22
- - architecture: Hopper
23
- cudaCores: 16896
24
- memoryTotal: "85520809984"
25
- name: NVIDIA H100 80GB HBM3
26
- uuid: GPU-4c999b2a-2578-9e62-0539-4b826d85fda8
27
- host: serv-3342
28
- memory:
29
- total: "2164176814080"
30
- os: Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35
31
- program: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py
32
- python: CPython 3.11.11
33
- root: checkpoints/OLMo-1B-as_fm3_omi2/wandb
34
- slurm:
35
- cluster_name: pegasus
36
- conf: /etc/slurm/slurm.conf
37
- cpu_bind: quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000
38
- cpu_bind_list: 0x000000000001FE00000000000000000000000001FE00000000000000
39
- cpu_bind_type: 'mask_cpu:'
40
- cpu_bind_verbose: quiet
41
- cpus_on_node: "16"
42
- cpus_per_task: "16"
43
- distribution: cyclic
44
- gpus: "1"
45
- gpus_on_node: "1"
46
- gtids: "0"
47
- job_cpus_per_node: "16"
48
- job_end_time: "1765734524"
49
- job_gid: "8000"
50
- job_group: iml
51
- job_id: "2383756"
52
- job_name: bash
53
- job_nodelist: serv-3342
54
- job_num_nodes: "1"
55
- job_partition: H100
56
- job_qos: normal
57
- job_start_time: "1765720124"
58
- job_uid: "13262"
59
- job_user: nguyen
60
- jobid: "2383756"
61
- launch_node_ipaddr: 192.168.33.114
62
- localid: "0"
63
- mem_per_cpu: "16384"
64
- mpi_type: pmix
65
- nnodes: "1"
66
- nodeid: "0"
67
- nodelist: serv-3342
68
- nprocs: "1"
69
- ntasks: "1"
70
- oom_kill_step: "0"
71
- pmix_mapping_serv: (vector,(0,1,1))
72
- pmixp_abort_agent_port: "33735"
73
- prio_process: "1"
74
- procid: "0"
75
- pty_port: "45219"
76
- pty_win_col: "156"
77
- pty_win_row: "41"
78
- srun_comm_host: 192.168.33.114
79
- srun_comm_port: "35153"
80
- step_gpus: "5"
81
- step_id: "0"
82
- step_launcher_port: "35153"
83
- step_nodelist: serv-3342
84
- step_num_nodes: "1"
85
- step_num_tasks: "1"
86
- step_tasks_per_node: "1"
87
- stepid: "0"
88
- submit_dir: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain
89
- submit_host: login1
90
- task_pid: "3684902"
91
- tasks_per_node: "1"
92
- topology_addr: serv-3342
93
- topology_addr_pattern: node
94
- tres_bind: gres/gpu:per_task:1
95
- tres_per_task: cpu=16,gres/gpu=1
96
- umask: "0022"
97
- startedAt: "2025-12-14T14:36:18.700317Z"
98
- writerId: yq3dhquxezgxgixc3uqa3megunlrc2ta
99
- m: []
100
- python_version: 3.11.11
101
- t:
102
- "1":
103
- - 1
104
- - 5
105
- - 11
106
- - 41
107
- - 49
108
- - 51
109
- - 53
110
- "2":
111
- - 1
112
- - 5
113
- - 11
114
- - 41
115
- - 49
116
- - 51
117
- - 53
118
- "3":
119
- - 13
120
- - 15
121
- - 16
122
- "4": 3.11.11
123
- "5": 0.23.1
124
- "6": 4.57.3
125
- "12": 0.23.1
126
- "13": linux-x86_64
127
- activation_checkpointing:
128
- value: null
129
- auxiliary_loss_multiplier:
130
- value: 0.0001
131
- canceled_check_interval:
132
- value: 6000
133
- compile:
134
- value: null
135
- console_log_interval:
136
- value: 1
137
- data:
138
- value:
139
- custom_dataset: null
140
- datasets: null
141
- drop_last: true
142
- generate_attention_mask: false
143
- generate_doc_lengths: false
144
- instance_filter: null
145
- label_mask_paths: null
146
- memmap_dtype: uint16
147
- num_workers: 32
148
- pad_direction: right
149
- paths:
150
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds
151
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds
152
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds
153
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds
154
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds
155
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds
156
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds
157
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds
158
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds
159
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds
160
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds
161
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds
162
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds
163
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds
164
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds
165
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds
166
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds
167
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds
168
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds
169
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds
170
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds
171
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds
172
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds
173
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds
174
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds
175
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds
176
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds
177
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds
178
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds
179
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds
180
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds
181
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds
182
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds
183
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds
184
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds
185
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds
186
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds
187
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds
188
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds
189
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds
190
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds
191
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds
192
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds
193
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds
194
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds
195
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds
196
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds
197
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds
198
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds
199
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds
200
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds
201
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds
202
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds
203
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds
204
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds
205
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds
206
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds
207
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds
208
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds
209
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds
210
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds
211
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds
212
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds
213
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds
214
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds
215
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds
216
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds
217
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds
218
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds
219
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds
220
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds
221
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds
222
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds
223
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds
224
- - data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds
225
- - data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds
226
- - data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds
227
- - data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds
228
- - data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds
229
- - data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds
230
- - data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds
231
- - data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds
232
- - data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds
233
- - data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds
234
- - data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds
235
- - data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds
236
- - data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds
237
- - data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds
238
- - data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds
239
- - data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds
240
- - data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds
241
- - data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds
242
- - data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds
243
- - data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds
244
- - data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds
245
- - data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds
246
- - data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds
247
- - data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds
248
- - data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds
249
- - data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds
250
- - data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds
251
- - data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds
252
- - data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds
253
- - data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds
254
- - data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds
255
- - data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds
256
- - data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds
257
- - data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds
258
- - data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds
259
- - data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds
260
- - data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds
261
- - data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds
262
- - data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds
263
- - data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds
264
- - data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds
265
- - data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds
266
- - data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds
267
- - data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds
268
- - data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds
269
- - data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds
270
- - data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds
271
- - data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds
272
- - data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds
273
- - data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds
274
- - data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds
275
- - data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds
276
- - data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds
277
- - data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds
278
- - data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds
279
- - data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds
280
- - data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds
281
- - data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds
282
- - data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds
283
- - data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds
284
- - data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds
285
- - data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds
286
- - data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds
287
- - data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds
288
- - data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds
289
- - data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds
290
- - data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds
291
- - data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds
292
- - data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds
293
- - data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds
294
- - data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds
295
- - data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds
296
- - data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds
297
- - data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds
298
- - data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds
299
- - data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds
300
- - data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds
301
- - data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds
302
- - data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds
303
- - data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds
304
- - data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds
305
- - data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds
306
- - data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds
307
- - data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds
308
- - data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds
309
- - data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds
310
- - data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds
311
- - data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds
312
- - data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds
313
- - data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds
314
- - data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds
315
- - data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds
316
- - data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds
317
- - data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds
318
- - data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds
319
- - data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds
320
- - data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds
321
- - data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds
322
- - data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds
323
- - data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds
324
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds
325
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds
326
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds
327
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds
328
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds
329
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds
330
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds
331
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds
332
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds
333
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds
334
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds
335
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds
336
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds
337
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds
338
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds
339
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds
340
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds
341
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds
342
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds
343
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds
344
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds
345
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds
346
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds
347
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds
348
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds
349
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds
350
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds
351
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds
352
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds
353
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds
354
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds
355
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds
356
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds
357
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds
358
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds
359
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds
360
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds
361
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds
362
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds
363
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds
364
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds
365
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds
366
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds
367
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds
368
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds
369
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds
370
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds
371
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds
372
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds
373
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds
374
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds
375
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds
376
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds
377
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds
378
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds
379
- persistent_workers: true
380
- pin_memory: true
381
- prefetch_factor: 8
382
- seed: null
383
- timeout: 0
384
- ddp:
385
- value:
386
- find_unused_params: false
387
- grad_sync_mode: batch
388
- device_eval_batch_size:
389
- value: 16
390
- device_train_batch_size:
391
- value: 512
392
- device_train_grad_accum:
393
- value: 32
394
- device_train_microbatch_size:
395
- value: 16
396
- distributed_strategy:
397
- value: fsdp
398
- dry_run:
399
- value: false
400
- early_stopping_factor:
401
- value: null
402
- epoch:
403
- value: null
404
- eval_interval:
405
- value: 5000
406
- eval_on_load:
407
- value: false
408
- eval_subset_num_batches:
409
- value: -1
410
- evaluators:
411
- value: []
412
- extra_steps_after_cancel:
413
- value: 10
414
- fast_forward_batches:
415
- value: null
416
- force_save_unsharded:
417
- value: false
418
- fsdp:
419
- value:
420
- hybrid_sharding_num_model_replicas: null
421
- precision: mixed
422
- sharding_strategy: FULL_SHARD
423
- use_orig_params: true
424
- wrapping_strategy: null
425
- fused_loss:
426
- value: null
427
- gen1_gc_interval:
428
- value: 1
429
- global_train_batch_size:
430
- value: 512
431
- hf_datasets_cache_dir:
432
- value: null
433
- load_path:
434
- value: null
435
- load_path_sharded_checkpointer:
436
- value: null
437
- max_duration:
438
- value: 1ep
439
- max_grad_norm:
440
- value: 1
441
- max_grad_norm_ratio:
442
- value: null
443
- model:
444
- value:
445
- activation_type: swiglu
446
- alibi: false
447
- alibi_bias_max: 8
448
- attention_dropout: 0
449
- attention_layer_norm: false
450
- attention_layer_norm_with_affine: false
451
- bias_for_layer_norm: false
452
- block_group_size: 1
453
- block_type: sequential
454
- clip_qkv: null
455
- d_model: 2048
456
- emb_init_std: null
457
- embedding_dropout: 0
458
- embedding_layer_norm: false
459
- embedding_size: 32000
460
- eos_token_id: 0
461
- flash_attention: false
462
- include_bias: false
463
- init_cutoff_factor: 3
464
- init_device: cuda
465
- init_fn: normal
466
- init_std: 0.02
467
- layer_norm_eps: 1e-05
468
- layer_norm_type: default
469
- layer_norm_with_affine: false
470
- max_sequence_length: 2048
471
- mlp_hidden_size: null
472
- mlp_ratio: 8
473
- multi_query_attention: false
474
- n_heads: 16
475
- n_kv_heads: null
476
- n_layers: 16
477
- norm_after: false
478
- pad_token_id: 1
479
- precision: amp_bf16
480
- residual_dropout: 0
481
- rope: true
482
- rope_full_precision: true
483
- rope_theta: 10000
484
- scale_emb_init: false
485
- scale_logits: false
486
- vocab_size: 32000
487
- weight_tying: true
488
- module_outputs_save_steps:
489
- value: null
490
- new_style_checkpoints:
491
- value: null
492
- no_pre_train_checkpoint:
493
- value: false
494
- optimizer:
495
- value:
496
- betas:
497
- - 0.9
498
- - 0.95
499
- decay_embeddings: true
500
- decay_norm_and_bias: true
501
- eps: 1e-08
502
- learning_rate: 0.0005
503
- metrics_log_interval: 10
504
- name: adamw
505
- no_decay_norm_and_bias: null
506
- record_update_metrics: false
507
- selective_updates: false
508
- weight_decay: 0.1
509
- precision:
510
- value: amp_bf16
511
- python_profiling:
512
- value: false
513
- remote_save_folder:
514
- value: null
515
- reset_optimizer_state:
516
- value: false
517
- reset_trainer_state:
518
- value: false
519
- restore_dataloader:
520
- value: true
521
- run_name:
522
- value: OLMo-1B-as_fm3_omi2
523
- save_data_indices:
524
- value: true
525
- save_folder:
526
- value: checkpoints/OLMo-1B-as_fm3_omi2
527
- save_interval:
528
- value: 3000
529
- save_interval_ephemeral:
530
- value: null
531
- save_interval_unsharded:
532
- value: 3000
533
- save_num_checkpoints_to_keep:
534
- value: -1
535
- save_num_unsharded_checkpoints_to_keep:
536
- value: -1
537
- save_overwrite:
538
- value: true
539
- scheduler:
540
- value:
541
- alpha_f: 0.1
542
- grad_clip_warmup_factor: null
543
- grad_clip_warmup_steps: null
544
- name: cosine_with_warmup
545
- t_max: null
546
- t_warmup: 2000
547
- units: steps
548
- warmup_min_lr: 0
549
- seed:
550
- value: 6198
551
- sharded_checkpointer:
552
- value: torch_legacy
553
- single:
554
- value:
555
- device: auto
556
- softmax_auxiliary_loss:
557
- value: false
558
- speed_monitor:
559
- value:
560
- gpu_flops_available: null
561
- window_size: 20
562
- stop_after:
563
- value: null
564
- stop_at:
565
- value: null
566
- time_limit:
567
- value: null
568
- tokenizer:
569
- value:
570
- identifier: meta-llama/Llama-2-7b-hf
571
- truncate_direction: right
572
- torch_profiling:
573
- value: false
574
- try_load_latest_save:
575
- value: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/output.log DELETED
@@ -1,241 +0,0 @@
1
- [2025-12-14 15:36:21] INFO  [olmo.data.iterable_dataset:79, rank=0] Saving global data order indices...
2
- [2025-12-14 15:36:23] INFO  [olmo.data.iterable_dataset:88, rank=0] Global data order indices saved to 'checkpoints/OLMo-1B-as_fm3_omi2/train_data/global_indices.npy'
3
- [2025-12-14 15:36:23] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/utils/data/dataloader.py:624: UserWarning: This DataLoader will create 32 worker processes in total. Our suggested max number of worker in current system is 16, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
4
- warnings.warn(
5
- Traceback (most recent call last):
6
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 402, in hf_raise_for_status
7
- response.raise_for_status()
8
- File "/opt/conda/lib/python3.11/site-packages/requests/models.py", line 1024, in raise_for_status
9
- raise HTTPError(http_error_msg, response=self)
10
- requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json
11
-
12
- The above exception was the direct cause of the following exception:
13
-
14
- Traceback (most recent call last):
15
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 436, in <module>
16
- main(cfg)
17
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 135, in main
18
- evaluators = build_evaluators(cfg, device)
19
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/eval/__init__.py", line 111, in build_evaluators
21
- tokenizer = Tokenizer.from_train_config(cfg)
22
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
23
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/tokenizer.py", line 75, in from_train_config
24
- tokenizer = cls.from_pretrained(
25
- ^^^^^^^^^^^^^^^^^^^^
26
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/tokenizer.py", line 93, in from_pretrained
27
- base_tokenizer = BaseTokenizer.from_pretrained(identifier)
28
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
29
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
30
- return fn(*args, **kwargs)
31
- ^^^^^^^^^^^^^^^^^^^
32
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1007, in hf_hub_download
33
- return _hf_hub_download_to_cache_dir(
34
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
35
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1114, in _hf_hub_download_to_cache_dir
36
- _raise_on_head_call_error(head_call_error, force_download, local_files_only)
37
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1655, in _raise_on_head_call_error
38
- raise head_call_error
39
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1543, in _get_metadata_or_catch_error
40
- metadata = get_hf_file_metadata(
41
- ^^^^^^^^^^^^^^^^^^^^^
42
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
43
- return fn(*args, **kwargs)
44
- ^^^^^^^^^^^^^^^^^^^
45
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1460, in get_hf_file_metadata
46
- r = _request_wrapper(
47
- ^^^^^^^^^^^^^^^^^
48
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 283, in _request_wrapper
49
- response = _request_wrapper(
50
- ^^^^^^^^^^^^^^^^^
51
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 307, in _request_wrapper
52
- hf_raise_for_status(response)
53
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 419, in hf_raise_for_status
54
- raise _format(GatedRepoError, message, response) from e
55
- huggingface_hub.errors.GatedRepoError: 401 Client Error. (Request ID: Root=1-693ecb67-4dcf6cbc46f1f55a2357bb92;7f173849-a847-492f-a0d0-f552bb475103)
56
-
57
- Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json.
58
- Access to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.
59
-
60
- [2025-12-14 15:36:23] CRITICAL [olmo.util:168, rank=0] Uncaught GatedRepoError: 401 Client Error. (Request ID: Root=1-693ecb67-4dcf6cbc46f1f55a2357bb92;7f173849-a847-492f-a0d0-f552bb475103)
61
-
62
- Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json.
63
- Access to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.
64
- ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
65
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_http.py:402 in hf_raise_for_statu │
66
- │ │
67
- │ 399 │ > If request failed for a reason not listed above. │
68
- │ 400 │ """ │
69
- │ 401 │ try: │
70
- │ ❱ 402 │ │ response.raise_for_status() │
71
- │ 403 │ except HTTPError as e: │
72
- │ 404 │ │ error_code = response.headers.get("X-Error-Code") │
73
- │ 405 │ │ error_message = response.headers.get("X-Error-Message") │
74
- │ │
75
- │ /opt/conda/lib/python3.11/site-packages/requests/models.py:1024 in raise_for_status │
76
- │ │
77
- │ 1021 │ │ │ ) │
78
- │ 1022 │ │  │
79
- │ 1023 │ │ if http_error_msg: │
80
- │ ❱ 1024 │ │ │ raise HTTPError(http_error_msg, response=self) │
81
- │ 1025 │  │
82
- │ 1026 │ def close(self): │
83
- │ 1027 │ │ """Releases the connection back to the pool. Once this method has been │
84
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
85
- HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json
86
-
87
- The above exception was the direct cause of the following exception:
88
-
89
- ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
90
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py:436 │
91
- │ │
92
- │ 433 │ │ log.info("Device is CPU. Updating config...") │
93
- │ 434 │ │ cfg.model.init_device = "cpu" │
94
- │ 435 │ │ cfg.distributed_strategy = "single" # type: ignore │
95
- │ ❱ 436 │ main(cfg) │
96
- │ 437  │
97
- │ │
98
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py:135 │
99
- │ │
100
- │ 132 │ train_loader = build_train_dataloader(cfg) │
101
- │ 133 │  │
102
- │ 134 │ # Construct evaluators. │
103
- │ ❱ 135 │ evaluators = build_evaluators(cfg, device) │
104
- │ 136 │ barrier() │
105
- │ 137 │  │
106
- │ 138 │ # Initialize the model. │
107
- │ │
108
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/eval/__init__.p │
109
- │ │
110
- │ 108  │
111
- │ 109 def build_evaluators(cfg: TrainConfig, device: torch.device) -> List[Evaluator]: │
112
- │ 110 │ evaluators = [] │
113
- │ ❱ 111 │ tokenizer = Tokenizer.from_train_config(cfg) │
114
- │ 112 │ for eval_cfg in cfg.evaluators: │
115
- │ 113 │ │ evaluators.append(build_evaluator(cfg, eval_cfg, tokenizer, device)) │
116
- │ 114 │ return evaluators │
117
- │ │
118
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/tokenizer.py:75 │
119
- │ │
120
- │  72 │ │ │ │ │ pad_token_id=config.model.pad_token_id, │
121
- │  73 │ │ │ │ ) │
122
- │  74 │ │ else: │
123
- │ ❱  75 │ │ │ tokenizer = cls.from_pretrained( │
124
- │  76 │ │ │ │ tokenizer_identifier, │
125
- │  77 │ │ │ │ eos_token_id=config.model.eos_token_id, │
126
- │  78 │ │ │ │ pad_token_id=config.model.pad_token_id, │
127
- │ │
128
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/tokenizer.py:93 │
129
- │ │
130
- │  90 │ │ │ ``tokenizer.json`` file. │
131
- │  91 │ │ :param kwargs: Other key word arguments passed to :class:`Tokenizer`. │
132
- │  92 │ │ """ │
133
- │ ❱  93 │ │ base_tokenizer = BaseTokenizer.from_pretrained(identifier) │
134
- │  94 │ │ eos_token_id = kwargs.pop("eos_token_id", base_tokenizer.get_vocab_size() - 1) │
135
- │  95 │ │ return cls(base_tokenizer, eos_token_id, **kwargs) │
136
- │  96  │
137
- │ │
138
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114 in _inner_fn │
139
- │ │
140
- │ 111 │ │ if check_use_auth_token: │
141
- │ 112 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_ │
142
- │ 113 │ │  │
143
- │ ❱ 114 │ │ return fn(*args, **kwargs) │
144
- │ 115 │  │
145
- │ 116 │ return _inner_fn # type: ignore │
146
- │ 117  │
147
- │ │
148
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1007 in hf_hub_download │
149
- │ │
150
- │ 1004 │ │ │ local_files_only=local_files_only, │
151
- │ 1005 │ │ ) │
152
- │ 1006 │ else: │
153
- │ ❱ 1007 │ │ return _hf_hub_download_to_cache_dir( │
154
- │ 1008 │ │ │ # Destination │
155
- │ 1009 │ │ │ cache_dir=cache_dir, │
156
- │ 1010 │ │ │ # File info │
157
- │ │
158
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1114 in _hf_hub_downloa │
159
- │ │
160
- │ 1111 │ │ │ │ │ return pointer_path │
161
- │ 1112 │ │  │
162
- │ 1113 │ │ # Otherwise, raise appropriate error │
163
- │ ❱ 1114 │ │ _raise_on_head_call_error(head_call_error, force_download, local_files_only) │
164
- │ 1115 │  │
165
- │ 1116 │ # From now on, etag, commit_hash, url and size are not None. │
166
- │ 1117 │ assert etag is not None, "etag must have been retrieved from server" │
167
- │ │
168
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1655 in _raise_on_head_ │
169
- │ │
170
- │ 1652 │ ): │
171
- │ 1653 │ │ # Repo not found or gated => let's raise the actual error │
172
- │ 1654 │ │ # Unauthorized => likely a token issue => let's raise the actual error │
173
- │ ❱ 1655 │ │ raise head_call_error │
174
- │ 1656 │ else: │
175
- │ 1657 │ │ # Otherwise: most likely a connection issue or Hub downtime => let's warn the use │
176
- │ 1658 │ │ raise LocalEntryNotFoundError( │
177
- │ │
178
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1543 in _get_metadata_o │
179
- │ │
180
- │ 1540 │ if not local_files_only: │
181
- │ 1541 │ │ try: │
182
- │ 1542 │ │ │ try: │
183
- │ ❱ 1543 │ │ │ │ metadata = get_hf_file_metadata( │
184
- │ 1544 │ │ │ │ │ url=url, proxies=proxies, timeout=etag_timeout, headers=headers, toke │
185
- │ 1545 │ │ │ │ ) │
186
- │ 1546 │ │ │ except EntryNotFoundError as http_error: │
187
- │ │
188
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114 in _inner_fn │
189
- │ │
190
- │ 111 │ │ if check_use_auth_token: │
191
- │ 112 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_ │
192
- │ 113 │ │  │
193
- │ ❱ 114 │ │ return fn(*args, **kwargs) │
194
- │ 115 │  │
195
- │ 116 │ return _inner_fn # type: ignore │
196
- │ 117  │
197
- │ │
198
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1460 in get_hf_file_met │
199
- │ │
200
- │ 1457 │ hf_headers["Accept-Encoding"] = "identity" # prevent any compression => we want to k │
201
- │ 1458 │  │
202
- │ 1459 │ # Retrieve metadata │
203
- │ ❱ 1460 │ r = _request_wrapper( │
204
- │ 1461 │ │ method="HEAD", │
205
- │ 1462 │ │ url=url, │
206
- │ 1463 │ │ headers=hf_headers, │
207
- │ │
208
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:283 in _request_wrapper │
209
- │ │
210
- │  280 │ """ │
211
- │  281 │ # Recursively follow relative redirects │
212
- │  282 │ if follow_relative_redirects: │
213
- │ ❱  283 │ │ response = _request_wrapper( │
214
- │  284 │ │ │ method=method, │
215
- │  285 │ │ │ url=url, │
216
- │  286 │ │ │ follow_relative_redirects=False, │
217
- │ │
218
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:307 in _request_wrapper │
219
- │ │
220
- │  304 │  │
221
- │  305 │ # Perform request and return if status_code is not in the retry list. │
222
- │  306 │ response = http_backoff(method=method, url=url, **params) │
223
- │ ❱  307 │ hf_raise_for_status(response) │
224
- │  308 │ return response │
225
- │  309  │
226
- │  310  │
227
- │ │
228
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_http.py:419 in hf_raise_for_statu │
229
- │ │
230
- │ 416 │ │ │ message = ( │
231
- │ 417 │ │ │ │ f"{response.status_code} Client Error." + "\n\n" + f"Cannot access gated r │
232
- │ 418 │ │ │ ) │
233
- │ ❱ 419 │ │ │ raise _format(GatedRepoError, message, response) from e │
234
- │ 420 │ │  │
235
- │ 421 │ │ elif error_message == "Access to this resource is disabled.": │
236
- │ 422 │ │ │ message = ( │
237
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
238
- GatedRepoError: 401 Client Error. (Request ID: Root=1-693ecb67-4dcf6cbc46f1f55a2357bb92;7f173849-a847-492f-a0d0-f552bb475103)
239
-
240
- Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json.
241
- Access to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/requirements.txt DELETED
@@ -1,271 +0,0 @@
1
- scikit-learn==1.8.0
2
- joblib==1.5.2
3
- threadpoolctl==3.6.0
4
- torchmetrics==1.8.2
5
- lightning-utilities==0.15.2
6
- wandb==0.23.1
7
- GitPython==3.1.45
8
- gitdb==4.0.12
9
- smmap==5.0.2
10
- datasets==4.4.1
11
- pandas==2.3.3
12
- multiprocess==0.70.18
13
- pyarrow==22.0.0
14
- tzdata==2025.3
15
- xxhash==3.6.0
16
- ai2-olmo==0.6.0
17
- ai2-olmo-core==2.4.0
18
- cached_path==1.8.0
19
- google-cloud-storage==2.19.0
20
- google-cloud-core==2.5.0
21
- boto3==1.42.9
22
- google-api-core==2.28.1
23
- google-auth==2.43.0
24
- s3transfer==0.16.0
25
- botocore==1.42.9
26
- google-resumable-media==2.8.0
27
- pyasn1_modules==0.4.2
28
- rich==13.9.4
29
- rsa==4.9.1
30
- bettermap==1.3.1
31
- google-crc32c==1.7.1
32
- jmespath==1.0.1
33
- numpy==1.26.4
34
- omegaconf==2.3.0
35
- proto-plus==1.26.1
36
- pyasn1==0.6.1
37
- python-dateutil==2.9.0.post0
38
- antlr4-python3-runtime==4.9.3
39
- zstandard==0.23.0
40
- zipp==3.21.0
41
- yarl==1.22.0
42
- xgrammar==0.1.18
43
- xformers==0.0.29.post2
44
- wrapt==2.0.1
45
- wheel==0.45.1
46
- websockets==15.0.1
47
- wcwidth==0.2.13
48
- watchfiles==1.1.1
49
- vllm==0.8.5.post1
50
- uvloop==0.22.1
51
- uvicorn==0.38.0
52
- urllib3==2.3.0
53
- typing-inspection==0.4.2
54
- typing_extensions==4.15.0
55
- types-dataclasses==0.6.6
56
- typer==0.20.0
57
- truststore==0.10.0
58
- triton==3.2.0
59
- transformers==4.57.3
60
- traitlets==5.14.3
61
- tqdm==4.67.1
62
- torchvision==0.21.0+cu124
63
- torchelastic==0.2.2
64
- torchaudio==2.6.0+cu124
65
- torch==2.6.0+cu124
66
- tokenizers==0.22.1
67
- tiktoken==0.12.0
68
- sympy==1.13.1
69
- starlette==0.50.0
70
- stack_data==0.6.3
71
- soupsieve==2.5
72
- sortedcontainers==2.4.0
73
- sniffio==1.3.1
74
- six==1.17.0
75
- shellingham==1.5.4
76
- setuptools==75.8.0
77
- sentry-sdk==2.47.0
78
- sentencepiece==0.2.1
79
- scipy==1.16.3
80
- safetensors==0.7.0
81
- ruamel.yaml.clib==0.2.8
82
- ruamel.yaml==0.18.10
83
- rpds-py==0.22.3
84
- rignore==0.7.6
85
- rich-toolkit==0.17.0
86
- requests==2.32.3
87
- regex==2025.11.3
88
- referencing==0.36.2
89
- ray==2.52.1
90
- pyzmq==27.1.0
91
- pytz==2024.2
92
- python-multipart==0.0.20
93
- python-json-logger==4.0.0
94
- python-etcd==0.4.5
95
- python-dotenv==1.2.1
96
- Pygments==2.19.1
97
- pydantic-extra-types==2.10.6
98
- pydantic_core==2.41.5
99
- pydantic==2.12.5
100
- pycparser==2.22
101
- pycountry==24.6.1
102
- pycosat==0.6.6
103
- py-cpuinfo==9.0.0
104
- pure_eval==0.2.3
105
- ptyprocess==0.7.0
106
- psutil==6.1.1
107
- protobuf==4.25.8
108
- propcache==0.4.1
109
- prompt_toolkit==3.0.50
110
- prometheus-fastapi-instrumentator==7.1.0
111
- prometheus_client==0.23.1
112
- pluggy==1.5.0
113
- platformdirs==4.3.6
114
- pkgutil_resolve_name==1.3.10
115
- pkginfo==1.12.0
116
- pip==24.3.1
117
- pillow==11.0.0
118
- pickleshare==0.7.5
119
- pexpect==4.9.0
120
- partial-json-parser==0.2.1.1.post7
121
- parso==0.8.4
122
- packaging==24.2
123
- outlines_core==0.1.26
124
- outlines==0.1.11
125
- optree==0.14.0
126
- opentelemetry-semantic-conventions-ai==0.4.13
127
- opentelemetry-semantic-conventions==0.47b0
128
- opentelemetry-sdk==1.26.0
129
- opentelemetry-proto==1.26.0
130
- opentelemetry-exporter-otlp-proto-http==1.26.0
131
- opentelemetry-exporter-otlp-proto-grpc==1.26.0
132
- opentelemetry-exporter-otlp-proto-common==1.26.0
133
- opentelemetry-exporter-otlp==1.26.0
134
- opentelemetry-api==1.26.0
135
- opencv-python-headless==4.12.0.88
136
- openai==2.11.0
137
- nvidia-nvtx-cu12==12.4.127
138
- nvidia-nvjitlink-cu12==12.4.127
139
- nvidia-nccl-cu12==2.21.5
140
- nvidia-cusparselt-cu12==0.6.2
141
- nvidia-cusparse-cu12==12.3.1.170
142
- nvidia-cusolver-cu12==11.6.1.9
143
- nvidia-curand-cu12==10.3.5.147
144
- nvidia-cufft-cu12==11.2.1.3
145
- nvidia-cudnn-cu12==9.1.0.70
146
- nvidia-cuda-runtime-cu12==12.4.127
147
- nvidia-cuda-nvrtc-cu12==12.4.127
148
- nvidia-cuda-cupti-cu12==12.4.127
149
- nvidia-cublas-cu12==12.4.5.8
150
- numba==0.61.2
151
- ninja==1.11.1.3
152
- networkx==3.4.2
153
- nest-asyncio==1.6.0
154
- multidict==6.7.0
155
- msgspec==0.20.0
156
- msgpack==1.1.2
157
- mpmath==1.3.0
158
- more-itertools==10.6.0
159
- mistral_common==1.8.6
160
- menuinst==2.2.0
161
- mdurl==0.1.2
162
- matplotlib-inline==0.1.7
163
- math-verify==0.8.0
164
- markdown-it-py==4.0.0
165
- lm-format-enforcer==0.10.12
166
- llvmlite==0.44.0
167
- llguidance==0.7.30
168
- lintrunner==0.12.7
169
- lief==0.14.1
170
- libmambapy==2.0.5
171
- libarchive-c==5.1
172
- latex2sympy2_extended==1.10.2
173
- lark==1.2.2
174
- jsonschema-specifications==2024.10.1
175
- jsonschema==4.23.0
176
- jsonpointer==3.0.0
177
- jsonpatch==1.33
178
- jiter==0.12.0
179
- Jinja2==3.1.5
180
- jedi==0.19.2
181
- ipython==8.31.0
182
- interegular==0.3.3
183
- importlib_resources==6.5.2
184
- importlib_metadata==8.0.0
185
- idna==3.10
186
- hypothesis==6.124.7
187
- hyperframe==6.0.1
188
- huggingface-hub==0.36.0
189
- httpx==0.28.1
190
- httptools==0.7.1
191
- httpcore==1.0.9
192
- hpack==4.0.0
193
- hf-xet==1.2.0
194
- h2==4.1.0
195
- h11==0.16.0
196
- grpcio==1.76.0
197
- googleapis-common-protos==1.72.0
198
- gguf==0.17.1
199
- fsspec==2024.12.0
200
- frozenlist==1.8.0
201
- frozendict==2.4.6
202
- filelock==3.17.0
203
- fastrlock==0.8.3
204
- fastar==0.8.0
205
- fastapi-cloud-cli==0.6.0
206
- fastapi-cli==0.0.16
207
- fastapi==0.124.4
208
- expecttest==0.3.0
209
- executing==2.1.0
210
- exceptiongroup==1.2.2
211
- email-validator==2.3.0
212
- einops==0.8.1
213
- dnspython==2.7.0
214
- distro==1.9.0
215
- diskcache==5.6.3
216
- dill==0.4.0
217
- depyf==0.18.0
218
- Deprecated==1.3.1
219
- decorator==5.1.1
220
- cupy-cuda12x==13.6.0
221
- conda_package_streaming==0.11.0
222
- conda-package-handling==2.4.0
223
- conda-libmamba-solver==25.1.1
224
- conda_index==0.5.0
225
- conda-build==25.1.1
226
- conda==25.1.0
227
- compressed-tensors==0.9.3
228
- colorama==0.4.6
229
- cmake==3.31.4
230
- cloudpickle==3.1.2
231
- click==8.1.8
232
- charset-normalizer==3.4.1
233
- chardet==5.2.0
234
- cffi==1.17.1
235
- certifi==2024.12.14
236
- cachetools==6.2.3
237
- boltons==24.0.0
238
- blake3==1.0.8
239
- beautifulsoup4==4.12.3
240
- attrs==25.1.0
241
- astunparse==1.6.3
242
- asttokens==3.0.0
243
- astor==0.8.1
244
- archspec==0.2.5
245
- anyio==4.12.0
246
- annotated-types==0.7.0
247
- annotated-doc==0.0.4
248
- airportsdata==20250909
249
- aiosignal==1.4.0
250
- aiohttp==3.13.2
251
- aiohappyeyeballs==2.6.1
252
- PyYAML==6.0.2
253
- PySocks==1.7.1
254
- MarkupSafe==3.0.2
255
- Brotli==1.1.0
256
- zipp==3.19.2
257
- wheel==0.43.0
258
- typing_extensions==4.12.2
259
- typeguard==4.3.0
260
- tomli==2.0.1
261
- platformdirs==4.2.2
262
- packaging==24.2
263
- more-itertools==10.3.0
264
- jaraco.text==3.12.1
265
- jaraco.functools==4.0.1
266
- jaraco.context==5.3.0
267
- jaraco.collections==5.1.0
268
- inflect==7.3.1
269
- importlib_metadata==8.0.0
270
- backports.tarfile==1.2.0
271
- autocommand==2.2.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/wandb-metadata.json DELETED
@@ -1,103 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35",
3
- "python": "CPython 3.11.11",
4
- "startedAt": "2025-12-14T14:36:18.700317Z",
5
- "args": [
6
- "pretraining/configs/RL-1B.yaml"
7
- ],
8
- "program": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py",
9
- "codePath": "OLMo/scripts/train.py",
10
- "codePathLocal": "OLMo/scripts/train.py",
11
- "email": "[email protected]",
12
- "root": "checkpoints/OLMo-1B-as_fm3_omi2/wandb",
13
- "host": "serv-3342",
14
- "executable": "/opt/conda/bin/python",
15
- "cpu_count": 112,
16
- "cpu_count_logical": 224,
17
- "gpu": "NVIDIA H100 80GB HBM3",
18
- "gpu_count": 1,
19
- "disk": {
20
- "/": {
21
- "total": "2055141851136",
22
- "used": "49254449152"
23
- }
24
- },
25
- "memory": {
26
- "total": "2164176814080"
27
- },
28
- "gpu_nvidia": [
29
- {
30
- "name": "NVIDIA H100 80GB HBM3",
31
- "memoryTotal": "85520809984",
32
- "cudaCores": 16896,
33
- "architecture": "Hopper",
34
- "uuid": "GPU-4c999b2a-2578-9e62-0539-4b826d85fda8"
35
- }
36
- ],
37
- "cudaVersion": "13.0",
38
- "slurm": {
39
- "cluster_name": "pegasus",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpu_bind": "quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000",
42
- "cpu_bind_list": "0x000000000001FE00000000000000000000000001FE00000000000000",
43
- "cpu_bind_type": "mask_cpu:",
44
- "cpu_bind_verbose": "quiet",
45
- "cpus_on_node": "16",
46
- "cpus_per_task": "16",
47
- "distribution": "cyclic",
48
- "gpus": "1",
49
- "gpus_on_node": "1",
50
- "gtids": "0",
51
- "job_cpus_per_node": "16",
52
- "job_end_time": "1765734524",
53
- "job_gid": "8000",
54
- "job_group": "iml",
55
- "job_id": "2383756",
56
- "job_name": "bash",
57
- "job_nodelist": "serv-3342",
58
- "job_num_nodes": "1",
59
- "job_partition": "H100",
60
- "job_qos": "normal",
61
- "job_start_time": "1765720124",
62
- "job_uid": "13262",
63
- "job_user": "nguyen",
64
- "jobid": "2383756",
65
- "launch_node_ipaddr": "192.168.33.114",
66
- "localid": "0",
67
- "mem_per_cpu": "16384",
68
- "mpi_type": "pmix",
69
- "nnodes": "1",
70
- "nodeid": "0",
71
- "nodelist": "serv-3342",
72
- "nprocs": "1",
73
- "ntasks": "1",
74
- "oom_kill_step": "0",
75
- "pmix_mapping_serv": "(vector,(0,1,1))",
76
- "pmixp_abort_agent_port": "33735",
77
- "prio_process": "1",
78
- "procid": "0",
79
- "pty_port": "45219",
80
- "pty_win_col": "156",
81
- "pty_win_row": "41",
82
- "srun_comm_host": "192.168.33.114",
83
- "srun_comm_port": "35153",
84
- "step_gpus": "5",
85
- "step_id": "0",
86
- "step_launcher_port": "35153",
87
- "step_nodelist": "serv-3342",
88
- "step_num_nodes": "1",
89
- "step_num_tasks": "1",
90
- "step_tasks_per_node": "1",
91
- "stepid": "0",
92
- "submit_dir": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain",
93
- "submit_host": "login1",
94
- "task_pid": "3684902",
95
- "tasks_per_node": "1",
96
- "topology_addr": "serv-3342",
97
- "topology_addr_pattern": "node",
98
- "tres_bind": "gres/gpu:per_task:1",
99
- "tres_per_task": "cpu=16,gres/gpu=1",
100
- "umask": "0022"
101
- },
102
- "writerId": "yq3dhquxezgxgixc3uqa3megunlrc2ta"
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":4},"_runtime":4}
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/logs/debug-internal.log DELETED
@@ -1,11 +0,0 @@
1
- {"time":"2025-12-14T15:36:19.017560403+01:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
- {"time":"2025-12-14T15:36:19.283685097+01:00","level":"INFO","msg":"stream: created new stream","id":"kgru6t23"}
3
- {"time":"2025-12-14T15:36:19.28461896+01:00","level":"INFO","msg":"handler: started","stream_id":"kgru6t23"}
4
- {"time":"2025-12-14T15:36:19.286998194+01:00","level":"INFO","msg":"stream: started","id":"kgru6t23"}
5
- {"time":"2025-12-14T15:36:19.287018288+01:00","level":"INFO","msg":"writer: started","stream_id":"kgru6t23"}
6
- {"time":"2025-12-14T15:36:19.287033949+01:00","level":"INFO","msg":"sender: started","stream_id":"kgru6t23"}
7
- {"time":"2025-12-14T15:36:23.966894569+01:00","level":"INFO","msg":"stream: closing","id":"kgru6t23"}
8
- {"time":"2025-12-14T15:36:24.755431422+01:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
- {"time":"2025-12-14T15:36:24.947056958+01:00","level":"INFO","msg":"handler: closed","stream_id":"kgru6t23"}
10
- {"time":"2025-12-14T15:36:24.954143642+01:00","level":"INFO","msg":"sender: closed","stream_id":"kgru6t23"}
11
- {"time":"2025-12-14T15:36:24.955118036+01:00","level":"INFO","msg":"stream: closed","id":"kgru6t23"}
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/logs/debug.log DELETED
@@ -1,23 +0,0 @@
1
- 2025-12-14 15:36:18,753 INFO MainThread:3744013 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
- 2025-12-14 15:36:18,754 INFO MainThread:3744013 [wandb_setup.py:_flush():80] Configure stats pid to 3744013
3
- 2025-12-14 15:36:18,755 INFO MainThread:3744013 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
- 2025-12-14 15:36:18,756 INFO MainThread:3744013 [wandb_setup.py:_flush():80] Loading settings from /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/wandb/settings
5
- 2025-12-14 15:36:18,757 INFO MainThread:3744013 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
- 2025-12-14 15:36:18,758 INFO MainThread:3744013 [wandb_init.py:setup_run_log_directory():714] Logging user logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_153618-kgru6t23/logs/debug.log
7
- 2025-12-14 15:36:18,759 INFO MainThread:3744013 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_153618-kgru6t23/logs/debug-internal.log
8
- 2025-12-14 15:36:18,760 INFO MainThread:3744013 [wandb_init.py:init():841] calling init triggers
9
- 2025-12-14 15:36:18,761 INFO MainThread:3744013 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
- config: {'run_name': 'OLMo-1B-as_fm3_omi2', 'seed': 6198, 'epoch': None, 'dry_run': False, 'model': {'d_model': 2048, 'n_heads': 16, 'n_kv_heads': None, 'clip_qkv': None, 'n_layers': 16, 'mlp_ratio': 8, 'mlp_hidden_size': None, 'activation_type': 'swiglu', 'block_type': 'sequential', 'block_group_size': 1, 'alibi': False, 'alibi_bias_max': 8.0, 'rope': True, 'rope_full_precision': True, 'rope_theta': 10000, 'flash_attention': False, 'attention_dropout': 0.0, 'multi_query_attention': False, 'attention_layer_norm': False, 'residual_dropout': 0.0, 'embedding_dropout': 0.0, 'embedding_layer_norm': False, 'layer_norm_type': 'default', 'layer_norm_with_affine': False, 'layer_norm_eps': 1e-05, 'attention_layer_norm_with_affine': False, 'max_sequence_length': 2048, 'include_bias': False, 'bias_for_layer_norm': False, 'scale_logits': False, 'vocab_size': 32000, 'embedding_size': 32000, 'weight_tying': True, 'eos_token_id': 0, 'pad_token_id': 1, 'init_device': 'cuda', 'init_fn': 'normal', 'init_std': 0.02, 'init_cutoff_factor': 3.0, 'precision': 'amp_bf16', 'scale_emb_init': False, 'emb_init_std': None, 'norm_after': False}, 'optimizer': {'name': 'adamw', 'learning_rate': 0.0005, 'weight_decay': 0.1, 'betas': (0.9, 0.95), 'eps': 1e-08, 'no_decay_norm_and_bias': None, 'selective_updates': False, 'decay_norm_and_bias': True, 'decay_embeddings': True, 'metrics_log_interval': 10, 'record_update_metrics': False}, 'scheduler': {'name': 'cosine_with_warmup', 'units': 'steps', 't_warmup': 2000, 't_max': None, 'alpha_f': 0.1, 'grad_clip_warmup_steps': None, 'grad_clip_warmup_factor': None, 'warmup_min_lr': 0.0}, 'data': {'paths': ['data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds'], 'memmap_dtype': 'uint16', 'datasets': None, 'label_mask_paths': None, 'pad_direction': 'right', 'generate_attention_mask': False, 'generate_doc_lengths': False, 'num_workers': 32, 'drop_last': True, 'pin_memory': True, 'prefetch_factor': 8, 'persistent_workers': True, 'timeout': 0, 'seed': None, 'instance_filter': None, 'custom_dataset': None}, 'restore_dataloader': True, 'fast_forward_batches': None, 'evaluators': [], 'eval_interval': 5000, 'tokenizer': {'identifier': 'meta-llama/Llama-2-7b-hf', 'truncate_direction': 'right'}, 'save_folder': 'checkpoints/OLMo-1B-as_fm3_omi2', 'remote_save_folder': None, 'canceled_check_interval': 6000, 'save_interval': 3000, 'save_interval_unsharded': 3000, 'save_interval_ephemeral': None, 'save_num_checkpoints_to_keep': -1, 'save_num_unsharded_checkpoints_to_keep': -1, 'save_overwrite': True, 'force_save_unsharded': False, 'no_pre_train_checkpoint': False, 'load_path': None, 'load_path_sharded_checkpointer': None, 'try_load_latest_save': False, 'reset_optimizer_state': False, 'reset_trainer_state': False, 'sharded_checkpointer': 'torch_legacy', 'new_style_checkpoints': None, 'max_duration': '1ep', 'global_train_batch_size': 512, 'device_train_batch_size': 512, 'device_train_microbatch_size': 16, 'device_eval_batch_size': 16, 'eval_subset_num_batches': -1, 'eval_on_load': False, 'device_train_grad_accum': 32, 'max_grad_norm': 1.0, 'max_grad_norm_ratio': None, 'precision': 'amp_bf16', 'speed_monitor': {'window_size': 20, 'gpu_flops_available': None}, 'console_log_interval': 1, 'gen1_gc_interval': 1, 'compile': None, 'distributed_strategy': 'fsdp', 'fsdp': {'use_orig_params': True, 'sharding_strategy': <ShardingStrategy.FULL_SHARD: 1>, 'wrapping_strategy': None, 'precision': 'mixed', 'hybrid_sharding_num_model_replicas': None}, 'ddp': {'grad_sync_mode': 'batch', 'find_unused_params': False}, 'single': {'device': 'auto'}, 'softmax_auxiliary_loss': False, 'auxiliary_loss_multiplier': 0.0001, 'time_limit': None, 'extra_steps_after_cancel': 10, 'early_stopping_factor': None, 'save_data_indices': True, 'python_profiling': False, 'torch_profiling': False, 'stop_at': None, 'stop_after': None, 'activation_checkpointing': None, 'fused_loss': None, 'hf_datasets_cache_dir': None, 'module_outputs_save_steps': None, '_wandb': {}}
11
- 2025-12-14 15:36:18,762 INFO MainThread:3744013 [wandb_init.py:init():889] starting backend
12
- 2025-12-14 15:36:19,006 INFO MainThread:3744013 [wandb_init.py:init():892] sending inform_init request
13
- 2025-12-14 15:36:19,014 INFO MainThread:3744013 [wandb_init.py:init():900] backend started and connected
14
- 2025-12-14 15:36:19,018 INFO MainThread:3744013 [wandb_init.py:init():970] updated telemetry
15
- 2025-12-14 15:36:19,021 INFO MainThread:3744013 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
- 2025-12-14 15:36:19,655 INFO MainThread:3744013 [wandb_init.py:init():1041] starting run threads in backend
17
- 2025-12-14 15:36:19,746 INFO MainThread:3744013 [wandb_run.py:_console_start():2521] atexit reg
18
- 2025-12-14 15:36:19,747 INFO MainThread:3744013 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
- 2025-12-14 15:36:19,748 INFO MainThread:3744013 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
- 2025-12-14 15:36:19,749 INFO MainThread:3744013 [wandb_run.py:_redirect():2461] Redirects installed.
21
- 2025-12-14 15:36:19,752 INFO MainThread:3744013 [wandb_init.py:init():1081] run started, returning control to user process
22
- 2025-12-14 15:36:23,966 INFO wandb-AsyncioManager-main:3744013 [service_client.py:_forward_responses():80] Reached EOF.
23
- 2025-12-14 15:36:23,967 INFO wandb-AsyncioManager-main:3744013 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153618-kgru6t23/run-kgru6t23.wandb DELETED
Binary file (58.7 kB)
 
models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/config.yaml DELETED
@@ -1,575 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.23.1
4
- e:
5
- sunp9aekjphrkaie8a5jhftzaeyt35vl:
6
- args:
7
- - pretraining/configs/RL-1B.yaml
8
- codePath: OLMo/scripts/train.py
9
- codePathLocal: OLMo/scripts/train.py
10
- cpu_count: 112
11
- cpu_count_logical: 224
12
- cudaVersion: "13.0"
13
- disk:
14
- /:
15
- total: "2055141851136"
16
- used: "49254453248"
17
18
- executable: /opt/conda/bin/python
19
- gpu: NVIDIA H100 80GB HBM3
20
- gpu_count: 1
21
- gpu_nvidia:
22
- - architecture: Hopper
23
- cudaCores: 16896
24
- memoryTotal: "85520809984"
25
- name: NVIDIA H100 80GB HBM3
26
- uuid: GPU-4c999b2a-2578-9e62-0539-4b826d85fda8
27
- host: serv-3342
28
- memory:
29
- total: "2164176814080"
30
- os: Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35
31
- program: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py
32
- python: CPython 3.11.11
33
- root: checkpoints/OLMo-1B-as_fm3_omi2/wandb
34
- slurm:
35
- cluster_name: pegasus
36
- conf: /etc/slurm/slurm.conf
37
- cpu_bind: quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000
38
- cpu_bind_list: 0x000000000001FE00000000000000000000000001FE00000000000000
39
- cpu_bind_type: 'mask_cpu:'
40
- cpu_bind_verbose: quiet
41
- cpus_on_node: "16"
42
- cpus_per_task: "16"
43
- distribution: cyclic
44
- gpus: "1"
45
- gpus_on_node: "1"
46
- gtids: "0"
47
- job_cpus_per_node: "16"
48
- job_end_time: "1765734524"
49
- job_gid: "8000"
50
- job_group: iml
51
- job_id: "2383756"
52
- job_name: bash
53
- job_nodelist: serv-3342
54
- job_num_nodes: "1"
55
- job_partition: H100
56
- job_qos: normal
57
- job_start_time: "1765720124"
58
- job_uid: "13262"
59
- job_user: nguyen
60
- jobid: "2383756"
61
- launch_node_ipaddr: 192.168.33.114
62
- localid: "0"
63
- mem_per_cpu: "16384"
64
- mpi_type: pmix
65
- nnodes: "1"
66
- nodeid: "0"
67
- nodelist: serv-3342
68
- nprocs: "1"
69
- ntasks: "1"
70
- oom_kill_step: "0"
71
- pmix_mapping_serv: (vector,(0,1,1))
72
- pmixp_abort_agent_port: "33735"
73
- prio_process: "1"
74
- procid: "0"
75
- pty_port: "45219"
76
- pty_win_col: "156"
77
- pty_win_row: "41"
78
- srun_comm_host: 192.168.33.114
79
- srun_comm_port: "35153"
80
- step_gpus: "5"
81
- step_id: "0"
82
- step_launcher_port: "35153"
83
- step_nodelist: serv-3342
84
- step_num_nodes: "1"
85
- step_num_tasks: "1"
86
- step_tasks_per_node: "1"
87
- stepid: "0"
88
- submit_dir: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain
89
- submit_host: login1
90
- task_pid: "3684902"
91
- tasks_per_node: "1"
92
- topology_addr: serv-3342
93
- topology_addr_pattern: node
94
- tres_bind: gres/gpu:per_task:1
95
- tres_per_task: cpu=16,gres/gpu=1
96
- umask: "0022"
97
- startedAt: "2025-12-14T14:37:00.974734Z"
98
- writerId: sunp9aekjphrkaie8a5jhftzaeyt35vl
99
- m: []
100
- python_version: 3.11.11
101
- t:
102
- "1":
103
- - 1
104
- - 5
105
- - 11
106
- - 41
107
- - 49
108
- - 51
109
- - 53
110
- "2":
111
- - 1
112
- - 5
113
- - 11
114
- - 41
115
- - 49
116
- - 51
117
- - 53
118
- "3":
119
- - 13
120
- - 15
121
- - 16
122
- "4": 3.11.11
123
- "5": 0.23.1
124
- "6": 4.57.3
125
- "12": 0.23.1
126
- "13": linux-x86_64
127
- activation_checkpointing:
128
- value: null
129
- auxiliary_loss_multiplier:
130
- value: 0.0001
131
- canceled_check_interval:
132
- value: 6000
133
- compile:
134
- value: null
135
- console_log_interval:
136
- value: 1
137
- data:
138
- value:
139
- custom_dataset: null
140
- datasets: null
141
- drop_last: true
142
- generate_attention_mask: false
143
- generate_doc_lengths: false
144
- instance_filter: null
145
- label_mask_paths: null
146
- memmap_dtype: uint16
147
- num_workers: 32
148
- pad_direction: right
149
- paths:
150
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds
151
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds
152
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds
153
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds
154
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds
155
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds
156
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds
157
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds
158
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds
159
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds
160
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds
161
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds
162
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds
163
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds
164
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds
165
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds
166
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds
167
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds
168
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds
169
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds
170
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds
171
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds
172
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds
173
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds
174
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds
175
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds
176
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds
177
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds
178
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds
179
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds
180
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds
181
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds
182
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds
183
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds
184
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds
185
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds
186
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds
187
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds
188
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds
189
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds
190
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds
191
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds
192
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds
193
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds
194
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds
195
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds
196
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds
197
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds
198
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds
199
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds
200
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds
201
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds
202
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds
203
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds
204
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds
205
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds
206
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds
207
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds
208
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds
209
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds
210
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds
211
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds
212
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds
213
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds
214
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds
215
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds
216
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds
217
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds
218
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds
219
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds
220
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds
221
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds
222
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds
223
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds
224
- - data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds
225
- - data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds
226
- - data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds
227
- - data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds
228
- - data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds
229
- - data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds
230
- - data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds
231
- - data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds
232
- - data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds
233
- - data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds
234
- - data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds
235
- - data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds
236
- - data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds
237
- - data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds
238
- - data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds
239
- - data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds
240
- - data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds
241
- - data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds
242
- - data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds
243
- - data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds
244
- - data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds
245
- - data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds
246
- - data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds
247
- - data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds
248
- - data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds
249
- - data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds
250
- - data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds
251
- - data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds
252
- - data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds
253
- - data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds
254
- - data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds
255
- - data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds
256
- - data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds
257
- - data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds
258
- - data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds
259
- - data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds
260
- - data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds
261
- - data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds
262
- - data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds
263
- - data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds
264
- - data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds
265
- - data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds
266
- - data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds
267
- - data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds
268
- - data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds
269
- - data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds
270
- - data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds
271
- - data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds
272
- - data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds
273
- - data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds
274
- - data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds
275
- - data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds
276
- - data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds
277
- - data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds
278
- - data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds
279
- - data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds
280
- - data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds
281
- - data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds
282
- - data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds
283
- - data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds
284
- - data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds
285
- - data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds
286
- - data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds
287
- - data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds
288
- - data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds
289
- - data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds
290
- - data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds
291
- - data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds
292
- - data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds
293
- - data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds
294
- - data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds
295
- - data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds
296
- - data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds
297
- - data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds
298
- - data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds
299
- - data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds
300
- - data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds
301
- - data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds
302
- - data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds
303
- - data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds
304
- - data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds
305
- - data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds
306
- - data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds
307
- - data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds
308
- - data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds
309
- - data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds
310
- - data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds
311
- - data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds
312
- - data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds
313
- - data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds
314
- - data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds
315
- - data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds
316
- - data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds
317
- - data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds
318
- - data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds
319
- - data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds
320
- - data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds
321
- - data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds
322
- - data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds
323
- - data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds
324
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds
325
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds
326
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds
327
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds
328
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds
329
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds
330
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds
331
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds
332
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds
333
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds
334
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds
335
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds
336
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds
337
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds
338
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds
339
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds
340
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds
341
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds
342
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds
343
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds
344
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds
345
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds
346
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds
347
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds
348
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds
349
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds
350
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds
351
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds
352
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds
353
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds
354
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds
355
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds
356
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds
357
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds
358
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds
359
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds
360
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds
361
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds
362
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds
363
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds
364
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds
365
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds
366
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds
367
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds
368
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds
369
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds
370
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds
371
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds
372
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds
373
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds
374
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds
375
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds
376
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds
377
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds
378
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds
379
- persistent_workers: true
380
- pin_memory: true
381
- prefetch_factor: 8
382
- seed: null
383
- timeout: 0
384
- ddp:
385
- value:
386
- find_unused_params: false
387
- grad_sync_mode: batch
388
- device_eval_batch_size:
389
- value: 16
390
- device_train_batch_size:
391
- value: 512
392
- device_train_grad_accum:
393
- value: 32
394
- device_train_microbatch_size:
395
- value: 16
396
- distributed_strategy:
397
- value: fsdp
398
- dry_run:
399
- value: false
400
- early_stopping_factor:
401
- value: null
402
- epoch:
403
- value: null
404
- eval_interval:
405
- value: 5000
406
- eval_on_load:
407
- value: false
408
- eval_subset_num_batches:
409
- value: -1
410
- evaluators:
411
- value: []
412
- extra_steps_after_cancel:
413
- value: 10
414
- fast_forward_batches:
415
- value: null
416
- force_save_unsharded:
417
- value: false
418
- fsdp:
419
- value:
420
- hybrid_sharding_num_model_replicas: null
421
- precision: mixed
422
- sharding_strategy: FULL_SHARD
423
- use_orig_params: true
424
- wrapping_strategy: null
425
- fused_loss:
426
- value: null
427
- gen1_gc_interval:
428
- value: 1
429
- global_train_batch_size:
430
- value: 512
431
- hf_datasets_cache_dir:
432
- value: null
433
- load_path:
434
- value: null
435
- load_path_sharded_checkpointer:
436
- value: null
437
- max_duration:
438
- value: 1ep
439
- max_grad_norm:
440
- value: 1
441
- max_grad_norm_ratio:
442
- value: null
443
- model:
444
- value:
445
- activation_type: swiglu
446
- alibi: false
447
- alibi_bias_max: 8
448
- attention_dropout: 0
449
- attention_layer_norm: false
450
- attention_layer_norm_with_affine: false
451
- bias_for_layer_norm: false
452
- block_group_size: 1
453
- block_type: sequential
454
- clip_qkv: null
455
- d_model: 2048
456
- emb_init_std: null
457
- embedding_dropout: 0
458
- embedding_layer_norm: false
459
- embedding_size: 32000
460
- eos_token_id: 0
461
- flash_attention: false
462
- include_bias: false
463
- init_cutoff_factor: 3
464
- init_device: cuda
465
- init_fn: normal
466
- init_std: 0.02
467
- layer_norm_eps: 1e-05
468
- layer_norm_type: default
469
- layer_norm_with_affine: false
470
- max_sequence_length: 2048
471
- mlp_hidden_size: null
472
- mlp_ratio: 8
473
- multi_query_attention: false
474
- n_heads: 16
475
- n_kv_heads: null
476
- n_layers: 16
477
- norm_after: false
478
- pad_token_id: 1
479
- precision: amp_bf16
480
- residual_dropout: 0
481
- rope: true
482
- rope_full_precision: true
483
- rope_theta: 10000
484
- scale_emb_init: false
485
- scale_logits: false
486
- vocab_size: 32000
487
- weight_tying: true
488
- module_outputs_save_steps:
489
- value: null
490
- new_style_checkpoints:
491
- value: null
492
- no_pre_train_checkpoint:
493
- value: false
494
- optimizer:
495
- value:
496
- betas:
497
- - 0.9
498
- - 0.95
499
- decay_embeddings: true
500
- decay_norm_and_bias: true
501
- eps: 1e-08
502
- learning_rate: 0.0005
503
- metrics_log_interval: 10
504
- name: adamw
505
- no_decay_norm_and_bias: null
506
- record_update_metrics: false
507
- selective_updates: false
508
- weight_decay: 0.1
509
- precision:
510
- value: amp_bf16
511
- python_profiling:
512
- value: false
513
- remote_save_folder:
514
- value: null
515
- reset_optimizer_state:
516
- value: false
517
- reset_trainer_state:
518
- value: false
519
- restore_dataloader:
520
- value: true
521
- run_name:
522
- value: OLMo-1B-as_fm3_omi2
523
- save_data_indices:
524
- value: true
525
- save_folder:
526
- value: checkpoints/OLMo-1B-as_fm3_omi2
527
- save_interval:
528
- value: 3000
529
- save_interval_ephemeral:
530
- value: null
531
- save_interval_unsharded:
532
- value: 3000
533
- save_num_checkpoints_to_keep:
534
- value: -1
535
- save_num_unsharded_checkpoints_to_keep:
536
- value: -1
537
- save_overwrite:
538
- value: true
539
- scheduler:
540
- value:
541
- alpha_f: 0.1
542
- grad_clip_warmup_factor: null
543
- grad_clip_warmup_steps: null
544
- name: cosine_with_warmup
545
- t_max: null
546
- t_warmup: 2000
547
- units: steps
548
- warmup_min_lr: 0
549
- seed:
550
- value: 6198
551
- sharded_checkpointer:
552
- value: torch_legacy
553
- single:
554
- value:
555
- device: auto
556
- softmax_auxiliary_loss:
557
- value: false
558
- speed_monitor:
559
- value:
560
- gpu_flops_available: null
561
- window_size: 20
562
- stop_after:
563
- value: null
564
- stop_at:
565
- value: null
566
- time_limit:
567
- value: null
568
- tokenizer:
569
- value:
570
- identifier: meta-llama/Llama-2-7b-hf
571
- truncate_direction: right
572
- torch_profiling:
573
- value: false
574
- try_load_latest_save:
575
- value: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/output.log DELETED
@@ -1,241 +0,0 @@
1
- [2025-12-14 15:37:04] INFO  [olmo.data.iterable_dataset:79, rank=0] Saving global data order indices...
2
- [2025-12-14 15:37:05] INFO  [olmo.data.iterable_dataset:88, rank=0] Global data order indices saved to 'checkpoints/OLMo-1B-as_fm3_omi2/train_data/global_indices.npy'
3
- [2025-12-14 15:37:05] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/utils/data/dataloader.py:624: UserWarning: This DataLoader will create 32 worker processes in total. Our suggested max number of worker in current system is 16, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
4
- warnings.warn(
5
- Traceback (most recent call last):
6
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 402, in hf_raise_for_status
7
- response.raise_for_status()
8
- File "/opt/conda/lib/python3.11/site-packages/requests/models.py", line 1024, in raise_for_status
9
- raise HTTPError(http_error_msg, response=self)
10
- requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json
11
-
12
- The above exception was the direct cause of the following exception:
13
-
14
- Traceback (most recent call last):
15
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 436, in <module>
16
- main(cfg)
17
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 135, in main
18
- evaluators = build_evaluators(cfg, device)
19
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/eval/__init__.py", line 111, in build_evaluators
21
- tokenizer = Tokenizer.from_train_config(cfg)
22
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
23
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/tokenizer.py", line 75, in from_train_config
24
- tokenizer = cls.from_pretrained(
25
- ^^^^^^^^^^^^^^^^^^^^
26
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/tokenizer.py", line 93, in from_pretrained
27
- base_tokenizer = BaseTokenizer.from_pretrained(identifier)
28
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
29
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
30
- return fn(*args, **kwargs)
31
- ^^^^^^^^^^^^^^^^^^^
32
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1007, in hf_hub_download
33
- return _hf_hub_download_to_cache_dir(
34
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
35
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1114, in _hf_hub_download_to_cache_dir
36
- _raise_on_head_call_error(head_call_error, force_download, local_files_only)
37
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1655, in _raise_on_head_call_error
38
- raise head_call_error
39
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1543, in _get_metadata_or_catch_error
40
- metadata = get_hf_file_metadata(
41
- ^^^^^^^^^^^^^^^^^^^^^
42
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
43
- return fn(*args, **kwargs)
44
- ^^^^^^^^^^^^^^^^^^^
45
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1460, in get_hf_file_metadata
46
- r = _request_wrapper(
47
- ^^^^^^^^^^^^^^^^^
48
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 283, in _request_wrapper
49
- response = _request_wrapper(
50
- ^^^^^^^^^^^^^^^^^
51
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 307, in _request_wrapper
52
- hf_raise_for_status(response)
53
- File "/opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 419, in hf_raise_for_status
54
- raise _format(GatedRepoError, message, response) from e
55
- huggingface_hub.errors.GatedRepoError: 401 Client Error. (Request ID: Root=1-693ecb91-6af595934595c5324944523c;f0cd17a6-52aa-4d39-8750-5d75e2cca80a)
56
-
57
- Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json.
58
- Access to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.
59
-
60
- [2025-12-14 15:37:05] CRITICAL [olmo.util:168, rank=0] Uncaught GatedRepoError: 401 Client Error. (Request ID: Root=1-693ecb91-6af595934595c5324944523c;f0cd17a6-52aa-4d39-8750-5d75e2cca80a)
61
-
62
- Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json.
63
- Access to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.
64
- ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
65
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_http.py:402 in hf_raise_for_statu │
66
- │ │
67
- │ 399 │ > If request failed for a reason not listed above. │
68
- │ 400 │ """ │
69
- │ 401 │ try: │
70
- │ ❱ 402 │ │ response.raise_for_status() │
71
- │ 403 │ except HTTPError as e: │
72
- │ 404 │ │ error_code = response.headers.get("X-Error-Code") │
73
- │ 405 │ │ error_message = response.headers.get("X-Error-Message") │
74
- │ │
75
- │ /opt/conda/lib/python3.11/site-packages/requests/models.py:1024 in raise_for_status │
76
- │ │
77
- │ 1021 │ │ │ ) │
78
- │ 1022 │ │  │
79
- │ 1023 │ │ if http_error_msg: │
80
- │ ❱ 1024 │ │ │ raise HTTPError(http_error_msg, response=self) │
81
- │ 1025 │  │
82
- │ 1026 │ def close(self): │
83
- │ 1027 │ │ """Releases the connection back to the pool. Once this method has been │
84
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
85
- HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json
86
-
87
- The above exception was the direct cause of the following exception:
88
-
89
- ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
90
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py:436 │
91
- │ │
92
- │ 433 │ │ log.info("Device is CPU. Updating config...") │
93
- │ 434 │ │ cfg.model.init_device = "cpu" │
94
- │ 435 │ │ cfg.distributed_strategy = "single" # type: ignore │
95
- │ ❱ 436 │ main(cfg) │
96
- │ 437  │
97
- │ │
98
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py:135 │
99
- │ │
100
- │ 132 │ train_loader = build_train_dataloader(cfg) │
101
- │ 133 │  │
102
- │ 134 │ # Construct evaluators. │
103
- │ ❱ 135 │ evaluators = build_evaluators(cfg, device) │
104
- │ 136 │ barrier() │
105
- │ 137 │  │
106
- │ 138 │ # Initialize the model. │
107
- │ │
108
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/eval/__init__.p │
109
- │ │
110
- │ 108  │
111
- │ 109 def build_evaluators(cfg: TrainConfig, device: torch.device) -> List[Evaluator]: │
112
- │ 110 │ evaluators = [] │
113
- │ ❱ 111 │ tokenizer = Tokenizer.from_train_config(cfg) │
114
- │ 112 │ for eval_cfg in cfg.evaluators: │
115
- │ 113 │ │ evaluators.append(build_evaluator(cfg, eval_cfg, tokenizer, device)) │
116
- │ 114 │ return evaluators │
117
- │ │
118
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/tokenizer.py:75 │
119
- │ │
120
- │  72 │ │ │ │ │ pad_token_id=config.model.pad_token_id, │
121
- │  73 │ │ │ │ ) │
122
- │  74 │ │ else: │
123
- │ ❱  75 │ │ │ tokenizer = cls.from_pretrained( │
124
- │  76 │ │ │ │ tokenizer_identifier, │
125
- │  77 │ │ │ │ eos_token_id=config.model.eos_token_id, │
126
- │  78 │ │ │ │ pad_token_id=config.model.pad_token_id, │
127
- │ │
128
- │ /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/tokenizer.py:93 │
129
- │ │
130
- │  90 │ │ │ ``tokenizer.json`` file. │
131
- │  91 │ │ :param kwargs: Other key word arguments passed to :class:`Tokenizer`. │
132
- │  92 │ │ """ │
133
- │ ❱  93 │ │ base_tokenizer = BaseTokenizer.from_pretrained(identifier) │
134
- │  94 │ │ eos_token_id = kwargs.pop("eos_token_id", base_tokenizer.get_vocab_size() - 1) │
135
- │  95 │ │ return cls(base_tokenizer, eos_token_id, **kwargs) │
136
- │  96  │
137
- │ │
138
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114 in _inner_fn │
139
- │ │
140
- │ 111 │ │ if check_use_auth_token: │
141
- │ 112 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_ │
142
- │ 113 │ │  │
143
- │ ❱ 114 │ │ return fn(*args, **kwargs) │
144
- │ 115 │  │
145
- │ 116 │ return _inner_fn # type: ignore │
146
- │ 117  │
147
- │ │
148
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1007 in hf_hub_download │
149
- │ │
150
- │ 1004 │ │ │ local_files_only=local_files_only, │
151
- │ 1005 │ │ ) │
152
- │ 1006 │ else: │
153
- │ ❱ 1007 │ │ return _hf_hub_download_to_cache_dir( │
154
- │ 1008 │ │ │ # Destination │
155
- │ 1009 │ │ │ cache_dir=cache_dir, │
156
- │ 1010 │ │ │ # File info │
157
- │ │
158
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1114 in _hf_hub_downloa │
159
- │ │
160
- │ 1111 │ │ │ │ │ return pointer_path │
161
- │ 1112 │ │  │
162
- │ 1113 │ │ # Otherwise, raise appropriate error │
163
- │ ❱ 1114 │ │ _raise_on_head_call_error(head_call_error, force_download, local_files_only) │
164
- │ 1115 │  │
165
- │ 1116 │ # From now on, etag, commit_hash, url and size are not None. │
166
- │ 1117 │ assert etag is not None, "etag must have been retrieved from server" │
167
- │ │
168
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1655 in _raise_on_head_ │
169
- │ │
170
- │ 1652 │ ): │
171
- │ 1653 │ │ # Repo not found or gated => let's raise the actual error │
172
- │ 1654 │ │ # Unauthorized => likely a token issue => let's raise the actual error │
173
- │ ❱ 1655 │ │ raise head_call_error │
174
- │ 1656 │ else: │
175
- │ 1657 │ │ # Otherwise: most likely a connection issue or Hub downtime => let's warn the use │
176
- │ 1658 │ │ raise LocalEntryNotFoundError( │
177
- │ │
178
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1543 in _get_metadata_o │
179
- │ │
180
- │ 1540 │ if not local_files_only: │
181
- │ 1541 │ │ try: │
182
- │ 1542 │ │ │ try: │
183
- │ ❱ 1543 │ │ │ │ metadata = get_hf_file_metadata( │
184
- │ 1544 │ │ │ │ │ url=url, proxies=proxies, timeout=etag_timeout, headers=headers, toke │
185
- │ 1545 │ │ │ │ ) │
186
- │ 1546 │ │ │ except EntryNotFoundError as http_error: │
187
- │ │
188
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114 in _inner_fn │
189
- │ │
190
- │ 111 │ │ if check_use_auth_token: │
191
- │ 112 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_ │
192
- │ 113 │ │  │
193
- │ ❱ 114 │ │ return fn(*args, **kwargs) │
194
- │ 115 │  │
195
- │ 116 │ return _inner_fn # type: ignore │
196
- │ 117  │
197
- │ │
198
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:1460 in get_hf_file_met │
199
- │ │
200
- │ 1457 │ hf_headers["Accept-Encoding"] = "identity" # prevent any compression => we want to k │
201
- │ 1458 │  │
202
- │ 1459 │ # Retrieve metadata │
203
- │ ❱ 1460 │ r = _request_wrapper( │
204
- │ 1461 │ │ method="HEAD", │
205
- │ 1462 │ │ url=url, │
206
- │ 1463 │ │ headers=hf_headers, │
207
- │ │
208
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:283 in _request_wrapper │
209
- │ │
210
- │  280 │ """ │
211
- │  281 │ # Recursively follow relative redirects │
212
- │  282 │ if follow_relative_redirects: │
213
- │ ❱  283 │ │ response = _request_wrapper( │
214
- │  284 │ │ │ method=method, │
215
- │  285 │ │ │ url=url, │
216
- │  286 │ │ │ follow_relative_redirects=False, │
217
- │ │
218
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:307 in _request_wrapper │
219
- │ │
220
- │  304 │  │
221
- │  305 │ # Perform request and return if status_code is not in the retry list. │
222
- │  306 │ response = http_backoff(method=method, url=url, **params) │
223
- │ ❱  307 │ hf_raise_for_status(response) │
224
- │  308 │ return response │
225
- │  309  │
226
- │  310  │
227
- │ │
228
- │ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_http.py:419 in hf_raise_for_statu │
229
- │ │
230
- │ 416 │ │ │ message = ( │
231
- │ 417 │ │ │ │ f"{response.status_code} Client Error." + "\n\n" + f"Cannot access gated r │
232
- │ 418 │ │ │ ) │
233
- │ ❱ 419 │ │ │ raise _format(GatedRepoError, message, response) from e │
234
- │ 420 │ │  │
235
- │ 421 │ │ elif error_message == "Access to this resource is disabled.": │
236
- │ 422 │ │ │ message = ( │
237
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
238
- GatedRepoError: 401 Client Error. (Request ID: Root=1-693ecb91-6af595934595c5324944523c;f0cd17a6-52aa-4d39-8750-5d75e2cca80a)
239
-
240
- Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json.
241
- Access to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/requirements.txt DELETED
@@ -1,271 +0,0 @@
1
- scikit-learn==1.8.0
2
- joblib==1.5.2
3
- threadpoolctl==3.6.0
4
- torchmetrics==1.8.2
5
- lightning-utilities==0.15.2
6
- wandb==0.23.1
7
- GitPython==3.1.45
8
- gitdb==4.0.12
9
- smmap==5.0.2
10
- datasets==4.4.1
11
- pandas==2.3.3
12
- multiprocess==0.70.18
13
- pyarrow==22.0.0
14
- tzdata==2025.3
15
- xxhash==3.6.0
16
- ai2-olmo==0.6.0
17
- ai2-olmo-core==2.4.0
18
- cached_path==1.8.0
19
- google-cloud-storage==2.19.0
20
- google-cloud-core==2.5.0
21
- boto3==1.42.9
22
- google-api-core==2.28.1
23
- google-auth==2.43.0
24
- s3transfer==0.16.0
25
- botocore==1.42.9
26
- google-resumable-media==2.8.0
27
- pyasn1_modules==0.4.2
28
- rich==13.9.4
29
- rsa==4.9.1
30
- bettermap==1.3.1
31
- google-crc32c==1.7.1
32
- jmespath==1.0.1
33
- numpy==1.26.4
34
- omegaconf==2.3.0
35
- proto-plus==1.26.1
36
- pyasn1==0.6.1
37
- python-dateutil==2.9.0.post0
38
- antlr4-python3-runtime==4.9.3
39
- zstandard==0.23.0
40
- zipp==3.21.0
41
- yarl==1.22.0
42
- xgrammar==0.1.18
43
- xformers==0.0.29.post2
44
- wrapt==2.0.1
45
- wheel==0.45.1
46
- websockets==15.0.1
47
- wcwidth==0.2.13
48
- watchfiles==1.1.1
49
- vllm==0.8.5.post1
50
- uvloop==0.22.1
51
- uvicorn==0.38.0
52
- urllib3==2.3.0
53
- typing-inspection==0.4.2
54
- typing_extensions==4.15.0
55
- types-dataclasses==0.6.6
56
- typer==0.20.0
57
- truststore==0.10.0
58
- triton==3.2.0
59
- transformers==4.57.3
60
- traitlets==5.14.3
61
- tqdm==4.67.1
62
- torchvision==0.21.0+cu124
63
- torchelastic==0.2.2
64
- torchaudio==2.6.0+cu124
65
- torch==2.6.0+cu124
66
- tokenizers==0.22.1
67
- tiktoken==0.12.0
68
- sympy==1.13.1
69
- starlette==0.50.0
70
- stack_data==0.6.3
71
- soupsieve==2.5
72
- sortedcontainers==2.4.0
73
- sniffio==1.3.1
74
- six==1.17.0
75
- shellingham==1.5.4
76
- setuptools==75.8.0
77
- sentry-sdk==2.47.0
78
- sentencepiece==0.2.1
79
- scipy==1.16.3
80
- safetensors==0.7.0
81
- ruamel.yaml.clib==0.2.8
82
- ruamel.yaml==0.18.10
83
- rpds-py==0.22.3
84
- rignore==0.7.6
85
- rich-toolkit==0.17.0
86
- requests==2.32.3
87
- regex==2025.11.3
88
- referencing==0.36.2
89
- ray==2.52.1
90
- pyzmq==27.1.0
91
- pytz==2024.2
92
- python-multipart==0.0.20
93
- python-json-logger==4.0.0
94
- python-etcd==0.4.5
95
- python-dotenv==1.2.1
96
- Pygments==2.19.1
97
- pydantic-extra-types==2.10.6
98
- pydantic_core==2.41.5
99
- pydantic==2.12.5
100
- pycparser==2.22
101
- pycountry==24.6.1
102
- pycosat==0.6.6
103
- py-cpuinfo==9.0.0
104
- pure_eval==0.2.3
105
- ptyprocess==0.7.0
106
- psutil==6.1.1
107
- protobuf==4.25.8
108
- propcache==0.4.1
109
- prompt_toolkit==3.0.50
110
- prometheus-fastapi-instrumentator==7.1.0
111
- prometheus_client==0.23.1
112
- pluggy==1.5.0
113
- platformdirs==4.3.6
114
- pkgutil_resolve_name==1.3.10
115
- pkginfo==1.12.0
116
- pip==24.3.1
117
- pillow==11.0.0
118
- pickleshare==0.7.5
119
- pexpect==4.9.0
120
- partial-json-parser==0.2.1.1.post7
121
- parso==0.8.4
122
- packaging==24.2
123
- outlines_core==0.1.26
124
- outlines==0.1.11
125
- optree==0.14.0
126
- opentelemetry-semantic-conventions-ai==0.4.13
127
- opentelemetry-semantic-conventions==0.47b0
128
- opentelemetry-sdk==1.26.0
129
- opentelemetry-proto==1.26.0
130
- opentelemetry-exporter-otlp-proto-http==1.26.0
131
- opentelemetry-exporter-otlp-proto-grpc==1.26.0
132
- opentelemetry-exporter-otlp-proto-common==1.26.0
133
- opentelemetry-exporter-otlp==1.26.0
134
- opentelemetry-api==1.26.0
135
- opencv-python-headless==4.12.0.88
136
- openai==2.11.0
137
- nvidia-nvtx-cu12==12.4.127
138
- nvidia-nvjitlink-cu12==12.4.127
139
- nvidia-nccl-cu12==2.21.5
140
- nvidia-cusparselt-cu12==0.6.2
141
- nvidia-cusparse-cu12==12.3.1.170
142
- nvidia-cusolver-cu12==11.6.1.9
143
- nvidia-curand-cu12==10.3.5.147
144
- nvidia-cufft-cu12==11.2.1.3
145
- nvidia-cudnn-cu12==9.1.0.70
146
- nvidia-cuda-runtime-cu12==12.4.127
147
- nvidia-cuda-nvrtc-cu12==12.4.127
148
- nvidia-cuda-cupti-cu12==12.4.127
149
- nvidia-cublas-cu12==12.4.5.8
150
- numba==0.61.2
151
- ninja==1.11.1.3
152
- networkx==3.4.2
153
- nest-asyncio==1.6.0
154
- multidict==6.7.0
155
- msgspec==0.20.0
156
- msgpack==1.1.2
157
- mpmath==1.3.0
158
- more-itertools==10.6.0
159
- mistral_common==1.8.6
160
- menuinst==2.2.0
161
- mdurl==0.1.2
162
- matplotlib-inline==0.1.7
163
- math-verify==0.8.0
164
- markdown-it-py==4.0.0
165
- lm-format-enforcer==0.10.12
166
- llvmlite==0.44.0
167
- llguidance==0.7.30
168
- lintrunner==0.12.7
169
- lief==0.14.1
170
- libmambapy==2.0.5
171
- libarchive-c==5.1
172
- latex2sympy2_extended==1.10.2
173
- lark==1.2.2
174
- jsonschema-specifications==2024.10.1
175
- jsonschema==4.23.0
176
- jsonpointer==3.0.0
177
- jsonpatch==1.33
178
- jiter==0.12.0
179
- Jinja2==3.1.5
180
- jedi==0.19.2
181
- ipython==8.31.0
182
- interegular==0.3.3
183
- importlib_resources==6.5.2
184
- importlib_metadata==8.0.0
185
- idna==3.10
186
- hypothesis==6.124.7
187
- hyperframe==6.0.1
188
- huggingface-hub==0.36.0
189
- httpx==0.28.1
190
- httptools==0.7.1
191
- httpcore==1.0.9
192
- hpack==4.0.0
193
- hf-xet==1.2.0
194
- h2==4.1.0
195
- h11==0.16.0
196
- grpcio==1.76.0
197
- googleapis-common-protos==1.72.0
198
- gguf==0.17.1
199
- fsspec==2024.12.0
200
- frozenlist==1.8.0
201
- frozendict==2.4.6
202
- filelock==3.17.0
203
- fastrlock==0.8.3
204
- fastar==0.8.0
205
- fastapi-cloud-cli==0.6.0
206
- fastapi-cli==0.0.16
207
- fastapi==0.124.4
208
- expecttest==0.3.0
209
- executing==2.1.0
210
- exceptiongroup==1.2.2
211
- email-validator==2.3.0
212
- einops==0.8.1
213
- dnspython==2.7.0
214
- distro==1.9.0
215
- diskcache==5.6.3
216
- dill==0.4.0
217
- depyf==0.18.0
218
- Deprecated==1.3.1
219
- decorator==5.1.1
220
- cupy-cuda12x==13.6.0
221
- conda_package_streaming==0.11.0
222
- conda-package-handling==2.4.0
223
- conda-libmamba-solver==25.1.1
224
- conda_index==0.5.0
225
- conda-build==25.1.1
226
- conda==25.1.0
227
- compressed-tensors==0.9.3
228
- colorama==0.4.6
229
- cmake==3.31.4
230
- cloudpickle==3.1.2
231
- click==8.1.8
232
- charset-normalizer==3.4.1
233
- chardet==5.2.0
234
- cffi==1.17.1
235
- certifi==2024.12.14
236
- cachetools==6.2.3
237
- boltons==24.0.0
238
- blake3==1.0.8
239
- beautifulsoup4==4.12.3
240
- attrs==25.1.0
241
- astunparse==1.6.3
242
- asttokens==3.0.0
243
- astor==0.8.1
244
- archspec==0.2.5
245
- anyio==4.12.0
246
- annotated-types==0.7.0
247
- annotated-doc==0.0.4
248
- airportsdata==20250909
249
- aiosignal==1.4.0
250
- aiohttp==3.13.2
251
- aiohappyeyeballs==2.6.1
252
- PyYAML==6.0.2
253
- PySocks==1.7.1
254
- MarkupSafe==3.0.2
255
- Brotli==1.1.0
256
- zipp==3.19.2
257
- wheel==0.43.0
258
- typing_extensions==4.12.2
259
- typeguard==4.3.0
260
- tomli==2.0.1
261
- platformdirs==4.2.2
262
- packaging==24.2
263
- more-itertools==10.3.0
264
- jaraco.text==3.12.1
265
- jaraco.functools==4.0.1
266
- jaraco.context==5.3.0
267
- jaraco.collections==5.1.0
268
- inflect==7.3.1
269
- importlib_metadata==8.0.0
270
- backports.tarfile==1.2.0
271
- autocommand==2.2.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/wandb-metadata.json DELETED
@@ -1,103 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35",
3
- "python": "CPython 3.11.11",
4
- "startedAt": "2025-12-14T14:37:00.974734Z",
5
- "args": [
6
- "pretraining/configs/RL-1B.yaml"
7
- ],
8
- "program": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py",
9
- "codePath": "OLMo/scripts/train.py",
10
- "codePathLocal": "OLMo/scripts/train.py",
11
- "email": "[email protected]",
12
- "root": "checkpoints/OLMo-1B-as_fm3_omi2/wandb",
13
- "host": "serv-3342",
14
- "executable": "/opt/conda/bin/python",
15
- "cpu_count": 112,
16
- "cpu_count_logical": 224,
17
- "gpu": "NVIDIA H100 80GB HBM3",
18
- "gpu_count": 1,
19
- "disk": {
20
- "/": {
21
- "total": "2055141851136",
22
- "used": "49254453248"
23
- }
24
- },
25
- "memory": {
26
- "total": "2164176814080"
27
- },
28
- "gpu_nvidia": [
29
- {
30
- "name": "NVIDIA H100 80GB HBM3",
31
- "memoryTotal": "85520809984",
32
- "cudaCores": 16896,
33
- "architecture": "Hopper",
34
- "uuid": "GPU-4c999b2a-2578-9e62-0539-4b826d85fda8"
35
- }
36
- ],
37
- "cudaVersion": "13.0",
38
- "slurm": {
39
- "cluster_name": "pegasus",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpu_bind": "quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000",
42
- "cpu_bind_list": "0x000000000001FE00000000000000000000000001FE00000000000000",
43
- "cpu_bind_type": "mask_cpu:",
44
- "cpu_bind_verbose": "quiet",
45
- "cpus_on_node": "16",
46
- "cpus_per_task": "16",
47
- "distribution": "cyclic",
48
- "gpus": "1",
49
- "gpus_on_node": "1",
50
- "gtids": "0",
51
- "job_cpus_per_node": "16",
52
- "job_end_time": "1765734524",
53
- "job_gid": "8000",
54
- "job_group": "iml",
55
- "job_id": "2383756",
56
- "job_name": "bash",
57
- "job_nodelist": "serv-3342",
58
- "job_num_nodes": "1",
59
- "job_partition": "H100",
60
- "job_qos": "normal",
61
- "job_start_time": "1765720124",
62
- "job_uid": "13262",
63
- "job_user": "nguyen",
64
- "jobid": "2383756",
65
- "launch_node_ipaddr": "192.168.33.114",
66
- "localid": "0",
67
- "mem_per_cpu": "16384",
68
- "mpi_type": "pmix",
69
- "nnodes": "1",
70
- "nodeid": "0",
71
- "nodelist": "serv-3342",
72
- "nprocs": "1",
73
- "ntasks": "1",
74
- "oom_kill_step": "0",
75
- "pmix_mapping_serv": "(vector,(0,1,1))",
76
- "pmixp_abort_agent_port": "33735",
77
- "prio_process": "1",
78
- "procid": "0",
79
- "pty_port": "45219",
80
- "pty_win_col": "156",
81
- "pty_win_row": "41",
82
- "srun_comm_host": "192.168.33.114",
83
- "srun_comm_port": "35153",
84
- "step_gpus": "5",
85
- "step_id": "0",
86
- "step_launcher_port": "35153",
87
- "step_nodelist": "serv-3342",
88
- "step_num_nodes": "1",
89
- "step_num_tasks": "1",
90
- "step_tasks_per_node": "1",
91
- "stepid": "0",
92
- "submit_dir": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain",
93
- "submit_host": "login1",
94
- "task_pid": "3684902",
95
- "tasks_per_node": "1",
96
- "topology_addr": "serv-3342",
97
- "topology_addr_pattern": "node",
98
- "tres_bind": "gres/gpu:per_task:1",
99
- "tres_per_task": "cpu=16,gres/gpu=1",
100
- "umask": "0022"
101
- },
102
- "writerId": "sunp9aekjphrkaie8a5jhftzaeyt35vl"
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_runtime":4,"_wandb":{"runtime":4}}
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/logs/debug-internal.log DELETED
@@ -1,11 +0,0 @@
1
- {"time":"2025-12-14T15:37:01.275172336+01:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
- {"time":"2025-12-14T15:37:01.544361117+01:00","level":"INFO","msg":"stream: created new stream","id":"c0ptjfpp"}
3
- {"time":"2025-12-14T15:37:01.545813594+01:00","level":"INFO","msg":"handler: started","stream_id":"c0ptjfpp"}
4
- {"time":"2025-12-14T15:37:01.548802981+01:00","level":"INFO","msg":"stream: started","id":"c0ptjfpp"}
5
- {"time":"2025-12-14T15:37:01.548838014+01:00","level":"INFO","msg":"writer: started","stream_id":"c0ptjfpp"}
6
- {"time":"2025-12-14T15:37:01.548853437+01:00","level":"INFO","msg":"sender: started","stream_id":"c0ptjfpp"}
7
- {"time":"2025-12-14T15:37:06.118873288+01:00","level":"INFO","msg":"stream: closing","id":"c0ptjfpp"}
8
- {"time":"2025-12-14T15:37:06.807757435+01:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
- {"time":"2025-12-14T15:37:07.118752593+01:00","level":"INFO","msg":"handler: closed","stream_id":"c0ptjfpp"}
10
- {"time":"2025-12-14T15:37:07.122196378+01:00","level":"INFO","msg":"sender: closed","stream_id":"c0ptjfpp"}
11
- {"time":"2025-12-14T15:37:07.123213459+01:00","level":"INFO","msg":"stream: closed","id":"c0ptjfpp"}
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/logs/debug.log DELETED
@@ -1,23 +0,0 @@
1
- 2025-12-14 15:37:01,006 INFO MainThread:3745493 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
- 2025-12-14 15:37:01,007 INFO MainThread:3745493 [wandb_setup.py:_flush():80] Configure stats pid to 3745493
3
- 2025-12-14 15:37:01,008 INFO MainThread:3745493 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
- 2025-12-14 15:37:01,009 INFO MainThread:3745493 [wandb_setup.py:_flush():80] Loading settings from /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/wandb/settings
5
- 2025-12-14 15:37:01,010 INFO MainThread:3745493 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
- 2025-12-14 15:37:01,011 INFO MainThread:3745493 [wandb_init.py:setup_run_log_directory():714] Logging user logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_153700-c0ptjfpp/logs/debug.log
7
- 2025-12-14 15:37:01,013 INFO MainThread:3745493 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_153700-c0ptjfpp/logs/debug-internal.log
8
- 2025-12-14 15:37:01,014 INFO MainThread:3745493 [wandb_init.py:init():841] calling init triggers
9
- 2025-12-14 15:37:01,015 INFO MainThread:3745493 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
- config: {'run_name': 'OLMo-1B-as_fm3_omi2', 'seed': 6198, 'epoch': None, 'dry_run': False, 'model': {'d_model': 2048, 'n_heads': 16, 'n_kv_heads': None, 'clip_qkv': None, 'n_layers': 16, 'mlp_ratio': 8, 'mlp_hidden_size': None, 'activation_type': 'swiglu', 'block_type': 'sequential', 'block_group_size': 1, 'alibi': False, 'alibi_bias_max': 8.0, 'rope': True, 'rope_full_precision': True, 'rope_theta': 10000, 'flash_attention': False, 'attention_dropout': 0.0, 'multi_query_attention': False, 'attention_layer_norm': False, 'residual_dropout': 0.0, 'embedding_dropout': 0.0, 'embedding_layer_norm': False, 'layer_norm_type': 'default', 'layer_norm_with_affine': False, 'layer_norm_eps': 1e-05, 'attention_layer_norm_with_affine': False, 'max_sequence_length': 2048, 'include_bias': False, 'bias_for_layer_norm': False, 'scale_logits': False, 'vocab_size': 32000, 'embedding_size': 32000, 'weight_tying': True, 'eos_token_id': 0, 'pad_token_id': 1, 'init_device': 'cuda', 'init_fn': 'normal', 'init_std': 0.02, 'init_cutoff_factor': 3.0, 'precision': 'amp_bf16', 'scale_emb_init': False, 'emb_init_std': None, 'norm_after': False}, 'optimizer': {'name': 'adamw', 'learning_rate': 0.0005, 'weight_decay': 0.1, 'betas': (0.9, 0.95), 'eps': 1e-08, 'no_decay_norm_and_bias': None, 'selective_updates': False, 'decay_norm_and_bias': True, 'decay_embeddings': True, 'metrics_log_interval': 10, 'record_update_metrics': False}, 'scheduler': {'name': 'cosine_with_warmup', 'units': 'steps', 't_warmup': 2000, 't_max': None, 'alpha_f': 0.1, 'grad_clip_warmup_steps': None, 'grad_clip_warmup_factor': None, 'warmup_min_lr': 0.0}, 'data': {'paths': ['data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds'], 'memmap_dtype': 'uint16', 'datasets': None, 'label_mask_paths': None, 'pad_direction': 'right', 'generate_attention_mask': False, 'generate_doc_lengths': False, 'num_workers': 32, 'drop_last': True, 'pin_memory': True, 'prefetch_factor': 8, 'persistent_workers': True, 'timeout': 0, 'seed': None, 'instance_filter': None, 'custom_dataset': None}, 'restore_dataloader': True, 'fast_forward_batches': None, 'evaluators': [], 'eval_interval': 5000, 'tokenizer': {'identifier': 'meta-llama/Llama-2-7b-hf', 'truncate_direction': 'right'}, 'save_folder': 'checkpoints/OLMo-1B-as_fm3_omi2', 'remote_save_folder': None, 'canceled_check_interval': 6000, 'save_interval': 3000, 'save_interval_unsharded': 3000, 'save_interval_ephemeral': None, 'save_num_checkpoints_to_keep': -1, 'save_num_unsharded_checkpoints_to_keep': -1, 'save_overwrite': True, 'force_save_unsharded': False, 'no_pre_train_checkpoint': False, 'load_path': None, 'load_path_sharded_checkpointer': None, 'try_load_latest_save': False, 'reset_optimizer_state': False, 'reset_trainer_state': False, 'sharded_checkpointer': 'torch_legacy', 'new_style_checkpoints': None, 'max_duration': '1ep', 'global_train_batch_size': 512, 'device_train_batch_size': 512, 'device_train_microbatch_size': 16, 'device_eval_batch_size': 16, 'eval_subset_num_batches': -1, 'eval_on_load': False, 'device_train_grad_accum': 32, 'max_grad_norm': 1.0, 'max_grad_norm_ratio': None, 'precision': 'amp_bf16', 'speed_monitor': {'window_size': 20, 'gpu_flops_available': None}, 'console_log_interval': 1, 'gen1_gc_interval': 1, 'compile': None, 'distributed_strategy': 'fsdp', 'fsdp': {'use_orig_params': True, 'sharding_strategy': <ShardingStrategy.FULL_SHARD: 1>, 'wrapping_strategy': None, 'precision': 'mixed', 'hybrid_sharding_num_model_replicas': None}, 'ddp': {'grad_sync_mode': 'batch', 'find_unused_params': False}, 'single': {'device': 'auto'}, 'softmax_auxiliary_loss': False, 'auxiliary_loss_multiplier': 0.0001, 'time_limit': None, 'extra_steps_after_cancel': 10, 'early_stopping_factor': None, 'save_data_indices': True, 'python_profiling': False, 'torch_profiling': False, 'stop_at': None, 'stop_after': None, 'activation_checkpointing': None, 'fused_loss': None, 'hf_datasets_cache_dir': None, 'module_outputs_save_steps': None, '_wandb': {}}
11
- 2025-12-14 15:37:01,016 INFO MainThread:3745493 [wandb_init.py:init():889] starting backend
12
- 2025-12-14 15:37:01,265 INFO MainThread:3745493 [wandb_init.py:init():892] sending inform_init request
13
- 2025-12-14 15:37:01,273 INFO MainThread:3745493 [wandb_init.py:init():900] backend started and connected
14
- 2025-12-14 15:37:01,276 INFO MainThread:3745493 [wandb_init.py:init():970] updated telemetry
15
- 2025-12-14 15:37:01,277 INFO MainThread:3745493 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
- 2025-12-14 15:37:02,024 INFO MainThread:3745493 [wandb_init.py:init():1041] starting run threads in backend
17
- 2025-12-14 15:37:02,115 INFO MainThread:3745493 [wandb_run.py:_console_start():2521] atexit reg
18
- 2025-12-14 15:37:02,116 INFO MainThread:3745493 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
- 2025-12-14 15:37:02,117 INFO MainThread:3745493 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
- 2025-12-14 15:37:02,118 INFO MainThread:3745493 [wandb_run.py:_redirect():2461] Redirects installed.
21
- 2025-12-14 15:37:02,120 INFO MainThread:3745493 [wandb_init.py:init():1081] run started, returning control to user process
22
- 2025-12-14 15:37:06,118 INFO wandb-AsyncioManager-main:3745493 [service_client.py:_forward_responses():80] Reached EOF.
23
- 2025-12-14 15:37:06,120 INFO wandb-AsyncioManager-main:3745493 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153700-c0ptjfpp/run-c0ptjfpp.wandb DELETED
Binary file (58.7 kB)
 
models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/config.yaml DELETED
@@ -1,579 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.23.1
4
- e:
5
- mots0eu19y85h2hcucy8g5oum3x38ji3:
6
- args:
7
- - pretraining/configs/RL-1B.yaml
8
- codePath: OLMo/scripts/train.py
9
- codePathLocal: OLMo/scripts/train.py
10
- cpu_count: 112
11
- cpu_count_logical: 224
12
- cudaVersion: "13.0"
13
- disk:
14
- /:
15
- total: "2055141851136"
16
- used: "49254457344"
17
18
- executable: /opt/conda/bin/python
19
- gpu: NVIDIA H100 80GB HBM3
20
- gpu_count: 1
21
- gpu_nvidia:
22
- - architecture: Hopper
23
- cudaCores: 16896
24
- memoryTotal: "85520809984"
25
- name: NVIDIA H100 80GB HBM3
26
- uuid: GPU-4c999b2a-2578-9e62-0539-4b826d85fda8
27
- host: serv-3342
28
- memory:
29
- total: "2164176814080"
30
- os: Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35
31
- program: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py
32
- python: CPython 3.11.11
33
- root: checkpoints/OLMo-1B-as_fm3_omi2/wandb
34
- slurm:
35
- cluster_name: pegasus
36
- conf: /etc/slurm/slurm.conf
37
- cpu_bind: quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000
38
- cpu_bind_list: 0x000000000001FE00000000000000000000000001FE00000000000000
39
- cpu_bind_type: 'mask_cpu:'
40
- cpu_bind_verbose: quiet
41
- cpus_on_node: "16"
42
- cpus_per_task: "16"
43
- distribution: cyclic
44
- gpus: "1"
45
- gpus_on_node: "1"
46
- gtids: "0"
47
- job_cpus_per_node: "16"
48
- job_end_time: "1765734524"
49
- job_gid: "8000"
50
- job_group: iml
51
- job_id: "2383756"
52
- job_name: bash
53
- job_nodelist: serv-3342
54
- job_num_nodes: "1"
55
- job_partition: H100
56
- job_qos: normal
57
- job_start_time: "1765720124"
58
- job_uid: "13262"
59
- job_user: nguyen
60
- jobid: "2383756"
61
- launch_node_ipaddr: 192.168.33.114
62
- localid: "0"
63
- mem_per_cpu: "16384"
64
- mpi_type: pmix
65
- nnodes: "1"
66
- nodeid: "0"
67
- nodelist: serv-3342
68
- nprocs: "1"
69
- ntasks: "1"
70
- oom_kill_step: "0"
71
- pmix_mapping_serv: (vector,(0,1,1))
72
- pmixp_abort_agent_port: "33735"
73
- prio_process: "1"
74
- procid: "0"
75
- pty_port: "45219"
76
- pty_win_col: "156"
77
- pty_win_row: "41"
78
- srun_comm_host: 192.168.33.114
79
- srun_comm_port: "35153"
80
- step_gpus: "5"
81
- step_id: "0"
82
- step_launcher_port: "35153"
83
- step_nodelist: serv-3342
84
- step_num_nodes: "1"
85
- step_num_tasks: "1"
86
- step_tasks_per_node: "1"
87
- stepid: "0"
88
- submit_dir: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain
89
- submit_host: login1
90
- task_pid: "3684902"
91
- tasks_per_node: "1"
92
- topology_addr: serv-3342
93
- topology_addr_pattern: node
94
- tres_bind: gres/gpu:per_task:1
95
- tres_per_task: cpu=16,gres/gpu=1
96
- umask: "0022"
97
- startedAt: "2025-12-14T14:37:31.123422Z"
98
- writerId: mots0eu19y85h2hcucy8g5oum3x38ji3
99
- m: []
100
- python_version: 3.11.11
101
- t:
102
- "1":
103
- - 1
104
- - 5
105
- - 11
106
- - 41
107
- - 49
108
- - 51
109
- - 53
110
- "2":
111
- - 1
112
- - 5
113
- - 11
114
- - 41
115
- - 49
116
- - 51
117
- - 53
118
- "3":
119
- - 2
120
- - 13
121
- - 15
122
- - 16
123
- - 61
124
- "4": 3.11.11
125
- "5": 0.23.1
126
- "6": 4.57.3
127
- "10":
128
- - 19
129
- "12": 0.23.1
130
- "13": linux-x86_64
131
- activation_checkpointing:
132
- value: null
133
- auxiliary_loss_multiplier:
134
- value: 0.0001
135
- canceled_check_interval:
136
- value: 6000
137
- compile:
138
- value: null
139
- console_log_interval:
140
- value: 1
141
- data:
142
- value:
143
- custom_dataset: null
144
- datasets: null
145
- drop_last: true
146
- generate_attention_mask: false
147
- generate_doc_lengths: false
148
- instance_filter: null
149
- label_mask_paths: null
150
- memmap_dtype: uint16
151
- num_workers: 32
152
- pad_direction: right
153
- paths:
154
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds
155
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds
156
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds
157
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds
158
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds
159
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds
160
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds
161
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds
162
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds
163
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds
164
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds
165
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds
166
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds
167
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds
168
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds
169
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds
170
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds
171
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds
172
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds
173
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds
174
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds
175
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds
176
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds
177
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds
178
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds
179
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds
180
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds
181
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds
182
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds
183
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds
184
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds
185
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds
186
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds
187
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds
188
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds
189
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds
190
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds
191
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds
192
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds
193
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds
194
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds
195
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds
196
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds
197
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds
198
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds
199
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds
200
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds
201
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds
202
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds
203
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds
204
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds
205
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds
206
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds
207
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds
208
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds
209
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds
210
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds
211
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds
212
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds
213
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds
214
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds
215
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds
216
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds
217
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds
218
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds
219
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds
220
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds
221
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds
222
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds
223
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds
224
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds
225
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds
226
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds
227
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds
228
- - data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds
229
- - data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds
230
- - data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds
231
- - data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds
232
- - data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds
233
- - data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds
234
- - data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds
235
- - data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds
236
- - data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds
237
- - data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds
238
- - data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds
239
- - data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds
240
- - data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds
241
- - data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds
242
- - data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds
243
- - data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds
244
- - data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds
245
- - data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds
246
- - data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds
247
- - data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds
248
- - data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds
249
- - data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds
250
- - data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds
251
- - data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds
252
- - data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds
253
- - data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds
254
- - data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds
255
- - data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds
256
- - data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds
257
- - data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds
258
- - data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds
259
- - data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds
260
- - data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds
261
- - data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds
262
- - data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds
263
- - data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds
264
- - data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds
265
- - data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds
266
- - data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds
267
- - data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds
268
- - data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds
269
- - data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds
270
- - data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds
271
- - data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds
272
- - data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds
273
- - data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds
274
- - data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds
275
- - data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds
276
- - data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds
277
- - data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds
278
- - data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds
279
- - data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds
280
- - data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds
281
- - data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds
282
- - data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds
283
- - data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds
284
- - data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds
285
- - data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds
286
- - data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds
287
- - data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds
288
- - data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds
289
- - data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds
290
- - data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds
291
- - data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds
292
- - data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds
293
- - data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds
294
- - data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds
295
- - data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds
296
- - data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds
297
- - data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds
298
- - data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds
299
- - data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds
300
- - data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds
301
- - data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds
302
- - data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds
303
- - data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds
304
- - data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds
305
- - data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds
306
- - data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds
307
- - data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds
308
- - data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds
309
- - data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds
310
- - data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds
311
- - data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds
312
- - data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds
313
- - data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds
314
- - data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds
315
- - data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds
316
- - data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds
317
- - data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds
318
- - data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds
319
- - data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds
320
- - data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds
321
- - data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds
322
- - data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds
323
- - data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds
324
- - data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds
325
- - data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds
326
- - data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds
327
- - data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds
328
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds
329
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds
330
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds
331
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds
332
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds
333
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds
334
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds
335
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds
336
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds
337
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds
338
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds
339
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds
340
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds
341
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds
342
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds
343
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds
344
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds
345
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds
346
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds
347
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds
348
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds
349
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds
350
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds
351
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds
352
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds
353
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds
354
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds
355
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds
356
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds
357
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds
358
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds
359
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds
360
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds
361
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds
362
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds
363
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds
364
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds
365
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds
366
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds
367
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds
368
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds
369
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds
370
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds
371
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds
372
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds
373
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds
374
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds
375
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds
376
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds
377
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds
378
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds
379
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds
380
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds
381
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds
382
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds
383
- persistent_workers: true
384
- pin_memory: true
385
- prefetch_factor: 8
386
- seed: null
387
- timeout: 0
388
- ddp:
389
- value:
390
- find_unused_params: false
391
- grad_sync_mode: batch
392
- device_eval_batch_size:
393
- value: 16
394
- device_train_batch_size:
395
- value: 512
396
- device_train_grad_accum:
397
- value: 32
398
- device_train_microbatch_size:
399
- value: 16
400
- distributed_strategy:
401
- value: fsdp
402
- dry_run:
403
- value: false
404
- early_stopping_factor:
405
- value: null
406
- epoch:
407
- value: null
408
- eval_interval:
409
- value: 5000
410
- eval_on_load:
411
- value: false
412
- eval_subset_num_batches:
413
- value: -1
414
- evaluators:
415
- value: []
416
- extra_steps_after_cancel:
417
- value: 10
418
- fast_forward_batches:
419
- value: null
420
- force_save_unsharded:
421
- value: false
422
- fsdp:
423
- value:
424
- hybrid_sharding_num_model_replicas: null
425
- precision: mixed
426
- sharding_strategy: FULL_SHARD
427
- use_orig_params: true
428
- wrapping_strategy: null
429
- fused_loss:
430
- value: null
431
- gen1_gc_interval:
432
- value: 1
433
- global_train_batch_size:
434
- value: 512
435
- hf_datasets_cache_dir:
436
- value: null
437
- load_path:
438
- value: null
439
- load_path_sharded_checkpointer:
440
- value: null
441
- max_duration:
442
- value: 1ep
443
- max_grad_norm:
444
- value: 1
445
- max_grad_norm_ratio:
446
- value: null
447
- model:
448
- value:
449
- activation_type: swiglu
450
- alibi: false
451
- alibi_bias_max: 8
452
- attention_dropout: 0
453
- attention_layer_norm: false
454
- attention_layer_norm_with_affine: false
455
- bias_for_layer_norm: false
456
- block_group_size: 1
457
- block_type: sequential
458
- clip_qkv: null
459
- d_model: 2048
460
- emb_init_std: null
461
- embedding_dropout: 0
462
- embedding_layer_norm: false
463
- embedding_size: 32000
464
- eos_token_id: 0
465
- flash_attention: false
466
- include_bias: false
467
- init_cutoff_factor: 3
468
- init_device: cuda
469
- init_fn: normal
470
- init_std: 0.02
471
- layer_norm_eps: 1e-05
472
- layer_norm_type: default
473
- layer_norm_with_affine: false
474
- max_sequence_length: 2048
475
- mlp_hidden_size: null
476
- mlp_ratio: 8
477
- multi_query_attention: false
478
- n_heads: 16
479
- n_kv_heads: null
480
- n_layers: 16
481
- norm_after: false
482
- pad_token_id: 1
483
- precision: amp_bf16
484
- residual_dropout: 0
485
- rope: true
486
- rope_full_precision: true
487
- rope_theta: 10000
488
- scale_emb_init: false
489
- scale_logits: false
490
- vocab_size: 32000
491
- weight_tying: true
492
- module_outputs_save_steps:
493
- value: null
494
- new_style_checkpoints:
495
- value: null
496
- no_pre_train_checkpoint:
497
- value: false
498
- optimizer:
499
- value:
500
- betas:
501
- - 0.9
502
- - 0.95
503
- decay_embeddings: true
504
- decay_norm_and_bias: true
505
- eps: 1e-08
506
- learning_rate: 0.0005
507
- metrics_log_interval: 10
508
- name: adamw
509
- no_decay_norm_and_bias: null
510
- record_update_metrics: false
511
- selective_updates: false
512
- weight_decay: 0.1
513
- precision:
514
- value: amp_bf16
515
- python_profiling:
516
- value: false
517
- remote_save_folder:
518
- value: null
519
- reset_optimizer_state:
520
- value: false
521
- reset_trainer_state:
522
- value: false
523
- restore_dataloader:
524
- value: true
525
- run_name:
526
- value: OLMo-1B-as_fm3_omi2
527
- save_data_indices:
528
- value: true
529
- save_folder:
530
- value: checkpoints/OLMo-1B-as_fm3_omi2
531
- save_interval:
532
- value: 3000
533
- save_interval_ephemeral:
534
- value: null
535
- save_interval_unsharded:
536
- value: 3000
537
- save_num_checkpoints_to_keep:
538
- value: -1
539
- save_num_unsharded_checkpoints_to_keep:
540
- value: -1
541
- save_overwrite:
542
- value: true
543
- scheduler:
544
- value:
545
- alpha_f: 0.1
546
- grad_clip_warmup_factor: null
547
- grad_clip_warmup_steps: null
548
- name: cosine_with_warmup
549
- t_max: null
550
- t_warmup: 2000
551
- units: steps
552
- warmup_min_lr: 0
553
- seed:
554
- value: 6198
555
- sharded_checkpointer:
556
- value: torch_legacy
557
- single:
558
- value:
559
- device: auto
560
- softmax_auxiliary_loss:
561
- value: false
562
- speed_monitor:
563
- value:
564
- gpu_flops_available: null
565
- window_size: 20
566
- stop_after:
567
- value: null
568
- stop_at:
569
- value: null
570
- time_limit:
571
- value: null
572
- tokenizer:
573
- value:
574
- identifier: meta-llama/Llama-2-7b-hf
575
- truncate_direction: right
576
- torch_profiling:
577
- value: false
578
- try_load_latest_save:
579
- value: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/output.log DELETED
@@ -1,71 +0,0 @@
1
- [2025-12-14 15:37:34] INFO  [olmo.data.iterable_dataset:79, rank=0] Saving global data order indices...
2
- [2025-12-14 15:37:35] INFO  [olmo.data.iterable_dataset:88, rank=0] Global data order indices saved to 'checkpoints/OLMo-1B-as_fm3_omi2/train_data/global_indices.npy'
3
- [2025-12-14 15:37:35] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/utils/data/dataloader.py:624: UserWarning: This DataLoader will create 32 worker processes in total. Our suggested max number of worker in current system is 16, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
4
- warnings.warn(
5
- tokenizer.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 1.84M/1.84M [00:00<00:00, 9.47MB/s]
6
-
7
- [2025-12-14 15:37:36] INFO  [train:139, rank=0] Building model...
8
- [2025-12-14 15:37:36] INFO  [olmo.model:1174, rank=0] Initializing model parameters...
9
- [2025-12-14 15:37:36] INFO  [train:141, rank=0] Total number of parameters: 1,139,277,824
10
- [2025-12-14 15:37:36] INFO  [train:142, rank=0] Number of non-embedding parameters: 1,073,741,824
11
- [2025-12-14 15:37:36] INFO  [train:143, rank=0] Peak GPU Memory (MB) before fsdp: 4559
12
- [2025-12-14 15:37:36] INFO  [train:172, rank=0] Wrapping model with FSDP...
13
- [2025-12-14 15:37:36] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_init_utils.py:444: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1.
14
- warnings.warn(
15
-
16
- [2025-12-14 15:37:36] INFO  [olmo.model:1174, rank=0] Initializing model parameters...
17
- [2025-12-14 15:37:36] INFO  [train:232, rank=0] Peak GPU Memory (MB) after fsdp: 9116
18
- [2025-12-14 15:37:36] INFO  [train:233, rank=0] Model:
19
- [2025-12-14 15:37:36] INFO  [train:234, rank=0] FullyShardedDataParallel(
20
- (_fsdp_wrapped_module): OLMo(
21
- (transformer): ModuleDict(
22
- (wte): Embedding(32000, 2048)
23
- (emb_drop): Dropout(p=0.0, inplace=False)
24
- (ln_f): LayerNorm()
25
- (blocks): ModuleList(
26
- (0-15): 16 x OLMoSequentialBlock(
27
- (dropout): Dropout(p=0.0, inplace=False)
28
- (act): SwiGLU()
29
- (attn_out): Linear(in_features=2048, out_features=2048, bias=False)
30
- (ff_out): Linear(in_features=8192, out_features=2048, bias=False)
31
- (rotary_emb): RotaryEmbedding()
32
- (att_proj): Linear(in_features=2048, out_features=6144, bias=False)
33
- (ff_proj): Linear(in_features=2048, out_features=16384, bias=False)
34
- (attn_norm): LayerNorm()
35
- (ff_norm): LayerNorm()
36
- )
37
- )
38
- )
39
- )
40
- )
41
- [2025-12-14 15:37:36] INFO  [olmo.optim:944, rank=0] Constructing optimizer with 1 param groups
42
- [2025-12-14 15:37:36] INFO  [train:335, rank=0] Saving pre-train checkpoint...
43
- [2025-12-14 15:37:36] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:690: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
44
- warnings.warn(
45
-
46
- [2025-12-14 15:37:36] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py:773: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned.
47
- warnings.warn(
48
-
49
- [2025-12-14 15:37:36] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py:711: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned.
50
- warnings.warn(
51
-
52
- [2025-12-14 15:37:43] INFO  [olmo.checkpoint:607, rank=0] Saving config...
53
- [2025-12-14 15:37:44] INFO  [train:337, rank=0] Checkpoint saved to checkpoints/OLMo-1B-as_fm3_omi2/step0
54
- [2025-12-14 15:37:44] INFO  [train:340, rank=0] Attempting to load pre-train checkpoint...
55
- [2025-12-14 15:37:47] INFO  [olmo.checkpoint:1040, rank=0] Loading model state...
56
- [2025-12-14 15:37:47] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py:827: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned.
57
- warnings.warn(
58
-
59
- [2025-12-14 15:37:48] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py:864: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned.
60
- warnings.warn(
61
-
62
- [2025-12-14 15:37:48] INFO  [olmo.checkpoint:1044, rank=0] Loading optimizer state...
63
- [2025-12-14 15:37:48] INFO  [olmo.checkpoint:220, rank=0] Flattening sharded optimizer state...
64
- [2025-12-14 15:37:48] INFO  [olmo.checkpoint:234, rank=0] Loading flattened optimizer state...
65
- [2025-12-14 15:37:48] INFO  [olmo.train:409, rank=0] Resetting learning rate...
66
- [2025-12-14 15:37:48] INFO  [olmo.train:421, rank=0] Restoring RNG states...
67
- [2025-12-14 15:37:48] INFO  [train:344, rank=0] Checkpoint successfully loaded
68
- [2025-12-14 15:37:48] INFO  [train:375, rank=0] Starting training...
69
- [2025-12-14 15:37:48] INFO  [olmo.train:979, rank=0] Pre-train system metrics
70
- System/Peak GPU Memory (MB)=9,116
71
- wandb: WARNING The `quiet` argument to `wandb.run.finish()` is deprecated, use `wandb.Settings(quiet=...)` to set this instead.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/requirements.txt DELETED
@@ -1,271 +0,0 @@
1
- scikit-learn==1.8.0
2
- joblib==1.5.2
3
- threadpoolctl==3.6.0
4
- torchmetrics==1.8.2
5
- lightning-utilities==0.15.2
6
- wandb==0.23.1
7
- GitPython==3.1.45
8
- gitdb==4.0.12
9
- smmap==5.0.2
10
- datasets==4.4.1
11
- pandas==2.3.3
12
- multiprocess==0.70.18
13
- pyarrow==22.0.0
14
- tzdata==2025.3
15
- xxhash==3.6.0
16
- ai2-olmo==0.6.0
17
- ai2-olmo-core==2.4.0
18
- cached_path==1.8.0
19
- google-cloud-storage==2.19.0
20
- google-cloud-core==2.5.0
21
- boto3==1.42.9
22
- google-api-core==2.28.1
23
- google-auth==2.43.0
24
- s3transfer==0.16.0
25
- botocore==1.42.9
26
- google-resumable-media==2.8.0
27
- pyasn1_modules==0.4.2
28
- rich==13.9.4
29
- rsa==4.9.1
30
- bettermap==1.3.1
31
- google-crc32c==1.7.1
32
- jmespath==1.0.1
33
- numpy==1.26.4
34
- omegaconf==2.3.0
35
- proto-plus==1.26.1
36
- pyasn1==0.6.1
37
- python-dateutil==2.9.0.post0
38
- antlr4-python3-runtime==4.9.3
39
- zstandard==0.23.0
40
- zipp==3.21.0
41
- yarl==1.22.0
42
- xgrammar==0.1.18
43
- xformers==0.0.29.post2
44
- wrapt==2.0.1
45
- wheel==0.45.1
46
- websockets==15.0.1
47
- wcwidth==0.2.13
48
- watchfiles==1.1.1
49
- vllm==0.8.5.post1
50
- uvloop==0.22.1
51
- uvicorn==0.38.0
52
- urllib3==2.3.0
53
- typing-inspection==0.4.2
54
- typing_extensions==4.15.0
55
- types-dataclasses==0.6.6
56
- typer==0.20.0
57
- truststore==0.10.0
58
- triton==3.2.0
59
- transformers==4.57.3
60
- traitlets==5.14.3
61
- tqdm==4.67.1
62
- torchvision==0.21.0+cu124
63
- torchelastic==0.2.2
64
- torchaudio==2.6.0+cu124
65
- torch==2.6.0+cu124
66
- tokenizers==0.22.1
67
- tiktoken==0.12.0
68
- sympy==1.13.1
69
- starlette==0.50.0
70
- stack_data==0.6.3
71
- soupsieve==2.5
72
- sortedcontainers==2.4.0
73
- sniffio==1.3.1
74
- six==1.17.0
75
- shellingham==1.5.4
76
- setuptools==75.8.0
77
- sentry-sdk==2.47.0
78
- sentencepiece==0.2.1
79
- scipy==1.16.3
80
- safetensors==0.7.0
81
- ruamel.yaml.clib==0.2.8
82
- ruamel.yaml==0.18.10
83
- rpds-py==0.22.3
84
- rignore==0.7.6
85
- rich-toolkit==0.17.0
86
- requests==2.32.3
87
- regex==2025.11.3
88
- referencing==0.36.2
89
- ray==2.52.1
90
- pyzmq==27.1.0
91
- pytz==2024.2
92
- python-multipart==0.0.20
93
- python-json-logger==4.0.0
94
- python-etcd==0.4.5
95
- python-dotenv==1.2.1
96
- Pygments==2.19.1
97
- pydantic-extra-types==2.10.6
98
- pydantic_core==2.41.5
99
- pydantic==2.12.5
100
- pycparser==2.22
101
- pycountry==24.6.1
102
- pycosat==0.6.6
103
- py-cpuinfo==9.0.0
104
- pure_eval==0.2.3
105
- ptyprocess==0.7.0
106
- psutil==6.1.1
107
- protobuf==4.25.8
108
- propcache==0.4.1
109
- prompt_toolkit==3.0.50
110
- prometheus-fastapi-instrumentator==7.1.0
111
- prometheus_client==0.23.1
112
- pluggy==1.5.0
113
- platformdirs==4.3.6
114
- pkgutil_resolve_name==1.3.10
115
- pkginfo==1.12.0
116
- pip==24.3.1
117
- pillow==11.0.0
118
- pickleshare==0.7.5
119
- pexpect==4.9.0
120
- partial-json-parser==0.2.1.1.post7
121
- parso==0.8.4
122
- packaging==24.2
123
- outlines_core==0.1.26
124
- outlines==0.1.11
125
- optree==0.14.0
126
- opentelemetry-semantic-conventions-ai==0.4.13
127
- opentelemetry-semantic-conventions==0.47b0
128
- opentelemetry-sdk==1.26.0
129
- opentelemetry-proto==1.26.0
130
- opentelemetry-exporter-otlp-proto-http==1.26.0
131
- opentelemetry-exporter-otlp-proto-grpc==1.26.0
132
- opentelemetry-exporter-otlp-proto-common==1.26.0
133
- opentelemetry-exporter-otlp==1.26.0
134
- opentelemetry-api==1.26.0
135
- opencv-python-headless==4.12.0.88
136
- openai==2.11.0
137
- nvidia-nvtx-cu12==12.4.127
138
- nvidia-nvjitlink-cu12==12.4.127
139
- nvidia-nccl-cu12==2.21.5
140
- nvidia-cusparselt-cu12==0.6.2
141
- nvidia-cusparse-cu12==12.3.1.170
142
- nvidia-cusolver-cu12==11.6.1.9
143
- nvidia-curand-cu12==10.3.5.147
144
- nvidia-cufft-cu12==11.2.1.3
145
- nvidia-cudnn-cu12==9.1.0.70
146
- nvidia-cuda-runtime-cu12==12.4.127
147
- nvidia-cuda-nvrtc-cu12==12.4.127
148
- nvidia-cuda-cupti-cu12==12.4.127
149
- nvidia-cublas-cu12==12.4.5.8
150
- numba==0.61.2
151
- ninja==1.11.1.3
152
- networkx==3.4.2
153
- nest-asyncio==1.6.0
154
- multidict==6.7.0
155
- msgspec==0.20.0
156
- msgpack==1.1.2
157
- mpmath==1.3.0
158
- more-itertools==10.6.0
159
- mistral_common==1.8.6
160
- menuinst==2.2.0
161
- mdurl==0.1.2
162
- matplotlib-inline==0.1.7
163
- math-verify==0.8.0
164
- markdown-it-py==4.0.0
165
- lm-format-enforcer==0.10.12
166
- llvmlite==0.44.0
167
- llguidance==0.7.30
168
- lintrunner==0.12.7
169
- lief==0.14.1
170
- libmambapy==2.0.5
171
- libarchive-c==5.1
172
- latex2sympy2_extended==1.10.2
173
- lark==1.2.2
174
- jsonschema-specifications==2024.10.1
175
- jsonschema==4.23.0
176
- jsonpointer==3.0.0
177
- jsonpatch==1.33
178
- jiter==0.12.0
179
- Jinja2==3.1.5
180
- jedi==0.19.2
181
- ipython==8.31.0
182
- interegular==0.3.3
183
- importlib_resources==6.5.2
184
- importlib_metadata==8.0.0
185
- idna==3.10
186
- hypothesis==6.124.7
187
- hyperframe==6.0.1
188
- huggingface-hub==0.36.0
189
- httpx==0.28.1
190
- httptools==0.7.1
191
- httpcore==1.0.9
192
- hpack==4.0.0
193
- hf-xet==1.2.0
194
- h2==4.1.0
195
- h11==0.16.0
196
- grpcio==1.76.0
197
- googleapis-common-protos==1.72.0
198
- gguf==0.17.1
199
- fsspec==2024.12.0
200
- frozenlist==1.8.0
201
- frozendict==2.4.6
202
- filelock==3.17.0
203
- fastrlock==0.8.3
204
- fastar==0.8.0
205
- fastapi-cloud-cli==0.6.0
206
- fastapi-cli==0.0.16
207
- fastapi==0.124.4
208
- expecttest==0.3.0
209
- executing==2.1.0
210
- exceptiongroup==1.2.2
211
- email-validator==2.3.0
212
- einops==0.8.1
213
- dnspython==2.7.0
214
- distro==1.9.0
215
- diskcache==5.6.3
216
- dill==0.4.0
217
- depyf==0.18.0
218
- Deprecated==1.3.1
219
- decorator==5.1.1
220
- cupy-cuda12x==13.6.0
221
- conda_package_streaming==0.11.0
222
- conda-package-handling==2.4.0
223
- conda-libmamba-solver==25.1.1
224
- conda_index==0.5.0
225
- conda-build==25.1.1
226
- conda==25.1.0
227
- compressed-tensors==0.9.3
228
- colorama==0.4.6
229
- cmake==3.31.4
230
- cloudpickle==3.1.2
231
- click==8.1.8
232
- charset-normalizer==3.4.1
233
- chardet==5.2.0
234
- cffi==1.17.1
235
- certifi==2024.12.14
236
- cachetools==6.2.3
237
- boltons==24.0.0
238
- blake3==1.0.8
239
- beautifulsoup4==4.12.3
240
- attrs==25.1.0
241
- astunparse==1.6.3
242
- asttokens==3.0.0
243
- astor==0.8.1
244
- archspec==0.2.5
245
- anyio==4.12.0
246
- annotated-types==0.7.0
247
- annotated-doc==0.0.4
248
- airportsdata==20250909
249
- aiosignal==1.4.0
250
- aiohttp==3.13.2
251
- aiohappyeyeballs==2.6.1
252
- PyYAML==6.0.2
253
- PySocks==1.7.1
254
- MarkupSafe==3.0.2
255
- Brotli==1.1.0
256
- zipp==3.19.2
257
- wheel==0.43.0
258
- typing_extensions==4.12.2
259
- typeguard==4.3.0
260
- tomli==2.0.1
261
- platformdirs==4.2.2
262
- packaging==24.2
263
- more-itertools==10.3.0
264
- jaraco.text==3.12.1
265
- jaraco.functools==4.0.1
266
- jaraco.context==5.3.0
267
- jaraco.collections==5.1.0
268
- inflect==7.3.1
269
- importlib_metadata==8.0.0
270
- backports.tarfile==1.2.0
271
- autocommand==2.2.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/wandb-metadata.json DELETED
@@ -1,103 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35",
3
- "python": "CPython 3.11.11",
4
- "startedAt": "2025-12-14T14:37:31.123422Z",
5
- "args": [
6
- "pretraining/configs/RL-1B.yaml"
7
- ],
8
- "program": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py",
9
- "codePath": "OLMo/scripts/train.py",
10
- "codePathLocal": "OLMo/scripts/train.py",
11
- "email": "[email protected]",
12
- "root": "checkpoints/OLMo-1B-as_fm3_omi2/wandb",
13
- "host": "serv-3342",
14
- "executable": "/opt/conda/bin/python",
15
- "cpu_count": 112,
16
- "cpu_count_logical": 224,
17
- "gpu": "NVIDIA H100 80GB HBM3",
18
- "gpu_count": 1,
19
- "disk": {
20
- "/": {
21
- "total": "2055141851136",
22
- "used": "49254457344"
23
- }
24
- },
25
- "memory": {
26
- "total": "2164176814080"
27
- },
28
- "gpu_nvidia": [
29
- {
30
- "name": "NVIDIA H100 80GB HBM3",
31
- "memoryTotal": "85520809984",
32
- "cudaCores": 16896,
33
- "architecture": "Hopper",
34
- "uuid": "GPU-4c999b2a-2578-9e62-0539-4b826d85fda8"
35
- }
36
- ],
37
- "cudaVersion": "13.0",
38
- "slurm": {
39
- "cluster_name": "pegasus",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpu_bind": "quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000",
42
- "cpu_bind_list": "0x000000000001FE00000000000000000000000001FE00000000000000",
43
- "cpu_bind_type": "mask_cpu:",
44
- "cpu_bind_verbose": "quiet",
45
- "cpus_on_node": "16",
46
- "cpus_per_task": "16",
47
- "distribution": "cyclic",
48
- "gpus": "1",
49
- "gpus_on_node": "1",
50
- "gtids": "0",
51
- "job_cpus_per_node": "16",
52
- "job_end_time": "1765734524",
53
- "job_gid": "8000",
54
- "job_group": "iml",
55
- "job_id": "2383756",
56
- "job_name": "bash",
57
- "job_nodelist": "serv-3342",
58
- "job_num_nodes": "1",
59
- "job_partition": "H100",
60
- "job_qos": "normal",
61
- "job_start_time": "1765720124",
62
- "job_uid": "13262",
63
- "job_user": "nguyen",
64
- "jobid": "2383756",
65
- "launch_node_ipaddr": "192.168.33.114",
66
- "localid": "0",
67
- "mem_per_cpu": "16384",
68
- "mpi_type": "pmix",
69
- "nnodes": "1",
70
- "nodeid": "0",
71
- "nodelist": "serv-3342",
72
- "nprocs": "1",
73
- "ntasks": "1",
74
- "oom_kill_step": "0",
75
- "pmix_mapping_serv": "(vector,(0,1,1))",
76
- "pmixp_abort_agent_port": "33735",
77
- "prio_process": "1",
78
- "procid": "0",
79
- "pty_port": "45219",
80
- "pty_win_col": "156",
81
- "pty_win_row": "41",
82
- "srun_comm_host": "192.168.33.114",
83
- "srun_comm_port": "35153",
84
- "step_gpus": "5",
85
- "step_id": "0",
86
- "step_launcher_port": "35153",
87
- "step_nodelist": "serv-3342",
88
- "step_num_nodes": "1",
89
- "step_num_tasks": "1",
90
- "step_tasks_per_node": "1",
91
- "stepid": "0",
92
- "submit_dir": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain",
93
- "submit_host": "login1",
94
- "task_pid": "3684902",
95
- "tasks_per_node": "1",
96
- "topology_addr": "serv-3342",
97
- "topology_addr_pattern": "node",
98
- "tres_bind": "gres/gpu:per_task:1",
99
- "tres_per_task": "cpu=16,gres/gpu=1",
100
- "umask": "0022"
101
- },
102
- "writerId": "mots0eu19y85h2hcucy8g5oum3x38ji3"
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"System/Peak GPU Memory (MB)":9116.3232421875,"_timestamp":1.765723068840134e+09,"_wandb":{"runtime":30},"_runtime":30.93714515,"_step":0}
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/logs/debug-internal.log DELETED
@@ -1,12 +0,0 @@
1
- {"time":"2025-12-14T15:37:31.412986217+01:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
- {"time":"2025-12-14T15:37:31.680606576+01:00","level":"INFO","msg":"stream: created new stream","id":"i0ugjt0v"}
3
- {"time":"2025-12-14T15:37:31.683856288+01:00","level":"INFO","msg":"handler: started","stream_id":"i0ugjt0v"}
4
- {"time":"2025-12-14T15:37:31.686578199+01:00","level":"INFO","msg":"stream: started","id":"i0ugjt0v"}
5
- {"time":"2025-12-14T15:37:31.686601008+01:00","level":"INFO","msg":"writer: started","stream_id":"i0ugjt0v"}
6
- {"time":"2025-12-14T15:37:31.686623633+01:00","level":"INFO","msg":"sender: started","stream_id":"i0ugjt0v"}
7
- {"time":"2025-12-14T15:38:03.558761809+01:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
8
- {"time":"2025-12-14T15:38:03.861133675+01:00","level":"INFO","msg":"handler: operation stats","stats":{}}
9
- {"time":"2025-12-14T15:38:03.86751239+01:00","level":"INFO","msg":"stream: closing","id":"i0ugjt0v"}
10
- {"time":"2025-12-14T15:38:03.870419678+01:00","level":"INFO","msg":"handler: closed","stream_id":"i0ugjt0v"}
11
- {"time":"2025-12-14T15:38:03.87246051+01:00","level":"INFO","msg":"sender: closed","stream_id":"i0ugjt0v"}
12
- {"time":"2025-12-14T15:38:03.873159055+01:00","level":"INFO","msg":"stream: closed","id":"i0ugjt0v"}
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-12-14 15:37:31,149 INFO MainThread:3746193 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
- 2025-12-14 15:37:31,151 INFO MainThread:3746193 [wandb_setup.py:_flush():80] Configure stats pid to 3746193
3
- 2025-12-14 15:37:31,151 INFO MainThread:3746193 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
- 2025-12-14 15:37:31,152 INFO MainThread:3746193 [wandb_setup.py:_flush():80] Loading settings from /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/wandb/settings
5
- 2025-12-14 15:37:31,153 INFO MainThread:3746193 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
- 2025-12-14 15:37:31,154 INFO MainThread:3746193 [wandb_init.py:setup_run_log_directory():714] Logging user logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_153731-i0ugjt0v/logs/debug.log
7
- 2025-12-14 15:37:31,156 INFO MainThread:3746193 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_153731-i0ugjt0v/logs/debug-internal.log
8
- 2025-12-14 15:37:31,159 INFO MainThread:3746193 [wandb_init.py:init():841] calling init triggers
9
- 2025-12-14 15:37:31,159 INFO MainThread:3746193 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
- config: {'run_name': 'OLMo-1B-as_fm3_omi2', 'seed': 6198, 'epoch': None, 'dry_run': False, 'model': {'d_model': 2048, 'n_heads': 16, 'n_kv_heads': None, 'clip_qkv': None, 'n_layers': 16, 'mlp_ratio': 8, 'mlp_hidden_size': None, 'activation_type': 'swiglu', 'block_type': 'sequential', 'block_group_size': 1, 'alibi': False, 'alibi_bias_max': 8.0, 'rope': True, 'rope_full_precision': True, 'rope_theta': 10000, 'flash_attention': False, 'attention_dropout': 0.0, 'multi_query_attention': False, 'attention_layer_norm': False, 'residual_dropout': 0.0, 'embedding_dropout': 0.0, 'embedding_layer_norm': False, 'layer_norm_type': 'default', 'layer_norm_with_affine': False, 'layer_norm_eps': 1e-05, 'attention_layer_norm_with_affine': False, 'max_sequence_length': 2048, 'include_bias': False, 'bias_for_layer_norm': False, 'scale_logits': False, 'vocab_size': 32000, 'embedding_size': 32000, 'weight_tying': True, 'eos_token_id': 0, 'pad_token_id': 1, 'init_device': 'cuda', 'init_fn': 'normal', 'init_std': 0.02, 'init_cutoff_factor': 3.0, 'precision': 'amp_bf16', 'scale_emb_init': False, 'emb_init_std': None, 'norm_after': False}, 'optimizer': {'name': 'adamw', 'learning_rate': 0.0005, 'weight_decay': 0.1, 'betas': (0.9, 0.95), 'eps': 1e-08, 'no_decay_norm_and_bias': None, 'selective_updates': False, 'decay_norm_and_bias': True, 'decay_embeddings': True, 'metrics_log_interval': 10, 'record_update_metrics': False}, 'scheduler': {'name': 'cosine_with_warmup', 'units': 'steps', 't_warmup': 2000, 't_max': None, 'alpha_f': 0.1, 'grad_clip_warmup_steps': None, 'grad_clip_warmup_factor': None, 'warmup_min_lr': 0.0}, 'data': {'paths': ['data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds'], 'memmap_dtype': 'uint16', 'datasets': None, 'label_mask_paths': None, 'pad_direction': 'right', 'generate_attention_mask': False, 'generate_doc_lengths': False, 'num_workers': 32, 'drop_last': True, 'pin_memory': True, 'prefetch_factor': 8, 'persistent_workers': True, 'timeout': 0, 'seed': None, 'instance_filter': None, 'custom_dataset': None}, 'restore_dataloader': True, 'fast_forward_batches': None, 'evaluators': [], 'eval_interval': 5000, 'tokenizer': {'identifier': 'meta-llama/Llama-2-7b-hf', 'truncate_direction': 'right'}, 'save_folder': 'checkpoints/OLMo-1B-as_fm3_omi2', 'remote_save_folder': None, 'canceled_check_interval': 6000, 'save_interval': 3000, 'save_interval_unsharded': 3000, 'save_interval_ephemeral': None, 'save_num_checkpoints_to_keep': -1, 'save_num_unsharded_checkpoints_to_keep': -1, 'save_overwrite': True, 'force_save_unsharded': False, 'no_pre_train_checkpoint': False, 'load_path': None, 'load_path_sharded_checkpointer': None, 'try_load_latest_save': False, 'reset_optimizer_state': False, 'reset_trainer_state': False, 'sharded_checkpointer': 'torch_legacy', 'new_style_checkpoints': None, 'max_duration': '1ep', 'global_train_batch_size': 512, 'device_train_batch_size': 512, 'device_train_microbatch_size': 16, 'device_eval_batch_size': 16, 'eval_subset_num_batches': -1, 'eval_on_load': False, 'device_train_grad_accum': 32, 'max_grad_norm': 1.0, 'max_grad_norm_ratio': None, 'precision': 'amp_bf16', 'speed_monitor': {'window_size': 20, 'gpu_flops_available': None}, 'console_log_interval': 1, 'gen1_gc_interval': 1, 'compile': None, 'distributed_strategy': 'fsdp', 'fsdp': {'use_orig_params': True, 'sharding_strategy': <ShardingStrategy.FULL_SHARD: 1>, 'wrapping_strategy': None, 'precision': 'mixed', 'hybrid_sharding_num_model_replicas': None}, 'ddp': {'grad_sync_mode': 'batch', 'find_unused_params': False}, 'single': {'device': 'auto'}, 'softmax_auxiliary_loss': False, 'auxiliary_loss_multiplier': 0.0001, 'time_limit': None, 'extra_steps_after_cancel': 10, 'early_stopping_factor': None, 'save_data_indices': True, 'python_profiling': False, 'torch_profiling': False, 'stop_at': None, 'stop_after': None, 'activation_checkpointing': None, 'fused_loss': None, 'hf_datasets_cache_dir': None, 'module_outputs_save_steps': None, '_wandb': {}}
11
- 2025-12-14 15:37:31,160 INFO MainThread:3746193 [wandb_init.py:init():889] starting backend
12
- 2025-12-14 15:37:31,404 INFO MainThread:3746193 [wandb_init.py:init():892] sending inform_init request
13
- 2025-12-14 15:37:31,411 INFO MainThread:3746193 [wandb_init.py:init():900] backend started and connected
14
- 2025-12-14 15:37:31,414 INFO MainThread:3746193 [wandb_init.py:init():970] updated telemetry
15
- 2025-12-14 15:37:31,415 INFO MainThread:3746193 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
- 2025-12-14 15:37:31,929 INFO MainThread:3746193 [wandb_init.py:init():1041] starting run threads in backend
17
- 2025-12-14 15:37:32,020 INFO MainThread:3746193 [wandb_run.py:_console_start():2521] atexit reg
18
- 2025-12-14 15:37:32,021 INFO MainThread:3746193 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
- 2025-12-14 15:37:32,022 INFO MainThread:3746193 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
- 2025-12-14 15:37:32,023 INFO MainThread:3746193 [wandb_run.py:_redirect():2461] Redirects installed.
21
- 2025-12-14 15:37:32,025 INFO MainThread:3746193 [wandb_init.py:init():1081] run started, returning control to user process
22
- 2025-12-14 15:38:02,853 INFO MainThread:3746193 [wandb_run.py:_finish():2287] finishing run marksmans/olmo-debug/i0ugjt0v
23
- 2025-12-14 15:38:02,861 INFO MainThread:3746193 [wandb_run.py:_atexit_cleanup():2486] got exitcode: 1
24
- 2025-12-14 15:38:02,862 INFO MainThread:3746193 [wandb_run.py:_restore():2468] restore
25
- 2025-12-14 15:38:02,863 INFO MainThread:3746193 [wandb_run.py:_restore():2474] restore done
26
- 2025-12-14 15:38:03,864 INFO MainThread:3746193 [wandb_run.py:_footer_sync_info():3862] logging synced files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_153731-i0ugjt0v/run-i0ugjt0v.wandb DELETED
Binary file (37.7 kB)
 
models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/config.yaml DELETED
@@ -1,579 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.23.1
4
- e:
5
- hif91sfokkg4a3putu9qm5wzemlxp65c:
6
- args:
7
- - pretraining/configs/RL-1B.yaml
8
- codePath: OLMo/scripts/train.py
9
- codePathLocal: OLMo/scripts/train.py
10
- cpu_count: 112
11
- cpu_count_logical: 224
12
- cudaVersion: "13.0"
13
- disk:
14
- /:
15
- total: "2055141851136"
16
- used: "49256366080"
17
18
- executable: /opt/conda/bin/python
19
- gpu: NVIDIA H100 80GB HBM3
20
- gpu_count: 1
21
- gpu_nvidia:
22
- - architecture: Hopper
23
- cudaCores: 16896
24
- memoryTotal: "85520809984"
25
- name: NVIDIA H100 80GB HBM3
26
- uuid: GPU-4c999b2a-2578-9e62-0539-4b826d85fda8
27
- host: serv-3342
28
- memory:
29
- total: "2164176814080"
30
- os: Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35
31
- program: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py
32
- python: CPython 3.11.11
33
- root: checkpoints/OLMo-1B-as_fm3_omi2/wandb
34
- slurm:
35
- cluster_name: pegasus
36
- conf: /etc/slurm/slurm.conf
37
- cpu_bind: quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000
38
- cpu_bind_list: 0x000000000001FE00000000000000000000000001FE00000000000000
39
- cpu_bind_type: 'mask_cpu:'
40
- cpu_bind_verbose: quiet
41
- cpus_on_node: "16"
42
- cpus_per_task: "16"
43
- distribution: cyclic
44
- gpus: "1"
45
- gpus_on_node: "1"
46
- gtids: "0"
47
- job_cpus_per_node: "16"
48
- job_end_time: "1765734524"
49
- job_gid: "8000"
50
- job_group: iml
51
- job_id: "2383756"
52
- job_name: bash
53
- job_nodelist: serv-3342
54
- job_num_nodes: "1"
55
- job_partition: H100
56
- job_qos: normal
57
- job_start_time: "1765720124"
58
- job_uid: "13262"
59
- job_user: nguyen
60
- jobid: "2383756"
61
- launch_node_ipaddr: 192.168.33.114
62
- localid: "0"
63
- mem_per_cpu: "16384"
64
- mpi_type: pmix
65
- nnodes: "1"
66
- nodeid: "0"
67
- nodelist: serv-3342
68
- nprocs: "1"
69
- ntasks: "1"
70
- oom_kill_step: "0"
71
- pmix_mapping_serv: (vector,(0,1,1))
72
- pmixp_abort_agent_port: "33735"
73
- prio_process: "1"
74
- procid: "0"
75
- pty_port: "45219"
76
- pty_win_col: "156"
77
- pty_win_row: "41"
78
- srun_comm_host: 192.168.33.114
79
- srun_comm_port: "35153"
80
- step_gpus: "5"
81
- step_id: "0"
82
- step_launcher_port: "35153"
83
- step_nodelist: serv-3342
84
- step_num_nodes: "1"
85
- step_num_tasks: "1"
86
- step_tasks_per_node: "1"
87
- stepid: "0"
88
- submit_dir: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain
89
- submit_host: login1
90
- task_pid: "3684902"
91
- tasks_per_node: "1"
92
- topology_addr: serv-3342
93
- topology_addr_pattern: node
94
- tres_bind: gres/gpu:per_task:1
95
- tres_per_task: cpu=16,gres/gpu=1
96
- umask: "0022"
97
- startedAt: "2025-12-14T14:40:39.859989Z"
98
- writerId: hif91sfokkg4a3putu9qm5wzemlxp65c
99
- m: []
100
- python_version: 3.11.11
101
- t:
102
- "1":
103
- - 1
104
- - 5
105
- - 11
106
- - 41
107
- - 49
108
- - 51
109
- - 53
110
- "2":
111
- - 1
112
- - 5
113
- - 11
114
- - 41
115
- - 49
116
- - 51
117
- - 53
118
- "3":
119
- - 2
120
- - 13
121
- - 15
122
- - 16
123
- - 61
124
- "4": 3.11.11
125
- "5": 0.23.1
126
- "6": 4.57.3
127
- "10":
128
- - 19
129
- "12": 0.23.1
130
- "13": linux-x86_64
131
- activation_checkpointing:
132
- value: null
133
- auxiliary_loss_multiplier:
134
- value: 0.0001
135
- canceled_check_interval:
136
- value: 6000
137
- compile:
138
- value: null
139
- console_log_interval:
140
- value: 1
141
- data:
142
- value:
143
- custom_dataset: null
144
- datasets: null
145
- drop_last: true
146
- generate_attention_mask: false
147
- generate_doc_lengths: false
148
- instance_filter: null
149
- label_mask_paths: null
150
- memmap_dtype: uint16
151
- num_workers: 32
152
- pad_direction: right
153
- paths:
154
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds
155
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds
156
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds
157
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds
158
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds
159
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds
160
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds
161
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds
162
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds
163
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds
164
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds
165
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds
166
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds
167
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds
168
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds
169
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds
170
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds
171
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds
172
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds
173
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds
174
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds
175
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds
176
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds
177
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds
178
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds
179
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds
180
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds
181
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds
182
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds
183
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds
184
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds
185
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds
186
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds
187
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds
188
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds
189
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds
190
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds
191
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds
192
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds
193
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds
194
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds
195
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds
196
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds
197
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds
198
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds
199
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds
200
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds
201
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds
202
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds
203
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds
204
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds
205
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds
206
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds
207
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds
208
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds
209
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds
210
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds
211
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds
212
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds
213
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds
214
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds
215
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds
216
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds
217
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds
218
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds
219
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds
220
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds
221
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds
222
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds
223
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds
224
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds
225
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds
226
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds
227
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds
228
- - data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds
229
- - data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds
230
- - data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds
231
- - data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds
232
- - data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds
233
- - data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds
234
- - data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds
235
- - data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds
236
- - data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds
237
- - data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds
238
- - data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds
239
- - data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds
240
- - data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds
241
- - data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds
242
- - data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds
243
- - data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds
244
- - data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds
245
- - data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds
246
- - data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds
247
- - data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds
248
- - data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds
249
- - data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds
250
- - data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds
251
- - data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds
252
- - data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds
253
- - data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds
254
- - data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds
255
- - data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds
256
- - data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds
257
- - data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds
258
- - data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds
259
- - data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds
260
- - data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds
261
- - data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds
262
- - data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds
263
- - data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds
264
- - data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds
265
- - data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds
266
- - data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds
267
- - data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds
268
- - data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds
269
- - data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds
270
- - data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds
271
- - data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds
272
- - data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds
273
- - data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds
274
- - data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds
275
- - data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds
276
- - data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds
277
- - data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds
278
- - data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds
279
- - data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds
280
- - data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds
281
- - data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds
282
- - data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds
283
- - data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds
284
- - data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds
285
- - data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds
286
- - data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds
287
- - data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds
288
- - data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds
289
- - data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds
290
- - data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds
291
- - data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds
292
- - data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds
293
- - data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds
294
- - data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds
295
- - data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds
296
- - data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds
297
- - data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds
298
- - data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds
299
- - data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds
300
- - data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds
301
- - data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds
302
- - data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds
303
- - data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds
304
- - data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds
305
- - data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds
306
- - data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds
307
- - data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds
308
- - data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds
309
- - data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds
310
- - data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds
311
- - data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds
312
- - data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds
313
- - data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds
314
- - data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds
315
- - data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds
316
- - data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds
317
- - data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds
318
- - data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds
319
- - data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds
320
- - data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds
321
- - data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds
322
- - data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds
323
- - data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds
324
- - data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds
325
- - data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds
326
- - data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds
327
- - data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds
328
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds
329
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds
330
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds
331
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds
332
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds
333
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds
334
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds
335
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds
336
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds
337
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds
338
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds
339
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds
340
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds
341
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds
342
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds
343
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds
344
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds
345
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds
346
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds
347
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds
348
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds
349
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds
350
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds
351
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds
352
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds
353
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds
354
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds
355
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds
356
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds
357
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds
358
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds
359
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds
360
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds
361
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds
362
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds
363
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds
364
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds
365
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds
366
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds
367
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds
368
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds
369
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds
370
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds
371
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds
372
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds
373
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds
374
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds
375
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds
376
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds
377
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds
378
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds
379
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds
380
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds
381
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds
382
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds
383
- persistent_workers: true
384
- pin_memory: true
385
- prefetch_factor: 8
386
- seed: null
387
- timeout: 0
388
- ddp:
389
- value:
390
- find_unused_params: false
391
- grad_sync_mode: batch
392
- device_eval_batch_size:
393
- value: 16
394
- device_train_batch_size:
395
- value: 512
396
- device_train_grad_accum:
397
- value: 32
398
- device_train_microbatch_size:
399
- value: 16
400
- distributed_strategy:
401
- value: fsdp
402
- dry_run:
403
- value: false
404
- early_stopping_factor:
405
- value: null
406
- epoch:
407
- value: null
408
- eval_interval:
409
- value: 5000
410
- eval_on_load:
411
- value: false
412
- eval_subset_num_batches:
413
- value: -1
414
- evaluators:
415
- value: []
416
- extra_steps_after_cancel:
417
- value: 10
418
- fast_forward_batches:
419
- value: null
420
- force_save_unsharded:
421
- value: false
422
- fsdp:
423
- value:
424
- hybrid_sharding_num_model_replicas: null
425
- precision: mixed
426
- sharding_strategy: FULL_SHARD
427
- use_orig_params: true
428
- wrapping_strategy: null
429
- fused_loss:
430
- value: null
431
- gen1_gc_interval:
432
- value: 1
433
- global_train_batch_size:
434
- value: 512
435
- hf_datasets_cache_dir:
436
- value: null
437
- load_path:
438
- value: null
439
- load_path_sharded_checkpointer:
440
- value: null
441
- max_duration:
442
- value: 1ep
443
- max_grad_norm:
444
- value: 1
445
- max_grad_norm_ratio:
446
- value: null
447
- model:
448
- value:
449
- activation_type: swiglu
450
- alibi: false
451
- alibi_bias_max: 8
452
- attention_dropout: 0
453
- attention_layer_norm: false
454
- attention_layer_norm_with_affine: false
455
- bias_for_layer_norm: false
456
- block_group_size: 1
457
- block_type: sequential
458
- clip_qkv: null
459
- d_model: 2048
460
- emb_init_std: null
461
- embedding_dropout: 0
462
- embedding_layer_norm: false
463
- embedding_size: 32000
464
- eos_token_id: 0
465
- flash_attention: false
466
- include_bias: false
467
- init_cutoff_factor: 3
468
- init_device: cuda
469
- init_fn: normal
470
- init_std: 0.02
471
- layer_norm_eps: 1e-05
472
- layer_norm_type: default
473
- layer_norm_with_affine: false
474
- max_sequence_length: 2048
475
- mlp_hidden_size: null
476
- mlp_ratio: 8
477
- multi_query_attention: false
478
- n_heads: 16
479
- n_kv_heads: null
480
- n_layers: 16
481
- norm_after: false
482
- pad_token_id: 1
483
- precision: amp_bf16
484
- residual_dropout: 0
485
- rope: true
486
- rope_full_precision: true
487
- rope_theta: 10000
488
- scale_emb_init: false
489
- scale_logits: false
490
- vocab_size: 32000
491
- weight_tying: true
492
- module_outputs_save_steps:
493
- value: null
494
- new_style_checkpoints:
495
- value: null
496
- no_pre_train_checkpoint:
497
- value: false
498
- optimizer:
499
- value:
500
- betas:
501
- - 0.9
502
- - 0.95
503
- decay_embeddings: true
504
- decay_norm_and_bias: true
505
- eps: 1e-08
506
- learning_rate: 0.0005
507
- metrics_log_interval: 10
508
- name: adamw
509
- no_decay_norm_and_bias: null
510
- record_update_metrics: false
511
- selective_updates: false
512
- weight_decay: 0.1
513
- precision:
514
- value: amp_bf16
515
- python_profiling:
516
- value: false
517
- remote_save_folder:
518
- value: null
519
- reset_optimizer_state:
520
- value: false
521
- reset_trainer_state:
522
- value: false
523
- restore_dataloader:
524
- value: true
525
- run_name:
526
- value: OLMo-1B-as_fm3_omi2
527
- save_data_indices:
528
- value: true
529
- save_folder:
530
- value: checkpoints/OLMo-1B-as_fm3_omi2
531
- save_interval:
532
- value: 3000
533
- save_interval_ephemeral:
534
- value: null
535
- save_interval_unsharded:
536
- value: 3000
537
- save_num_checkpoints_to_keep:
538
- value: -1
539
- save_num_unsharded_checkpoints_to_keep:
540
- value: -1
541
- save_overwrite:
542
- value: true
543
- scheduler:
544
- value:
545
- alpha_f: 0.1
546
- grad_clip_warmup_factor: null
547
- grad_clip_warmup_steps: null
548
- name: cosine_with_warmup
549
- t_max: null
550
- t_warmup: 2000
551
- units: steps
552
- warmup_min_lr: 0
553
- seed:
554
- value: 6198
555
- sharded_checkpointer:
556
- value: torch_legacy
557
- single:
558
- value:
559
- device: auto
560
- softmax_auxiliary_loss:
561
- value: false
562
- speed_monitor:
563
- value:
564
- gpu_flops_available: null
565
- window_size: 20
566
- stop_after:
567
- value: null
568
- stop_at:
569
- value: null
570
- time_limit:
571
- value: null
572
- tokenizer:
573
- value:
574
- identifier: meta-llama/Llama-2-7b-hf
575
- truncate_direction: right
576
- torch_profiling:
577
- value: false
578
- try_load_latest_save:
579
- value: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/output.log DELETED
@@ -1,78 +0,0 @@
1
- [2025-12-14 15:40:42] INFO  [olmo.data.iterable_dataset:79, rank=0] Saving global data order indices...
2
- [2025-12-14 15:40:44] INFO  [olmo.data.iterable_dataset:88, rank=0] Global data order indices saved to 'checkpoints/OLMo-1B-as_fm3_omi2/train_data/global_indices.npy'
3
- [2025-12-14 15:40:44] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/utils/data/dataloader.py:624: UserWarning: This DataLoader will create 32 worker processes in total. Our suggested max number of worker in current system is 16, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
4
- warnings.warn(
5
-
6
- [2025-12-14 15:40:44] INFO  [train:139, rank=0] Building model...
7
- [2025-12-14 15:40:44] INFO  [olmo.model:1174, rank=0] Initializing model parameters...
8
- [2025-12-14 15:40:44] INFO  [train:141, rank=0] Total number of parameters: 1,139,277,824
9
- [2025-12-14 15:40:44] INFO  [train:142, rank=0] Number of non-embedding parameters: 1,073,741,824
10
- [2025-12-14 15:40:44] INFO  [train:143, rank=0] Peak GPU Memory (MB) before fsdp: 4559
11
- [2025-12-14 15:40:44] INFO  [train:172, rank=0] Wrapping model with FSDP...
12
- [2025-12-14 15:40:44] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_init_utils.py:444: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1.
13
- warnings.warn(
14
-
15
- [2025-12-14 15:40:44] INFO  [olmo.model:1174, rank=0] Initializing model parameters...
16
- [2025-12-14 15:40:44] INFO  [train:232, rank=0] Peak GPU Memory (MB) after fsdp: 9116
17
- [2025-12-14 15:40:44] INFO  [train:233, rank=0] Model:
18
- [2025-12-14 15:40:44] INFO  [train:234, rank=0] FullyShardedDataParallel(
19
- (_fsdp_wrapped_module): OLMo(
20
- (transformer): ModuleDict(
21
- (wte): Embedding(32000, 2048)
22
- (emb_drop): Dropout(p=0.0, inplace=False)
23
- (ln_f): LayerNorm()
24
- (blocks): ModuleList(
25
- (0-15): 16 x OLMoSequentialBlock(
26
- (dropout): Dropout(p=0.0, inplace=False)
27
- (act): SwiGLU()
28
- (attn_out): Linear(in_features=2048, out_features=2048, bias=False)
29
- (ff_out): Linear(in_features=8192, out_features=2048, bias=False)
30
- (rotary_emb): RotaryEmbedding()
31
- (att_proj): Linear(in_features=2048, out_features=6144, bias=False)
32
- (ff_proj): Linear(in_features=2048, out_features=16384, bias=False)
33
- (attn_norm): LayerNorm()
34
- (ff_norm): LayerNorm()
35
- )
36
- )
37
- )
38
- )
39
- )
40
- [2025-12-14 15:40:44] INFO  [olmo.optim:944, rank=0] Constructing optimizer with 1 param groups
41
- [2025-12-14 15:40:44] INFO  [train:335, rank=0] Saving pre-train checkpoint...
42
- [2025-12-14 15:40:44] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:690: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
43
- warnings.warn(
44
-
45
- [2025-12-14 15:40:44] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py:773: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned.
46
- warnings.warn(
47
-
48
- [2025-12-14 15:40:44] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py:711: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned.
49
- warnings.warn(
50
-
51
- [2025-12-14 15:40:51] INFO  [olmo.checkpoint:607, rank=0] Saving config...
52
- [2025-12-14 15:40:51] INFO  [train:337, rank=0] Checkpoint saved to checkpoints/OLMo-1B-as_fm3_omi2/step0
53
- [2025-12-14 15:40:51] INFO  [train:340, rank=0] Attempting to load pre-train checkpoint...
54
- [2025-12-14 15:40:55] INFO  [olmo.checkpoint:1040, rank=0] Loading model state...
55
- [2025-12-14 15:40:55] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py:827: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned.
56
- warnings.warn(
57
-
58
- [2025-12-14 15:40:56] WARNING  [py.warnings:110, rank=0] /opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py:864: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned.
59
- warnings.warn(
60
-
61
- [2025-12-14 15:40:56] INFO  [olmo.checkpoint:1044, rank=0] Loading optimizer state...
62
- [2025-12-14 15:40:56] INFO  [olmo.checkpoint:220, rank=0] Flattening sharded optimizer state...
63
- [2025-12-14 15:40:56] INFO  [olmo.checkpoint:234, rank=0] Loading flattened optimizer state...
64
- [2025-12-14 15:40:56] INFO  [olmo.train:409, rank=0] Resetting learning rate...
65
- [2025-12-14 15:40:56] INFO  [olmo.train:421, rank=0] Restoring RNG states...
66
- [2025-12-14 15:40:56] INFO  [train:344, rank=0] Checkpoint successfully loaded
67
- [2025-12-14 15:40:56] INFO  [train:375, rank=0] Starting training...
68
- [2025-12-14 15:40:56] INFO  [olmo.train:979, rank=0] Pre-train system metrics
69
- System/Peak GPU Memory (MB)=9,116
70
- [2025-12-14 15:41:54] INFO  [olmo.train:979, rank=0] [step=1/59321,epoch=0]
71
- optim/total_grad_norm=9.088
72
- train/CrossEntropyLoss=10.76
73
- train/Perplexity=47,163
74
- throughput/total_tokens=1,048,576
75
- throughput/total_training_Gflops=7,874,702
76
- throughput/total_training_log_Gflops=15.88
77
- System/Peak GPU Memory (MB)=80,250
78
- wandb: WARNING The `quiet` argument to `wandb.run.finish()` is deprecated, use `wandb.Settings(quiet=...)` to set this instead.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/requirements.txt DELETED
@@ -1,271 +0,0 @@
1
- scikit-learn==1.8.0
2
- joblib==1.5.2
3
- threadpoolctl==3.6.0
4
- torchmetrics==1.8.2
5
- lightning-utilities==0.15.2
6
- wandb==0.23.1
7
- GitPython==3.1.45
8
- gitdb==4.0.12
9
- smmap==5.0.2
10
- datasets==4.4.1
11
- pandas==2.3.3
12
- multiprocess==0.70.18
13
- pyarrow==22.0.0
14
- tzdata==2025.3
15
- xxhash==3.6.0
16
- ai2-olmo==0.6.0
17
- ai2-olmo-core==2.4.0
18
- cached_path==1.8.0
19
- google-cloud-storage==2.19.0
20
- google-cloud-core==2.5.0
21
- boto3==1.42.9
22
- google-api-core==2.28.1
23
- google-auth==2.43.0
24
- s3transfer==0.16.0
25
- botocore==1.42.9
26
- google-resumable-media==2.8.0
27
- pyasn1_modules==0.4.2
28
- rich==13.9.4
29
- rsa==4.9.1
30
- bettermap==1.3.1
31
- google-crc32c==1.7.1
32
- jmespath==1.0.1
33
- numpy==1.26.4
34
- omegaconf==2.3.0
35
- proto-plus==1.26.1
36
- pyasn1==0.6.1
37
- python-dateutil==2.9.0.post0
38
- antlr4-python3-runtime==4.9.3
39
- zstandard==0.23.0
40
- zipp==3.21.0
41
- yarl==1.22.0
42
- xgrammar==0.1.18
43
- xformers==0.0.29.post2
44
- wrapt==2.0.1
45
- wheel==0.45.1
46
- websockets==15.0.1
47
- wcwidth==0.2.13
48
- watchfiles==1.1.1
49
- vllm==0.8.5.post1
50
- uvloop==0.22.1
51
- uvicorn==0.38.0
52
- urllib3==2.3.0
53
- typing-inspection==0.4.2
54
- typing_extensions==4.15.0
55
- types-dataclasses==0.6.6
56
- typer==0.20.0
57
- truststore==0.10.0
58
- triton==3.2.0
59
- transformers==4.57.3
60
- traitlets==5.14.3
61
- tqdm==4.67.1
62
- torchvision==0.21.0+cu124
63
- torchelastic==0.2.2
64
- torchaudio==2.6.0+cu124
65
- torch==2.6.0+cu124
66
- tokenizers==0.22.1
67
- tiktoken==0.12.0
68
- sympy==1.13.1
69
- starlette==0.50.0
70
- stack_data==0.6.3
71
- soupsieve==2.5
72
- sortedcontainers==2.4.0
73
- sniffio==1.3.1
74
- six==1.17.0
75
- shellingham==1.5.4
76
- setuptools==75.8.0
77
- sentry-sdk==2.47.0
78
- sentencepiece==0.2.1
79
- scipy==1.16.3
80
- safetensors==0.7.0
81
- ruamel.yaml.clib==0.2.8
82
- ruamel.yaml==0.18.10
83
- rpds-py==0.22.3
84
- rignore==0.7.6
85
- rich-toolkit==0.17.0
86
- requests==2.32.3
87
- regex==2025.11.3
88
- referencing==0.36.2
89
- ray==2.52.1
90
- pyzmq==27.1.0
91
- pytz==2024.2
92
- python-multipart==0.0.20
93
- python-json-logger==4.0.0
94
- python-etcd==0.4.5
95
- python-dotenv==1.2.1
96
- Pygments==2.19.1
97
- pydantic-extra-types==2.10.6
98
- pydantic_core==2.41.5
99
- pydantic==2.12.5
100
- pycparser==2.22
101
- pycountry==24.6.1
102
- pycosat==0.6.6
103
- py-cpuinfo==9.0.0
104
- pure_eval==0.2.3
105
- ptyprocess==0.7.0
106
- psutil==6.1.1
107
- protobuf==4.25.8
108
- propcache==0.4.1
109
- prompt_toolkit==3.0.50
110
- prometheus-fastapi-instrumentator==7.1.0
111
- prometheus_client==0.23.1
112
- pluggy==1.5.0
113
- platformdirs==4.3.6
114
- pkgutil_resolve_name==1.3.10
115
- pkginfo==1.12.0
116
- pip==24.3.1
117
- pillow==11.0.0
118
- pickleshare==0.7.5
119
- pexpect==4.9.0
120
- partial-json-parser==0.2.1.1.post7
121
- parso==0.8.4
122
- packaging==24.2
123
- outlines_core==0.1.26
124
- outlines==0.1.11
125
- optree==0.14.0
126
- opentelemetry-semantic-conventions-ai==0.4.13
127
- opentelemetry-semantic-conventions==0.47b0
128
- opentelemetry-sdk==1.26.0
129
- opentelemetry-proto==1.26.0
130
- opentelemetry-exporter-otlp-proto-http==1.26.0
131
- opentelemetry-exporter-otlp-proto-grpc==1.26.0
132
- opentelemetry-exporter-otlp-proto-common==1.26.0
133
- opentelemetry-exporter-otlp==1.26.0
134
- opentelemetry-api==1.26.0
135
- opencv-python-headless==4.12.0.88
136
- openai==2.11.0
137
- nvidia-nvtx-cu12==12.4.127
138
- nvidia-nvjitlink-cu12==12.4.127
139
- nvidia-nccl-cu12==2.21.5
140
- nvidia-cusparselt-cu12==0.6.2
141
- nvidia-cusparse-cu12==12.3.1.170
142
- nvidia-cusolver-cu12==11.6.1.9
143
- nvidia-curand-cu12==10.3.5.147
144
- nvidia-cufft-cu12==11.2.1.3
145
- nvidia-cudnn-cu12==9.1.0.70
146
- nvidia-cuda-runtime-cu12==12.4.127
147
- nvidia-cuda-nvrtc-cu12==12.4.127
148
- nvidia-cuda-cupti-cu12==12.4.127
149
- nvidia-cublas-cu12==12.4.5.8
150
- numba==0.61.2
151
- ninja==1.11.1.3
152
- networkx==3.4.2
153
- nest-asyncio==1.6.0
154
- multidict==6.7.0
155
- msgspec==0.20.0
156
- msgpack==1.1.2
157
- mpmath==1.3.0
158
- more-itertools==10.6.0
159
- mistral_common==1.8.6
160
- menuinst==2.2.0
161
- mdurl==0.1.2
162
- matplotlib-inline==0.1.7
163
- math-verify==0.8.0
164
- markdown-it-py==4.0.0
165
- lm-format-enforcer==0.10.12
166
- llvmlite==0.44.0
167
- llguidance==0.7.30
168
- lintrunner==0.12.7
169
- lief==0.14.1
170
- libmambapy==2.0.5
171
- libarchive-c==5.1
172
- latex2sympy2_extended==1.10.2
173
- lark==1.2.2
174
- jsonschema-specifications==2024.10.1
175
- jsonschema==4.23.0
176
- jsonpointer==3.0.0
177
- jsonpatch==1.33
178
- jiter==0.12.0
179
- Jinja2==3.1.5
180
- jedi==0.19.2
181
- ipython==8.31.0
182
- interegular==0.3.3
183
- importlib_resources==6.5.2
184
- importlib_metadata==8.0.0
185
- idna==3.10
186
- hypothesis==6.124.7
187
- hyperframe==6.0.1
188
- huggingface-hub==0.36.0
189
- httpx==0.28.1
190
- httptools==0.7.1
191
- httpcore==1.0.9
192
- hpack==4.0.0
193
- hf-xet==1.2.0
194
- h2==4.1.0
195
- h11==0.16.0
196
- grpcio==1.76.0
197
- googleapis-common-protos==1.72.0
198
- gguf==0.17.1
199
- fsspec==2024.12.0
200
- frozenlist==1.8.0
201
- frozendict==2.4.6
202
- filelock==3.17.0
203
- fastrlock==0.8.3
204
- fastar==0.8.0
205
- fastapi-cloud-cli==0.6.0
206
- fastapi-cli==0.0.16
207
- fastapi==0.124.4
208
- expecttest==0.3.0
209
- executing==2.1.0
210
- exceptiongroup==1.2.2
211
- email-validator==2.3.0
212
- einops==0.8.1
213
- dnspython==2.7.0
214
- distro==1.9.0
215
- diskcache==5.6.3
216
- dill==0.4.0
217
- depyf==0.18.0
218
- Deprecated==1.3.1
219
- decorator==5.1.1
220
- cupy-cuda12x==13.6.0
221
- conda_package_streaming==0.11.0
222
- conda-package-handling==2.4.0
223
- conda-libmamba-solver==25.1.1
224
- conda_index==0.5.0
225
- conda-build==25.1.1
226
- conda==25.1.0
227
- compressed-tensors==0.9.3
228
- colorama==0.4.6
229
- cmake==3.31.4
230
- cloudpickle==3.1.2
231
- click==8.1.8
232
- charset-normalizer==3.4.1
233
- chardet==5.2.0
234
- cffi==1.17.1
235
- certifi==2024.12.14
236
- cachetools==6.2.3
237
- boltons==24.0.0
238
- blake3==1.0.8
239
- beautifulsoup4==4.12.3
240
- attrs==25.1.0
241
- astunparse==1.6.3
242
- asttokens==3.0.0
243
- astor==0.8.1
244
- archspec==0.2.5
245
- anyio==4.12.0
246
- annotated-types==0.7.0
247
- annotated-doc==0.0.4
248
- airportsdata==20250909
249
- aiosignal==1.4.0
250
- aiohttp==3.13.2
251
- aiohappyeyeballs==2.6.1
252
- PyYAML==6.0.2
253
- PySocks==1.7.1
254
- MarkupSafe==3.0.2
255
- Brotli==1.1.0
256
- zipp==3.19.2
257
- wheel==0.43.0
258
- typing_extensions==4.12.2
259
- typeguard==4.3.0
260
- tomli==2.0.1
261
- platformdirs==4.2.2
262
- packaging==24.2
263
- more-itertools==10.3.0
264
- jaraco.text==3.12.1
265
- jaraco.functools==4.0.1
266
- jaraco.context==5.3.0
267
- jaraco.collections==5.1.0
268
- inflect==7.3.1
269
- importlib_metadata==8.0.0
270
- backports.tarfile==1.2.0
271
- autocommand==2.2.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/wandb-metadata.json DELETED
@@ -1,103 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35",
3
- "python": "CPython 3.11.11",
4
- "startedAt": "2025-12-14T14:40:39.859989Z",
5
- "args": [
6
- "pretraining/configs/RL-1B.yaml"
7
- ],
8
- "program": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py",
9
- "codePath": "OLMo/scripts/train.py",
10
- "codePathLocal": "OLMo/scripts/train.py",
11
- "email": "[email protected]",
12
- "root": "checkpoints/OLMo-1B-as_fm3_omi2/wandb",
13
- "host": "serv-3342",
14
- "executable": "/opt/conda/bin/python",
15
- "cpu_count": 112,
16
- "cpu_count_logical": 224,
17
- "gpu": "NVIDIA H100 80GB HBM3",
18
- "gpu_count": 1,
19
- "disk": {
20
- "/": {
21
- "total": "2055141851136",
22
- "used": "49256366080"
23
- }
24
- },
25
- "memory": {
26
- "total": "2164176814080"
27
- },
28
- "gpu_nvidia": [
29
- {
30
- "name": "NVIDIA H100 80GB HBM3",
31
- "memoryTotal": "85520809984",
32
- "cudaCores": 16896,
33
- "architecture": "Hopper",
34
- "uuid": "GPU-4c999b2a-2578-9e62-0539-4b826d85fda8"
35
- }
36
- ],
37
- "cudaVersion": "13.0",
38
- "slurm": {
39
- "cluster_name": "pegasus",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpu_bind": "quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000",
42
- "cpu_bind_list": "0x000000000001FE00000000000000000000000001FE00000000000000",
43
- "cpu_bind_type": "mask_cpu:",
44
- "cpu_bind_verbose": "quiet",
45
- "cpus_on_node": "16",
46
- "cpus_per_task": "16",
47
- "distribution": "cyclic",
48
- "gpus": "1",
49
- "gpus_on_node": "1",
50
- "gtids": "0",
51
- "job_cpus_per_node": "16",
52
- "job_end_time": "1765734524",
53
- "job_gid": "8000",
54
- "job_group": "iml",
55
- "job_id": "2383756",
56
- "job_name": "bash",
57
- "job_nodelist": "serv-3342",
58
- "job_num_nodes": "1",
59
- "job_partition": "H100",
60
- "job_qos": "normal",
61
- "job_start_time": "1765720124",
62
- "job_uid": "13262",
63
- "job_user": "nguyen",
64
- "jobid": "2383756",
65
- "launch_node_ipaddr": "192.168.33.114",
66
- "localid": "0",
67
- "mem_per_cpu": "16384",
68
- "mpi_type": "pmix",
69
- "nnodes": "1",
70
- "nodeid": "0",
71
- "nodelist": "serv-3342",
72
- "nprocs": "1",
73
- "ntasks": "1",
74
- "oom_kill_step": "0",
75
- "pmix_mapping_serv": "(vector,(0,1,1))",
76
- "pmixp_abort_agent_port": "33735",
77
- "prio_process": "1",
78
- "procid": "0",
79
- "pty_port": "45219",
80
- "pty_win_col": "156",
81
- "pty_win_row": "41",
82
- "srun_comm_host": "192.168.33.114",
83
- "srun_comm_port": "35153",
84
- "step_gpus": "5",
85
- "step_id": "0",
86
- "step_launcher_port": "35153",
87
- "step_nodelist": "serv-3342",
88
- "step_num_nodes": "1",
89
- "step_num_tasks": "1",
90
- "step_tasks_per_node": "1",
91
- "stepid": "0",
92
- "submit_dir": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain",
93
- "submit_host": "login1",
94
- "task_pid": "3684902",
95
- "tasks_per_node": "1",
96
- "topology_addr": "serv-3342",
97
- "topology_addr_pattern": "node",
98
- "tres_bind": "gres/gpu:per_task:1",
99
- "tres_per_task": "cpu=16,gres/gpu=1",
100
- "umask": "0022"
101
- },
102
- "writerId": "hif91sfokkg4a3putu9qm5wzemlxp65c"
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"optim/grad/transformer.blocks.6.attn_out.weight.norm":0.5236513018608093,"optim/grad/transformer.blocks.1.attn_out.weight.norm":1.62814199924469,"_step":1,"optim/grad/transformer.blocks.10.ff_out.weight.norm":0.31942349672317505,"optim/grad/transformer.blocks.8.ff_out.weight.norm":0.3655010461807251,"optim/grad/transformer.blocks.0.attn_out.weight.norm":3.5133237838745117,"_runtime":73.879944699,"optim/grad/transformer.blocks.10.att_proj.weight.norm":0.3661356270313263,"optim/grad/transformer.blocks.5.ff_out.weight.norm":0.5184721946716309,"optim/grad/transformer.blocks.11.ff_out.weight.norm":0.29604294896125793,"optim/grad/transformer.blocks.5.att_proj.weight.norm":0.5883283019065857,"optim/grad/transformer.blocks.2.attn_out.weight.norm":1.118513584136963,"optim/grad/transformer.wte.weight.norm":3.908214569091797,"optim/grad/transformer.blocks.11.ff_proj.weight.norm":0.41096439957618713,"optim/grad/transformer.blocks.2.att_proj.weight.norm":1.1237092018127441,"optim/grad/transformer.blocks.6.att_proj.weight.norm":0.5209723114967346,"optim/grad/transformer.blocks.6.ff_proj.weight.norm":0.6300004720687866,"optim/grad/transformer.blocks.4.ff_out.weight.norm":0.5870352983474731,"optim/grad/transformer.blocks.13.ff_proj.weight.norm":0.37113988399505615,"throughput/total_tokens":1048576,"optim/total_grad_norm":9.087934494018555,"optim/grad/transformer.blocks.0.ff_out.weight.norm":2.177250623703003,"optim/grad/transformer.blocks.5.attn_out.weight.norm":0.6065305471420288,"optim/grad/transformer.blocks.3.ff_out.weight.norm":0.7264525294303894,"System/Peak GPU Memory (MB)":80250.1953125,"optim/grad/transformer.blocks.12.attn_out.weight.norm":0.3190648555755615,"optim/grad/transformer.blocks.1.att_proj.weight.norm":1.6082570552825928,"optim/grad/transformer.blocks.14.att_proj.weight.norm":0.28815507888793945,"optim/grad/transformer.blocks.15.attn_out.weight.norm":0.27553632855415344,"_wandb":{"runtime":73},"optim/grad/transformer.blocks.11.attn_out.weight.norm":0.3472404479980469,"optim/grad/transformer.blocks.10.ff_proj.weight.norm":0.45583638548851013,"optim/grad/transformer.blocks.14.attn_out.weight.norm":0.2960440516471863,"optim/grad/transformer.blocks.8.ff_proj.weight.norm":0.5061823129653931,"optim/grad/transformer.blocks.11.att_proj.weight.norm":0.33966541290283203,"optim/grad/transformer.blocks.15.att_proj.weight.norm":0.2798300087451935,"throughput/total_training_Gflops":7.874702278131712e+06,"optim/grad/transformer.blocks.7.attn_out.weight.norm":0.47163599729537964,"optim/grad/transformer.blocks.0.att_proj.weight.norm":3.6439361572265625,"optim/grad/transformer.blocks.8.attn_out.weight.norm":0.4244263768196106,"optim/grad/transformer.blocks.9.attn_out.weight.norm":0.3973117172718048,"optim/grad/transformer.blocks.14.ff_proj.weight.norm":0.3621259927749634,"train/Perplexity":47163.62832898086,"optim/grad/transformer.blocks.12.ff_proj.weight.norm":0.3901353180408478,"optim/grad/transformer.blocks.13.attn_out.weight.norm":0.3037296533584595,"optim/grad/transformer.blocks.1.ff_out.weight.norm":1.2330187559127808,"optim/grad/transformer.blocks.2.ff_out.weight.norm":0.8915591835975647,"optim/grad/transformer.blocks.3.attn_out.weight.norm":0.8428605794906616,"throughput/total_training_log_Gflops":15.879165936009779,"optim/grad/transformer.blocks.15.ff_proj.weight.norm":0.33649954199790955,"optim/grad/transformer.blocks.15.ff_out.weight.norm":0.243862122297287,"optim/grad/transformer.blocks.1.ff_proj.weight.norm":1.740834355354309,"optim/grad/transformer.blocks.12.ff_out.weight.norm":0.27772974967956543,"optim/grad/transformer.blocks.4.att_proj.weight.norm":0.7019151449203491,"train/CrossEntropyLoss":10.761378288269043,"optim/grad/transformer.blocks.8.att_proj.weight.norm":0.4309239089488983,"optim/learning_rate_group0":2.5e-07,"optim/grad/transformer.blocks.10.attn_out.weight.norm":0.3706686198711395,"optim/grad/transformer.blocks.12.att_proj.weight.norm":0.32427260279655457,"optim/grad/transformer.blocks.5.ff_proj.weight.norm":0.7240243554115295,"optim/grad/transformer.blocks.2.ff_proj.weight.norm":1.256575584411621,"optim/grad/transformer.blocks.13.att_proj.weight.norm":0.30371585488319397,"optim/grad/transformer.blocks.3.att_proj.weight.norm":0.8451918363571167,"_timestamp":1.7657233141800284e+09,"optim/grad/transformer.blocks.14.ff_out.weight.norm":0.25873398780822754,"optim/grad/transformer.blocks.0.ff_proj.weight.norm":3.110273599624634,"optim/grad/transformer.blocks.4.ff_proj.weight.norm":0.8388200998306274,"optim/grad/transformer.blocks.7.ff_proj.weight.norm":0.5779657363891602,"optim/grad/transformer.blocks.7.ff_out.weight.norm":0.40811124444007874,"optim/grad/transformer.blocks.7.att_proj.weight.norm":0.47150084376335144,"optim/grad/transformer.blocks.13.ff_out.weight.norm":0.2664201855659485,"optim/grad/transformer.blocks.9.att_proj.weight.norm":0.3878222703933716,"optim/grad/transformer.blocks.3.ff_proj.weight.norm":1.0055341720581055,"optim/grad/transformer.blocks.9.ff_proj.weight.norm":0.47650912404060364,"optim/grad/transformer.blocks.4.attn_out.weight.norm":0.7104970216751099,"optim/grad/transformer.blocks.6.ff_out.weight.norm":0.4432903230190277,"optim/grad/transformer.blocks.9.ff_out.weight.norm":0.3376409411430359}
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/logs/debug-internal.log DELETED
@@ -1,12 +0,0 @@
1
- {"time":"2025-12-14T15:40:40.168131806+01:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
- {"time":"2025-12-14T15:40:40.47528161+01:00","level":"INFO","msg":"stream: created new stream","id":"z9xp3525"}
3
- {"time":"2025-12-14T15:40:40.476452249+01:00","level":"INFO","msg":"handler: started","stream_id":"z9xp3525"}
4
- {"time":"2025-12-14T15:40:40.478203079+01:00","level":"INFO","msg":"stream: started","id":"z9xp3525"}
5
- {"time":"2025-12-14T15:40:40.47832097+01:00","level":"INFO","msg":"writer: started","stream_id":"z9xp3525"}
6
- {"time":"2025-12-14T15:40:40.478347155+01:00","level":"INFO","msg":"sender: started","stream_id":"z9xp3525"}
7
- {"time":"2025-12-14T15:41:55.381052285+01:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
8
- {"time":"2025-12-14T15:41:55.860048848+01:00","level":"INFO","msg":"handler: operation stats","stats":{}}
9
- {"time":"2025-12-14T15:41:55.865491898+01:00","level":"INFO","msg":"stream: closing","id":"z9xp3525"}
10
- {"time":"2025-12-14T15:41:55.866017166+01:00","level":"INFO","msg":"handler: closed","stream_id":"z9xp3525"}
11
- {"time":"2025-12-14T15:41:55.9059486+01:00","level":"INFO","msg":"sender: closed","stream_id":"z9xp3525"}
12
- {"time":"2025-12-14T15:41:55.906959791+01:00","level":"INFO","msg":"stream: closed","id":"z9xp3525"}
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-12-14 15:40:39,906 INFO MainThread:3750944 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
- 2025-12-14 15:40:39,907 INFO MainThread:3750944 [wandb_setup.py:_flush():80] Configure stats pid to 3750944
3
- 2025-12-14 15:40:39,908 INFO MainThread:3750944 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
- 2025-12-14 15:40:39,909 INFO MainThread:3750944 [wandb_setup.py:_flush():80] Loading settings from /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/wandb/settings
5
- 2025-12-14 15:40:39,910 INFO MainThread:3750944 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
- 2025-12-14 15:40:39,911 INFO MainThread:3750944 [wandb_init.py:setup_run_log_directory():714] Logging user logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_154039-z9xp3525/logs/debug.log
7
- 2025-12-14 15:40:39,911 INFO MainThread:3750944 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_154039-z9xp3525/logs/debug-internal.log
8
- 2025-12-14 15:40:39,913 INFO MainThread:3750944 [wandb_init.py:init():841] calling init triggers
9
- 2025-12-14 15:40:39,914 INFO MainThread:3750944 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
- config: {'run_name': 'OLMo-1B-as_fm3_omi2', 'seed': 6198, 'epoch': None, 'dry_run': False, 'model': {'d_model': 2048, 'n_heads': 16, 'n_kv_heads': None, 'clip_qkv': None, 'n_layers': 16, 'mlp_ratio': 8, 'mlp_hidden_size': None, 'activation_type': 'swiglu', 'block_type': 'sequential', 'block_group_size': 1, 'alibi': False, 'alibi_bias_max': 8.0, 'rope': True, 'rope_full_precision': True, 'rope_theta': 10000, 'flash_attention': False, 'attention_dropout': 0.0, 'multi_query_attention': False, 'attention_layer_norm': False, 'residual_dropout': 0.0, 'embedding_dropout': 0.0, 'embedding_layer_norm': False, 'layer_norm_type': 'default', 'layer_norm_with_affine': False, 'layer_norm_eps': 1e-05, 'attention_layer_norm_with_affine': False, 'max_sequence_length': 2048, 'include_bias': False, 'bias_for_layer_norm': False, 'scale_logits': False, 'vocab_size': 32000, 'embedding_size': 32000, 'weight_tying': True, 'eos_token_id': 0, 'pad_token_id': 1, 'init_device': 'cuda', 'init_fn': 'normal', 'init_std': 0.02, 'init_cutoff_factor': 3.0, 'precision': 'amp_bf16', 'scale_emb_init': False, 'emb_init_std': None, 'norm_after': False}, 'optimizer': {'name': 'adamw', 'learning_rate': 0.0005, 'weight_decay': 0.1, 'betas': (0.9, 0.95), 'eps': 1e-08, 'no_decay_norm_and_bias': None, 'selective_updates': False, 'decay_norm_and_bias': True, 'decay_embeddings': True, 'metrics_log_interval': 10, 'record_update_metrics': False}, 'scheduler': {'name': 'cosine_with_warmup', 'units': 'steps', 't_warmup': 2000, 't_max': None, 'alpha_f': 0.1, 'grad_clip_warmup_steps': None, 'grad_clip_warmup_factor': None, 'warmup_min_lr': 0.0}, 'data': {'paths': ['data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds'], 'memmap_dtype': 'uint16', 'datasets': None, 'label_mask_paths': None, 'pad_direction': 'right', 'generate_attention_mask': False, 'generate_doc_lengths': False, 'num_workers': 32, 'drop_last': True, 'pin_memory': True, 'prefetch_factor': 8, 'persistent_workers': True, 'timeout': 0, 'seed': None, 'instance_filter': None, 'custom_dataset': None}, 'restore_dataloader': True, 'fast_forward_batches': None, 'evaluators': [], 'eval_interval': 5000, 'tokenizer': {'identifier': 'meta-llama/Llama-2-7b-hf', 'truncate_direction': 'right'}, 'save_folder': 'checkpoints/OLMo-1B-as_fm3_omi2', 'remote_save_folder': None, 'canceled_check_interval': 6000, 'save_interval': 3000, 'save_interval_unsharded': 3000, 'save_interval_ephemeral': None, 'save_num_checkpoints_to_keep': -1, 'save_num_unsharded_checkpoints_to_keep': -1, 'save_overwrite': True, 'force_save_unsharded': False, 'no_pre_train_checkpoint': False, 'load_path': None, 'load_path_sharded_checkpointer': None, 'try_load_latest_save': False, 'reset_optimizer_state': False, 'reset_trainer_state': False, 'sharded_checkpointer': 'torch_legacy', 'new_style_checkpoints': None, 'max_duration': '1ep', 'global_train_batch_size': 512, 'device_train_batch_size': 512, 'device_train_microbatch_size': 16, 'device_eval_batch_size': 16, 'eval_subset_num_batches': -1, 'eval_on_load': False, 'device_train_grad_accum': 32, 'max_grad_norm': 1.0, 'max_grad_norm_ratio': None, 'precision': 'amp_bf16', 'speed_monitor': {'window_size': 20, 'gpu_flops_available': None}, 'console_log_interval': 1, 'gen1_gc_interval': 1, 'compile': None, 'distributed_strategy': 'fsdp', 'fsdp': {'use_orig_params': True, 'sharding_strategy': <ShardingStrategy.FULL_SHARD: 1>, 'wrapping_strategy': None, 'precision': 'mixed', 'hybrid_sharding_num_model_replicas': None}, 'ddp': {'grad_sync_mode': 'batch', 'find_unused_params': False}, 'single': {'device': 'auto'}, 'softmax_auxiliary_loss': False, 'auxiliary_loss_multiplier': 0.0001, 'time_limit': None, 'extra_steps_after_cancel': 10, 'early_stopping_factor': None, 'save_data_indices': True, 'python_profiling': False, 'torch_profiling': False, 'stop_at': None, 'stop_after': None, 'activation_checkpointing': None, 'fused_loss': None, 'hf_datasets_cache_dir': None, 'module_outputs_save_steps': None, '_wandb': {}}
11
- 2025-12-14 15:40:39,915 INFO MainThread:3750944 [wandb_init.py:init():889] starting backend
12
- 2025-12-14 15:40:40,160 INFO MainThread:3750944 [wandb_init.py:init():892] sending inform_init request
13
- 2025-12-14 15:40:40,166 INFO MainThread:3750944 [wandb_init.py:init():900] backend started and connected
14
- 2025-12-14 15:40:40,169 INFO MainThread:3750944 [wandb_init.py:init():970] updated telemetry
15
- 2025-12-14 15:40:40,171 INFO MainThread:3750944 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
- 2025-12-14 15:40:40,795 INFO MainThread:3750944 [wandb_init.py:init():1041] starting run threads in backend
17
- 2025-12-14 15:40:40,887 INFO MainThread:3750944 [wandb_run.py:_console_start():2521] atexit reg
18
- 2025-12-14 15:40:40,888 INFO MainThread:3750944 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
- 2025-12-14 15:40:40,889 INFO MainThread:3750944 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
- 2025-12-14 15:40:40,890 INFO MainThread:3750944 [wandb_run.py:_redirect():2461] Redirects installed.
21
- 2025-12-14 15:40:40,892 INFO MainThread:3750944 [wandb_init.py:init():1081] run started, returning control to user process
22
- 2025-12-14 15:41:54,674 INFO MainThread:3750944 [wandb_run.py:_finish():2287] finishing run marksmans/olmo-debug/z9xp3525
23
- 2025-12-14 15:41:54,675 INFO MainThread:3750944 [wandb_run.py:_atexit_cleanup():2486] got exitcode: 1
24
- 2025-12-14 15:41:54,676 INFO MainThread:3750944 [wandb_run.py:_restore():2468] restore
25
- 2025-12-14 15:41:54,676 INFO MainThread:3750944 [wandb_run.py:_restore():2474] restore done
26
- 2025-12-14 15:41:55,863 INFO MainThread:3750944 [wandb_run.py:_footer_sync_info():3862] logging synced files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154039-z9xp3525/run-z9xp3525.wandb DELETED
Binary file (49.7 kB)
 
models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/config.yaml DELETED
@@ -1,575 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.23.1
4
- e:
5
- 61ybci2w6pjjhx1rssgrci0sff4exdn7:
6
- args:
7
- - pretraining/configs/RL-1B.yaml
8
- codePath: OLMo/scripts/train.py
9
- codePathLocal: OLMo/scripts/train.py
10
- cpu_count: 112
11
- cpu_count_logical: 224
12
- cudaVersion: "13.0"
13
- disk:
14
- /:
15
- total: "2055141851136"
16
- used: "49256501248"
17
18
- executable: /opt/conda/bin/python
19
- gpu: NVIDIA H100 80GB HBM3
20
- gpu_count: 1
21
- gpu_nvidia:
22
- - architecture: Hopper
23
- cudaCores: 16896
24
- memoryTotal: "85520809984"
25
- name: NVIDIA H100 80GB HBM3
26
- uuid: GPU-4c999b2a-2578-9e62-0539-4b826d85fda8
27
- host: serv-3342
28
- memory:
29
- total: "2164176814080"
30
- os: Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35
31
- program: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py
32
- python: CPython 3.11.11
33
- root: checkpoints/OLMo-1B-as_fm3_omi2/wandb
34
- slurm:
35
- cluster_name: pegasus
36
- conf: /etc/slurm/slurm.conf
37
- cpu_bind: quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000
38
- cpu_bind_list: 0x000000000001FE00000000000000000000000001FE00000000000000
39
- cpu_bind_type: 'mask_cpu:'
40
- cpu_bind_verbose: quiet
41
- cpus_on_node: "16"
42
- cpus_per_task: "16"
43
- distribution: cyclic
44
- gpus: "1"
45
- gpus_on_node: "1"
46
- gtids: "0"
47
- job_cpus_per_node: "16"
48
- job_end_time: "1765734524"
49
- job_gid: "8000"
50
- job_group: iml
51
- job_id: "2383756"
52
- job_name: bash
53
- job_nodelist: serv-3342
54
- job_num_nodes: "1"
55
- job_partition: H100
56
- job_qos: normal
57
- job_start_time: "1765720124"
58
- job_uid: "13262"
59
- job_user: nguyen
60
- jobid: "2383756"
61
- launch_node_ipaddr: 192.168.33.114
62
- localid: "0"
63
- mem_per_cpu: "16384"
64
- mpi_type: pmix
65
- nnodes: "1"
66
- nodeid: "0"
67
- nodelist: serv-3342
68
- nprocs: "1"
69
- ntasks: "1"
70
- oom_kill_step: "0"
71
- pmix_mapping_serv: (vector,(0,1,1))
72
- pmixp_abort_agent_port: "33735"
73
- prio_process: "1"
74
- procid: "0"
75
- pty_port: "45219"
76
- pty_win_col: "156"
77
- pty_win_row: "41"
78
- srun_comm_host: 192.168.33.114
79
- srun_comm_port: "35153"
80
- step_gpus: "5"
81
- step_id: "0"
82
- step_launcher_port: "35153"
83
- step_nodelist: serv-3342
84
- step_num_nodes: "1"
85
- step_num_tasks: "1"
86
- step_tasks_per_node: "1"
87
- stepid: "0"
88
- submit_dir: /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain
89
- submit_host: login1
90
- task_pid: "3684902"
91
- tasks_per_node: "1"
92
- topology_addr: serv-3342
93
- topology_addr_pattern: node
94
- tres_bind: gres/gpu:per_task:1
95
- tres_per_task: cpu=16,gres/gpu=1
96
- umask: "0022"
97
- startedAt: "2025-12-14T14:47:07.756230Z"
98
- writerId: 61ybci2w6pjjhx1rssgrci0sff4exdn7
99
- m: []
100
- python_version: 3.11.11
101
- t:
102
- "1":
103
- - 1
104
- - 5
105
- - 11
106
- - 41
107
- - 49
108
- - 51
109
- - 53
110
- "2":
111
- - 1
112
- - 5
113
- - 11
114
- - 41
115
- - 49
116
- - 51
117
- - 53
118
- "3":
119
- - 13
120
- - 15
121
- - 16
122
- "4": 3.11.11
123
- "5": 0.23.1
124
- "6": 4.57.3
125
- "12": 0.23.1
126
- "13": linux-x86_64
127
- activation_checkpointing:
128
- value: null
129
- auxiliary_loss_multiplier:
130
- value: 0.0001
131
- canceled_check_interval:
132
- value: 6000
133
- compile:
134
- value: null
135
- console_log_interval:
136
- value: 1
137
- data:
138
- value:
139
- custom_dataset: null
140
- datasets: null
141
- drop_last: true
142
- generate_attention_mask: false
143
- generate_doc_lengths: false
144
- instance_filter: null
145
- label_mask_paths: null
146
- memmap_dtype: uint16
147
- num_workers: 32
148
- pad_direction: right
149
- paths:
150
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds
151
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds
152
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds
153
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds
154
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds
155
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds
156
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds
157
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds
158
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds
159
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds
160
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds
161
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds
162
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds
163
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds
164
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds
165
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds
166
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds
167
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds
168
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds
169
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds
170
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds
171
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds
172
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds
173
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds
174
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds
175
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds
176
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds
177
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds
178
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds
179
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds
180
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds
181
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds
182
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds
183
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds
184
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds
185
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds
186
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds
187
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds
188
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds
189
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds
190
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds
191
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds
192
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds
193
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds
194
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds
195
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds
196
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds
197
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds
198
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds
199
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds
200
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds
201
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds
202
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds
203
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds
204
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds
205
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds
206
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds
207
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds
208
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds
209
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds
210
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds
211
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds
212
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds
213
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds
214
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds
215
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds
216
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds
217
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds
218
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds
219
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds
220
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds
221
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds
222
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds
223
- - data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds
224
- - data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds
225
- - data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds
226
- - data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds
227
- - data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds
228
- - data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds
229
- - data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds
230
- - data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds
231
- - data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds
232
- - data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds
233
- - data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds
234
- - data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds
235
- - data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds
236
- - data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds
237
- - data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds
238
- - data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds
239
- - data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds
240
- - data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds
241
- - data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds
242
- - data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds
243
- - data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds
244
- - data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds
245
- - data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds
246
- - data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds
247
- - data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds
248
- - data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds
249
- - data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds
250
- - data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds
251
- - data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds
252
- - data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds
253
- - data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds
254
- - data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds
255
- - data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds
256
- - data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds
257
- - data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds
258
- - data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds
259
- - data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds
260
- - data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds
261
- - data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds
262
- - data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds
263
- - data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds
264
- - data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds
265
- - data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds
266
- - data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds
267
- - data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds
268
- - data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds
269
- - data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds
270
- - data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds
271
- - data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds
272
- - data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds
273
- - data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds
274
- - data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds
275
- - data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds
276
- - data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds
277
- - data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds
278
- - data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds
279
- - data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds
280
- - data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds
281
- - data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds
282
- - data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds
283
- - data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds
284
- - data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds
285
- - data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds
286
- - data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds
287
- - data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds
288
- - data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds
289
- - data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds
290
- - data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds
291
- - data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds
292
- - data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds
293
- - data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds
294
- - data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds
295
- - data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds
296
- - data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds
297
- - data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds
298
- - data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds
299
- - data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds
300
- - data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds
301
- - data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds
302
- - data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds
303
- - data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds
304
- - data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds
305
- - data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds
306
- - data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds
307
- - data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds
308
- - data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds
309
- - data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds
310
- - data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds
311
- - data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds
312
- - data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds
313
- - data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds
314
- - data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds
315
- - data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds
316
- - data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds
317
- - data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds
318
- - data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds
319
- - data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds
320
- - data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds
321
- - data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds
322
- - data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds
323
- - data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds
324
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds
325
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds
326
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds
327
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds
328
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds
329
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds
330
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds
331
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds
332
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds
333
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds
334
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds
335
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds
336
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds
337
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds
338
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds
339
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds
340
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds
341
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds
342
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds
343
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds
344
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds
345
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds
346
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds
347
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds
348
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds
349
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds
350
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds
351
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds
352
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds
353
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds
354
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds
355
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds
356
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds
357
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds
358
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds
359
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds
360
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds
361
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds
362
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds
363
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds
364
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds
365
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds
366
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds
367
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds
368
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds
369
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds
370
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds
371
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds
372
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds
373
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds
374
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds
375
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds
376
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds
377
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds
378
- - data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds
379
- persistent_workers: true
380
- pin_memory: true
381
- prefetch_factor: 8
382
- seed: null
383
- timeout: 0
384
- ddp:
385
- value:
386
- find_unused_params: false
387
- grad_sync_mode: batch
388
- device_eval_batch_size:
389
- value: 4
390
- device_train_batch_size:
391
- value: 512
392
- device_train_grad_accum:
393
- value: 128
394
- device_train_microbatch_size:
395
- value: 4
396
- distributed_strategy:
397
- value: fsdp
398
- dry_run:
399
- value: false
400
- early_stopping_factor:
401
- value: null
402
- epoch:
403
- value: null
404
- eval_interval:
405
- value: 5000
406
- eval_on_load:
407
- value: false
408
- eval_subset_num_batches:
409
- value: -1
410
- evaluators:
411
- value: []
412
- extra_steps_after_cancel:
413
- value: 10
414
- fast_forward_batches:
415
- value: null
416
- force_save_unsharded:
417
- value: false
418
- fsdp:
419
- value:
420
- hybrid_sharding_num_model_replicas: null
421
- precision: mixed
422
- sharding_strategy: FULL_SHARD
423
- use_orig_params: true
424
- wrapping_strategy: null
425
- fused_loss:
426
- value: null
427
- gen1_gc_interval:
428
- value: 1
429
- global_train_batch_size:
430
- value: 512
431
- hf_datasets_cache_dir:
432
- value: null
433
- load_path:
434
- value: null
435
- load_path_sharded_checkpointer:
436
- value: null
437
- max_duration:
438
- value: 1ep
439
- max_grad_norm:
440
- value: 1
441
- max_grad_norm_ratio:
442
- value: null
443
- model:
444
- value:
445
- activation_type: swiglu
446
- alibi: false
447
- alibi_bias_max: 8
448
- attention_dropout: 0
449
- attention_layer_norm: false
450
- attention_layer_norm_with_affine: false
451
- bias_for_layer_norm: false
452
- block_group_size: 1
453
- block_type: sequential
454
- clip_qkv: null
455
- d_model: 2048
456
- emb_init_std: null
457
- embedding_dropout: 0
458
- embedding_layer_norm: false
459
- embedding_size: 32000
460
- eos_token_id: 0
461
- flash_attention: false
462
- include_bias: false
463
- init_cutoff_factor: 3
464
- init_device: cuda
465
- init_fn: normal
466
- init_std: 0.02
467
- layer_norm_eps: 1e-05
468
- layer_norm_type: default
469
- layer_norm_with_affine: false
470
- max_sequence_length: 2048
471
- mlp_hidden_size: null
472
- mlp_ratio: 8
473
- multi_query_attention: false
474
- n_heads: 16
475
- n_kv_heads: null
476
- n_layers: 16
477
- norm_after: false
478
- pad_token_id: 1
479
- precision: amp_bf16
480
- residual_dropout: 0
481
- rope: true
482
- rope_full_precision: true
483
- rope_theta: 10000
484
- scale_emb_init: false
485
- scale_logits: false
486
- vocab_size: 32000
487
- weight_tying: true
488
- module_outputs_save_steps:
489
- value: null
490
- new_style_checkpoints:
491
- value: null
492
- no_pre_train_checkpoint:
493
- value: false
494
- optimizer:
495
- value:
496
- betas:
497
- - 0.9
498
- - 0.95
499
- decay_embeddings: true
500
- decay_norm_and_bias: true
501
- eps: 1e-08
502
- learning_rate: 0.0005
503
- metrics_log_interval: 10
504
- name: adamw
505
- no_decay_norm_and_bias: null
506
- record_update_metrics: false
507
- selective_updates: false
508
- weight_decay: 0.1
509
- precision:
510
- value: amp_bf16
511
- python_profiling:
512
- value: false
513
- remote_save_folder:
514
- value: null
515
- reset_optimizer_state:
516
- value: false
517
- reset_trainer_state:
518
- value: false
519
- restore_dataloader:
520
- value: true
521
- run_name:
522
- value: OLMo-1B-as_fm3_omi2
523
- save_data_indices:
524
- value: true
525
- save_folder:
526
- value: checkpoints/OLMo-1B-as_fm3_omi2
527
- save_interval:
528
- value: 3000
529
- save_interval_ephemeral:
530
- value: null
531
- save_interval_unsharded:
532
- value: 3000
533
- save_num_checkpoints_to_keep:
534
- value: -1
535
- save_num_unsharded_checkpoints_to_keep:
536
- value: -1
537
- save_overwrite:
538
- value: true
539
- scheduler:
540
- value:
541
- alpha_f: 0.1
542
- grad_clip_warmup_factor: null
543
- grad_clip_warmup_steps: null
544
- name: cosine_with_warmup
545
- t_max: null
546
- t_warmup: 2000
547
- units: steps
548
- warmup_min_lr: 0
549
- seed:
550
- value: 6198
551
- sharded_checkpointer:
552
- value: torch_legacy
553
- single:
554
- value:
555
- device: auto
556
- softmax_auxiliary_loss:
557
- value: false
558
- speed_monitor:
559
- value:
560
- gpu_flops_available: null
561
- window_size: 20
562
- stop_after:
563
- value: null
564
- stop_at:
565
- value: null
566
- time_limit:
567
- value: null
568
- tokenizer:
569
- value:
570
- identifier: meta-llama/Llama-2-7b-hf
571
- truncate_direction: right
572
- torch_profiling:
573
- value: false
574
- try_load_latest_save:
575
- value: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/output.log DELETED
@@ -1,35 +0,0 @@
1
- [2025-12-14 15:47:11] INFO  [olmo.data.iterable_dataset:79, rank=0] Saving global data order indices...
2
- Traceback (most recent call last):
3
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 436, in <module>
4
- main(cfg)
5
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 132, in main
6
- train_loader = build_train_dataloader(cfg)
7
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^
8
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/__init__.py", line 156, in build_train_dataloader
9
- dataset = IterableDataset(
10
- ^^^^^^^^^^^^^^^^
11
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/iterable_dataset.py", line 73, in __init__
12
- self._build_and_save_global_indices()
13
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/iterable_dataset.py", line 86, in _build_and_save_global_indices
14
- global_indices_mmap.flush()
15
- File "/opt/conda/lib/python3.11/site-packages/numpy/core/memmap.py", line 301, in flush
16
- def flush(self):
17
-
18
- KeyboardInterrupt
19
- Traceback (most recent call last):
20
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 436, in <module>
21
- main(cfg)
22
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py", line 132, in main
23
- train_loader = build_train_dataloader(cfg)
24
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^
25
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/__init__.py", line 156, in build_train_dataloader
26
- dataset = IterableDataset(
27
- ^^^^^^^^^^^^^^^^
28
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/iterable_dataset.py", line 73, in __init__
29
- self._build_and_save_global_indices()
30
- File "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/olmo/data/iterable_dataset.py", line 86, in _build_and_save_global_indices
31
- global_indices_mmap.flush()
32
- File "/opt/conda/lib/python3.11/site-packages/numpy/core/memmap.py", line 301, in flush
33
- def flush(self):
34
-
35
- KeyboardInterrupt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/requirements.txt DELETED
@@ -1,271 +0,0 @@
1
- scikit-learn==1.8.0
2
- joblib==1.5.2
3
- threadpoolctl==3.6.0
4
- torchmetrics==1.8.2
5
- lightning-utilities==0.15.2
6
- wandb==0.23.1
7
- GitPython==3.1.45
8
- gitdb==4.0.12
9
- smmap==5.0.2
10
- datasets==4.4.1
11
- pandas==2.3.3
12
- multiprocess==0.70.18
13
- pyarrow==22.0.0
14
- tzdata==2025.3
15
- xxhash==3.6.0
16
- ai2-olmo==0.6.0
17
- ai2-olmo-core==2.4.0
18
- cached_path==1.8.0
19
- google-cloud-storage==2.19.0
20
- google-cloud-core==2.5.0
21
- boto3==1.42.9
22
- google-api-core==2.28.1
23
- google-auth==2.43.0
24
- s3transfer==0.16.0
25
- botocore==1.42.9
26
- google-resumable-media==2.8.0
27
- pyasn1_modules==0.4.2
28
- rich==13.9.4
29
- rsa==4.9.1
30
- bettermap==1.3.1
31
- google-crc32c==1.7.1
32
- jmespath==1.0.1
33
- numpy==1.26.4
34
- omegaconf==2.3.0
35
- proto-plus==1.26.1
36
- pyasn1==0.6.1
37
- python-dateutil==2.9.0.post0
38
- antlr4-python3-runtime==4.9.3
39
- zstandard==0.23.0
40
- zipp==3.21.0
41
- yarl==1.22.0
42
- xgrammar==0.1.18
43
- xformers==0.0.29.post2
44
- wrapt==2.0.1
45
- wheel==0.45.1
46
- websockets==15.0.1
47
- wcwidth==0.2.13
48
- watchfiles==1.1.1
49
- vllm==0.8.5.post1
50
- uvloop==0.22.1
51
- uvicorn==0.38.0
52
- urllib3==2.3.0
53
- typing-inspection==0.4.2
54
- typing_extensions==4.15.0
55
- types-dataclasses==0.6.6
56
- typer==0.20.0
57
- truststore==0.10.0
58
- triton==3.2.0
59
- transformers==4.57.3
60
- traitlets==5.14.3
61
- tqdm==4.67.1
62
- torchvision==0.21.0+cu124
63
- torchelastic==0.2.2
64
- torchaudio==2.6.0+cu124
65
- torch==2.6.0+cu124
66
- tokenizers==0.22.1
67
- tiktoken==0.12.0
68
- sympy==1.13.1
69
- starlette==0.50.0
70
- stack_data==0.6.3
71
- soupsieve==2.5
72
- sortedcontainers==2.4.0
73
- sniffio==1.3.1
74
- six==1.17.0
75
- shellingham==1.5.4
76
- setuptools==75.8.0
77
- sentry-sdk==2.47.0
78
- sentencepiece==0.2.1
79
- scipy==1.16.3
80
- safetensors==0.7.0
81
- ruamel.yaml.clib==0.2.8
82
- ruamel.yaml==0.18.10
83
- rpds-py==0.22.3
84
- rignore==0.7.6
85
- rich-toolkit==0.17.0
86
- requests==2.32.3
87
- regex==2025.11.3
88
- referencing==0.36.2
89
- ray==2.52.1
90
- pyzmq==27.1.0
91
- pytz==2024.2
92
- python-multipart==0.0.20
93
- python-json-logger==4.0.0
94
- python-etcd==0.4.5
95
- python-dotenv==1.2.1
96
- Pygments==2.19.1
97
- pydantic-extra-types==2.10.6
98
- pydantic_core==2.41.5
99
- pydantic==2.12.5
100
- pycparser==2.22
101
- pycountry==24.6.1
102
- pycosat==0.6.6
103
- py-cpuinfo==9.0.0
104
- pure_eval==0.2.3
105
- ptyprocess==0.7.0
106
- psutil==6.1.1
107
- protobuf==4.25.8
108
- propcache==0.4.1
109
- prompt_toolkit==3.0.50
110
- prometheus-fastapi-instrumentator==7.1.0
111
- prometheus_client==0.23.1
112
- pluggy==1.5.0
113
- platformdirs==4.3.6
114
- pkgutil_resolve_name==1.3.10
115
- pkginfo==1.12.0
116
- pip==24.3.1
117
- pillow==11.0.0
118
- pickleshare==0.7.5
119
- pexpect==4.9.0
120
- partial-json-parser==0.2.1.1.post7
121
- parso==0.8.4
122
- packaging==24.2
123
- outlines_core==0.1.26
124
- outlines==0.1.11
125
- optree==0.14.0
126
- opentelemetry-semantic-conventions-ai==0.4.13
127
- opentelemetry-semantic-conventions==0.47b0
128
- opentelemetry-sdk==1.26.0
129
- opentelemetry-proto==1.26.0
130
- opentelemetry-exporter-otlp-proto-http==1.26.0
131
- opentelemetry-exporter-otlp-proto-grpc==1.26.0
132
- opentelemetry-exporter-otlp-proto-common==1.26.0
133
- opentelemetry-exporter-otlp==1.26.0
134
- opentelemetry-api==1.26.0
135
- opencv-python-headless==4.12.0.88
136
- openai==2.11.0
137
- nvidia-nvtx-cu12==12.4.127
138
- nvidia-nvjitlink-cu12==12.4.127
139
- nvidia-nccl-cu12==2.21.5
140
- nvidia-cusparselt-cu12==0.6.2
141
- nvidia-cusparse-cu12==12.3.1.170
142
- nvidia-cusolver-cu12==11.6.1.9
143
- nvidia-curand-cu12==10.3.5.147
144
- nvidia-cufft-cu12==11.2.1.3
145
- nvidia-cudnn-cu12==9.1.0.70
146
- nvidia-cuda-runtime-cu12==12.4.127
147
- nvidia-cuda-nvrtc-cu12==12.4.127
148
- nvidia-cuda-cupti-cu12==12.4.127
149
- nvidia-cublas-cu12==12.4.5.8
150
- numba==0.61.2
151
- ninja==1.11.1.3
152
- networkx==3.4.2
153
- nest-asyncio==1.6.0
154
- multidict==6.7.0
155
- msgspec==0.20.0
156
- msgpack==1.1.2
157
- mpmath==1.3.0
158
- more-itertools==10.6.0
159
- mistral_common==1.8.6
160
- menuinst==2.2.0
161
- mdurl==0.1.2
162
- matplotlib-inline==0.1.7
163
- math-verify==0.8.0
164
- markdown-it-py==4.0.0
165
- lm-format-enforcer==0.10.12
166
- llvmlite==0.44.0
167
- llguidance==0.7.30
168
- lintrunner==0.12.7
169
- lief==0.14.1
170
- libmambapy==2.0.5
171
- libarchive-c==5.1
172
- latex2sympy2_extended==1.10.2
173
- lark==1.2.2
174
- jsonschema-specifications==2024.10.1
175
- jsonschema==4.23.0
176
- jsonpointer==3.0.0
177
- jsonpatch==1.33
178
- jiter==0.12.0
179
- Jinja2==3.1.5
180
- jedi==0.19.2
181
- ipython==8.31.0
182
- interegular==0.3.3
183
- importlib_resources==6.5.2
184
- importlib_metadata==8.0.0
185
- idna==3.10
186
- hypothesis==6.124.7
187
- hyperframe==6.0.1
188
- huggingface-hub==0.36.0
189
- httpx==0.28.1
190
- httptools==0.7.1
191
- httpcore==1.0.9
192
- hpack==4.0.0
193
- hf-xet==1.2.0
194
- h2==4.1.0
195
- h11==0.16.0
196
- grpcio==1.76.0
197
- googleapis-common-protos==1.72.0
198
- gguf==0.17.1
199
- fsspec==2024.12.0
200
- frozenlist==1.8.0
201
- frozendict==2.4.6
202
- filelock==3.17.0
203
- fastrlock==0.8.3
204
- fastar==0.8.0
205
- fastapi-cloud-cli==0.6.0
206
- fastapi-cli==0.0.16
207
- fastapi==0.124.4
208
- expecttest==0.3.0
209
- executing==2.1.0
210
- exceptiongroup==1.2.2
211
- email-validator==2.3.0
212
- einops==0.8.1
213
- dnspython==2.7.0
214
- distro==1.9.0
215
- diskcache==5.6.3
216
- dill==0.4.0
217
- depyf==0.18.0
218
- Deprecated==1.3.1
219
- decorator==5.1.1
220
- cupy-cuda12x==13.6.0
221
- conda_package_streaming==0.11.0
222
- conda-package-handling==2.4.0
223
- conda-libmamba-solver==25.1.1
224
- conda_index==0.5.0
225
- conda-build==25.1.1
226
- conda==25.1.0
227
- compressed-tensors==0.9.3
228
- colorama==0.4.6
229
- cmake==3.31.4
230
- cloudpickle==3.1.2
231
- click==8.1.8
232
- charset-normalizer==3.4.1
233
- chardet==5.2.0
234
- cffi==1.17.1
235
- certifi==2024.12.14
236
- cachetools==6.2.3
237
- boltons==24.0.0
238
- blake3==1.0.8
239
- beautifulsoup4==4.12.3
240
- attrs==25.1.0
241
- astunparse==1.6.3
242
- asttokens==3.0.0
243
- astor==0.8.1
244
- archspec==0.2.5
245
- anyio==4.12.0
246
- annotated-types==0.7.0
247
- annotated-doc==0.0.4
248
- airportsdata==20250909
249
- aiosignal==1.4.0
250
- aiohttp==3.13.2
251
- aiohappyeyeballs==2.6.1
252
- PyYAML==6.0.2
253
- PySocks==1.7.1
254
- MarkupSafe==3.0.2
255
- Brotli==1.1.0
256
- zipp==3.19.2
257
- wheel==0.43.0
258
- typing_extensions==4.12.2
259
- typeguard==4.3.0
260
- tomli==2.0.1
261
- platformdirs==4.2.2
262
- packaging==24.2
263
- more-itertools==10.3.0
264
- jaraco.text==3.12.1
265
- jaraco.functools==4.0.1
266
- jaraco.context==5.3.0
267
- jaraco.collections==5.1.0
268
- inflect==7.3.1
269
- importlib_metadata==8.0.0
270
- backports.tarfile==1.2.0
271
- autocommand==2.2.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/wandb-metadata.json DELETED
@@ -1,103 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-1043-nvidia-x86_64-with-glibc2.35",
3
- "python": "CPython 3.11.11",
4
- "startedAt": "2025-12-14T14:47:07.756230Z",
5
- "args": [
6
- "pretraining/configs/RL-1B.yaml"
7
- ],
8
- "program": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/OLMo/scripts/train.py",
9
- "codePath": "OLMo/scripts/train.py",
10
- "codePathLocal": "OLMo/scripts/train.py",
11
- "email": "[email protected]",
12
- "root": "checkpoints/OLMo-1B-as_fm3_omi2/wandb",
13
- "host": "serv-3342",
14
- "executable": "/opt/conda/bin/python",
15
- "cpu_count": 112,
16
- "cpu_count_logical": 224,
17
- "gpu": "NVIDIA H100 80GB HBM3",
18
- "gpu_count": 1,
19
- "disk": {
20
- "/": {
21
- "total": "2055141851136",
22
- "used": "49256501248"
23
- }
24
- },
25
- "memory": {
26
- "total": "2164176814080"
27
- },
28
- "gpu_nvidia": [
29
- {
30
- "name": "NVIDIA H100 80GB HBM3",
31
- "memoryTotal": "85520809984",
32
- "cudaCores": 16896,
33
- "architecture": "Hopper",
34
- "uuid": "GPU-4c999b2a-2578-9e62-0539-4b826d85fda8"
35
- }
36
- ],
37
- "cudaVersion": "13.0",
38
- "slurm": {
39
- "cluster_name": "pegasus",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpu_bind": "quiet,mask_cpu:0x000000000001FE00000000000000000000000001FE00000000000000",
42
- "cpu_bind_list": "0x000000000001FE00000000000000000000000001FE00000000000000",
43
- "cpu_bind_type": "mask_cpu:",
44
- "cpu_bind_verbose": "quiet",
45
- "cpus_on_node": "16",
46
- "cpus_per_task": "16",
47
- "distribution": "cyclic",
48
- "gpus": "1",
49
- "gpus_on_node": "1",
50
- "gtids": "0",
51
- "job_cpus_per_node": "16",
52
- "job_end_time": "1765734524",
53
- "job_gid": "8000",
54
- "job_group": "iml",
55
- "job_id": "2383756",
56
- "job_name": "bash",
57
- "job_nodelist": "serv-3342",
58
- "job_num_nodes": "1",
59
- "job_partition": "H100",
60
- "job_qos": "normal",
61
- "job_start_time": "1765720124",
62
- "job_uid": "13262",
63
- "job_user": "nguyen",
64
- "jobid": "2383756",
65
- "launch_node_ipaddr": "192.168.33.114",
66
- "localid": "0",
67
- "mem_per_cpu": "16384",
68
- "mpi_type": "pmix",
69
- "nnodes": "1",
70
- "nodeid": "0",
71
- "nodelist": "serv-3342",
72
- "nprocs": "1",
73
- "ntasks": "1",
74
- "oom_kill_step": "0",
75
- "pmix_mapping_serv": "(vector,(0,1,1))",
76
- "pmixp_abort_agent_port": "33735",
77
- "prio_process": "1",
78
- "procid": "0",
79
- "pty_port": "45219",
80
- "pty_win_col": "156",
81
- "pty_win_row": "41",
82
- "srun_comm_host": "192.168.33.114",
83
- "srun_comm_port": "35153",
84
- "step_gpus": "5",
85
- "step_id": "0",
86
- "step_launcher_port": "35153",
87
- "step_nodelist": "serv-3342",
88
- "step_num_nodes": "1",
89
- "step_num_tasks": "1",
90
- "step_tasks_per_node": "1",
91
- "stepid": "0",
92
- "submit_dir": "/netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain",
93
- "submit_host": "login1",
94
- "task_pid": "3684902",
95
- "tasks_per_node": "1",
96
- "topology_addr": "serv-3342",
97
- "topology_addr_pattern": "node",
98
- "tres_bind": "gres/gpu:per_task:1",
99
- "tres_per_task": "cpu=16,gres/gpu=1",
100
- "umask": "0022"
101
- },
102
- "writerId": "61ybci2w6pjjhx1rssgrci0sff4exdn7"
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":3},"_runtime":3}
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/logs/debug-internal.log DELETED
@@ -1,11 +0,0 @@
1
- {"time":"2025-12-14T15:47:08.105165256+01:00","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
- {"time":"2025-12-14T15:47:08.37501868+01:00","level":"INFO","msg":"stream: created new stream","id":"2hopup81"}
3
- {"time":"2025-12-14T15:47:08.376220545+01:00","level":"INFO","msg":"handler: started","stream_id":"2hopup81"}
4
- {"time":"2025-12-14T15:47:08.491014958+01:00","level":"INFO","msg":"stream: started","id":"2hopup81"}
5
- {"time":"2025-12-14T15:47:08.491053676+01:00","level":"INFO","msg":"writer: started","stream_id":"2hopup81"}
6
- {"time":"2025-12-14T15:47:08.491071748+01:00","level":"INFO","msg":"sender: started","stream_id":"2hopup81"}
7
- {"time":"2025-12-14T15:47:12.190256481+01:00","level":"INFO","msg":"stream: closing","id":"2hopup81"}
8
- {"time":"2025-12-14T15:47:13.127841021+01:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
- {"time":"2025-12-14T15:47:13.326969802+01:00","level":"INFO","msg":"handler: closed","stream_id":"2hopup81"}
10
- {"time":"2025-12-14T15:47:13.330040788+01:00","level":"INFO","msg":"sender: closed","stream_id":"2hopup81"}
11
- {"time":"2025-12-14T15:47:13.330899511+01:00","level":"INFO","msg":"stream: closed","id":"2hopup81"}
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/logs/debug.log DELETED
@@ -1,23 +0,0 @@
1
- 2025-12-14 15:47:07,839 INFO MainThread:3760501 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
- 2025-12-14 15:47:07,840 INFO MainThread:3760501 [wandb_setup.py:_flush():80] Configure stats pid to 3760501
3
- 2025-12-14 15:47:07,841 INFO MainThread:3760501 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
- 2025-12-14 15:47:07,842 INFO MainThread:3760501 [wandb_setup.py:_flush():80] Loading settings from /netscratch/duynguyen/Research/gen_collapse/run_code/openrlhf-pretrain/wandb/settings
5
- 2025-12-14 15:47:07,843 INFO MainThread:3760501 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
- 2025-12-14 15:47:07,845 INFO MainThread:3760501 [wandb_init.py:setup_run_log_directory():714] Logging user logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_154707-2hopup81/logs/debug.log
7
- 2025-12-14 15:47:07,846 INFO MainThread:3760501 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to checkpoints/OLMo-1B-as_fm3_omi2/wandb/wandb/run-20251214_154707-2hopup81/logs/debug-internal.log
8
- 2025-12-14 15:47:07,847 INFO MainThread:3760501 [wandb_init.py:init():841] calling init triggers
9
- 2025-12-14 15:47:07,848 INFO MainThread:3760501 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
- config: {'run_name': 'OLMo-1B-as_fm3_omi2', 'seed': 6198, 'epoch': None, 'dry_run': False, 'model': {'d_model': 2048, 'n_heads': 16, 'n_kv_heads': None, 'clip_qkv': None, 'n_layers': 16, 'mlp_ratio': 8, 'mlp_hidden_size': None, 'activation_type': 'swiglu', 'block_type': 'sequential', 'block_group_size': 1, 'alibi': False, 'alibi_bias_max': 8.0, 'rope': True, 'rope_full_precision': True, 'rope_theta': 10000, 'flash_attention': False, 'attention_dropout': 0.0, 'multi_query_attention': False, 'attention_layer_norm': False, 'residual_dropout': 0.0, 'embedding_dropout': 0.0, 'embedding_layer_norm': False, 'layer_norm_type': 'default', 'layer_norm_with_affine': False, 'layer_norm_eps': 1e-05, 'attention_layer_norm_with_affine': False, 'max_sequence_length': 2048, 'include_bias': False, 'bias_for_layer_norm': False, 'scale_logits': False, 'vocab_size': 32000, 'embedding_size': 32000, 'weight_tying': True, 'eos_token_id': 0, 'pad_token_id': 1, 'init_device': 'cuda', 'init_fn': 'normal', 'init_std': 0.02, 'init_cutoff_factor': 3.0, 'precision': 'amp_bf16', 'scale_emb_init': False, 'emb_init_std': None, 'norm_after': False}, 'optimizer': {'name': 'adamw', 'learning_rate': 0.0005, 'weight_decay': 0.1, 'betas': (0.9, 0.95), 'eps': 1e-08, 'no_decay_norm_and_bias': None, 'selective_updates': False, 'decay_norm_and_bias': True, 'decay_embeddings': True, 'metrics_log_interval': 10, 'record_update_metrics': False}, 'scheduler': {'name': 'cosine_with_warmup', 'units': 'steps', 't_warmup': 2000, 't_max': None, 'alpha_f': 0.1, 'grad_clip_warmup_steps': None, 'grad_clip_warmup_factor': None, 'warmup_min_lr': 0.0}, 'data': {'paths': ['data_token/as_fm3_omi2/algebraic-stack-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/algebraic-stack-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00054_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00055_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00056_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00057_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00058_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00059_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00060_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00061_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00062_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00063_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00064_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00065_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00066_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00067_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00068_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00069_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00070_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00071_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00072_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00073_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00074_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00075_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00076_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00077_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00078_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00079_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00080_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00081_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00082_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00083_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00084_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00085_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00086_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00087_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00088_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00089_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00090_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00091_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00092_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00093_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00094_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00095_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00096_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00097_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00098_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/finemath3-tokenized/00099_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00000_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00001_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00002_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00003_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00004_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00005_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00006_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00007_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00008_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00009_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00010_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00011_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00012_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00013_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00014_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00015_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00016_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00017_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00018_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00019_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00020_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00021_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00022_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00023_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00024_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00025_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00026_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00027_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00028_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00029_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00030_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00031_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00032_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00033_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00034_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00035_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00036_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00037_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00038_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00039_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00040_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00041_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00042_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00043_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00044_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00045_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00046_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00047_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00048_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00049_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00050_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00051_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00052_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00053_00000_doc_shuffled.ds', 'data_token/as_fm3_omi2/openmathinstruct2-tokenized/00054_00000_doc_shuffled.ds'], 'memmap_dtype': 'uint16', 'datasets': None, 'label_mask_paths': None, 'pad_direction': 'right', 'generate_attention_mask': False, 'generate_doc_lengths': False, 'num_workers': 32, 'drop_last': True, 'pin_memory': True, 'prefetch_factor': 8, 'persistent_workers': True, 'timeout': 0, 'seed': None, 'instance_filter': None, 'custom_dataset': None}, 'restore_dataloader': True, 'fast_forward_batches': None, 'evaluators': [], 'eval_interval': 5000, 'tokenizer': {'identifier': 'meta-llama/Llama-2-7b-hf', 'truncate_direction': 'right'}, 'save_folder': 'checkpoints/OLMo-1B-as_fm3_omi2', 'remote_save_folder': None, 'canceled_check_interval': 6000, 'save_interval': 3000, 'save_interval_unsharded': 3000, 'save_interval_ephemeral': None, 'save_num_checkpoints_to_keep': -1, 'save_num_unsharded_checkpoints_to_keep': -1, 'save_overwrite': True, 'force_save_unsharded': False, 'no_pre_train_checkpoint': False, 'load_path': None, 'load_path_sharded_checkpointer': None, 'try_load_latest_save': False, 'reset_optimizer_state': False, 'reset_trainer_state': False, 'sharded_checkpointer': 'torch_legacy', 'new_style_checkpoints': None, 'max_duration': '1ep', 'global_train_batch_size': 512, 'device_train_batch_size': 512, 'device_train_microbatch_size': 4, 'device_eval_batch_size': 4, 'eval_subset_num_batches': -1, 'eval_on_load': False, 'device_train_grad_accum': 128, 'max_grad_norm': 1.0, 'max_grad_norm_ratio': None, 'precision': 'amp_bf16', 'speed_monitor': {'window_size': 20, 'gpu_flops_available': None}, 'console_log_interval': 1, 'gen1_gc_interval': 1, 'compile': None, 'distributed_strategy': 'fsdp', 'fsdp': {'use_orig_params': True, 'sharding_strategy': <ShardingStrategy.FULL_SHARD: 1>, 'wrapping_strategy': None, 'precision': 'mixed', 'hybrid_sharding_num_model_replicas': None}, 'ddp': {'grad_sync_mode': 'batch', 'find_unused_params': False}, 'single': {'device': 'auto'}, 'softmax_auxiliary_loss': False, 'auxiliary_loss_multiplier': 0.0001, 'time_limit': None, 'extra_steps_after_cancel': 10, 'early_stopping_factor': None, 'save_data_indices': True, 'python_profiling': False, 'torch_profiling': False, 'stop_at': None, 'stop_after': None, 'activation_checkpointing': None, 'fused_loss': None, 'hf_datasets_cache_dir': None, 'module_outputs_save_steps': None, '_wandb': {}}
11
- 2025-12-14 15:47:07,849 INFO MainThread:3760501 [wandb_init.py:init():889] starting backend
12
- 2025-12-14 15:47:08,094 INFO MainThread:3760501 [wandb_init.py:init():892] sending inform_init request
13
- 2025-12-14 15:47:08,103 INFO MainThread:3760501 [wandb_init.py:init():900] backend started and connected
14
- 2025-12-14 15:47:08,106 INFO MainThread:3760501 [wandb_init.py:init():970] updated telemetry
15
- 2025-12-14 15:47:08,107 INFO MainThread:3760501 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
- 2025-12-14 15:47:08,863 INFO MainThread:3760501 [wandb_init.py:init():1041] starting run threads in backend
17
- 2025-12-14 15:47:08,953 INFO MainThread:3760501 [wandb_run.py:_console_start():2521] atexit reg
18
- 2025-12-14 15:47:08,954 INFO MainThread:3760501 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
- 2025-12-14 15:47:08,955 INFO MainThread:3760501 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
- 2025-12-14 15:47:08,956 INFO MainThread:3760501 [wandb_run.py:_redirect():2461] Redirects installed.
21
- 2025-12-14 15:47:08,958 INFO MainThread:3760501 [wandb_init.py:init():1081] run started, returning control to user process
22
- 2025-12-14 15:47:12,190 INFO wandb-AsyncioManager-main:3760501 [service_client.py:_forward_responses():80] Reached EOF.
23
- 2025-12-14 15:47:12,191 INFO wandb-AsyncioManager-main:3760501 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/OLMo-1B/wandb/wandb/run-20251214_154707-2hopup81/run-2hopup81.wandb DELETED
Binary file (33.7 kB)