File size: 2,388 Bytes
9c5668e
 
 
 
 
 
 
 
 
 
 
 
b891330
9c5668e
b891330
9c5668e
 
 
 
b891330
9c5668e
b891330
9c5668e
 
 
 
b891330
9c5668e
b891330
9c5668e
 
 
 
b891330
9c5668e
b891330
9c5668e
 
 
 
b891330
9c5668e
b891330
9c5668e
 
 
 
b891330
9c5668e
b891330
9c5668e
 
 
 
b891330
9c5668e
b891330
9c5668e
 
 
 
b891330
9c5668e
b891330
9c5668e
 
 
 
 
 
b891330
 
 
9c5668e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 2060,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4854368932038835,
      "grad_norm": 0.9415706992149353,
      "learning_rate": 8.794946550048592e-05,
      "loss": 0.8636,
      "step": 250
    },
    {
      "epoch": 0.970873786407767,
      "grad_norm": 0.6905472278594971,
      "learning_rate": 7.580174927113704e-05,
      "loss": 0.4257,
      "step": 500
    },
    {
      "epoch": 1.4563106796116505,
      "grad_norm": 0.6437392234802246,
      "learning_rate": 6.365403304178815e-05,
      "loss": 0.3792,
      "step": 750
    },
    {
      "epoch": 1.941747572815534,
      "grad_norm": 0.6311036348342896,
      "learning_rate": 5.150631681243926e-05,
      "loss": 0.3628,
      "step": 1000
    },
    {
      "epoch": 2.4271844660194173,
      "grad_norm": 0.6062882542610168,
      "learning_rate": 3.9358600583090386e-05,
      "loss": 0.3511,
      "step": 1250
    },
    {
      "epoch": 2.912621359223301,
      "grad_norm": 0.6469098925590515,
      "learning_rate": 2.72108843537415e-05,
      "loss": 0.3425,
      "step": 1500
    },
    {
      "epoch": 3.3980582524271843,
      "grad_norm": 0.6484191417694092,
      "learning_rate": 1.5063168124392615e-05,
      "loss": 0.329,
      "step": 1750
    },
    {
      "epoch": 3.883495145631068,
      "grad_norm": 0.6347299218177795,
      "learning_rate": 2.915451895043732e-06,
      "loss": 0.3209,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "step": 2060,
      "total_flos": 1.4445804612483994e+18,
      "train_loss": 0.4188447378214123,
      "train_runtime": 30284.0025,
      "train_samples_per_second": 17.413,
      "train_steps_per_second": 0.068
    }
  ],
  "logging_steps": 250,
  "max_steps": 2060,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4445804612483994e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}