Rolv-Arild commited on
Commit
8888e5c
·
1 Parent(s): db00921
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 64.0,
3
+ "eval_accuracy": 0.9457035788509057,
4
+ "eval_f1": 0.9276842027293449,
5
+ "eval_loss": 0.14763057231903076,
6
+ "eval_matthews_correlation": 0.8879028981021795,
7
+ "eval_precision": 0.932791895372278,
8
+ "eval_recall": 0.9230343944305247,
9
+ "eval_runtime": 1043.6137,
10
+ "eval_samples_per_second": 58.555,
11
+ "eval_steps_per_second": 1.83,
12
+ "train_loss": 0.1566047928276214,
13
+ "train_runtime": 597098.8722,
14
+ "train_samples_per_second": 25.809,
15
+ "train_steps_per_second": 0.05
16
+ }
checkpoint-28000/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-384",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Front",
13
+ "1": "Middle",
14
+ "2": "Back"
15
+ },
16
+ "image_size": 384,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Back": "2",
21
+ "Front": "0",
22
+ "Middle": "1"
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "model_type": "vit",
26
+ "num_attention_heads": 12,
27
+ "num_channels": 3,
28
+ "num_hidden_layers": 12,
29
+ "patch_size": 16,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.20.0"
34
+ }
checkpoint-28000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e44f29773990940a6ee484deaaa7d159495afbb0bdc69529b9ce94621330d5
3
+ size 688858465
checkpoint-28000/preprocessor_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_resize": true,
4
+ "feature_extractor_type": "ViTFeatureExtractor",
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_std": [
11
+ 0.5,
12
+ 0.5,
13
+ 0.5
14
+ ],
15
+ "resample": 2,
16
+ "size": 384
17
+ }
checkpoint-28000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7aeae6401eee261a920c4636609b82fc52762b4b566ab785089a4bb27c1d0cb
3
+ size 344437425
checkpoint-28000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a776feb5bb30ae56498d0b3c161acbc1b74bf481ba2a790baa01b5a88a1df305
3
+ size 14503
checkpoint-28000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec8e921f8503d7f4128558bb1c0ff0d25de6dfea42f98051387e4689fa3cf243
3
+ size 559
checkpoint-28000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80178b31d11242832b8dac59294e6a975ca255697b7afdea0e8c6651ef7890c
3
+ size 623
checkpoint-28000/trainer_state.json ADDED
@@ -0,0 +1,3516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.14763057231903076,
3
+ "best_model_checkpoint": "./vit-front-page-384-complete-v2/checkpoint-28000",
4
+ "epoch": 59.57408637873754,
5
+ "global_step": 28000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.21,
12
+ "learning_rate": 7e-08,
13
+ "loss": 1.0485,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.43,
18
+ "learning_rate": 1.4e-07,
19
+ "loss": 0.9833,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.43,
24
+ "eval_accuracy": 0.6194995827128573,
25
+ "eval_f1": 0.2849637284042793,
26
+ "eval_loss": 0.9414144158363342,
27
+ "eval_matthews_correlation": -0.046627386105974944,
28
+ "eval_precision": 0.28223649662424866,
29
+ "eval_recall": 0.3194839105028776,
30
+ "eval_runtime": 1120.0285,
31
+ "eval_samples_per_second": 54.56,
32
+ "eval_steps_per_second": 1.705,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.64,
37
+ "learning_rate": 2.0999999999999997e-07,
38
+ "loss": 0.9031,
39
+ "step": 300
40
+ },
41
+ {
42
+ "epoch": 0.85,
43
+ "learning_rate": 2.8e-07,
44
+ "loss": 0.8378,
45
+ "step": 400
46
+ },
47
+ {
48
+ "epoch": 0.85,
49
+ "eval_accuracy": 0.6742542015087793,
50
+ "eval_f1": 0.27318592449369794,
51
+ "eval_loss": 0.8087042570114136,
52
+ "eval_matthews_correlation": -0.008566496106974774,
53
+ "eval_precision": 0.33248655167577795,
54
+ "eval_recall": 0.3329403574518804,
55
+ "eval_runtime": 1146.8267,
56
+ "eval_samples_per_second": 53.285,
57
+ "eval_steps_per_second": 1.665,
58
+ "step": 400
59
+ },
60
+ {
61
+ "epoch": 1.06,
62
+ "learning_rate": 3.5e-07,
63
+ "loss": 0.7846,
64
+ "step": 500
65
+ },
66
+ {
67
+ "epoch": 1.28,
68
+ "learning_rate": 4.1999999999999995e-07,
69
+ "loss": 0.725,
70
+ "step": 600
71
+ },
72
+ {
73
+ "epoch": 1.28,
74
+ "eval_accuracy": 0.7299906724050467,
75
+ "eval_f1": 0.45416361692283386,
76
+ "eval_loss": 0.6847068071365356,
77
+ "eval_matthews_correlation": 0.3478679528690636,
78
+ "eval_precision": 0.7356178375809365,
79
+ "eval_recall": 0.4490403016153819,
80
+ "eval_runtime": 1122.3931,
81
+ "eval_samples_per_second": 54.445,
82
+ "eval_steps_per_second": 1.702,
83
+ "step": 600
84
+ },
85
+ {
86
+ "epoch": 1.49,
87
+ "learning_rate": 4.9e-07,
88
+ "loss": 0.6452,
89
+ "step": 700
90
+ },
91
+ {
92
+ "epoch": 1.7,
93
+ "learning_rate": 5.6e-07,
94
+ "loss": 0.5769,
95
+ "step": 800
96
+ },
97
+ {
98
+ "epoch": 1.7,
99
+ "eval_accuracy": 0.7984093995974406,
100
+ "eval_f1": 0.6063804641618133,
101
+ "eval_loss": 0.5433278679847717,
102
+ "eval_matthews_correlation": 0.554774970478316,
103
+ "eval_precision": 0.782372439805869,
104
+ "eval_recall": 0.6092247400862097,
105
+ "eval_runtime": 1136.2592,
106
+ "eval_samples_per_second": 53.781,
107
+ "eval_steps_per_second": 1.681,
108
+ "step": 800
109
+ },
110
+ {
111
+ "epoch": 1.91,
112
+ "learning_rate": 6.3e-07,
113
+ "loss": 0.5208,
114
+ "step": 900
115
+ },
116
+ {
117
+ "epoch": 2.13,
118
+ "learning_rate": 7e-07,
119
+ "loss": 0.4706,
120
+ "step": 1000
121
+ },
122
+ {
123
+ "epoch": 2.13,
124
+ "eval_accuracy": 0.8339688098316124,
125
+ "eval_f1": 0.6871318497564141,
126
+ "eval_loss": 0.4425855875015259,
127
+ "eval_matthews_correlation": 0.6440133138789277,
128
+ "eval_precision": 0.8172733311232007,
129
+ "eval_recall": 0.6865053904162712,
130
+ "eval_runtime": 1131.2502,
131
+ "eval_samples_per_second": 54.019,
132
+ "eval_steps_per_second": 1.688,
133
+ "step": 1000
134
+ },
135
+ {
136
+ "epoch": 2.34,
137
+ "learning_rate": 6.975928473177441e-07,
138
+ "loss": 0.4247,
139
+ "step": 1100
140
+ },
141
+ {
142
+ "epoch": 2.55,
143
+ "learning_rate": 6.951856946354883e-07,
144
+ "loss": 0.3902,
145
+ "step": 1200
146
+ },
147
+ {
148
+ "epoch": 2.55,
149
+ "eval_accuracy": 0.8571732478031059,
150
+ "eval_f1": 0.7517270150716192,
151
+ "eval_loss": 0.3730458915233612,
152
+ "eval_matthews_correlation": 0.6959063906454223,
153
+ "eval_precision": 0.8447385961080253,
154
+ "eval_recall": 0.7349271721409717,
155
+ "eval_runtime": 1143.842,
156
+ "eval_samples_per_second": 53.424,
157
+ "eval_steps_per_second": 1.67,
158
+ "step": 1200
159
+ },
160
+ {
161
+ "epoch": 2.77,
162
+ "learning_rate": 6.927785419532324e-07,
163
+ "loss": 0.3604,
164
+ "step": 1300
165
+ },
166
+ {
167
+ "epoch": 2.98,
168
+ "learning_rate": 6.903713892709766e-07,
169
+ "loss": 0.3392,
170
+ "step": 1400
171
+ },
172
+ {
173
+ "epoch": 2.98,
174
+ "eval_accuracy": 0.8786430803973229,
175
+ "eval_f1": 0.8069413230330182,
176
+ "eval_loss": 0.32644152641296387,
177
+ "eval_matthews_correlation": 0.7428749706793645,
178
+ "eval_precision": 0.8632726130534206,
179
+ "eval_recall": 0.7855049543193654,
180
+ "eval_runtime": 1136.2051,
181
+ "eval_samples_per_second": 53.783,
182
+ "eval_steps_per_second": 1.681,
183
+ "step": 1400
184
+ },
185
+ {
186
+ "epoch": 3.19,
187
+ "learning_rate": 6.879642365887207e-07,
188
+ "loss": 0.3188,
189
+ "step": 1500
190
+ },
191
+ {
192
+ "epoch": 3.4,
193
+ "learning_rate": 6.855570839064649e-07,
194
+ "loss": 0.3044,
195
+ "step": 1600
196
+ },
197
+ {
198
+ "epoch": 3.4,
199
+ "eval_accuracy": 0.8894925461061383,
200
+ "eval_f1": 0.8329212820595306,
201
+ "eval_loss": 0.2972641587257385,
202
+ "eval_matthews_correlation": 0.7664388748302527,
203
+ "eval_precision": 0.8723203701164862,
204
+ "eval_recall": 0.8120687720492296,
205
+ "eval_runtime": 1138.2797,
206
+ "eval_samples_per_second": 53.685,
207
+ "eval_steps_per_second": 1.678,
208
+ "step": 1600
209
+ },
210
+ {
211
+ "epoch": 3.62,
212
+ "learning_rate": 6.83149931224209e-07,
213
+ "loss": 0.2943,
214
+ "step": 1700
215
+ },
216
+ {
217
+ "epoch": 3.83,
218
+ "learning_rate": 6.807427785419532e-07,
219
+ "loss": 0.2795,
220
+ "step": 1800
221
+ },
222
+ {
223
+ "epoch": 3.83,
224
+ "eval_accuracy": 0.8961691403884862,
225
+ "eval_f1": 0.8432760143718706,
226
+ "eval_loss": 0.2766903340816498,
227
+ "eval_matthews_correlation": 0.781173318686851,
228
+ "eval_precision": 0.8834852690618357,
229
+ "eval_recall": 0.8221927104856026,
230
+ "eval_runtime": 1109.6741,
231
+ "eval_samples_per_second": 55.069,
232
+ "eval_steps_per_second": 1.721,
233
+ "step": 1800
234
+ },
235
+ {
236
+ "epoch": 4.04,
237
+ "learning_rate": 6.783356258596973e-07,
238
+ "loss": 0.2738,
239
+ "step": 1900
240
+ },
241
+ {
242
+ "epoch": 4.26,
243
+ "learning_rate": 6.759284731774416e-07,
244
+ "loss": 0.262,
245
+ "step": 2000
246
+ },
247
+ {
248
+ "epoch": 4.26,
249
+ "eval_accuracy": 0.9019784319821957,
250
+ "eval_f1": 0.8550525638971442,
251
+ "eval_loss": 0.26178261637687683,
252
+ "eval_matthews_correlation": 0.7937491549035616,
253
+ "eval_precision": 0.8893566012052573,
254
+ "eval_recall": 0.8349244764526037,
255
+ "eval_runtime": 1138.053,
256
+ "eval_samples_per_second": 53.696,
257
+ "eval_steps_per_second": 1.678,
258
+ "step": 2000
259
+ },
260
+ {
261
+ "epoch": 4.47,
262
+ "learning_rate": 6.735213204951857e-07,
263
+ "loss": 0.2567,
264
+ "step": 2100
265
+ },
266
+ {
267
+ "epoch": 4.68,
268
+ "learning_rate": 6.711141678129298e-07,
269
+ "loss": 0.2497,
270
+ "step": 2200
271
+ },
272
+ {
273
+ "epoch": 4.68,
274
+ "eval_accuracy": 0.9061185750053183,
275
+ "eval_f1": 0.8647877107942263,
276
+ "eval_loss": 0.25028473138809204,
277
+ "eval_matthews_correlation": 0.8031001323768835,
278
+ "eval_precision": 0.888845641786976,
279
+ "eval_recall": 0.8490665078134735,
280
+ "eval_runtime": 1128.8955,
281
+ "eval_samples_per_second": 54.132,
282
+ "eval_steps_per_second": 1.692,
283
+ "step": 2200
284
+ },
285
+ {
286
+ "epoch": 4.89,
287
+ "learning_rate": 6.68707015130674e-07,
288
+ "loss": 0.2444,
289
+ "step": 2300
290
+ },
291
+ {
292
+ "epoch": 5.11,
293
+ "learning_rate": 6.662998624484181e-07,
294
+ "loss": 0.2414,
295
+ "step": 2400
296
+ },
297
+ {
298
+ "epoch": 5.11,
299
+ "eval_accuracy": 0.908589569457854,
300
+ "eval_f1": 0.8705010060852946,
301
+ "eval_loss": 0.24158228933811188,
302
+ "eval_matthews_correlation": 0.808821805927799,
303
+ "eval_precision": 0.888714743861985,
304
+ "eval_recall": 0.8577216170630284,
305
+ "eval_runtime": 1121.8959,
306
+ "eval_samples_per_second": 54.469,
307
+ "eval_steps_per_second": 1.702,
308
+ "step": 2400
309
+ },
310
+ {
311
+ "epoch": 5.32,
312
+ "learning_rate": 6.638927097661623e-07,
313
+ "loss": 0.2321,
314
+ "step": 2500
315
+ },
316
+ {
317
+ "epoch": 5.53,
318
+ "learning_rate": 6.614855570839064e-07,
319
+ "loss": 0.2356,
320
+ "step": 2600
321
+ },
322
+ {
323
+ "epoch": 5.53,
324
+ "eval_accuracy": 0.9110441997087172,
325
+ "eval_f1": 0.8747151973222319,
326
+ "eval_loss": 0.23398438096046448,
327
+ "eval_matthews_correlation": 0.8139518481821442,
328
+ "eval_precision": 0.8928668344520597,
329
+ "eval_recall": 0.861416198724987,
330
+ "eval_runtime": 1133.6213,
331
+ "eval_samples_per_second": 53.906,
332
+ "eval_steps_per_second": 1.685,
333
+ "step": 2600
334
+ },
335
+ {
336
+ "epoch": 5.74,
337
+ "learning_rate": 6.590784044016506e-07,
338
+ "loss": 0.226,
339
+ "step": 2700
340
+ },
341
+ {
342
+ "epoch": 5.96,
343
+ "learning_rate": 6.566712517193947e-07,
344
+ "loss": 0.2251,
345
+ "step": 2800
346
+ },
347
+ {
348
+ "epoch": 5.96,
349
+ "eval_accuracy": 0.9136297435729598,
350
+ "eval_f1": 0.8786518904171384,
351
+ "eval_loss": 0.22776289284229279,
352
+ "eval_matthews_correlation": 0.8196091308971525,
353
+ "eval_precision": 0.8955149466291931,
354
+ "eval_recall": 0.8663159550061658,
355
+ "eval_runtime": 1128.8199,
356
+ "eval_samples_per_second": 54.135,
357
+ "eval_steps_per_second": 1.692,
358
+ "step": 2800
359
+ },
360
+ {
361
+ "epoch": 6.17,
362
+ "learning_rate": 6.542640990371389e-07,
363
+ "loss": 0.2216,
364
+ "step": 2900
365
+ },
366
+ {
367
+ "epoch": 6.38,
368
+ "learning_rate": 6.51856946354883e-07,
369
+ "loss": 0.217,
370
+ "step": 3000
371
+ },
372
+ {
373
+ "epoch": 6.38,
374
+ "eval_accuracy": 0.915577083571978,
375
+ "eval_f1": 0.8816198142871025,
376
+ "eval_loss": 0.22265483438968658,
377
+ "eval_matthews_correlation": 0.8236394377439027,
378
+ "eval_precision": 0.8990606065527942,
379
+ "eval_recall": 0.8686464677513945,
380
+ "eval_runtime": 1127.8916,
381
+ "eval_samples_per_second": 54.18,
382
+ "eval_steps_per_second": 1.693,
383
+ "step": 3000
384
+ },
385
+ {
386
+ "epoch": 6.6,
387
+ "learning_rate": 6.494497936726272e-07,
388
+ "loss": 0.2175,
389
+ "step": 3100
390
+ },
391
+ {
392
+ "epoch": 6.81,
393
+ "learning_rate": 6.470426409903714e-07,
394
+ "loss": 0.2133,
395
+ "step": 3200
396
+ },
397
+ {
398
+ "epoch": 6.81,
399
+ "eval_accuracy": 0.9171153185291856,
400
+ "eval_f1": 0.8827723003585022,
401
+ "eval_loss": 0.21786576509475708,
402
+ "eval_matthews_correlation": 0.8266852106519758,
403
+ "eval_precision": 0.9034666000752295,
404
+ "eval_recall": 0.8679497941871164,
405
+ "eval_runtime": 1129.6487,
406
+ "eval_samples_per_second": 54.096,
407
+ "eval_steps_per_second": 1.691,
408
+ "step": 3200
409
+ },
410
+ {
411
+ "epoch": 7.02,
412
+ "learning_rate": 6.446354883081155e-07,
413
+ "loss": 0.2117,
414
+ "step": 3300
415
+ },
416
+ {
417
+ "epoch": 7.23,
418
+ "learning_rate": 6.422283356258597e-07,
419
+ "loss": 0.2098,
420
+ "step": 3400
421
+ },
422
+ {
423
+ "epoch": 7.23,
424
+ "eval_accuracy": 0.9182935410495999,
425
+ "eval_f1": 0.884903972857821,
426
+ "eval_loss": 0.21342970430850983,
427
+ "eval_matthews_correlation": 0.8292988605784972,
428
+ "eval_precision": 0.9040958524512837,
429
+ "eval_recall": 0.870991653583178,
430
+ "eval_runtime": 1152.1924,
431
+ "eval_samples_per_second": 53.037,
432
+ "eval_steps_per_second": 1.658,
433
+ "step": 3400
434
+ },
435
+ {
436
+ "epoch": 7.45,
437
+ "learning_rate": 6.398211829436038e-07,
438
+ "loss": 0.2021,
439
+ "step": 3500
440
+ },
441
+ {
442
+ "epoch": 7.66,
443
+ "learning_rate": 6.37414030261348e-07,
444
+ "loss": 0.2062,
445
+ "step": 3600
446
+ },
447
+ {
448
+ "epoch": 7.66,
449
+ "eval_accuracy": 0.9201754242419283,
450
+ "eval_f1": 0.8875922877918782,
451
+ "eval_loss": 0.20977966487407684,
452
+ "eval_matthews_correlation": 0.833123364179002,
453
+ "eval_precision": 0.9084794166339746,
454
+ "eval_recall": 0.872252852044762,
455
+ "eval_runtime": 1135.8312,
456
+ "eval_samples_per_second": 53.801,
457
+ "eval_steps_per_second": 1.682,
458
+ "step": 3600
459
+ },
460
+ {
461
+ "epoch": 7.87,
462
+ "learning_rate": 6.350068775790921e-07,
463
+ "loss": 0.2033,
464
+ "step": 3700
465
+ },
466
+ {
467
+ "epoch": 8.09,
468
+ "learning_rate": 6.325997248968363e-07,
469
+ "loss": 0.1982,
470
+ "step": 3800
471
+ },
472
+ {
473
+ "epoch": 8.09,
474
+ "eval_accuracy": 0.9213863751656876,
475
+ "eval_f1": 0.88930772340783,
476
+ "eval_loss": 0.20641230046749115,
477
+ "eval_matthews_correlation": 0.8357746010338397,
478
+ "eval_precision": 0.9095846117319936,
479
+ "eval_recall": 0.8745329047658825,
480
+ "eval_runtime": 1143.3313,
481
+ "eval_samples_per_second": 53.448,
482
+ "eval_steps_per_second": 1.671,
483
+ "step": 3800
484
+ },
485
+ {
486
+ "epoch": 8.3,
487
+ "learning_rate": 6.301925722145805e-07,
488
+ "loss": 0.196,
489
+ "step": 3900
490
+ },
491
+ {
492
+ "epoch": 8.51,
493
+ "learning_rate": 6.277854195323246e-07,
494
+ "loss": 0.1983,
495
+ "step": 4000
496
+ },
497
+ {
498
+ "epoch": 8.51,
499
+ "eval_accuracy": 0.9222700420559983,
500
+ "eval_f1": 0.8900453287552592,
501
+ "eval_loss": 0.20386044681072235,
502
+ "eval_matthews_correlation": 0.837555384879415,
503
+ "eval_precision": 0.9123527733555781,
504
+ "eval_recall": 0.8740593137023058,
505
+ "eval_runtime": 1129.2304,
506
+ "eval_samples_per_second": 54.116,
507
+ "eval_steps_per_second": 1.691,
508
+ "step": 4000
509
+ },
510
+ {
511
+ "epoch": 8.72,
512
+ "learning_rate": 6.253782668500687e-07,
513
+ "loss": 0.1965,
514
+ "step": 4100
515
+ },
516
+ {
517
+ "epoch": 8.94,
518
+ "learning_rate": 6.229711141678129e-07,
519
+ "loss": 0.1936,
520
+ "step": 4200
521
+ },
522
+ {
523
+ "epoch": 8.94,
524
+ "eval_accuracy": 0.9233828077697229,
525
+ "eval_f1": 0.8932485448536257,
526
+ "eval_loss": 0.20058760046958923,
527
+ "eval_matthews_correlation": 0.8401209500114148,
528
+ "eval_precision": 0.9107366418558472,
529
+ "eval_recall": 0.8798434624360084,
530
+ "eval_runtime": 1120.0565,
531
+ "eval_samples_per_second": 54.559,
532
+ "eval_steps_per_second": 1.705,
533
+ "step": 4200
534
+ },
535
+ {
536
+ "epoch": 9.15,
537
+ "learning_rate": 6.205880330123796e-07,
538
+ "loss": 0.1922,
539
+ "step": 4300
540
+ },
541
+ {
542
+ "epoch": 9.36,
543
+ "learning_rate": 6.181808803301237e-07,
544
+ "loss": 0.1953,
545
+ "step": 4400
546
+ },
547
+ {
548
+ "epoch": 9.36,
549
+ "eval_accuracy": 0.9238246412148783,
550
+ "eval_f1": 0.8951433768565843,
551
+ "eval_loss": 0.19883869588375092,
552
+ "eval_matthews_correlation": 0.8415676876590389,
553
+ "eval_precision": 0.9072146230906633,
554
+ "eval_recall": 0.885557435823166,
555
+ "eval_runtime": 1137.9471,
556
+ "eval_samples_per_second": 53.701,
557
+ "eval_steps_per_second": 1.678,
558
+ "step": 4400
559
+ },
560
+ {
561
+ "epoch": 9.57,
562
+ "learning_rate": 6.15773727647868e-07,
563
+ "loss": 0.1843,
564
+ "step": 4500
565
+ },
566
+ {
567
+ "epoch": 9.79,
568
+ "learning_rate": 6.133665749656121e-07,
569
+ "loss": 0.1852,
570
+ "step": 4600
571
+ },
572
+ {
573
+ "epoch": 9.79,
574
+ "eval_accuracy": 0.9253792403737584,
575
+ "eval_f1": 0.8946482066160663,
576
+ "eval_loss": 0.19628000259399414,
577
+ "eval_matthews_correlation": 0.8441339816426117,
578
+ "eval_precision": 0.9173420280370865,
579
+ "eval_recall": 0.8782309033246154,
580
+ "eval_runtime": 1124.1932,
581
+ "eval_samples_per_second": 54.358,
582
+ "eval_steps_per_second": 1.699,
583
+ "step": 4600
584
+ },
585
+ {
586
+ "epoch": 10.0,
587
+ "learning_rate": 6.109594222833563e-07,
588
+ "loss": 0.1845,
589
+ "step": 4700
590
+ },
591
+ {
592
+ "epoch": 10.21,
593
+ "learning_rate": 6.085522696011004e-07,
594
+ "loss": 0.1839,
595
+ "step": 4800
596
+ },
597
+ {
598
+ "epoch": 10.21,
599
+ "eval_accuracy": 0.9258374380205862,
600
+ "eval_f1": 0.8972498157769592,
601
+ "eval_loss": 0.1933354139328003,
602
+ "eval_matthews_correlation": 0.8454266170310101,
603
+ "eval_precision": 0.9128401019975159,
604
+ "eval_recall": 0.8849382250200719,
605
+ "eval_runtime": 1122.9259,
606
+ "eval_samples_per_second": 54.419,
607
+ "eval_steps_per_second": 1.701,
608
+ "step": 4800
609
+ },
610
+ {
611
+ "epoch": 10.43,
612
+ "learning_rate": 6.061451169188445e-07,
613
+ "loss": 0.1845,
614
+ "step": 4900
615
+ },
616
+ {
617
+ "epoch": 10.64,
618
+ "learning_rate": 6.037379642365887e-07,
619
+ "loss": 0.1791,
620
+ "step": 5000
621
+ },
622
+ {
623
+ "epoch": 10.64,
624
+ "eval_accuracy": 0.9267047407092245,
625
+ "eval_f1": 0.8983700490176202,
626
+ "eval_loss": 0.19171090424060822,
627
+ "eval_matthews_correlation": 0.8472176589659597,
628
+ "eval_precision": 0.9145474859208962,
629
+ "eval_recall": 0.8856752741366947,
630
+ "eval_runtime": 1134.0907,
631
+ "eval_samples_per_second": 53.884,
632
+ "eval_steps_per_second": 1.684,
633
+ "step": 5000
634
+ },
635
+ {
636
+ "epoch": 10.85,
637
+ "learning_rate": 6.013308115543328e-07,
638
+ "loss": 0.182,
639
+ "step": 5100
640
+ },
641
+ {
642
+ "epoch": 11.06,
643
+ "learning_rate": 5.98923658872077e-07,
644
+ "loss": 0.18,
645
+ "step": 5200
646
+ },
647
+ {
648
+ "epoch": 11.06,
649
+ "eval_accuracy": 0.9271138457510351,
650
+ "eval_f1": 0.8995050329011628,
651
+ "eval_loss": 0.1895856410264969,
652
+ "eval_matthews_correlation": 0.848294574832317,
653
+ "eval_precision": 0.9131718148389393,
654
+ "eval_recall": 0.8885987433408609,
655
+ "eval_runtime": 1126.9856,
656
+ "eval_samples_per_second": 54.223,
657
+ "eval_steps_per_second": 1.695,
658
+ "step": 5200
659
+ },
660
+ {
661
+ "epoch": 11.28,
662
+ "learning_rate": 5.965165061898212e-07,
663
+ "loss": 0.1763,
664
+ "step": 5300
665
+ },
666
+ {
667
+ "epoch": 11.49,
668
+ "learning_rate": 5.941093535075653e-07,
669
+ "loss": 0.1782,
670
+ "step": 5400
671
+ },
672
+ {
673
+ "epoch": 11.49,
674
+ "eval_accuracy": 0.9275556791961904,
675
+ "eval_f1": 0.9002696190246419,
676
+ "eval_loss": 0.18787473440170288,
677
+ "eval_matthews_correlation": 0.84921482512736,
678
+ "eval_precision": 0.9139059903894043,
679
+ "eval_recall": 0.88930836639531,
680
+ "eval_runtime": 1128.4943,
681
+ "eval_samples_per_second": 54.151,
682
+ "eval_steps_per_second": 1.693,
683
+ "step": 5400
684
+ },
685
+ {
686
+ "epoch": 11.7,
687
+ "learning_rate": 5.917022008253094e-07,
688
+ "loss": 0.1788,
689
+ "step": 5500
690
+ },
691
+ {
692
+ "epoch": 11.91,
693
+ "learning_rate": 5.892950481430536e-07,
694
+ "loss": 0.1712,
695
+ "step": 5600
696
+ },
697
+ {
698
+ "epoch": 11.91,
699
+ "eval_accuracy": 0.9287175375149324,
700
+ "eval_f1": 0.9011964058613507,
701
+ "eval_loss": 0.1861330270767212,
702
+ "eval_matthews_correlation": 0.8514678062818634,
703
+ "eval_precision": 0.9173976756930257,
704
+ "eval_recall": 0.8884787278098573,
705
+ "eval_runtime": 1120.0822,
706
+ "eval_samples_per_second": 54.558,
707
+ "eval_steps_per_second": 1.705,
708
+ "step": 5600
709
+ },
710
+ {
711
+ "epoch": 12.13,
712
+ "learning_rate": 5.868878954607978e-07,
713
+ "loss": 0.172,
714
+ "step": 5700
715
+ },
716
+ {
717
+ "epoch": 12.34,
718
+ "learning_rate": 5.84480742778542e-07,
719
+ "loss": 0.1737,
720
+ "step": 5800
721
+ },
722
+ {
723
+ "epoch": 12.34,
724
+ "eval_accuracy": 0.9296993896152776,
725
+ "eval_f1": 0.9010626619953944,
726
+ "eval_loss": 0.18573108315467834,
727
+ "eval_matthews_correlation": 0.8533347114503049,
728
+ "eval_precision": 0.9231173958750271,
729
+ "eval_recall": 0.8849159091550628,
730
+ "eval_runtime": 1156.0243,
731
+ "eval_samples_per_second": 52.861,
732
+ "eval_steps_per_second": 1.652,
733
+ "step": 5800
734
+ },
735
+ {
736
+ "epoch": 12.55,
737
+ "learning_rate": 5.820735900962861e-07,
738
+ "loss": 0.1726,
739
+ "step": 5900
740
+ },
741
+ {
742
+ "epoch": 12.77,
743
+ "learning_rate": 5.796664374140302e-07,
744
+ "loss": 0.1696,
745
+ "step": 6000
746
+ },
747
+ {
748
+ "epoch": 12.77,
749
+ "eval_accuracy": 0.930026673648726,
750
+ "eval_f1": 0.9031918377265686,
751
+ "eval_loss": 0.1828816682100296,
752
+ "eval_matthews_correlation": 0.8542309647721633,
753
+ "eval_precision": 0.9192960205580971,
754
+ "eval_recall": 0.8904958440603054,
755
+ "eval_runtime": 1124.8361,
756
+ "eval_samples_per_second": 54.327,
757
+ "eval_steps_per_second": 1.698,
758
+ "step": 6000
759
+ },
760
+ {
761
+ "epoch": 12.98,
762
+ "learning_rate": 5.772592847317744e-07,
763
+ "loss": 0.1682,
764
+ "step": 6100
765
+ },
766
+ {
767
+ "epoch": 13.19,
768
+ "learning_rate": 5.748521320495185e-07,
769
+ "loss": 0.1698,
770
+ "step": 6200
771
+ },
772
+ {
773
+ "epoch": 13.19,
774
+ "eval_accuracy": 0.9302885008754848,
775
+ "eval_f1": 0.9048186507788922,
776
+ "eval_loss": 0.18205700814723969,
777
+ "eval_matthews_correlation": 0.8551184049776112,
778
+ "eval_precision": 0.9164794554225942,
779
+ "eval_recall": 0.8951361110635792,
780
+ "eval_runtime": 1130.5739,
781
+ "eval_samples_per_second": 54.051,
782
+ "eval_steps_per_second": 1.689,
783
+ "step": 6200
784
+ },
785
+ {
786
+ "epoch": 13.4,
787
+ "learning_rate": 5.724449793672627e-07,
788
+ "loss": 0.1682,
789
+ "step": 6300
790
+ },
791
+ {
792
+ "epoch": 13.62,
793
+ "learning_rate": 5.700378266850069e-07,
794
+ "loss": 0.1667,
795
+ "step": 6400
796
+ },
797
+ {
798
+ "epoch": 13.62,
799
+ "eval_accuracy": 0.9303048650771573,
800
+ "eval_f1": 0.9056480109419961,
801
+ "eval_loss": 0.18090908229351044,
802
+ "eval_matthews_correlation": 0.855428359187355,
803
+ "eval_precision": 0.9147426706946558,
804
+ "eval_recall": 0.8978667208863417,
805
+ "eval_runtime": 1113.9498,
806
+ "eval_samples_per_second": 54.858,
807
+ "eval_steps_per_second": 1.715,
808
+ "step": 6400
809
+ },
810
+ {
811
+ "epoch": 13.83,
812
+ "learning_rate": 5.67630674002751e-07,
813
+ "loss": 0.1673,
814
+ "step": 6500
815
+ },
816
+ {
817
+ "epoch": 14.04,
818
+ "learning_rate": 5.652235213204951e-07,
819
+ "loss": 0.1651,
820
+ "step": 6600
821
+ },
822
+ {
823
+ "epoch": 14.04,
824
+ "eval_accuracy": 0.9307794269256574,
825
+ "eval_f1": 0.90568642496187,
826
+ "eval_loss": 0.17903146147727966,
827
+ "eval_matthews_correlation": 0.8562905390552094,
828
+ "eval_precision": 0.9160847482436384,
829
+ "eval_recall": 0.8970366589757913,
830
+ "eval_runtime": 1123.7851,
831
+ "eval_samples_per_second": 54.378,
832
+ "eval_steps_per_second": 1.7,
833
+ "step": 6600
834
+ },
835
+ {
836
+ "epoch": 14.26,
837
+ "learning_rate": 5.628163686382393e-07,
838
+ "loss": 0.1638,
839
+ "step": 6700
840
+ },
841
+ {
842
+ "epoch": 14.47,
843
+ "learning_rate": 5.604092159559834e-07,
844
+ "loss": 0.1614,
845
+ "step": 6800
846
+ },
847
+ {
848
+ "epoch": 14.47,
849
+ "eval_accuracy": 0.9319412852443993,
850
+ "eval_f1": 0.9062846542418327,
851
+ "eval_loss": 0.17761486768722534,
852
+ "eval_matthews_correlation": 0.8583632583058451,
853
+ "eval_precision": 0.9210761772993599,
854
+ "eval_recall": 0.8945216174285454,
855
+ "eval_runtime": 1135.9567,
856
+ "eval_samples_per_second": 53.795,
857
+ "eval_steps_per_second": 1.681,
858
+ "step": 6800
859
+ },
860
+ {
861
+ "epoch": 14.68,
862
+ "learning_rate": 5.580261348005502e-07,
863
+ "loss": 0.1629,
864
+ "step": 6900
865
+ },
866
+ {
867
+ "epoch": 14.89,
868
+ "learning_rate": 5.556189821182944e-07,
869
+ "loss": 0.16,
870
+ "step": 7000
871
+ },
872
+ {
873
+ "epoch": 14.89,
874
+ "eval_accuracy": 0.9323667544878823,
875
+ "eval_f1": 0.9057483267194343,
876
+ "eval_loss": 0.177241712808609,
877
+ "eval_matthews_correlation": 0.8590673404186833,
878
+ "eval_precision": 0.9249219537323391,
879
+ "eval_recall": 0.8912560253961389,
880
+ "eval_runtime": 1147.9689,
881
+ "eval_samples_per_second": 53.232,
882
+ "eval_steps_per_second": 1.664,
883
+ "step": 7000
884
+ },
885
+ {
886
+ "epoch": 15.11,
887
+ "learning_rate": 5.532118294360385e-07,
888
+ "loss": 0.1622,
889
+ "step": 7100
890
+ },
891
+ {
892
+ "epoch": 15.32,
893
+ "learning_rate": 5.508046767537827e-07,
894
+ "loss": 0.1591,
895
+ "step": 7200
896
+ },
897
+ {
898
+ "epoch": 15.32,
899
+ "eval_accuracy": 0.9334467917982622,
900
+ "eval_f1": 0.90852409435722,
901
+ "eval_loss": 0.17550112307071686,
902
+ "eval_matthews_correlation": 0.8615051567396046,
903
+ "eval_precision": 0.9232173166653612,
904
+ "eval_recall": 0.8966351633696954,
905
+ "eval_runtime": 1144.6322,
906
+ "eval_samples_per_second": 53.387,
907
+ "eval_steps_per_second": 1.669,
908
+ "step": 7200
909
+ },
910
+ {
911
+ "epoch": 15.53,
912
+ "learning_rate": 5.483975240715268e-07,
913
+ "loss": 0.16,
914
+ "step": 7300
915
+ },
916
+ {
917
+ "epoch": 15.74,
918
+ "learning_rate": 5.45990371389271e-07,
919
+ "loss": 0.1601,
920
+ "step": 7400
921
+ },
922
+ {
923
+ "epoch": 15.74,
924
+ "eval_accuracy": 0.9333322423865552,
925
+ "eval_f1": 0.9088120224095171,
926
+ "eval_loss": 0.17453713715076447,
927
+ "eval_matthews_correlation": 0.8613787118816909,
928
+ "eval_precision": 0.9218305477922321,
929
+ "eval_recall": 0.8980828107442319,
930
+ "eval_runtime": 1143.4792,
931
+ "eval_samples_per_second": 53.441,
932
+ "eval_steps_per_second": 1.67,
933
+ "step": 7400
934
+ },
935
+ {
936
+ "epoch": 15.96,
937
+ "learning_rate": 5.435832187070151e-07,
938
+ "loss": 0.1584,
939
+ "step": 7500
940
+ },
941
+ {
942
+ "epoch": 16.17,
943
+ "learning_rate": 5.411760660247592e-07,
944
+ "loss": 0.1558,
945
+ "step": 7600
946
+ },
947
+ {
948
+ "epoch": 16.17,
949
+ "eval_accuracy": 0.9338395326384002,
950
+ "eval_f1": 0.9096298761981343,
951
+ "eval_loss": 0.17321471869945526,
952
+ "eval_matthews_correlation": 0.8625748894777745,
953
+ "eval_precision": 0.9213568402254939,
954
+ "eval_recall": 0.8999913944640027,
955
+ "eval_runtime": 1116.2047,
956
+ "eval_samples_per_second": 54.747,
957
+ "eval_steps_per_second": 1.711,
958
+ "step": 7600
959
+ },
960
+ {
961
+ "epoch": 16.38,
962
+ "learning_rate": 5.387689133425034e-07,
963
+ "loss": 0.1544,
964
+ "step": 7700
965
+ },
966
+ {
967
+ "epoch": 16.6,
968
+ "learning_rate": 5.363617606602475e-07,
969
+ "loss": 0.1552,
970
+ "step": 7800
971
+ },
972
+ {
973
+ "epoch": 16.6,
974
+ "eval_accuracy": 0.9334631559999346,
975
+ "eval_f1": 0.9101663960284817,
976
+ "eval_loss": 0.17314866185188293,
977
+ "eval_matthews_correlation": 0.8621017254091837,
978
+ "eval_precision": 0.9185625631174649,
979
+ "eval_recall": 0.9029180840503908,
980
+ "eval_runtime": 1134.8054,
981
+ "eval_samples_per_second": 53.85,
982
+ "eval_steps_per_second": 1.683,
983
+ "step": 7800
984
+ },
985
+ {
986
+ "epoch": 16.81,
987
+ "learning_rate": 5.339546079779917e-07,
988
+ "loss": 0.1571,
989
+ "step": 7900
990
+ },
991
+ {
992
+ "epoch": 17.02,
993
+ "learning_rate": 5.315474552957359e-07,
994
+ "loss": 0.1526,
995
+ "step": 8000
996
+ },
997
+ {
998
+ "epoch": 17.02,
999
+ "eval_accuracy": 0.9340849956634866,
1000
+ "eval_f1": 0.9110344713781423,
1001
+ "eval_loss": 0.17203205823898315,
1002
+ "eval_matthews_correlation": 0.8634918058087261,
1003
+ "eval_precision": 0.9187082054219985,
1004
+ "eval_recall": 0.9044161914490357,
1005
+ "eval_runtime": 1153.6876,
1006
+ "eval_samples_per_second": 52.968,
1007
+ "eval_steps_per_second": 1.656,
1008
+ "step": 8000
1009
+ },
1010
+ {
1011
+ "epoch": 17.23,
1012
+ "learning_rate": 5.2914030261348e-07,
1013
+ "loss": 0.1485,
1014
+ "step": 8100
1015
+ },
1016
+ {
1017
+ "epoch": 17.45,
1018
+ "learning_rate": 5.267331499312242e-07,
1019
+ "loss": 0.1532,
1020
+ "step": 8200
1021
+ },
1022
+ {
1023
+ "epoch": 17.45,
1024
+ "eval_accuracy": 0.9346741069236937,
1025
+ "eval_f1": 0.9117218687868994,
1026
+ "eval_loss": 0.17076529562473297,
1027
+ "eval_matthews_correlation": 0.8646272156128906,
1028
+ "eval_precision": 0.9201726149656478,
1029
+ "eval_recall": 0.9044721170341236,
1030
+ "eval_runtime": 1135.1604,
1031
+ "eval_samples_per_second": 53.833,
1032
+ "eval_steps_per_second": 1.683,
1033
+ "step": 8200
1034
+ },
1035
+ {
1036
+ "epoch": 17.66,
1037
+ "learning_rate": 5.243259972489684e-07,
1038
+ "loss": 0.1551,
1039
+ "step": 8300
1040
+ },
1041
+ {
1042
+ "epoch": 17.87,
1043
+ "learning_rate": 5.219188445667125e-07,
1044
+ "loss": 0.1539,
1045
+ "step": 8400
1046
+ },
1047
+ {
1048
+ "epoch": 17.87,
1049
+ "eval_accuracy": 0.9351159403688492,
1050
+ "eval_f1": 0.911610683009681,
1051
+ "eval_loss": 0.1695307195186615,
1052
+ "eval_matthews_correlation": 0.865289074877206,
1053
+ "eval_precision": 0.9226978459827032,
1054
+ "eval_recall": 0.9023554489680277,
1055
+ "eval_runtime": 1122.959,
1056
+ "eval_samples_per_second": 54.418,
1057
+ "eval_steps_per_second": 1.701,
1058
+ "step": 8400
1059
+ },
1060
+ {
1061
+ "epoch": 18.09,
1062
+ "learning_rate": 5.195116918844567e-07,
1063
+ "loss": 0.1477,
1064
+ "step": 8500
1065
+ },
1066
+ {
1067
+ "epoch": 18.3,
1068
+ "learning_rate": 5.171045392022008e-07,
1069
+ "loss": 0.1483,
1070
+ "step": 8600
1071
+ },
1072
+ {
1073
+ "epoch": 18.3,
1074
+ "eval_accuracy": 0.9356723232257115,
1075
+ "eval_f1": 0.9116987398295661,
1076
+ "eval_loss": 0.16896899044513702,
1077
+ "eval_matthews_correlation": 0.8662695435076357,
1078
+ "eval_precision": 0.9255532080817271,
1079
+ "eval_recall": 0.9005543243181938,
1080
+ "eval_runtime": 1135.8593,
1081
+ "eval_samples_per_second": 53.8,
1082
+ "eval_steps_per_second": 1.682,
1083
+ "step": 8600
1084
+ },
1085
+ {
1086
+ "epoch": 18.51,
1087
+ "learning_rate": 5.146973865199449e-07,
1088
+ "loss": 0.1489,
1089
+ "step": 8700
1090
+ },
1091
+ {
1092
+ "epoch": 18.72,
1093
+ "learning_rate": 5.122902338376891e-07,
1094
+ "loss": 0.1469,
1095
+ "step": 8800
1096
+ },
1097
+ {
1098
+ "epoch": 18.72,
1099
+ "eval_accuracy": 0.9358032368390908,
1100
+ "eval_f1": 0.9135814294302561,
1101
+ "eval_loss": 0.16858318448066711,
1102
+ "eval_matthews_correlation": 0.8670812623876053,
1103
+ "eval_precision": 0.9210624088966272,
1104
+ "eval_recall": 0.907023142033475,
1105
+ "eval_runtime": 1122.9881,
1106
+ "eval_samples_per_second": 54.416,
1107
+ "eval_steps_per_second": 1.701,
1108
+ "step": 8800
1109
+ },
1110
+ {
1111
+ "epoch": 18.94,
1112
+ "learning_rate": 5.099071526822558e-07,
1113
+ "loss": 0.1507,
1114
+ "step": 8900
1115
+ },
1116
+ {
1117
+ "epoch": 19.15,
1118
+ "learning_rate": 5.075e-07,
1119
+ "loss": 0.1461,
1120
+ "step": 9000
1121
+ },
1122
+ {
1123
+ "epoch": 19.15,
1124
+ "eval_accuracy": 0.9359177862507978,
1125
+ "eval_f1": 0.9125886359052826,
1126
+ "eval_loss": 0.16734813153743744,
1127
+ "eval_matthews_correlation": 0.8668498402291797,
1128
+ "eval_precision": 0.9249962574783411,
1129
+ "eval_recall": 0.9022824765643582,
1130
+ "eval_runtime": 1124.669,
1131
+ "eval_samples_per_second": 54.335,
1132
+ "eval_steps_per_second": 1.698,
1133
+ "step": 9000
1134
+ },
1135
+ {
1136
+ "epoch": 19.36,
1137
+ "learning_rate": 5.050928473177441e-07,
1138
+ "loss": 0.1453,
1139
+ "step": 9100
1140
+ },
1141
+ {
1142
+ "epoch": 19.57,
1143
+ "learning_rate": 5.026856946354882e-07,
1144
+ "loss": 0.1487,
1145
+ "step": 9200
1146
+ },
1147
+ {
1148
+ "epoch": 19.57,
1149
+ "eval_accuracy": 0.936588718519367,
1150
+ "eval_f1": 0.9144201514784519,
1151
+ "eval_loss": 0.16664335131645203,
1152
+ "eval_matthews_correlation": 0.8685701549851123,
1153
+ "eval_precision": 0.9231788402692221,
1154
+ "eval_recall": 0.9068124874405709,
1155
+ "eval_runtime": 1132.0219,
1156
+ "eval_samples_per_second": 53.982,
1157
+ "eval_steps_per_second": 1.687,
1158
+ "step": 9200
1159
+ },
1160
+ {
1161
+ "epoch": 19.79,
1162
+ "learning_rate": 5.00302613480055e-07,
1163
+ "loss": 0.1499,
1164
+ "step": 9300
1165
+ },
1166
+ {
1167
+ "epoch": 20.0,
1168
+ "learning_rate": 4.978954607977991e-07,
1169
+ "loss": 0.1433,
1170
+ "step": 9400
1171
+ },
1172
+ {
1173
+ "epoch": 20.0,
1174
+ "eval_accuracy": 0.9369978235611776,
1175
+ "eval_f1": 0.91467275197079,
1176
+ "eval_loss": 0.16577279567718506,
1177
+ "eval_matthews_correlation": 0.8693728793586808,
1178
+ "eval_precision": 0.924099137774855,
1179
+ "eval_recall": 0.9066503727608389,
1180
+ "eval_runtime": 1144.9998,
1181
+ "eval_samples_per_second": 53.37,
1182
+ "eval_steps_per_second": 1.668,
1183
+ "step": 9400
1184
+ },
1185
+ {
1186
+ "epoch": 20.21,
1187
+ "learning_rate": 4.954883081155432e-07,
1188
+ "loss": 0.144,
1189
+ "step": 9500
1190
+ },
1191
+ {
1192
+ "epoch": 20.43,
1193
+ "learning_rate": 4.930811554332874e-07,
1194
+ "loss": 0.1437,
1195
+ "step": 9600
1196
+ },
1197
+ {
1198
+ "epoch": 20.43,
1199
+ "eval_accuracy": 0.9370796445695396,
1200
+ "eval_f1": 0.9143506341308217,
1201
+ "eval_loss": 0.1650729477405548,
1202
+ "eval_matthews_correlation": 0.8693576464491103,
1203
+ "eval_precision": 0.9258633996979286,
1204
+ "eval_recall": 0.9047199267971502,
1205
+ "eval_runtime": 1124.1861,
1206
+ "eval_samples_per_second": 54.358,
1207
+ "eval_steps_per_second": 1.699,
1208
+ "step": 9600
1209
+ },
1210
+ {
1211
+ "epoch": 20.64,
1212
+ "learning_rate": 4.906740027510316e-07,
1213
+ "loss": 0.1419,
1214
+ "step": 9700
1215
+ },
1216
+ {
1217
+ "epoch": 20.85,
1218
+ "learning_rate": 4.882668500687758e-07,
1219
+ "loss": 0.1434,
1220
+ "step": 9800
1221
+ },
1222
+ {
1223
+ "epoch": 20.85,
1224
+ "eval_accuracy": 0.9370796445695396,
1225
+ "eval_f1": 0.9154200976052094,
1226
+ "eval_loss": 0.1648997962474823,
1227
+ "eval_matthews_correlation": 0.8697098635796452,
1228
+ "eval_precision": 0.9230816943100145,
1229
+ "eval_recall": 0.9086611768001123,
1230
+ "eval_runtime": 1148.703,
1231
+ "eval_samples_per_second": 53.198,
1232
+ "eval_steps_per_second": 1.663,
1233
+ "step": 9800
1234
+ },
1235
+ {
1236
+ "epoch": 21.06,
1237
+ "learning_rate": 4.8585969738652e-07,
1238
+ "loss": 0.1439,
1239
+ "step": 9900
1240
+ },
1241
+ {
1242
+ "epoch": 21.28,
1243
+ "learning_rate": 4.834525447042641e-07,
1244
+ "loss": 0.1421,
1245
+ "step": 10000
1246
+ },
1247
+ {
1248
+ "epoch": 21.28,
1249
+ "eval_accuracy": 0.9374232928046605,
1250
+ "eval_f1": 0.9159962475642537,
1251
+ "eval_loss": 0.16433577239513397,
1252
+ "eval_matthews_correlation": 0.8705984477158324,
1253
+ "eval_precision": 0.9223208386659031,
1254
+ "eval_recall": 0.9104375438916276,
1255
+ "eval_runtime": 1138.7887,
1256
+ "eval_samples_per_second": 53.661,
1257
+ "eval_steps_per_second": 1.677,
1258
+ "step": 10000
1259
+ },
1260
+ {
1261
+ "epoch": 21.49,
1262
+ "learning_rate": 4.810453920220083e-07,
1263
+ "loss": 0.1386,
1264
+ "step": 10100
1265
+ },
1266
+ {
1267
+ "epoch": 21.7,
1268
+ "learning_rate": 4.786382393397524e-07,
1269
+ "loss": 0.1383,
1270
+ "step": 10200
1271
+ },
1272
+ {
1273
+ "epoch": 21.7,
1274
+ "eval_accuracy": 0.9377669410397814,
1275
+ "eval_f1": 0.9154593212298457,
1276
+ "eval_loss": 0.16332927346229553,
1277
+ "eval_matthews_correlation": 0.8708321918919477,
1278
+ "eval_precision": 0.9264808218886014,
1279
+ "eval_recall": 0.9061723895970233,
1280
+ "eval_runtime": 1130.5862,
1281
+ "eval_samples_per_second": 54.051,
1282
+ "eval_steps_per_second": 1.689,
1283
+ "step": 10200
1284
+ },
1285
+ {
1286
+ "epoch": 21.91,
1287
+ "learning_rate": 4.7623108665749656e-07,
1288
+ "loss": 0.144,
1289
+ "step": 10300
1290
+ },
1291
+ {
1292
+ "epoch": 22.13,
1293
+ "learning_rate": 4.7382393397524066e-07,
1294
+ "loss": 0.137,
1295
+ "step": 10400
1296
+ },
1297
+ {
1298
+ "epoch": 22.13,
1299
+ "eval_accuracy": 0.937325107594626,
1300
+ "eval_f1": 0.9155487613959368,
1301
+ "eval_loss": 0.1631198674440384,
1302
+ "eval_matthews_correlation": 0.8701584028013903,
1303
+ "eval_precision": 0.9238303074503852,
1304
+ "eval_recall": 0.9083409162157898,
1305
+ "eval_runtime": 1138.2164,
1306
+ "eval_samples_per_second": 53.688,
1307
+ "eval_steps_per_second": 1.678,
1308
+ "step": 10400
1309
+ },
1310
+ {
1311
+ "epoch": 22.34,
1312
+ "learning_rate": 4.714167812929848e-07,
1313
+ "loss": 0.1446,
1314
+ "step": 10500
1315
+ },
1316
+ {
1317
+ "epoch": 22.55,
1318
+ "learning_rate": 4.69009628610729e-07,
1319
+ "loss": 0.1387,
1320
+ "step": 10600
1321
+ },
1322
+ {
1323
+ "epoch": 22.55,
1324
+ "eval_accuracy": 0.9385851511234025,
1325
+ "eval_f1": 0.9166627207954582,
1326
+ "eval_loss": 0.1620563566684723,
1327
+ "eval_matthews_correlation": 0.8725817378443448,
1328
+ "eval_precision": 0.9271811452031979,
1329
+ "eval_recall": 0.9077409974100349,
1330
+ "eval_runtime": 1145.5718,
1331
+ "eval_samples_per_second": 53.344,
1332
+ "eval_steps_per_second": 1.667,
1333
+ "step": 10600
1334
+ },
1335
+ {
1336
+ "epoch": 22.77,
1337
+ "learning_rate": 4.6660247592847314e-07,
1338
+ "loss": 0.1367,
1339
+ "step": 10700
1340
+ },
1341
+ {
1342
+ "epoch": 22.98,
1343
+ "learning_rate": 4.641953232462173e-07,
1344
+ "loss": 0.1369,
1345
+ "step": 10800
1346
+ },
1347
+ {
1348
+ "epoch": 22.98,
1349
+ "eval_accuracy": 0.9383396880983161,
1350
+ "eval_f1": 0.9172324787033365,
1351
+ "eval_loss": 0.1618376225233078,
1352
+ "eval_matthews_correlation": 0.8723936452197988,
1353
+ "eval_precision": 0.9243962275737605,
1354
+ "eval_recall": 0.9109134783912377,
1355
+ "eval_runtime": 1141.2693,
1356
+ "eval_samples_per_second": 53.545,
1357
+ "eval_steps_per_second": 1.674,
1358
+ "step": 10800
1359
+ },
1360
+ {
1361
+ "epoch": 23.19,
1362
+ "learning_rate": 4.617881705639615e-07,
1363
+ "loss": 0.1348,
1364
+ "step": 10900
1365
+ },
1366
+ {
1367
+ "epoch": 23.4,
1368
+ "learning_rate": 4.5938101788170567e-07,
1369
+ "loss": 0.1378,
1370
+ "step": 11000
1371
+ },
1372
+ {
1373
+ "epoch": 23.4,
1374
+ "eval_accuracy": 0.9388797067535061,
1375
+ "eval_f1": 0.9170030704594044,
1376
+ "eval_loss": 0.16103394329547882,
1377
+ "eval_matthews_correlation": 0.8732414480101915,
1378
+ "eval_precision": 0.9272751714823327,
1379
+ "eval_recall": 0.908404227472793,
1380
+ "eval_runtime": 1131.6065,
1381
+ "eval_samples_per_second": 54.002,
1382
+ "eval_steps_per_second": 1.688,
1383
+ "step": 11000
1384
+ },
1385
+ {
1386
+ "epoch": 23.62,
1387
+ "learning_rate": 4.569738651994498e-07,
1388
+ "loss": 0.1375,
1389
+ "step": 11100
1390
+ },
1391
+ {
1392
+ "epoch": 23.83,
1393
+ "learning_rate": 4.5456671251719393e-07,
1394
+ "loss": 0.1366,
1395
+ "step": 11200
1396
+ },
1397
+ {
1398
+ "epoch": 23.83,
1399
+ "eval_accuracy": 0.9389451635601958,
1400
+ "eval_f1": 0.9178736477228994,
1401
+ "eval_loss": 0.16072671115398407,
1402
+ "eval_matthews_correlation": 0.8736416443228668,
1403
+ "eval_precision": 0.9252270842575737,
1404
+ "eval_recall": 0.9114700960342543,
1405
+ "eval_runtime": 1137.3148,
1406
+ "eval_samples_per_second": 53.731,
1407
+ "eval_steps_per_second": 1.679,
1408
+ "step": 11200
1409
+ },
1410
+ {
1411
+ "epoch": 24.04,
1412
+ "learning_rate": 4.521595598349381e-07,
1413
+ "loss": 0.1356,
1414
+ "step": 11300
1415
+ },
1416
+ {
1417
+ "epoch": 24.26,
1418
+ "learning_rate": 4.4975240715268225e-07,
1419
+ "loss": 0.1348,
1420
+ "step": 11400
1421
+ },
1422
+ {
1423
+ "epoch": 24.26,
1424
+ "eval_accuracy": 0.9389287993585232,
1425
+ "eval_f1": 0.9184615824628515,
1426
+ "eval_loss": 0.16080142557621002,
1427
+ "eval_matthews_correlation": 0.8739011778237431,
1428
+ "eval_precision": 0.9233816839860781,
1429
+ "eval_recall": 0.9140549878602235,
1430
+ "eval_runtime": 1135.8462,
1431
+ "eval_samples_per_second": 53.8,
1432
+ "eval_steps_per_second": 1.682,
1433
+ "step": 11400
1434
+ },
1435
+ {
1436
+ "epoch": 24.47,
1437
+ "learning_rate": 4.4734525447042636e-07,
1438
+ "loss": 0.1331,
1439
+ "step": 11500
1440
+ },
1441
+ {
1442
+ "epoch": 24.68,
1443
+ "learning_rate": 4.449381017881705e-07,
1444
+ "loss": 0.1318,
1445
+ "step": 11600
1446
+ },
1447
+ {
1448
+ "epoch": 24.68,
1449
+ "eval_accuracy": 0.9396324600304374,
1450
+ "eval_f1": 0.9179972413838614,
1451
+ "eval_loss": 0.1595466136932373,
1452
+ "eval_matthews_correlation": 0.874736229093411,
1453
+ "eval_precision": 0.9290778467923758,
1454
+ "eval_recall": 0.9086960867914774,
1455
+ "eval_runtime": 1138.0648,
1456
+ "eval_samples_per_second": 53.696,
1457
+ "eval_steps_per_second": 1.678,
1458
+ "step": 11600
1459
+ },
1460
+ {
1461
+ "epoch": 24.89,
1462
+ "learning_rate": 4.425309491059147e-07,
1463
+ "loss": 0.133,
1464
+ "step": 11700
1465
+ },
1466
+ {
1467
+ "epoch": 25.11,
1468
+ "learning_rate": 4.4012379642365883e-07,
1469
+ "loss": 0.133,
1470
+ "step": 11800
1471
+ },
1472
+ {
1473
+ "epoch": 25.11,
1474
+ "eval_accuracy": 0.9398942872571961,
1475
+ "eval_f1": 0.9190977119048673,
1476
+ "eval_loss": 0.1590360403060913,
1477
+ "eval_matthews_correlation": 0.8755172814743742,
1478
+ "eval_precision": 0.9272724566504644,
1479
+ "eval_recall": 0.9119361728726462,
1480
+ "eval_runtime": 1134.5021,
1481
+ "eval_samples_per_second": 53.864,
1482
+ "eval_steps_per_second": 1.684,
1483
+ "step": 11800
1484
+ },
1485
+ {
1486
+ "epoch": 25.32,
1487
+ "learning_rate": 4.37716643741403e-07,
1488
+ "loss": 0.1335,
1489
+ "step": 11900
1490
+ },
1491
+ {
1492
+ "epoch": 25.53,
1493
+ "learning_rate": 4.353094910591472e-07,
1494
+ "loss": 0.1314,
1495
+ "step": 12000
1496
+ },
1497
+ {
1498
+ "epoch": 25.53,
1499
+ "eval_accuracy": 0.9395015464170581,
1500
+ "eval_f1": 0.9190581177761091,
1501
+ "eval_loss": 0.1591825634241104,
1502
+ "eval_matthews_correlation": 0.8750075920131161,
1503
+ "eval_precision": 0.9245997125950819,
1504
+ "eval_recall": 0.9141288069140882,
1505
+ "eval_runtime": 1127.6358,
1506
+ "eval_samples_per_second": 54.192,
1507
+ "eval_steps_per_second": 1.694,
1508
+ "step": 12000
1509
+ },
1510
+ {
1511
+ "epoch": 25.74,
1512
+ "learning_rate": 4.3290233837689136e-07,
1513
+ "loss": 0.1312,
1514
+ "step": 12100
1515
+ },
1516
+ {
1517
+ "epoch": 25.96,
1518
+ "learning_rate": 4.30519257221458e-07,
1519
+ "loss": 0.1321,
1520
+ "step": 12200
1521
+ },
1522
+ {
1523
+ "epoch": 25.96,
1524
+ "eval_accuracy": 0.9405324911224205,
1525
+ "eval_f1": 0.9189856286227304,
1526
+ "eval_loss": 0.1582149714231491,
1527
+ "eval_matthews_correlation": 0.8765464055444899,
1528
+ "eval_precision": 0.9312260325323165,
1529
+ "eval_recall": 0.9088528341675056,
1530
+ "eval_runtime": 1128.8638,
1531
+ "eval_samples_per_second": 54.133,
1532
+ "eval_steps_per_second": 1.692,
1533
+ "step": 12200
1534
+ },
1535
+ {
1536
+ "epoch": 26.17,
1537
+ "learning_rate": 4.2811210453920216e-07,
1538
+ "loss": 0.1296,
1539
+ "step": 12300
1540
+ },
1541
+ {
1542
+ "epoch": 26.38,
1543
+ "learning_rate": 4.257049518569464e-07,
1544
+ "loss": 0.129,
1545
+ "step": 12400
1546
+ },
1547
+ {
1548
+ "epoch": 26.38,
1549
+ "eval_accuracy": 0.9404997627190758,
1550
+ "eval_f1": 0.9200009556213352,
1551
+ "eval_loss": 0.15780366957187653,
1552
+ "eval_matthews_correlation": 0.8768428582724813,
1553
+ "eval_precision": 0.9276058379537663,
1554
+ "eval_recall": 0.9133484566347269,
1555
+ "eval_runtime": 1119.4943,
1556
+ "eval_samples_per_second": 54.586,
1557
+ "eval_steps_per_second": 1.706,
1558
+ "step": 12400
1559
+ },
1560
+ {
1561
+ "epoch": 26.6,
1562
+ "learning_rate": 4.2329779917469053e-07,
1563
+ "loss": 0.1322,
1564
+ "step": 12500
1565
+ },
1566
+ {
1567
+ "epoch": 26.81,
1568
+ "learning_rate": 4.2089064649243464e-07,
1569
+ "loss": 0.1274,
1570
+ "step": 12600
1571
+ },
1572
+ {
1573
+ "epoch": 26.81,
1574
+ "eval_accuracy": 0.9409579603659035,
1575
+ "eval_f1": 0.9200809083075646,
1576
+ "eval_loss": 0.15745599567890167,
1577
+ "eval_matthews_correlation": 0.8775856097831178,
1578
+ "eval_precision": 0.9300727450861266,
1579
+ "eval_recall": 0.9115769049685479,
1580
+ "eval_runtime": 1129.8115,
1581
+ "eval_samples_per_second": 54.088,
1582
+ "eval_steps_per_second": 1.691,
1583
+ "step": 12600
1584
+ },
1585
+ {
1586
+ "epoch": 27.02,
1587
+ "learning_rate": 4.184834938101788e-07,
1588
+ "loss": 0.1298,
1589
+ "step": 12700
1590
+ },
1591
+ {
1592
+ "epoch": 27.23,
1593
+ "learning_rate": 4.1607634112792296e-07,
1594
+ "loss": 0.1229,
1595
+ "step": 12800
1596
+ },
1597
+ {
1598
+ "epoch": 27.23,
1599
+ "eval_accuracy": 0.9406797689374724,
1600
+ "eval_f1": 0.9207006984009518,
1601
+ "eval_loss": 0.15740837156772614,
1602
+ "eval_matthews_correlation": 0.8774253242767724,
1603
+ "eval_precision": 0.9264401397553,
1604
+ "eval_recall": 0.915588980453698,
1605
+ "eval_runtime": 1120.579,
1606
+ "eval_samples_per_second": 54.533,
1607
+ "eval_steps_per_second": 1.704,
1608
+ "step": 12800
1609
+ },
1610
+ {
1611
+ "epoch": 27.45,
1612
+ "learning_rate": 4.136691884456671e-07,
1613
+ "loss": 0.1288,
1614
+ "step": 12900
1615
+ },
1616
+ {
1617
+ "epoch": 27.66,
1618
+ "learning_rate": 4.112620357634113e-07,
1619
+ "loss": 0.1297,
1620
+ "step": 13000
1621
+ },
1622
+ {
1623
+ "epoch": 27.66,
1624
+ "eval_accuracy": 0.9410070529709208,
1625
+ "eval_f1": 0.9195664023927276,
1626
+ "eval_loss": 0.15692035853862762,
1627
+ "eval_matthews_correlation": 0.8774949999573018,
1628
+ "eval_precision": 0.9325444450940837,
1629
+ "eval_recall": 0.9088784383760194,
1630
+ "eval_runtime": 1122.3792,
1631
+ "eval_samples_per_second": 54.446,
1632
+ "eval_steps_per_second": 1.702,
1633
+ "step": 13000
1634
+ },
1635
+ {
1636
+ "epoch": 27.87,
1637
+ "learning_rate": 4.088548830811554e-07,
1638
+ "loss": 0.1294,
1639
+ "step": 13100
1640
+ },
1641
+ {
1642
+ "epoch": 28.09,
1643
+ "learning_rate": 4.0644773039889954e-07,
1644
+ "loss": 0.127,
1645
+ "step": 13200
1646
+ },
1647
+ {
1648
+ "epoch": 28.09,
1649
+ "eval_accuracy": 0.9410725097776105,
1650
+ "eval_f1": 0.9215107403838404,
1651
+ "eval_loss": 0.15686339139938354,
1652
+ "eval_matthews_correlation": 0.8783377225875042,
1653
+ "eval_precision": 0.9264187923955762,
1654
+ "eval_recall": 0.9170523743755302,
1655
+ "eval_runtime": 1130.091,
1656
+ "eval_samples_per_second": 54.074,
1657
+ "eval_steps_per_second": 1.69,
1658
+ "step": 13200
1659
+ },
1660
+ {
1661
+ "epoch": 28.3,
1662
+ "learning_rate": 4.040405777166437e-07,
1663
+ "loss": 0.1283,
1664
+ "step": 13300
1665
+ },
1666
+ {
1667
+ "epoch": 28.51,
1668
+ "learning_rate": 4.0163342503438786e-07,
1669
+ "loss": 0.1277,
1670
+ "step": 13400
1671
+ },
1672
+ {
1673
+ "epoch": 28.51,
1674
+ "eval_accuracy": 0.9413343370043692,
1675
+ "eval_f1": 0.9206744312110247,
1676
+ "eval_loss": 0.15601210296154022,
1677
+ "eval_matthews_correlation": 0.8783752668296679,
1678
+ "eval_precision": 0.9305381948772499,
1679
+ "eval_recall": 0.9122284555164281,
1680
+ "eval_runtime": 1130.3041,
1681
+ "eval_samples_per_second": 54.064,
1682
+ "eval_steps_per_second": 1.69,
1683
+ "step": 13400
1684
+ },
1685
+ {
1686
+ "epoch": 28.72,
1687
+ "learning_rate": 3.9922627235213207e-07,
1688
+ "loss": 0.1253,
1689
+ "step": 13500
1690
+ },
1691
+ {
1692
+ "epoch": 28.94,
1693
+ "learning_rate": 3.9681911966987623e-07,
1694
+ "loss": 0.1207,
1695
+ "step": 13600
1696
+ },
1697
+ {
1698
+ "epoch": 28.94,
1699
+ "eval_accuracy": 0.9411379665843002,
1700
+ "eval_f1": 0.9214834197039864,
1701
+ "eval_loss": 0.1558128446340561,
1702
+ "eval_matthews_correlation": 0.8783956105526514,
1703
+ "eval_precision": 0.926977605882486,
1704
+ "eval_recall": 0.9164881442500526,
1705
+ "eval_runtime": 1129.013,
1706
+ "eval_samples_per_second": 54.126,
1707
+ "eval_steps_per_second": 1.692,
1708
+ "step": 13600
1709
+ },
1710
+ {
1711
+ "epoch": 29.15,
1712
+ "learning_rate": 3.9441196698762033e-07,
1713
+ "loss": 0.126,
1714
+ "step": 13700
1715
+ },
1716
+ {
1717
+ "epoch": 29.36,
1718
+ "learning_rate": 3.920048143053645e-07,
1719
+ "loss": 0.1233,
1720
+ "step": 13800
1721
+ },
1722
+ {
1723
+ "epoch": 29.36,
1724
+ "eval_accuracy": 0.9414652506177487,
1725
+ "eval_f1": 0.9214108705582076,
1726
+ "eval_loss": 0.155442476272583,
1727
+ "eval_matthews_correlation": 0.8788308394514168,
1728
+ "eval_precision": 0.9290811733963565,
1729
+ "eval_recall": 0.9146004475707356,
1730
+ "eval_runtime": 1139.9159,
1731
+ "eval_samples_per_second": 53.608,
1732
+ "eval_steps_per_second": 1.676,
1733
+ "step": 13800
1734
+ },
1735
+ {
1736
+ "epoch": 29.57,
1737
+ "learning_rate": 3.8959766162310865e-07,
1738
+ "loss": 0.1231,
1739
+ "step": 13900
1740
+ },
1741
+ {
1742
+ "epoch": 29.79,
1743
+ "learning_rate": 3.871905089408528e-07,
1744
+ "loss": 0.1263,
1745
+ "step": 14000
1746
+ },
1747
+ {
1748
+ "epoch": 29.79,
1749
+ "eval_accuracy": 0.9417598062478522,
1750
+ "eval_f1": 0.9216422791525614,
1751
+ "eval_loss": 0.15486527979373932,
1752
+ "eval_matthews_correlation": 0.8794389554648475,
1753
+ "eval_precision": 0.9295396447855918,
1754
+ "eval_recall": 0.9147710213831234,
1755
+ "eval_runtime": 1125.9339,
1756
+ "eval_samples_per_second": 54.274,
1757
+ "eval_steps_per_second": 1.696,
1758
+ "step": 14000
1759
+ },
1760
+ {
1761
+ "epoch": 30.0,
1762
+ "learning_rate": 3.8478335625859697e-07,
1763
+ "loss": 0.1245,
1764
+ "step": 14100
1765
+ },
1766
+ {
1767
+ "epoch": 30.21,
1768
+ "learning_rate": 3.823762035763411e-07,
1769
+ "loss": 0.1237,
1770
+ "step": 14200
1771
+ },
1772
+ {
1773
+ "epoch": 30.21,
1774
+ "eval_accuracy": 0.9416288926344728,
1775
+ "eval_f1": 0.9223605864239737,
1776
+ "eval_loss": 0.15512850880622864,
1777
+ "eval_matthews_correlation": 0.8795357475488215,
1778
+ "eval_precision": 0.9269163530437144,
1779
+ "eval_recall": 0.9182071872279179,
1780
+ "eval_runtime": 1104.6568,
1781
+ "eval_samples_per_second": 55.319,
1782
+ "eval_steps_per_second": 1.729,
1783
+ "step": 14200
1784
+ },
1785
+ {
1786
+ "epoch": 30.43,
1787
+ "learning_rate": 3.7996905089408523e-07,
1788
+ "loss": 0.1227,
1789
+ "step": 14300
1790
+ },
1791
+ {
1792
+ "epoch": 30.64,
1793
+ "learning_rate": 3.775618982118294e-07,
1794
+ "loss": 0.1223,
1795
+ "step": 14400
1796
+ },
1797
+ {
1798
+ "epoch": 30.64,
1799
+ "eval_accuracy": 0.9419725408695937,
1800
+ "eval_f1": 0.9224586705089605,
1801
+ "eval_loss": 0.1544523686170578,
1802
+ "eval_matthews_correlation": 0.8800542555052714,
1803
+ "eval_precision": 0.9285558015982011,
1804
+ "eval_recall": 0.9169498289882058,
1805
+ "eval_runtime": 1103.5815,
1806
+ "eval_samples_per_second": 55.373,
1807
+ "eval_steps_per_second": 1.731,
1808
+ "step": 14400
1809
+ },
1810
+ {
1811
+ "epoch": 30.85,
1812
+ "learning_rate": 3.7515474552957355e-07,
1813
+ "loss": 0.122,
1814
+ "step": 14500
1815
+ },
1816
+ {
1817
+ "epoch": 31.06,
1818
+ "learning_rate": 3.7274759284731776e-07,
1819
+ "loss": 0.1223,
1820
+ "step": 14600
1821
+ },
1822
+ {
1823
+ "epoch": 31.06,
1824
+ "eval_accuracy": 0.9423980101130767,
1825
+ "eval_f1": 0.9223331133049456,
1826
+ "eval_loss": 0.15414074063301086,
1827
+ "eval_matthews_correlation": 0.8806243664613724,
1828
+ "eval_precision": 0.9317093562981557,
1829
+ "eval_recall": 0.9142028224825047,
1830
+ "eval_runtime": 1127.3394,
1831
+ "eval_samples_per_second": 54.206,
1832
+ "eval_steps_per_second": 1.694,
1833
+ "step": 14600
1834
+ },
1835
+ {
1836
+ "epoch": 31.28,
1837
+ "learning_rate": 3.703404401650619e-07,
1838
+ "loss": 0.1222,
1839
+ "step": 14700
1840
+ },
1841
+ {
1842
+ "epoch": 31.49,
1843
+ "learning_rate": 3.6795735900962856e-07,
1844
+ "loss": 0.121,
1845
+ "step": 14800
1846
+ },
1847
+ {
1848
+ "epoch": 31.49,
1849
+ "eval_accuracy": 0.9424143743147491,
1850
+ "eval_f1": 0.9226743391675636,
1851
+ "eval_loss": 0.15365062654018402,
1852
+ "eval_matthews_correlation": 0.8807922676782367,
1853
+ "eval_precision": 0.9305864010842684,
1854
+ "eval_recall": 0.9157406855938447,
1855
+ "eval_runtime": 1096.9802,
1856
+ "eval_samples_per_second": 55.707,
1857
+ "eval_steps_per_second": 1.741,
1858
+ "step": 14800
1859
+ },
1860
+ {
1861
+ "epoch": 31.7,
1862
+ "learning_rate": 3.655502063273727e-07,
1863
+ "loss": 0.1198,
1864
+ "step": 14900
1865
+ },
1866
+ {
1867
+ "epoch": 31.91,
1868
+ "learning_rate": 3.6314305364511693e-07,
1869
+ "loss": 0.1198,
1870
+ "step": 15000
1871
+ },
1872
+ {
1873
+ "epoch": 31.91,
1874
+ "eval_accuracy": 0.9423652817097318,
1875
+ "eval_f1": 0.9218109899084128,
1876
+ "eval_loss": 0.15379200875759125,
1877
+ "eval_matthews_correlation": 0.8804685582055614,
1878
+ "eval_precision": 0.9326720200447173,
1879
+ "eval_recall": 0.9126828528399417,
1880
+ "eval_runtime": 1105.4205,
1881
+ "eval_samples_per_second": 55.281,
1882
+ "eval_steps_per_second": 1.728,
1883
+ "step": 15000
1884
+ },
1885
+ {
1886
+ "epoch": 32.13,
1887
+ "learning_rate": 3.607359009628611e-07,
1888
+ "loss": 0.1193,
1889
+ "step": 15100
1890
+ },
1891
+ {
1892
+ "epoch": 32.34,
1893
+ "learning_rate": 3.5832874828060525e-07,
1894
+ "loss": 0.1193,
1895
+ "step": 15200
1896
+ },
1897
+ {
1898
+ "epoch": 32.34,
1899
+ "eval_accuracy": 0.9424961953231111,
1900
+ "eval_f1": 0.9231857675285348,
1901
+ "eval_loss": 0.15343151986598969,
1902
+ "eval_matthews_correlation": 0.8811431437323947,
1903
+ "eval_precision": 0.9293213427686996,
1904
+ "eval_recall": 0.9176996310804476,
1905
+ "eval_runtime": 1124.2062,
1906
+ "eval_samples_per_second": 54.357,
1907
+ "eval_steps_per_second": 1.699,
1908
+ "step": 15200
1909
+ },
1910
+ {
1911
+ "epoch": 32.55,
1912
+ "learning_rate": 3.5592159559834936e-07,
1913
+ "loss": 0.1194,
1914
+ "step": 15300
1915
+ },
1916
+ {
1917
+ "epoch": 32.77,
1918
+ "learning_rate": 3.535144429160935e-07,
1919
+ "loss": 0.1215,
1920
+ "step": 15400
1921
+ },
1922
+ {
1923
+ "epoch": 32.77,
1924
+ "eval_accuracy": 0.9421198186846454,
1925
+ "eval_f1": 0.9235682615682063,
1926
+ "eval_loss": 0.15412107110023499,
1927
+ "eval_matthews_correlation": 0.8809311583014519,
1928
+ "eval_precision": 0.9255101036658555,
1929
+ "eval_recall": 0.9217672088255878,
1930
+ "eval_runtime": 1116.4465,
1931
+ "eval_samples_per_second": 54.735,
1932
+ "eval_steps_per_second": 1.711,
1933
+ "step": 15400
1934
+ },
1935
+ {
1936
+ "epoch": 32.98,
1937
+ "learning_rate": 3.511072902338377e-07,
1938
+ "loss": 0.1168,
1939
+ "step": 15500
1940
+ },
1941
+ {
1942
+ "epoch": 33.19,
1943
+ "learning_rate": 3.4870013755158183e-07,
1944
+ "loss": 0.1177,
1945
+ "step": 15600
1946
+ },
1947
+ {
1948
+ "epoch": 33.19,
1949
+ "eval_accuracy": 0.9427252941465251,
1950
+ "eval_f1": 0.9240914268595931,
1951
+ "eval_loss": 0.15340538322925568,
1952
+ "eval_matthews_correlation": 0.8819306422216652,
1953
+ "eval_precision": 0.9276927357523115,
1954
+ "eval_recall": 0.9207421406121524,
1955
+ "eval_runtime": 1132.2669,
1956
+ "eval_samples_per_second": 53.97,
1957
+ "eval_steps_per_second": 1.687,
1958
+ "step": 15600
1959
+ },
1960
+ {
1961
+ "epoch": 33.4,
1962
+ "learning_rate": 3.46292984869326e-07,
1963
+ "loss": 0.1177,
1964
+ "step": 15700
1965
+ },
1966
+ {
1967
+ "epoch": 33.62,
1968
+ "learning_rate": 3.4388583218707015e-07,
1969
+ "loss": 0.1158,
1970
+ "step": 15800
1971
+ },
1972
+ {
1973
+ "epoch": 33.62,
1974
+ "eval_accuracy": 0.9428725719615768,
1975
+ "eval_f1": 0.9230758188747847,
1976
+ "eval_loss": 0.152634397149086,
1977
+ "eval_matthews_correlation": 0.8817097283454953,
1978
+ "eval_precision": 0.9314416673623378,
1979
+ "eval_recall": 0.9158101125474704,
1980
+ "eval_runtime": 1135.4508,
1981
+ "eval_samples_per_second": 53.819,
1982
+ "eval_steps_per_second": 1.682,
1983
+ "step": 15800
1984
+ },
1985
+ {
1986
+ "epoch": 33.83,
1987
+ "learning_rate": 3.414786795048143e-07,
1988
+ "loss": 0.1204,
1989
+ "step": 15900
1990
+ },
1991
+ {
1992
+ "epoch": 34.04,
1993
+ "learning_rate": 3.3907152682255847e-07,
1994
+ "loss": 0.1162,
1995
+ "step": 16000
1996
+ },
1997
+ {
1998
+ "epoch": 34.04,
1999
+ "eval_accuracy": 0.9432980412050598,
2000
+ "eval_f1": 0.9240152122705202,
2001
+ "eval_loss": 0.1521940678358078,
2002
+ "eval_matthews_correlation": 0.8826973056971621,
2003
+ "eval_precision": 0.931184031044368,
2004
+ "eval_recall": 0.9176525879531132,
2005
+ "eval_runtime": 1108.668,
2006
+ "eval_samples_per_second": 55.119,
2007
+ "eval_steps_per_second": 1.723,
2008
+ "step": 16000
2009
+ },
2010
+ {
2011
+ "epoch": 34.26,
2012
+ "learning_rate": 3.3666437414030257e-07,
2013
+ "loss": 0.1166,
2014
+ "step": 16100
2015
+ },
2016
+ {
2017
+ "epoch": 34.47,
2018
+ "learning_rate": 3.3425722145804673e-07,
2019
+ "loss": 0.1147,
2020
+ "step": 16200
2021
+ },
2022
+ {
2023
+ "epoch": 34.47,
2024
+ "eval_accuracy": 0.9433962264150944,
2025
+ "eval_f1": 0.9241868328582813,
2026
+ "eval_loss": 0.15216147899627686,
2027
+ "eval_matthews_correlation": 0.8829792231347647,
2028
+ "eval_precision": 0.9306640302786223,
2029
+ "eval_recall": 0.9184527518121871,
2030
+ "eval_runtime": 1132.2249,
2031
+ "eval_samples_per_second": 53.972,
2032
+ "eval_steps_per_second": 1.687,
2033
+ "step": 16200
2034
+ },
2035
+ {
2036
+ "epoch": 34.68,
2037
+ "learning_rate": 3.3185006877579094e-07,
2038
+ "loss": 0.1144,
2039
+ "step": 16300
2040
+ },
2041
+ {
2042
+ "epoch": 34.89,
2043
+ "learning_rate": 3.2944291609353505e-07,
2044
+ "loss": 0.1172,
2045
+ "step": 16400
2046
+ },
2047
+ {
2048
+ "epoch": 34.89,
2049
+ "eval_accuracy": 0.943576232633491,
2050
+ "eval_f1": 0.9243561247442994,
2051
+ "eval_loss": 0.15176311135292053,
2052
+ "eval_matthews_correlation": 0.8833109156622424,
2053
+ "eval_precision": 0.9312279042987696,
2054
+ "eval_recall": 0.9182818529319778,
2055
+ "eval_runtime": 1127.5884,
2056
+ "eval_samples_per_second": 54.194,
2057
+ "eval_steps_per_second": 1.694,
2058
+ "step": 16400
2059
+ },
2060
+ {
2061
+ "epoch": 35.11,
2062
+ "learning_rate": 3.270357634112792e-07,
2063
+ "loss": 0.1184,
2064
+ "step": 16500
2065
+ },
2066
+ {
2067
+ "epoch": 35.32,
2068
+ "learning_rate": 3.2462861072902337e-07,
2069
+ "loss": 0.1185,
2070
+ "step": 16600
2071
+ },
2072
+ {
2073
+ "epoch": 35.32,
2074
+ "eval_accuracy": 0.9433471338100771,
2075
+ "eval_f1": 0.9235180133752681,
2076
+ "eval_loss": 0.1514737904071808,
2077
+ "eval_matthews_correlation": 0.8825807929321668,
2078
+ "eval_precision": 0.9332458708783546,
2079
+ "eval_recall": 0.9151356262949316,
2080
+ "eval_runtime": 1129.6017,
2081
+ "eval_samples_per_second": 54.098,
2082
+ "eval_steps_per_second": 1.691,
2083
+ "step": 16600
2084
+ },
2085
+ {
2086
+ "epoch": 35.53,
2087
+ "learning_rate": 3.222214580467675e-07,
2088
+ "loss": 0.1162,
2089
+ "step": 16700
2090
+ },
2091
+ {
2092
+ "epoch": 35.74,
2093
+ "learning_rate": 3.198143053645117e-07,
2094
+ "loss": 0.1116,
2095
+ "step": 16800
2096
+ },
2097
+ {
2098
+ "epoch": 35.74,
2099
+ "eval_accuracy": 0.943461683221784,
2100
+ "eval_f1": 0.9242074753818227,
2101
+ "eval_loss": 0.15152059495449066,
2102
+ "eval_matthews_correlation": 0.8830141159932162,
2103
+ "eval_precision": 0.9315726012507216,
2104
+ "eval_recall": 0.9176551581996923,
2105
+ "eval_runtime": 1106.75,
2106
+ "eval_samples_per_second": 55.215,
2107
+ "eval_steps_per_second": 1.726,
2108
+ "step": 16800
2109
+ },
2110
+ {
2111
+ "epoch": 35.96,
2112
+ "learning_rate": 3.1740715268225584e-07,
2113
+ "loss": 0.113,
2114
+ "step": 16900
2115
+ },
2116
+ {
2117
+ "epoch": 36.17,
2118
+ "learning_rate": 3.1502407152682254e-07,
2119
+ "loss": 0.1132,
2120
+ "step": 17000
2121
+ },
2122
+ {
2123
+ "epoch": 36.17,
2124
+ "eval_accuracy": 0.9433307696084047,
2125
+ "eval_f1": 0.923392580714841,
2126
+ "eval_loss": 0.1517263650894165,
2127
+ "eval_matthews_correlation": 0.8825102281931411,
2128
+ "eval_precision": 0.933593905999869,
2129
+ "eval_recall": 0.9146349342370028,
2130
+ "eval_runtime": 1132.4177,
2131
+ "eval_samples_per_second": 53.963,
2132
+ "eval_steps_per_second": 1.687,
2133
+ "step": 17000
2134
+ },
2135
+ {
2136
+ "epoch": 36.38,
2137
+ "learning_rate": 3.126169188445667e-07,
2138
+ "loss": 0.1127,
2139
+ "step": 17100
2140
+ },
2141
+ {
2142
+ "epoch": 36.6,
2143
+ "learning_rate": 3.1020976616231086e-07,
2144
+ "loss": 0.1155,
2145
+ "step": 17200
2146
+ },
2147
+ {
2148
+ "epoch": 36.6,
2149
+ "eval_accuracy": 0.9438707882635946,
2150
+ "eval_f1": 0.9249328530871743,
2151
+ "eval_loss": 0.15113794803619385,
2152
+ "eval_matthews_correlation": 0.8839614616032178,
2153
+ "eval_precision": 0.9313619229067885,
2154
+ "eval_recall": 0.9191784464817534,
2155
+ "eval_runtime": 1121.0877,
2156
+ "eval_samples_per_second": 54.509,
2157
+ "eval_steps_per_second": 1.704,
2158
+ "step": 17200
2159
+ },
2160
+ {
2161
+ "epoch": 36.81,
2162
+ "learning_rate": 3.07802613480055e-07,
2163
+ "loss": 0.1144,
2164
+ "step": 17300
2165
+ },
2166
+ {
2167
+ "epoch": 37.02,
2168
+ "learning_rate": 3.053954607977992e-07,
2169
+ "loss": 0.1105,
2170
+ "step": 17400
2171
+ },
2172
+ {
2173
+ "epoch": 37.02,
2174
+ "eval_accuracy": 0.9439689734736291,
2175
+ "eval_f1": 0.9247302421488585,
2176
+ "eval_loss": 0.15101274847984314,
2177
+ "eval_matthews_correlation": 0.8840791715082791,
2178
+ "eval_precision": 0.9321543176187953,
2179
+ "eval_recall": 0.9182413488263638,
2180
+ "eval_runtime": 1117.9497,
2181
+ "eval_samples_per_second": 54.662,
2182
+ "eval_steps_per_second": 1.708,
2183
+ "step": 17400
2184
+ },
2185
+ {
2186
+ "epoch": 37.23,
2187
+ "learning_rate": 3.0298830811554333e-07,
2188
+ "loss": 0.1121,
2189
+ "step": 17500
2190
+ },
2191
+ {
2192
+ "epoch": 37.45,
2193
+ "learning_rate": 3.0058115543328744e-07,
2194
+ "loss": 0.1103,
2195
+ "step": 17600
2196
+ },
2197
+ {
2198
+ "epoch": 37.45,
2199
+ "eval_accuracy": 0.9438544240619221,
2200
+ "eval_f1": 0.9248424621748557,
2201
+ "eval_loss": 0.15083517134189606,
2202
+ "eval_matthews_correlation": 0.8839318423708653,
2203
+ "eval_precision": 0.9312687941809888,
2204
+ "eval_recall": 0.9191185521215243,
2205
+ "eval_runtime": 1136.279,
2206
+ "eval_samples_per_second": 53.78,
2207
+ "eval_steps_per_second": 1.681,
2208
+ "step": 17600
2209
+ },
2210
+ {
2211
+ "epoch": 37.66,
2212
+ "learning_rate": 2.981740027510316e-07,
2213
+ "loss": 0.1168,
2214
+ "step": 17700
2215
+ },
2216
+ {
2217
+ "epoch": 37.87,
2218
+ "learning_rate": 2.9576685006877576e-07,
2219
+ "loss": 0.1104,
2220
+ "step": 17800
2221
+ },
2222
+ {
2223
+ "epoch": 37.87,
2224
+ "eval_accuracy": 0.944001701876974,
2225
+ "eval_f1": 0.9255109565953598,
2226
+ "eval_loss": 0.15076182782649994,
2227
+ "eval_matthews_correlation": 0.8844550049466842,
2228
+ "eval_precision": 0.9300012352177646,
2229
+ "eval_recall": 0.9214207921543845,
2230
+ "eval_runtime": 1114.2003,
2231
+ "eval_samples_per_second": 54.846,
2232
+ "eval_steps_per_second": 1.714,
2233
+ "step": 17800
2234
+ },
2235
+ {
2236
+ "epoch": 38.09,
2237
+ "learning_rate": 2.9335969738651997e-07,
2238
+ "loss": 0.1111,
2239
+ "step": 17900
2240
+ },
2241
+ {
2242
+ "epoch": 38.3,
2243
+ "learning_rate": 2.9095254470426407e-07,
2244
+ "loss": 0.1134,
2245
+ "step": 18000
2246
+ },
2247
+ {
2248
+ "epoch": 38.3,
2249
+ "eval_accuracy": 0.9439362450702843,
2250
+ "eval_f1": 0.9253671889189082,
2251
+ "eval_loss": 0.1507214605808258,
2252
+ "eval_matthews_correlation": 0.8842287570521736,
2253
+ "eval_precision": 0.9305811742807507,
2254
+ "eval_recall": 0.9206107706340921,
2255
+ "eval_runtime": 1111.784,
2256
+ "eval_samples_per_second": 54.965,
2257
+ "eval_steps_per_second": 1.718,
2258
+ "step": 18000
2259
+ },
2260
+ {
2261
+ "epoch": 38.51,
2262
+ "learning_rate": 2.8856946354883077e-07,
2263
+ "loss": 0.1089,
2264
+ "step": 18100
2265
+ },
2266
+ {
2267
+ "epoch": 38.72,
2268
+ "learning_rate": 2.8616231086657493e-07,
2269
+ "loss": 0.1106,
2270
+ "step": 18200
2271
+ },
2272
+ {
2273
+ "epoch": 38.72,
2274
+ "eval_accuracy": 0.9441162512886809,
2275
+ "eval_f1": 0.9251360377006875,
2276
+ "eval_loss": 0.15033245086669922,
2277
+ "eval_matthews_correlation": 0.8843929494825783,
2278
+ "eval_precision": 0.9323618927884416,
2279
+ "eval_recall": 0.9187285395157545,
2280
+ "eval_runtime": 1121.674,
2281
+ "eval_samples_per_second": 54.48,
2282
+ "eval_steps_per_second": 1.703,
2283
+ "step": 18200
2284
+ },
2285
+ {
2286
+ "epoch": 38.94,
2287
+ "learning_rate": 2.8375515818431914e-07,
2288
+ "loss": 0.1114,
2289
+ "step": 18300
2290
+ },
2291
+ {
2292
+ "epoch": 39.15,
2293
+ "learning_rate": 2.8134800550206325e-07,
2294
+ "loss": 0.1095,
2295
+ "step": 18400
2296
+ },
2297
+ {
2298
+ "epoch": 39.15,
2299
+ "eval_accuracy": 0.943887152465267,
2300
+ "eval_f1": 0.9255818230394176,
2301
+ "eval_loss": 0.15064190328121185,
2302
+ "eval_matthews_correlation": 0.8842602118056405,
2303
+ "eval_precision": 0.9297138876991945,
2304
+ "eval_recall": 0.9217534806465043,
2305
+ "eval_runtime": 1127.018,
2306
+ "eval_samples_per_second": 54.222,
2307
+ "eval_steps_per_second": 1.695,
2308
+ "step": 18400
2309
+ },
2310
+ {
2311
+ "epoch": 39.36,
2312
+ "learning_rate": 2.789408528198074e-07,
2313
+ "loss": 0.11,
2314
+ "step": 18500
2315
+ },
2316
+ {
2317
+ "epoch": 39.57,
2318
+ "learning_rate": 2.7653370013755156e-07,
2319
+ "loss": 0.1122,
2320
+ "step": 18600
2321
+ },
2322
+ {
2323
+ "epoch": 39.57,
2324
+ "eval_accuracy": 0.9441489796920257,
2325
+ "eval_f1": 0.9250135407540009,
2326
+ "eval_loss": 0.14996594190597534,
2327
+ "eval_matthews_correlation": 0.884420266208998,
2328
+ "eval_precision": 0.9326893583639841,
2329
+ "eval_recall": 0.9182519548805012,
2330
+ "eval_runtime": 1113.4663,
2331
+ "eval_samples_per_second": 54.882,
2332
+ "eval_steps_per_second": 1.715,
2333
+ "step": 18600
2334
+ },
2335
+ {
2336
+ "epoch": 39.79,
2337
+ "learning_rate": 2.741265474552957e-07,
2338
+ "loss": 0.1083,
2339
+ "step": 18700
2340
+ },
2341
+ {
2342
+ "epoch": 40.0,
2343
+ "learning_rate": 2.717193947730399e-07,
2344
+ "loss": 0.1104,
2345
+ "step": 18800
2346
+ },
2347
+ {
2348
+ "epoch": 40.0,
2349
+ "eval_accuracy": 0.9440180660786464,
2350
+ "eval_f1": 0.9253930021308615,
2351
+ "eval_loss": 0.15009328722953796,
2352
+ "eval_matthews_correlation": 0.8844160433170132,
2353
+ "eval_precision": 0.9304927678821387,
2354
+ "eval_recall": 0.920780596053561,
2355
+ "eval_runtime": 1131.9242,
2356
+ "eval_samples_per_second": 53.987,
2357
+ "eval_steps_per_second": 1.687,
2358
+ "step": 18800
2359
+ },
2360
+ {
2361
+ "epoch": 40.21,
2362
+ "learning_rate": 2.6931224209078404e-07,
2363
+ "loss": 0.1095,
2364
+ "step": 18900
2365
+ },
2366
+ {
2367
+ "epoch": 40.43,
2368
+ "learning_rate": 2.669050894085282e-07,
2369
+ "loss": 0.1099,
2370
+ "step": 19000
2371
+ },
2372
+ {
2373
+ "epoch": 40.43,
2374
+ "eval_accuracy": 0.9441817080953706,
2375
+ "eval_f1": 0.9259059644722075,
2376
+ "eval_loss": 0.15028244256973267,
2377
+ "eval_matthews_correlation": 0.8848699358673907,
2378
+ "eval_precision": 0.9300530378191784,
2379
+ "eval_recall": 0.9221055593932391,
2380
+ "eval_runtime": 1133.4466,
2381
+ "eval_samples_per_second": 53.914,
2382
+ "eval_steps_per_second": 1.685,
2383
+ "step": 19000
2384
+ },
2385
+ {
2386
+ "epoch": 40.64,
2387
+ "learning_rate": 2.6449793672627236e-07,
2388
+ "loss": 0.105,
2389
+ "step": 19100
2390
+ },
2391
+ {
2392
+ "epoch": 40.85,
2393
+ "learning_rate": 2.6209078404401646e-07,
2394
+ "loss": 0.1098,
2395
+ "step": 19200
2396
+ },
2397
+ {
2398
+ "epoch": 40.85,
2399
+ "eval_accuracy": 0.944198072297043,
2400
+ "eval_f1": 0.9254187143519279,
2401
+ "eval_loss": 0.14963504672050476,
2402
+ "eval_matthews_correlation": 0.8846911439559728,
2403
+ "eval_precision": 0.9313966189199636,
2404
+ "eval_recall": 0.9200651823703107,
2405
+ "eval_runtime": 1146.6556,
2406
+ "eval_samples_per_second": 53.293,
2407
+ "eval_steps_per_second": 1.666,
2408
+ "step": 19200
2409
+ },
2410
+ {
2411
+ "epoch": 41.06,
2412
+ "learning_rate": 2.596836313617606e-07,
2413
+ "loss": 0.1087,
2414
+ "step": 19300
2415
+ },
2416
+ {
2417
+ "epoch": 41.28,
2418
+ "learning_rate": 2.5727647867950483e-07,
2419
+ "loss": 0.1105,
2420
+ "step": 19400
2421
+ },
2422
+ {
2423
+ "epoch": 41.28,
2424
+ "eval_accuracy": 0.9444435353221293,
2425
+ "eval_f1": 0.9256968337682165,
2426
+ "eval_loss": 0.14939628541469574,
2427
+ "eval_matthews_correlation": 0.8851581931672046,
2428
+ "eval_precision": 0.9320685204142367,
2429
+ "eval_recall": 0.920004214549341,
2430
+ "eval_runtime": 1134.2905,
2431
+ "eval_samples_per_second": 53.874,
2432
+ "eval_steps_per_second": 1.684,
2433
+ "step": 19400
2434
+ },
2435
+ {
2436
+ "epoch": 41.49,
2437
+ "learning_rate": 2.5486932599724894e-07,
2438
+ "loss": 0.1053,
2439
+ "step": 19500
2440
+ },
2441
+ {
2442
+ "epoch": 41.7,
2443
+ "learning_rate": 2.524621733149931e-07,
2444
+ "loss": 0.1085,
2445
+ "step": 19600
2446
+ },
2447
+ {
2448
+ "epoch": 41.7,
2449
+ "eval_accuracy": 0.9446726341455433,
2450
+ "eval_f1": 0.925738034839588,
2451
+ "eval_loss": 0.14932939410209656,
2452
+ "eval_matthews_correlation": 0.8854889075253487,
2453
+ "eval_precision": 0.9336149730127167,
2454
+ "eval_recall": 0.9187943227620606,
2455
+ "eval_runtime": 1117.9194,
2456
+ "eval_samples_per_second": 54.663,
2457
+ "eval_steps_per_second": 1.709,
2458
+ "step": 19600
2459
+ },
2460
+ {
2461
+ "epoch": 41.91,
2462
+ "learning_rate": 2.5005502063273726e-07,
2463
+ "loss": 0.1062,
2464
+ "step": 19700
2465
+ },
2466
+ {
2467
+ "epoch": 42.13,
2468
+ "learning_rate": 2.476478679504814e-07,
2469
+ "loss": 0.108,
2470
+ "step": 19800
2471
+ },
2472
+ {
2473
+ "epoch": 42.13,
2474
+ "eval_accuracy": 0.9447217267505604,
2475
+ "eval_f1": 0.9262048825799548,
2476
+ "eval_loss": 0.1495121866464615,
2477
+ "eval_matthews_correlation": 0.8858026294978001,
2478
+ "eval_precision": 0.9319552420338207,
2479
+ "eval_recall": 0.9210483479495699,
2480
+ "eval_runtime": 1115.2527,
2481
+ "eval_samples_per_second": 54.794,
2482
+ "eval_steps_per_second": 1.713,
2483
+ "step": 19800
2484
+ },
2485
+ {
2486
+ "epoch": 42.34,
2487
+ "learning_rate": 2.4524071526822557e-07,
2488
+ "loss": 0.1076,
2489
+ "step": 19900
2490
+ },
2491
+ {
2492
+ "epoch": 42.55,
2493
+ "learning_rate": 2.4283356258596973e-07,
2494
+ "loss": 0.1044,
2495
+ "step": 20000
2496
+ },
2497
+ {
2498
+ "epoch": 42.55,
2499
+ "eval_accuracy": 0.944508992128819,
2500
+ "eval_f1": 0.9261252498145538,
2501
+ "eval_loss": 0.14950193464756012,
2502
+ "eval_matthews_correlation": 0.8854199049552949,
2503
+ "eval_precision": 0.9313193606961945,
2504
+ "eval_recall": 0.9214106094253447,
2505
+ "eval_runtime": 1130.1649,
2506
+ "eval_samples_per_second": 54.071,
2507
+ "eval_steps_per_second": 1.69,
2508
+ "step": 20000
2509
+ },
2510
+ {
2511
+ "epoch": 42.77,
2512
+ "learning_rate": 2.404264099037139e-07,
2513
+ "loss": 0.106,
2514
+ "step": 20100
2515
+ },
2516
+ {
2517
+ "epoch": 42.98,
2518
+ "learning_rate": 2.3801925722145802e-07,
2519
+ "loss": 0.1076,
2520
+ "step": 20200
2521
+ },
2522
+ {
2523
+ "epoch": 42.98,
2524
+ "eval_accuracy": 0.9446562699438708,
2525
+ "eval_f1": 0.9261296033746204,
2526
+ "eval_loss": 0.14920221269130707,
2527
+ "eval_matthews_correlation": 0.8855912047264707,
2528
+ "eval_precision": 0.9324907409462333,
2529
+ "eval_recall": 0.9203679728109541,
2530
+ "eval_runtime": 1137.936,
2531
+ "eval_samples_per_second": 53.702,
2532
+ "eval_steps_per_second": 1.678,
2533
+ "step": 20200
2534
+ },
2535
+ {
2536
+ "epoch": 43.19,
2537
+ "learning_rate": 2.3561210453920218e-07,
2538
+ "loss": 0.107,
2539
+ "step": 20300
2540
+ },
2541
+ {
2542
+ "epoch": 43.4,
2543
+ "learning_rate": 2.3320495185694637e-07,
2544
+ "loss": 0.1044,
2545
+ "step": 20400
2546
+ },
2547
+ {
2548
+ "epoch": 43.4,
2549
+ "eval_accuracy": 0.9445417205321638,
2550
+ "eval_f1": 0.9266300766293115,
2551
+ "eval_loss": 0.1497085839509964,
2552
+ "eval_matthews_correlation": 0.8857559939442303,
2553
+ "eval_precision": 0.9296997536026302,
2554
+ "eval_recall": 0.9237603159106365,
2555
+ "eval_runtime": 1136.0812,
2556
+ "eval_samples_per_second": 53.789,
2557
+ "eval_steps_per_second": 1.681,
2558
+ "step": 20400
2559
+ },
2560
+ {
2561
+ "epoch": 43.62,
2562
+ "learning_rate": 2.307977991746905e-07,
2563
+ "loss": 0.105,
2564
+ "step": 20500
2565
+ },
2566
+ {
2567
+ "epoch": 43.83,
2568
+ "learning_rate": 2.2839064649243466e-07,
2569
+ "loss": 0.1055,
2570
+ "step": 20600
2571
+ },
2572
+ {
2573
+ "epoch": 43.83,
2574
+ "eval_accuracy": 0.9446889983472156,
2575
+ "eval_f1": 0.9266109603225074,
2576
+ "eval_loss": 0.14924582839012146,
2577
+ "eval_matthews_correlation": 0.8859684030602588,
2578
+ "eval_precision": 0.9303740597513821,
2579
+ "eval_recall": 0.9231584769939456,
2580
+ "eval_runtime": 1125.8192,
2581
+ "eval_samples_per_second": 54.28,
2582
+ "eval_steps_per_second": 1.697,
2583
+ "step": 20600
2584
+ },
2585
+ {
2586
+ "epoch": 44.04,
2587
+ "learning_rate": 2.2598349381017882e-07,
2588
+ "loss": 0.1065,
2589
+ "step": 20700
2590
+ },
2591
+ {
2592
+ "epoch": 44.26,
2593
+ "learning_rate": 2.2357634112792295e-07,
2594
+ "loss": 0.1043,
2595
+ "step": 20800
2596
+ },
2597
+ {
2598
+ "epoch": 44.26,
2599
+ "eval_accuracy": 0.9446071773388536,
2600
+ "eval_f1": 0.9263262351610323,
2601
+ "eval_loss": 0.14892685413360596,
2602
+ "eval_matthews_correlation": 0.8856938722284602,
2603
+ "eval_precision": 0.9309299279423926,
2604
+ "eval_recall": 0.9221329854554128,
2605
+ "eval_runtime": 1129.4248,
2606
+ "eval_samples_per_second": 54.106,
2607
+ "eval_steps_per_second": 1.691,
2608
+ "step": 20800
2609
+ },
2610
+ {
2611
+ "epoch": 44.47,
2612
+ "learning_rate": 2.211691884456671e-07,
2613
+ "loss": 0.1029,
2614
+ "step": 20900
2615
+ },
2616
+ {
2617
+ "epoch": 44.68,
2618
+ "learning_rate": 2.187620357634113e-07,
2619
+ "loss": 0.1042,
2620
+ "step": 21000
2621
+ },
2622
+ {
2623
+ "epoch": 44.68,
2624
+ "eval_accuracy": 0.944705362548888,
2625
+ "eval_f1": 0.9260999899887844,
2626
+ "eval_loss": 0.1488848179578781,
2627
+ "eval_matthews_correlation": 0.8857253503507664,
2628
+ "eval_precision": 0.9322554110387342,
2629
+ "eval_recall": 0.9205995144443117,
2630
+ "eval_runtime": 1114.3176,
2631
+ "eval_samples_per_second": 54.84,
2632
+ "eval_steps_per_second": 1.714,
2633
+ "step": 21000
2634
+ },
2635
+ {
2636
+ "epoch": 44.89,
2637
+ "learning_rate": 2.1635488308115542e-07,
2638
+ "loss": 0.1076,
2639
+ "step": 21100
2640
+ },
2641
+ {
2642
+ "epoch": 45.11,
2643
+ "learning_rate": 2.1394773039889958e-07,
2644
+ "loss": 0.1024,
2645
+ "step": 21200
2646
+ },
2647
+ {
2648
+ "epoch": 45.11,
2649
+ "eval_accuracy": 0.9449508255739744,
2650
+ "eval_f1": 0.9260476048473397,
2651
+ "eval_loss": 0.1487448662519455,
2652
+ "eval_matthews_correlation": 0.8860574300049961,
2653
+ "eval_precision": 0.9340386476068,
2654
+ "eval_recall": 0.9190087193195606,
2655
+ "eval_runtime": 1127.35,
2656
+ "eval_samples_per_second": 54.206,
2657
+ "eval_steps_per_second": 1.694,
2658
+ "step": 21200
2659
+ },
2660
+ {
2661
+ "epoch": 45.32,
2662
+ "learning_rate": 2.1154057771664372e-07,
2663
+ "loss": 0.1038,
2664
+ "step": 21300
2665
+ },
2666
+ {
2667
+ "epoch": 45.53,
2668
+ "learning_rate": 2.0913342503438787e-07,
2669
+ "loss": 0.1053,
2670
+ "step": 21400
2671
+ },
2672
+ {
2673
+ "epoch": 45.53,
2674
+ "eval_accuracy": 0.9449671897756468,
2675
+ "eval_f1": 0.9266078787740653,
2676
+ "eval_loss": 0.14872543513774872,
2677
+ "eval_matthews_correlation": 0.8863451628405484,
2678
+ "eval_precision": 0.9320275040537918,
2679
+ "eval_recall": 0.9217133998659879,
2680
+ "eval_runtime": 1121.4402,
2681
+ "eval_samples_per_second": 54.492,
2682
+ "eval_steps_per_second": 1.703,
2683
+ "step": 21400
2684
+ },
2685
+ {
2686
+ "epoch": 45.74,
2687
+ "learning_rate": 2.0672627235213206e-07,
2688
+ "loss": 0.1037,
2689
+ "step": 21500
2690
+ },
2691
+ {
2692
+ "epoch": 45.96,
2693
+ "learning_rate": 2.0431911966987622e-07,
2694
+ "loss": 0.1023,
2695
+ "step": 21600
2696
+ },
2697
+ {
2698
+ "epoch": 45.96,
2699
+ "eval_accuracy": 0.9447871835572501,
2700
+ "eval_f1": 0.9262474411932805,
2701
+ "eval_loss": 0.1485784500837326,
2702
+ "eval_matthews_correlation": 0.8859042819526408,
2703
+ "eval_precision": 0.9322667829591439,
2704
+ "eval_recall": 0.9208211984164509,
2705
+ "eval_runtime": 1107.2279,
2706
+ "eval_samples_per_second": 55.191,
2707
+ "eval_steps_per_second": 1.725,
2708
+ "step": 21600
2709
+ },
2710
+ {
2711
+ "epoch": 46.17,
2712
+ "learning_rate": 2.0191196698762035e-07,
2713
+ "loss": 0.1039,
2714
+ "step": 21700
2715
+ },
2716
+ {
2717
+ "epoch": 46.38,
2718
+ "learning_rate": 1.995048143053645e-07,
2719
+ "loss": 0.1037,
2720
+ "step": 21800
2721
+ },
2722
+ {
2723
+ "epoch": 46.38,
2724
+ "eval_accuracy": 0.9448362761622674,
2725
+ "eval_f1": 0.926612727616941,
2726
+ "eval_loss": 0.14873600006103516,
2727
+ "eval_matthews_correlation": 0.8861794892897553,
2728
+ "eval_precision": 0.9311237661981897,
2729
+ "eval_recall": 0.9225047795245841,
2730
+ "eval_runtime": 1134.6231,
2731
+ "eval_samples_per_second": 53.858,
2732
+ "eval_steps_per_second": 1.683,
2733
+ "step": 21800
2734
+ },
2735
+ {
2736
+ "epoch": 46.6,
2737
+ "learning_rate": 1.9709766162310864e-07,
2738
+ "loss": 0.1029,
2739
+ "step": 21900
2740
+ },
2741
+ {
2742
+ "epoch": 46.81,
2743
+ "learning_rate": 1.946905089408528e-07,
2744
+ "loss": 0.1046,
2745
+ "step": 22000
2746
+ },
2747
+ {
2748
+ "epoch": 46.81,
2749
+ "eval_accuracy": 0.944819911960595,
2750
+ "eval_f1": 0.9266815604872392,
2751
+ "eval_loss": 0.14862757921218872,
2752
+ "eval_matthews_correlation": 0.8861793421063061,
2753
+ "eval_precision": 0.9309033406756093,
2754
+ "eval_recall": 0.922807895033049,
2755
+ "eval_runtime": 1129.167,
2756
+ "eval_samples_per_second": 54.119,
2757
+ "eval_steps_per_second": 1.692,
2758
+ "step": 22000
2759
+ },
2760
+ {
2761
+ "epoch": 47.02,
2762
+ "learning_rate": 1.9230742778541952e-07,
2763
+ "loss": 0.1012,
2764
+ "step": 22100
2765
+ },
2766
+ {
2767
+ "epoch": 47.23,
2768
+ "learning_rate": 1.8990027510316368e-07,
2769
+ "loss": 0.1027,
2770
+ "step": 22200
2771
+ },
2772
+ {
2773
+ "epoch": 47.23,
2774
+ "eval_accuracy": 0.9451144675906986,
2775
+ "eval_f1": 0.9268280716567586,
2776
+ "eval_loss": 0.1484626680612564,
2777
+ "eval_matthews_correlation": 0.8866650411840761,
2778
+ "eval_precision": 0.9320985784552063,
2779
+ "eval_recall": 0.922045014897367,
2780
+ "eval_runtime": 1111.3061,
2781
+ "eval_samples_per_second": 54.988,
2782
+ "eval_steps_per_second": 1.719,
2783
+ "step": 22200
2784
+ },
2785
+ {
2786
+ "epoch": 47.45,
2787
+ "learning_rate": 1.8749312242090781e-07,
2788
+ "loss": 0.1015,
2789
+ "step": 22300
2790
+ },
2791
+ {
2792
+ "epoch": 47.66,
2793
+ "learning_rate": 1.8508596973865197e-07,
2794
+ "loss": 0.1013,
2795
+ "step": 22400
2796
+ },
2797
+ {
2798
+ "epoch": 47.66,
2799
+ "eval_accuracy": 0.9451144675906986,
2800
+ "eval_f1": 0.9269906309252122,
2801
+ "eval_loss": 0.14851711690425873,
2802
+ "eval_matthews_correlation": 0.8867653549869635,
2803
+ "eval_precision": 0.9314070719703338,
2804
+ "eval_recall": 0.9229525395691017,
2805
+ "eval_runtime": 1067.1147,
2806
+ "eval_samples_per_second": 57.266,
2807
+ "eval_steps_per_second": 1.79,
2808
+ "step": 22400
2809
+ },
2810
+ {
2811
+ "epoch": 47.87,
2812
+ "learning_rate": 1.8267881705639616e-07,
2813
+ "loss": 0.1035,
2814
+ "step": 22500
2815
+ },
2816
+ {
2817
+ "epoch": 48.09,
2818
+ "learning_rate": 1.8027166437414032e-07,
2819
+ "loss": 0.1024,
2820
+ "step": 22600
2821
+ },
2822
+ {
2823
+ "epoch": 48.09,
2824
+ "eval_accuracy": 0.9452944738090953,
2825
+ "eval_f1": 0.9269075563677092,
2826
+ "eval_loss": 0.14834338426589966,
2827
+ "eval_matthews_correlation": 0.8869764186987058,
2828
+ "eval_precision": 0.9327962247988729,
2829
+ "eval_recall": 0.92164075253014,
2830
+ "eval_runtime": 1041.7879,
2831
+ "eval_samples_per_second": 58.658,
2832
+ "eval_steps_per_second": 1.833,
2833
+ "step": 22600
2834
+ },
2835
+ {
2836
+ "epoch": 48.3,
2837
+ "learning_rate": 1.7786451169188445e-07,
2838
+ "loss": 0.1019,
2839
+ "step": 22700
2840
+ },
2841
+ {
2842
+ "epoch": 48.51,
2843
+ "learning_rate": 1.754573590096286e-07,
2844
+ "loss": 0.1017,
2845
+ "step": 22800
2846
+ },
2847
+ {
2848
+ "epoch": 48.51,
2849
+ "eval_accuracy": 0.945016282380664,
2850
+ "eval_f1": 0.9267798368823591,
2851
+ "eval_loss": 0.1482786238193512,
2852
+ "eval_matthews_correlation": 0.8864825429378855,
2853
+ "eval_precision": 0.9318586242689054,
2854
+ "eval_recall": 0.9221523757100277,
2855
+ "eval_runtime": 1047.8174,
2856
+ "eval_samples_per_second": 58.32,
2857
+ "eval_steps_per_second": 1.823,
2858
+ "step": 22800
2859
+ },
2860
+ {
2861
+ "epoch": 48.72,
2862
+ "learning_rate": 1.7305020632737277e-07,
2863
+ "loss": 0.1023,
2864
+ "step": 22900
2865
+ },
2866
+ {
2867
+ "epoch": 48.94,
2868
+ "learning_rate": 1.7066712517193946e-07,
2869
+ "loss": 0.1003,
2870
+ "step": 23000
2871
+ },
2872
+ {
2873
+ "epoch": 48.94,
2874
+ "eval_accuracy": 0.9450490107840089,
2875
+ "eval_f1": 0.927166667024839,
2876
+ "eval_loss": 0.1484668105840683,
2877
+ "eval_matthews_correlation": 0.8866843486297423,
2878
+ "eval_precision": 0.9311237652714297,
2879
+ "eval_recall": 0.9234908404182439,
2880
+ "eval_runtime": 1048.467,
2881
+ "eval_samples_per_second": 58.284,
2882
+ "eval_steps_per_second": 1.822,
2883
+ "step": 23000
2884
+ },
2885
+ {
2886
+ "epoch": 49.15,
2887
+ "learning_rate": 1.6825997248968362e-07,
2888
+ "loss": 0.1007,
2889
+ "step": 23100
2890
+ },
2891
+ {
2892
+ "epoch": 49.36,
2893
+ "learning_rate": 1.6585281980742778e-07,
2894
+ "loss": 0.1019,
2895
+ "step": 23200
2896
+ },
2897
+ {
2898
+ "epoch": 49.36,
2899
+ "eval_accuracy": 0.9454908442291643,
2900
+ "eval_f1": 0.9270481922188618,
2901
+ "eval_loss": 0.14815160632133484,
2902
+ "eval_matthews_correlation": 0.887288083753074,
2903
+ "eval_precision": 0.9338332607657969,
2904
+ "eval_recall": 0.9209852332071188,
2905
+ "eval_runtime": 1043.8215,
2906
+ "eval_samples_per_second": 58.544,
2907
+ "eval_steps_per_second": 1.83,
2908
+ "step": 23200
2909
+ },
2910
+ {
2911
+ "epoch": 49.57,
2912
+ "learning_rate": 1.6344566712517194e-07,
2913
+ "loss": 0.1008,
2914
+ "step": 23300
2915
+ },
2916
+ {
2917
+ "epoch": 49.79,
2918
+ "learning_rate": 1.610385144429161e-07,
2919
+ "loss": 0.0984,
2920
+ "step": 23400
2921
+ },
2922
+ {
2923
+ "epoch": 49.79,
2924
+ "eval_accuracy": 0.9450490107840089,
2925
+ "eval_f1": 0.9272141389942649,
2926
+ "eval_loss": 0.1484888792037964,
2927
+ "eval_matthews_correlation": 0.8867470893089248,
2928
+ "eval_precision": 0.930697822539048,
2929
+ "eval_recall": 0.9239834966686141,
2930
+ "eval_runtime": 1046.2775,
2931
+ "eval_samples_per_second": 58.406,
2932
+ "eval_steps_per_second": 1.826,
2933
+ "step": 23400
2934
+ },
2935
+ {
2936
+ "epoch": 50.0,
2937
+ "learning_rate": 1.5863136176066023e-07,
2938
+ "loss": 0.1005,
2939
+ "step": 23500
2940
+ },
2941
+ {
2942
+ "epoch": 50.21,
2943
+ "learning_rate": 1.5622420907840441e-07,
2944
+ "loss": 0.1007,
2945
+ "step": 23600
2946
+ },
2947
+ {
2948
+ "epoch": 50.21,
2949
+ "eval_accuracy": 0.9451799243973883,
2950
+ "eval_f1": 0.9271281688970315,
2951
+ "eval_loss": 0.14829137921333313,
2952
+ "eval_matthews_correlation": 0.8868975027838668,
2953
+ "eval_precision": 0.9315634975546264,
2954
+ "eval_recall": 0.9230624706283415,
2955
+ "eval_runtime": 1042.8498,
2956
+ "eval_samples_per_second": 58.598,
2957
+ "eval_steps_per_second": 1.832,
2958
+ "step": 23600
2959
+ },
2960
+ {
2961
+ "epoch": 50.43,
2962
+ "learning_rate": 1.5381705639614855e-07,
2963
+ "loss": 0.1001,
2964
+ "step": 23700
2965
+ },
2966
+ {
2967
+ "epoch": 50.64,
2968
+ "learning_rate": 1.514099037138927e-07,
2969
+ "loss": 0.0968,
2970
+ "step": 23800
2971
+ },
2972
+ {
2973
+ "epoch": 50.64,
2974
+ "eval_accuracy": 0.9454090232208022,
2975
+ "eval_f1": 0.9272991700931191,
2976
+ "eval_loss": 0.14818619191646576,
2977
+ "eval_matthews_correlation": 0.8872938388879654,
2978
+ "eval_precision": 0.9323994024344309,
2979
+ "eval_recall": 0.9226563864074547,
2980
+ "eval_runtime": 1044.5886,
2981
+ "eval_samples_per_second": 58.501,
2982
+ "eval_steps_per_second": 1.828,
2983
+ "step": 23800
2984
+ },
2985
+ {
2986
+ "epoch": 50.85,
2987
+ "learning_rate": 1.4900275103163686e-07,
2988
+ "loss": 0.102,
2989
+ "step": 23900
2990
+ },
2991
+ {
2992
+ "epoch": 51.06,
2993
+ "learning_rate": 1.46595598349381e-07,
2994
+ "loss": 0.102,
2995
+ "step": 24000
2996
+ },
2997
+ {
2998
+ "epoch": 51.06,
2999
+ "eval_accuracy": 0.9451635601957159,
3000
+ "eval_f1": 0.9269136270508408,
3001
+ "eval_loss": 0.14807577431201935,
3002
+ "eval_matthews_correlation": 0.8867722689003014,
3003
+ "eval_precision": 0.9321366838249306,
3004
+ "eval_recall": 0.9221728394253835,
3005
+ "eval_runtime": 1040.229,
3006
+ "eval_samples_per_second": 58.746,
3007
+ "eval_steps_per_second": 1.836,
3008
+ "step": 24000
3009
+ },
3010
+ {
3011
+ "epoch": 51.28,
3012
+ "learning_rate": 1.4418844566712518e-07,
3013
+ "loss": 0.0996,
3014
+ "step": 24100
3015
+ },
3016
+ {
3017
+ "epoch": 51.49,
3018
+ "learning_rate": 1.417812929848693e-07,
3019
+ "loss": 0.0974,
3020
+ "step": 24200
3021
+ },
3022
+ {
3023
+ "epoch": 51.49,
3024
+ "eval_accuracy": 0.9452290170024056,
3025
+ "eval_f1": 0.9270979729816141,
3026
+ "eval_loss": 0.1479618400335312,
3027
+ "eval_matthews_correlation": 0.8869160040122801,
3028
+ "eval_precision": 0.9322256627524821,
3029
+ "eval_recall": 0.9224124168662996,
3030
+ "eval_runtime": 1046.5218,
3031
+ "eval_samples_per_second": 58.392,
3032
+ "eval_steps_per_second": 1.825,
3033
+ "step": 24200
3034
+ },
3035
+ {
3036
+ "epoch": 51.7,
3037
+ "learning_rate": 1.3937414030261347e-07,
3038
+ "loss": 0.1015,
3039
+ "step": 24300
3040
+ },
3041
+ {
3042
+ "epoch": 51.91,
3043
+ "learning_rate": 1.3696698762035763e-07,
3044
+ "loss": 0.0997,
3045
+ "step": 24400
3046
+ },
3047
+ {
3048
+ "epoch": 51.91,
3049
+ "eval_accuracy": 0.945441751624147,
3050
+ "eval_f1": 0.927200876203088,
3051
+ "eval_loss": 0.14792127907276154,
3052
+ "eval_matthews_correlation": 0.8872321998942353,
3053
+ "eval_precision": 0.9334694817739616,
3054
+ "eval_recall": 0.9215315698638195,
3055
+ "eval_runtime": 1046.7981,
3056
+ "eval_samples_per_second": 58.377,
3057
+ "eval_steps_per_second": 1.825,
3058
+ "step": 24400
3059
+ },
3060
+ {
3061
+ "epoch": 52.13,
3062
+ "learning_rate": 1.345598349381018e-07,
3063
+ "loss": 0.0969,
3064
+ "step": 24500
3065
+ },
3066
+ {
3067
+ "epoch": 52.34,
3068
+ "learning_rate": 1.3215268225584592e-07,
3069
+ "loss": 0.0991,
3070
+ "step": 24600
3071
+ },
3072
+ {
3073
+ "epoch": 52.34,
3074
+ "eval_accuracy": 0.9454090232208022,
3075
+ "eval_f1": 0.9275124073961015,
3076
+ "eval_loss": 0.148192897439003,
3077
+ "eval_matthews_correlation": 0.8874202841376583,
3078
+ "eval_precision": 0.9315495664101637,
3079
+ "eval_recall": 0.9237972745662067,
3080
+ "eval_runtime": 1056.3168,
3081
+ "eval_samples_per_second": 57.851,
3082
+ "eval_steps_per_second": 1.808,
3083
+ "step": 24600
3084
+ },
3085
+ {
3086
+ "epoch": 52.55,
3087
+ "learning_rate": 1.297455295735901e-07,
3088
+ "loss": 0.0999,
3089
+ "step": 24700
3090
+ },
3091
+ {
3092
+ "epoch": 52.77,
3093
+ "learning_rate": 1.2733837689133424e-07,
3094
+ "loss": 0.0961,
3095
+ "step": 24800
3096
+ },
3097
+ {
3098
+ "epoch": 52.77,
3099
+ "eval_accuracy": 0.9454090232208022,
3100
+ "eval_f1": 0.9276125960433504,
3101
+ "eval_loss": 0.14825843274593353,
3102
+ "eval_matthews_correlation": 0.8874404098766001,
3103
+ "eval_precision": 0.931482950472307,
3104
+ "eval_recall": 0.9240306380532243,
3105
+ "eval_runtime": 1049.1068,
3106
+ "eval_samples_per_second": 58.249,
3107
+ "eval_steps_per_second": 1.821,
3108
+ "step": 24800
3109
+ },
3110
+ {
3111
+ "epoch": 52.98,
3112
+ "learning_rate": 1.249312242090784e-07,
3113
+ "loss": 0.1015,
3114
+ "step": 24900
3115
+ },
3116
+ {
3117
+ "epoch": 53.19,
3118
+ "learning_rate": 1.2252407152682256e-07,
3119
+ "loss": 0.1003,
3120
+ "step": 25000
3121
+ },
3122
+ {
3123
+ "epoch": 53.19,
3124
+ "eval_accuracy": 0.9454908442291643,
3125
+ "eval_f1": 0.9269883425691566,
3126
+ "eval_loss": 0.14782409369945526,
3127
+ "eval_matthews_correlation": 0.8872681574002631,
3128
+ "eval_precision": 0.9339854248197991,
3129
+ "eval_recall": 0.9207518697201013,
3130
+ "eval_runtime": 1048.4394,
3131
+ "eval_samples_per_second": 58.286,
3132
+ "eval_steps_per_second": 1.822,
3133
+ "step": 25000
3134
+ },
3135
+ {
3136
+ "epoch": 53.4,
3137
+ "learning_rate": 1.2011691884456671e-07,
3138
+ "loss": 0.0969,
3139
+ "step": 25100
3140
+ },
3141
+ {
3142
+ "epoch": 53.62,
3143
+ "learning_rate": 1.1770976616231087e-07,
3144
+ "loss": 0.0988,
3145
+ "step": 25200
3146
+ },
3147
+ {
3148
+ "epoch": 53.62,
3149
+ "eval_accuracy": 0.9456053936408713,
3150
+ "eval_f1": 0.9274062041867052,
3151
+ "eval_loss": 0.1478436291217804,
3152
+ "eval_matthews_correlation": 0.8876168167324782,
3153
+ "eval_precision": 0.9332789328012385,
3154
+ "eval_recall": 0.9221045841897744,
3155
+ "eval_runtime": 1048.83,
3156
+ "eval_samples_per_second": 58.264,
3157
+ "eval_steps_per_second": 1.821,
3158
+ "step": 25200
3159
+ },
3160
+ {
3161
+ "epoch": 53.83,
3162
+ "learning_rate": 1.1530261348005502e-07,
3163
+ "loss": 0.098,
3164
+ "step": 25300
3165
+ },
3166
+ {
3167
+ "epoch": 54.04,
3168
+ "learning_rate": 1.1289546079779916e-07,
3169
+ "loss": 0.0986,
3170
+ "step": 25400
3171
+ },
3172
+ {
3173
+ "epoch": 54.04,
3174
+ "eval_accuracy": 0.9456708504475609,
3175
+ "eval_f1": 0.9274017208924298,
3176
+ "eval_loss": 0.1477716565132141,
3177
+ "eval_matthews_correlation": 0.8877505832830167,
3178
+ "eval_precision": 0.9333189939052119,
3179
+ "eval_recall": 0.9220848688673379,
3180
+ "eval_runtime": 1045.0852,
3181
+ "eval_samples_per_second": 58.473,
3182
+ "eval_steps_per_second": 1.828,
3183
+ "step": 25400
3184
+ },
3185
+ {
3186
+ "epoch": 54.26,
3187
+ "learning_rate": 1.1048830811554334e-07,
3188
+ "loss": 0.0963,
3189
+ "step": 25500
3190
+ },
3191
+ {
3192
+ "epoch": 54.47,
3193
+ "learning_rate": 1.0808115543328748e-07,
3194
+ "loss": 0.1003,
3195
+ "step": 25600
3196
+ },
3197
+ {
3198
+ "epoch": 54.47,
3199
+ "eval_accuracy": 0.945441751624147,
3200
+ "eval_f1": 0.9272576298526598,
3201
+ "eval_loss": 0.1478091925382614,
3202
+ "eval_matthews_correlation": 0.8873054994658675,
3203
+ "eval_precision": 0.9328576996809682,
3204
+ "eval_recall": 0.9221798017722014,
3205
+ "eval_runtime": 1047.8846,
3206
+ "eval_samples_per_second": 58.317,
3207
+ "eval_steps_per_second": 1.823,
3208
+ "step": 25600
3209
+ },
3210
+ {
3211
+ "epoch": 54.68,
3212
+ "learning_rate": 1.0567400275103163e-07,
3213
+ "loss": 0.0967,
3214
+ "step": 25700
3215
+ },
3216
+ {
3217
+ "epoch": 54.89,
3218
+ "learning_rate": 1.0326685006877579e-07,
3219
+ "loss": 0.0974,
3220
+ "step": 25800
3221
+ },
3222
+ {
3223
+ "epoch": 54.89,
3224
+ "eval_accuracy": 0.9454090232208022,
3225
+ "eval_f1": 0.9272324111514144,
3226
+ "eval_loss": 0.14785851538181305,
3227
+ "eval_matthews_correlation": 0.8872587311118877,
3228
+ "eval_precision": 0.9326477457141271,
3229
+ "eval_recall": 0.9223193058150961,
3230
+ "eval_runtime": 1042.5297,
3231
+ "eval_samples_per_second": 58.616,
3232
+ "eval_steps_per_second": 1.832,
3233
+ "step": 25800
3234
+ },
3235
+ {
3236
+ "epoch": 55.11,
3237
+ "learning_rate": 1.0085969738651993e-07,
3238
+ "loss": 0.0978,
3239
+ "step": 25900
3240
+ },
3241
+ {
3242
+ "epoch": 55.32,
3243
+ "learning_rate": 9.84525447042641e-08,
3244
+ "loss": 0.0985,
3245
+ "step": 26000
3246
+ },
3247
+ {
3248
+ "epoch": 55.32,
3249
+ "eval_accuracy": 0.9455890294391988,
3250
+ "eval_f1": 0.92763614192194,
3251
+ "eval_loss": 0.14787529408931732,
3252
+ "eval_matthews_correlation": 0.8877198200242857,
3253
+ "eval_precision": 0.9322731450973873,
3254
+ "eval_recall": 0.9233930121989798,
3255
+ "eval_runtime": 1052.7253,
3256
+ "eval_samples_per_second": 58.048,
3257
+ "eval_steps_per_second": 1.814,
3258
+ "step": 26000
3259
+ },
3260
+ {
3261
+ "epoch": 55.53,
3262
+ "learning_rate": 9.604539202200825e-08,
3263
+ "loss": 0.0974,
3264
+ "step": 26100
3265
+ },
3266
+ {
3267
+ "epoch": 55.74,
3268
+ "learning_rate": 9.36382393397524e-08,
3269
+ "loss": 0.0968,
3270
+ "step": 26200
3271
+ },
3272
+ {
3273
+ "epoch": 55.74,
3274
+ "eval_accuracy": 0.9453599306157849,
3275
+ "eval_f1": 0.9277711434618366,
3276
+ "eval_loss": 0.1481289565563202,
3277
+ "eval_matthews_correlation": 0.8874539900521103,
3278
+ "eval_precision": 0.930771508987792,
3279
+ "eval_recall": 0.9249659138549541,
3280
+ "eval_runtime": 1046.0675,
3281
+ "eval_samples_per_second": 58.418,
3282
+ "eval_steps_per_second": 1.826,
3283
+ "step": 26200
3284
+ },
3285
+ {
3286
+ "epoch": 55.96,
3287
+ "learning_rate": 9.123108665749657e-08,
3288
+ "loss": 0.0965,
3289
+ "step": 26300
3290
+ },
3291
+ {
3292
+ "epoch": 56.17,
3293
+ "learning_rate": 8.882393397524071e-08,
3294
+ "loss": 0.0994,
3295
+ "step": 26400
3296
+ },
3297
+ {
3298
+ "epoch": 56.17,
3299
+ "eval_accuracy": 0.9454581158258194,
3300
+ "eval_f1": 0.9273153737422503,
3301
+ "eval_loss": 0.14774879813194275,
3302
+ "eval_matthews_correlation": 0.8873663018017705,
3303
+ "eval_precision": 0.9326728846418783,
3304
+ "eval_recall": 0.9224471303431127,
3305
+ "eval_runtime": 1044.2012,
3306
+ "eval_samples_per_second": 58.522,
3307
+ "eval_steps_per_second": 1.829,
3308
+ "step": 26400
3309
+ },
3310
+ {
3311
+ "epoch": 56.38,
3312
+ "learning_rate": 8.641678129298487e-08,
3313
+ "loss": 0.0986,
3314
+ "step": 26500
3315
+ },
3316
+ {
3317
+ "epoch": 56.6,
3318
+ "learning_rate": 8.400962861072902e-08,
3319
+ "loss": 0.0967,
3320
+ "step": 26600
3321
+ },
3322
+ {
3323
+ "epoch": 56.6,
3324
+ "eval_accuracy": 0.9452617454057504,
3325
+ "eval_f1": 0.9275893994739569,
3326
+ "eval_loss": 0.14803829789161682,
3327
+ "eval_matthews_correlation": 0.8872133913846761,
3328
+ "eval_precision": 0.9308688570782452,
3329
+ "eval_recall": 0.9245287598645741,
3330
+ "eval_runtime": 1045.1221,
3331
+ "eval_samples_per_second": 58.471,
3332
+ "eval_steps_per_second": 1.828,
3333
+ "step": 26600
3334
+ },
3335
+ {
3336
+ "epoch": 56.81,
3337
+ "learning_rate": 8.160247592847317e-08,
3338
+ "loss": 0.0928,
3339
+ "step": 26700
3340
+ },
3341
+ {
3342
+ "epoch": 57.02,
3343
+ "learning_rate": 7.919532324621733e-08,
3344
+ "loss": 0.0981,
3345
+ "step": 26800
3346
+ },
3347
+ {
3348
+ "epoch": 57.02,
3349
+ "eval_accuracy": 0.9457035788509057,
3350
+ "eval_f1": 0.927635695095954,
3351
+ "eval_loss": 0.14773297309875488,
3352
+ "eval_matthews_correlation": 0.8878658482696629,
3353
+ "eval_precision": 0.9330727346915296,
3354
+ "eval_recall": 0.9226973138381661,
3355
+ "eval_runtime": 1044.5351,
3356
+ "eval_samples_per_second": 58.504,
3357
+ "eval_steps_per_second": 1.829,
3358
+ "step": 26800
3359
+ },
3360
+ {
3361
+ "epoch": 57.23,
3362
+ "learning_rate": 7.678817056396149e-08,
3363
+ "loss": 0.0974,
3364
+ "step": 26900
3365
+ },
3366
+ {
3367
+ "epoch": 57.45,
3368
+ "learning_rate": 7.438101788170564e-08,
3369
+ "loss": 0.0971,
3370
+ "step": 27000
3371
+ },
3372
+ {
3373
+ "epoch": 57.45,
3374
+ "eval_accuracy": 0.9454581158258194,
3375
+ "eval_f1": 0.9276437107725939,
3376
+ "eval_loss": 0.14780069887638092,
3377
+ "eval_matthews_correlation": 0.8875004428021673,
3378
+ "eval_precision": 0.931844513397709,
3379
+ "eval_recall": 0.9237695234362117,
3380
+ "eval_runtime": 1046.8891,
3381
+ "eval_samples_per_second": 58.372,
3382
+ "eval_steps_per_second": 1.824,
3383
+ "step": 27000
3384
+ },
3385
+ {
3386
+ "epoch": 57.66,
3387
+ "learning_rate": 7.197386519944978e-08,
3388
+ "loss": 0.0964,
3389
+ "step": 27100
3390
+ },
3391
+ {
3392
+ "epoch": 57.87,
3393
+ "learning_rate": 6.956671251719394e-08,
3394
+ "loss": 0.096,
3395
+ "step": 27200
3396
+ },
3397
+ {
3398
+ "epoch": 57.87,
3399
+ "eval_accuracy": 0.9454908442291643,
3400
+ "eval_f1": 0.9274937428896638,
3401
+ "eval_loss": 0.1477871984243393,
3402
+ "eval_matthews_correlation": 0.8875013471949721,
3403
+ "eval_precision": 0.9322600481554396,
3404
+ "eval_recall": 0.9231373631429468,
3405
+ "eval_runtime": 1045.0428,
3406
+ "eval_samples_per_second": 58.475,
3407
+ "eval_steps_per_second": 1.828,
3408
+ "step": 27200
3409
+ },
3410
+ {
3411
+ "epoch": 58.09,
3412
+ "learning_rate": 6.71595598349381e-08,
3413
+ "loss": 0.0983,
3414
+ "step": 27300
3415
+ },
3416
+ {
3417
+ "epoch": 58.3,
3418
+ "learning_rate": 6.475240715268225e-08,
3419
+ "loss": 0.0971,
3420
+ "step": 27400
3421
+ },
3422
+ {
3423
+ "epoch": 58.3,
3424
+ "eval_accuracy": 0.9455235726325091,
3425
+ "eval_f1": 0.927552029801575,
3426
+ "eval_loss": 0.14768995344638824,
3427
+ "eval_matthews_correlation": 0.8875535000143042,
3428
+ "eval_precision": 0.9324442829035213,
3429
+ "eval_recall": 0.9230756469290579,
3430
+ "eval_runtime": 1046.0553,
3431
+ "eval_samples_per_second": 58.419,
3432
+ "eval_steps_per_second": 1.826,
3433
+ "step": 27400
3434
+ },
3435
+ {
3436
+ "epoch": 58.51,
3437
+ "learning_rate": 6.23452544704264e-08,
3438
+ "loss": 0.0952,
3439
+ "step": 27500
3440
+ },
3441
+ {
3442
+ "epoch": 58.72,
3443
+ "learning_rate": 5.993810178817056e-08,
3444
+ "loss": 0.0961,
3445
+ "step": 27600
3446
+ },
3447
+ {
3448
+ "epoch": 58.72,
3449
+ "eval_accuracy": 0.945638122044216,
3450
+ "eval_f1": 0.9279565448439383,
3451
+ "eval_loss": 0.14801862835884094,
3452
+ "eval_matthews_correlation": 0.8879656940505886,
3453
+ "eval_precision": 0.9314274864696435,
3454
+ "eval_recall": 0.9247395127147543,
3455
+ "eval_runtime": 1043.4327,
3456
+ "eval_samples_per_second": 58.565,
3457
+ "eval_steps_per_second": 1.83,
3458
+ "step": 27600
3459
+ },
3460
+ {
3461
+ "epoch": 58.94,
3462
+ "learning_rate": 5.753094910591472e-08,
3463
+ "loss": 0.0959,
3464
+ "step": 27700
3465
+ },
3466
+ {
3467
+ "epoch": 59.15,
3468
+ "learning_rate": 5.512379642365887e-08,
3469
+ "loss": 0.0966,
3470
+ "step": 27800
3471
+ },
3472
+ {
3473
+ "epoch": 59.15,
3474
+ "eval_accuracy": 0.945638122044216,
3475
+ "eval_f1": 0.9278932427247907,
3476
+ "eval_loss": 0.14785251021385193,
3477
+ "eval_matthews_correlation": 0.8879297115031657,
3478
+ "eval_precision": 0.9316436960957745,
3479
+ "eval_recall": 0.9244283613987311,
3480
+ "eval_runtime": 1042.6,
3481
+ "eval_samples_per_second": 58.612,
3482
+ "eval_steps_per_second": 1.832,
3483
+ "step": 27800
3484
+ },
3485
+ {
3486
+ "epoch": 59.36,
3487
+ "learning_rate": 5.2716643741403025e-08,
3488
+ "loss": 0.0964,
3489
+ "step": 27900
3490
+ },
3491
+ {
3492
+ "epoch": 59.57,
3493
+ "learning_rate": 5.030949105914718e-08,
3494
+ "loss": 0.0951,
3495
+ "step": 28000
3496
+ },
3497
+ {
3498
+ "epoch": 59.57,
3499
+ "eval_accuracy": 0.9457035788509057,
3500
+ "eval_f1": 0.9276842027293449,
3501
+ "eval_loss": 0.14763057231903076,
3502
+ "eval_matthews_correlation": 0.8879028981021795,
3503
+ "eval_precision": 0.932791895372278,
3504
+ "eval_recall": 0.9230343944305247,
3505
+ "eval_runtime": 1042.025,
3506
+ "eval_samples_per_second": 58.644,
3507
+ "eval_steps_per_second": 1.833,
3508
+ "step": 28000
3509
+ }
3510
+ ],
3511
+ "max_steps": 30080,
3512
+ "num_train_epochs": 64,
3513
+ "total_flos": 3.277848535588889e+21,
3514
+ "trial_name": null,
3515
+ "trial_params": null
3516
+ }
checkpoint-28000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57baa79002412724df84c2245162da376aadf840a069713b1da995d4d81fba74
3
+ size 3311
checkpoint-30000/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-384",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Front",
13
+ "1": "Middle",
14
+ "2": "Back"
15
+ },
16
+ "image_size": 384,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Back": "2",
21
+ "Front": "0",
22
+ "Middle": "1"
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "model_type": "vit",
26
+ "num_attention_heads": 12,
27
+ "num_channels": 3,
28
+ "num_hidden_layers": 12,
29
+ "patch_size": 16,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.20.0"
34
+ }
checkpoint-30000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f1e69aa37cc1e49234f6e96d5c4e46d8d3b20a2eb5c45cc47abb6f2337c63d7
3
+ size 688858465
checkpoint-30000/preprocessor_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_resize": true,
4
+ "feature_extractor_type": "ViTFeatureExtractor",
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_std": [
11
+ 0.5,
12
+ 0.5,
13
+ 0.5
14
+ ],
15
+ "resample": 2,
16
+ "size": 384
17
+ }
checkpoint-30000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adfad27bae3bf040a7aad83c624d32b8f31090364d2999e14a37e4f58d2592a5
3
+ size 344437425
checkpoint-30000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d014eefe837880243c764eb322650e1c8847dd50f3b2085c84680feb5cfd0e2a
3
+ size 14503
checkpoint-30000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c63aaeae1fc9a56c321f3b1530994d52feddba4e1d9af2e871f56cd917ad345
3
+ size 559
checkpoint-30000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d5e64a3317dae6ac6b918e32a6ed7ffe8a2f8ea5a0e32ed3a021e91fb9c2251
3
+ size 623
checkpoint-30000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-30000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57baa79002412724df84c2245162da376aadf840a069713b1da995d4d81fba74
3
+ size 3311
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-384",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Front",
13
+ "1": "Middle",
14
+ "2": "Back"
15
+ },
16
+ "image_size": 384,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Back": "2",
21
+ "Front": "0",
22
+ "Middle": "1"
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "model_type": "vit",
26
+ "num_attention_heads": 12,
27
+ "num_channels": 3,
28
+ "num_hidden_layers": 12,
29
+ "patch_size": 16,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.20.0"
34
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 64.0,
3
+ "eval_accuracy": 0.9457035788509057,
4
+ "eval_f1": 0.9276842027293449,
5
+ "eval_loss": 0.14763057231903076,
6
+ "eval_matthews_correlation": 0.8879028981021795,
7
+ "eval_precision": 0.932791895372278,
8
+ "eval_recall": 0.9230343944305247,
9
+ "eval_runtime": 1043.6137,
10
+ "eval_samples_per_second": 58.555,
11
+ "eval_steps_per_second": 1.83
12
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_resize": true,
4
+ "feature_extractor_type": "ViTFeatureExtractor",
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_std": [
11
+ 0.5,
12
+ 0.5,
13
+ 0.5
14
+ ],
15
+ "resample": 2,
16
+ "size": 384
17
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7aeae6401eee261a920c4636609b82fc52762b4b566ab785089a4bb27c1d0cb
3
+ size 344437425
runs/Jun29_00-57-15_ficino/1656457039.4100714/events.out.tfevents.1656457039.ficino.3045589.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d310e8ad33d183473343c5c2b89944b35045b3370fef4e53d73c8f30d9634e93
3
+ size 5372
runs/Jun29_00-57-15_ficino/events.out.tfevents.1656457039.ficino.3045589.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa3577efc50404a59b241804b1bff7130a02113cc8385937ba8ad370aac6aa78
3
+ size 132647
runs/Jun29_00-57-15_ficino/events.out.tfevents.1657055182.ficino.3045589.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d7f6643ab946ac2aedf7a50e2d774f1ad7f3898489ad9bcd444d356b21831c
3
+ size 586
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 64.0,
3
+ "train_loss": 0.1566047928276214,
4
+ "train_runtime": 597098.8722,
5
+ "train_samples_per_second": 25.809,
6
+ "train_steps_per_second": 0.05
7
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57baa79002412724df84c2245162da376aadf840a069713b1da995d4d81fba74
3
+ size 3311