Step... (24000/50000 | Loss: 1.6508632898330688, Acc: 0.6671841740608215): 48%|█████████████ | 24215/50000 [9:36:14<10:45:10, 1.50s/it]
Browse files- flax_model.msgpack +1 -1
- outputs/checkpoints/checkpoint-17000/training_state.json +0 -1
- outputs/checkpoints/checkpoint-18000/training_state.json +0 -1
- outputs/checkpoints/checkpoint-19000/training_state.json +0 -1
- outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/config.json +0 -0
- outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/data_collator.joblib +0 -0
- outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/flax_model.msgpack +1 -1
- outputs/checkpoints/{checkpoint-19000 → checkpoint-22000}/optimizer_state.msgpack +1 -1
- outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/training_args.joblib +0 -0
- outputs/checkpoints/checkpoint-22000/training_state.json +1 -0
- outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/config.json +0 -0
- outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/data_collator.joblib +0 -0
- outputs/checkpoints/{checkpoint-19000 → checkpoint-23000}/flax_model.msgpack +1 -1
- outputs/checkpoints/{checkpoint-17000 → checkpoint-23000}/optimizer_state.msgpack +1 -1
- outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/training_args.joblib +0 -0
- outputs/checkpoints/checkpoint-23000/training_state.json +1 -0
- outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/config.json +0 -0
- outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/data_collator.joblib +0 -0
- outputs/checkpoints/{checkpoint-18000 → checkpoint-24000}/flax_model.msgpack +1 -1
- outputs/checkpoints/{checkpoint-18000 → checkpoint-24000}/optimizer_state.msgpack +1 -1
- outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/training_args.joblib +0 -0
- outputs/checkpoints/checkpoint-24000/training_state.json +1 -0
- outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 +2 -2
- outputs/flax_model.msgpack +1 -1
- outputs/optimizer_state.msgpack +1 -1
- outputs/training_state.json +1 -1
- pytorch_model.bin +1 -1
- run_stream.512.log +0 -0
- wandb/run-20210726_001233-17u6inbn/files/output.log +1717 -0
- wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json +1 -1
- wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log +2 -2
- wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb +2 -2
flax_model.msgpack
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249750019
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b22d22612dd38ad92ffdda4b0cf432e201d6c90dd5386d04a2cdf4d19cdfd1ed
|
| 3 |
size 249750019
|
outputs/checkpoints/checkpoint-17000/training_state.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"step": 17001}
|
|
|
|
|
|
outputs/checkpoints/checkpoint-18000/training_state.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"step": 18001}
|
|
|
|
|
|
outputs/checkpoints/checkpoint-19000/training_state.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
{"step": 19001}
|
|
|
|
|
|
outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/config.json
RENAMED
|
File without changes
|
outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/data_collator.joblib
RENAMED
|
File without changes
|
outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/flax_model.msgpack
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249750019
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce6736afa967315a5ccac23ff15ab3d3f2f90881f2858be1c86b98b60e0fa764
|
| 3 |
size 249750019
|
outputs/checkpoints/{checkpoint-19000 → checkpoint-22000}/optimizer_state.msgpack
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 499500278
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02cabdf326b00115bc75530d1d7bc3f9a82e57d038202548c3edee7d57c661ae
|
| 3 |
size 499500278
|
outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/training_args.joblib
RENAMED
|
File without changes
|
outputs/checkpoints/checkpoint-22000/training_state.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 22001}
|
outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/config.json
RENAMED
|
File without changes
|
outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/data_collator.joblib
RENAMED
|
File without changes
|
outputs/checkpoints/{checkpoint-19000 → checkpoint-23000}/flax_model.msgpack
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249750019
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:559baf67a4fa12f4ddb4ea45aaf285d2e5d700ac5aa0e7ffb854af49e075634d
|
| 3 |
size 249750019
|
outputs/checkpoints/{checkpoint-17000 → checkpoint-23000}/optimizer_state.msgpack
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 499500278
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bcd19d800843747a4fd81108e8654c0d431e94bacbd45321125f28f4eda9857
|
| 3 |
size 499500278
|
outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/training_args.joblib
RENAMED
|
File without changes
|
outputs/checkpoints/checkpoint-23000/training_state.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 23001}
|
outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/config.json
RENAMED
|
File without changes
|
outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/data_collator.joblib
RENAMED
|
File without changes
|
outputs/checkpoints/{checkpoint-18000 → checkpoint-24000}/flax_model.msgpack
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249750019
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b22d22612dd38ad92ffdda4b0cf432e201d6c90dd5386d04a2cdf4d19cdfd1ed
|
| 3 |
size 249750019
|
outputs/checkpoints/{checkpoint-18000 → checkpoint-24000}/optimizer_state.msgpack
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 499500278
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcac7bac463ddd6530546523b0141118f658d528e0d7ec682da2661fe2a0f7df
|
| 3 |
size 499500278
|
outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/training_args.joblib
RENAMED
|
File without changes
|
outputs/checkpoints/checkpoint-24000/training_state.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 24001}
|
outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:187bfd40e3dd6f12ab8cd6df2018b0fef55ab1ab89a973e1cc1b5427620d8135
|
| 3 |
+
size 3549865
|
outputs/flax_model.msgpack
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 249750019
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b22d22612dd38ad92ffdda4b0cf432e201d6c90dd5386d04a2cdf4d19cdfd1ed
|
| 3 |
size 249750019
|
outputs/optimizer_state.msgpack
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 499500278
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcac7bac463ddd6530546523b0141118f658d528e0d7ec682da2661fe2a0f7df
|
| 3 |
size 499500278
|
outputs/training_state.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"step":
|
|
|
|
| 1 |
+
{"step": 24001}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 498858859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d50ca6bc265a7b18cee3972966e847d1c5891e5fec62a6e912bbbe885e2e82da
|
| 3 |
size 498858859
|
run_stream.512.log
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/run-20210726_001233-17u6inbn/files/output.log
CHANGED
|
@@ -14630,6 +14630,1723 @@ You should probably TRAIN this model on a down-stream task to be able to use it
|
|
| 14630 |
|
| 14631 |
|
| 14632 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14633 |
|
| 14634 |
|
| 14635 |
|
|
|
|
| 14630 |
|
| 14631 |
|
| 14632 |
|
| 14633 |
+
|
| 14634 |
+
|
| 14635 |
+
|
| 14636 |
+
|
| 14637 |
+
|
| 14638 |
+
|
| 14639 |
+
|
| 14640 |
+
|
| 14641 |
+
|
| 14642 |
+
|
| 14643 |
+
|
| 14644 |
+
|
| 14645 |
+
|
| 14646 |
+
|
| 14647 |
+
|
| 14648 |
+
|
| 14649 |
+
|
| 14650 |
+
|
| 14651 |
+
|
| 14652 |
+
|
| 14653 |
+
|
| 14654 |
+
|
| 14655 |
+
|
| 14656 |
+
|
| 14657 |
+
|
| 14658 |
+
|
| 14659 |
+
|
| 14660 |
+
|
| 14661 |
+
|
| 14662 |
+
|
| 14663 |
+
|
| 14664 |
+
|
| 14665 |
+
|
| 14666 |
+
|
| 14667 |
+
|
| 14668 |
+
|
| 14669 |
+
|
| 14670 |
+
|
| 14671 |
+
|
| 14672 |
+
|
| 14673 |
+
|
| 14674 |
+
|
| 14675 |
+
|
| 14676 |
+
|
| 14677 |
+
|
| 14678 |
+
|
| 14679 |
+
|
| 14680 |
+
|
| 14681 |
+
|
| 14682 |
+
|
| 14683 |
+
|
| 14684 |
+
|
| 14685 |
+
|
| 14686 |
+
|
| 14687 |
+
|
| 14688 |
+
|
| 14689 |
+
|
| 14690 |
+
|
| 14691 |
+
|
| 14692 |
+
|
| 14693 |
+
|
| 14694 |
+
|
| 14695 |
+
|
| 14696 |
+
|
| 14697 |
+
|
| 14698 |
+
|
| 14699 |
+
|
| 14700 |
+
|
| 14701 |
+
|
| 14702 |
+
|
| 14703 |
+
|
| 14704 |
+
|
| 14705 |
+
|
| 14706 |
+
|
| 14707 |
+
|
| 14708 |
+
|
| 14709 |
+
|
| 14710 |
+
|
| 14711 |
+
|
| 14712 |
+
|
| 14713 |
+
|
| 14714 |
+
|
| 14715 |
+
|
| 14716 |
+
|
| 14717 |
+
|
| 14718 |
+
|
| 14719 |
+
|
| 14720 |
+
|
| 14721 |
+
|
| 14722 |
+
|
| 14723 |
+
|
| 14724 |
+
|
| 14725 |
+
|
| 14726 |
+
|
| 14727 |
+
|
| 14728 |
+
|
| 14729 |
+
|
| 14730 |
+
|
| 14731 |
+
|
| 14732 |
+
|
| 14733 |
+
|
| 14734 |
+
|
| 14735 |
+
|
| 14736 |
+
|
| 14737 |
+
|
| 14738 |
+
|
| 14739 |
+
|
| 14740 |
+
|
| 14741 |
+
|
| 14742 |
+
|
| 14743 |
+
|
| 14744 |
+
|
| 14745 |
+
|
| 14746 |
+
|
| 14747 |
+
|
| 14748 |
+
|
| 14749 |
+
|
| 14750 |
+
|
| 14751 |
+
|
| 14752 |
+
|
| 14753 |
+
|
| 14754 |
+
|
| 14755 |
+
|
| 14756 |
+
|
| 14757 |
+
|
| 14758 |
+
|
| 14759 |
+
|
| 14760 |
+
|
| 14761 |
+
|
| 14762 |
+
|
| 14763 |
+
|
| 14764 |
+
|
| 14765 |
+
|
| 14766 |
+
|
| 14767 |
+
|
| 14768 |
+
|
| 14769 |
+
|
| 14770 |
+
|
| 14771 |
+
|
| 14772 |
+
|
| 14773 |
+
|
| 14774 |
+
|
| 14775 |
+
|
| 14776 |
+
|
| 14777 |
+
|
| 14778 |
+
|
| 14779 |
+
|
| 14780 |
+
|
| 14781 |
+
|
| 14782 |
+
|
| 14783 |
+
|
| 14784 |
+
|
| 14785 |
+
|
| 14786 |
+
|
| 14787 |
+
|
| 14788 |
+
|
| 14789 |
+
|
| 14790 |
+
|
| 14791 |
+
|
| 14792 |
+
|
| 14793 |
+
|
| 14794 |
+
|
| 14795 |
+
|
| 14796 |
+
|
| 14797 |
+
|
| 14798 |
+
|
| 14799 |
+
|
| 14800 |
+
|
| 14801 |
+
|
| 14802 |
+
|
| 14803 |
+
|
| 14804 |
+
|
| 14805 |
+
|
| 14806 |
+
|
| 14807 |
+
|
| 14808 |
+
|
| 14809 |
+
|
| 14810 |
+
|
| 14811 |
+
Step... (21000/50000 | Loss: 1.669716238975525, Acc: 0.6647850275039673): 44%|████████████▎ | 22000/50000 [8:40:53<13:14:50, 1.70s/it]
|
| 14812 |
+
Step... (21500 | Loss: 1.764472484588623, Learning Rate: 0.00034545455127954483)
|
| 14813 |
+
Step... (21000/50000 | Loss: 1.669716238975525, Acc: 0.6647850275039673): 44%|████████████▎ | 22000/50000 [8:40:55<13:14:50, 1.70s/it]
|
| 14814 |
+
|
| 14815 |
+
|
| 14816 |
+
|
| 14817 |
+
|
| 14818 |
+
|
| 14819 |
+
|
| 14820 |
+
|
| 14821 |
+
|
| 14822 |
+
|
| 14823 |
+
|
| 14824 |
+
|
| 14825 |
+
|
| 14826 |
+
[10:49:19] - INFO - __main__ - Saving checkpoint at 22000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
|
| 14827 |
+
All Flax model weights were used when initializing RobertaForMaskedLM.
|
| 14828 |
+
Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
|
| 14829 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
| 14830 |
+
|
| 14831 |
+
|
| 14832 |
+
|
| 14833 |
+
|
| 14834 |
+
|
| 14835 |
+
|
| 14836 |
+
|
| 14837 |
+
|
| 14838 |
+
|
| 14839 |
+
|
| 14840 |
+
|
| 14841 |
+
|
| 14842 |
+
|
| 14843 |
+
|
| 14844 |
+
|
| 14845 |
+
|
| 14846 |
+
|
| 14847 |
+
|
| 14848 |
+
|
| 14849 |
+
|
| 14850 |
+
|
| 14851 |
+
|
| 14852 |
+
|
| 14853 |
+
|
| 14854 |
+
|
| 14855 |
+
|
| 14856 |
+
|
| 14857 |
+
|
| 14858 |
+
|
| 14859 |
+
|
| 14860 |
+
|
| 14861 |
+
|
| 14862 |
+
|
| 14863 |
+
|
| 14864 |
+
|
| 14865 |
+
|
| 14866 |
+
|
| 14867 |
+
|
| 14868 |
+
|
| 14869 |
+
|
| 14870 |
+
|
| 14871 |
+
|
| 14872 |
+
|
| 14873 |
+
|
| 14874 |
+
|
| 14875 |
+
|
| 14876 |
+
|
| 14877 |
+
|
| 14878 |
+
|
| 14879 |
+
|
| 14880 |
+
|
| 14881 |
+
|
| 14882 |
+
|
| 14883 |
+
|
| 14884 |
+
|
| 14885 |
+
|
| 14886 |
+
|
| 14887 |
+
|
| 14888 |
+
|
| 14889 |
+
|
| 14890 |
+
|
| 14891 |
+
|
| 14892 |
+
|
| 14893 |
+
|
| 14894 |
+
|
| 14895 |
+
|
| 14896 |
+
|
| 14897 |
+
|
| 14898 |
+
|
| 14899 |
+
|
| 14900 |
+
|
| 14901 |
+
|
| 14902 |
+
|
| 14903 |
+
|
| 14904 |
+
|
| 14905 |
+
|
| 14906 |
+
|
| 14907 |
+
|
| 14908 |
+
|
| 14909 |
+
|
| 14910 |
+
|
| 14911 |
+
|
| 14912 |
+
|
| 14913 |
+
|
| 14914 |
+
|
| 14915 |
+
|
| 14916 |
+
|
| 14917 |
+
|
| 14918 |
+
|
| 14919 |
+
|
| 14920 |
+
|
| 14921 |
+
|
| 14922 |
+
|
| 14923 |
+
|
| 14924 |
+
|
| 14925 |
+
|
| 14926 |
+
|
| 14927 |
+
|
| 14928 |
+
|
| 14929 |
+
|
| 14930 |
+
|
| 14931 |
+
|
| 14932 |
+
|
| 14933 |
+
|
| 14934 |
+
|
| 14935 |
+
|
| 14936 |
+
|
| 14937 |
+
|
| 14938 |
+
|
| 14939 |
+
|
| 14940 |
+
|
| 14941 |
+
|
| 14942 |
+
|
| 14943 |
+
|
| 14944 |
+
|
| 14945 |
+
|
| 14946 |
+
|
| 14947 |
+
|
| 14948 |
+
|
| 14949 |
+
|
| 14950 |
+
|
| 14951 |
+
|
| 14952 |
+
|
| 14953 |
+
|
| 14954 |
+
|
| 14955 |
+
|
| 14956 |
+
|
| 14957 |
+
|
| 14958 |
+
|
| 14959 |
+
|
| 14960 |
+
|
| 14961 |
+
|
| 14962 |
+
|
| 14963 |
+
|
| 14964 |
+
|
| 14965 |
+
|
| 14966 |
+
|
| 14967 |
+
|
| 14968 |
+
|
| 14969 |
+
|
| 14970 |
+
|
| 14971 |
+
|
| 14972 |
+
|
| 14973 |
+
|
| 14974 |
+
|
| 14975 |
+
|
| 14976 |
+
|
| 14977 |
+
|
| 14978 |
+
|
| 14979 |
+
|
| 14980 |
+
|
| 14981 |
+
|
| 14982 |
+
|
| 14983 |
+
|
| 14984 |
+
|
| 14985 |
+
|
| 14986 |
+
|
| 14987 |
+
|
| 14988 |
+
|
| 14989 |
+
|
| 14990 |
+
|
| 14991 |
+
|
| 14992 |
+
|
| 14993 |
+
|
| 14994 |
+
|
| 14995 |
+
|
| 14996 |
+
|
| 14997 |
+
|
| 14998 |
+
|
| 14999 |
+
|
| 15000 |
+
|
| 15001 |
+
|
| 15002 |
+
|
| 15003 |
+
|
| 15004 |
+
|
| 15005 |
+
|
| 15006 |
+
|
| 15007 |
+
|
| 15008 |
+
|
| 15009 |
+
|
| 15010 |
+
|
| 15011 |
+
|
| 15012 |
+
|
| 15013 |
+
|
| 15014 |
+
|
| 15015 |
+
|
| 15016 |
+
|
| 15017 |
+
|
| 15018 |
+
|
| 15019 |
+
|
| 15020 |
+
|
| 15021 |
+
|
| 15022 |
+
|
| 15023 |
+
|
| 15024 |
+
|
| 15025 |
+
|
| 15026 |
+
|
| 15027 |
+
|
| 15028 |
+
|
| 15029 |
+
|
| 15030 |
+
|
| 15031 |
+
|
| 15032 |
+
|
| 15033 |
+
|
| 15034 |
+
|
| 15035 |
+
|
| 15036 |
+
|
| 15037 |
+
|
| 15038 |
+
|
| 15039 |
+
|
| 15040 |
+
|
| 15041 |
+
|
| 15042 |
+
|
| 15043 |
+
|
| 15044 |
+
|
| 15045 |
+
|
| 15046 |
+
|
| 15047 |
+
|
| 15048 |
+
|
| 15049 |
+
|
| 15050 |
+
|
| 15051 |
+
|
| 15052 |
+
|
| 15053 |
+
|
| 15054 |
+
|
| 15055 |
+
|
| 15056 |
+
|
| 15057 |
+
|
| 15058 |
+
|
| 15059 |
+
|
| 15060 |
+
|
| 15061 |
+
|
| 15062 |
+
|
| 15063 |
+
|
| 15064 |
+
|
| 15065 |
+
|
| 15066 |
+
|
| 15067 |
+
|
| 15068 |
+
|
| 15069 |
+
|
| 15070 |
+
|
| 15071 |
+
|
| 15072 |
+
|
| 15073 |
+
|
| 15074 |
+
|
| 15075 |
+
|
| 15076 |
+
|
| 15077 |
+
|
| 15078 |
+
|
| 15079 |
+
|
| 15080 |
+
|
| 15081 |
+
|
| 15082 |
+
|
| 15083 |
+
|
| 15084 |
+
|
| 15085 |
+
|
| 15086 |
+
|
| 15087 |
+
|
| 15088 |
+
|
| 15089 |
+
|
| 15090 |
+
|
| 15091 |
+
|
| 15092 |
+
|
| 15093 |
+
|
| 15094 |
+
|
| 15095 |
+
|
| 15096 |
+
|
| 15097 |
+
|
| 15098 |
+
|
| 15099 |
+
|
| 15100 |
+
|
| 15101 |
+
|
| 15102 |
+
|
| 15103 |
+
|
| 15104 |
+
|
| 15105 |
+
|
| 15106 |
+
|
| 15107 |
+
|
| 15108 |
+
|
| 15109 |
+
|
| 15110 |
+
|
| 15111 |
+
|
| 15112 |
+
|
| 15113 |
+
|
| 15114 |
+
|
| 15115 |
+
|
| 15116 |
+
|
| 15117 |
+
|
| 15118 |
+
|
| 15119 |
+
|
| 15120 |
+
|
| 15121 |
+
|
| 15122 |
+
|
| 15123 |
+
|
| 15124 |
+
|
| 15125 |
+
|
| 15126 |
+
|
| 15127 |
+
|
| 15128 |
+
|
| 15129 |
+
|
| 15130 |
+
|
| 15131 |
+
|
| 15132 |
+
|
| 15133 |
+
|
| 15134 |
+
|
| 15135 |
+
|
| 15136 |
+
|
| 15137 |
+
|
| 15138 |
+
|
| 15139 |
+
|
| 15140 |
+
|
| 15141 |
+
|
| 15142 |
+
|
| 15143 |
+
|
| 15144 |
+
|
| 15145 |
+
|
| 15146 |
+
|
| 15147 |
+
|
| 15148 |
+
|
| 15149 |
+
|
| 15150 |
+
|
| 15151 |
+
|
| 15152 |
+
|
| 15153 |
+
|
| 15154 |
+
|
| 15155 |
+
|
| 15156 |
+
|
| 15157 |
+
|
| 15158 |
+
|
| 15159 |
+
|
| 15160 |
+
|
| 15161 |
+
|
| 15162 |
+
|
| 15163 |
+
|
| 15164 |
+
|
| 15165 |
+
|
| 15166 |
+
|
| 15167 |
+
|
| 15168 |
+
|
| 15169 |
+
|
| 15170 |
+
|
| 15171 |
+
|
| 15172 |
+
|
| 15173 |
+
|
| 15174 |
+
|
| 15175 |
+
|
| 15176 |
+
|
| 15177 |
+
|
| 15178 |
+
|
| 15179 |
+
|
| 15180 |
+
|
| 15181 |
+
|
| 15182 |
+
|
| 15183 |
+
|
| 15184 |
+
|
| 15185 |
+
|
| 15186 |
+
|
| 15187 |
+
|
| 15188 |
+
|
| 15189 |
+
|
| 15190 |
+
|
| 15191 |
+
|
| 15192 |
+
|
| 15193 |
+
|
| 15194 |
+
|
| 15195 |
+
|
| 15196 |
+
|
| 15197 |
+
|
| 15198 |
+
|
| 15199 |
+
|
| 15200 |
+
|
| 15201 |
+
|
| 15202 |
+
|
| 15203 |
+
|
| 15204 |
+
|
| 15205 |
+
|
| 15206 |
+
|
| 15207 |
+
|
| 15208 |
+
|
| 15209 |
+
|
| 15210 |
+
|
| 15211 |
+
|
| 15212 |
+
|
| 15213 |
+
|
| 15214 |
+
|
| 15215 |
+
|
| 15216 |
+
|
| 15217 |
+
|
| 15218 |
+
|
| 15219 |
+
|
| 15220 |
+
|
| 15221 |
+
|
| 15222 |
+
|
| 15223 |
+
|
| 15224 |
+
|
| 15225 |
+
|
| 15226 |
+
|
| 15227 |
+
|
| 15228 |
+
|
| 15229 |
+
|
| 15230 |
+
|
| 15231 |
+
|
| 15232 |
+
|
| 15233 |
+
|
| 15234 |
+
|
| 15235 |
+
|
| 15236 |
+
|
| 15237 |
+
|
| 15238 |
+
|
| 15239 |
+
|
| 15240 |
+
|
| 15241 |
+
|
| 15242 |
+
|
| 15243 |
+
|
| 15244 |
+
|
| 15245 |
+
|
| 15246 |
+
|
| 15247 |
+
|
| 15248 |
+
|
| 15249 |
+
|
| 15250 |
+
|
| 15251 |
+
|
| 15252 |
+
|
| 15253 |
+
|
| 15254 |
+
|
| 15255 |
+
|
| 15256 |
+
|
| 15257 |
+
|
| 15258 |
+
|
| 15259 |
+
|
| 15260 |
+
|
| 15261 |
+
|
| 15262 |
+
|
| 15263 |
+
|
| 15264 |
+
|
| 15265 |
+
|
| 15266 |
+
|
| 15267 |
+
|
| 15268 |
+
|
| 15269 |
+
|
| 15270 |
+
|
| 15271 |
+
|
| 15272 |
+
|
| 15273 |
+
|
| 15274 |
+
|
| 15275 |
+
|
| 15276 |
+
|
| 15277 |
+
|
| 15278 |
+
|
| 15279 |
+
|
| 15280 |
+
|
| 15281 |
+
|
| 15282 |
+
|
| 15283 |
+
|
| 15284 |
+
|
| 15285 |
+
|
| 15286 |
+
|
| 15287 |
+
|
| 15288 |
+
|
| 15289 |
+
|
| 15290 |
+
|
| 15291 |
+
|
| 15292 |
+
|
| 15293 |
+
|
| 15294 |
+
|
| 15295 |
+
|
| 15296 |
+
|
| 15297 |
+
|
| 15298 |
+
|
| 15299 |
+
|
| 15300 |
+
|
| 15301 |
+
|
| 15302 |
+
|
| 15303 |
+
|
| 15304 |
+
|
| 15305 |
+
|
| 15306 |
+
|
| 15307 |
+
|
| 15308 |
+
|
| 15309 |
+
|
| 15310 |
+
|
| 15311 |
+
|
| 15312 |
+
|
| 15313 |
+
|
| 15314 |
+
|
| 15315 |
+
|
| 15316 |
+
|
| 15317 |
+
|
| 15318 |
+
|
| 15319 |
+
|
| 15320 |
+
|
| 15321 |
+
|
| 15322 |
+
|
| 15323 |
+
|
| 15324 |
+
|
| 15325 |
+
|
| 15326 |
+
|
| 15327 |
+
|
| 15328 |
+
|
| 15329 |
+
|
| 15330 |
+
|
| 15331 |
+
|
| 15332 |
+
|
| 15333 |
+
|
| 15334 |
+
|
| 15335 |
+
|
| 15336 |
+
|
| 15337 |
+
|
| 15338 |
+
|
| 15339 |
+
|
| 15340 |
+
|
| 15341 |
+
|
| 15342 |
+
|
| 15343 |
+
|
| 15344 |
+
|
| 15345 |
+
|
| 15346 |
+
|
| 15347 |
+
|
| 15348 |
+
|
| 15349 |
+
|
| 15350 |
+
|
| 15351 |
+
|
| 15352 |
+
|
| 15353 |
+
|
| 15354 |
+
|
| 15355 |
+
|
| 15356 |
+
|
| 15357 |
+
|
| 15358 |
+
|
| 15359 |
+
|
| 15360 |
+
|
| 15361 |
+
|
| 15362 |
+
|
| 15363 |
+
|
| 15364 |
+
|
| 15365 |
+
|
| 15366 |
+
|
| 15367 |
+
|
| 15368 |
+
|
| 15369 |
+
|
| 15370 |
+
|
| 15371 |
+
|
| 15372 |
+
|
| 15373 |
+
|
| 15374 |
+
|
| 15375 |
+
|
| 15376 |
+
|
| 15377 |
+
|
| 15378 |
+
|
| 15379 |
+
|
| 15380 |
+
|
| 15381 |
+
|
| 15382 |
+
|
| 15383 |
+
|
| 15384 |
+
|
| 15385 |
+
|
| 15386 |
+
|
| 15387 |
+
|
| 15388 |
+
|
| 15389 |
+
|
| 15390 |
+
|
| 15391 |
+
|
| 15392 |
+
|
| 15393 |
+
|
| 15394 |
+
|
| 15395 |
+
|
| 15396 |
+
|
| 15397 |
+
|
| 15398 |
+
|
| 15399 |
+
|
| 15400 |
+
|
| 15401 |
+
|
| 15402 |
+
|
| 15403 |
+
|
| 15404 |
+
|
| 15405 |
+
|
| 15406 |
+
|
| 15407 |
+
|
| 15408 |
+
|
| 15409 |
+
|
| 15410 |
+
|
| 15411 |
+
|
| 15412 |
+
|
| 15413 |
+
|
| 15414 |
+
|
| 15415 |
+
|
| 15416 |
+
|
| 15417 |
+
|
| 15418 |
+
|
| 15419 |
+
|
| 15420 |
+
|
| 15421 |
+
|
| 15422 |
+
|
| 15423 |
+
|
| 15424 |
+
|
| 15425 |
+
|
| 15426 |
+
|
| 15427 |
+
|
| 15428 |
+
|
| 15429 |
+
|
| 15430 |
+
|
| 15431 |
+
|
| 15432 |
+
|
| 15433 |
+
|
| 15434 |
+
|
| 15435 |
+
|
| 15436 |
+
|
| 15437 |
+
|
| 15438 |
+
|
| 15439 |
+
|
| 15440 |
+
|
| 15441 |
+
|
| 15442 |
+
|
| 15443 |
+
|
| 15444 |
+
|
| 15445 |
+
|
| 15446 |
+
|
| 15447 |
+
|
| 15448 |
+
|
| 15449 |
+
|
| 15450 |
+
|
| 15451 |
+
|
| 15452 |
+
|
| 15453 |
+
|
| 15454 |
+
|
| 15455 |
+
|
| 15456 |
+
|
| 15457 |
+
|
| 15458 |
+
|
| 15459 |
+
|
| 15460 |
+
|
| 15461 |
+
|
| 15462 |
+
|
| 15463 |
+
|
| 15464 |
+
|
| 15465 |
+
|
| 15466 |
+
|
| 15467 |
+
|
| 15468 |
+
|
| 15469 |
+
|
| 15470 |
+
|
| 15471 |
+
|
| 15472 |
+
|
| 15473 |
+
|
| 15474 |
+
|
| 15475 |
+
|
| 15476 |
+
|
| 15477 |
+
|
| 15478 |
+
|
| 15479 |
+
|
| 15480 |
+
|
| 15481 |
+
|
| 15482 |
+
|
| 15483 |
+
|
| 15484 |
+
|
| 15485 |
+
|
| 15486 |
+
|
| 15487 |
+
|
| 15488 |
+
|
| 15489 |
+
|
| 15490 |
+
|
| 15491 |
+
|
| 15492 |
+
|
| 15493 |
+
|
| 15494 |
+
|
| 15495 |
+
|
| 15496 |
+
|
| 15497 |
+
|
| 15498 |
+
|
| 15499 |
+
|
| 15500 |
+
|
| 15501 |
+
|
| 15502 |
+
|
| 15503 |
+
|
| 15504 |
+
|
| 15505 |
+
|
| 15506 |
+
|
| 15507 |
+
|
| 15508 |
+
|
| 15509 |
+
|
| 15510 |
+
|
| 15511 |
+
|
| 15512 |
+
|
| 15513 |
+
|
| 15514 |
+
Step... (22000/50000 | Loss: 1.6613430976867676, Acc: 0.6655245423316956): 46%|████████████▍ | 23000/50000 [9:05:24<10:18:31, 1.37s/it]
|
| 15515 |
+
Evaluating ...: 3%|██▉ | 4/130 [00:00<00:08, 14.65it/s]
|
| 15516 |
+
Step... (22500 | Loss: 1.9999163150787354, Learning Rate: 0.0003333333588670939)
|
| 15517 |
+
|
| 15518 |
+
|
| 15519 |
+
|
| 15520 |
+
|
| 15521 |
+
|
| 15522 |
+
|
| 15523 |
+
|
| 15524 |
+
|
| 15525 |
+
|
| 15526 |
+
|
| 15527 |
+
|
| 15528 |
+
|
| 15529 |
+
[11:13:47] - INFO - __main__ - Saving checkpoint at 23000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
|
| 15530 |
+
All Flax model weights were used when initializing RobertaForMaskedLM.
|
| 15531 |
+
Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
|
| 15532 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
| 15533 |
+
|
| 15534 |
+
|
| 15535 |
+
|
| 15536 |
+
|
| 15537 |
+
|
| 15538 |
+
|
| 15539 |
+
|
| 15540 |
+
|
| 15541 |
+
|
| 15542 |
+
|
| 15543 |
+
|
| 15544 |
+
|
| 15545 |
+
|
| 15546 |
+
|
| 15547 |
+
|
| 15548 |
+
|
| 15549 |
+
|
| 15550 |
+
|
| 15551 |
+
|
| 15552 |
+
|
| 15553 |
+
|
| 15554 |
+
|
| 15555 |
+
|
| 15556 |
+
|
| 15557 |
+
|
| 15558 |
+
|
| 15559 |
+
|
| 15560 |
+
|
| 15561 |
+
|
| 15562 |
+
|
| 15563 |
+
|
| 15564 |
+
|
| 15565 |
+
|
| 15566 |
+
|
| 15567 |
+
|
| 15568 |
+
|
| 15569 |
+
|
| 15570 |
+
|
| 15571 |
+
|
| 15572 |
+
|
| 15573 |
+
|
| 15574 |
+
|
| 15575 |
+
|
| 15576 |
+
|
| 15577 |
+
|
| 15578 |
+
|
| 15579 |
+
|
| 15580 |
+
|
| 15581 |
+
|
| 15582 |
+
|
| 15583 |
+
|
| 15584 |
+
|
| 15585 |
+
|
| 15586 |
+
|
| 15587 |
+
|
| 15588 |
+
|
| 15589 |
+
|
| 15590 |
+
|
| 15591 |
+
|
| 15592 |
+
|
| 15593 |
+
|
| 15594 |
+
|
| 15595 |
+
|
| 15596 |
+
|
| 15597 |
+
|
| 15598 |
+
|
| 15599 |
+
|
| 15600 |
+
|
| 15601 |
+
|
| 15602 |
+
|
| 15603 |
+
|
| 15604 |
+
|
| 15605 |
+
|
| 15606 |
+
|
| 15607 |
+
|
| 15608 |
+
|
| 15609 |
+
|
| 15610 |
+
|
| 15611 |
+
|
| 15612 |
+
|
| 15613 |
+
|
| 15614 |
+
|
| 15615 |
+
|
| 15616 |
+
|
| 15617 |
+
|
| 15618 |
+
|
| 15619 |
+
|
| 15620 |
+
|
| 15621 |
+
|
| 15622 |
+
|
| 15623 |
+
|
| 15624 |
+
|
| 15625 |
+
|
| 15626 |
+
|
| 15627 |
+
|
| 15628 |
+
|
| 15629 |
+
|
| 15630 |
+
|
| 15631 |
+
|
| 15632 |
+
|
| 15633 |
+
|
| 15634 |
+
|
| 15635 |
+
|
| 15636 |
+
|
| 15637 |
+
|
| 15638 |
+
|
| 15639 |
+
|
| 15640 |
+
|
| 15641 |
+
|
| 15642 |
+
|
| 15643 |
+
|
| 15644 |
+
|
| 15645 |
+
|
| 15646 |
+
|
| 15647 |
+
|
| 15648 |
+
|
| 15649 |
+
|
| 15650 |
+
|
| 15651 |
+
|
| 15652 |
+
|
| 15653 |
+
|
| 15654 |
+
|
| 15655 |
+
|
| 15656 |
+
|
| 15657 |
+
|
| 15658 |
+
|
| 15659 |
+
|
| 15660 |
+
|
| 15661 |
+
|
| 15662 |
+
|
| 15663 |
+
|
| 15664 |
+
|
| 15665 |
+
|
| 15666 |
+
|
| 15667 |
+
|
| 15668 |
+
|
| 15669 |
+
|
| 15670 |
+
|
| 15671 |
+
|
| 15672 |
+
|
| 15673 |
+
|
| 15674 |
+
|
| 15675 |
+
|
| 15676 |
+
|
| 15677 |
+
|
| 15678 |
+
|
| 15679 |
+
|
| 15680 |
+
|
| 15681 |
+
|
| 15682 |
+
|
| 15683 |
+
|
| 15684 |
+
|
| 15685 |
+
|
| 15686 |
+
|
| 15687 |
+
|
| 15688 |
+
|
| 15689 |
+
|
| 15690 |
+
|
| 15691 |
+
|
| 15692 |
+
|
| 15693 |
+
|
| 15694 |
+
|
| 15695 |
+
|
| 15696 |
+
|
| 15697 |
+
|
| 15698 |
+
|
| 15699 |
+
|
| 15700 |
+
|
| 15701 |
+
|
| 15702 |
+
|
| 15703 |
+
|
| 15704 |
+
|
| 15705 |
+
|
| 15706 |
+
|
| 15707 |
+
|
| 15708 |
+
|
| 15709 |
+
|
| 15710 |
+
|
| 15711 |
+
|
| 15712 |
+
|
| 15713 |
+
|
| 15714 |
+
|
| 15715 |
+
|
| 15716 |
+
|
| 15717 |
+
|
| 15718 |
+
|
| 15719 |
+
|
| 15720 |
+
|
| 15721 |
+
|
| 15722 |
+
|
| 15723 |
+
|
| 15724 |
+
|
| 15725 |
+
|
| 15726 |
+
|
| 15727 |
+
|
| 15728 |
+
|
| 15729 |
+
|
| 15730 |
+
|
| 15731 |
+
|
| 15732 |
+
|
| 15733 |
+
|
| 15734 |
+
|
| 15735 |
+
|
| 15736 |
+
|
| 15737 |
+
|
| 15738 |
+
|
| 15739 |
+
|
| 15740 |
+
|
| 15741 |
+
|
| 15742 |
+
|
| 15743 |
+
|
| 15744 |
+
|
| 15745 |
+
|
| 15746 |
+
|
| 15747 |
+
|
| 15748 |
+
|
| 15749 |
+
|
| 15750 |
+
|
| 15751 |
+
|
| 15752 |
+
|
| 15753 |
+
|
| 15754 |
+
|
| 15755 |
+
|
| 15756 |
+
|
| 15757 |
+
|
| 15758 |
+
|
| 15759 |
+
|
| 15760 |
+
|
| 15761 |
+
|
| 15762 |
+
|
| 15763 |
+
|
| 15764 |
+
|
| 15765 |
+
|
| 15766 |
+
|
| 15767 |
+
|
| 15768 |
+
|
| 15769 |
+
|
| 15770 |
+
|
| 15771 |
+
|
| 15772 |
+
|
| 15773 |
+
|
| 15774 |
+
|
| 15775 |
+
|
| 15776 |
+
|
| 15777 |
+
|
| 15778 |
+
|
| 15779 |
+
|
| 15780 |
+
|
| 15781 |
+
|
| 15782 |
+
|
| 15783 |
+
|
| 15784 |
+
|
| 15785 |
+
|
| 15786 |
+
|
| 15787 |
+
|
| 15788 |
+
|
| 15789 |
+
|
| 15790 |
+
|
| 15791 |
+
|
| 15792 |
+
|
| 15793 |
+
|
| 15794 |
+
|
| 15795 |
+
|
| 15796 |
+
|
| 15797 |
+
|
| 15798 |
+
|
| 15799 |
+
|
| 15800 |
+
|
| 15801 |
+
|
| 15802 |
+
|
| 15803 |
+
|
| 15804 |
+
|
| 15805 |
+
|
| 15806 |
+
|
| 15807 |
+
|
| 15808 |
+
|
| 15809 |
+
|
| 15810 |
+
|
| 15811 |
+
|
| 15812 |
+
|
| 15813 |
+
|
| 15814 |
+
|
| 15815 |
+
|
| 15816 |
+
|
| 15817 |
+
|
| 15818 |
+
|
| 15819 |
+
|
| 15820 |
+
|
| 15821 |
+
|
| 15822 |
+
|
| 15823 |
+
|
| 15824 |
+
|
| 15825 |
+
|
| 15826 |
+
|
| 15827 |
+
|
| 15828 |
+
|
| 15829 |
+
|
| 15830 |
+
|
| 15831 |
+
|
| 15832 |
+
|
| 15833 |
+
|
| 15834 |
+
|
| 15835 |
+
|
| 15836 |
+
|
| 15837 |
+
|
| 15838 |
+
|
| 15839 |
+
|
| 15840 |
+
|
| 15841 |
+
|
| 15842 |
+
|
| 15843 |
+
|
| 15844 |
+
|
| 15845 |
+
|
| 15846 |
+
|
| 15847 |
+
|
| 15848 |
+
|
| 15849 |
+
|
| 15850 |
+
|
| 15851 |
+
|
| 15852 |
+
|
| 15853 |
+
|
| 15854 |
+
|
| 15855 |
+
|
| 15856 |
+
|
| 15857 |
+
|
| 15858 |
+
|
| 15859 |
+
|
| 15860 |
+
|
| 15861 |
+
|
| 15862 |
+
|
| 15863 |
+
|
| 15864 |
+
|
| 15865 |
+
|
| 15866 |
+
|
| 15867 |
+
|
| 15868 |
+
|
| 15869 |
+
|
| 15870 |
+
|
| 15871 |
+
|
| 15872 |
+
|
| 15873 |
+
|
| 15874 |
+
|
| 15875 |
+
|
| 15876 |
+
|
| 15877 |
+
|
| 15878 |
+
|
| 15879 |
+
|
| 15880 |
+
|
| 15881 |
+
|
| 15882 |
+
|
| 15883 |
+
|
| 15884 |
+
|
| 15885 |
+
|
| 15886 |
+
|
| 15887 |
+
|
| 15888 |
+
|
| 15889 |
+
|
| 15890 |
+
|
| 15891 |
+
|
| 15892 |
+
|
| 15893 |
+
|
| 15894 |
+
|
| 15895 |
+
|
| 15896 |
+
|
| 15897 |
+
|
| 15898 |
+
|
| 15899 |
+
|
| 15900 |
+
|
| 15901 |
+
|
| 15902 |
+
|
| 15903 |
+
|
| 15904 |
+
|
| 15905 |
+
|
| 15906 |
+
|
| 15907 |
+
|
| 15908 |
+
|
| 15909 |
+
|
| 15910 |
+
|
| 15911 |
+
|
| 15912 |
+
|
| 15913 |
+
|
| 15914 |
+
|
| 15915 |
+
|
| 15916 |
+
|
| 15917 |
+
|
| 15918 |
+
|
| 15919 |
+
|
| 15920 |
+
|
| 15921 |
+
|
| 15922 |
+
|
| 15923 |
+
|
| 15924 |
+
|
| 15925 |
+
|
| 15926 |
+
|
| 15927 |
+
|
| 15928 |
+
|
| 15929 |
+
|
| 15930 |
+
|
| 15931 |
+
|
| 15932 |
+
|
| 15933 |
+
|
| 15934 |
+
|
| 15935 |
+
|
| 15936 |
+
|
| 15937 |
+
|
| 15938 |
+
|
| 15939 |
+
|
| 15940 |
+
|
| 15941 |
+
|
| 15942 |
+
|
| 15943 |
+
|
| 15944 |
+
|
| 15945 |
+
|
| 15946 |
+
|
| 15947 |
+
|
| 15948 |
+
|
| 15949 |
+
|
| 15950 |
+
|
| 15951 |
+
|
| 15952 |
+
|
| 15953 |
+
|
| 15954 |
+
|
| 15955 |
+
|
| 15956 |
+
|
| 15957 |
+
|
| 15958 |
+
|
| 15959 |
+
|
| 15960 |
+
|
| 15961 |
+
|
| 15962 |
+
|
| 15963 |
+
|
| 15964 |
+
|
| 15965 |
+
|
| 15966 |
+
|
| 15967 |
+
|
| 15968 |
+
|
| 15969 |
+
|
| 15970 |
+
|
| 15971 |
+
|
| 15972 |
+
|
| 15973 |
+
|
| 15974 |
+
|
| 15975 |
+
|
| 15976 |
+
|
| 15977 |
+
|
| 15978 |
+
|
| 15979 |
+
|
| 15980 |
+
|
| 15981 |
+
|
| 15982 |
+
|
| 15983 |
+
|
| 15984 |
+
|
| 15985 |
+
|
| 15986 |
+
|
| 15987 |
+
|
| 15988 |
+
|
| 15989 |
+
|
| 15990 |
+
|
| 15991 |
+
|
| 15992 |
+
|
| 15993 |
+
|
| 15994 |
+
|
| 15995 |
+
|
| 15996 |
+
|
| 15997 |
+
|
| 15998 |
+
|
| 15999 |
+
|
| 16000 |
+
|
| 16001 |
+
|
| 16002 |
+
|
| 16003 |
+
|
| 16004 |
+
|
| 16005 |
+
|
| 16006 |
+
|
| 16007 |
+
|
| 16008 |
+
|
| 16009 |
+
|
| 16010 |
+
|
| 16011 |
+
|
| 16012 |
+
|
| 16013 |
+
|
| 16014 |
+
|
| 16015 |
+
|
| 16016 |
+
|
| 16017 |
+
|
| 16018 |
+
|
| 16019 |
+
|
| 16020 |
+
|
| 16021 |
+
|
| 16022 |
+
|
| 16023 |
+
|
| 16024 |
+
|
| 16025 |
+
|
| 16026 |
+
|
| 16027 |
+
|
| 16028 |
+
|
| 16029 |
+
|
| 16030 |
+
|
| 16031 |
+
|
| 16032 |
+
|
| 16033 |
+
|
| 16034 |
+
|
| 16035 |
+
|
| 16036 |
+
|
| 16037 |
+
|
| 16038 |
+
|
| 16039 |
+
|
| 16040 |
+
|
| 16041 |
+
|
| 16042 |
+
|
| 16043 |
+
|
| 16044 |
+
|
| 16045 |
+
|
| 16046 |
+
|
| 16047 |
+
|
| 16048 |
+
|
| 16049 |
+
|
| 16050 |
+
|
| 16051 |
+
|
| 16052 |
+
|
| 16053 |
+
|
| 16054 |
+
|
| 16055 |
+
|
| 16056 |
+
|
| 16057 |
+
|
| 16058 |
+
|
| 16059 |
+
|
| 16060 |
+
|
| 16061 |
+
|
| 16062 |
+
|
| 16063 |
+
|
| 16064 |
+
|
| 16065 |
+
|
| 16066 |
+
|
| 16067 |
+
|
| 16068 |
+
|
| 16069 |
+
|
| 16070 |
+
|
| 16071 |
+
|
| 16072 |
+
|
| 16073 |
+
|
| 16074 |
+
|
| 16075 |
+
|
| 16076 |
+
|
| 16077 |
+
|
| 16078 |
+
|
| 16079 |
+
|
| 16080 |
+
|
| 16081 |
+
|
| 16082 |
+
|
| 16083 |
+
|
| 16084 |
+
|
| 16085 |
+
|
| 16086 |
+
|
| 16087 |
+
|
| 16088 |
+
|
| 16089 |
+
|
| 16090 |
+
|
| 16091 |
+
|
| 16092 |
+
|
| 16093 |
+
|
| 16094 |
+
|
| 16095 |
+
|
| 16096 |
+
|
| 16097 |
+
|
| 16098 |
+
|
| 16099 |
+
|
| 16100 |
+
|
| 16101 |
+
|
| 16102 |
+
|
| 16103 |
+
|
| 16104 |
+
|
| 16105 |
+
|
| 16106 |
+
|
| 16107 |
+
|
| 16108 |
+
|
| 16109 |
+
|
| 16110 |
+
|
| 16111 |
+
|
| 16112 |
+
|
| 16113 |
+
|
| 16114 |
+
|
| 16115 |
+
|
| 16116 |
+
|
| 16117 |
+
|
| 16118 |
+
|
| 16119 |
+
|
| 16120 |
+
|
| 16121 |
+
|
| 16122 |
+
|
| 16123 |
+
|
| 16124 |
+
|
| 16125 |
+
|
| 16126 |
+
|
| 16127 |
+
|
| 16128 |
+
|
| 16129 |
+
|
| 16130 |
+
|
| 16131 |
+
|
| 16132 |
+
|
| 16133 |
+
|
| 16134 |
+
|
| 16135 |
+
|
| 16136 |
+
|
| 16137 |
+
|
| 16138 |
+
|
| 16139 |
+
|
| 16140 |
+
|
| 16141 |
+
|
| 16142 |
+
|
| 16143 |
+
|
| 16144 |
+
|
| 16145 |
+
|
| 16146 |
+
|
| 16147 |
+
|
| 16148 |
+
|
| 16149 |
+
|
| 16150 |
+
|
| 16151 |
+
|
| 16152 |
+
|
| 16153 |
+
|
| 16154 |
+
|
| 16155 |
+
|
| 16156 |
+
|
| 16157 |
+
|
| 16158 |
+
|
| 16159 |
+
|
| 16160 |
+
|
| 16161 |
+
|
| 16162 |
+
|
| 16163 |
+
|
| 16164 |
+
|
| 16165 |
+
|
| 16166 |
+
|
| 16167 |
+
|
| 16168 |
+
|
| 16169 |
+
|
| 16170 |
+
|
| 16171 |
+
|
| 16172 |
+
|
| 16173 |
+
|
| 16174 |
+
|
| 16175 |
+
|
| 16176 |
+
|
| 16177 |
+
|
| 16178 |
+
|
| 16179 |
+
|
| 16180 |
+
|
| 16181 |
+
|
| 16182 |
+
|
| 16183 |
+
|
| 16184 |
+
|
| 16185 |
+
|
| 16186 |
+
|
| 16187 |
+
|
| 16188 |
+
|
| 16189 |
+
|
| 16190 |
+
|
| 16191 |
+
|
| 16192 |
+
|
| 16193 |
+
|
| 16194 |
+
|
| 16195 |
+
|
| 16196 |
+
|
| 16197 |
+
|
| 16198 |
+
|
| 16199 |
+
|
| 16200 |
+
|
| 16201 |
+
|
| 16202 |
+
|
| 16203 |
+
|
| 16204 |
+
|
| 16205 |
+
|
| 16206 |
+
|
| 16207 |
+
|
| 16208 |
+
|
| 16209 |
+
|
| 16210 |
+
|
| 16211 |
+
|
| 16212 |
+
|
| 16213 |
+
Step... (23000/50000 | Loss: 1.6572293043136597, Acc: 0.6663545966148376): 48%|████████████▉ | 24000/50000 [9:30:12<11:34:04, 1.60s/it]
|
| 16214 |
+
Step... (23500 | Loss: 1.7666906118392944, Learning Rate: 0.00032121213735081255)
|
| 16215 |
+
Step... (24000 | Loss: 1.657638430595398, Learning Rate: 0.00031515152659267187)
|
| 16216 |
+
|
| 16217 |
+
|
| 16218 |
+
|
| 16219 |
+
|
| 16220 |
+
|
| 16221 |
+
|
| 16222 |
+
|
| 16223 |
+
|
| 16224 |
+
|
| 16225 |
+
|
| 16226 |
+
|
| 16227 |
+
[11:38:36] - INFO - __main__ - Saving checkpoint at 24000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
|
| 16228 |
+
All Flax model weights were used when initializing RobertaForMaskedLM.
|
| 16229 |
+
Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
|
| 16230 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
| 16231 |
+
|
| 16232 |
+
|
| 16233 |
+
|
| 16234 |
+
|
| 16235 |
+
|
| 16236 |
+
|
| 16237 |
+
|
| 16238 |
+
|
| 16239 |
+
|
| 16240 |
+
|
| 16241 |
+
|
| 16242 |
+
|
| 16243 |
+
|
| 16244 |
+
|
| 16245 |
+
|
| 16246 |
+
|
| 16247 |
+
|
| 16248 |
+
|
| 16249 |
+
|
| 16250 |
+
|
| 16251 |
+
|
| 16252 |
+
|
| 16253 |
+
|
| 16254 |
+
|
| 16255 |
+
|
| 16256 |
+
|
| 16257 |
+
|
| 16258 |
+
|
| 16259 |
+
|
| 16260 |
+
|
| 16261 |
+
|
| 16262 |
+
|
| 16263 |
+
|
| 16264 |
+
|
| 16265 |
+
|
| 16266 |
+
|
| 16267 |
+
|
| 16268 |
+
|
| 16269 |
+
|
| 16270 |
+
|
| 16271 |
+
|
| 16272 |
+
|
| 16273 |
+
|
| 16274 |
+
|
| 16275 |
+
|
| 16276 |
+
|
| 16277 |
+
|
| 16278 |
+
|
| 16279 |
+
|
| 16280 |
+
|
| 16281 |
+
|
| 16282 |
+
|
| 16283 |
+
|
| 16284 |
+
|
| 16285 |
+
|
| 16286 |
+
|
| 16287 |
+
|
| 16288 |
+
|
| 16289 |
+
|
| 16290 |
+
|
| 16291 |
+
|
| 16292 |
+
|
| 16293 |
+
|
| 16294 |
+
|
| 16295 |
+
|
| 16296 |
+
|
| 16297 |
+
|
| 16298 |
+
|
| 16299 |
+
|
| 16300 |
+
|
| 16301 |
+
|
| 16302 |
+
|
| 16303 |
+
|
| 16304 |
+
|
| 16305 |
+
|
| 16306 |
+
|
| 16307 |
+
|
| 16308 |
+
|
| 16309 |
+
|
| 16310 |
+
|
| 16311 |
+
|
| 16312 |
+
|
| 16313 |
+
|
| 16314 |
+
|
| 16315 |
+
|
| 16316 |
+
|
| 16317 |
+
|
| 16318 |
+
|
| 16319 |
+
|
| 16320 |
+
|
| 16321 |
+
|
| 16322 |
+
|
| 16323 |
+
|
| 16324 |
+
|
| 16325 |
+
|
| 16326 |
+
|
| 16327 |
+
|
| 16328 |
+
|
| 16329 |
+
|
| 16330 |
+
|
| 16331 |
+
|
| 16332 |
+
|
| 16333 |
+
|
| 16334 |
+
|
| 16335 |
+
|
| 16336 |
+
|
| 16337 |
+
|
| 16338 |
+
|
| 16339 |
+
|
| 16340 |
+
|
| 16341 |
+
|
| 16342 |
+
|
| 16343 |
+
|
| 16344 |
+
|
| 16345 |
+
|
| 16346 |
+
|
| 16347 |
+
|
| 16348 |
+
|
| 16349 |
+
|
| 16350 |
|
| 16351 |
|
| 16352 |
|
wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"global_step":
|
|
|
|
| 1 |
+
{"global_step": 24000, "_timestamp": 1627299487.452405, "train_time": 1156106.125, "train_learning_rate": 0.00031515152659267187, "_step": 47856, "train_loss": 1.7166345119476318, "eval_accuracy": 0.6663545966148376, "eval_loss": 1.6572293043136597}
|
wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e82989e4b19c6c0abd610b0181219b8926bc8d5e7d84c1812150b24b6b6a4d6e
|
| 3 |
+
size 18951993
|
wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c32d64082b6ac9a729c131c88cc2d56813251ca3d7cc69eb10cf688204a79ff
|
| 3 |
+
size 9437234
|