versae committed on
Commit de8489d · 1 Parent(s): 4619063

Step... (24000/50000 | Loss: 1.6508632898330688, Acc: 0.6671841740608215): 48%|█████████████ | 24215/50000 [9:36:14<10:45:10, 1.50s/it]

Files changed (32)
  1. flax_model.msgpack +1 -1
  2. outputs/checkpoints/checkpoint-17000/training_state.json +0 -1
  3. outputs/checkpoints/checkpoint-18000/training_state.json +0 -1
  4. outputs/checkpoints/checkpoint-19000/training_state.json +0 -1
  5. outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/config.json +0 -0
  6. outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/data_collator.joblib +0 -0
  7. outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/flax_model.msgpack +1 -1
  8. outputs/checkpoints/{checkpoint-19000 → checkpoint-22000}/optimizer_state.msgpack +1 -1
  9. outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/training_args.joblib +0 -0
  10. outputs/checkpoints/checkpoint-22000/training_state.json +1 -0
  11. outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/config.json +0 -0
  12. outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/data_collator.joblib +0 -0
  13. outputs/checkpoints/{checkpoint-19000 → checkpoint-23000}/flax_model.msgpack +1 -1
  14. outputs/checkpoints/{checkpoint-17000 → checkpoint-23000}/optimizer_state.msgpack +1 -1
  15. outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/training_args.joblib +0 -0
  16. outputs/checkpoints/checkpoint-23000/training_state.json +1 -0
  17. outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/config.json +0 -0
  18. outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/data_collator.joblib +0 -0
  19. outputs/checkpoints/{checkpoint-18000 → checkpoint-24000}/flax_model.msgpack +1 -1
  20. outputs/checkpoints/{checkpoint-18000 → checkpoint-24000}/optimizer_state.msgpack +1 -1
  21. outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/training_args.joblib +0 -0
  22. outputs/checkpoints/checkpoint-24000/training_state.json +1 -0
  23. outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 +2 -2
  24. outputs/flax_model.msgpack +1 -1
  25. outputs/optimizer_state.msgpack +1 -1
  26. outputs/training_state.json +1 -1
  27. pytorch_model.bin +1 -1
  28. run_stream.512.log +0 -0
  29. wandb/run-20210726_001233-17u6inbn/files/output.log +1717 -0
  30. wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json +1 -1
  31. wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log +2 -2
  32. wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb +2 -2
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ba1daf7b1dad5bf7c386bc7b53d5537a8f26b3cfee5b0fc009a750ad077eab0
+oid sha256:b22d22612dd38ad92ffdda4b0cf432e201d6c90dd5386d04a2cdf4d19cdfd1ed
 size 249750019
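All large files in this repo are tracked with Git LFS, so each diff above and below touches only the three-line pointer stub (version, oid, size) while the actual weight blobs live in LFS storage. A minimal sketch of reading such a pointer to see which blob a checkout references (the helper name is ours, not part of the repo):

```python
def read_lfs_pointer(path):
    # Each pointer file is three "key value" lines:
    # version <spec-url> / oid sha256:<hash> / size <bytes>
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

fields = read_lfs_pointer("flax_model.msgpack")
print(fields["oid"], fields["size"])  # e.g. sha256:b22d2261... 249750019
```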
outputs/checkpoints/checkpoint-17000/training_state.json DELETED
@@ -1 +0,0 @@
-{"step": 17001}
outputs/checkpoints/checkpoint-18000/training_state.json DELETED
@@ -1 +0,0 @@
-{"step": 18001}
outputs/checkpoints/checkpoint-19000/training_state.json DELETED
@@ -1 +0,0 @@
-{"step": 19001}
outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6453368e8fd0e3c80ecb0b3dd860a84293d6cc3788ee6f32b9a7cb9a77fa001a
+oid sha256:ce6736afa967315a5ccac23ff15ab3d3f2f90881f2858be1c86b98b60e0fa764
 size 249750019
outputs/checkpoints/{checkpoint-19000 → checkpoint-22000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fd17bbca5658a6226151a6f85c1c6b4064b42b9ce32213f96be1f4b4993a48c
+oid sha256:02cabdf326b00115bc75530d1d7bc3f9a82e57d038202548c3edee7d57c661ae
 size 499500278
outputs/checkpoints/{checkpoint-17000 → checkpoint-22000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-22000/training_state.json ADDED
@@ -0,0 +1 @@
+{"step": 22001}
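Each training_state.json carries nothing but the step to resume from (saved as step + 1). A hedged sketch of how a training loop might write and read it; the helper names are hypothetical, not taken from the actual training script:

```python
import json
import os

def save_training_state(checkpoint_dir, step):
    # Persist only the resumption step, matching the {"step": N} files in this commit.
    with open(os.path.join(checkpoint_dir, "training_state.json"), "w") as f:
        json.dump({"step": step}, f)

def load_training_state(checkpoint_dir):
    with open(os.path.join(checkpoint_dir, "training_state.json")) as f:
        return json.load(f)["step"]
```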
outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-19000 → checkpoint-23000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d0ae4178820ed8ec84d010dda13f1c110189fa19d49afd4d14283cf09774bee
+oid sha256:559baf67a4fa12f4ddb4ea45aaf285d2e5d700ac5aa0e7ffb854af49e075634d
 size 249750019
outputs/checkpoints/{checkpoint-17000 → checkpoint-23000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77b05dc72072a294b97d7184e57ba9c0046b55665a7eb760f5ff414d319abe87
+oid sha256:6bcd19d800843747a4fd81108e8654c0d431e94bacbd45321125f28f4eda9857
 size 499500278
outputs/checkpoints/{checkpoint-18000 → checkpoint-23000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-23000/training_state.json ADDED
@@ -0,0 +1 @@
+{"step": 23001}
outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-18000 → checkpoint-24000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5a36a0b75be789eed389d6d8014081085f305abe5ca5007d4fd9bf9decf73d2
+oid sha256:b22d22612dd38ad92ffdda4b0cf432e201d6c90dd5386d04a2cdf4d19cdfd1ed
 size 249750019
outputs/checkpoints/{checkpoint-18000 → checkpoint-24000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:472de67734e639ea41e43bd17705bf1c8e3ce22ee74865cab8ef338731f0cf9f
+oid sha256:bcac7bac463ddd6530546523b0141118f658d528e0d7ec682da2661fe2a0f7df
 size 499500278
outputs/checkpoints/{checkpoint-19000 → checkpoint-24000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-24000/training_state.json ADDED
@@ -0,0 +1 @@
+{"step": 24001}
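Git shows the checkpoint-17000/18000/19000 directories as renamed into checkpoint-22000/23000/24000 because the run keeps only the last few checkpoints: the old directories are deleted as the new ones are written, and files whose content never changes between saves (config.json, data_collator.joblib, training_args.joblib) are detected as moves. A sketch of such keep-last-k rotation under that assumption; the actual script's retention logic is not visible in this commit:

```python
import shutil
from pathlib import Path

def rotate_checkpoints(root, keep=3):
    # Sort checkpoint-NNNNN directories by step and drop all but the newest `keep`.
    ckpts = sorted(Path(root).glob("checkpoint-*"),
                   key=lambda p: int(p.name.split("-")[1]))
    for old in ckpts[:-keep]:
        shutil.rmtree(old)

rotate_checkpoints("outputs/checkpoints", keep=3)
```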
outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3cc46840b5336c96adfc10b39ed6dd9d36d3759fb574ca64e28191207730bfb
-size 3176589
+oid sha256:187bfd40e3dd6f12ab8cd6df2018b0fef55ab1ab89a973e1cc1b5427620d8135
+size 3549865
outputs/flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ba1daf7b1dad5bf7c386bc7b53d5537a8f26b3cfee5b0fc009a750ad077eab0
+oid sha256:b22d22612dd38ad92ffdda4b0cf432e201d6c90dd5386d04a2cdf4d19cdfd1ed
 size 249750019
outputs/optimizer_state.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd862c6893d8672a836d674b5ef9d3eaab357c385ad5b064b7202eccc581ff05
+oid sha256:bcac7bac463ddd6530546523b0141118f658d528e0d7ec682da2661fe2a0f7df
 size 499500278
outputs/training_state.json CHANGED
@@ -1 +1 @@
-{"step": 21001}
+{"step": 24001}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:56ddc0bfdddad2ca72308b9edd1fc42a1a815c78826b2a838c898083e3d5041e
+oid sha256:d50ca6bc265a7b18cee3972966e847d1c5891e5fec62a6e912bbbe885e2e82da
 size 498858859
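pytorch_model.bin changes together with flax_model.msgpack because each save also converts the Flax weights to PyTorch; the "All Flax model weights were used when initializing RobertaForMaskedLM" lines in the log below are what transformers prints during that conversion. A minimal sketch of the same conversion, assuming transformers is installed with both the Flax and PyTorch backends:

```python
from transformers import RobertaForMaskedLM

# from_flax=True reads flax_model.msgpack and converts the weights; PyTorch-only
# buffers such as roberta.embeddings.position_ids are newly initialized, which is
# exactly what the warning in the log reports.
model = RobertaForMaskedLM.from_pretrained("outputs", from_flax=True)
model.save_pretrained("outputs")  # writes pytorch_model.bin
```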
run_stream.512.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210726_001233-17u6inbn/files/output.log CHANGED
@@ -14630,6 +14630,1723 @@ You should probably TRAIN this model on a down-stream task to be able to use it
[mostly blank progress-bar redraw lines elided; substantive added entries below]
+Step... (21000/50000 | Loss: 1.669716238975525, Acc: 0.6647850275039673): 44%|████████████▎ | 22000/50000 [8:40:53<13:14:50, 1.70s/it]
+Step... (21500 | Loss: 1.764472484588623, Learning Rate: 0.00034545455127954483)
+Step... (21000/50000 | Loss: 1.669716238975525, Acc: 0.6647850275039673): 44%|████████████▎ | 22000/50000 [8:40:55<13:14:50, 1.70s/it]
+[10:49:19] - INFO - __main__ - Saving checkpoint at 22000 steps
+All Flax model weights were used when initializing RobertaForMaskedLM.
+Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+Step... (22000/50000 | Loss: 1.6613430976867676, Acc: 0.6655245423316956): 46%|████████████▍ | 23000/50000 [9:05:24<10:18:31, 1.37s/it]
+Evaluating ...: 3%|██▉ | 4/130 [00:00<00:08, 14.65it/s]
+Step... (22500 | Loss: 1.9999163150787354, Learning Rate: 0.0003333333588670939)
+[11:13:47] - INFO - __main__ - Saving checkpoint at 23000 steps
+All Flax model weights were used when initializing RobertaForMaskedLM.
+Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+Step... (23000/50000 | Loss: 1.6572293043136597, Acc: 0.6663545966148376): 48%|████████████▉ | 24000/50000 [9:30:12<11:34:04, 1.60s/it]
+Step... (23500 | Loss: 1.7666906118392944, Learning Rate: 0.00032121213735081255)
+Step... (24000 | Loss: 1.657638430595398, Learning Rate: 0.00031515152659267187)
+[11:38:36] - INFO - __main__ - Saving checkpoint at 24000 steps
wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json CHANGED
@@ -1 +1 @@
-{"global_step": 21500, "_timestamp": 1627295817.37153, "train_time": 957886.375, "train_learning_rate": 0.00034545455127954483, "_step": 42871, "train_loss": 1.6961593627929688, "eval_accuracy": 0.6647850275039673, "eval_loss": 1.669716238975525}
+{"global_step": 24000, "_timestamp": 1627299487.452405, "train_time": 1156106.125, "train_learning_rate": 0.00031515152659267187, "_step": 47856, "train_loss": 1.7166345119476318, "eval_accuracy": 0.6663545966148376, "eval_loss": 1.6572293043136597}
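wandb-summary.json always holds the most recently logged values, so this diff is just the step-24000 snapshot replacing the step-21500 one. A quick sketch for inspecting it offline:

```python
import json

run_dir = "wandb/run-20210726_001233-17u6inbn"
with open(f"{run_dir}/files/wandb-summary.json") as f:
    summary = json.load(f)

# e.g. 24000 1.6572293043136597 0.6663545966148376
print(summary["global_step"], summary["eval_loss"], summary["eval_accuracy"])
```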
wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efeb439af32e6eb761cd222b4de30fb8c299ae62524e09ab6574d273aa9ccb62
-size 16987693
+oid sha256:e82989e4b19c6c0abd610b0181219b8926bc8d5e7d84c1812150b24b6b6a4d6e
+size 18951993
wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0563d981cabfb744be4dba9411f8759967f5c165cc116bd1736d9615afb67aa9
-size 8433368
+oid sha256:0c32d64082b6ac9a729c131c88cc2d56813251ca3d7cc69eb10cf688204a79ff
+size 9437234