Ahmed HAZOURLI (BFR)
		
	committed on
		
		
					Commit 
							
							·
						
						5d43385
	
1
								Parent(s):
							
							6a060b6
								
FinancialBERT model
Browse files
 - config.json +3 -4
 - optimizer.pt +3 -0
 - pytorch_model.bin +2 -2
 - rng_state.pth +3 -0
 - scheduler.pt +3 -0
 - tokenizer.json +0 -0
 - tokenizer_config.json +1 -1
 - trainer_state.json +376 -0
 - training_args.bin +3 -0
 
    	
        config.json
    CHANGED
    
    | 
         @@ -1,11 +1,10 @@ 
     | 
|
| 1 | 
         
             
            {
         
     | 
| 2 | 
         
            -
              "_name_or_path": " 
     | 
| 3 | 
         
             
              "architectures": [
         
     | 
| 4 | 
         
            -
                " 
     | 
| 5 | 
         
             
              ],
         
     | 
| 6 | 
         
             
              "attention_probs_dropout_prob": 0.1,
         
     | 
| 7 | 
         
             
              "classifier_dropout": null,
         
     | 
| 8 | 
         
            -
              "gradient_checkpointing": false,
         
     | 
| 9 | 
         
             
              "hidden_act": "gelu",
         
     | 
| 10 | 
         
             
              "hidden_dropout_prob": 0.1,
         
     | 
| 11 | 
         
             
              "hidden_size": 768,
         
     | 
| 
         @@ -19,7 +18,7 @@ 
     | 
|
| 19 | 
         
             
              "pad_token_id": 0,
         
     | 
| 20 | 
         
             
              "position_embedding_type": "absolute",
         
     | 
| 21 | 
         
             
              "torch_dtype": "float32",
         
     | 
| 22 | 
         
            -
              "transformers_version": "4.3 
     | 
| 23 | 
         
             
              "type_vocab_size": 2,
         
     | 
| 24 | 
         
             
              "use_cache": true,
         
     | 
| 25 | 
         
             
              "vocab_size": 30873
         
     | 
| 
         | 
|
| 1 | 
         
             
            {
         
     | 
| 2 | 
         
            +
              "_name_or_path": "yiyanghkust/finbert-pretrain",
         
     | 
| 3 | 
         
             
              "architectures": [
         
     | 
| 4 | 
         
            +
                "BertForMaskedLM"
         
     | 
| 5 | 
         
             
              ],
         
     | 
| 6 | 
         
             
              "attention_probs_dropout_prob": 0.1,
         
     | 
| 7 | 
         
             
              "classifier_dropout": null,
         
     | 
| 
         | 
|
| 8 | 
         
             
              "hidden_act": "gelu",
         
     | 
| 9 | 
         
             
              "hidden_dropout_prob": 0.1,
         
     | 
| 10 | 
         
             
              "hidden_size": 768,
         
     | 
| 
         | 
|
| 18 | 
         
             
              "pad_token_id": 0,
         
     | 
| 19 | 
         
             
              "position_embedding_type": "absolute",
         
     | 
| 20 | 
         
             
              "torch_dtype": "float32",
         
     | 
| 21 | 
         
            +
              "transformers_version": "4.11.3",
         
     | 
| 22 | 
         
             
              "type_vocab_size": 2,
         
     | 
| 23 | 
         
             
              "use_cache": true,
         
     | 
| 24 | 
         
             
              "vocab_size": 30873
         
     | 
    	
        optimizer.pt
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:341d38f413808753b6021444cbb214fabef5ffaf71fe393503a38489ee4faaec
         
     | 
| 3 | 
         
            +
            size 878393999
         
     | 
    	
        pytorch_model.bin
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
            -
            size  
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:0ca8d153f1176ab4c32c84920399708f9b141829f6634b8121cdc3a8a59dd690
         
     | 
| 3 | 
         
            +
            size 439223687
         
     | 
    	
        rng_state.pth
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:26c4882ca45d2d5f53ca75d9e2b10b1d72c61b642b4e81ff2280af722bd2d378
         
     | 
| 3 | 
         
            +
            size 13623
         
     | 
    	
        scheduler.pt
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:2db0d79aac40995c74029f564da57bd35d556559edd54ee0b5744bbd7cdc1125
         
     | 
| 3 | 
         
            +
            size 623
         
     | 
    	
        tokenizer.json
    ADDED
    
    | 
         The diff for this file is too large to render. 
		See raw diff 
     | 
| 
         | 
    	
        tokenizer_config.json
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 1 | 
         
            -
            {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "max_len": 512, "special_tokens_map_file": null, "name_or_path": " 
     | 
| 
         | 
|
| 1 | 
         
            +
            {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "yiyanghkust/finbert-pretrain", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
         
     | 
    	
        trainer_state.json
    ADDED
    
    | 
         @@ -0,0 +1,376 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "best_metric": null,
         
     | 
| 3 | 
         
            +
              "best_model_checkpoint": null,
         
     | 
| 4 | 
         
            +
              "epoch": 0.8670770831526923,
         
     | 
| 5 | 
         
            +
              "global_step": 30000,
         
     | 
| 6 | 
         
            +
              "is_hyper_param_search": false,
         
     | 
| 7 | 
         
            +
              "is_local_process_zero": true,
         
     | 
| 8 | 
         
            +
              "is_world_process_zero": true,
         
     | 
| 9 | 
         
            +
              "log_history": [
         
     | 
| 10 | 
         
            +
                {
         
     | 
| 11 | 
         
            +
                  "epoch": 0.01,
         
     | 
| 12 | 
         
            +
                  "learning_rate": 4.9277435764039425e-05,
         
     | 
| 13 | 
         
            +
                  "loss": 4.5138,
         
     | 
| 14 | 
         
            +
                  "step": 500
         
     | 
| 15 | 
         
            +
                },
         
     | 
| 16 | 
         
            +
                {
         
     | 
| 17 | 
         
            +
                  "epoch": 0.03,
         
     | 
| 18 | 
         
            +
                  "learning_rate": 4.855487152807885e-05,
         
     | 
| 19 | 
         
            +
                  "loss": 4.3608,
         
     | 
| 20 | 
         
            +
                  "step": 1000
         
     | 
| 21 | 
         
            +
                },
         
     | 
| 22 | 
         
            +
                {
         
     | 
| 23 | 
         
            +
                  "epoch": 0.04,
         
     | 
| 24 | 
         
            +
                  "learning_rate": 4.783230729211827e-05,
         
     | 
| 25 | 
         
            +
                  "loss": 4.031,
         
     | 
| 26 | 
         
            +
                  "step": 1500
         
     | 
| 27 | 
         
            +
                },
         
     | 
| 28 | 
         
            +
                {
         
     | 
| 29 | 
         
            +
                  "epoch": 0.06,
         
     | 
| 30 | 
         
            +
                  "learning_rate": 4.710974305615769e-05,
         
     | 
| 31 | 
         
            +
                  "loss": 3.3475,
         
     | 
| 32 | 
         
            +
                  "step": 2000
         
     | 
| 33 | 
         
            +
                },
         
     | 
| 34 | 
         
            +
                {
         
     | 
| 35 | 
         
            +
                  "epoch": 0.07,
         
     | 
| 36 | 
         
            +
                  "learning_rate": 4.638717882019712e-05,
         
     | 
| 37 | 
         
            +
                  "loss": 2.8365,
         
     | 
| 38 | 
         
            +
                  "step": 2500
         
     | 
| 39 | 
         
            +
                },
         
     | 
| 40 | 
         
            +
                {
         
     | 
| 41 | 
         
            +
                  "epoch": 0.09,
         
     | 
| 42 | 
         
            +
                  "learning_rate": 4.5664614584236545e-05,
         
     | 
| 43 | 
         
            +
                  "loss": 2.5081,
         
     | 
| 44 | 
         
            +
                  "step": 3000
         
     | 
| 45 | 
         
            +
                },
         
     | 
| 46 | 
         
            +
                {
         
     | 
| 47 | 
         
            +
                  "epoch": 0.1,
         
     | 
| 48 | 
         
            +
                  "learning_rate": 4.494205034827597e-05,
         
     | 
| 49 | 
         
            +
                  "loss": 2.2906,
         
     | 
| 50 | 
         
            +
                  "step": 3500
         
     | 
| 51 | 
         
            +
                },
         
     | 
| 52 | 
         
            +
                {
         
     | 
| 53 | 
         
            +
                  "epoch": 0.12,
         
     | 
| 54 | 
         
            +
                  "learning_rate": 4.421948611231539e-05,
         
     | 
| 55 | 
         
            +
                  "loss": 2.1787,
         
     | 
| 56 | 
         
            +
                  "step": 4000
         
     | 
| 57 | 
         
            +
                },
         
     | 
| 58 | 
         
            +
                {
         
     | 
| 59 | 
         
            +
                  "epoch": 0.13,
         
     | 
| 60 | 
         
            +
                  "learning_rate": 4.349692187635481e-05,
         
     | 
| 61 | 
         
            +
                  "loss": 2.1026,
         
     | 
| 62 | 
         
            +
                  "step": 4500
         
     | 
| 63 | 
         
            +
                },
         
     | 
| 64 | 
         
            +
                {
         
     | 
| 65 | 
         
            +
                  "epoch": 0.14,
         
     | 
| 66 | 
         
            +
                  "learning_rate": 4.277435764039423e-05,
         
     | 
| 67 | 
         
            +
                  "loss": 2.0387,
         
     | 
| 68 | 
         
            +
                  "step": 5000
         
     | 
| 69 | 
         
            +
                },
         
     | 
| 70 | 
         
            +
                {
         
     | 
| 71 | 
         
            +
                  "epoch": 0.16,
         
     | 
| 72 | 
         
            +
                  "learning_rate": 4.205179340443365e-05,
         
     | 
| 73 | 
         
            +
                  "loss": 2.0001,
         
     | 
| 74 | 
         
            +
                  "step": 5500
         
     | 
| 75 | 
         
            +
                },
         
     | 
| 76 | 
         
            +
                {
         
     | 
| 77 | 
         
            +
                  "epoch": 0.17,
         
     | 
| 78 | 
         
            +
                  "learning_rate": 4.1329229168473074e-05,
         
     | 
| 79 | 
         
            +
                  "loss": 1.9804,
         
     | 
| 80 | 
         
            +
                  "step": 6000
         
     | 
| 81 | 
         
            +
                },
         
     | 
| 82 | 
         
            +
                {
         
     | 
| 83 | 
         
            +
                  "epoch": 0.19,
         
     | 
| 84 | 
         
            +
                  "learning_rate": 4.06066649325125e-05,
         
     | 
| 85 | 
         
            +
                  "loss": 1.951,
         
     | 
| 86 | 
         
            +
                  "step": 6500
         
     | 
| 87 | 
         
            +
                },
         
     | 
| 88 | 
         
            +
                {
         
     | 
| 89 | 
         
            +
                  "epoch": 0.2,
         
     | 
| 90 | 
         
            +
                  "learning_rate": 3.9884100696551927e-05,
         
     | 
| 91 | 
         
            +
                  "loss": 1.9113,
         
     | 
| 92 | 
         
            +
                  "step": 7000
         
     | 
| 93 | 
         
            +
                },
         
     | 
| 94 | 
         
            +
                {
         
     | 
| 95 | 
         
            +
                  "epoch": 0.22,
         
     | 
| 96 | 
         
            +
                  "learning_rate": 3.916153646059135e-05,
         
     | 
| 97 | 
         
            +
                  "loss": 1.9078,
         
     | 
| 98 | 
         
            +
                  "step": 7500
         
     | 
| 99 | 
         
            +
                },
         
     | 
| 100 | 
         
            +
                {
         
     | 
| 101 | 
         
            +
                  "epoch": 0.23,
         
     | 
| 102 | 
         
            +
                  "learning_rate": 3.843897222463077e-05,
         
     | 
| 103 | 
         
            +
                  "loss": 1.8793,
         
     | 
| 104 | 
         
            +
                  "step": 8000
         
     | 
| 105 | 
         
            +
                },
         
     | 
| 106 | 
         
            +
                {
         
     | 
| 107 | 
         
            +
                  "epoch": 0.25,
         
     | 
| 108 | 
         
            +
                  "learning_rate": 3.7716407988670195e-05,
         
     | 
| 109 | 
         
            +
                  "loss": 1.8833,
         
     | 
| 110 | 
         
            +
                  "step": 8500
         
     | 
| 111 | 
         
            +
                },
         
     | 
| 112 | 
         
            +
                {
         
     | 
| 113 | 
         
            +
                  "epoch": 0.26,
         
     | 
| 114 | 
         
            +
                  "learning_rate": 3.699384375270962e-05,
         
     | 
| 115 | 
         
            +
                  "loss": 1.8618,
         
     | 
| 116 | 
         
            +
                  "step": 9000
         
     | 
| 117 | 
         
            +
                },
         
     | 
| 118 | 
         
            +
                {
         
     | 
| 119 | 
         
            +
                  "epoch": 0.27,
         
     | 
| 120 | 
         
            +
                  "learning_rate": 3.627127951674904e-05,
         
     | 
| 121 | 
         
            +
                  "loss": 1.835,
         
     | 
| 122 | 
         
            +
                  "step": 9500
         
     | 
| 123 | 
         
            +
                },
         
     | 
| 124 | 
         
            +
                {
         
     | 
| 125 | 
         
            +
                  "epoch": 0.29,
         
     | 
| 126 | 
         
            +
                  "learning_rate": 3.554871528078846e-05,
         
     | 
| 127 | 
         
            +
                  "loss": 1.8187,
         
     | 
| 128 | 
         
            +
                  "step": 10000
         
     | 
| 129 | 
         
            +
                },
         
     | 
| 130 | 
         
            +
                {
         
     | 
| 131 | 
         
            +
                  "epoch": 0.3,
         
     | 
| 132 | 
         
            +
                  "learning_rate": 3.4826151044827885e-05,
         
     | 
| 133 | 
         
            +
                  "loss": 1.8097,
         
     | 
| 134 | 
         
            +
                  "step": 10500
         
     | 
| 135 | 
         
            +
                },
         
     | 
| 136 | 
         
            +
                {
         
     | 
| 137 | 
         
            +
                  "epoch": 0.32,
         
     | 
| 138 | 
         
            +
                  "learning_rate": 3.410358680886731e-05,
         
     | 
| 139 | 
         
            +
                  "loss": 1.7973,
         
     | 
| 140 | 
         
            +
                  "step": 11000
         
     | 
| 141 | 
         
            +
                },
         
     | 
| 142 | 
         
            +
                {
         
     | 
| 143 | 
         
            +
                  "epoch": 0.33,
         
     | 
| 144 | 
         
            +
                  "learning_rate": 3.338102257290673e-05,
         
     | 
| 145 | 
         
            +
                  "loss": 1.8006,
         
     | 
| 146 | 
         
            +
                  "step": 11500
         
     | 
| 147 | 
         
            +
                },
         
     | 
| 148 | 
         
            +
                {
         
     | 
| 149 | 
         
            +
                  "epoch": 0.35,
         
     | 
| 150 | 
         
            +
                  "learning_rate": 3.265845833694616e-05,
         
     | 
| 151 | 
         
            +
                  "loss": 1.7698,
         
     | 
| 152 | 
         
            +
                  "step": 12000
         
     | 
| 153 | 
         
            +
                },
         
     | 
| 154 | 
         
            +
                {
         
     | 
| 155 | 
         
            +
                  "epoch": 0.36,
         
     | 
| 156 | 
         
            +
                  "learning_rate": 3.193589410098558e-05,
         
     | 
| 157 | 
         
            +
                  "loss": 1.7673,
         
     | 
| 158 | 
         
            +
                  "step": 12500
         
     | 
| 159 | 
         
            +
                },
         
     | 
| 160 | 
         
            +
                {
         
     | 
| 161 | 
         
            +
                  "epoch": 0.38,
         
     | 
| 162 | 
         
            +
                  "learning_rate": 3.1213329865025005e-05,
         
     | 
| 163 | 
         
            +
                  "loss": 1.7731,
         
     | 
| 164 | 
         
            +
                  "step": 13000
         
     | 
| 165 | 
         
            +
                },
         
     | 
| 166 | 
         
            +
                {
         
     | 
| 167 | 
         
            +
                  "epoch": 0.39,
         
     | 
| 168 | 
         
            +
                  "learning_rate": 3.0490765629064428e-05,
         
     | 
| 169 | 
         
            +
                  "loss": 1.7643,
         
     | 
| 170 | 
         
            +
                  "step": 13500
         
     | 
| 171 | 
         
            +
                },
         
     | 
| 172 | 
         
            +
                {
         
     | 
| 173 | 
         
            +
                  "epoch": 0.4,
         
     | 
| 174 | 
         
            +
                  "learning_rate": 2.976820139310385e-05,
         
     | 
| 175 | 
         
            +
                  "loss": 1.7534,
         
     | 
| 176 | 
         
            +
                  "step": 14000
         
     | 
| 177 | 
         
            +
                },
         
     | 
| 178 | 
         
            +
                {
         
     | 
| 179 | 
         
            +
                  "epoch": 0.42,
         
     | 
| 180 | 
         
            +
                  "learning_rate": 2.9045637157143273e-05,
         
     | 
| 181 | 
         
            +
                  "loss": 1.7307,
         
     | 
| 182 | 
         
            +
                  "step": 14500
         
     | 
| 183 | 
         
            +
                },
         
     | 
| 184 | 
         
            +
                {
         
     | 
| 185 | 
         
            +
                  "epoch": 0.43,
         
     | 
| 186 | 
         
            +
                  "learning_rate": 2.8323072921182693e-05,
         
     | 
| 187 | 
         
            +
                  "loss": 1.7279,
         
     | 
| 188 | 
         
            +
                  "step": 15000
         
     | 
| 189 | 
         
            +
                },
         
     | 
| 190 | 
         
            +
                {
         
     | 
| 191 | 
         
            +
                  "epoch": 0.45,
         
     | 
| 192 | 
         
            +
                  "learning_rate": 2.7600508685222115e-05,
         
     | 
| 193 | 
         
            +
                  "loss": 1.7162,
         
     | 
| 194 | 
         
            +
                  "step": 15500
         
     | 
| 195 | 
         
            +
                },
         
     | 
| 196 | 
         
            +
                {
         
     | 
| 197 | 
         
            +
                  "epoch": 0.46,
         
     | 
| 198 | 
         
            +
                  "learning_rate": 2.6877944449261538e-05,
         
     | 
| 199 | 
         
            +
                  "loss": 1.7006,
         
     | 
| 200 | 
         
            +
                  "step": 16000
         
     | 
| 201 | 
         
            +
                },
         
     | 
| 202 | 
         
            +
                {
         
     | 
| 203 | 
         
            +
                  "epoch": 0.48,
         
     | 
| 204 | 
         
            +
                  "learning_rate": 2.615538021330096e-05,
         
     | 
| 205 | 
         
            +
                  "loss": 1.7176,
         
     | 
| 206 | 
         
            +
                  "step": 16500
         
     | 
| 207 | 
         
            +
                },
         
     | 
| 208 | 
         
            +
                {
         
     | 
| 209 | 
         
            +
                  "epoch": 0.49,
         
     | 
| 210 | 
         
            +
                  "learning_rate": 2.5432815977340387e-05,
         
     | 
| 211 | 
         
            +
                  "loss": 1.6928,
         
     | 
| 212 | 
         
            +
                  "step": 17000
         
     | 
| 213 | 
         
            +
                },
         
     | 
| 214 | 
         
            +
                {
         
     | 
| 215 | 
         
            +
                  "epoch": 0.51,
         
     | 
| 216 | 
         
            +
                  "learning_rate": 2.471025174137981e-05,
         
     | 
| 217 | 
         
            +
                  "loss": 1.6819,
         
     | 
| 218 | 
         
            +
                  "step": 17500
         
     | 
| 219 | 
         
            +
                },
         
     | 
| 220 | 
         
            +
                {
         
     | 
| 221 | 
         
            +
                  "epoch": 0.52,
         
     | 
| 222 | 
         
            +
                  "learning_rate": 2.3987687505419232e-05,
         
     | 
| 223 | 
         
            +
                  "loss": 1.6864,
         
     | 
| 224 | 
         
            +
                  "step": 18000
         
     | 
| 225 | 
         
            +
                },
         
     | 
| 226 | 
         
            +
                {
         
     | 
| 227 | 
         
            +
                  "epoch": 0.53,
         
     | 
| 228 | 
         
            +
                  "learning_rate": 2.3265123269458655e-05,
         
     | 
| 229 | 
         
            +
                  "loss": 1.6813,
         
     | 
| 230 | 
         
            +
                  "step": 18500
         
     | 
| 231 | 
         
            +
                },
         
     | 
| 232 | 
         
            +
                {
         
     | 
| 233 | 
         
            +
                  "epoch": 0.55,
         
     | 
| 234 | 
         
            +
                  "learning_rate": 2.2542559033498077e-05,
         
     | 
| 235 | 
         
            +
                  "loss": 1.6785,
         
     | 
| 236 | 
         
            +
                  "step": 19000
         
     | 
| 237 | 
         
            +
                },
         
     | 
| 238 | 
         
            +
                {
         
     | 
| 239 | 
         
            +
                  "epoch": 0.56,
         
     | 
| 240 | 
         
            +
                  "learning_rate": 2.1819994797537503e-05,
         
     | 
| 241 | 
         
            +
                  "loss": 1.6734,
         
     | 
| 242 | 
         
            +
                  "step": 19500
         
     | 
| 243 | 
         
            +
                },
         
     | 
| 244 | 
         
            +
                {
         
     | 
| 245 | 
         
            +
                  "epoch": 0.58,
         
     | 
| 246 | 
         
            +
                  "learning_rate": 2.1097430561576926e-05,
         
     | 
| 247 | 
         
            +
                  "loss": 1.6543,
         
     | 
| 248 | 
         
            +
                  "step": 20000
         
     | 
| 249 | 
         
            +
                },
         
     | 
| 250 | 
         
            +
                {
         
     | 
| 251 | 
         
            +
                  "epoch": 0.59,
         
     | 
| 252 | 
         
            +
                  "learning_rate": 2.037486632561635e-05,
         
     | 
| 253 | 
         
            +
                  "loss": 1.6633,
         
     | 
| 254 | 
         
            +
                  "step": 20500
         
     | 
| 255 | 
         
            +
                },
         
     | 
| 256 | 
         
            +
                {
         
     | 
| 257 | 
         
            +
                  "epoch": 0.61,
         
     | 
| 258 | 
         
            +
                  "learning_rate": 1.965230208965577e-05,
         
     | 
| 259 | 
         
            +
                  "loss": 1.668,
         
     | 
| 260 | 
         
            +
                  "step": 21000
         
     | 
| 261 | 
         
            +
                },
         
     | 
| 262 | 
         
            +
                {
         
     | 
| 263 | 
         
            +
                  "epoch": 0.62,
         
     | 
| 264 | 
         
            +
                  "learning_rate": 1.8929737853695194e-05,
         
     | 
| 265 | 
         
            +
                  "loss": 1.6554,
         
     | 
| 266 | 
         
            +
                  "step": 21500
         
     | 
| 267 | 
         
            +
                },
         
     | 
| 268 | 
         
            +
                {
         
     | 
| 269 | 
         
            +
                  "epoch": 0.64,
         
     | 
| 270 | 
         
            +
                  "learning_rate": 1.820717361773462e-05,
         
     | 
| 271 | 
         
            +
                  "loss": 1.6451,
         
     | 
| 272 | 
         
            +
                  "step": 22000
         
     | 
| 273 | 
         
            +
                },
         
     | 
| 274 | 
         
            +
                {
         
     | 
| 275 | 
         
            +
                  "epoch": 0.65,
         
     | 
| 276 | 
         
            +
                  "learning_rate": 1.748460938177404e-05,
         
     | 
| 277 | 
         
            +
                  "loss": 1.6347,
         
     | 
| 278 | 
         
            +
                  "step": 22500
         
     | 
| 279 | 
         
            +
                },
         
     | 
| 280 | 
         
            +
                {
         
     | 
| 281 | 
         
            +
                  "epoch": 0.66,
         
     | 
| 282 | 
         
            +
                  "learning_rate": 1.6762045145813462e-05,
         
     | 
| 283 | 
         
            +
                  "loss": 1.6288,
         
     | 
| 284 | 
         
            +
                  "step": 23000
         
     | 
| 285 | 
         
            +
                },
         
     | 
| 286 | 
         
            +
                {
         
     | 
| 287 | 
         
            +
                  "epoch": 0.68,
         
     | 
| 288 | 
         
            +
                  "learning_rate": 1.6039480909852885e-05,
         
     | 
| 289 | 
         
            +
                  "loss": 1.6476,
         
     | 
| 290 | 
         
            +
                  "step": 23500
         
     | 
| 291 | 
         
            +
                },
         
     | 
| 292 | 
         
            +
                {
         
     | 
| 293 | 
         
            +
                  "epoch": 0.69,
         
     | 
| 294 | 
         
            +
                  "learning_rate": 1.531691667389231e-05,
         
     | 
| 295 | 
         
            +
                  "loss": 1.629,
         
     | 
| 296 | 
         
            +
                  "step": 24000
         
     | 
| 297 | 
         
            +
                },
         
     | 
| 298 | 
         
            +
                {
         
     | 
| 299 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 300 | 
         
            +
                  "learning_rate": 1.4594352437931733e-05,
         
     | 
| 301 | 
         
            +
                  "loss": 1.6197,
         
     | 
| 302 | 
         
            +
                  "step": 24500
         
     | 
| 303 | 
         
            +
                },
         
     | 
| 304 | 
         
            +
                {
         
     | 
| 305 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 306 | 
         
            +
                  "learning_rate": 1.3871788201971156e-05,
         
     | 
| 307 | 
         
            +
                  "loss": 1.6158,
         
     | 
| 308 | 
         
            +
                  "step": 25000
         
     | 
| 309 | 
         
            +
                },
         
     | 
| 310 | 
         
            +
                {
         
     | 
| 311 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 312 | 
         
            +
                  "learning_rate": 1.3149223966010579e-05,
         
     | 
| 313 | 
         
            +
                  "loss": 1.6208,
         
     | 
| 314 | 
         
            +
                  "step": 25500
         
     | 
| 315 | 
         
            +
                },
         
     | 
| 316 | 
         
            +
                {
         
     | 
| 317 | 
         
            +
                  "epoch": 0.75,
         
     | 
| 318 | 
         
            +
                  "learning_rate": 1.2426659730050003e-05,
         
     | 
| 319 | 
         
            +
                  "loss": 1.6081,
         
     | 
| 320 | 
         
            +
                  "step": 26000
         
     | 
| 321 | 
         
            +
                },
         
     | 
| 322 | 
         
            +
                {
         
     | 
| 323 | 
         
            +
                  "epoch": 0.77,
         
     | 
| 324 | 
         
            +
                  "learning_rate": 1.1704095494089424e-05,
         
     | 
| 325 | 
         
            +
                  "loss": 1.6147,
         
     | 
| 326 | 
         
            +
                  "step": 26500
         
     | 
| 327 | 
         
            +
                },
         
     | 
| 328 | 
         
            +
                {
         
     | 
| 329 | 
         
            +
                  "epoch": 0.78,
         
     | 
| 330 | 
         
            +
                  "learning_rate": 1.0981531258128848e-05,
         
     | 
| 331 | 
         
            +
                  "loss": 1.6003,
         
     | 
| 332 | 
         
            +
                  "step": 27000
         
     | 
| 333 | 
         
            +
                },
         
     | 
| 334 | 
         
            +
                {
         
     | 
| 335 | 
         
            +
                  "epoch": 0.79,
         
     | 
| 336 | 
         
            +
                  "learning_rate": 1.0258967022168271e-05,
         
     | 
| 337 | 
         
            +
                  "loss": 1.5853,
         
     | 
| 338 | 
         
            +
                  "step": 27500
         
     | 
| 339 | 
         
            +
                },
         
     | 
| 340 | 
         
            +
                {
         
     | 
| 341 | 
         
            +
                  "epoch": 0.81,
         
     | 
| 342 | 
         
            +
                  "learning_rate": 9.536402786207694e-06,
         
     | 
| 343 | 
         
            +
                  "loss": 1.6039,
         
     | 
| 344 | 
         
            +
                  "step": 28000
         
     | 
| 345 | 
         
            +
                },
         
     | 
| 346 | 
         
            +
                {
         
     | 
| 347 | 
         
            +
                  "epoch": 0.82,
         
     | 
| 348 | 
         
            +
                  "learning_rate": 8.813838550247118e-06,
         
     | 
| 349 | 
         
            +
                  "loss": 1.594,
         
     | 
| 350 | 
         
            +
                  "step": 28500
         
     | 
| 351 | 
         
            +
                },
         
     | 
| 352 | 
         
            +
                {
         
     | 
| 353 | 
         
            +
                  "epoch": 0.84,
         
     | 
| 354 | 
         
            +
                  "learning_rate": 8.091274314286539e-06,
         
     | 
| 355 | 
         
            +
                  "loss": 1.5874,
         
     | 
| 356 | 
         
            +
                  "step": 29000
         
     | 
| 357 | 
         
            +
                },
         
     | 
| 358 | 
         
            +
                {
         
     | 
| 359 | 
         
            +
                  "epoch": 0.85,
         
     | 
| 360 | 
         
            +
                  "learning_rate": 7.3687100783259635e-06,
         
     | 
| 361 | 
         
            +
                  "loss": 1.5707,
         
     | 
| 362 | 
         
            +
                  "step": 29500
         
     | 
| 363 | 
         
            +
                },
         
     | 
| 364 | 
         
            +
                {
         
     | 
| 365 | 
         
            +
                  "epoch": 0.87,
         
     | 
| 366 | 
         
            +
                  "learning_rate": 6.646145842365387e-06,
         
     | 
| 367 | 
         
            +
                  "loss": 1.5902,
         
     | 
| 368 | 
         
            +
                  "step": 30000
         
     | 
| 369 | 
         
            +
                }
         
     | 
| 370 | 
         
            +
              ],
         
     | 
| 371 | 
         
            +
              "max_steps": 34599,
         
     | 
| 372 | 
         
            +
              "num_train_epochs": 1,
         
     | 
| 373 | 
         
            +
              "total_flos": 7.89617673216e+16,
         
     | 
| 374 | 
         
            +
              "trial_name": null,
         
     | 
| 375 | 
         
            +
              "trial_params": null
         
     | 
| 376 | 
         
            +
            }
         
     | 
    	
        training_args.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:0472f6463f4dfa6cf999584b9d6fd53d1a5eb8f38caaaef68f2961121dd5e971
         
     | 
| 3 | 
         
            +
            size 2863
         
     |