lnewbould committed on
Commit
2dd8abe
·
verified ·
1 Parent(s): dce11b8

Training in progress, step 14

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. config.json +62 -0
  2. model.safetensors +3 -0
  3. run-0/checkpoint-14/config.json +62 -0
  4. run-0/checkpoint-14/model.safetensors +3 -0
  5. run-0/checkpoint-14/optimizer.pt +3 -0
  6. run-0/checkpoint-14/rng_state.pth +3 -0
  7. run-0/checkpoint-14/scheduler.pt +3 -0
  8. run-0/checkpoint-14/special_tokens_map.json +7 -0
  9. run-0/checkpoint-14/tokenizer.json +0 -0
  10. run-0/checkpoint-14/tokenizer_config.json +56 -0
  11. run-0/checkpoint-14/trainer_state.json +47 -0
  12. run-0/checkpoint-14/training_args.bin +3 -0
  13. run-0/checkpoint-14/vocab.txt +0 -0
  14. run-0/checkpoint-20/config.json +63 -0
  15. run-0/checkpoint-20/model.safetensors +3 -0
  16. run-0/checkpoint-20/optimizer.pt +3 -0
  17. run-0/checkpoint-20/rng_state.pth +3 -0
  18. run-0/checkpoint-20/scheduler.pt +3 -0
  19. run-0/checkpoint-20/special_tokens_map.json +7 -0
  20. run-0/checkpoint-20/tokenizer.json +0 -0
  21. run-0/checkpoint-20/tokenizer_config.json +56 -0
  22. run-0/checkpoint-20/trainer_state.json +74 -0
  23. run-0/checkpoint-20/training_args.bin +3 -0
  24. run-0/checkpoint-20/vocab.txt +0 -0
  25. run-1/checkpoint-12/config.json +63 -0
  26. run-1/checkpoint-12/model.safetensors +3 -0
  27. run-1/checkpoint-12/optimizer.pt +3 -0
  28. run-1/checkpoint-12/rng_state.pth +3 -0
  29. run-1/checkpoint-12/scheduler.pt +3 -0
  30. run-1/checkpoint-12/special_tokens_map.json +7 -0
  31. run-1/checkpoint-12/tokenizer.json +0 -0
  32. run-1/checkpoint-12/tokenizer_config.json +56 -0
  33. run-1/checkpoint-12/trainer_state.json +56 -0
  34. run-1/checkpoint-12/training_args.bin +3 -0
  35. run-1/checkpoint-12/vocab.txt +0 -0
  36. run-2/checkpoint-8/config.json +63 -0
  37. run-2/checkpoint-8/model.safetensors +3 -0
  38. run-2/checkpoint-8/optimizer.pt +3 -0
  39. run-2/checkpoint-8/rng_state.pth +3 -0
  40. run-2/checkpoint-8/scheduler.pt +3 -0
  41. run-2/checkpoint-8/special_tokens_map.json +7 -0
  42. run-2/checkpoint-8/tokenizer.json +0 -0
  43. run-2/checkpoint-8/tokenizer_config.json +56 -0
  44. run-2/checkpoint-8/trainer_state.json +47 -0
  45. run-2/checkpoint-8/training_args.bin +3 -0
  46. run-2/checkpoint-8/vocab.txt +0 -0
  47. special_tokens_map.json +7 -0
  48. tokenizer.json +0 -0
  49. tokenizer_config.json +56 -0
  50. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "business",
13
+ "1": "civil-law",
14
+ "2": "constitutional-law",
15
+ "3": "contract",
16
+ "4": "contract-law",
17
+ "5": "copyright",
18
+ "6": "criminal-law",
19
+ "7": "employment",
20
+ "8": "intellectual-property",
21
+ "9": "internet",
22
+ "10": "liability",
23
+ "11": "licensing",
24
+ "12": "privacy",
25
+ "13": "software",
26
+ "14": "tax-law",
27
+ "15": "trademark"
28
+ },
29
+ "initializer_range": 0.02,
30
+ "label2id": {
31
+ "business": 0,
32
+ "civil-law": 1,
33
+ "constitutional-law": 2,
34
+ "contract": 3,
35
+ "contract-law": 4,
36
+ "copyright": 5,
37
+ "criminal-law": 6,
38
+ "employment": 7,
39
+ "intellectual-property": 8,
40
+ "internet": 9,
41
+ "liability": 10,
42
+ "licensing": 11,
43
+ "privacy": 12,
44
+ "software": 13,
45
+ "tax-law": 14,
46
+ "trademark": 15
47
+ },
48
+ "max_position_embeddings": 512,
49
+ "model_type": "distilbert",
50
+ "n_heads": 12,
51
+ "n_layers": 6,
52
+ "output_past": true,
53
+ "pad_token_id": 0,
54
+ "problem_type": "single_label_classification",
55
+ "qa_dropout": 0.1,
56
+ "seq_classif_dropout": 0.2,
57
+ "sinusoidal_pos_embds": false,
58
+ "tie_weights_": true,
59
+ "torch_dtype": "float32",
60
+ "transformers_version": "4.47.1",
61
+ "vocab_size": 28996
62
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a70896cda70c39157466e77a386e43d614d7340d26d1719e4aa7130a3a66c35
3
+ size 263187752
run-0/checkpoint-14/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "business",
13
+ "1": "civil-law",
14
+ "2": "constitutional-law",
15
+ "3": "contract",
16
+ "4": "contract-law",
17
+ "5": "copyright",
18
+ "6": "criminal-law",
19
+ "7": "employment",
20
+ "8": "intellectual-property",
21
+ "9": "internet",
22
+ "10": "liability",
23
+ "11": "licensing",
24
+ "12": "privacy",
25
+ "13": "software",
26
+ "14": "tax-law",
27
+ "15": "trademark"
28
+ },
29
+ "initializer_range": 0.02,
30
+ "label2id": {
31
+ "business": 0,
32
+ "civil-law": 1,
33
+ "constitutional-law": 2,
34
+ "contract": 3,
35
+ "contract-law": 4,
36
+ "copyright": 5,
37
+ "criminal-law": 6,
38
+ "employment": 7,
39
+ "intellectual-property": 8,
40
+ "internet": 9,
41
+ "liability": 10,
42
+ "licensing": 11,
43
+ "privacy": 12,
44
+ "software": 13,
45
+ "tax-law": 14,
46
+ "trademark": 15
47
+ },
48
+ "max_position_embeddings": 512,
49
+ "model_type": "distilbert",
50
+ "n_heads": 12,
51
+ "n_layers": 6,
52
+ "output_past": true,
53
+ "pad_token_id": 0,
54
+ "problem_type": "single_label_classification",
55
+ "qa_dropout": 0.1,
56
+ "seq_classif_dropout": 0.2,
57
+ "sinusoidal_pos_embds": false,
58
+ "tie_weights_": true,
59
+ "torch_dtype": "float32",
60
+ "transformers_version": "4.47.1",
61
+ "vocab_size": 28996
62
+ }
run-0/checkpoint-14/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a70896cda70c39157466e77a386e43d614d7340d26d1719e4aa7130a3a66c35
3
+ size 263187752
run-0/checkpoint-14/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b2494ab1b9dac2e6484e9d3478241e16951cdd84a8149adac7ac0685da688f
3
+ size 526434810
run-0/checkpoint-14/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c0060471a9340f7cc602c9a86d1d9d308db5b4a83df50147b0235d18be03463
3
+ size 13990
run-0/checkpoint-14/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27c53eed2722d60729f122e697a32e584798b0e53daf02ec90e6986cd0796210
3
+ size 1064
run-0/checkpoint-14/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-14/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-14/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-0/checkpoint-14/trainer_state.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 14,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.32,
14
+ "eval_loss": 2.4279966354370117,
15
+ "eval_runtime": 17.893,
16
+ "eval_samples_per_second": 2.794,
17
+ "eval_steps_per_second": 0.391,
18
+ "step": 7
19
+ }
20
+ ],
21
+ "logging_steps": 500,
22
+ "max_steps": 14,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 2,
25
+ "save_steps": 500,
26
+ "stateful_callbacks": {
27
+ "TrainerControl": {
28
+ "args": {
29
+ "should_epoch_stop": false,
30
+ "should_evaluate": false,
31
+ "should_log": false,
32
+ "should_save": true,
33
+ "should_training_stop": true
34
+ },
35
+ "attributes": {}
36
+ }
37
+ },
38
+ "total_flos": 0,
39
+ "train_batch_size": 8,
40
+ "trial_name": null,
41
+ "trial_params": {
42
+ "learning_rate": 0.0003461152202579902,
43
+ "num_train_epochs": 2,
44
+ "per_device_train_batch_size": 8,
45
+ "weight_decay": 0.014902312802435893
46
+ }
47
+ }
run-0/checkpoint-14/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ed3819cb2fe27bc2256360d46825e1851f5dc0f656a064e111a2321bc7183d
3
+ size 5240
run-0/checkpoint-14/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-20/config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15"
29
+ },
30
+ "initializer_range": 0.02,
31
+ "intermediate_size": 3072,
32
+ "label2id": {
33
+ "LABEL_0": 0,
34
+ "LABEL_1": 1,
35
+ "LABEL_10": 10,
36
+ "LABEL_11": 11,
37
+ "LABEL_12": 12,
38
+ "LABEL_13": 13,
39
+ "LABEL_14": 14,
40
+ "LABEL_15": 15,
41
+ "LABEL_2": 2,
42
+ "LABEL_3": 3,
43
+ "LABEL_4": 4,
44
+ "LABEL_5": 5,
45
+ "LABEL_6": 6,
46
+ "LABEL_7": 7,
47
+ "LABEL_8": 8,
48
+ "LABEL_9": 9
49
+ },
50
+ "layer_norm_eps": 1e-12,
51
+ "max_position_embeddings": 512,
52
+ "model_type": "bert",
53
+ "num_attention_heads": 12,
54
+ "num_hidden_layers": 12,
55
+ "pad_token_id": 0,
56
+ "position_embedding_type": "absolute",
57
+ "problem_type": "single_label_classification",
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.47.1",
60
+ "type_vocab_size": 2,
61
+ "use_cache": true,
62
+ "vocab_size": 28996
63
+ }
run-0/checkpoint-20/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b5c78f06d53acb708acbededcab71be33db1f82befa21988d544cf84e01a340
3
+ size 433313832
run-0/checkpoint-20/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebdba344973f22ac06783a08cb9d50a765090a0451d50ceee301dfe9109d9a6c
3
+ size 866743162
run-0/checkpoint-20/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a5e70f9a75e0aac0493ef5d71898140525e32ebc99cfd539bb3be380e6c5d74
3
+ size 13990
run-0/checkpoint-20/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f443f63848e306442cd3d7354d278af7ad02b517bd970aab51ae156d07ab0146
3
+ size 1064
run-0/checkpoint-20/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-20/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-20/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-0/checkpoint-20/trainer_state.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 20,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.32,
14
+ "eval_loss": 2.5643558502197266,
15
+ "eval_runtime": 390.6347,
16
+ "eval_samples_per_second": 0.128,
17
+ "eval_steps_per_second": 0.018,
18
+ "step": 4
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.04,
23
+ "eval_loss": 2.9646835327148438,
24
+ "eval_runtime": 34.0619,
25
+ "eval_samples_per_second": 1.468,
26
+ "eval_steps_per_second": 0.206,
27
+ "step": 8
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_accuracy": 0.32,
32
+ "eval_loss": 2.424799680709839,
33
+ "eval_runtime": 36.2929,
34
+ "eval_samples_per_second": 1.378,
35
+ "eval_steps_per_second": 0.193,
36
+ "step": 12
37
+ },
38
+ {
39
+ "epoch": 4.0,
40
+ "eval_accuracy": 0.32,
41
+ "eval_loss": 2.441561460494995,
42
+ "eval_runtime": 37.5071,
43
+ "eval_samples_per_second": 1.333,
44
+ "eval_steps_per_second": 0.187,
45
+ "step": 16
46
+ }
47
+ ],
48
+ "logging_steps": 500,
49
+ "max_steps": 20,
50
+ "num_input_tokens_seen": 0,
51
+ "num_train_epochs": 5,
52
+ "save_steps": 500,
53
+ "stateful_callbacks": {
54
+ "TrainerControl": {
55
+ "args": {
56
+ "should_epoch_stop": false,
57
+ "should_evaluate": false,
58
+ "should_log": false,
59
+ "should_save": true,
60
+ "should_training_stop": true
61
+ },
62
+ "attributes": {}
63
+ }
64
+ },
65
+ "total_flos": 0,
66
+ "train_batch_size": 16,
67
+ "trial_name": null,
68
+ "trial_params": {
69
+ "learning_rate": 0.0003155201980058913,
70
+ "num_train_epochs": 5,
71
+ "per_device_train_batch_size": 16,
72
+ "weight_decay": 0.08349846837378451
73
+ }
74
+ }
run-0/checkpoint-20/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:416fa693723afbe856dcf1cebbf9340c6ae11df99f25c7e0b635d211fd4eee07
3
+ size 5240
run-0/checkpoint-20/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-12/config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15"
29
+ },
30
+ "initializer_range": 0.02,
31
+ "intermediate_size": 3072,
32
+ "label2id": {
33
+ "LABEL_0": 0,
34
+ "LABEL_1": 1,
35
+ "LABEL_10": 10,
36
+ "LABEL_11": 11,
37
+ "LABEL_12": 12,
38
+ "LABEL_13": 13,
39
+ "LABEL_14": 14,
40
+ "LABEL_15": 15,
41
+ "LABEL_2": 2,
42
+ "LABEL_3": 3,
43
+ "LABEL_4": 4,
44
+ "LABEL_5": 5,
45
+ "LABEL_6": 6,
46
+ "LABEL_7": 7,
47
+ "LABEL_8": 8,
48
+ "LABEL_9": 9
49
+ },
50
+ "layer_norm_eps": 1e-12,
51
+ "max_position_embeddings": 512,
52
+ "model_type": "bert",
53
+ "num_attention_heads": 12,
54
+ "num_hidden_layers": 12,
55
+ "pad_token_id": 0,
56
+ "position_embedding_type": "absolute",
57
+ "problem_type": "single_label_classification",
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.47.1",
60
+ "type_vocab_size": 2,
61
+ "use_cache": true,
62
+ "vocab_size": 28996
63
+ }
run-1/checkpoint-12/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30520ddf055b0086a5163e36b5ada08bd1c3e9a6f51e2571fa49704ca27339ea
3
+ size 433313832
run-1/checkpoint-12/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac0234d7747b84c444f6eeb87ec5b8551c1e637989c561c85a404a7e66dad78a
3
+ size 866743162
run-1/checkpoint-12/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59cc0ca17123db86667fe2b999dc4d45bc2292f0becb83d9a367470fdcfcb14d
3
+ size 13990
run-1/checkpoint-12/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e3d6714bfd9178c1fcd6acd23754c8b42175fb1c04f634c3dec9fe783e15bda
3
+ size 1064
run-1/checkpoint-12/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-1/checkpoint-12/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-12/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-1/checkpoint-12/trainer_state.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 12,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.06,
14
+ "eval_loss": 2.7594950199127197,
15
+ "eval_runtime": 33.7649,
16
+ "eval_samples_per_second": 1.481,
17
+ "eval_steps_per_second": 0.207,
18
+ "step": 4
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.08,
23
+ "eval_loss": 2.6855175495147705,
24
+ "eval_runtime": 36.3771,
25
+ "eval_samples_per_second": 1.374,
26
+ "eval_steps_per_second": 0.192,
27
+ "step": 8
28
+ }
29
+ ],
30
+ "logging_steps": 500,
31
+ "max_steps": 12,
32
+ "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 3,
34
+ "save_steps": 500,
35
+ "stateful_callbacks": {
36
+ "TrainerControl": {
37
+ "args": {
38
+ "should_epoch_stop": false,
39
+ "should_evaluate": false,
40
+ "should_log": false,
41
+ "should_save": true,
42
+ "should_training_stop": true
43
+ },
44
+ "attributes": {}
45
+ }
46
+ },
47
+ "total_flos": 0,
48
+ "train_batch_size": 16,
49
+ "trial_name": null,
50
+ "trial_params": {
51
+ "learning_rate": 3.057465826568694e-05,
52
+ "num_train_epochs": 3,
53
+ "per_device_train_batch_size": 16,
54
+ "weight_decay": 0.03723974867102458
55
+ }
56
+ }
run-1/checkpoint-12/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a57174aa6cf65fa9f420f1adf1f57db12599f6d01b36cbb0bd8ebf327a14f28e
3
+ size 5240
run-1/checkpoint-12/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-8/config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15"
29
+ },
30
+ "initializer_range": 0.02,
31
+ "intermediate_size": 3072,
32
+ "label2id": {
33
+ "LABEL_0": 0,
34
+ "LABEL_1": 1,
35
+ "LABEL_10": 10,
36
+ "LABEL_11": 11,
37
+ "LABEL_12": 12,
38
+ "LABEL_13": 13,
39
+ "LABEL_14": 14,
40
+ "LABEL_15": 15,
41
+ "LABEL_2": 2,
42
+ "LABEL_3": 3,
43
+ "LABEL_4": 4,
44
+ "LABEL_5": 5,
45
+ "LABEL_6": 6,
46
+ "LABEL_7": 7,
47
+ "LABEL_8": 8,
48
+ "LABEL_9": 9
49
+ },
50
+ "layer_norm_eps": 1e-12,
51
+ "max_position_embeddings": 512,
52
+ "model_type": "bert",
53
+ "num_attention_heads": 12,
54
+ "num_hidden_layers": 12,
55
+ "pad_token_id": 0,
56
+ "position_embedding_type": "absolute",
57
+ "problem_type": "single_label_classification",
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.47.1",
60
+ "type_vocab_size": 2,
61
+ "use_cache": true,
62
+ "vocab_size": 28996
63
+ }
run-2/checkpoint-8/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0ff9d7553e5f401f66acc306718599e93884dd508d791993f33833deb328ca
3
+ size 433313832
run-2/checkpoint-8/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:454214c0144d04c453ea1d2ec2b5a7df1c3423f6818f9756a14284705fa8283a
3
+ size 866743162
run-2/checkpoint-8/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb9296950c146245afd5a3147821b22e6313de8735f850c30a3188a431ebf85
3
+ size 13990
run-2/checkpoint-8/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af27b588f87c0fb3db5b0d16c4f98bbfbf7bb779576355ce3f3345eaaa70abdc
3
+ size 1064
run-2/checkpoint-8/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-2/checkpoint-8/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-8/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-2/checkpoint-8/trainer_state.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 8,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.08,
14
+ "eval_loss": 2.832538366317749,
15
+ "eval_runtime": 34.3925,
16
+ "eval_samples_per_second": 1.454,
17
+ "eval_steps_per_second": 0.204,
18
+ "step": 4
19
+ }
20
+ ],
21
+ "logging_steps": 500,
22
+ "max_steps": 8,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 2,
25
+ "save_steps": 500,
26
+ "stateful_callbacks": {
27
+ "TrainerControl": {
28
+ "args": {
29
+ "should_epoch_stop": false,
30
+ "should_evaluate": false,
31
+ "should_log": false,
32
+ "should_save": true,
33
+ "should_training_stop": true
34
+ },
35
+ "attributes": {}
36
+ }
37
+ },
38
+ "total_flos": 0,
39
+ "train_batch_size": 16,
40
+ "trial_name": null,
41
+ "trial_params": {
42
+ "learning_rate": 1.2303999115294106e-05,
43
+ "num_train_epochs": 2,
44
+ "per_device_train_batch_size": 16,
45
+ "weight_decay": 0.08430555410163074
46
+ }
47
+ }
run-2/checkpoint-8/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e6eda4fffe5514c3c46c347e7e1ea3b00ffee475601396824284afe9850a3a9
3
+ size 5240
run-2/checkpoint-8/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ed3819cb2fe27bc2256360d46825e1851f5dc0f656a064e111a2321bc7183d
3
+ size 5240