JayR7 commited on
Commit
e2dc7a4
·
verified ·
1 Parent(s): c97db81

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. distilbert-base-cased/README.md +112 -0
  2. distilbert-base-cased/checkpoint-1000/config.json +57 -0
  3. distilbert-base-cased/checkpoint-1000/model.safetensors +3 -0
  4. distilbert-base-cased/checkpoint-1000/optimizer.pt +3 -0
  5. distilbert-base-cased/checkpoint-1000/rng_state.pth +3 -0
  6. distilbert-base-cased/checkpoint-1000/scheduler.pt +3 -0
  7. distilbert-base-cased/checkpoint-1000/special_tokens_map.json +7 -0
  8. distilbert-base-cased/checkpoint-1000/tokenizer.json +0 -0
  9. distilbert-base-cased/checkpoint-1000/tokenizer_config.json +55 -0
  10. distilbert-base-cased/checkpoint-1000/trainer_state.json +201 -0
  11. distilbert-base-cased/checkpoint-1000/training_args.bin +3 -0
  12. distilbert-base-cased/checkpoint-1000/vocab.txt +0 -0
  13. distilbert-base-cased/checkpoint-1500/config.json +57 -0
  14. distilbert-base-cased/checkpoint-1500/model.safetensors +3 -0
  15. distilbert-base-cased/checkpoint-1500/optimizer.pt +3 -0
  16. distilbert-base-cased/checkpoint-1500/rng_state.pth +3 -0
  17. distilbert-base-cased/checkpoint-1500/scheduler.pt +3 -0
  18. distilbert-base-cased/checkpoint-1500/special_tokens_map.json +7 -0
  19. distilbert-base-cased/checkpoint-1500/tokenizer.json +0 -0
  20. distilbert-base-cased/checkpoint-1500/tokenizer_config.json +55 -0
  21. distilbert-base-cased/checkpoint-1500/trainer_state.json +291 -0
  22. distilbert-base-cased/checkpoint-1500/training_args.bin +3 -0
  23. distilbert-base-cased/checkpoint-1500/vocab.txt +0 -0
  24. distilbert-base-cased/checkpoint-2000/config.json +57 -0
  25. distilbert-base-cased/checkpoint-2000/model.safetensors +3 -0
  26. distilbert-base-cased/checkpoint-2000/optimizer.pt +3 -0
  27. distilbert-base-cased/checkpoint-2000/rng_state.pth +3 -0
  28. distilbert-base-cased/checkpoint-2000/scheduler.pt +3 -0
  29. distilbert-base-cased/checkpoint-2000/special_tokens_map.json +7 -0
  30. distilbert-base-cased/checkpoint-2000/tokenizer.json +0 -0
  31. distilbert-base-cased/checkpoint-2000/tokenizer_config.json +55 -0
  32. distilbert-base-cased/checkpoint-2000/trainer_state.json +381 -0
  33. distilbert-base-cased/checkpoint-2000/training_args.bin +3 -0
  34. distilbert-base-cased/checkpoint-2000/vocab.txt +0 -0
  35. distilbert-base-cased/checkpoint-2500/config.json +57 -0
  36. distilbert-base-cased/checkpoint-2500/model.safetensors +3 -0
  37. distilbert-base-cased/checkpoint-2500/optimizer.pt +3 -0
  38. distilbert-base-cased/checkpoint-2500/rng_state.pth +3 -0
  39. distilbert-base-cased/checkpoint-2500/scheduler.pt +3 -0
  40. distilbert-base-cased/checkpoint-2500/special_tokens_map.json +7 -0
  41. distilbert-base-cased/checkpoint-2500/tokenizer.json +0 -0
  42. distilbert-base-cased/checkpoint-2500/tokenizer_config.json +55 -0
  43. distilbert-base-cased/checkpoint-2500/trainer_state.json +471 -0
  44. distilbert-base-cased/checkpoint-2500/training_args.bin +3 -0
  45. distilbert-base-cased/checkpoint-2500/vocab.txt +0 -0
  46. distilbert-base-cased/checkpoint-3000/config.json +57 -0
  47. distilbert-base-cased/checkpoint-3000/model.safetensors +3 -0
  48. distilbert-base-cased/checkpoint-3000/optimizer.pt +3 -0
  49. distilbert-base-cased/checkpoint-3000/rng_state.pth +3 -0
  50. distilbert-base-cased/checkpoint-3000/scheduler.pt +3 -0
distilbert-base-cased/README.md ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - generated_from_trainer
4
+ metrics:
5
+ - precision
6
+ - recall
7
+ - f1
8
+ - accuracy
9
+ model-index:
10
+ - name: distilbert-base-cased
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # distilbert-base-cased
18
+
19
+ This model was trained from scratch on the None dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.0755
22
+ - Precision: 0.9528
23
+ - Recall: 0.9528
24
+ - F1: 0.9528
25
+ - Accuracy: 0.9528
26
+
27
+ ## Model description
28
+
29
+ More information needed
30
+
31
+ ## Intended uses & limitations
32
+
33
+ More information needed
34
+
35
+ ## Training and evaluation data
36
+
37
+ More information needed
38
+
39
+ ## Training procedure
40
+
41
+ ### Training hyperparameters
42
+
43
+ The following hyperparameters were used during training:
44
+ - learning_rate: 2e-05
45
+ - train_batch_size: 16
46
+ - eval_batch_size: 16
47
+ - seed: 42
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: linear
50
+ - num_epochs: 2
51
+
52
+ ### Training results
53
+
54
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
55
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
56
+ | 0.7657 | 0.04 | 100 | 0.2461 | 0.8912 | 0.8912 | 0.8912 | 0.8912 |
57
+ | 0.2295 | 0.08 | 200 | 0.1786 | 0.9193 | 0.9193 | 0.9193 | 0.9193 |
58
+ | 0.1704 | 0.12 | 300 | 0.1497 | 0.9293 | 0.9293 | 0.9293 | 0.9293 |
59
+ | 0.1618 | 0.16 | 400 | 0.1299 | 0.9360 | 0.9360 | 0.9360 | 0.9360 |
60
+ | 0.1366 | 0.2 | 500 | 0.1322 | 0.9360 | 0.9360 | 0.9360 | 0.9360 |
61
+ | 0.1252 | 0.24 | 600 | 0.1152 | 0.9404 | 0.9404 | 0.9404 | 0.9404 |
62
+ | 0.1201 | 0.28 | 700 | 0.1068 | 0.9427 | 0.9427 | 0.9427 | 0.9427 |
63
+ | 0.1192 | 0.32 | 800 | 0.1052 | 0.9439 | 0.9439 | 0.9439 | 0.9439 |
64
+ | 0.1124 | 0.36 | 900 | 0.1028 | 0.9447 | 0.9447 | 0.9447 | 0.9447 |
65
+ | 0.109 | 0.4 | 1000 | 0.0996 | 0.9459 | 0.9459 | 0.9459 | 0.9459 |
66
+ | 0.1026 | 0.44 | 1100 | 0.0975 | 0.9460 | 0.9460 | 0.9460 | 0.9460 |
67
+ | 0.0984 | 0.48 | 1200 | 0.0956 | 0.9475 | 0.9475 | 0.9475 | 0.9475 |
68
+ | 0.1014 | 0.52 | 1300 | 0.0943 | 0.9480 | 0.9480 | 0.9480 | 0.9480 |
69
+ | 0.1085 | 0.56 | 1400 | 0.0909 | 0.9477 | 0.9477 | 0.9477 | 0.9477 |
70
+ | 0.0965 | 0.6 | 1500 | 0.0913 | 0.9481 | 0.9481 | 0.9481 | 0.9481 |
71
+ | 0.0932 | 0.64 | 1600 | 0.0891 | 0.9485 | 0.9485 | 0.9485 | 0.9485 |
72
+ | 0.0948 | 0.68 | 1700 | 0.0880 | 0.9489 | 0.9489 | 0.9489 | 0.9489 |
73
+ | 0.0922 | 0.72 | 1800 | 0.0863 | 0.9493 | 0.9493 | 0.9493 | 0.9493 |
74
+ | 0.0889 | 0.76 | 1900 | 0.0857 | 0.9499 | 0.9499 | 0.9499 | 0.9499 |
75
+ | 0.0864 | 0.8 | 2000 | 0.0844 | 0.9502 | 0.9502 | 0.9502 | 0.9502 |
76
+ | 0.0941 | 0.84 | 2100 | 0.0830 | 0.9503 | 0.9503 | 0.9503 | 0.9503 |
77
+ | 0.0895 | 0.88 | 2200 | 0.0828 | 0.9505 | 0.9505 | 0.9505 | 0.9505 |
78
+ | 0.088 | 0.92 | 2300 | 0.0827 | 0.9506 | 0.9506 | 0.9506 | 0.9506 |
79
+ | 0.0881 | 0.96 | 2400 | 0.0835 | 0.9500 | 0.9500 | 0.9500 | 0.9500 |
80
+ | 0.0842 | 1.0 | 2500 | 0.0818 | 0.9504 | 0.9504 | 0.9504 | 0.9504 |
81
+ | 0.0767 | 1.04 | 2600 | 0.0839 | 0.9508 | 0.9508 | 0.9508 | 0.9508 |
82
+ | 0.0793 | 1.08 | 2700 | 0.0823 | 0.9505 | 0.9505 | 0.9505 | 0.9505 |
83
+ | 0.0788 | 1.12 | 2800 | 0.0822 | 0.9509 | 0.9509 | 0.9509 | 0.9509 |
84
+ | 0.0724 | 1.16 | 2900 | 0.0809 | 0.9514 | 0.9514 | 0.9514 | 0.9514 |
85
+ | 0.0749 | 1.2 | 3000 | 0.0806 | 0.9508 | 0.9508 | 0.9508 | 0.9508 |
86
+ | 0.0741 | 1.24 | 3100 | 0.0798 | 0.9511 | 0.9511 | 0.9511 | 0.9511 |
87
+ | 0.076 | 1.28 | 3200 | 0.0783 | 0.9517 | 0.9517 | 0.9517 | 0.9517 |
88
+ | 0.0723 | 1.32 | 3300 | 0.0797 | 0.9517 | 0.9517 | 0.9517 | 0.9517 |
89
+ | 0.0815 | 1.36 | 3400 | 0.0791 | 0.9520 | 0.9520 | 0.9520 | 0.9520 |
90
+ | 0.0769 | 1.4 | 3500 | 0.0779 | 0.9524 | 0.9524 | 0.9524 | 0.9524 |
91
+ | 0.0761 | 1.44 | 3600 | 0.0777 | 0.9527 | 0.9527 | 0.9527 | 0.9527 |
92
+ | 0.0718 | 1.48 | 3700 | 0.0781 | 0.9520 | 0.9520 | 0.9520 | 0.9520 |
93
+ | 0.0769 | 1.52 | 3800 | 0.0773 | 0.9526 | 0.9526 | 0.9526 | 0.9526 |
94
+ | 0.0653 | 1.56 | 3900 | 0.0779 | 0.9520 | 0.9520 | 0.9520 | 0.9520 |
95
+ | 0.0724 | 1.6 | 4000 | 0.0775 | 0.9522 | 0.9522 | 0.9522 | 0.9522 |
96
+ | 0.0701 | 1.64 | 4100 | 0.0776 | 0.9522 | 0.9522 | 0.9522 | 0.9522 |
97
+ | 0.0731 | 1.68 | 4200 | 0.0773 | 0.9523 | 0.9523 | 0.9523 | 0.9523 |
98
+ | 0.07 | 1.72 | 4300 | 0.0772 | 0.9524 | 0.9524 | 0.9524 | 0.9524 |
99
+ | 0.0721 | 1.76 | 4400 | 0.0769 | 0.9528 | 0.9528 | 0.9528 | 0.9528 |
100
+ | 0.0736 | 1.8 | 4500 | 0.0765 | 0.9529 | 0.9529 | 0.9529 | 0.9529 |
101
+ | 0.0721 | 1.84 | 4600 | 0.0762 | 0.9523 | 0.9523 | 0.9523 | 0.9523 |
102
+ | 0.0763 | 1.88 | 4700 | 0.0758 | 0.9528 | 0.9528 | 0.9528 | 0.9528 |
103
+ | 0.0683 | 1.92 | 4800 | 0.0759 | 0.9528 | 0.9528 | 0.9528 | 0.9528 |
104
+ | 0.0772 | 1.96 | 4900 | 0.0755 | 0.9528 | 0.9528 | 0.9528 | 0.9528 |
105
+
106
+
107
+ ### Framework versions
108
+
109
+ - Transformers 4.37.2
110
+ - Pytorch 2.1.0+cu121
111
+ - Datasets 2.17.1
112
+ - Tokenizers 0.15.2
distilbert-base-cased/checkpoint-1000/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "B-ProductNameEn",
13
+ "1": "I-ProductNameEn",
14
+ "2": "B-TradeMarkEn",
15
+ "3": "I-TradeMarkEn",
16
+ "4": "B-Country",
17
+ "5": "I-Country",
18
+ "6": "B-HSCode",
19
+ "7": "I-HSCode",
20
+ "8": "B-HSCodeEn",
21
+ "9": "I-HSCodeEn",
22
+ "10": "B-ManufacturerEn",
23
+ "11": "I-ManufacturerEn",
24
+ "12": "B-ModelNo",
25
+ "13": "I-ModelNo"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "label2id": {
29
+ "B-Country": 4,
30
+ "B-HSCode": 6,
31
+ "B-HSCodeEn": 8,
32
+ "B-ManufacturerEn": 10,
33
+ "B-ModelNo": 12,
34
+ "B-ProductNameEn": 0,
35
+ "B-TradeMarkEn": 2,
36
+ "I-Country": 5,
37
+ "I-HSCode": 7,
38
+ "I-HSCodeEn": 9,
39
+ "I-ManufacturerEn": 11,
40
+ "I-ModelNo": 13,
41
+ "I-ProductNameEn": 1,
42
+ "I-TradeMarkEn": 3
43
+ },
44
+ "max_position_embeddings": 512,
45
+ "model_type": "distilbert",
46
+ "n_heads": 12,
47
+ "n_layers": 6,
48
+ "output_past": true,
49
+ "pad_token_id": 0,
50
+ "qa_dropout": 0.1,
51
+ "seq_classif_dropout": 0.2,
52
+ "sinusoidal_pos_embds": false,
53
+ "tie_weights_": true,
54
+ "torch_dtype": "float32",
55
+ "transformers_version": "4.37.2",
56
+ "vocab_size": 28996
57
+ }
distilbert-base-cased/checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b6703f50fa88008f88ab717481d44ffc45cb8026595cce865da8941c586e356
3
+ size 260819048
distilbert-base-cased/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fca981663ffcb9ae07a89a1a4e4dede55d09e512f9d18ae6b030afc8310dc5f
3
+ size 521698874
distilbert-base-cased/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9339a8cbb82ba5ee2f361dedf227b21ca995a5d566d33bd610f189892e2091
3
+ size 14244
distilbert-base-cased/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8adaa2d8045a871ac5d0f25de752ac87c7094e40a3cc90f08c7ddad38cba70de
3
+ size 1064
distilbert-base-cased/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
distilbert-base-cased/checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distilbert-base-cased/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
distilbert-base-cased/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.0995732992887497,
3
+ "best_model_checkpoint": "JayR7/distilbert-base-cased/checkpoint-1000",
4
+ "epoch": 0.40080160320641284,
5
+ "eval_steps": 100,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04,
13
+ "learning_rate": 1.959919839679359e-05,
14
+ "loss": 0.7657,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.04,
19
+ "eval_accuracy": 0.891240629358438,
20
+ "eval_f1": 0.891240629358438,
21
+ "eval_loss": 0.2461320161819458,
22
+ "eval_precision": 0.891240629358438,
23
+ "eval_recall": 0.891240629358438,
24
+ "eval_runtime": 70.2088,
25
+ "eval_samples_per_second": 142.133,
26
+ "eval_steps_per_second": 8.888,
27
+ "step": 100
28
+ },
29
+ {
30
+ "epoch": 0.08,
31
+ "learning_rate": 1.9198396793587175e-05,
32
+ "loss": 0.2295,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.08,
37
+ "eval_accuracy": 0.9192844534518828,
38
+ "eval_f1": 0.9192844534518828,
39
+ "eval_loss": 0.17861397564411163,
40
+ "eval_precision": 0.9192844534518828,
41
+ "eval_recall": 0.9192844534518828,
42
+ "eval_runtime": 70.0566,
43
+ "eval_samples_per_second": 142.442,
44
+ "eval_steps_per_second": 8.907,
45
+ "step": 200
46
+ },
47
+ {
48
+ "epoch": 0.12,
49
+ "learning_rate": 1.8797595190380762e-05,
50
+ "loss": 0.1704,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.12,
55
+ "eval_accuracy": 0.9293279288702929,
56
+ "eval_f1": 0.9293279288702929,
57
+ "eval_loss": 0.1497383564710617,
58
+ "eval_precision": 0.9293279288702929,
59
+ "eval_recall": 0.9293279288702929,
60
+ "eval_runtime": 69.6477,
61
+ "eval_samples_per_second": 143.278,
62
+ "eval_steps_per_second": 8.959,
63
+ "step": 300
64
+ },
65
+ {
66
+ "epoch": 0.16,
67
+ "learning_rate": 1.839679358717435e-05,
68
+ "loss": 0.1618,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 0.16,
73
+ "eval_accuracy": 0.9359854428172942,
74
+ "eval_f1": 0.9359854428172942,
75
+ "eval_loss": 0.12991881370544434,
76
+ "eval_precision": 0.9359854428172942,
77
+ "eval_recall": 0.9359854428172942,
78
+ "eval_runtime": 70.1002,
79
+ "eval_samples_per_second": 142.353,
80
+ "eval_steps_per_second": 8.902,
81
+ "step": 400
82
+ },
83
+ {
84
+ "epoch": 0.2,
85
+ "learning_rate": 1.7995991983967936e-05,
86
+ "loss": 0.1366,
87
+ "step": 500
88
+ },
89
+ {
90
+ "epoch": 0.2,
91
+ "eval_accuracy": 0.9360371992677824,
92
+ "eval_f1": 0.9360371992677824,
93
+ "eval_loss": 0.13222643733024597,
94
+ "eval_precision": 0.9360371992677824,
95
+ "eval_recall": 0.9360371992677824,
96
+ "eval_runtime": 69.7532,
97
+ "eval_samples_per_second": 143.062,
98
+ "eval_steps_per_second": 8.946,
99
+ "step": 500
100
+ },
101
+ {
102
+ "epoch": 0.24,
103
+ "learning_rate": 1.7595190380761523e-05,
104
+ "loss": 0.1252,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 0.24,
109
+ "eval_accuracy": 0.9404364975592747,
110
+ "eval_f1": 0.9404364975592747,
111
+ "eval_loss": 0.11517041176557541,
112
+ "eval_precision": 0.9404364975592747,
113
+ "eval_recall": 0.9404364975592747,
114
+ "eval_runtime": 69.595,
115
+ "eval_samples_per_second": 143.387,
116
+ "eval_steps_per_second": 8.966,
117
+ "step": 600
118
+ },
119
+ {
120
+ "epoch": 0.28,
121
+ "learning_rate": 1.719438877755511e-05,
122
+ "loss": 0.1201,
123
+ "step": 700
124
+ },
125
+ {
126
+ "epoch": 0.28,
127
+ "eval_accuracy": 0.9426865411436541,
128
+ "eval_f1": 0.9426865411436541,
129
+ "eval_loss": 0.10682988166809082,
130
+ "eval_precision": 0.9426865411436541,
131
+ "eval_recall": 0.9426865411436541,
132
+ "eval_runtime": 70.4798,
133
+ "eval_samples_per_second": 141.587,
134
+ "eval_steps_per_second": 8.854,
135
+ "step": 700
136
+ },
137
+ {
138
+ "epoch": 0.32,
139
+ "learning_rate": 1.6793587174348697e-05,
140
+ "loss": 0.1192,
141
+ "step": 800
142
+ },
143
+ {
144
+ "epoch": 0.32,
145
+ "eval_accuracy": 0.9439395920502092,
146
+ "eval_f1": 0.9439395920502092,
147
+ "eval_loss": 0.10515566915273666,
148
+ "eval_precision": 0.9439395920502092,
149
+ "eval_recall": 0.9439395920502092,
150
+ "eval_runtime": 69.7899,
151
+ "eval_samples_per_second": 142.986,
152
+ "eval_steps_per_second": 8.941,
153
+ "step": 800
154
+ },
155
+ {
156
+ "epoch": 0.36,
157
+ "learning_rate": 1.6392785571142287e-05,
158
+ "loss": 0.1124,
159
+ "step": 900
160
+ },
161
+ {
162
+ "epoch": 0.36,
163
+ "eval_accuracy": 0.9446532862622036,
164
+ "eval_f1": 0.9446532862622036,
165
+ "eval_loss": 0.10284145176410675,
166
+ "eval_precision": 0.9446532862622036,
167
+ "eval_recall": 0.9446532862622036,
168
+ "eval_runtime": 69.8472,
169
+ "eval_samples_per_second": 142.869,
170
+ "eval_steps_per_second": 8.934,
171
+ "step": 900
172
+ },
173
+ {
174
+ "epoch": 0.4,
175
+ "learning_rate": 1.5991983967935874e-05,
176
+ "loss": 0.109,
177
+ "step": 1000
178
+ },
179
+ {
180
+ "epoch": 0.4,
181
+ "eval_accuracy": 0.9458845449790795,
182
+ "eval_f1": 0.9458845449790795,
183
+ "eval_loss": 0.0995732992887497,
184
+ "eval_precision": 0.9458845449790795,
185
+ "eval_recall": 0.9458845449790795,
186
+ "eval_runtime": 69.7698,
187
+ "eval_samples_per_second": 143.028,
188
+ "eval_steps_per_second": 8.944,
189
+ "step": 1000
190
+ }
191
+ ],
192
+ "logging_steps": 100,
193
+ "max_steps": 4990,
194
+ "num_input_tokens_seen": 0,
195
+ "num_train_epochs": 2,
196
+ "save_steps": 500,
197
+ "total_flos": 2090903175168000.0,
198
+ "train_batch_size": 16,
199
+ "trial_name": null,
200
+ "trial_params": null
201
+ }
distilbert-base-cased/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0d85271a02ae6175d7c1e0b4bc6de536afd6c48c218ee02c7929cf1116aefd
3
+ size 4728
distilbert-base-cased/checkpoint-1000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
distilbert-base-cased/checkpoint-1500/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "B-ProductNameEn",
13
+ "1": "I-ProductNameEn",
14
+ "2": "B-TradeMarkEn",
15
+ "3": "I-TradeMarkEn",
16
+ "4": "B-Country",
17
+ "5": "I-Country",
18
+ "6": "B-HSCode",
19
+ "7": "I-HSCode",
20
+ "8": "B-HSCodeEn",
21
+ "9": "I-HSCodeEn",
22
+ "10": "B-ManufacturerEn",
23
+ "11": "I-ManufacturerEn",
24
+ "12": "B-ModelNo",
25
+ "13": "I-ModelNo"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "label2id": {
29
+ "B-Country": 4,
30
+ "B-HSCode": 6,
31
+ "B-HSCodeEn": 8,
32
+ "B-ManufacturerEn": 10,
33
+ "B-ModelNo": 12,
34
+ "B-ProductNameEn": 0,
35
+ "B-TradeMarkEn": 2,
36
+ "I-Country": 5,
37
+ "I-HSCode": 7,
38
+ "I-HSCodeEn": 9,
39
+ "I-ManufacturerEn": 11,
40
+ "I-ModelNo": 13,
41
+ "I-ProductNameEn": 1,
42
+ "I-TradeMarkEn": 3
43
+ },
44
+ "max_position_embeddings": 512,
45
+ "model_type": "distilbert",
46
+ "n_heads": 12,
47
+ "n_layers": 6,
48
+ "output_past": true,
49
+ "pad_token_id": 0,
50
+ "qa_dropout": 0.1,
51
+ "seq_classif_dropout": 0.2,
52
+ "sinusoidal_pos_embds": false,
53
+ "tie_weights_": true,
54
+ "torch_dtype": "float32",
55
+ "transformers_version": "4.37.2",
56
+ "vocab_size": 28996
57
+ }
distilbert-base-cased/checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab6b60caa294043f335c6676072fb224aaee94e4bea52026580c3c9448222bc7
3
+ size 260819048
distilbert-base-cased/checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf7751e30f60eb80951d566ee77f561d8d164ad636c263d6ccd00fdf4d0d8ac1
3
+ size 521698874
distilbert-base-cased/checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d8ce9c605147b35cecdd390c023be3c028ee93ee0b95b61dc0e17a30bc29ab4
3
+ size 14244
distilbert-base-cased/checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36743f70cac6ff710096a2cc9caa181a9f9b98bdececc0b13a72a9886a686fd1
3
+ size 1064
distilbert-base-cased/checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
distilbert-base-cased/checkpoint-1500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distilbert-base-cased/checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
distilbert-base-cased/checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.09130043536424637,
3
+ "best_model_checkpoint": "JayR7/distilbert-base-cased/checkpoint-1500",
4
+ "epoch": 0.6012024048096193,
5
+ "eval_steps": 100,
6
+ "global_step": 1500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04,
13
+ "learning_rate": 1.959919839679359e-05,
14
+ "loss": 0.7657,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.04,
19
+ "eval_accuracy": 0.891240629358438,
20
+ "eval_f1": 0.891240629358438,
21
+ "eval_loss": 0.2461320161819458,
22
+ "eval_precision": 0.891240629358438,
23
+ "eval_recall": 0.891240629358438,
24
+ "eval_runtime": 70.2088,
25
+ "eval_samples_per_second": 142.133,
26
+ "eval_steps_per_second": 8.888,
27
+ "step": 100
28
+ },
29
+ {
30
+ "epoch": 0.08,
31
+ "learning_rate": 1.9198396793587175e-05,
32
+ "loss": 0.2295,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.08,
37
+ "eval_accuracy": 0.9192844534518828,
38
+ "eval_f1": 0.9192844534518828,
39
+ "eval_loss": 0.17861397564411163,
40
+ "eval_precision": 0.9192844534518828,
41
+ "eval_recall": 0.9192844534518828,
42
+ "eval_runtime": 70.0566,
43
+ "eval_samples_per_second": 142.442,
44
+ "eval_steps_per_second": 8.907,
45
+ "step": 200
46
+ },
47
+ {
48
+ "epoch": 0.12,
49
+ "learning_rate": 1.8797595190380762e-05,
50
+ "loss": 0.1704,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.12,
55
+ "eval_accuracy": 0.9293279288702929,
56
+ "eval_f1": 0.9293279288702929,
57
+ "eval_loss": 0.1497383564710617,
58
+ "eval_precision": 0.9293279288702929,
59
+ "eval_recall": 0.9293279288702929,
60
+ "eval_runtime": 69.6477,
61
+ "eval_samples_per_second": 143.278,
62
+ "eval_steps_per_second": 8.959,
63
+ "step": 300
64
+ },
65
+ {
66
+ "epoch": 0.16,
67
+ "learning_rate": 1.839679358717435e-05,
68
+ "loss": 0.1618,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 0.16,
73
+ "eval_accuracy": 0.9359854428172942,
74
+ "eval_f1": 0.9359854428172942,
75
+ "eval_loss": 0.12991881370544434,
76
+ "eval_precision": 0.9359854428172942,
77
+ "eval_recall": 0.9359854428172942,
78
+ "eval_runtime": 70.1002,
79
+ "eval_samples_per_second": 142.353,
80
+ "eval_steps_per_second": 8.902,
81
+ "step": 400
82
+ },
83
+ {
84
+ "epoch": 0.2,
85
+ "learning_rate": 1.7995991983967936e-05,
86
+ "loss": 0.1366,
87
+ "step": 500
88
+ },
89
+ {
90
+ "epoch": 0.2,
91
+ "eval_accuracy": 0.9360371992677824,
92
+ "eval_f1": 0.9360371992677824,
93
+ "eval_loss": 0.13222643733024597,
94
+ "eval_precision": 0.9360371992677824,
95
+ "eval_recall": 0.9360371992677824,
96
+ "eval_runtime": 69.7532,
97
+ "eval_samples_per_second": 143.062,
98
+ "eval_steps_per_second": 8.946,
99
+ "step": 500
100
+ },
101
+ {
102
+ "epoch": 0.24,
103
+ "learning_rate": 1.7595190380761523e-05,
104
+ "loss": 0.1252,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 0.24,
109
+ "eval_accuracy": 0.9404364975592747,
110
+ "eval_f1": 0.9404364975592747,
111
+ "eval_loss": 0.11517041176557541,
112
+ "eval_precision": 0.9404364975592747,
113
+ "eval_recall": 0.9404364975592747,
114
+ "eval_runtime": 69.595,
115
+ "eval_samples_per_second": 143.387,
116
+ "eval_steps_per_second": 8.966,
117
+ "step": 600
118
+ },
119
+ {
120
+ "epoch": 0.28,
121
+ "learning_rate": 1.719438877755511e-05,
122
+ "loss": 0.1201,
123
+ "step": 700
124
+ },
125
+ {
126
+ "epoch": 0.28,
127
+ "eval_accuracy": 0.9426865411436541,
128
+ "eval_f1": 0.9426865411436541,
129
+ "eval_loss": 0.10682988166809082,
130
+ "eval_precision": 0.9426865411436541,
131
+ "eval_recall": 0.9426865411436541,
132
+ "eval_runtime": 70.4798,
133
+ "eval_samples_per_second": 141.587,
134
+ "eval_steps_per_second": 8.854,
135
+ "step": 700
136
+ },
137
+ {
138
+ "epoch": 0.32,
139
+ "learning_rate": 1.6793587174348697e-05,
140
+ "loss": 0.1192,
141
+ "step": 800
142
+ },
143
+ {
144
+ "epoch": 0.32,
145
+ "eval_accuracy": 0.9439395920502092,
146
+ "eval_f1": 0.9439395920502092,
147
+ "eval_loss": 0.10515566915273666,
148
+ "eval_precision": 0.9439395920502092,
149
+ "eval_recall": 0.9439395920502092,
150
+ "eval_runtime": 69.7899,
151
+ "eval_samples_per_second": 142.986,
152
+ "eval_steps_per_second": 8.941,
153
+ "step": 800
154
+ },
155
+ {
156
+ "epoch": 0.36,
157
+ "learning_rate": 1.6392785571142287e-05,
158
+ "loss": 0.1124,
159
+ "step": 900
160
+ },
161
+ {
162
+ "epoch": 0.36,
163
+ "eval_accuracy": 0.9446532862622036,
164
+ "eval_f1": 0.9446532862622036,
165
+ "eval_loss": 0.10284145176410675,
166
+ "eval_precision": 0.9446532862622036,
167
+ "eval_recall": 0.9446532862622036,
168
+ "eval_runtime": 69.8472,
169
+ "eval_samples_per_second": 142.869,
170
+ "eval_steps_per_second": 8.934,
171
+ "step": 900
172
+ },
173
+ {
174
+ "epoch": 0.4,
175
+ "learning_rate": 1.5991983967935874e-05,
176
+ "loss": 0.109,
177
+ "step": 1000
178
+ },
179
+ {
180
+ "epoch": 0.4,
181
+ "eval_accuracy": 0.9458845449790795,
182
+ "eval_f1": 0.9458845449790795,
183
+ "eval_loss": 0.0995732992887497,
184
+ "eval_precision": 0.9458845449790795,
185
+ "eval_recall": 0.9458845449790795,
186
+ "eval_runtime": 69.7698,
187
+ "eval_samples_per_second": 143.028,
188
+ "eval_steps_per_second": 8.944,
189
+ "step": 1000
190
+ },
191
+ {
192
+ "epoch": 0.44,
193
+ "learning_rate": 1.559118236472946e-05,
194
+ "loss": 0.1026,
195
+ "step": 1100
196
+ },
197
+ {
198
+ "epoch": 0.44,
199
+ "eval_accuracy": 0.946009850069735,
200
+ "eval_f1": 0.946009850069735,
201
+ "eval_loss": 0.09745196253061295,
202
+ "eval_precision": 0.946009850069735,
203
+ "eval_recall": 0.946009850069735,
204
+ "eval_runtime": 70.3509,
205
+ "eval_samples_per_second": 141.846,
206
+ "eval_steps_per_second": 8.87,
207
+ "step": 1100
208
+ },
209
+ {
210
+ "epoch": 0.48,
211
+ "learning_rate": 1.5190380761523047e-05,
212
+ "loss": 0.0984,
213
+ "step": 1200
214
+ },
215
+ {
216
+ "epoch": 0.48,
217
+ "eval_accuracy": 0.9475189592050209,
218
+ "eval_f1": 0.9475189592050209,
219
+ "eval_loss": 0.09556742012500763,
220
+ "eval_precision": 0.9475189592050209,
221
+ "eval_recall": 0.9475189592050209,
222
+ "eval_runtime": 70.7974,
223
+ "eval_samples_per_second": 140.951,
224
+ "eval_steps_per_second": 8.814,
225
+ "step": 1200
226
+ },
227
+ {
228
+ "epoch": 0.52,
229
+ "learning_rate": 1.4789579158316633e-05,
230
+ "loss": 0.1014,
231
+ "step": 1300
232
+ },
233
+ {
234
+ "epoch": 0.52,
235
+ "eval_accuracy": 0.9479520789748954,
236
+ "eval_f1": 0.9479520789748954,
237
+ "eval_loss": 0.09433256834745407,
238
+ "eval_precision": 0.9479520789748954,
239
+ "eval_recall": 0.9479520789748954,
240
+ "eval_runtime": 69.9794,
241
+ "eval_samples_per_second": 142.599,
242
+ "eval_steps_per_second": 8.917,
243
+ "step": 1300
244
+ },
245
+ {
246
+ "epoch": 0.56,
247
+ "learning_rate": 1.4388777555110222e-05,
248
+ "loss": 0.1085,
249
+ "step": 1400
250
+ },
251
+ {
252
+ "epoch": 0.56,
253
+ "eval_accuracy": 0.9476878486750349,
254
+ "eval_f1": 0.9476878486750349,
255
+ "eval_loss": 0.0909435972571373,
256
+ "eval_precision": 0.9476878486750349,
257
+ "eval_recall": 0.9476878486750349,
258
+ "eval_runtime": 70.3051,
259
+ "eval_samples_per_second": 141.939,
260
+ "eval_steps_per_second": 8.876,
261
+ "step": 1400
262
+ },
263
+ {
264
+ "epoch": 0.6,
265
+ "learning_rate": 1.3987975951903809e-05,
266
+ "loss": 0.0965,
267
+ "step": 1500
268
+ },
269
+ {
270
+ "epoch": 0.6,
271
+ "eval_accuracy": 0.9481454846582985,
272
+ "eval_f1": 0.9481454846582985,
273
+ "eval_loss": 0.09130043536424637,
274
+ "eval_precision": 0.9481454846582985,
275
+ "eval_recall": 0.9481454846582985,
276
+ "eval_runtime": 70.3437,
277
+ "eval_samples_per_second": 141.861,
278
+ "eval_steps_per_second": 8.871,
279
+ "step": 1500
280
+ }
281
+ ],
282
+ "logging_steps": 100,
283
+ "max_steps": 4990,
284
+ "num_input_tokens_seen": 0,
285
+ "num_train_epochs": 2,
286
+ "save_steps": 500,
287
+ "total_flos": 3136354762752000.0,
288
+ "train_batch_size": 16,
289
+ "trial_name": null,
290
+ "trial_params": null
291
+ }
distilbert-base-cased/checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0d85271a02ae6175d7c1e0b4bc6de536afd6c48c218ee02c7929cf1116aefd
3
+ size 4728
distilbert-base-cased/checkpoint-1500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
distilbert-base-cased/checkpoint-2000/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "B-ProductNameEn",
13
+ "1": "I-ProductNameEn",
14
+ "2": "B-TradeMarkEn",
15
+ "3": "I-TradeMarkEn",
16
+ "4": "B-Country",
17
+ "5": "I-Country",
18
+ "6": "B-HSCode",
19
+ "7": "I-HSCode",
20
+ "8": "B-HSCodeEn",
21
+ "9": "I-HSCodeEn",
22
+ "10": "B-ManufacturerEn",
23
+ "11": "I-ManufacturerEn",
24
+ "12": "B-ModelNo",
25
+ "13": "I-ModelNo"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "label2id": {
29
+ "B-Country": 4,
30
+ "B-HSCode": 6,
31
+ "B-HSCodeEn": 8,
32
+ "B-ManufacturerEn": 10,
33
+ "B-ModelNo": 12,
34
+ "B-ProductNameEn": 0,
35
+ "B-TradeMarkEn": 2,
36
+ "I-Country": 5,
37
+ "I-HSCode": 7,
38
+ "I-HSCodeEn": 9,
39
+ "I-ManufacturerEn": 11,
40
+ "I-ModelNo": 13,
41
+ "I-ProductNameEn": 1,
42
+ "I-TradeMarkEn": 3
43
+ },
44
+ "max_position_embeddings": 512,
45
+ "model_type": "distilbert",
46
+ "n_heads": 12,
47
+ "n_layers": 6,
48
+ "output_past": true,
49
+ "pad_token_id": 0,
50
+ "qa_dropout": 0.1,
51
+ "seq_classif_dropout": 0.2,
52
+ "sinusoidal_pos_embds": false,
53
+ "tie_weights_": true,
54
+ "torch_dtype": "float32",
55
+ "transformers_version": "4.37.2",
56
+ "vocab_size": 28996
57
+ }
distilbert-base-cased/checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1d0e486f282b7b882540956eaf17cb71ce9934d2f49b0693c033b4d492f20e
3
+ size 260819048
distilbert-base-cased/checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cef26972582a00dfe2ca3ec2807b32f1300a86441592251f00354b776d92c83c
3
+ size 521698874
distilbert-base-cased/checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c1b4f86230697a32c1a7490e542f2f67b5a4a1f21d67b1ae0d148402158d8a9
3
+ size 14244
distilbert-base-cased/checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:701e37da4aab50072e3b78d31cf7207a36ae829c3754a5d40cf3adffe1cc8713
3
+ size 1064
distilbert-base-cased/checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
distilbert-base-cased/checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distilbert-base-cased/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
distilbert-base-cased/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.08437436819076538,
3
+ "best_model_checkpoint": "JayR7/distilbert-base-cased/checkpoint-2000",
4
+ "epoch": 0.8016032064128257,
5
+ "eval_steps": 100,
6
+ "global_step": 2000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04,
13
+ "learning_rate": 1.959919839679359e-05,
14
+ "loss": 0.7657,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.04,
19
+ "eval_accuracy": 0.891240629358438,
20
+ "eval_f1": 0.891240629358438,
21
+ "eval_loss": 0.2461320161819458,
22
+ "eval_precision": 0.891240629358438,
23
+ "eval_recall": 0.891240629358438,
24
+ "eval_runtime": 70.2088,
25
+ "eval_samples_per_second": 142.133,
26
+ "eval_steps_per_second": 8.888,
27
+ "step": 100
28
+ },
29
+ {
30
+ "epoch": 0.08,
31
+ "learning_rate": 1.9198396793587175e-05,
32
+ "loss": 0.2295,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.08,
37
+ "eval_accuracy": 0.9192844534518828,
38
+ "eval_f1": 0.9192844534518828,
39
+ "eval_loss": 0.17861397564411163,
40
+ "eval_precision": 0.9192844534518828,
41
+ "eval_recall": 0.9192844534518828,
42
+ "eval_runtime": 70.0566,
43
+ "eval_samples_per_second": 142.442,
44
+ "eval_steps_per_second": 8.907,
45
+ "step": 200
46
+ },
47
+ {
48
+ "epoch": 0.12,
49
+ "learning_rate": 1.8797595190380762e-05,
50
+ "loss": 0.1704,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.12,
55
+ "eval_accuracy": 0.9293279288702929,
56
+ "eval_f1": 0.9293279288702929,
57
+ "eval_loss": 0.1497383564710617,
58
+ "eval_precision": 0.9293279288702929,
59
+ "eval_recall": 0.9293279288702929,
60
+ "eval_runtime": 69.6477,
61
+ "eval_samples_per_second": 143.278,
62
+ "eval_steps_per_second": 8.959,
63
+ "step": 300
64
+ },
65
+ {
66
+ "epoch": 0.16,
67
+ "learning_rate": 1.839679358717435e-05,
68
+ "loss": 0.1618,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 0.16,
73
+ "eval_accuracy": 0.9359854428172942,
74
+ "eval_f1": 0.9359854428172942,
75
+ "eval_loss": 0.12991881370544434,
76
+ "eval_precision": 0.9359854428172942,
77
+ "eval_recall": 0.9359854428172942,
78
+ "eval_runtime": 70.1002,
79
+ "eval_samples_per_second": 142.353,
80
+ "eval_steps_per_second": 8.902,
81
+ "step": 400
82
+ },
83
+ {
84
+ "epoch": 0.2,
85
+ "learning_rate": 1.7995991983967936e-05,
86
+ "loss": 0.1366,
87
+ "step": 500
88
+ },
89
+ {
90
+ "epoch": 0.2,
91
+ "eval_accuracy": 0.9360371992677824,
92
+ "eval_f1": 0.9360371992677824,
93
+ "eval_loss": 0.13222643733024597,
94
+ "eval_precision": 0.9360371992677824,
95
+ "eval_recall": 0.9360371992677824,
96
+ "eval_runtime": 69.7532,
97
+ "eval_samples_per_second": 143.062,
98
+ "eval_steps_per_second": 8.946,
99
+ "step": 500
100
+ },
101
+ {
102
+ "epoch": 0.24,
103
+ "learning_rate": 1.7595190380761523e-05,
104
+ "loss": 0.1252,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 0.24,
109
+ "eval_accuracy": 0.9404364975592747,
110
+ "eval_f1": 0.9404364975592747,
111
+ "eval_loss": 0.11517041176557541,
112
+ "eval_precision": 0.9404364975592747,
113
+ "eval_recall": 0.9404364975592747,
114
+ "eval_runtime": 69.595,
115
+ "eval_samples_per_second": 143.387,
116
+ "eval_steps_per_second": 8.966,
117
+ "step": 600
118
+ },
119
+ {
120
+ "epoch": 0.28,
121
+ "learning_rate": 1.719438877755511e-05,
122
+ "loss": 0.1201,
123
+ "step": 700
124
+ },
125
+ {
126
+ "epoch": 0.28,
127
+ "eval_accuracy": 0.9426865411436541,
128
+ "eval_f1": 0.9426865411436541,
129
+ "eval_loss": 0.10682988166809082,
130
+ "eval_precision": 0.9426865411436541,
131
+ "eval_recall": 0.9426865411436541,
132
+ "eval_runtime": 70.4798,
133
+ "eval_samples_per_second": 141.587,
134
+ "eval_steps_per_second": 8.854,
135
+ "step": 700
136
+ },
137
+ {
138
+ "epoch": 0.32,
139
+ "learning_rate": 1.6793587174348697e-05,
140
+ "loss": 0.1192,
141
+ "step": 800
142
+ },
143
+ {
144
+ "epoch": 0.32,
145
+ "eval_accuracy": 0.9439395920502092,
146
+ "eval_f1": 0.9439395920502092,
147
+ "eval_loss": 0.10515566915273666,
148
+ "eval_precision": 0.9439395920502092,
149
+ "eval_recall": 0.9439395920502092,
150
+ "eval_runtime": 69.7899,
151
+ "eval_samples_per_second": 142.986,
152
+ "eval_steps_per_second": 8.941,
153
+ "step": 800
154
+ },
155
+ {
156
+ "epoch": 0.36,
157
+ "learning_rate": 1.6392785571142287e-05,
158
+ "loss": 0.1124,
159
+ "step": 900
160
+ },
161
+ {
162
+ "epoch": 0.36,
163
+ "eval_accuracy": 0.9446532862622036,
164
+ "eval_f1": 0.9446532862622036,
165
+ "eval_loss": 0.10284145176410675,
166
+ "eval_precision": 0.9446532862622036,
167
+ "eval_recall": 0.9446532862622036,
168
+ "eval_runtime": 69.8472,
169
+ "eval_samples_per_second": 142.869,
170
+ "eval_steps_per_second": 8.934,
171
+ "step": 900
172
+ },
173
+ {
174
+ "epoch": 0.4,
175
+ "learning_rate": 1.5991983967935874e-05,
176
+ "loss": 0.109,
177
+ "step": 1000
178
+ },
179
+ {
180
+ "epoch": 0.4,
181
+ "eval_accuracy": 0.9458845449790795,
182
+ "eval_f1": 0.9458845449790795,
183
+ "eval_loss": 0.0995732992887497,
184
+ "eval_precision": 0.9458845449790795,
185
+ "eval_recall": 0.9458845449790795,
186
+ "eval_runtime": 69.7698,
187
+ "eval_samples_per_second": 143.028,
188
+ "eval_steps_per_second": 8.944,
189
+ "step": 1000
190
+ },
191
+ {
192
+ "epoch": 0.44,
193
+ "learning_rate": 1.559118236472946e-05,
194
+ "loss": 0.1026,
195
+ "step": 1100
196
+ },
197
+ {
198
+ "epoch": 0.44,
199
+ "eval_accuracy": 0.946009850069735,
200
+ "eval_f1": 0.946009850069735,
201
+ "eval_loss": 0.09745196253061295,
202
+ "eval_precision": 0.946009850069735,
203
+ "eval_recall": 0.946009850069735,
204
+ "eval_runtime": 70.3509,
205
+ "eval_samples_per_second": 141.846,
206
+ "eval_steps_per_second": 8.87,
207
+ "step": 1100
208
+ },
209
+ {
210
+ "epoch": 0.48,
211
+ "learning_rate": 1.5190380761523047e-05,
212
+ "loss": 0.0984,
213
+ "step": 1200
214
+ },
215
+ {
216
+ "epoch": 0.48,
217
+ "eval_accuracy": 0.9475189592050209,
218
+ "eval_f1": 0.9475189592050209,
219
+ "eval_loss": 0.09556742012500763,
220
+ "eval_precision": 0.9475189592050209,
221
+ "eval_recall": 0.9475189592050209,
222
+ "eval_runtime": 70.7974,
223
+ "eval_samples_per_second": 140.951,
224
+ "eval_steps_per_second": 8.814,
225
+ "step": 1200
226
+ },
227
+ {
228
+ "epoch": 0.52,
229
+ "learning_rate": 1.4789579158316633e-05,
230
+ "loss": 0.1014,
231
+ "step": 1300
232
+ },
233
+ {
234
+ "epoch": 0.52,
235
+ "eval_accuracy": 0.9479520789748954,
236
+ "eval_f1": 0.9479520789748954,
237
+ "eval_loss": 0.09433256834745407,
238
+ "eval_precision": 0.9479520789748954,
239
+ "eval_recall": 0.9479520789748954,
240
+ "eval_runtime": 69.9794,
241
+ "eval_samples_per_second": 142.599,
242
+ "eval_steps_per_second": 8.917,
243
+ "step": 1300
244
+ },
245
+ {
246
+ "epoch": 0.56,
247
+ "learning_rate": 1.4388777555110222e-05,
248
+ "loss": 0.1085,
249
+ "step": 1400
250
+ },
251
+ {
252
+ "epoch": 0.56,
253
+ "eval_accuracy": 0.9476878486750349,
254
+ "eval_f1": 0.9476878486750349,
255
+ "eval_loss": 0.0909435972571373,
256
+ "eval_precision": 0.9476878486750349,
257
+ "eval_recall": 0.9476878486750349,
258
+ "eval_runtime": 70.3051,
259
+ "eval_samples_per_second": 141.939,
260
+ "eval_steps_per_second": 8.876,
261
+ "step": 1400
262
+ },
263
+ {
264
+ "epoch": 0.6,
265
+ "learning_rate": 1.3987975951903809e-05,
266
+ "loss": 0.0965,
267
+ "step": 1500
268
+ },
269
+ {
270
+ "epoch": 0.6,
271
+ "eval_accuracy": 0.9481454846582985,
272
+ "eval_f1": 0.9481454846582985,
273
+ "eval_loss": 0.09130043536424637,
274
+ "eval_precision": 0.9481454846582985,
275
+ "eval_recall": 0.9481454846582985,
276
+ "eval_runtime": 70.3437,
277
+ "eval_samples_per_second": 141.861,
278
+ "eval_steps_per_second": 8.871,
279
+ "step": 1500
280
+ },
281
+ {
282
+ "epoch": 0.64,
283
+ "learning_rate": 1.3587174348697396e-05,
284
+ "loss": 0.0932,
285
+ "step": 1600
286
+ },
287
+ {
288
+ "epoch": 0.64,
289
+ "eval_accuracy": 0.948521399930265,
290
+ "eval_f1": 0.948521399930265,
291
+ "eval_loss": 0.08914197236299515,
292
+ "eval_precision": 0.948521399930265,
293
+ "eval_recall": 0.948521399930265,
294
+ "eval_runtime": 70.1844,
295
+ "eval_samples_per_second": 142.183,
296
+ "eval_steps_per_second": 8.891,
297
+ "step": 1600
298
+ },
299
+ {
300
+ "epoch": 0.68,
301
+ "learning_rate": 1.3186372745490983e-05,
302
+ "loss": 0.0948,
303
+ "step": 1700
304
+ },
305
+ {
306
+ "epoch": 0.68,
307
+ "eval_accuracy": 0.9489163833682008,
308
+ "eval_f1": 0.9489163833682008,
309
+ "eval_loss": 0.08800023049116135,
310
+ "eval_precision": 0.9489163833682008,
311
+ "eval_recall": 0.9489163833682008,
312
+ "eval_runtime": 70.4997,
313
+ "eval_samples_per_second": 141.547,
314
+ "eval_steps_per_second": 8.851,
315
+ "step": 1700
316
+ },
317
+ {
318
+ "epoch": 0.72,
319
+ "learning_rate": 1.278557114228457e-05,
320
+ "loss": 0.0922,
321
+ "step": 1800
322
+ },
323
+ {
324
+ "epoch": 0.72,
325
+ "eval_accuracy": 0.9492814025453278,
326
+ "eval_f1": 0.9492814025453278,
327
+ "eval_loss": 0.08633574843406677,
328
+ "eval_precision": 0.9492814025453278,
329
+ "eval_recall": 0.9492814025453278,
330
+ "eval_runtime": 70.4874,
331
+ "eval_samples_per_second": 141.571,
332
+ "eval_steps_per_second": 8.853,
333
+ "step": 1800
334
+ },
335
+ {
336
+ "epoch": 0.76,
337
+ "learning_rate": 1.2384769539078157e-05,
338
+ "loss": 0.0889,
339
+ "step": 1900
340
+ },
341
+ {
342
+ "epoch": 0.76,
343
+ "eval_accuracy": 0.9499106520223152,
344
+ "eval_f1": 0.9499106520223152,
345
+ "eval_loss": 0.08566854894161224,
346
+ "eval_precision": 0.9499106520223152,
347
+ "eval_recall": 0.9499106520223152,
348
+ "eval_runtime": 70.5777,
349
+ "eval_samples_per_second": 141.39,
350
+ "eval_steps_per_second": 8.841,
351
+ "step": 1900
352
+ },
353
+ {
354
+ "epoch": 0.8,
355
+ "learning_rate": 1.1983967935871745e-05,
356
+ "loss": 0.0864,
357
+ "step": 2000
358
+ },
359
+ {
360
+ "epoch": 0.8,
361
+ "eval_accuracy": 0.9501558141562064,
362
+ "eval_f1": 0.9501558141562064,
363
+ "eval_loss": 0.08437436819076538,
364
+ "eval_precision": 0.9501558141562064,
365
+ "eval_recall": 0.9501558141562064,
366
+ "eval_runtime": 70.1921,
367
+ "eval_samples_per_second": 142.167,
368
+ "eval_steps_per_second": 8.89,
369
+ "step": 2000
370
+ }
371
+ ],
372
+ "logging_steps": 100,
373
+ "max_steps": 4990,
374
+ "num_input_tokens_seen": 0,
375
+ "num_train_epochs": 2,
376
+ "save_steps": 500,
377
+ "total_flos": 4181806350336000.0,
378
+ "train_batch_size": 16,
379
+ "trial_name": null,
380
+ "trial_params": null
381
+ }
distilbert-base-cased/checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0d85271a02ae6175d7c1e0b4bc6de536afd6c48c218ee02c7929cf1116aefd
3
+ size 4728
distilbert-base-cased/checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
distilbert-base-cased/checkpoint-2500/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "B-ProductNameEn",
13
+ "1": "I-ProductNameEn",
14
+ "2": "B-TradeMarkEn",
15
+ "3": "I-TradeMarkEn",
16
+ "4": "B-Country",
17
+ "5": "I-Country",
18
+ "6": "B-HSCode",
19
+ "7": "I-HSCode",
20
+ "8": "B-HSCodeEn",
21
+ "9": "I-HSCodeEn",
22
+ "10": "B-ManufacturerEn",
23
+ "11": "I-ManufacturerEn",
24
+ "12": "B-ModelNo",
25
+ "13": "I-ModelNo"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "label2id": {
29
+ "B-Country": 4,
30
+ "B-HSCode": 6,
31
+ "B-HSCodeEn": 8,
32
+ "B-ManufacturerEn": 10,
33
+ "B-ModelNo": 12,
34
+ "B-ProductNameEn": 0,
35
+ "B-TradeMarkEn": 2,
36
+ "I-Country": 5,
37
+ "I-HSCode": 7,
38
+ "I-HSCodeEn": 9,
39
+ "I-ManufacturerEn": 11,
40
+ "I-ModelNo": 13,
41
+ "I-ProductNameEn": 1,
42
+ "I-TradeMarkEn": 3
43
+ },
44
+ "max_position_embeddings": 512,
45
+ "model_type": "distilbert",
46
+ "n_heads": 12,
47
+ "n_layers": 6,
48
+ "output_past": true,
49
+ "pad_token_id": 0,
50
+ "qa_dropout": 0.1,
51
+ "seq_classif_dropout": 0.2,
52
+ "sinusoidal_pos_embds": false,
53
+ "tie_weights_": true,
54
+ "torch_dtype": "float32",
55
+ "transformers_version": "4.37.2",
56
+ "vocab_size": 28996
57
+ }
distilbert-base-cased/checkpoint-2500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf578dd14247b7f6e066dd1dd1afd7f2865087668233d3007d095926f203fe83
3
+ size 260819048
distilbert-base-cased/checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be9f0d39c84adc96212fc18c7068eaa2b0086ba8f55d4ba58f5cf98eee28ac04
3
+ size 521698874
distilbert-base-cased/checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82ba329afed7cc48d5d95b8d6fd8c61cbd6f211c85874cf3035e095a6ed44041
3
+ size 14244
distilbert-base-cased/checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f892d31cbfc307076f6d29d3fc084241cbfad373ec3833bbcf5185f53ee5763c
3
+ size 1064
distilbert-base-cased/checkpoint-2500/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
distilbert-base-cased/checkpoint-2500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distilbert-base-cased/checkpoint-2500/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
distilbert-base-cased/checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ {
+ "best_metric": 0.08178979158401489,
+ "best_model_checkpoint": "JayR7/distilbert-base-cased/checkpoint-2500",
+ "epoch": 1.002004008016032,
+ "eval_steps": 100,
+ "global_step": 2500,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.04,
+ "learning_rate": 1.959919839679359e-05,
+ "loss": 0.7657,
+ "step": 100
+ },
+ {
+ "epoch": 0.04,
+ "eval_accuracy": 0.891240629358438,
+ "eval_f1": 0.891240629358438,
+ "eval_loss": 0.2461320161819458,
+ "eval_precision": 0.891240629358438,
+ "eval_recall": 0.891240629358438,
+ "eval_runtime": 70.2088,
+ "eval_samples_per_second": 142.133,
+ "eval_steps_per_second": 8.888,
+ "step": 100
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 1.9198396793587175e-05,
+ "loss": 0.2295,
+ "step": 200
+ },
+ {
+ "epoch": 0.08,
+ "eval_accuracy": 0.9192844534518828,
+ "eval_f1": 0.9192844534518828,
+ "eval_loss": 0.17861397564411163,
+ "eval_precision": 0.9192844534518828,
+ "eval_recall": 0.9192844534518828,
+ "eval_runtime": 70.0566,
+ "eval_samples_per_second": 142.442,
+ "eval_steps_per_second": 8.907,
+ "step": 200
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 1.8797595190380762e-05,
+ "loss": 0.1704,
+ "step": 300
+ },
+ {
+ "epoch": 0.12,
+ "eval_accuracy": 0.9293279288702929,
+ "eval_f1": 0.9293279288702929,
+ "eval_loss": 0.1497383564710617,
+ "eval_precision": 0.9293279288702929,
+ "eval_recall": 0.9293279288702929,
+ "eval_runtime": 69.6477,
+ "eval_samples_per_second": 143.278,
+ "eval_steps_per_second": 8.959,
+ "step": 300
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 1.839679358717435e-05,
+ "loss": 0.1618,
+ "step": 400
+ },
+ {
+ "epoch": 0.16,
+ "eval_accuracy": 0.9359854428172942,
+ "eval_f1": 0.9359854428172942,
+ "eval_loss": 0.12991881370544434,
+ "eval_precision": 0.9359854428172942,
+ "eval_recall": 0.9359854428172942,
+ "eval_runtime": 70.1002,
+ "eval_samples_per_second": 142.353,
+ "eval_steps_per_second": 8.902,
+ "step": 400
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 1.7995991983967936e-05,
+ "loss": 0.1366,
+ "step": 500
+ },
+ {
+ "epoch": 0.2,
+ "eval_accuracy": 0.9360371992677824,
+ "eval_f1": 0.9360371992677824,
+ "eval_loss": 0.13222643733024597,
+ "eval_precision": 0.9360371992677824,
+ "eval_recall": 0.9360371992677824,
+ "eval_runtime": 69.7532,
+ "eval_samples_per_second": 143.062,
+ "eval_steps_per_second": 8.946,
+ "step": 500
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 1.7595190380761523e-05,
+ "loss": 0.1252,
+ "step": 600
+ },
+ {
+ "epoch": 0.24,
+ "eval_accuracy": 0.9404364975592747,
+ "eval_f1": 0.9404364975592747,
+ "eval_loss": 0.11517041176557541,
+ "eval_precision": 0.9404364975592747,
+ "eval_recall": 0.9404364975592747,
+ "eval_runtime": 69.595,
+ "eval_samples_per_second": 143.387,
+ "eval_steps_per_second": 8.966,
+ "step": 600
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 1.719438877755511e-05,
+ "loss": 0.1201,
+ "step": 700
+ },
+ {
+ "epoch": 0.28,
+ "eval_accuracy": 0.9426865411436541,
+ "eval_f1": 0.9426865411436541,
+ "eval_loss": 0.10682988166809082,
+ "eval_precision": 0.9426865411436541,
+ "eval_recall": 0.9426865411436541,
+ "eval_runtime": 70.4798,
+ "eval_samples_per_second": 141.587,
+ "eval_steps_per_second": 8.854,
+ "step": 700
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 1.6793587174348697e-05,
+ "loss": 0.1192,
+ "step": 800
+ },
+ {
+ "epoch": 0.32,
+ "eval_accuracy": 0.9439395920502092,
+ "eval_f1": 0.9439395920502092,
+ "eval_loss": 0.10515566915273666,
+ "eval_precision": 0.9439395920502092,
+ "eval_recall": 0.9439395920502092,
+ "eval_runtime": 69.7899,
+ "eval_samples_per_second": 142.986,
+ "eval_steps_per_second": 8.941,
+ "step": 800
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 1.6392785571142287e-05,
+ "loss": 0.1124,
+ "step": 900
+ },
+ {
+ "epoch": 0.36,
+ "eval_accuracy": 0.9446532862622036,
+ "eval_f1": 0.9446532862622036,
+ "eval_loss": 0.10284145176410675,
+ "eval_precision": 0.9446532862622036,
+ "eval_recall": 0.9446532862622036,
+ "eval_runtime": 69.8472,
+ "eval_samples_per_second": 142.869,
+ "eval_steps_per_second": 8.934,
+ "step": 900
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 1.5991983967935874e-05,
+ "loss": 0.109,
+ "step": 1000
+ },
+ {
+ "epoch": 0.4,
+ "eval_accuracy": 0.9458845449790795,
+ "eval_f1": 0.9458845449790795,
+ "eval_loss": 0.0995732992887497,
+ "eval_precision": 0.9458845449790795,
+ "eval_recall": 0.9458845449790795,
+ "eval_runtime": 69.7698,
+ "eval_samples_per_second": 143.028,
+ "eval_steps_per_second": 8.944,
+ "step": 1000
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 1.559118236472946e-05,
+ "loss": 0.1026,
+ "step": 1100
+ },
+ {
+ "epoch": 0.44,
+ "eval_accuracy": 0.946009850069735,
+ "eval_f1": 0.946009850069735,
+ "eval_loss": 0.09745196253061295,
+ "eval_precision": 0.946009850069735,
+ "eval_recall": 0.946009850069735,
+ "eval_runtime": 70.3509,
+ "eval_samples_per_second": 141.846,
+ "eval_steps_per_second": 8.87,
+ "step": 1100
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 1.5190380761523047e-05,
+ "loss": 0.0984,
+ "step": 1200
+ },
+ {
+ "epoch": 0.48,
+ "eval_accuracy": 0.9475189592050209,
+ "eval_f1": 0.9475189592050209,
+ "eval_loss": 0.09556742012500763,
+ "eval_precision": 0.9475189592050209,
+ "eval_recall": 0.9475189592050209,
+ "eval_runtime": 70.7974,
+ "eval_samples_per_second": 140.951,
+ "eval_steps_per_second": 8.814,
+ "step": 1200
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 1.4789579158316633e-05,
+ "loss": 0.1014,
+ "step": 1300
+ },
+ {
+ "epoch": 0.52,
+ "eval_accuracy": 0.9479520789748954,
+ "eval_f1": 0.9479520789748954,
+ "eval_loss": 0.09433256834745407,
+ "eval_precision": 0.9479520789748954,
+ "eval_recall": 0.9479520789748954,
+ "eval_runtime": 69.9794,
+ "eval_samples_per_second": 142.599,
+ "eval_steps_per_second": 8.917,
+ "step": 1300
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 1.4388777555110222e-05,
+ "loss": 0.1085,
+ "step": 1400
+ },
+ {
+ "epoch": 0.56,
+ "eval_accuracy": 0.9476878486750349,
+ "eval_f1": 0.9476878486750349,
+ "eval_loss": 0.0909435972571373,
+ "eval_precision": 0.9476878486750349,
+ "eval_recall": 0.9476878486750349,
+ "eval_runtime": 70.3051,
+ "eval_samples_per_second": 141.939,
+ "eval_steps_per_second": 8.876,
+ "step": 1400
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 1.3987975951903809e-05,
+ "loss": 0.0965,
+ "step": 1500
+ },
+ {
+ "epoch": 0.6,
+ "eval_accuracy": 0.9481454846582985,
+ "eval_f1": 0.9481454846582985,
+ "eval_loss": 0.09130043536424637,
+ "eval_precision": 0.9481454846582985,
+ "eval_recall": 0.9481454846582985,
+ "eval_runtime": 70.3437,
+ "eval_samples_per_second": 141.861,
+ "eval_steps_per_second": 8.871,
+ "step": 1500
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 1.3587174348697396e-05,
+ "loss": 0.0932,
+ "step": 1600
+ },
+ {
+ "epoch": 0.64,
+ "eval_accuracy": 0.948521399930265,
+ "eval_f1": 0.948521399930265,
+ "eval_loss": 0.08914197236299515,
+ "eval_precision": 0.948521399930265,
+ "eval_recall": 0.948521399930265,
+ "eval_runtime": 70.1844,
+ "eval_samples_per_second": 142.183,
+ "eval_steps_per_second": 8.891,
+ "step": 1600
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 1.3186372745490983e-05,
+ "loss": 0.0948,
+ "step": 1700
+ },
+ {
+ "epoch": 0.68,
+ "eval_accuracy": 0.9489163833682008,
+ "eval_f1": 0.9489163833682008,
+ "eval_loss": 0.08800023049116135,
+ "eval_precision": 0.9489163833682008,
+ "eval_recall": 0.9489163833682008,
+ "eval_runtime": 70.4997,
+ "eval_samples_per_second": 141.547,
+ "eval_steps_per_second": 8.851,
+ "step": 1700
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 1.278557114228457e-05,
+ "loss": 0.0922,
+ "step": 1800
+ },
+ {
+ "epoch": 0.72,
+ "eval_accuracy": 0.9492814025453278,
+ "eval_f1": 0.9492814025453278,
+ "eval_loss": 0.08633574843406677,
+ "eval_precision": 0.9492814025453278,
+ "eval_recall": 0.9492814025453278,
+ "eval_runtime": 70.4874,
+ "eval_samples_per_second": 141.571,
+ "eval_steps_per_second": 8.853,
+ "step": 1800
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.2384769539078157e-05,
+ "loss": 0.0889,
+ "step": 1900
+ },
+ {
+ "epoch": 0.76,
+ "eval_accuracy": 0.9499106520223152,
+ "eval_f1": 0.9499106520223152,
+ "eval_loss": 0.08566854894161224,
+ "eval_precision": 0.9499106520223152,
+ "eval_recall": 0.9499106520223152,
+ "eval_runtime": 70.5777,
+ "eval_samples_per_second": 141.39,
+ "eval_steps_per_second": 8.841,
+ "step": 1900
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 1.1983967935871745e-05,
+ "loss": 0.0864,
+ "step": 2000
+ },
+ {
+ "epoch": 0.8,
+ "eval_accuracy": 0.9501558141562064,
+ "eval_f1": 0.9501558141562064,
+ "eval_loss": 0.08437436819076538,
+ "eval_precision": 0.9501558141562064,
+ "eval_recall": 0.9501558141562064,
+ "eval_runtime": 70.1921,
+ "eval_samples_per_second": 142.167,
+ "eval_steps_per_second": 8.89,
+ "step": 2000
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 1.1583166332665332e-05,
+ "loss": 0.0941,
+ "step": 2100
+ },
+ {
+ "epoch": 0.84,
+ "eval_accuracy": 0.9502593270571827,
+ "eval_f1": 0.9502593270571827,
+ "eval_loss": 0.08298122137784958,
+ "eval_precision": 0.9502593270571827,
+ "eval_recall": 0.9502593270571827,
+ "eval_runtime": 70.5911,
+ "eval_samples_per_second": 141.363,
+ "eval_steps_per_second": 8.84,
+ "step": 2100
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 1.118236472945892e-05,
+ "loss": 0.0895,
+ "step": 2200
+ },
+ {
+ "epoch": 0.88,
+ "eval_accuracy": 0.9504554567642957,
+ "eval_f1": 0.9504554567642957,
+ "eval_loss": 0.08282341808080673,
+ "eval_precision": 0.9504554567642957,
+ "eval_recall": 0.9504554567642957,
+ "eval_runtime": 69.735,
+ "eval_samples_per_second": 143.099,
+ "eval_steps_per_second": 8.948,
+ "step": 2200
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 1.0781563126252506e-05,
+ "loss": 0.088,
+ "step": 2300
+ },
+ {
+ "epoch": 0.92,
+ "eval_accuracy": 0.9506080020920502,
+ "eval_f1": 0.9506080020920502,
+ "eval_loss": 0.08267684280872345,
+ "eval_precision": 0.9506080020920502,
+ "eval_recall": 0.9506080020920502,
+ "eval_runtime": 69.7037,
+ "eval_samples_per_second": 143.163,
+ "eval_steps_per_second": 8.952,
+ "step": 2300
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 1.0380761523046093e-05,
+ "loss": 0.0881,
+ "step": 2400
+ },
+ {
+ "epoch": 0.96,
+ "eval_accuracy": 0.9500359571129707,
+ "eval_f1": 0.9500359571129707,
+ "eval_loss": 0.08347389101982117,
+ "eval_precision": 0.9500359571129707,
+ "eval_recall": 0.9500359571129707,
+ "eval_runtime": 69.6797,
+ "eval_samples_per_second": 143.213,
+ "eval_steps_per_second": 8.955,
+ "step": 2400
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 9.97995991983968e-06,
+ "loss": 0.0842,
+ "step": 2500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.9503955282426778,
+ "eval_f1": 0.9503955282426778,
+ "eval_loss": 0.08178979158401489,
+ "eval_precision": 0.9503955282426778,
+ "eval_recall": 0.9503955282426778,
+ "eval_runtime": 69.8247,
+ "eval_samples_per_second": 142.915,
+ "eval_steps_per_second": 8.937,
+ "step": 2500
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 4990,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 2,
+ "save_steps": 500,
+ "total_flos": 5226212486332416.0,
+ "train_batch_size": 16,
+ "trial_name": null,
+ "trial_params": null
+ }
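
The trainer state above encodes the run schedule: logging and evaluation every 100 steps, a checkpoint every 500 steps, batch size 16, 2 epochs over 4,990 total steps, and a best checkpoint tracked by eval_loss (0.0818 at step 2500). Below is a minimal, hedged sketch of a TrainingArguments configuration consistent with that schedule; the output directory and flags such as load_best_model_at_end are assumptions inferred from the state file, not taken from this commit.

# Hedged sketch: TrainingArguments consistent with the trainer_state.json above (transformers 4.37.x API).
# Only the schedule values mirror the state file; output_dir and best-model flags are assumed.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="JayR7/distilbert-base-cased",  # assumed; matches the best_model_checkpoint prefix
    num_train_epochs=2,
    per_device_train_batch_size=16,
    learning_rate=2e-5,                        # consistent with the linear decay (1.96e-5 at step 100 of 4990)
    evaluation_strategy="steps",
    eval_steps=100,
    logging_steps=100,
    save_steps=500,
    load_best_model_at_end=True,               # assumed, since best_metric/best_model_checkpoint are tracked
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)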
distilbert-base-cased/checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da0d85271a02ae6175d7c1e0b4bc6de536afd6c48c218ee02c7929cf1116aefd
+ size 4728
distilbert-base-cased/checkpoint-2500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
distilbert-base-cased/checkpoint-3000/config.json ADDED
@@ -0,0 +1,57 @@
+ {
+ "_name_or_path": "distilbert-base-cased",
+ "activation": "gelu",
+ "architectures": [
+ "DistilBertForTokenClassification"
+ ],
+ "attention_dropout": 0.1,
+ "dim": 768,
+ "dropout": 0.1,
+ "hidden_dim": 3072,
+ "id2label": {
+ "0": "B-ProductNameEn",
+ "1": "I-ProductNameEn",
+ "2": "B-TradeMarkEn",
+ "3": "I-TradeMarkEn",
+ "4": "B-Country",
+ "5": "I-Country",
+ "6": "B-HSCode",
+ "7": "I-HSCode",
+ "8": "B-HSCodeEn",
+ "9": "I-HSCodeEn",
+ "10": "B-ManufacturerEn",
+ "11": "I-ManufacturerEn",
+ "12": "B-ModelNo",
+ "13": "I-ModelNo"
+ },
+ "initializer_range": 0.02,
+ "label2id": {
+ "B-Country": 4,
+ "B-HSCode": 6,
+ "B-HSCodeEn": 8,
+ "B-ManufacturerEn": 10,
+ "B-ModelNo": 12,
+ "B-ProductNameEn": 0,
+ "B-TradeMarkEn": 2,
+ "I-Country": 5,
+ "I-HSCode": 7,
+ "I-HSCodeEn": 9,
+ "I-ManufacturerEn": 11,
+ "I-ModelNo": 13,
+ "I-ProductNameEn": 1,
+ "I-TradeMarkEn": 3
+ },
+ "max_position_embeddings": 512,
+ "model_type": "distilbert",
+ "n_heads": 12,
+ "n_layers": 6,
+ "output_past": true,
+ "pad_token_id": 0,
+ "qa_dropout": 0.1,
+ "seq_classif_dropout": 0.2,
+ "sinusoidal_pos_embds": false,
+ "tie_weights_": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.37.2",
+ "vocab_size": 28996
+ }
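
This config describes a DistilBERT token-classification head with 14 BIO labels for product and customs fields (product name, trademark, country, HS code, manufacturer, model number). As a hedged sketch only (the repository id and the example sentence are assumptions, not part of this commit), a checkpoint saved with this config could be loaded for inference as follows:

# Hedged sketch: loading a checkpoint that uses the config above for NER-style inference.
# "JayR7/distilbert-base-cased" is assumed from the commit path; a local checkpoint dir works too.
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

model_id = "JayR7/distilbert-base-cased"  # or e.g. "distilbert-base-cased/checkpoint-2500"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForTokenClassification.from_pretrained(model_id)

# aggregation_strategy="simple" merges B-/I- subword predictions into whole entity spans
ner = pipeline("token-classification", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
print(ner("Example declaration line with a brand name, model number and country of origin"))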
distilbert-base-cased/checkpoint-3000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8fc26d248cdaccab3b9bdfde50bc4e3178ee79de1b8fba73d7b43f9f8bdf9a0
+ size 260819048
distilbert-base-cased/checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21db337f10ab5cc3307b13972bb3b25b437944fac31364b422e8231b16beab50
+ size 521698874
distilbert-base-cased/checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ba5fed01faf2db1d553b012b7f1ad7f7984ed16a93567e67404d6440f61714a
+ size 14244
distilbert-base-cased/checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c96ccfbb993c3b6d42e2452b8de60ed9ffa995f21987cc81ea4e361a437e336
+ size 1064