Commit 049a232 (verified) · UniversalAlgorithmic · 1 parent: 0e90673

Upload 16 files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+qa/eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
qa/README.md ADDED
@@ -0,0 +1,55 @@
+---
+library_name: transformers
+license: apache-2.0
+base_model: google-bert/bert-base-uncased
+tags:
+- generated_from_trainer
+datasets:
+- squad
+model-index:
+- name: baseline
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# baseline
+
+This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on the squad dataset.
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 12
+- eval_batch_size: 8
+- seed: 42
+- optimizer: adamw_torch with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: linear
+- num_epochs: 2.0
+
+### Training results
+
+
+
+### Framework versions
+
+- Transformers 4.49.0
+- Pytorch 2.6.0+cu118
+- Datasets 3.3.1
+- Tokenizers 0.21.0
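
The card above is the auto-generated stub, so the quickest sanity check is loading the checkpoint and asking it a question. A minimal sketch, assuming the `qa/` directory uploaded in this commit is used as a local model path (any local clone of this repo works the same way):

```python
from transformers import pipeline

# "./qa" holds the config.json, model.safetensors, and tokenizer files
# added in this commit; a Hub repo id would work here as well.
qa = pipeline("question-answering", model="./qa", tokenizer="./qa")

answer = qa(
    question="What was the model fine-tuned on?",
    context="The baseline model is a fine-tuned version of bert-base-uncased "
            "trained on the SQuAD dataset for two epochs.",
)
print(answer)  # e.g. {'score': ..., 'start': ..., 'end': ..., 'answer': 'the SQuAD dataset'}
```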
qa/all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+  "epoch": 2.0,
+  "eval_exact_match": 81.49479659413434,
+  "eval_f1": 88.62945564424126,
+  "eval_runtime": 61.0301,
+  "eval_samples": 10784,
+  "eval_samples_per_second": 176.7,
+  "eval_steps_per_second": 22.087,
+  "total_flos": 3.541929151120589e+16,
+  "train_loss": 1.148573803161563,
+  "train_runtime": 3245.3985,
+  "train_samples": 88524,
+  "train_samples_per_second": 54.554,
+  "train_steps_per_second": 4.546
+}
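
For reference, `eval_exact_match` and `eval_f1` are the standard SQuAD v1.1 metrics over the 10,784 evaluation samples. A minimal sketch of how such numbers can be recomputed from `eval_predictions.json`, assuming the `evaluate` library's `squad` metric (the id and answer below are placeholders):

```python
import evaluate

squad = evaluate.load("squad")

# Shapes follow the squad metric; a real run would pair each id from
# eval_predictions.json with the gold answers in the SQuAD validation split.
predictions = [{"id": "example-0", "prediction_text": "two epochs"}]
references = [{"id": "example-0",
               "answers": {"text": ["two epochs"], "answer_start": [42]}}]

print(squad.compute(predictions=predictions, references=references))
# {'exact_match': 100.0, 'f1': 100.0}
```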
qa/config.json ADDED
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "google-bert/bert-base-uncased",
+  "architectures": [
+    "BertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
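
This is a stock BERT-base encoder with a `BertForQuestionAnswering` span-prediction head; nothing in the config is customized beyond the architecture name. A minimal sketch of rebuilding the architecture from this file alone (randomly initialized, no weights loaded):

```python
from transformers import AutoConfig, AutoModelForQuestionAnswering

config = AutoConfig.from_pretrained("./qa")                # reads qa/config.json
model = AutoModelForQuestionAnswering.from_config(config)  # random init, no checkpoint

# Roughly 1.1e8 float32 parameters for these dimensions, in line with the
# ~443 MB model.safetensors added below.
print(sum(p.numel() for p in model.parameters()))
```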
qa/eval_nbest_predictions.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b8d44953cbe0ce20d1d1b62b72e7adba18bf1dc81d055492e22bfa21ff46657
+size 49596120
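
This entry, like `model.safetensors`, the tfevents logs, and `training_args.bin` below, is a Git LFS pointer: three lines giving the spec version, the SHA-256 of the actual blob, and its size in bytes. A minimal sketch for checking a downloaded blob against its pointer (both file paths are placeholders):

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded file against a git-lfs pointer (version/oid/size)."""
    fields = dict(line.split(" ", 1)
                  for line in Path(pointer_path).read_text().splitlines())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    blob = Path(blob_path).read_bytes()
    return (len(blob) == expected_size
            and hashlib.sha256(blob).hexdigest() == expected_oid)

# verify_lfs_pointer("pointer.txt", "eval_nbest_predictions.json")
```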
qa/eval_predictions.json ADDED
The diff for this file is too large to render. See raw diff
 
qa/eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+  "epoch": 2.0,
+  "eval_exact_match": 81.49479659413434,
+  "eval_f1": 88.62945564424126,
+  "eval_runtime": 61.0301,
+  "eval_samples": 10784,
+  "eval_samples_per_second": 176.7,
+  "eval_steps_per_second": 22.087
+}
qa/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38003bd65e4bfa70dd16886f29af7ab00d1aa0ae4de191b0a7de4d7883d17dde
+size 442683784
qa/runs/May15_03-24-14_cs-Precision-7960-Tower/events.out.tfevents.1747293859.cs-Precision-7960-Tower.147971.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36bfca6273a2422943de7b634cf75efd69b8e92079abe84df9e9c9e026d497f6
+size 11535
qa/runs/May15_03-24-14_cs-Precision-7960-Tower/events.out.tfevents.1747297197.cs-Precision-7960-Tower.147971.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:259c79a03ba9c522b1fd728e92dae5cfc31c6cd73b2377d124749c83a0163910
+size 412
qa/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
qa/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
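
`added_tokens_decoder` pins the five BERT special tokens to their classic WordPiece ids (0, 100, 101, 102, 103), and `do_lower_case: true` matches the uncased base model. A minimal sketch confirming the mapping once the tokenizer is loaded from this directory:

```python
from transformers import AutoTokenizer

# Reads tokenizer_config.json, vocab.txt, and tokenizer.json from this commit.
tok = AutoTokenizer.from_pretrained("./qa")

print(tok.pad_token_id, tok.unk_token_id, tok.cls_token_id,
      tok.sep_token_id, tok.mask_token_id)  # 0 100 101 102 103

# QA pairs encode as [CLS] question [SEP] context [SEP], lower-cased and
# truncated at model_max_length (512).
print(tok("Who wrote it?", "It was written by someone.")["input_ids"][0])  # 101
```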
qa/train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+  "epoch": 2.0,
+  "total_flos": 3.541929151120589e+16,
+  "train_loss": 1.148573803161563,
+  "train_runtime": 3245.3985,
+  "train_samples": 88524,
+  "train_samples_per_second": 54.554,
+  "train_steps_per_second": 4.546
+}
qa/trainer_state.json ADDED
@@ -0,0 +1,245 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 14754,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06777822963264199,
+      "grad_norm": 31.397275924682617,
+      "learning_rate": 2.8983326555510372e-05,
+      "loss": 2.7299,
+      "step": 500
+    },
+    {
+      "epoch": 0.13555645926528398,
+      "grad_norm": 25.8492431640625,
+      "learning_rate": 2.796665311102074e-05,
+      "loss": 1.752,
+      "step": 1000
+    },
+    {
+      "epoch": 0.203334688897926,
+      "grad_norm": 29.627431869506836,
+      "learning_rate": 2.694997966653111e-05,
+      "loss": 1.5588,
+      "step": 1500
+    },
+    {
+      "epoch": 0.27111291853056796,
+      "grad_norm": 21.147193908691406,
+      "learning_rate": 2.593330622204148e-05,
+      "loss": 1.5014,
+      "step": 2000
+    },
+    {
+      "epoch": 0.33889114816321,
+      "grad_norm": 17.81966781616211,
+      "learning_rate": 2.491663277755185e-05,
+      "loss": 1.4768,
+      "step": 2500
+    },
+    {
+      "epoch": 0.406669377795852,
+      "grad_norm": 20.26822853088379,
+      "learning_rate": 2.389995933306222e-05,
+      "loss": 1.4064,
+      "step": 3000
+    },
+    {
+      "epoch": 0.47444760742849396,
+      "grad_norm": 16.216028213500977,
+      "learning_rate": 2.288328588857259e-05,
+      "loss": 1.3502,
+      "step": 3500
+    },
+    {
+      "epoch": 0.5422258370611359,
+      "grad_norm": 17.930505752563477,
+      "learning_rate": 2.1866612444082963e-05,
+      "loss": 1.3101,
+      "step": 4000
+    },
+    {
+      "epoch": 0.6100040666937779,
+      "grad_norm": 26.499574661254883,
+      "learning_rate": 2.084993899959333e-05,
+      "loss": 1.2922,
+      "step": 4500
+    },
+    {
+      "epoch": 0.67778229632642,
+      "grad_norm": 26.83368492126465,
+      "learning_rate": 1.9833265555103702e-05,
+      "loss": 1.3053,
+      "step": 5000
+    },
+    {
+      "epoch": 0.745560525959062,
+      "grad_norm": 22.85872459411621,
+      "learning_rate": 1.8816592110614073e-05,
+      "loss": 1.2555,
+      "step": 5500
+    },
+    {
+      "epoch": 0.813338755591704,
+      "grad_norm": 23.48080825805664,
+      "learning_rate": 1.779991866612444e-05,
+      "loss": 1.2068,
+      "step": 6000
+    },
+    {
+      "epoch": 0.8811169852243459,
+      "grad_norm": 20.919252395629883,
+      "learning_rate": 1.6783245221634812e-05,
+      "loss": 1.1991,
+      "step": 6500
+    },
+    {
+      "epoch": 0.9488952148569879,
+      "grad_norm": 23.9005126953125,
+      "learning_rate": 1.576657177714518e-05,
+      "loss": 1.2156,
+      "step": 7000
+    },
+    {
+      "epoch": 1.01667344448963,
+      "grad_norm": 22.660743713378906,
+      "learning_rate": 1.4749898332655551e-05,
+      "loss": 1.0827,
+      "step": 7500
+    },
+    {
+      "epoch": 1.0844516741222718,
+      "grad_norm": 25.28419303894043,
+      "learning_rate": 1.373322488816592e-05,
+      "loss": 0.8481,
+      "step": 8000
+    },
+    {
+      "epoch": 1.152229903754914,
+      "grad_norm": 14.510698318481445,
+      "learning_rate": 1.271655144367629e-05,
+      "loss": 0.872,
+      "step": 8500
+    },
+    {
+      "epoch": 1.2200081333875559,
+      "grad_norm": 29.12289810180664,
+      "learning_rate": 1.1699877999186661e-05,
+      "loss": 0.8375,
+      "step": 9000
+    },
+    {
+      "epoch": 1.287786363020198,
+      "grad_norm": 19.038454055786133,
+      "learning_rate": 1.0683204554697033e-05,
+      "loss": 0.8464,
+      "step": 9500
+    },
+    {
+      "epoch": 1.35556459265284,
+      "grad_norm": 21.09101676940918,
+      "learning_rate": 9.666531110207402e-06,
+      "loss": 0.8746,
+      "step": 10000
+    },
+    {
+      "epoch": 1.4233428222854818,
+      "grad_norm": 20.79250144958496,
+      "learning_rate": 8.649857665717772e-06,
+      "loss": 0.8776,
+      "step": 10500
+    },
+    {
+      "epoch": 1.491121051918124,
+      "grad_norm": 21.217571258544922,
+      "learning_rate": 7.633184221228141e-06,
+      "loss": 0.8523,
+      "step": 11000
+    },
+    {
+      "epoch": 1.5588992815507658,
+      "grad_norm": 15.557079315185547,
+      "learning_rate": 6.616510776738511e-06,
+      "loss": 0.8387,
+      "step": 11500
+    },
+    {
+      "epoch": 1.626677511183408,
+      "grad_norm": 14.53345012664795,
+      "learning_rate": 5.5998373322488825e-06,
+      "loss": 0.8377,
+      "step": 12000
+    },
+    {
+      "epoch": 1.6944557408160499,
+      "grad_norm": 26.921611785888672,
+      "learning_rate": 4.583163887759252e-06,
+      "loss": 0.8449,
+      "step": 12500
+    },
+    {
+      "epoch": 1.7622339704486918,
+      "grad_norm": 12.789366722106934,
+      "learning_rate": 3.566490443269622e-06,
+      "loss": 0.8547,
+      "step": 13000
+    },
+    {
+      "epoch": 1.830012200081334,
+      "grad_norm": 37.19759750366211,
+      "learning_rate": 2.549816998779992e-06,
+      "loss": 0.818,
+      "step": 13500
+    },
+    {
+      "epoch": 1.8977904297139758,
+      "grad_norm": 14.62682819366455,
+      "learning_rate": 1.533143554290362e-06,
+      "loss": 0.8128,
+      "step": 14000
+    },
+    {
+      "epoch": 1.965568659346618,
+      "grad_norm": 21.051790237426758,
+      "learning_rate": 5.164701098007319e-07,
+      "loss": 0.8115,
+      "step": 14500
+    },
+    {
+      "epoch": 2.0,
+      "step": 14754,
+      "total_flos": 3.541929151120589e+16,
+      "train_loss": 1.148573803161563,
+      "train_runtime": 3245.3985,
+      "train_samples_per_second": 54.554,
+      "train_steps_per_second": 4.546
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 14754,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.541929151120589e+16,
+  "train_batch_size": 12,
+  "trial_name": null,
+  "trial_params": null
+}
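
The `learning_rate` column in `log_history` follows the linear scheduler from the card: 3e-05 decayed to zero over the 14,754 total steps, apparently without warmup. A quick sketch checking the logged values against the closed form:

```python
# Linear schedule, no warmup: lr(step) = base_lr * (max_steps - step) / max_steps
base_lr, max_steps = 3e-5, 14754

for step in (500, 7500, 14500):
    print(step, base_lr * (max_steps - step) / max_steps)
# ~2.8983e-05, ~1.4750e-05, ~5.1647e-07 -- matching the log_history entries above
```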
qa/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe8e61ba1ca1cb106ca9adca5e9262fa9a262238814728a69256855c78c32f51
+size 5304
qa/vocab.txt ADDED
The diff for this file is too large to render. See raw diff