thainq107 committed
Commit df88ed0 · 1 Parent(s): 5bac2fd

End of training

README.md ADDED
@@ -0,0 +1,68 @@
+ ---
+ license: apache-2.0
+ tags:
+ - generated_from_trainer
+ metrics:
+ - accuracy
+ model-index:
+ - name: flan-t5-small-twitter-sentiment-analysis-lora
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # flan-t5-small-twitter-sentiment-analysis-lora
+
+ This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.2093
+ - Accuracy: 0.8115
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 128
+ - eval_batch_size: 128
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 10.0
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
+ | 0.2573        | 1.0   | 938  | 0.2270          | 0.7941   |
+ | 0.2369        | 2.0   | 1876 | 0.2176          | 0.8057   |
+ | 0.2286        | 3.0   | 2814 | 0.2132          | 0.8091   |
+ | 0.2227        | 4.0   | 3752 | 0.2089          | 0.8122   |
+ | 0.2204        | 5.0   | 4690 | 0.2083          | 0.8141   |
+ | 0.2191        | 6.0   | 5628 | 0.2077          | 0.8143   |
+ | 0.2183        | 7.0   | 6566 | 0.2048          | 0.8169   |
+ | 0.2155        | 8.0   | 7504 | 0.2066          | 0.8156   |
+ | 0.2164        | 9.0   | 8442 | 0.2047          | 0.8174   |
+ | 0.2167        | 10.0  | 9380 | 0.2041          | 0.8177   |
+
+
+ ### Framework versions
+
+ - Transformers 4.27.1
+ - Pytorch 2.0.1
+ - Datasets 2.9.0
+ - Tokenizers 0.13.3
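The card stops short of a usage example. Below is a minimal inference sketch (an editorial addition, not part of the committed file): it assumes the repo publishes a PEFT LoRA adapter on top of google/flan-t5-small; the repo id and the "positive"/"negative" label vocabulary are illustrative guesses, not confirmed by this diff.

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel

# Base model named in the card above.
base = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")

# Hypothetical repo id, mirroring the model-index name; assumes a LoRA
# adapter (adapter_config.json + weights) is hosted under it.
model = PeftModel.from_pretrained(
    base, "thainq107/flan-t5-small-twitter-sentiment-analysis-lora"
)

inputs = tokenizer("I love this phone!", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=8)
print(tokenizer.decode(out[0], skip_special_tokens=True))  # e.g. "positive"
```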
all_results.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "epoch": 10.0,
+   "eval_accuracy": 0.8115100487112488,
+   "eval_loss": 0.2092970758676529,
+   "eval_runtime": 178.7127,
+   "eval_samples": 29997,
+   "eval_samples_per_second": 346.914,
+   "eval_steps_per_second": 2.714,
+   "gpu_memory": 19726,
+   "test_samples": 61998,
+   "train_loss": 0.22465838043928654,
+   "train_runtime": 5138.7739,
+   "train_samples": 119988,
+   "train_samples_per_second": 233.495,
+   "train_steps_per_second": 1.825
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 10.0,
+   "eval_accuracy": 0.8177151048438177,
+   "eval_loss": 0.2040574848651886,
+   "eval_runtime": 86.0001,
+   "eval_samples": 29997,
+   "eval_samples_per_second": 348.802,
+   "eval_steps_per_second": 2.733
+ }
logs/events.out.tfevents.1695402211.serverailab12gb-System-Product-Name.12427.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c8b9fd21716ede26eab5d39688ecaab03bbd4fe9dde3884816bc3111405158b
- size 11134
+ oid sha256:51e9413e41b309b737da4f0475424b25222f560086bd9870fcb6ac9717b8cb95
+ size 11488
logs/events.out.tfevents.1695407436.serverailab12gb-System-Product-Name.12427.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b80ec44b5c2a4d08272f9b2a88a7dbaad264ce68c63ddac60f6dcd85957f8400
+ size 734
predict_results.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
+ {
+   "additional_special_tokens": [
+     "<extra_id_0>",
+     "<extra_id_1>",
+     "<extra_id_2>",
+     "<extra_id_3>",
+     "<extra_id_4>",
+     "<extra_id_5>",
+     "<extra_id_6>",
+     "<extra_id_7>",
+     "<extra_id_8>",
+     "<extra_id_9>",
+     "<extra_id_10>",
+     "<extra_id_11>",
+     "<extra_id_12>",
+     "<extra_id_13>",
+     "<extra_id_14>",
+     "<extra_id_15>",
+     "<extra_id_16>",
+     "<extra_id_17>",
+     "<extra_id_18>",
+     "<extra_id_19>",
+     "<extra_id_20>",
+     "<extra_id_21>",
+     "<extra_id_22>",
+     "<extra_id_23>",
+     "<extra_id_24>",
+     "<extra_id_25>",
+     "<extra_id_26>",
+     "<extra_id_27>",
+     "<extra_id_28>",
+     "<extra_id_29>",
+     "<extra_id_30>",
+     "<extra_id_31>",
+     "<extra_id_32>",
+     "<extra_id_33>",
+     "<extra_id_34>",
+     "<extra_id_35>",
+     "<extra_id_36>",
+     "<extra_id_37>",
+     "<extra_id_38>",
+     "<extra_id_39>",
+     "<extra_id_40>",
+     "<extra_id_41>",
+     "<extra_id_42>",
+     "<extra_id_43>",
+     "<extra_id_44>",
+     "<extra_id_45>",
+     "<extra_id_46>",
+     "<extra_id_47>",
+     "<extra_id_48>",
+     "<extra_id_49>",
+     "<extra_id_50>",
+     "<extra_id_51>",
+     "<extra_id_52>",
+     "<extra_id_53>",
+     "<extra_id_54>",
+     "<extra_id_55>",
+     "<extra_id_56>",
+     "<extra_id_57>",
+     "<extra_id_58>",
+     "<extra_id_59>",
+     "<extra_id_60>",
+     "<extra_id_61>",
+     "<extra_id_62>",
+     "<extra_id_63>",
+     "<extra_id_64>",
+     "<extra_id_65>",
+     "<extra_id_66>",
+     "<extra_id_67>",
+     "<extra_id_68>",
+     "<extra_id_69>",
+     "<extra_id_70>",
+     "<extra_id_71>",
+     "<extra_id_72>",
+     "<extra_id_73>",
+     "<extra_id_74>",
+     "<extra_id_75>",
+     "<extra_id_76>",
+     "<extra_id_77>",
+     "<extra_id_78>",
+     "<extra_id_79>",
+     "<extra_id_80>",
+     "<extra_id_81>",
+     "<extra_id_82>",
+     "<extra_id_83>",
+     "<extra_id_84>",
+     "<extra_id_85>",
+     "<extra_id_86>",
+     "<extra_id_87>",
+     "<extra_id_88>",
+     "<extra_id_89>",
+     "<extra_id_90>",
+     "<extra_id_91>",
+     "<extra_id_92>",
+     "<extra_id_93>",
+     "<extra_id_94>",
+     "<extra_id_95>",
+     "<extra_id_96>",
+     "<extra_id_97>",
+     "<extra_id_98>",
+     "<extra_id_99>"
+   ],
+   "eos_token": "</s>",
+   "pad_token": "<pad>",
+   "unk_token": "<unk>"
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
test_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 10.0,
+   "eval_accuracy": 0.8115100487112488,
+   "eval_loss": 0.2092970758676529,
+   "eval_runtime": 178.7127,
+   "eval_samples_per_second": 346.914,
+   "eval_steps_per_second": 2.714,
+   "test_samples": 61998
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
+ {
+   "additional_special_tokens": [
+     "<extra_id_0>",
+     "<extra_id_1>",
+     "<extra_id_2>",
+     "<extra_id_3>",
+     "<extra_id_4>",
+     "<extra_id_5>",
+     "<extra_id_6>",
+     "<extra_id_7>",
+     "<extra_id_8>",
+     "<extra_id_9>",
+     "<extra_id_10>",
+     "<extra_id_11>",
+     "<extra_id_12>",
+     "<extra_id_13>",
+     "<extra_id_14>",
+     "<extra_id_15>",
+     "<extra_id_16>",
+     "<extra_id_17>",
+     "<extra_id_18>",
+     "<extra_id_19>",
+     "<extra_id_20>",
+     "<extra_id_21>",
+     "<extra_id_22>",
+     "<extra_id_23>",
+     "<extra_id_24>",
+     "<extra_id_25>",
+     "<extra_id_26>",
+     "<extra_id_27>",
+     "<extra_id_28>",
+     "<extra_id_29>",
+     "<extra_id_30>",
+     "<extra_id_31>",
+     "<extra_id_32>",
+     "<extra_id_33>",
+     "<extra_id_34>",
+     "<extra_id_35>",
+     "<extra_id_36>",
+     "<extra_id_37>",
+     "<extra_id_38>",
+     "<extra_id_39>",
+     "<extra_id_40>",
+     "<extra_id_41>",
+     "<extra_id_42>",
+     "<extra_id_43>",
+     "<extra_id_44>",
+     "<extra_id_45>",
+     "<extra_id_46>",
+     "<extra_id_47>",
+     "<extra_id_48>",
+     "<extra_id_49>",
+     "<extra_id_50>",
+     "<extra_id_51>",
+     "<extra_id_52>",
+     "<extra_id_53>",
+     "<extra_id_54>",
+     "<extra_id_55>",
+     "<extra_id_56>",
+     "<extra_id_57>",
+     "<extra_id_58>",
+     "<extra_id_59>",
+     "<extra_id_60>",
+     "<extra_id_61>",
+     "<extra_id_62>",
+     "<extra_id_63>",
+     "<extra_id_64>",
+     "<extra_id_65>",
+     "<extra_id_66>",
+     "<extra_id_67>",
+     "<extra_id_68>",
+     "<extra_id_69>",
+     "<extra_id_70>",
+     "<extra_id_71>",
+     "<extra_id_72>",
+     "<extra_id_73>",
+     "<extra_id_74>",
+     "<extra_id_75>",
+     "<extra_id_76>",
+     "<extra_id_77>",
+     "<extra_id_78>",
+     "<extra_id_79>",
+     "<extra_id_80>",
+     "<extra_id_81>",
+     "<extra_id_82>",
+     "<extra_id_83>",
+     "<extra_id_84>",
+     "<extra_id_85>",
+     "<extra_id_86>",
+     "<extra_id_87>",
+     "<extra_id_88>",
+     "<extra_id_89>",
+     "<extra_id_90>",
+     "<extra_id_91>",
+     "<extra_id_92>",
+     "<extra_id_93>",
+     "<extra_id_94>",
+     "<extra_id_95>",
+     "<extra_id_96>",
+     "<extra_id_97>",
+     "<extra_id_98>",
+     "<extra_id_99>"
+   ],
+   "eos_token": "</s>",
+   "extra_ids": 100,
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "special_tokens_map_file": "/home/younes_huggingface_co/.cache/huggingface/hub/models--google--t5-v1_1-small/snapshots/fb7e6cba609f7bab11c614294bc04f82f613c7b1/special_tokens_map.json",
+   "tokenizer_class": "T5Tokenizer",
+   "unk_token": "<unk>"
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 10.0,
+   "gpu_memory": 19726,
+   "train_loss": 0.22465838043928654,
+   "train_runtime": 5138.7739,
+   "train_samples": 119988,
+   "train_samples_per_second": 233.495,
+   "train_steps_per_second": 1.825
+ }
trainer_state.json ADDED
@@ -0,0 +1,223 @@
+ {
+   "best_metric": 0.8177151048438177,
+   "best_model_checkpoint": "save_model/flan-t5-small-twitter-sentiment-analysis-lora/checkpoint-9380",
+   "epoch": 10.0,
+   "global_step": 9380,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.53,
+       "learning_rate": 4.7334754797441364e-05,
+       "loss": 0.2573,
+       "step": 500
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.7940794079407941,
+       "eval_loss": 0.22702552378177643,
+       "eval_runtime": 86.6302,
+       "eval_samples_per_second": 346.265,
+       "eval_steps_per_second": 2.713,
+       "step": 938
+     },
+     {
+       "epoch": 1.07,
+       "learning_rate": 4.466950959488273e-05,
+       "loss": 0.2445,
+       "step": 1000
+     },
+     {
+       "epoch": 1.6,
+       "learning_rate": 4.2004264392324094e-05,
+       "loss": 0.2369,
+       "step": 1500
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.8056805680568057,
+       "eval_loss": 0.21760082244873047,
+       "eval_runtime": 86.3882,
+       "eval_samples_per_second": 347.235,
+       "eval_steps_per_second": 2.72,
+       "step": 1876
+     },
+     {
+       "epoch": 2.13,
+       "learning_rate": 3.9339019189765456e-05,
+       "loss": 0.2314,
+       "step": 2000
+     },
+     {
+       "epoch": 2.67,
+       "learning_rate": 3.6673773987206824e-05,
+       "loss": 0.2286,
+       "step": 2500
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.8091142447578091,
+       "eval_loss": 0.2131662368774414,
+       "eval_runtime": 86.4294,
+       "eval_samples_per_second": 347.069,
+       "eval_steps_per_second": 2.719,
+       "step": 2814
+     },
+     {
+       "epoch": 3.2,
+       "learning_rate": 3.400852878464819e-05,
+       "loss": 0.2246,
+       "step": 3000
+     },
+     {
+       "epoch": 3.73,
+       "learning_rate": 3.1343283582089554e-05,
+       "loss": 0.2227,
+       "step": 3500
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.8122478914558122,
+       "eval_loss": 0.20891216397285461,
+       "eval_runtime": 86.5057,
+       "eval_samples_per_second": 346.763,
+       "eval_steps_per_second": 2.717,
+       "step": 3752
+     },
+     {
+       "epoch": 4.26,
+       "learning_rate": 2.867803837953092e-05,
+       "loss": 0.223,
+       "step": 4000
+     },
+     {
+       "epoch": 4.8,
+       "learning_rate": 2.6012793176972285e-05,
+       "loss": 0.2204,
+       "step": 4500
+     },
+     {
+       "epoch": 5.0,
+       "eval_accuracy": 0.8141480814748141,
+       "eval_loss": 0.2083190530538559,
+       "eval_runtime": 86.2794,
+       "eval_samples_per_second": 347.673,
+       "eval_steps_per_second": 2.724,
+       "step": 4690
+     },
+     {
+       "epoch": 5.33,
+       "learning_rate": 2.3347547974413646e-05,
+       "loss": 0.2216,
+       "step": 5000
+     },
+     {
+       "epoch": 5.86,
+       "learning_rate": 2.068230277185501e-05,
+       "loss": 0.2191,
+       "step": 5500
+     },
+     {
+       "epoch": 6.0,
+       "eval_accuracy": 0.8143481014768144,
+       "eval_loss": 0.2076815366744995,
+       "eval_runtime": 86.2336,
+       "eval_samples_per_second": 347.858,
+       "eval_steps_per_second": 2.725,
+       "step": 5628
+     },
+     {
+       "epoch": 6.4,
+       "learning_rate": 1.8017057569296376e-05,
+       "loss": 0.2178,
+       "step": 6000
+     },
+     {
+       "epoch": 6.93,
+       "learning_rate": 1.535181236673774e-05,
+       "loss": 0.2183,
+       "step": 6500
+     },
+     {
+       "epoch": 7.0,
+       "eval_accuracy": 0.8169150248358169,
+       "eval_loss": 0.204830601811409,
+       "eval_runtime": 86.2943,
+       "eval_samples_per_second": 347.613,
+       "eval_steps_per_second": 2.723,
+       "step": 6566
+     },
+     {
+       "epoch": 7.46,
+       "learning_rate": 1.2686567164179105e-05,
+       "loss": 0.2186,
+       "step": 7000
+     },
+     {
+       "epoch": 8.0,
+       "learning_rate": 1.002132196162047e-05,
+       "loss": 0.2155,
+       "step": 7500
+     },
+     {
+       "epoch": 8.0,
+       "eval_accuracy": 0.8156148948228156,
+       "eval_loss": 0.206589013338089,
+       "eval_runtime": 86.3165,
+       "eval_samples_per_second": 347.523,
+       "eval_steps_per_second": 2.723,
+       "step": 7504
+     },
+     {
+       "epoch": 8.53,
+       "learning_rate": 7.356076759061833e-06,
+       "loss": 0.2164,
+       "step": 8000
+     },
+     {
+       "epoch": 9.0,
+       "eval_accuracy": 0.8174150748408174,
+       "eval_loss": 0.20466509461402893,
+       "eval_runtime": 86.2755,
+       "eval_samples_per_second": 347.688,
+       "eval_steps_per_second": 2.724,
+       "step": 8442
+     },
+     {
+       "epoch": 9.06,
+       "learning_rate": 4.690831556503199e-06,
+       "loss": 0.2156,
+       "step": 8500
+     },
+     {
+       "epoch": 9.59,
+       "learning_rate": 2.025586353944563e-06,
+       "loss": 0.2167,
+       "step": 9000
+     },
+     {
+       "epoch": 10.0,
+       "eval_accuracy": 0.8177151048438177,
+       "eval_loss": 0.2040574848651886,
+       "eval_runtime": 86.4923,
+       "eval_samples_per_second": 346.817,
+       "eval_steps_per_second": 2.717,
+       "step": 9380
+     },
+     {
+       "epoch": 10.0,
+       "step": 9380,
+       "total_flos": 1.1215726473904128e+17,
+       "train_loss": 0.22465838043928654,
+       "train_runtime": 5138.7739,
+       "train_samples_per_second": 233.495,
+       "train_steps_per_second": 1.825
+     }
+   ],
+   "max_steps": 9380,
+   "num_train_epochs": 10,
+   "total_flos": 1.1215726473904128e+17,
+   "trial_name": null,
+   "trial_params": null
+ }
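To recover the per-epoch eval curve from this file, a short sketch (editorial, not part of the commit; it only relies on the log_history structure shown above, and assumes the file is saved locally as trainer_state.json):

```python
import json

# Load the trainer state written by transformers.Trainer.
with open("trainer_state.json") as f:
    state = json.load(f)

# Eval records in log_history carry "eval_accuracy"; the interleaved
# optimizer-step records carry "loss"/"learning_rate" instead.
for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f"epoch {entry['epoch']:>4}: "
              f"acc={entry['eval_accuracy']:.4f} loss={entry['eval_loss']:.4f}")

print("best:", state["best_metric"], "->", state["best_model_checkpoint"])
```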