WpythonW committed on
Commit
b297c91
·
verified ·
1 Parent(s): 981f4d3

Upload folder using huggingface_hub

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ae32286831883c3b1833ed0c21764008ee3cb690aa947501478767a461c5a83
3
  size 88267432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f2afdc8a317e49097600321b2dfb755c839a630cc1f71155b585d3ceaa59c45
3
  size 88267432
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eef0d344dc7c5e9180eda212cdb90b24bbc7adb1e6748db6d283d9c84790b76a
3
  size 176666106
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac88973c2d55093d476f47113a923099eefec1ffa04aea584d69b41b2c9192db
3
  size 176666106
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.485,
13
+ 0.456,
14
+ 0.406
15
+ ],
16
+ "image_processor_type": "BitImageProcessor",
17
+ "image_std": [
18
+ 0.229,
19
+ 0.224,
20
+ 0.225
21
+ ],
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 256
26
+ }
27
+ }
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25d8f282f995a3c1a59d83dfe504bd41bcdf8f79e071dbd6f372623251fc02b2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fd1ce5b56368017ca604769d5a033115cd645195ee71523b8fc56abe270f26f
3
  size 1064
trainer_state.json CHANGED
@@ -1,160 +1,112 @@
1
  {
2
- "best_metric": 0.9713101001869517,
3
- "best_model_checkpoint": "./results/checkpoint-1500",
4
- "epoch": 0.2901353965183752,
5
  "eval_steps": 500,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.019342359767891684,
13
- "grad_norm": 1871413.875,
14
  "learning_rate": 1e-05,
15
- "loss": 0.6564,
16
  "step": 100
17
  },
18
  {
19
- "epoch": 0.03868471953578337,
20
- "grad_norm": 3112622.75,
21
  "learning_rate": 2e-05,
22
- "loss": 0.4794,
23
  "step": 200
24
  },
25
  {
26
- "epoch": 0.058027079303675046,
27
- "grad_norm": 2537678.5,
28
  "learning_rate": 3e-05,
29
- "loss": 0.4037,
30
  "step": 300
31
  },
32
  {
33
- "epoch": 0.07736943907156674,
34
- "grad_norm": 2662464.0,
35
  "learning_rate": 4e-05,
36
- "loss": 0.3265,
37
  "step": 400
38
  },
39
  {
40
- "epoch": 0.09671179883945841,
41
- "grad_norm": 1437540.625,
42
  "learning_rate": 5e-05,
43
- "loss": 0.3744,
44
  "step": 500
45
  },
46
  {
47
- "epoch": 0.09671179883945841,
48
- "eval_accuracy": 0.8466634429400387,
49
- "eval_f1": 0.8663012543480553,
50
- "eval_loss": 0.33316776156425476,
51
- "eval_precision": 0.7684238083662135,
52
- "eval_recall": 0.9927523977484116,
53
- "eval_roc_auc": 0.8465467895292255,
54
- "eval_runtime": 1179.0948,
55
- "eval_samples_per_second": 140.311,
56
- "eval_steps_per_second": 0.549,
57
  "step": 500
58
  },
59
  {
60
- "epoch": 0.11605415860735009,
61
- "grad_norm": 2934780.75,
62
- "learning_rate": 4.892933618843683e-05,
63
- "loss": 0.3006,
64
  "step": 600
65
  },
66
  {
67
- "epoch": 0.13539651837524178,
68
- "grad_norm": 1091141.375,
69
- "learning_rate": 4.785867237687366e-05,
70
- "loss": 0.2714,
71
  "step": 700
72
  },
73
  {
74
- "epoch": 0.15473887814313347,
75
- "grad_norm": 3842419.0,
76
- "learning_rate": 4.6788008565310496e-05,
77
- "loss": 0.1778,
78
  "step": 800
79
  },
80
  {
81
- "epoch": 0.17408123791102514,
82
- "grad_norm": 1596435.75,
83
- "learning_rate": 4.571734475374733e-05,
84
- "loss": 0.1743,
85
  "step": 900
86
  },
87
  {
88
- "epoch": 0.19342359767891681,
89
- "grad_norm": 1802532.125,
90
- "learning_rate": 4.4646680942184155e-05,
91
- "loss": 0.1531,
92
  "step": 1000
93
  },
94
  {
95
- "epoch": 0.19342359767891681,
96
- "eval_accuracy": 0.9334683268858801,
97
- "eval_f1": 0.929946602343387,
98
- "eval_loss": 0.16633380949497223,
99
- "eval_precision": 0.9827945706714019,
100
- "eval_recall": 0.8824922088275795,
101
- "eval_roc_auc": 0.9335090317978244,
102
- "eval_runtime": 970.6452,
103
- "eval_samples_per_second": 170.443,
104
- "eval_steps_per_second": 0.667,
105
  "step": 1000
106
- },
107
- {
108
- "epoch": 0.2127659574468085,
109
- "grad_norm": 1385128.5,
110
- "learning_rate": 4.357601713062099e-05,
111
- "loss": 0.1432,
112
- "step": 1100
113
- },
114
- {
115
- "epoch": 0.23210831721470018,
116
- "grad_norm": 306230.6875,
117
- "learning_rate": 4.2505353319057815e-05,
118
- "loss": 0.1386,
119
- "step": 1200
120
- },
121
- {
122
- "epoch": 0.2514506769825919,
123
- "grad_norm": 1276595.5,
124
- "learning_rate": 4.143468950749465e-05,
125
- "loss": 0.1273,
126
- "step": 1300
127
- },
128
- {
129
- "epoch": 0.27079303675048355,
130
- "grad_norm": 417115.5625,
131
- "learning_rate": 4.036402569593148e-05,
132
- "loss": 0.1075,
133
- "step": 1400
134
- },
135
- {
136
- "epoch": 0.2901353965183752,
137
- "grad_norm": 1014551.625,
138
- "learning_rate": 3.9293361884368315e-05,
139
- "loss": 0.1314,
140
- "step": 1500
141
- },
142
- {
143
- "epoch": 0.2901353965183752,
144
- "eval_accuracy": 0.971058994197292,
145
- "eval_f1": 0.9713101001869517,
146
- "eval_loss": 0.08320203423500061,
147
- "eval_precision": 0.9637107322061307,
148
- "eval_recall": 0.9790302708187375,
149
- "eval_roc_auc": 0.971052629057589,
150
- "eval_runtime": 954.2031,
151
- "eval_samples_per_second": 173.38,
152
- "eval_steps_per_second": 0.678,
153
- "step": 1500
154
  }
155
  ],
156
  "logging_steps": 100,
157
- "max_steps": 5170,
158
  "num_input_tokens_seen": 0,
159
  "num_train_epochs": 1,
160
  "save_steps": 500,
@@ -170,7 +122,7 @@
170
  "attributes": {}
171
  }
172
  },
173
- "total_flos": 4.9963434246144e+18,
174
  "train_batch_size": 128,
175
  "trial_name": null,
176
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9534415899175958,
3
+ "best_model_checkpoint": "./results/checkpoint-1000",
4
+ "epoch": 0.1547269070091289,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01547269070091289,
13
+ "grad_norm": 2257314.0,
14
  "learning_rate": 1e-05,
15
+ "loss": 0.6363,
16
  "step": 100
17
  },
18
  {
19
+ "epoch": 0.03094538140182578,
20
+ "grad_norm": 3129538.0,
21
  "learning_rate": 2e-05,
22
+ "loss": 0.4287,
23
  "step": 200
24
  },
25
  {
26
+ "epoch": 0.04641807210273867,
27
+ "grad_norm": 3898528.75,
28
  "learning_rate": 3e-05,
29
+ "loss": 0.3508,
30
  "step": 300
31
  },
32
  {
33
+ "epoch": 0.06189076280365156,
34
+ "grad_norm": 1689890.875,
35
  "learning_rate": 4e-05,
36
+ "loss": 0.4636,
37
  "step": 400
38
  },
39
  {
40
+ "epoch": 0.07736345350456444,
41
+ "grad_norm": 2084754.375,
42
  "learning_rate": 5e-05,
43
+ "loss": 0.268,
44
  "step": 500
45
  },
46
  {
47
+ "epoch": 0.07736345350456444,
48
+ "eval_accuracy": 0.9384707581369304,
49
+ "eval_f1": 0.9266550339028704,
50
+ "eval_loss": 0.16631442308425903,
51
+ "eval_precision": 0.877639751552795,
52
+ "eval_recall": 0.9814690779191784,
53
+ "eval_roc_auc": 0.945872962460679,
54
+ "eval_runtime": 764.2798,
55
+ "eval_samples_per_second": 266.366,
56
+ "eval_steps_per_second": 1.042,
57
  "step": 500
58
  },
59
  {
60
+ "epoch": 0.09283614420547734,
61
+ "grad_norm": 2382709.75,
62
+ "learning_rate": 4.916149589132987e-05,
63
+ "loss": 0.2394,
64
  "step": 600
65
  },
66
  {
67
+ "epoch": 0.10830883490639022,
68
+ "grad_norm": 936833.5,
69
+ "learning_rate": 4.832299178265974e-05,
70
+ "loss": 0.2343,
71
  "step": 700
72
  },
73
  {
74
+ "epoch": 0.12378152560730311,
75
+ "grad_norm": 1470909.625,
76
+ "learning_rate": 4.7484487673989605e-05,
77
+ "loss": 0.1875,
78
  "step": 800
79
  },
80
  {
81
+ "epoch": 0.139254216308216,
82
+ "grad_norm": 1565031.125,
83
+ "learning_rate": 4.664598356531947e-05,
84
+ "loss": 0.1592,
85
  "step": 900
86
  },
87
  {
88
+ "epoch": 0.1547269070091289,
89
+ "grad_norm": 450809.3125,
90
+ "learning_rate": 4.580747945664934e-05,
91
+ "loss": 0.1575,
92
  "step": 1000
93
  },
94
  {
95
+ "epoch": 0.1547269070091289,
96
+ "eval_accuracy": 0.9622552535146234,
97
+ "eval_f1": 0.9534415899175958,
98
+ "eval_loss": 0.11282139271497726,
99
+ "eval_precision": 0.9320050226255064,
100
+ "eval_recall": 0.9758874748827864,
101
+ "eval_roc_auc": 0.9646020542376454,
102
+ "eval_runtime": 752.2615,
103
+ "eval_samples_per_second": 270.621,
104
+ "eval_steps_per_second": 1.058,
105
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  }
107
  ],
108
  "logging_steps": 100,
109
+ "max_steps": 6463,
110
  "num_input_tokens_seen": 0,
111
  "num_train_epochs": 1,
112
  "save_steps": 500,
 
122
  "attributes": {}
123
  }
124
  },
125
+ "total_flos": 2.5502169563136e+18,
126
  "train_batch_size": 128,
127
  "trial_name": null,
128
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:061af69f446ec4684d6cdb1ee48c5bd4e0e460f08c3bbc6485e12e3606db3bfa
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a13da7ad5f6f6f9a3e14fb23bf538ce181b942a01961411ee74d105b0f0a404c
3
  size 5304