voroninip committed on
Commit
4f93b6b
·
verified ·
1 Parent(s): 0d3518f

End of training

Browse files
Files changed (4) hide show
  1. README.md +66 -10
  2. config.json +292 -0
  3. model.safetensors +3 -0
  4. spm.model +3 -0
README.md CHANGED
@@ -1,10 +1,66 @@
1
- ---
2
- datasets:
3
- - arxiv-community/arxiv_dataset
4
- metrics:
5
- - accuracy
6
- base_model:
7
- - microsoft/deberta-v3-base
8
- ---
9
- # ArXiv paper category classifier
10
- DistilBERT model fine-tuned on the ArXiv dataset to predict the category of a given paper based on its title and abstract
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: mit
4
+ base_model: microsoft/deberta-v3-base
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
+ model-index:
10
+ - name: bert-paper-classifier-arxiv
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # bert-paper-classifier-arxiv
18
+
19
+ This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the [arxiv-community/arxiv_dataset](https://huggingface.co/datasets/arxiv-community/arxiv_dataset) dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 3.7652
22
+ - Accuracy: 0.31
23
+
24
+ ## Model description
25
+
26
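+ ArXiv paper category classifier: [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) fine-tuned with a sequence-classification head (`DebertaV2ForSequenceClassification`) to predict one of 126 arXiv categories (e.g. `cs.CL`, `math.PR`, `hep-th`) for a paper from its title and abstract.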
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
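+ Trained and evaluated on paper titles and abstracts from the `arxiv-community/arxiv_dataset` dataset (the exact train/evaluation split is not recorded here). More information needed.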
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 5e-05
42
+ - train_batch_size: 64
43
+ - eval_batch_size: 8
44
+ - seed: 42
45
+ - gradient_accumulation_steps: 2
46
+ - total_train_batch_size: 128
47
+ - optimizer: Use OptimizerNames.ADAFACTOR and the args are:
48
+ No additional optimizer arguments
49
+ - lr_scheduler_type: cosine
50
+ - num_epochs: 2
51
+ - mixed_precision_training: Native AMP
52
+
53
+ ### Training results
54
+
55
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
56
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
57
+ | No log | 1.0 | 8 | 4.0299 | 0.31 |
58
+ | No log | 1.8 | 14 | 3.7652 | 0.31 |
59
+
60
+
61
+ ### Framework versions
62
+
63
+ - Transformers 4.48.3
64
+ - Pytorch 2.5.1+cu124
65
+ - Datasets 3.5.0
66
+ - Tokenizers 0.21.0
config.json ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "adap-org",
12
+ "1": "astro-ph",
13
+ "2": "astro-ph.CO",
14
+ "3": "astro-ph.EP",
15
+ "4": "astro-ph.GA",
16
+ "5": "astro-ph.IM",
17
+ "6": "astro-ph.SR",
18
+ "7": "cmp-lg",
19
+ "8": "cond-mat",
20
+ "9": "cond-mat.dis-nn",
21
+ "10": "cond-mat.mtrl-sci",
22
+ "11": "cond-mat.other",
23
+ "12": "cond-mat.soft",
24
+ "13": "cond-mat.stat-mech",
25
+ "14": "cond-mat.supr-con",
26
+ "15": "cs.AI",
27
+ "16": "cs.AR",
28
+ "17": "cs.CC",
29
+ "18": "cs.CE",
30
+ "19": "cs.CG",
31
+ "20": "cs.CL",
32
+ "21": "cs.CR",
33
+ "22": "cs.CV",
34
+ "23": "cs.CY",
35
+ "24": "cs.DB",
36
+ "25": "cs.DC",
37
+ "26": "cs.DL",
38
+ "27": "cs.DM",
39
+ "28": "cs.DS",
40
+ "29": "cs.ET",
41
+ "30": "cs.FL",
42
+ "31": "cs.GL",
43
+ "32": "cs.GR",
44
+ "33": "cs.GT",
45
+ "34": "cs.HC",
46
+ "35": "cs.IR",
47
+ "36": "cs.IT",
48
+ "37": "cs.LG",
49
+ "38": "cs.LO",
50
+ "39": "cs.MA",
51
+ "40": "cs.MM",
52
+ "41": "cs.MS",
53
+ "42": "cs.NA",
54
+ "43": "cs.NE",
55
+ "44": "cs.NI",
56
+ "45": "cs.OH",
57
+ "46": "cs.OS",
58
+ "47": "cs.PF",
59
+ "48": "cs.PL",
60
+ "49": "cs.RO",
61
+ "50": "cs.SC",
62
+ "51": "cs.SD",
63
+ "52": "cs.SE",
64
+ "53": "cs.SI",
65
+ "54": "cs.SY",
66
+ "55": "econ.EM",
67
+ "56": "eess.AS",
68
+ "57": "eess.IV",
69
+ "58": "eess.SP",
70
+ "59": "gr-qc",
71
+ "60": "hep-ex",
72
+ "61": "hep-lat",
73
+ "62": "hep-ph",
74
+ "63": "hep-th",
75
+ "64": "math.AG",
76
+ "65": "math.AP",
77
+ "66": "math.AT",
78
+ "67": "math.CA",
79
+ "68": "math.CO",
80
+ "69": "math.CT",
81
+ "70": "math.DG",
82
+ "71": "math.DS",
83
+ "72": "math.FA",
84
+ "73": "math.GM",
85
+ "74": "math.GN",
86
+ "75": "math.GR",
87
+ "76": "math.GT",
88
+ "77": "math.HO",
89
+ "78": "math.LO",
90
+ "79": "math.MG",
91
+ "80": "math.NA",
92
+ "81": "math.NT",
93
+ "82": "math.OC",
94
+ "83": "math.PR",
95
+ "84": "math.RA",
96
+ "85": "math.RT",
97
+ "86": "math.ST",
98
+ "87": "nlin.AO",
99
+ "88": "nlin.CD",
100
+ "89": "nlin.CG",
101
+ "90": "nlin.PS",
102
+ "91": "nucl-th",
103
+ "92": "physics.ao-ph",
104
+ "93": "physics.bio-ph",
105
+ "94": "physics.chem-ph",
106
+ "95": "physics.class-ph",
107
+ "96": "physics.comp-ph",
108
+ "97": "physics.data-an",
109
+ "98": "physics.gen-ph",
110
+ "99": "physics.geo-ph",
111
+ "100": "physics.hist-ph",
112
+ "101": "physics.ins-det",
113
+ "102": "physics.med-ph",
114
+ "103": "physics.optics",
115
+ "104": "physics.soc-ph",
116
+ "105": "q-bio.BM",
117
+ "106": "q-bio.CB",
118
+ "107": "q-bio.GN",
119
+ "108": "q-bio.MN",
120
+ "109": "q-bio.NC",
121
+ "110": "q-bio.PE",
122
+ "111": "q-bio.QM",
123
+ "112": "q-bio.TO",
124
+ "113": "q-fin.CP",
125
+ "114": "q-fin.EC",
126
+ "115": "q-fin.GN",
127
+ "116": "q-fin.PM",
128
+ "117": "q-fin.RM",
129
+ "118": "q-fin.ST",
130
+ "119": "q-fin.TR",
131
+ "120": "quant-ph",
132
+ "121": "stat.AP",
133
+ "122": "stat.CO",
134
+ "123": "stat.ME",
135
+ "124": "stat.ML",
136
+ "125": "stat.OT"
137
+ },
138
+ "initializer_range": 0.02,
139
+ "intermediate_size": 3072,
140
+ "label2id": {
141
+ "adap-org": 0,
142
+ "astro-ph": 1,
143
+ "astro-ph.CO": 2,
144
+ "astro-ph.EP": 3,
145
+ "astro-ph.GA": 4,
146
+ "astro-ph.IM": 5,
147
+ "astro-ph.SR": 6,
148
+ "cmp-lg": 7,
149
+ "cond-mat": 8,
150
+ "cond-mat.dis-nn": 9,
151
+ "cond-mat.mtrl-sci": 10,
152
+ "cond-mat.other": 11,
153
+ "cond-mat.soft": 12,
154
+ "cond-mat.stat-mech": 13,
155
+ "cond-mat.supr-con": 14,
156
+ "cs.AI": 15,
157
+ "cs.AR": 16,
158
+ "cs.CC": 17,
159
+ "cs.CE": 18,
160
+ "cs.CG": 19,
161
+ "cs.CL": 20,
162
+ "cs.CR": 21,
163
+ "cs.CV": 22,
164
+ "cs.CY": 23,
165
+ "cs.DB": 24,
166
+ "cs.DC": 25,
167
+ "cs.DL": 26,
168
+ "cs.DM": 27,
169
+ "cs.DS": 28,
170
+ "cs.ET": 29,
171
+ "cs.FL": 30,
172
+ "cs.GL": 31,
173
+ "cs.GR": 32,
174
+ "cs.GT": 33,
175
+ "cs.HC": 34,
176
+ "cs.IR": 35,
177
+ "cs.IT": 36,
178
+ "cs.LG": 37,
179
+ "cs.LO": 38,
180
+ "cs.MA": 39,
181
+ "cs.MM": 40,
182
+ "cs.MS": 41,
183
+ "cs.NA": 42,
184
+ "cs.NE": 43,
185
+ "cs.NI": 44,
186
+ "cs.OH": 45,
187
+ "cs.OS": 46,
188
+ "cs.PF": 47,
189
+ "cs.PL": 48,
190
+ "cs.RO": 49,
191
+ "cs.SC": 50,
192
+ "cs.SD": 51,
193
+ "cs.SE": 52,
194
+ "cs.SI": 53,
195
+ "cs.SY": 54,
196
+ "econ.EM": 55,
197
+ "eess.AS": 56,
198
+ "eess.IV": 57,
199
+ "eess.SP": 58,
200
+ "gr-qc": 59,
201
+ "hep-ex": 60,
202
+ "hep-lat": 61,
203
+ "hep-ph": 62,
204
+ "hep-th": 63,
205
+ "math.AG": 64,
206
+ "math.AP": 65,
207
+ "math.AT": 66,
208
+ "math.CA": 67,
209
+ "math.CO": 68,
210
+ "math.CT": 69,
211
+ "math.DG": 70,
212
+ "math.DS": 71,
213
+ "math.FA": 72,
214
+ "math.GM": 73,
215
+ "math.GN": 74,
216
+ "math.GR": 75,
217
+ "math.GT": 76,
218
+ "math.HO": 77,
219
+ "math.LO": 78,
220
+ "math.MG": 79,
221
+ "math.NA": 80,
222
+ "math.NT": 81,
223
+ "math.OC": 82,
224
+ "math.PR": 83,
225
+ "math.RA": 84,
226
+ "math.RT": 85,
227
+ "math.ST": 86,
228
+ "nlin.AO": 87,
229
+ "nlin.CD": 88,
230
+ "nlin.CG": 89,
231
+ "nlin.PS": 90,
232
+ "nucl-th": 91,
233
+ "physics.ao-ph": 92,
234
+ "physics.bio-ph": 93,
235
+ "physics.chem-ph": 94,
236
+ "physics.class-ph": 95,
237
+ "physics.comp-ph": 96,
238
+ "physics.data-an": 97,
239
+ "physics.gen-ph": 98,
240
+ "physics.geo-ph": 99,
241
+ "physics.hist-ph": 100,
242
+ "physics.ins-det": 101,
243
+ "physics.med-ph": 102,
244
+ "physics.optics": 103,
245
+ "physics.soc-ph": 104,
246
+ "q-bio.BM": 105,
247
+ "q-bio.CB": 106,
248
+ "q-bio.GN": 107,
249
+ "q-bio.MN": 108,
250
+ "q-bio.NC": 109,
251
+ "q-bio.PE": 110,
252
+ "q-bio.QM": 111,
253
+ "q-bio.TO": 112,
254
+ "q-fin.CP": 113,
255
+ "q-fin.EC": 114,
256
+ "q-fin.GN": 115,
257
+ "q-fin.PM": 116,
258
+ "q-fin.RM": 117,
259
+ "q-fin.ST": 118,
260
+ "q-fin.TR": 119,
261
+ "quant-ph": 120,
262
+ "stat.AP": 121,
263
+ "stat.CO": 122,
264
+ "stat.ME": 123,
265
+ "stat.ML": 124,
266
+ "stat.OT": 125
267
+ },
268
+ "layer_norm_eps": 1e-07,
269
+ "legacy": true,
270
+ "max_position_embeddings": 512,
271
+ "max_relative_positions": -1,
272
+ "model_type": "deberta-v2",
273
+ "norm_rel_ebd": "layer_norm",
274
+ "num_attention_heads": 12,
275
+ "num_hidden_layers": 12,
276
+ "pad_token_id": 0,
277
+ "pooler_dropout": 0,
278
+ "pooler_hidden_act": "gelu",
279
+ "pooler_hidden_size": 768,
280
+ "pos_att_type": [
281
+ "p2c",
282
+ "c2p"
283
+ ],
284
+ "position_biased_input": false,
285
+ "position_buckets": 256,
286
+ "relative_attention": true,
287
+ "share_att_key": true,
288
+ "torch_dtype": "float32",
289
+ "transformers_version": "4.48.3",
290
+ "type_vocab_size": 0,
291
+ "vocab_size": 128100
292
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3fb104f4ad4cff08e2ea25577c49fbeaddfcc3e35e1c4d32e3404a53f72735
3
+ size 738100720
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616