MariaUDmitrieva committed on
Commit
c713558
·
verified ·
1 Parent(s): 641e524

Upload 7 files

Browse files
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+
4
+ # model part
5
+
6
+ import json
7
+ import torch
8
+ from torch import nn
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+
11
# Lookup tables shipped next to the app:
#   cat_with_names        -- arXiv category code -> human-readable category name
#   categories_from_model -- list of category codes; categorize_text() indexes it
#                            with the position of each model output
with open('categories_with_names.json', 'r') as names_file:
    cat_with_names = json.load(names_file)
with open('categories_from_model.json', 'r') as labels_file:
    categories_from_model = json.load(labels_file)
15
+
16
@st.cache_resource
def load_models_and_tokenizer():
    """Load the shared tokenizer and the two fine-tuned classifiers.

    Wrapped in ``st.cache_resource`` so Streamlit can hand back the already
    loaded objects instead of reloading the checkpoints.

    Returns:
        A ``(model_titles, model_abstracts, tokenizer)`` tuple. Both models
        are switched to evaluation mode before being returned.
    """

    def _load_classifier(checkpoint_dir):
        # Both checkpoints share the same head configuration: one output per
        # entry of categories_from_model, trained as a multi-label problem.
        classifier = AutoModelForSequenceClassification.from_pretrained(
            checkpoint_dir,
            num_labels=len(categories_from_model),
            problem_type="multi_label_classification",
        )
        classifier.eval()
        return classifier

    shared_tokenizer = AutoTokenizer.from_pretrained("oracat/bert-paper-classifier-arxiv")
    titles_classifier = _load_classifier("powerful_model_titles/checkpoint-13472")
    abstracts_classifier = _load_classifier("powerful_model_abstracts/checkpoint-13472")

    return titles_classifier, abstracts_classifier, shared_tokenizer


model_titles, model_abstracts, tokenizer = load_models_and_tokenizer()
32
+
33
+ def categorize_text(title: str | None = None, abstract: str | None = None, progress_bar = None):
34
+ if title is None and abstract is None:
35
+ raise ValueError('title is None and abstract is None')
36
+
37
+ models_to_run = 2 if (title is not None and abstract is not None) else 1
38
+
39
+ proba_title = None
40
+ if title is not None:
41
+ progresses = (10, 30) if models_to_run == 2 else (20, 60)
42
+ my_bar.progress(progresses[0], text='computing titles')
43
+ input_tok = tokenizer(title, return_tensors='pt')
44
+ with torch.no_grad():
45
+ logits = model_titles(**input_tok)['logits']
46
+ proba_title = torch.sigmoid(logits)[0]
47
+ my_bar.progress(progresses[1], text='computed titles')
48
+
49
+ proba_abstract = None
50
+ if abstract is not None:
51
+ progresses = (40, 70) if models_to_run == 2 else (20, 60)
52
+ my_bar.progress(progresses[0], text='computing abstracts')
53
+ input_tok = tokenizer(abstract, return_tensors='pt')
54
+ with torch.no_grad():
55
+ logits = model_abstracts(**input_tok)['logits']
56
+ proba_abstract = torch.sigmoid(logits)[0]
57
+ my_bar.progress(progresses[0], text='computed abstracts')
58
+
59
+ if title is None:
60
+ proba = proba_abstract
61
+ elif abstract is None:
62
+ proba = proba_title
63
+ else:
64
+ proba = proba_title * 0.1 + proba_abstract * 0.9
65
+
66
+ progresses = (80, 90) if models_to_run == 2 else (70, 90)
67
+
68
+ my_bar.progress(progresses[0], text='computed proba')
69
+
70
+ sorted_proba, indices = torch.sort(proba, descending=True)
71
+ my_bar.progress(progresses[1], text='sorted proba')
72
+ to_take = 1
73
+ while sorted_proba[:to_take].sum() < 0.95 and to_take < len(categories_from_model):
74
+ to_take += 1
75
+ output = [(cat_with_names[categories_from_model[index]], proba[index].item())
76
+ for index in indices[:to_take]]
77
+ my_bar.progress(100, text='generated output')
78
+ return output
79
+
80
+ # front part
81
+
82
st.markdown("<h1 style='text-align: center;'>Classify your paper!</h1>", unsafe_allow_html=True)

# Seed st.session_state on the first run so later code can read these entries
# unconditionally. "title"/"abstract" hold the submitted texts, the
# "*_input_key" entries are the keys of the two text widgets, and "model_type"
# holds the current multiselect choice.
for state_key in ("title", "abstract", "title_input_key", "abstract_input_key"):
    if state_key not in st.session_state:
        st.session_state[state_key] = ""
if "model_type" not in st.session_state:
    st.session_state["model_type"] = []
95
+
96
def input_error():
    """Validate the submitted form state kept in ``st.session_state``.

    Returns:
        A human-readable error message, or ``''`` when the input is valid:
        at least one of 'Title'/'Abstract' must be selected, and the text for
        every selected kind must be non-empty.
    """
    # Read the selection from session state only, so the function does not
    # depend on a module-level variable having been assigned first.
    selected = st.session_state.model_type
    if not selected:
        return 'you have to select title or abstract'
    if 'Title' in selected and not st.session_state.title:
        return 'Title is empty'
    if 'Abstract' in selected and not st.session_state.abstract:
        return 'Abstract is empty'
    return ''
104
+
105
+
106
def clear_input():
    """``on_click`` callback of the Submit button.

    Copies the current contents of the two text widgets into
    ``st.session_state.title`` / ``st.session_state.abstract`` and, if the
    submission is valid, empties the widgets that were used for it.
    """
    # Take the text verbatim from the widgets' session-state entries.
    # (str.title() was previously applied here, which re-capitalised every
    # word of the user's text before it was classified.)
    st.session_state.title = st.session_state.title_input_key
    st.session_state.abstract = st.session_state.abstract_input_key

    # Leave the inputs untouched on a validation error so the user can fix them.
    if not input_error():
        selected = st.session_state.model_type
        if "Title" in selected:
            st.session_state.title_input_key = ""
        if "Abstract" in selected:
            st.session_state.abstract_input_key = ""
114
+
115
+
116
# Free-text inputs. The explicit keys expose the widget values in
# st.session_state, which is how clear_input() resets them.
title = st.text_input(label=r"$\textsf{\Large Title}$", key="title_input_key")
abstract = st.text_input(label=r"$\textsf{\Large Abstract}$", key="abstract_input_key")

# Which of the two texts should be classified (either one, or both); the
# choice is mirrored into session state for the validation code.
model_type = st.multiselect(
    label=r"$\textsf{\large Classify by:}$",
    options=['Title', 'Abstract'],
)
st.session_state.model_type = model_type
127
+
128
# Runs on the rerun triggered by the Submit button; clear_input() has already
# copied the widget contents into st.session_state by the time we get here.
if st.button('Submit', on_click=clear_input):
    error_message = input_error()
    if error_message:
        st.error(error_message)
    else:
        # Pass only the texts the user asked to classify by.
        model_input = {}
        if 'Title' in st.session_state.model_type:
            model_input['title'] = st.session_state.title
        if 'Abstract' in st.session_state.model_type:
            model_input['abstract'] = st.session_state.abstract

        my_bar = st.progress(0, text='starting model')
        model_result = categorize_text(**model_input, progress_bar=my_bar)
        st.markdown("<h1 style='text-align: center;'>Classification completed!</h1>", unsafe_allow_html=True)

        # The best category is always shown, in the largest font. Remaining
        # categories get their own line when at least 10% likely; anything
        # below that is collected into one compact comma-separated line.
        small_categories = []
        for position, (cat, proba) in enumerate(model_result):
            if position == 0:
                st.write(r"$\textsf{\Large " + f'{cat}: {round(100*proba)}' + r"\%}$")
            elif proba < 0.1:
                small_categories.append(f'{cat}: {round(100*proba, 1)}' + r"\%")
            else:
                st.write(r"$\textsf{\large " + f'{cat}: {round(100*proba)}' + r"\%}$")
        if small_categories:
            st.write(', '.join(small_categories))
153
+
154
+
155
+
156
+
157
+
categories_from_model.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["adap-org", "astro-ph", "astro-ph.CO", "astro-ph.EP", "astro-ph.GA", "astro-ph.IM", "astro-ph.SR", "cmp-lg", "cond-mat", "cond-mat.dis-nn", "cond-mat.mtrl-sci", "cond-mat.other", "cond-mat.soft", "cond-mat.stat-mech", "cond-mat.supr-con", "cs.AI", "cs.AR", "cs.CC", "cs.CE", "cs.CG", "cs.CL", "cs.CR", "cs.CV", "cs.CY", "cs.DB", "cs.DC", "cs.DL", "cs.DM", "cs.DS", "cs.ET", "cs.FL", "cs.GL", "cs.GR", "cs.GT", "cs.HC", "cs.IR", "cs.IT", "cs.LG", "cs.LO", "cs.MA", "cs.MM", "cs.MS", "cs.NA", "cs.NE", "cs.NI", "cs.OH", "cs.OS", "cs.PF", "cs.PL", "cs.RO", "cs.SC", "cs.SD", "cs.SE", "cs.SI", "cs.SY", "econ.EM", "eess.AS", "eess.IV", "eess.SP", "gr-qc", "hep-ex", "hep-lat", "hep-ph", "hep-th", "math.AG", "math.AP", "math.AT", "math.CA", "math.CO", "math.CT", "math.DG", "math.DS", "math.FA", "math.GM", "math.GN", "math.GR", "math.GT", "math.HO", "math.LO", "math.MG", "math.NA", "math.NT", "math.OC", "math.PR", "math.RA", "math.RT", "math.ST", "nlin.AO", "nlin.CD", "nlin.CG", "nlin.PS", "nucl-th", "physics.ao-ph", "physics.bio-ph", "physics.chem-ph", "physics.class-ph", "physics.comp-ph", "physics.data-an", "physics.gen-ph", "physics.geo-ph", "physics.hist-ph", "physics.ins-det", "physics.med-ph", "physics.optics", "physics.soc-ph", "q-bio.BM", "q-bio.CB", "q-bio.GN", "q-bio.MN", "q-bio.NC", "q-bio.PE", "q-bio.QM", "q-bio.TO", "q-fin.CP", "q-fin.EC", "q-fin.GN", "q-fin.PM", "q-fin.RM", "q-fin.ST", "q-fin.TR", "quant-ph", "stat.AP", "stat.CO", "stat.ME", "stat.ML", "stat.OT"]
categories_with_names.json ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"cs.AI": "Artificial Intelligence",
2
+ "cs.AR": "Hardware Architecture",
3
+ "cs.CC": "Computational Complexity",
4
+ "cs.CE": "Computational Engineering, Finance, and Science",
5
+ "cs.CG": "Computational Geometry",
6
+ "cs.CL": "Computation and Language",
7
+ "cs.CR": "Cryptography and Security",
8
+ "cs.CV": "Computer Vision and Pattern Recognition",
9
+ "cs.CY": "Computers and Society",
10
+ "cs.DB": "Databases",
11
+ "cs.DC": "Distributed, Parallel, and Cluster Computing",
12
+ "cs.DL": "Digital Libraries",
13
+ "cs.DM": "Discrete Mathematics",
14
+ "cs.DS": "Data Structures and Algorithms",
15
+ "cs.ET": "Emerging Technologies",
16
+ "cs.FL": "Formal Languages and Automata Theory",
17
+ "cs.GL": "General Literature",
18
+ "cs.GR": "Graphics",
19
+ "cs.GT": "Computer Science and Game Theory",
20
+ "cs.HC": "Human-Computer Interaction",
21
+ "cs.IR": "Information Retrieval",
22
+ "cs.IT": "Information Theory",
23
+ "cs.LG": "Machine Learning",
24
+ "cs.LO": "Logic in Computer Science",
25
+ "cs.MA": "Multiagent Systems",
26
+ "cs.MM": "Multimedia",
27
+ "cs.MS": "Mathematical Software",
28
+ "cs.NA": "Numerical Analysis",
29
+ "cs.NE": "Neural and Evolutionary Computing",
30
+ "cs.NI": "Networking and Internet Architecture",
31
+ "cs.OH": "Other Computer Science",
32
+ "cs.OS": "Operating Systems",
33
+ "cs.PF": "Performance",
34
+ "cs.PL": "Programming Languages",
35
+ "cs.RO": "Robotics",
36
+ "cs.SC": "Symbolic Computation",
37
+ "cs.SD": "Sound",
38
+ "cs.SE": "Software Engineering",
39
+ "cs.SI": "Social and Information Networks",
40
+ "cs.SY": "Systems and Control",
41
+ "econ.EM": "Econometrics",
42
+ "econ.GN": "General Economics",
43
+ "econ.TH": "Theoretical Economics",
44
+ "eess.AS": "Audio and Speech Processing",
45
+ "eess.IV": "Image and Video Processing",
46
+ "eess.SP": "Signal Processing",
47
+ "eess.SY": "Systems and Control",
48
+ "math.AC": "Commutative Algebra",
49
+ "math.AG": "Algebraic Geometry",
50
+ "math.AP": "Analysis of PDEs",
51
+ "math.AT": "Algebraic Topology",
52
+ "math.CA": "Classical Analysis and ODEs",
53
+ "math.CO": "Combinatorics",
54
+ "math.CT": "Category Theory",
55
+ "math.CV": "Complex Variables",
56
+ "math.DG": "Differential Geometry",
57
+ "math.DS": "Dynamical Systems",
58
+ "math.FA": "Functional Analysis",
59
+ "math.GM": "General Mathematics",
60
+ "math.GN": "General Topology",
61
+ "math.GR": "Group Theory",
62
+ "math.GT": "Geometric Topology",
63
+ "math.HO": "History and Overview",
64
+ "math.IT": "Information Theory",
65
+ "math.KT": "K-Theory and Homology",
66
+ "math.LO": "Logic",
67
+ "math.MG": "Metric Geometry",
68
+ "math.MP": "Mathematical Physics",
69
+ "math.NA": "Numerical Analysis",
70
+ "math.NT": "Number Theory",
71
+ "math.OA": "Operator Algebras",
72
+ "math.OC": "Optimization and Control",
73
+ "math.PR": "Probability",
74
+ "math.QA": "Quantum Algebra",
75
+ "math.RA": "Rings and Algebras",
76
+ "math.RT": "Representation Theory",
77
+ "math.SG": "Symplectic Geometry",
78
+ "math.SP": "Spectral Theory",
79
+ "math.ST": "Statistics Theory",
80
+ "astro-ph.CO": "Cosmology and Nongalactic Astrophysics",
81
+ "astro-ph.EP": "Earth and Planetary Astrophysics",
82
+ "astro-ph.GA": "Astrophysics of Galaxies",
83
+ "astro-ph.HE": "High Energy Astrophysical Phenomena",
84
+ "astro-ph.IM": "Instrumentation and Methods for Astrophysics",
85
+ "astro-ph.SR": "Solar and Stellar Astrophysics",
86
+ "cond-mat.dis-nn": "Disordered Systems and Neural Networks",
87
+ "cond-mat.mes-hall": "Mesoscale and Nanoscale Physics",
88
+ "cond-mat.mtrl-sci": "Materials Science",
89
+ "cond-mat.other": "Other Condensed Matter",
90
+ "cond-mat.quant-gas": "Quantum Gases",
91
+ "cond-mat.soft": "Soft Condensed Matter",
92
+ "cond-mat.stat-mech": "Statistical Mechanics",
93
+ "cond-mat.str-el": "Strongly Correlated Electrons",
94
+ "cond-mat.supr-con": "Superconductivity",
95
+ "gr-qc": "General Relativity and Quantum Cosmology",
96
+ "hep-ex": "High Energy Physics - Experiment",
97
+ "hep-lat": "High Energy Physics - Lattice",
98
+ "hep-ph": "High Energy Physics - Phenomenology",
99
+ "hep-th": "High Energy Physics - Theory",
100
+ "math-ph": "Mathematical Physics",
101
+ "nlin.AO": "Adaptation and Self-Organizing Systems",
102
+ "nlin.CD": "Chaotic Dynamics",
103
+ "nlin.CG": "Cellular Automata and Lattice Gases",
104
+ "nlin.PS": "Pattern Formation and Solitons",
105
+ "nlin.SI": "Exactly Solvable and Integrable Systems",
106
+ "nucl-ex": "Nuclear Experiment",
107
+ "nucl-th": "Nuclear Theory",
108
+ "physics.acc-ph": "Accelerator Physics",
109
+ "physics.ao-ph": "Atmospheric and Oceanic Physics",
110
+ "physics.app-ph": "Applied Physics",
111
+ "physics.atm-clus": "Atomic and Molecular Clusters",
112
+ "physics.atom-ph": "Atomic Physics",
113
+ "physics.bio-ph": "Biological Physics",
114
+ "physics.chem-ph": "Chemical Physics",
115
+ "physics.class-ph": "Classical Physics",
116
+ "physics.comp-ph": "Computational Physics",
117
+ "physics.data-an": "Data Analysis, Statistics and Probability",
118
+ "physics.ed-ph": "Physics Education",
119
+ "physics.flu-dyn": "Fluid Dynamics",
120
+ "physics.gen-ph": "General Physics",
121
+ "physics.geo-ph": "Geophysics",
122
+ "physics.hist-ph": "History and Philosophy of Physics",
123
+ "physics.ins-det": "Instrumentation and Detectors",
124
+ "physics.med-ph": "Medical Physics",
125
+ "physics.optics": "Optics",
126
+ "physics.plasm-ph": "Plasma Physics",
127
+ "physics.pop-ph": "Popular Physics",
128
+ "physics.soc-ph": "Physics and Society",
129
+ "physics.space-ph": "Space Physics",
130
+ "quant-ph": "Quantum Physics",
131
+ "q-bio.BM": "Biomolecules",
132
+ "q-bio.CB": "Cell Behavior",
133
+ "q-bio.GN": "Genomics",
134
+ "q-bio.MN": "Molecular Networks",
135
+ "q-bio.NC": "Neurons and Cognition",
136
+ "q-bio.OT": "Other Quantitative Biology",
137
+ "q-bio.PE": "Populations and Evolution",
138
+ "q-bio.QM": "Quantitative Methods",
139
+ "q-bio.SC": "Subcellular Processes",
140
+ "q-bio.TO": "Tissues and Organs",
141
+ "q-fin.CP": "Computational Finance",
142
+ "q-fin.EC": "Economics",
143
+ "q-fin.GN": "General Finance",
144
+ "q-fin.MF": "Mathematical Finance",
145
+ "q-fin.PM": "Portfolio Management",
146
+ "q-fin.PR": "Pricing of Securities",
147
+ "q-fin.RM": "Risk Management",
148
+ "q-fin.ST": "Statistical Finance",
149
+ "q-fin.TR": "Trading and Market Microstructure",
150
+ "stat.AP": "Applications",
151
+ "stat.CO": "Computation",
152
+ "stat.ME": "Methodology",
153
+ "stat.ML": "Machine Learning",
154
+ "stat.OT": "Other Statistics",
155
+ "stat.TH": "Statistics Theory",
156
+ "astro-ph": "Astrophysics",
157
+ "adap-org": "Adaptation and Self-Organizing Systems",
158
+ "cmp-lg": "Computation and Language",
159
+ "cond-mat": "Condensed Matter"}
powerful_model_abstracts/checkpoint-13472/config.json ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "adap-org",
12
+ "1": "astro-ph",
13
+ "2": "astro-ph.CO",
14
+ "3": "astro-ph.EP",
15
+ "4": "astro-ph.GA",
16
+ "5": "astro-ph.IM",
17
+ "6": "astro-ph.SR",
18
+ "7": "cmp-lg",
19
+ "8": "cond-mat",
20
+ "9": "cond-mat.dis-nn",
21
+ "10": "cond-mat.mtrl-sci",
22
+ "11": "cond-mat.other",
23
+ "12": "cond-mat.soft",
24
+ "13": "cond-mat.stat-mech",
25
+ "14": "cond-mat.supr-con",
26
+ "15": "cs.AI",
27
+ "16": "cs.AR",
28
+ "17": "cs.CC",
29
+ "18": "cs.CE",
30
+ "19": "cs.CG",
31
+ "20": "cs.CL",
32
+ "21": "cs.CR",
33
+ "22": "cs.CV",
34
+ "23": "cs.CY",
35
+ "24": "cs.DB",
36
+ "25": "cs.DC",
37
+ "26": "cs.DL",
38
+ "27": "cs.DM",
39
+ "28": "cs.DS",
40
+ "29": "cs.ET",
41
+ "30": "cs.FL",
42
+ "31": "cs.GL",
43
+ "32": "cs.GR",
44
+ "33": "cs.GT",
45
+ "34": "cs.HC",
46
+ "35": "cs.IR",
47
+ "36": "cs.IT",
48
+ "37": "cs.LG",
49
+ "38": "cs.LO",
50
+ "39": "cs.MA",
51
+ "40": "cs.MM",
52
+ "41": "cs.MS",
53
+ "42": "cs.NA",
54
+ "43": "cs.NE",
55
+ "44": "cs.NI",
56
+ "45": "cs.OH",
57
+ "46": "cs.OS",
58
+ "47": "cs.PF",
59
+ "48": "cs.PL",
60
+ "49": "cs.RO",
61
+ "50": "cs.SC",
62
+ "51": "cs.SD",
63
+ "52": "cs.SE",
64
+ "53": "cs.SI",
65
+ "54": "cs.SY",
66
+ "55": "econ.EM",
67
+ "56": "eess.AS",
68
+ "57": "eess.IV",
69
+ "58": "eess.SP",
70
+ "59": "gr-qc",
71
+ "60": "hep-ex",
72
+ "61": "hep-lat",
73
+ "62": "hep-ph",
74
+ "63": "hep-th",
75
+ "64": "math.AG",
76
+ "65": "math.AP",
77
+ "66": "math.AT",
78
+ "67": "math.CA",
79
+ "68": "math.CO",
80
+ "69": "math.CT",
81
+ "70": "math.DG",
82
+ "71": "math.DS",
83
+ "72": "math.FA",
84
+ "73": "math.GM",
85
+ "74": "math.GN",
86
+ "75": "math.GR",
87
+ "76": "math.GT",
88
+ "77": "math.HO",
89
+ "78": "math.LO",
90
+ "79": "math.MG",
91
+ "80": "math.NA",
92
+ "81": "math.NT",
93
+ "82": "math.OC",
94
+ "83": "math.PR",
95
+ "84": "math.RA",
96
+ "85": "math.RT",
97
+ "86": "math.ST",
98
+ "87": "nlin.AO",
99
+ "88": "nlin.CD",
100
+ "89": "nlin.CG",
101
+ "90": "nlin.PS",
102
+ "91": "nucl-th",
103
+ "92": "physics.ao-ph",
104
+ "93": "physics.bio-ph",
105
+ "94": "physics.chem-ph",
106
+ "95": "physics.class-ph",
107
+ "96": "physics.comp-ph",
108
+ "97": "physics.data-an",
109
+ "98": "physics.gen-ph",
110
+ "99": "physics.geo-ph",
111
+ "100": "physics.hist-ph",
112
+ "101": "physics.ins-det",
113
+ "102": "physics.med-ph",
114
+ "103": "physics.optics",
115
+ "104": "physics.soc-ph",
116
+ "105": "q-bio.BM",
117
+ "106": "q-bio.CB",
118
+ "107": "q-bio.GN",
119
+ "108": "q-bio.MN",
120
+ "109": "q-bio.NC",
121
+ "110": "q-bio.PE",
122
+ "111": "q-bio.QM",
123
+ "112": "q-bio.TO",
124
+ "113": "q-fin.CP",
125
+ "114": "q-fin.EC",
126
+ "115": "q-fin.GN",
127
+ "116": "q-fin.PM",
128
+ "117": "q-fin.RM",
129
+ "118": "q-fin.ST",
130
+ "119": "q-fin.TR",
131
+ "120": "quant-ph",
132
+ "121": "stat.AP",
133
+ "122": "stat.CO",
134
+ "123": "stat.ME",
135
+ "124": "stat.ML",
136
+ "125": "stat.OT"
137
+ },
138
+ "initializer_range": 0.02,
139
+ "intermediate_size": 3072,
140
+ "label2id": {
141
+ "adap-org": 0,
142
+ "astro-ph": 1,
143
+ "astro-ph.CO": 2,
144
+ "astro-ph.EP": 3,
145
+ "astro-ph.GA": 4,
146
+ "astro-ph.IM": 5,
147
+ "astro-ph.SR": 6,
148
+ "cmp-lg": 7,
149
+ "cond-mat": 8,
150
+ "cond-mat.dis-nn": 9,
151
+ "cond-mat.mtrl-sci": 10,
152
+ "cond-mat.other": 11,
153
+ "cond-mat.soft": 12,
154
+ "cond-mat.stat-mech": 13,
155
+ "cond-mat.supr-con": 14,
156
+ "cs.AI": 15,
157
+ "cs.AR": 16,
158
+ "cs.CC": 17,
159
+ "cs.CE": 18,
160
+ "cs.CG": 19,
161
+ "cs.CL": 20,
162
+ "cs.CR": 21,
163
+ "cs.CV": 22,
164
+ "cs.CY": 23,
165
+ "cs.DB": 24,
166
+ "cs.DC": 25,
167
+ "cs.DL": 26,
168
+ "cs.DM": 27,
169
+ "cs.DS": 28,
170
+ "cs.ET": 29,
171
+ "cs.FL": 30,
172
+ "cs.GL": 31,
173
+ "cs.GR": 32,
174
+ "cs.GT": 33,
175
+ "cs.HC": 34,
176
+ "cs.IR": 35,
177
+ "cs.IT": 36,
178
+ "cs.LG": 37,
179
+ "cs.LO": 38,
180
+ "cs.MA": 39,
181
+ "cs.MM": 40,
182
+ "cs.MS": 41,
183
+ "cs.NA": 42,
184
+ "cs.NE": 43,
185
+ "cs.NI": 44,
186
+ "cs.OH": 45,
187
+ "cs.OS": 46,
188
+ "cs.PF": 47,
189
+ "cs.PL": 48,
190
+ "cs.RO": 49,
191
+ "cs.SC": 50,
192
+ "cs.SD": 51,
193
+ "cs.SE": 52,
194
+ "cs.SI": 53,
195
+ "cs.SY": 54,
196
+ "econ.EM": 55,
197
+ "eess.AS": 56,
198
+ "eess.IV": 57,
199
+ "eess.SP": 58,
200
+ "gr-qc": 59,
201
+ "hep-ex": 60,
202
+ "hep-lat": 61,
203
+ "hep-ph": 62,
204
+ "hep-th": 63,
205
+ "math.AG": 64,
206
+ "math.AP": 65,
207
+ "math.AT": 66,
208
+ "math.CA": 67,
209
+ "math.CO": 68,
210
+ "math.CT": 69,
211
+ "math.DG": 70,
212
+ "math.DS": 71,
213
+ "math.FA": 72,
214
+ "math.GM": 73,
215
+ "math.GN": 74,
216
+ "math.GR": 75,
217
+ "math.GT": 76,
218
+ "math.HO": 77,
219
+ "math.LO": 78,
220
+ "math.MG": 79,
221
+ "math.NA": 80,
222
+ "math.NT": 81,
223
+ "math.OC": 82,
224
+ "math.PR": 83,
225
+ "math.RA": 84,
226
+ "math.RT": 85,
227
+ "math.ST": 86,
228
+ "nlin.AO": 87,
229
+ "nlin.CD": 88,
230
+ "nlin.CG": 89,
231
+ "nlin.PS": 90,
232
+ "nucl-th": 91,
233
+ "physics.ao-ph": 92,
234
+ "physics.bio-ph": 93,
235
+ "physics.chem-ph": 94,
236
+ "physics.class-ph": 95,
237
+ "physics.comp-ph": 96,
238
+ "physics.data-an": 97,
239
+ "physics.gen-ph": 98,
240
+ "physics.geo-ph": 99,
241
+ "physics.hist-ph": 100,
242
+ "physics.ins-det": 101,
243
+ "physics.med-ph": 102,
244
+ "physics.optics": 103,
245
+ "physics.soc-ph": 104,
246
+ "q-bio.BM": 105,
247
+ "q-bio.CB": 106,
248
+ "q-bio.GN": 107,
249
+ "q-bio.MN": 108,
250
+ "q-bio.NC": 109,
251
+ "q-bio.PE": 110,
252
+ "q-bio.QM": 111,
253
+ "q-bio.TO": 112,
254
+ "q-fin.CP": 113,
255
+ "q-fin.EC": 114,
256
+ "q-fin.GN": 115,
257
+ "q-fin.PM": 116,
258
+ "q-fin.RM": 117,
259
+ "q-fin.ST": 118,
260
+ "q-fin.TR": 119,
261
+ "quant-ph": 120,
262
+ "stat.AP": 121,
263
+ "stat.CO": 122,
264
+ "stat.ME": 123,
265
+ "stat.ML": 124,
266
+ "stat.OT": 125
267
+ },
268
+ "layer_norm_eps": 1e-12,
269
+ "max_position_embeddings": 512,
270
+ "model_type": "bert",
271
+ "num_attention_heads": 12,
272
+ "num_hidden_layers": 12,
273
+ "pad_token_id": 0,
274
+ "position_embedding_type": "absolute",
275
+ "problem_type": "multi_label_classification",
276
+ "torch_dtype": "float32",
277
+ "transformers_version": "4.50.1",
278
+ "type_vocab_size": 2,
279
+ "use_cache": true,
280
+ "vocab_size": 30522
281
+ }
powerful_model_abstracts/checkpoint-13472/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31017a376090f42d5ad1bda9cddb0f4ab56464c87a863166e4d426d264e3bbb5
3
+ size 438340072
powerful_model_titles/checkpoint-13472/config.json ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "adap-org",
12
+ "1": "astro-ph",
13
+ "2": "astro-ph.CO",
14
+ "3": "astro-ph.EP",
15
+ "4": "astro-ph.GA",
16
+ "5": "astro-ph.IM",
17
+ "6": "astro-ph.SR",
18
+ "7": "cmp-lg",
19
+ "8": "cond-mat",
20
+ "9": "cond-mat.dis-nn",
21
+ "10": "cond-mat.mtrl-sci",
22
+ "11": "cond-mat.other",
23
+ "12": "cond-mat.soft",
24
+ "13": "cond-mat.stat-mech",
25
+ "14": "cond-mat.supr-con",
26
+ "15": "cs.AI",
27
+ "16": "cs.AR",
28
+ "17": "cs.CC",
29
+ "18": "cs.CE",
30
+ "19": "cs.CG",
31
+ "20": "cs.CL",
32
+ "21": "cs.CR",
33
+ "22": "cs.CV",
34
+ "23": "cs.CY",
35
+ "24": "cs.DB",
36
+ "25": "cs.DC",
37
+ "26": "cs.DL",
38
+ "27": "cs.DM",
39
+ "28": "cs.DS",
40
+ "29": "cs.ET",
41
+ "30": "cs.FL",
42
+ "31": "cs.GL",
43
+ "32": "cs.GR",
44
+ "33": "cs.GT",
45
+ "34": "cs.HC",
46
+ "35": "cs.IR",
47
+ "36": "cs.IT",
48
+ "37": "cs.LG",
49
+ "38": "cs.LO",
50
+ "39": "cs.MA",
51
+ "40": "cs.MM",
52
+ "41": "cs.MS",
53
+ "42": "cs.NA",
54
+ "43": "cs.NE",
55
+ "44": "cs.NI",
56
+ "45": "cs.OH",
57
+ "46": "cs.OS",
58
+ "47": "cs.PF",
59
+ "48": "cs.PL",
60
+ "49": "cs.RO",
61
+ "50": "cs.SC",
62
+ "51": "cs.SD",
63
+ "52": "cs.SE",
64
+ "53": "cs.SI",
65
+ "54": "cs.SY",
66
+ "55": "econ.EM",
67
+ "56": "eess.AS",
68
+ "57": "eess.IV",
69
+ "58": "eess.SP",
70
+ "59": "gr-qc",
71
+ "60": "hep-ex",
72
+ "61": "hep-lat",
73
+ "62": "hep-ph",
74
+ "63": "hep-th",
75
+ "64": "math.AG",
76
+ "65": "math.AP",
77
+ "66": "math.AT",
78
+ "67": "math.CA",
79
+ "68": "math.CO",
80
+ "69": "math.CT",
81
+ "70": "math.DG",
82
+ "71": "math.DS",
83
+ "72": "math.FA",
84
+ "73": "math.GM",
85
+ "74": "math.GN",
86
+ "75": "math.GR",
87
+ "76": "math.GT",
88
+ "77": "math.HO",
89
+ "78": "math.LO",
90
+ "79": "math.MG",
91
+ "80": "math.NA",
92
+ "81": "math.NT",
93
+ "82": "math.OC",
94
+ "83": "math.PR",
95
+ "84": "math.RA",
96
+ "85": "math.RT",
97
+ "86": "math.ST",
98
+ "87": "nlin.AO",
99
+ "88": "nlin.CD",
100
+ "89": "nlin.CG",
101
+ "90": "nlin.PS",
102
+ "91": "nucl-th",
103
+ "92": "physics.ao-ph",
104
+ "93": "physics.bio-ph",
105
+ "94": "physics.chem-ph",
106
+ "95": "physics.class-ph",
107
+ "96": "physics.comp-ph",
108
+ "97": "physics.data-an",
109
+ "98": "physics.gen-ph",
110
+ "99": "physics.geo-ph",
111
+ "100": "physics.hist-ph",
112
+ "101": "physics.ins-det",
113
+ "102": "physics.med-ph",
114
+ "103": "physics.optics",
115
+ "104": "physics.soc-ph",
116
+ "105": "q-bio.BM",
117
+ "106": "q-bio.CB",
118
+ "107": "q-bio.GN",
119
+ "108": "q-bio.MN",
120
+ "109": "q-bio.NC",
121
+ "110": "q-bio.PE",
122
+ "111": "q-bio.QM",
123
+ "112": "q-bio.TO",
124
+ "113": "q-fin.CP",
125
+ "114": "q-fin.EC",
126
+ "115": "q-fin.GN",
127
+ "116": "q-fin.PM",
128
+ "117": "q-fin.RM",
129
+ "118": "q-fin.ST",
130
+ "119": "q-fin.TR",
131
+ "120": "quant-ph",
132
+ "121": "stat.AP",
133
+ "122": "stat.CO",
134
+ "123": "stat.ME",
135
+ "124": "stat.ML",
136
+ "125": "stat.OT"
137
+ },
138
+ "initializer_range": 0.02,
139
+ "intermediate_size": 3072,
140
+ "label2id": {
141
+ "adap-org": 0,
142
+ "astro-ph": 1,
143
+ "astro-ph.CO": 2,
144
+ "astro-ph.EP": 3,
145
+ "astro-ph.GA": 4,
146
+ "astro-ph.IM": 5,
147
+ "astro-ph.SR": 6,
148
+ "cmp-lg": 7,
149
+ "cond-mat": 8,
150
+ "cond-mat.dis-nn": 9,
151
+ "cond-mat.mtrl-sci": 10,
152
+ "cond-mat.other": 11,
153
+ "cond-mat.soft": 12,
154
+ "cond-mat.stat-mech": 13,
155
+ "cond-mat.supr-con": 14,
156
+ "cs.AI": 15,
157
+ "cs.AR": 16,
158
+ "cs.CC": 17,
159
+ "cs.CE": 18,
160
+ "cs.CG": 19,
161
+ "cs.CL": 20,
162
+ "cs.CR": 21,
163
+ "cs.CV": 22,
164
+ "cs.CY": 23,
165
+ "cs.DB": 24,
166
+ "cs.DC": 25,
167
+ "cs.DL": 26,
168
+ "cs.DM": 27,
169
+ "cs.DS": 28,
170
+ "cs.ET": 29,
171
+ "cs.FL": 30,
172
+ "cs.GL": 31,
173
+ "cs.GR": 32,
174
+ "cs.GT": 33,
175
+ "cs.HC": 34,
176
+ "cs.IR": 35,
177
+ "cs.IT": 36,
178
+ "cs.LG": 37,
179
+ "cs.LO": 38,
180
+ "cs.MA": 39,
181
+ "cs.MM": 40,
182
+ "cs.MS": 41,
183
+ "cs.NA": 42,
184
+ "cs.NE": 43,
185
+ "cs.NI": 44,
186
+ "cs.OH": 45,
187
+ "cs.OS": 46,
188
+ "cs.PF": 47,
189
+ "cs.PL": 48,
190
+ "cs.RO": 49,
191
+ "cs.SC": 50,
192
+ "cs.SD": 51,
193
+ "cs.SE": 52,
194
+ "cs.SI": 53,
195
+ "cs.SY": 54,
196
+ "econ.EM": 55,
197
+ "eess.AS": 56,
198
+ "eess.IV": 57,
199
+ "eess.SP": 58,
200
+ "gr-qc": 59,
201
+ "hep-ex": 60,
202
+ "hep-lat": 61,
203
+ "hep-ph": 62,
204
+ "hep-th": 63,
205
+ "math.AG": 64,
206
+ "math.AP": 65,
207
+ "math.AT": 66,
208
+ "math.CA": 67,
209
+ "math.CO": 68,
210
+ "math.CT": 69,
211
+ "math.DG": 70,
212
+ "math.DS": 71,
213
+ "math.FA": 72,
214
+ "math.GM": 73,
215
+ "math.GN": 74,
216
+ "math.GR": 75,
217
+ "math.GT": 76,
218
+ "math.HO": 77,
219
+ "math.LO": 78,
220
+ "math.MG": 79,
221
+ "math.NA": 80,
222
+ "math.NT": 81,
223
+ "math.OC": 82,
224
+ "math.PR": 83,
225
+ "math.RA": 84,
226
+ "math.RT": 85,
227
+ "math.ST": 86,
228
+ "nlin.AO": 87,
229
+ "nlin.CD": 88,
230
+ "nlin.CG": 89,
231
+ "nlin.PS": 90,
232
+ "nucl-th": 91,
233
+ "physics.ao-ph": 92,
234
+ "physics.bio-ph": 93,
235
+ "physics.chem-ph": 94,
236
+ "physics.class-ph": 95,
237
+ "physics.comp-ph": 96,
238
+ "physics.data-an": 97,
239
+ "physics.gen-ph": 98,
240
+ "physics.geo-ph": 99,
241
+ "physics.hist-ph": 100,
242
+ "physics.ins-det": 101,
243
+ "physics.med-ph": 102,
244
+ "physics.optics": 103,
245
+ "physics.soc-ph": 104,
246
+ "q-bio.BM": 105,
247
+ "q-bio.CB": 106,
248
+ "q-bio.GN": 107,
249
+ "q-bio.MN": 108,
250
+ "q-bio.NC": 109,
251
+ "q-bio.PE": 110,
252
+ "q-bio.QM": 111,
253
+ "q-bio.TO": 112,
254
+ "q-fin.CP": 113,
255
+ "q-fin.EC": 114,
256
+ "q-fin.GN": 115,
257
+ "q-fin.PM": 116,
258
+ "q-fin.RM": 117,
259
+ "q-fin.ST": 118,
260
+ "q-fin.TR": 119,
261
+ "quant-ph": 120,
262
+ "stat.AP": 121,
263
+ "stat.CO": 122,
264
+ "stat.ME": 123,
265
+ "stat.ML": 124,
266
+ "stat.OT": 125
267
+ },
268
+ "layer_norm_eps": 1e-12,
269
+ "max_position_embeddings": 512,
270
+ "model_type": "bert",
271
+ "num_attention_heads": 12,
272
+ "num_hidden_layers": 12,
273
+ "pad_token_id": 0,
274
+ "position_embedding_type": "absolute",
275
+ "problem_type": "multi_label_classification",
276
+ "torch_dtype": "float32",
277
+ "transformers_version": "4.50.1",
278
+ "type_vocab_size": 2,
279
+ "use_cache": true,
280
+ "vocab_size": 30522
281
+ }
powerful_model_titles/checkpoint-13472/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:245a73ba7b8f454bf53f90b354101db69b44d9100e152d6abe06dc47a408a548
3
+ size 438340072