File size: 5,491 Bytes
a1ad5de ce0c8db bd0a951 a1ad5de bd0a951 ce0c8db ce32a21 bd0a951 ce0c8db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import numpy as np
import pandas as pd
from languages import en, ru
# Column names used to address the incoming results frame throughout this
# module: `category_init` holds the category code, `certainty_init` holds the
# numeric certainty. NOTE(review): these look like the "label"/"score" keys of
# a classifier's raw output -- confirm against the caller.
category_init = "label"
certainty_init = "score"
def filter_results(results: pd.DataFrame, top_k=0.95):
    """Keep the leading rows whose cumulative certainty reaches ``top_k``.

    Rows are consumed in their existing order (the caller is expected to have
    sorted them by descending certainty). Rows are kept up to and including
    the first one at which the running sum of the ``certainty_init`` column
    is no longer below ``top_k``. If the running sum never reaches ``top_k``
    (for example because the scores do not add up to it), every row is kept.
    Afterwards rows whose own certainty is below 0.01 are dropped.

    Args:
        results: Frame containing a numeric ``certainty_init`` column.
        top_k: Cumulative certainty threshold.

    Returns:
        A new DataFrame with the selected rows; ``results`` is not modified.
    """
    scores = results[certainty_init].to_numpy()
    below_threshold = scores.cumsum() < top_k

    if below_threshold.all():
        # The threshold is never reached (this also covers an empty frame).
        # np.argmin would report index 0 for an all-True mask and raise on an
        # empty one, so handle this case explicitly and keep everything.
        keep = len(scores)
    else:
        # Index of the first row that reaches the threshold, inclusive.
        keep = int(np.argmin(below_threshold)) + 1

    head = results.iloc[:keep]
    # Return an independent frame so that callers assigning to its columns in
    # place do not operate on a slice of the input.
    return head[head[certainty_init] >= 0.01].copy()
def process_keys(results: pd.DataFrame, lang):
    """Rename the category and certainty columns to localized headers.

    Args:
        results: Frame whose columns include ``category_init`` and
            ``certainty_init``.
        lang: Language key (``en`` or ``ru``); any other value falls back to
            the English headers.

    Returns:
        A renamed copy of ``results``; the input frame is left untouched.
    """
    # Per-column table of localized header texts.
    headers = {
        category_init: {en: "Category", ru: "Категория"},
        certainty_init: {en: "Certainty", ru: "Уверенность"},
    }
    renamed = {
        column: names.get(lang, names[en]) for column, names in headers.items()
    }
    return results.rename(columns=renamed)
def process_categories(results, lang):
    """Replace category codes with human-readable, localized names.

    Each value of the ``category_init`` column is cut at its first ``"."``
    (so ``"cs.LG"`` becomes ``"cs"``) and the remaining prefix is looked up in
    the table for ``lang``. Prefixes missing from the table are kept as-is.
    An unknown ``lang`` falls back to the English names, matching the header
    fallback in ``process_keys``.

    Args:
        results: Frame with a ``category_init`` column of string codes.
        lang: Language key (``en`` or ``ru``).

    Returns:
        The same ``results`` object; its ``category_init`` column is
        overwritten in place.
    """
    categories = {
        en: {
            "math": "Math",
            "astro-ph": "Astrophysics",
            "cond-mat": "Condensed matter physics",
            "hep-ph": "High energy physics -- Phenomenology",
            "physics": "Physics",
            "hep-th": "High energy physics -- Theory",
            "cs": "Computer Science",
            "quant-ph": "Quantum physics",
            "gr-qc": "General Relativity and Quantum Cosmology",
            "math-ph": "Mathematical Physics",
            "nucl-th": "Nuclear Theory",
            "eess": "Electrical Engineering and Systems Science",
            "q-bio": "Quantitative Biology",
            "nlin": "Nonlinear Sciences",
            "stat": "Statistics",
            "hep-lat": "High Energy Physics - Lattice",
            "hep-ex": "High Energy Physics - Experiment",
            "nucl-ex": "Nuclear Experiment",
            "econ": "Economics",
            "q-alg": "Quantum Algebra",
            "q-fin": "Quantitative Finance",
            "alg-geom": "Algebraic Geometry",
            "supr-con": "Superconductivity",
            "chao-dyn": "Chaotic dynamics",
            "dg-ga": "Differential Geometry",
            "funct-an": "Functional analysis",
            "atom-ph": "Atomic physics",
            "chem-ph": "Chemical Physics",
            "ao-sci": "Atmospheric and Oceanic Physics",
            "acc-phys": "Accelerator Physics",
            "bayes-an": "Bayesian statistics",
            "plasm-ph": "Plasma Physics",
        },
        ru: {
            "math": "Математика",
            "astro-ph": "Астрофизика",
            "cond-mat": "Физика конденсированного состояния",
            "hep-ph": "Физика элементарных частиц -- Феноменология",
            "physics": "Физика",
            "hep-th": "Физика элементарных частиц -- Теория",
            "cs": "Компьютерные науки",
            "quant-ph": "Квантовая физика",
            "gr-qc": "Общая теория относительности и квантовая космология",
            "math-ph": "Математическая физика",
            "nucl-th": "Ядерная физика",
            "eess": "Электротехника и системоведение",
            "q-bio": "Количественная биология",
            "nlin": "Нелинейные науки",
            "stat": "Статистика",
            "hep-lat": "Физика элементарных частиц -- Решетки",
            "hep-ex": "Экспериментальная физика элементарных частиц",
            "nucl-ex": "Ядерный эксперимент",
            "econ": "Экономика",
            "q-alg": "Квантовая алгебра",
            "q-fin": "Количественные финансы",
            "alg-geom": "Алгебраическая геометрия",
            "supr-con": "Сверхпроводимость",
            "chao-dyn": "Теория хаоса",
            "dg-ga": "Дифференциальная геометрия",
            "funct-an": "Функциональный анализ",
            "atom-ph": "Атомная физика",
            "chem-ph": "Химическая физика",
            "ao-sci": "Физика атмосферы и океана",
            "acc-phys": "Физика ускорителей",
            "bayes-an": "Байесовская статистика",
            "plasm-ph": "Физика плазмы",
        },
    }

    # Resolve the language table once; unknown languages use English so the
    # values stay consistent with the English column headers.
    names = categories.get(lang, categories[en])

    def process_category(category):
        # Keep only the top-level archive, i.e. the part before the first dot.
        prefix = category.partition(".")[0]
        return names.get(prefix, prefix)

    results[category_init] = results[category_init].apply(process_category)
    return results
def process_certainities(results):
    """Format the certainty column as percentage strings.

    Every value of the ``certainty_init`` column is multiplied by 100 and
    rendered with two decimals and a trailing ``%`` (0.25 -> ``"25.00%"``).

    Args:
        results: Frame with a numeric ``certainty_init`` column.

    Returns:
        The same ``results`` object; the column is overwritten in place.
    """

    def as_percent(certainty):
        return f"{100 * certainty:0.2f}%"

    formatted = results[certainty_init].apply(as_percent)
    results[certainty_init] = formatted
    return results
def process_results(results, lang):
    """Turn raw results into a localized, display-ready table.

    Steps, in order: build a DataFrame, translate category codes to display
    names, merge rows that share a display name by summing them, sort by
    descending certainty, trim with ``filter_results``, format certainties as
    percentages, and finally localize the column headers.

    Args:
        results: Anything ``pd.DataFrame`` accepts that yields the
            ``category_init`` and ``certainty_init`` columns.
            NOTE(review): presumably a list of label/score records -- confirm
            against the caller.
        lang: Language key passed on to the category and header localization.

    Returns:
        The processed DataFrame.
    """
    frame = process_categories(pd.DataFrame(results), lang)

    # Different sub-categories can collapse to one display name; add them up.
    merged = frame.groupby(by=category_init, as_index=False).sum()
    ranked = merged.sort_values(by=[certainty_init], ascending=False)

    shortlisted = filter_results(ranked)
    return process_keys(process_certainities(shortlisted), lang)
|