import numpy as np
import pandas as pd

from languages import en, ru

# Column names the functions below expect in the incoming results table.
category_init = "label"
certainty_init = "score"

# Localised column headers, keyed by language and then by original column name.
_COLUMN_TITLES = {
    en: {category_init: "Category", certainty_init: "Certainty"},
    ru: {category_init: "Категория", certainty_init: "Уверенность"},
}

# Human-readable names for top-level category codes, keyed by language.
_CATEGORY_TITLES = {
    en: {
        "math": "Math",
        "astro-ph": "Astrophysics",
        "cond-mat": "Condensed matter physics",
        "hep-ph": "High energy physics -- Phenomenology",
        "physics": "Physics",
        "hep-th": "High energy physics -- Theory",
        "cs": "Computer Science",
        "quant-ph": "Quantum physics",
        "gr-qc": "General Relativity and Quantum Cosmology",
        "math-ph": "Mathematical Physics",
        "nucl-th": "Nuclear Theory",
        "eess": "Electrical Engineering and Systems Science",
        "q-bio": "Quantitative Biology",
        "nlin": "Nonlinear Sciences",
        "stat": "Statistics",
        "hep-lat": "High Energy Physics - Lattice",
        "hep-ex": "High Energy Physics - Experiment",
        "nucl-ex": "Nuclear Experiment",
        "econ": "Economics",
        "q-alg": "Quantum Algebra",
        "q-fin": "Quantitative Finance",
        "alg-geom": "Algebraic Geometry",
        "supr-con": "Superconductivity",
        "chao-dyn": "Chaotic dynamics",
        "dg-ga": "Differential Geometry",
        "funct-an": "Functional analysis",
        "atom-ph": "Atomic physics",
        "chem-ph": "Chemical Physics",
        "ao-sci": "Atmospheric and Oceanic Physics",
        "acc-phys": "Accelerator Physics",
        "bayes-an": "Bayesian statistics",
        "plasm-ph": "Plasma Physics",
    },
    ru: {
        "math": "Математика",
        "astro-ph": "Астрофизика",
        "cond-mat": "Физика конденсированного состояния",
        "hep-ph": "Физика элементарных частиц -- Феноменология",
        "physics": "Физика",
        "hep-th": "Физика элементарных частиц -- Теория",
        "cs": "Компьютерные науки",
        "quant-ph": "Квантовая физика",
        "gr-qc": "Общая теория относительности и квантовая космология",
        "math-ph": "Математическая физика",
        "nucl-th": "Ядерная физика",
        "eess": "Электротехника и системоведение",
        "q-bio": "Количественная биология",
        "nlin": "Нелинейные науки",
        "stat": "Статистика",
        "hep-lat": "Физика элементарных частиц -- Решетки",
        "hep-ex": "Экспериментальная физика элементарных частиц",
        "nucl-ex": "Ядерный эксперимент",
        "econ": "Экономика",
        "q-alg": "Квантовая алгебра",
        "q-fin": "Количественные финансы",
        "alg-geom": "Алгебраическая геометрия",
        "supr-con": "Сверхпроводимость",
        "chao-dyn": "Теория хаоса",
        "dg-ga": "Дифференциальная геометрия",
        "funct-an": "Функциональный анализ",
        "atom-ph": "Атомная физика",
        "chem-ph": "Химическая физика",
        "ao-sci": "Физика атмосферы и океана",
        "acc-phys": "Физика ускорителей",
        "bayes-an": "Байесовская статистика",
        "plasm-ph": "Физика плазмы",
    },
}


def filter_results(
    results: pd.DataFrame, top_k=0.95, min_certainty=0.01
) -> pd.DataFrame:
    """Keep the leading rows whose cumulative certainty first reaches ``top_k``.

    Rows are consumed in their existing order. The row on which the running
    sum of the ``certainty_init`` column reaches ``top_k`` is included. If the
    running sum never reaches ``top_k`` (or the frame is empty), every row is
    kept instead of collapsing to the first row only. Rows whose own certainty
    is below ``min_certainty`` are then dropped.

    Args:
        results: Frame containing a numeric ``certainty_init`` column.
        top_k: Cumulative-certainty threshold at which to stop.
        min_certainty: Minimum per-row certainty for a row to be kept.

    Returns:
        A new, independent DataFrame with the selected rows.
    """
    certs = results[certainty_init].to_numpy()
    reached = certs.cumsum() >= top_k
    if reached.any():
        # argmax on a boolean array is the index of the first True; +1 turns
        # that index into a slice length that still includes the row.
        keep = int(np.argmax(reached)) + 1
    else:
        # Threshold never reached: there is no cut-off point, keep everything.
        keep = len(certs)
    head = results.iloc[:keep]
    # Copy so callers can assign columns on the result without pandas
    # treating it as a slice of ``results``.
    return head[head[certainty_init] >= min_certainty].copy()


def process_keys(results: pd.DataFrame, lang) -> pd.DataFrame:
    """Rename the category/certainty columns to headers localised for ``lang``.

    English headers are used when ``lang`` has no entry of its own.
    """
    titles = _COLUMN_TITLES.get(lang, _COLUMN_TITLES[en])
    return results.rename(columns=titles)


def process_categories(results, lang):
    """Replace category codes with localised top-level category names.

    Anything from the first ``"."`` onward is stripped from each code (so
    ``"math.AG"`` is looked up as ``"math"``). Codes without a translation for
    ``lang`` are left as the stripped code. The ``category_init`` column of
    ``results`` is overwritten in place and the same frame is returned.
    """
    titles = _CATEGORY_TITLES.get(lang, {})

    def process_category(category):
        top_level = category.partition(".")[0]
        return titles.get(top_level, top_level)

    results[category_init] = results[category_init].apply(process_category)
    return results


def process_certainities(results):
    """Format the certainty column as percentage strings with two decimals.

    The ``certainty_init`` column of ``results`` is overwritten in place
    (e.g. ``0.5`` becomes ``"50.00%"``) and the same frame is returned.
    """
    results[certainty_init] = results[certainty_init].apply(
        lambda certainty: f"{100 * certainty:0.2f}%"
    )
    return results


def process_results(results, lang):
    """Turn raw label/score records into a localised table for display.

    Steps, in order: build a DataFrame from ``results``; map category codes to
    localised top-level names; sum the rows that now share a category; sort by
    certainty, highest first; trim with ``filter_results``; format
    certainties as percentages; rename the columns for ``lang``.
    """
    results = pd.DataFrame(results)
    results = process_categories(results, lang)
    # Sub-categories collapse onto one top-level name above, so merge them.
    results = results.groupby(by=category_init, as_index=False).sum()
    results = results.sort_values(by=[certainty_init], ascending=False)
    results = filter_results(results)
    results = process_certainities(results)
    results = process_keys(results, lang)
    return results