import streamlit as st
from transformers import pipeline


@st.cache_resource  # caching: keep one pipeline instance across Streamlit reruns
def load_model():
    """Create the text-classification pipeline used by the app.

    ``top_k=None`` asks the pipeline to return a score for every label
    instead of only the best one, which ``top_pct`` relies on.
    """
    return pipeline(
        "text-classification",
        model="voroninip/bert-paper-classifier-arxiv",
        top_k=None,
    )


model = load_model()


def top_pct(preds, threshold=.95):
    """Return the highest-scoring predictions covering ``threshold`` of the mass.

    Predictions are sorted by descending ``"score"`` and taken one by one
    until their cumulative score reaches ``threshold``. If the threshold is
    never reached, every prediction is returned.

    Args:
        preds: Iterable of dicts, each with a numeric ``"score"`` key.
        threshold: Cumulative score at which to stop taking predictions.

    Returns:
        A new list with the selected predictions, best first. Empty input
        yields an empty list.
    """
    ranked = sorted(preds, key=lambda pred: pred["score"], reverse=True)
    selected = []
    cum_score = 0
    for pred in ranked:
        selected.append(pred)
        cum_score += pred["score"]
        if cum_score >= threshold:
            break
    return selected


def format_predictions(preds) -> str:
    """Prepare predictions and their scores for printing to the user.

    Each prediction becomes one numbered line (starting at 1) of the form
    ``"<n>. <label> (score <score with two decimals>)"`` followed by a
    newline.

    Args:
        preds: Iterable of dicts with ``"label"`` and ``"score"`` keys.

    Returns:
        The concatenated lines, or an empty string when ``preds`` is empty.
    """
    return "".join(
        f"{rank}. {item['label']} (score {item['score']:.2f})\n"
        for rank, item in enumerate(preds, start=1)
    )


# NOTE(review): the literals passed with unsafe_allow_html=True below contain
# no markup in this view -- presumably HTML/CSS was lost somewhere upstream;
# confirm against the original file before relying on the page styling.
st.markdown(
    """ """,
    unsafe_allow_html=True
)

st.markdown("""

""", unsafe_allow_html=True)

st.markdown("""

🚀 arXiv paper categories predictor

""", unsafe_allow_html=True)

st.markdown("""

Paste Title and Abstract of the paper and get most likely categories of the paper in the arXiv taxonomy

""", unsafe_allow_html=True)

title = st.text_input("Title", value="")
abstract = st.text_input("Abstract", value="")

st.markdown("""

Most likely categories of the paper:

""", unsafe_allow_html=True)

query = title + '\n' + abstract
# Run the model only once the user has typed something in either field.
if title or abstract:
    # truncation=True is forwarded to the tokenizer so that an over-long
    # title/abstract is cut to the model's maximum length instead of making
    # the pipeline raise.
    result = format_predictions(top_pct(model(query, truncation=True)[0]))
    st.write(result)