Spaces:

poedator
/

scence_article_topics

Runtime error

App Files Files Community

Poe Dator committed on Mar 27, 2022

Commit

b339b00

1 Parent(s): 90cb2de

inference code, init version

Browse files

Files changed (1) hide show

app.py +66 -1

app.py CHANGED Viewed

@@ -1,9 +1,74 @@
 import streamlit as st
 # NOTE(review): these statements are repeated verbatim further down in this
 # diff view; presumably this is the pre-commit side of the hunk -- confirm.
 # Renders a greeting and an image, then echoes the entered text upper-cased.
 st.markdown("### Privet, mir!")
 st.markdown("<img width=200px src='https://i.pinimg.com/736x/11/33/19/113319f0ffe91f4bb0f468914b9916da.jpg'>", unsafe_allow_html=True)
 text = st.text_area("ENTER TEXT HERE")
 t2 = text.upper()
 st.markdown(f"{t2}")

 import streamlit as st
+import torch
+from torch import nn
+from transformers import BertModel, AutoTokenizer, AutoModel, pipeline
+# Device selection: the automatic CUDA/CPU choice on the next line is
+# commented out and the device is fixed to the CPU instead.
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+device = 'cpu'
 # Page header: a level-3 markdown greeting.
 st.markdown("### Privet, mir!")
 # The image is embedded as a raw <img> tag, hence unsafe_allow_html=True.
 st.markdown("<img width=200px src='https://i.pinimg.com/736x/11/33/19/113319f0ffe91f4bb0f468914b9916da.jpg'>", unsafe_allow_html=True)
 # Free-text input box; `text` holds whatever the user has typed so far.
 text = st.text_area("ENTER TEXT HERE")
 # Echo the input back to the page in upper case.
 t2 = text.upper()
 st.markdown(f"{t2}")
+# dict for decoding / enclding labels
+labels = {'cs.NE': 0, 'cs.CL': 1, 'cs.AI': 2, 'stat.ML': 3, 'cs.CV': 4, 'cs.LG': 5}
+labels_decoder = {'cs.NE': 'Neural and Evolutionary Computing', 'cs.CL': 'Computation and Language', 'cs.AI': 'Artificial Intelligence',
+ 'stat.ML': 'Machine Learning (stat)', 'cs.CV': 'Computer Vision', 'cs.LG': 'Machine Learning'}
+model_name = 'bert-base-uncased'
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+class BertClassifier(nn.Module):
+    def __init__(self, n_classes, dropout=0.5, model_name='bert-base-uncased'):
+        super(BertClassifier, self).__init__()
+        self.bert = BertModel.from_pretrained(model_name)
+        self.dropout = nn.Dropout(dropout)
+        self.linear = nn.Linear(768, n_classes)
+        self.relu = nn.ReLU()
+    def forward(self, input_id, mask):
+        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask,return_dict=False)
+        dropout_output = self.dropout(pooled_output)
+        linear_output = self.linear(dropout_output)
+        final_layer = self.relu(linear_output)
+        return final_layer
+model = BertClassifier(n_classes=len(labels))
+model.load_state_dict(torch.load('model_weights_1.pt'))
+model.eval()
+def inference(txt, mode=None):
+    # infers classes for text topic based on the trained model from above
+    # has separate mode 'print' for just output
+    txt = txt.lower().replace('\n', '')
+    t2 = tokenizer(txt,
+       padding='max_length', max_length = 512, truncation=True,
+       return_tensors="pt")
+    inp2 =  t2['input_ids'].to(device)
+    mask2 = t2['attention_mask'].unsqueeze(0).to(device)
+    out = model(inp2, mask2)
+    out = out.cpu().detach().numpy().reshape(-1)
+    out = out/out.sum() * 100
+    res = [(l, o) for l, o in zip (list(labels.keys()), out.tolist())]
+    if mode == 'print':
+        res.sort(key = lambda x : - x[1])
+        for lbl, score in res:
+            if score >=1:
+                print(f"[{lbl:<7}] {labels_decoder[lbl]:<35}  {score:.1f}%")
+    elif mode == 'debug':
+        return out, res
+    else:
+        return res
+res = inference(text, mode=None)
+st.markdown(f"{res}")