# File size: 3,199 bytes
# Revisions: 45110eb, 4975d28, 45110eb
# (Viewer line-number gutter for lines 1-91 omitted.)
import logging
import warnings
import wikipedia
import streamlit as st
from typing import List
from scanner_utils import *
from xgboost import XGBClassifier
from streamlit_searchbox import st_searchbox
from transformers import logging as hflogging
# ---------------------------------------------------------------------------
# Page setup: silence noisy warnings, configure the page, inject the custom
# stylesheet and render the centered title/subtitle header.
# ---------------------------------------------------------------------------

# Suppress warning categories that would otherwise clutter the app output.
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

# The page icon is the Egyptian flag emoji (it was mis-decoded in the source).
st.set_page_config(layout="centered", page_title="Egyptian Wikipedia Scanner", page_icon="🇪🇬")

# Inline the project's CSS file; read it explicitly as UTF-8 so the result does
# not depend on the platform's default encoding.
with open('.streamlit/style.css', encoding='utf-8') as f:
    st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)

# Centered title and subtitle rendered as raw HTML.
st.markdown("""
<h1 style='text-align: center';>Egyptian Arabic Wikipedia Scanner</h1>
<h5 style='text-align: center';>Automatic Detection of Template-translated Articles in the Egyptian Wikipedia</h5>
""", unsafe_allow_html=True)

st.markdown("", unsafe_allow_html=True)
def search_wikipedia(searchterm: str) -> List[str]:
    """Return article-title suggestions for the text typed into the search box.

    Args:
        searchterm: The current query text; empty (or ``None``) while the
            search box is blank.

    Returns:
        The titles returned by ``wikipedia.search`` for ``searchterm``, or an
        empty list when no search term was supplied (no lookup is made).
    """
    # NOTE(review): the `wikipedia` package queries whichever language edition
    # it is currently configured for (English by default). No
    # `wikipedia.set_lang(...)` call is visible in this part of the file --
    # confirm the Egyptian Arabic edition ("arz") is selected elsewhere.
    return wikipedia.search(searchterm) if searchterm else []
def load_xgb_model(model):
    """Create an ``XGBClassifier`` and load saved parameters into it.

    Args:
        model: Path to the saved XGBoost model file, passed straight to
            ``XGBClassifier.load_model``.

    Returns:
        The ``XGBClassifier`` instance with the saved model loaded.
    """
    loaded_xgb_classifier = XGBClassifier()
    # Without this call the `model` argument is ignored and the returned
    # classifier is unfitted, so any later `predict` call would fail.
    loaded_xgb_classifier.load_model(model)
    return loaded_xgb_classifier
# ---------------------------------------------------------------------------
# Main flow: pick an article, show its metadata, classify it, show its summary.
# ---------------------------------------------------------------------------

# Search box whose suggestions come from `search_wikipedia`.
selected_title = st_searchbox(search_wikipedia, label="Search for an article in Egyptian Arabic Wikipedia:",
                              placeholder="Search for an article", rerun_on_update=True, clear_on_submit=False, key="wiki_searchbox")

if selected_title:
    # `prepare_features` comes from `scanner_utils`; it returns the feature
    # input for the classifier, the article object, a metadata dataframe and
    # the (possibly updated) title.
    X, article, dataframe, selected_title = prepare_features(selected_title)

    st.write(f':black_small_square: Collected Metadata of **{selected_title}**')
    st.dataframe(dataframe, hide_index=True , use_container_width=True)

    loaded_xgb_classifier = load_xgb_model("XGBoost.model")

    id2label = {0:'Human-generated Article', 1:'Template-translated Article'}
    # `predict` returns an array with one label per input row; take the first
    # element explicitly instead of converting the whole array with `int()`.
    result = id2label[int(loaded_xgb_classifier.predict(X)[0])]

    # The heading is the same for both outcomes; only the result box differs.
    st.write(f":black_small_square: Automatic Classification of **{selected_title}**")
    if result == 'Human-generated Article':
        st.success(result, icon="✅")
    else:
        st.error(result, icon="🚨")

    st.write(f":black_small_square: Full Summary of **{selected_title}**")

    with st.expander(f'**{selected_title}**', expanded=True):
        st.markdown('<style>p {text-align: justify;}</style>', unsafe_allow_html=True)
        try:
            article_text = wikipedia.summary(selected_title)
        except wikipedia.exceptions.DisambiguationError as e:
            # Ambiguous title: fall back to the first suggested alternative.
            article_text = wikipedia.summary(e.options[0])
        # Show the fetched summary (it was previously computed but never rendered).
        st.write(article_text)
        st.write(f'> :globe_with_meridians: Read Full Text of **{selected_title}**: <br>{article.url}', unsafe_allow_html=True)

st.markdown('<br><br>', unsafe_allow_html=True)

# Page footer, rendered as raw HTML (styled by the `footer` / `p1` CSS classes).
footer = """
<div class="footer"> <p class="p1">Copyright © 2024 by *****************<br>Hosted with Hugging Face Spaces 🤗</p> </div>
"""
st.markdown(footer, unsafe_allow_html=True)