# taatiknet / app.py
# Revision b4a47e2 ("multiword"), uploaded by malper; 691 bytes.
import streamlit as st
from transformers import pipeline
import unicodedata
# Single-character substitutions applied after NFC composition.
_HEBREW_CHAR_MAP = str.maketrans({
    '\u05ba': '\u05b9',  # HOLAM HASER FOR VAV -> plain HOLAM
    '\u05be': '-',       # maqaf -> ASCII hyphen-minus
    '״': '"',            # gershayim -> ASCII double quote
    "׳": "'",            # geresh -> ASCII apostrophe
})


def normalize(text):
    """Return *text* in NFC form with selected Hebrew marks canonicalized.

    The string is first composed with Unicode NFC, then four characters
    are substituted: U+05BA becomes U+05B9, and the maqaf, gershayim and
    geresh punctuation marks become ``-``, ``"`` and ``'`` respectively.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string; an empty input yields an empty string.
    """
    composed = unicodedata.normalize('NFC', text)
    # Every mapping is one character to one character and no replacement
    # is itself a key, so one translate pass equals the chained replaces.
    return composed.translate(_HEBREW_CHAR_MAP)
# Streamlit re-executes this script on every widget interaction; caching the
# pipeline as a resource keeps one loaded model per server process instead of
# rebuilding it on each rerun.
@st.cache_resource(show_spinner=False)
def _load_pipeline():
    """Build the text2text-generation pipeline for 'malper/taatiknet'."""
    return pipeline("text2text-generation", model='malper/taatiknet', device_map="auto")


# The cache's own spinner is disabled above so only this message is shown.
with st.spinner('Loading TaatikNet framework...'):
    pipe = _load_pipeline()
st.success('Loaded!')
text = st.text_area('Enter text and press ctrl/command+enter:')

# Nothing is rendered until the user has submitted some text.
if text:
    # Split on whitespace and normalize each token; the pipeline receives
    # the tokens as a list, i.e. one model input per word.
    tokens = list(map(normalize, text.split()))
    results = pipe(tokens, max_length=40)
    # Re-assemble the per-word generations into one space-separated line.
    joined = ' '.join(item['generated_text'] for item in results)
    st.write(joined)