File size: 3,223 Bytes
2a97daa
 
 
 
 
 
1a4bd79
 
2a97daa
1a4bd79
 
c4c785c
 
2a97daa
c4c785c
 
 
 
 
2a97daa
 
 
1a4bd79
 
 
 
2a97daa
 
 
 
 
 
 
b00df13
2a97daa
 
1a4bd79
c4c785c
 
2a97daa
 
1a4bd79
2a97daa
 
c4c785c
 
2a97daa
c4c785c
 
 
 
2a97daa
c4c785c
 
 
 
 
 
 
 
1a4bd79
c4c785c
 
 
 
 
dc91b3c
 
c4c785c
dc91b3c
c4c785c
1a4bd79
2a97daa
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from configs.download_files import FileDownloader
from configs.db_configs import add_one_item
from streamlit.components.v1 import html
from configs.html_features import set_image
from sacrebleu.compat import corpus_bleu  
import pandas as pd



# Hugging Face checkpoint used for both the tokenizer and the seq2seq model.
_TRANSLATION_CHECKPOINT = 'stevhliu/my_awesome_opus_books_model'

# Loaded (tokenizer, model) pairs keyed by checkpoint name. The cache lives at
# module level, so it persists for as long as this module object does and
# repeated calls reuse the loaded objects instead of re-reading them.
_TRANSLATION_RESOURCES = {}


def _get_translation_resources(checkpoint=_TRANSLATION_CHECKPOINT):
    """Return the ``(tokenizer, model)`` pair for *checkpoint*, loading it on first use."""
    if checkpoint not in _TRANSLATION_RESOURCES:
        _TRANSLATION_RESOURCES[checkpoint] = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        )
    return _TRANSLATION_RESOURCES[checkpoint]


def translate_text_to_text(text, source_lang, target_lang):
    """Translate *text* with the seq2seq checkpoint and return the decoded string.

    The model is prompted with ``'translate <source_lang> to <target_lang>: '``
    followed by the text.

    Args:
        text: The text to translate.
        source_lang: Name of the source language as it should appear in the
            prompt (e.g. ``'English'``).
        target_lang: Name of the target language as it should appear in the
            prompt (e.g. ``'French'``).

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    tokenizer, model = _get_translation_resources()

    prefix = f'translate {source_lang} to {target_lang}: '
    input_ids = tokenizer(prefix + text, return_tensors='pt').input_ids

    # Allow the output to be up to three times as long as the tokenized input.
    # Decoding is deterministic (do_sample=False); top_k/top_p are sampling-only
    # settings, so they are intentionally not passed here.
    output_ids = model.generate(
        input_ids,
        max_new_tokens=len(input_ids[0]) * 3,
        do_sample=False,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def validate_translation(original_text, translated_text):
    """Compute sacreBLEU corpus statistics for a single translated text.

    ``corpus_bleu`` takes a sequence of hypothesis strings and a sequence of
    reference streams (each itself a sequence of strings). Both texts are
    therefore wrapped so that each is treated as one whole segment; passing a
    bare string would make it be iterated character by character.

    Args:
        original_text: The text used as the (single) reference.
        translated_text: The text used as the (single) hypothesis.

    Returns:
        The score object returned by ``corpus_bleu``.
    """
    hypotheses = [translated_text]
    references = [[original_text]]  # one reference stream with one segment
    return corpus_bleu(hypotheses, references)

def main():
    """Render the 'Text Translator' page and handle a translation request.

    The page shows a title, an image, language selectors in the sidebar and a
    text area. When the 'translate it' button is pressed, the input is
    validated, stored via ``add_one_item``, translated, offered for download
    and summarised with the length statistics from ``validate_translation``.
    """
    st.title('Text Translator')

    # Only the wide middle column holds content; the two narrow outer columns
    # are left empty.
    _, image_col, _ = st.columns([1, 5.3, 1])
    with image_col:
        url = "https://i.postimg.cc/jdF1hPng/combined.png"
        html(set_image(url), height=400, width=400)

    languages = ['English', 'French']
    source_lang = st.sidebar.selectbox('Source Language', languages)
    target_lang = st.sidebar.selectbox('Target Language', languages, index=1)
    text = st.text_area('Text Translator', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')

    if not st.button('translate it'):
        return

    # Reject empty and whitespace-only input before doing any work.
    if not text or not text.strip():
        st.error('Please enter a non-empty text.')
        return

    if source_lang == target_lang:
        st.error('Expected different values for source and target languages, but got the same values!')
        return

    with st.expander('Original Text'):
        st.write(text)
        add_one_item(text, 'Text Translator')

    with st.expander('Translated Text'):
        translated_text = translate_text_to_text(text, source_lang, target_lang)
        st.write(translated_text)

    download_col, validation_col = st.columns(2)
    with download_col:
        with st.expander('Download Translated Text'):
            FileDownloader(translated_text, 'txt').download()

    with validation_col:
        with st.expander('Translated Text Validation'):
            bleu_score = validate_translation(text, translated_text)
            # Single-row table built from attributes of the returned score object.
            df = pd.DataFrame({
                'Brevity Penalty': bleu_score.bp,
                'length of original text': bleu_score.ref_len,
                'length of translated text': bleu_score.sys_len,
                'Ratio': bleu_score.ratio,
            }, index=[1])
            st.dataframe(df)
                

# Build the page only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()