import streamlit as st
from extractor import extract, FewDocumentsError
from summarizer import summarize
import time
import cProfile
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
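
# The extractor/summarizer interfaces below are assumptions inferred from how
# they are called in this file; their actual signatures live in extractor.py
# and summarizer.py:
#   extract(query, search_model=..., extracted_documents=...) -> str
#       raises FewDocumentsError (carrying .documents and .msg) when too few
#       relevant documents are found for the query.
#   summarize(text, summ_model, tokenizer) -> str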

# Cache the loaded models across Streamlit reruns; allow_output_mutation skips
# hashing the returned (unhashable) model objects.
@st.cache(allow_output_mutation=True)
def init():
    # Download required NLTK resources
    from nltk import download
    download('punkt')
    download('stopwords')

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Model for semantic searches
    search_model = SentenceTransformer('msmarco-distilbert-base-v4', device=device)
    # Model for abstraction
    summ_model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')
    tokenizer = AutoTokenizer.from_pretrained('t5-base')

    return search_model, summ_model, tokenizer

# TODO: translation

def main():
    search_model, summ_model, tokenizer = init()

    st.title("Trabalho de Formatura - Construindo textos para a internet")
    st.subheader("Lucas Antunes e Matheus Vieira")

    st.subheader("Digite o tópico sobre o qual você deseja gerar um resumo")
    query = st.text_input('Digite o tópico em inglês') #text is stored in this variable

    # Streamlit reruns the whole script on every interaction, so the
    # few-documents flag is kept in session_state to survive the rerun
    # triggered by the confirmation button below.
    if 'few_documents' not in st.session_state:
        st.session_state['few_documents'] = False
        few_documents = False
    else:
        few_documents = st.session_state['few_documents']

    button1 = st.button('Gerar resumo')  # "Generate summary"

    if button1:
        start_time = time.time()
        try:
            with st.spinner('Extraindo textos relevantes...'):  # "Extracting relevant texts..."
                text = extract(query, search_model=search_model)
        except FewDocumentsError as e:
            few_documents = True
            st.session_state['few_documents'] = True
            st.session_state['documents'] = e.documents
            st.session_state['msg'] = e.msg
        else:

            st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')
            with st.spinner('Gerando resumo...'):  # "Generating summary..."
                summary = summarize(text, summ_model, tokenizer)
            st.info(f'(Total) Elapsed time: {time.time() - start_time:.2f}s')

            st.markdown(f'Seu resumo para "{query}":\n\n> {summary}')  # 'Your summary for "{query}"'


    if few_documents:
        st.warning(st.session_state['msg'])
        if st.button('Prosseguir'):  # "Proceed"
            start_time = time.time()
            with st.spinner('Extraindo textos relevantes...'):
                text = extract(query, search_model=search_model, extracted_documents=st.session_state['documents'])
            st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')
            with st.spinner('Gerando resumo...'):
                summary = summarize(text, summ_model, tokenizer)
            st.info(f'(Total) Elapsed time: {time.time() - start_time:.2f}s')

            st.markdown(f'Seu resumo para "{query}":\n\n> {summary}')

            st.session_state['few_documents'] = False
            few_documents = False
            
if __name__ == '__main__':
    # Profile the app; the second argument is a dump file in pstats binary
    # format (despite the .txt name), readable with the pstats module.
    cProfile.run('main()', 'stats.txt')
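
# To run the app (the file name below is an assumption; use this file's actual name):
#   streamlit run app.py
# To inspect the saved profile afterwards, for example:
#   python -c "import pstats; pstats.Stats('stats.txt').sort_stats('cumulative').print_stats(20)"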