Spaces:
Runtime error
Runtime error
File size: 3,038 Bytes
e539b70 bfbd0a1 78a71e8 e539b70 78a71e8 f37c0ba e539b70 bfbd0a1 78a71e8 e539b70 bfbd0a1 e539b70 bfbd0a1 e539b70 bfbd0a1 e539b70 bfbd0a1 78a71e8 bfbd0a1 78a71e8 bfbd0a1 78a71e8 bfbd0a1 78a71e8 bfbd0a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import streamlit as st
from extractor import extract, FewDocumentsError
from summarizer import summarize
import time
import cProfile
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
@st.cache(allow_output_mutation=True)
def init():
    """Load every heavy resource the app needs and return the models.

    Downloads the NLTK 'punkt' and 'stopwords' resources, then loads the
    sentence-embedding model used for semantic search and the T5 model and
    tokenizer used for abstractive summarization. The function is wrapped
    in ``st.cache`` so Streamlit can reuse the result between script runs.

    Returns:
        A ``(search_model, summ_model, tokenizer)`` tuple.
    """
    # Download the required NLTK resources
    import nltk
    for resource in ('punkt', 'stopwords'):
        nltk.download(resource)

    # Use the GPU when one is available
    target_device = "cuda" if torch.cuda.is_available() else "cpu"

    # Model for semantic searches
    retriever = SentenceTransformer(
        'msmarco-distilbert-base-v4',
        device=target_device,
    )

    # Model for abstraction
    # NOTE(review): only the search model is given the device here; the
    # seq2seq model is left where from_pretrained puts it - confirm that
    # matches what `summarize` expects.
    abstractor = AutoModelForSeq2SeqLM.from_pretrained('t5-base')
    abstractor_tokenizer = AutoTokenizer.from_pretrained('t5-base')

    return retriever, abstractor, abstractor_tokenizer
# TODO: translation
def _summarize_and_show(text, query, start_time, summ_model, tokenizer):
    """Summarize ``text`` and render the result, with timings, on the page.

    Args:
        text: Extracted text to summarize, as returned by ``extract``.
        query: Topic typed by the user; echoed in the summary heading.
        start_time: ``time.time()`` value taken before extraction started,
            so both reported durations are measured from the same origin.
        summ_model: Model handed to ``summarize``.
        tokenizer: Tokenizer handed to ``summarize``.
    """
    st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')
    with st.spinner('Gerando resumo...'):
        summary = summarize(text, summ_model, tokenizer)
    st.info(f'(Total) Elapsed time: {time.time() - start_time:.2f}s')
    st.markdown(f'Seu resumo para "{query}":\n\n> {summary}')


def main():
    """Render the summarization page and react to its buttons.

    Flow:
      1. The user types a topic and presses 'Gerar resumo'. Relevant text
         is extracted for the query and then summarized.
      2. If ``extract`` raises ``FewDocumentsError``, the documents and the
         message carried by the exception are stored in
         ``st.session_state`` and a warning is shown with a 'Prosseguir'
         button.
      3. Pressing 'Prosseguir' re-runs the extraction with the stored
         documents and shows the summary.

    The "few documents" flag lives in ``st.session_state`` because the
    'Prosseguir' click arrives in a later script run, in which the
    'Gerar resumo' button no longer reports a click.
    """
    search_model, summ_model, tokenizer = init()

    st.title("Trabalho de Formatura - Construindo textos para a internet")
    st.subheader("Lucas Antunes e Matheus Vieira")
    st.subheader("Digite o tópico sobre o qual você deseja gerar um resumo")

    # The topic typed by the user
    query = st.text_input('Digite o tópico em inglês')

    if 'few_documents' not in st.session_state:
        st.session_state['few_documents'] = False
    few_documents = st.session_state['few_documents']

    if st.button('Gerar resumo'):
        # A new request supersedes any confirmation still pending from an
        # earlier query; without this reset a successful run would still
        # show the old warning and offer to proceed with stale documents.
        few_documents = False
        st.session_state['few_documents'] = False

        start_time = time.time()
        try:
            with st.spinner('Extraindo textos relevantes...'):
                text = extract(query, search_model=search_model)
        except FewDocumentsError as e:
            # Remember what was found so the user can choose to go on
            few_documents = True
            st.session_state['few_documents'] = True
            st.session_state['documents'] = e.documents
            st.session_state['msg'] = e.msg
        else:
            _summarize_and_show(text, query, start_time, summ_model, tokenizer)

    if few_documents:
        st.warning(st.session_state['msg'])
        if st.button('Prosseguir'):
            start_time = time.time()
            with st.spinner('Extraindo textos relevantes...'):
                text = extract(
                    query,
                    search_model=search_model,
                    extracted_documents=st.session_state['documents'],
                )
            _summarize_and_show(text, query, start_time, summ_model, tokenizer)

            # The pending confirmation has been consumed
            st.session_state['few_documents'] = False
if __name__ == '__main__':
    # Run the app under the profiler and write the collected statistics
    # to 'stats.txt'.
    cProfile.run('main()', 'stats.txt')