Spaces:
Runtime error
Runtime error
File size: 3,665 Bytes
e539b70 c16fec3 e539b70 bfbd0a1 78a71e8 e539b70 78a71e8 f37c0ba bfbd0a1 78a71e8 e539b70 c16fec3 e539b70 c16fec3 e539b70 bfbd0a1 e539b70 bfbd0a1 c16fec3 bfbd0a1 c16fec3 bfbd0a1 78a71e8 bfbd0a1 78a71e8 bfbd0a1 c16fec3 bfbd0a1 78a71e8 bfbd0a1 78a71e8 bfbd0a1 c16fec3 bfbd0a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import streamlit as st
from extractor import extract, FewDocumentsError
from summarizer import summarize
from translation import translate
import time
import cProfile
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
@st.cache(allow_output_mutation=True)
def init():
# Dowload required NLTK resources
from nltk import download
download('punkt')
download('stopwords')
device = "cuda" if torch.cuda.is_available() else "cpu"
# Model for semantic searches
search_model = SentenceTransformer('msmarco-distilbert-base-v4', device=device)
# Model for abstraction
summ_model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')
tokenizer = AutoTokenizer.from_pretrained('t5-base')
return search_model, summ_model, tokenizer
def main():
search_model, summ_model, tokenizer = init()
st.title("AutoSumm")
st.subheader("Lucas Antunes & Matheus Vieira")
portuguese = st.checkbox('Traduzir para o português.')
if portuguese:
st.subheader("Digite o tópico sobre o qual você deseja gerar um resumo")
query_pt = st.text_input('Digite o tópico') #text is stored in this variable
button = st.button('Gerar resumo')
else:
st.subheader("Type the desired topic to generate the summary")
query = st.text_input('Type your topic') #text is stored in this variable
button = st.button('Generate summary')
if 'few_documents' not in st.session_state:
st.session_state['few_documents'] = False
few_documents = False
else:
few_documents = st.session_state['few_documents']
if button:
start_time = time.time()
query = translate(query_pt, 'pt', 'en') if portuguese else query
try:
with st.spinner('Extraindo textos relevantes...'):
text = extract(query, search_model=search_model)
except FewDocumentsError as e:
few_documents = True
st.session_state['few_documents'] = True
st.session_state['documents'] = e.documents
st.session_state['msg'] = e.msg
else:
st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')
with st.spinner('Gerando resumo...'):
summary = summarize(text, summ_model, tokenizer)
st.info(f'(Total) Elapsed time: {time.time() - start_time:.2f}s')
if portuguese:
st.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
else:
st.markdown(f'Your summary for "{query}":\n\n> {summary}')
if few_documents:
st.warning(st.session_state['msg'])
if st.button('Prosseguir'):
start_time = time.time()
with st.spinner('Extraindo textos relevantes...'):
text = extract(query, search_model=search_model, extracted_documents=st.session_state['documents'])
st.info(f'(Extraction) Elapsed time: {time.time() - start_time:.2f}s')
with st.spinner('Gerando resumo...'):
summary = summarize(text, summ_model, tokenizer)
st.info(f'(Total) Elapsed time: {time.time() - start_time:.2f}s')
if portuguese:
st.markdown(f'Seu resumo para "{query_pt}":\n\n> {translate(summary, "en", "pt")}')
else:
st.markdown(f'Your summary for "{query}":\n\n> {summary}')
st.session_state['few_documents'] = False
few_documents = False
if __name__ == '__main__':
cProfile.run('main()', 'stats.txt') |