import os

import streamlit as st
import pandas as pd
import numpy as np
import torch  # BUG FIX: torch.device() is used below but torch was never imported
from grobidmonkey import reader
from transformers import pipeline
from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
from transformers import T5Tokenizer, T5ForConditionalGeneration

from document import Document
from BartSE import BARTAutoEncoder


def save_uploaded_file(uploaded_file):
    """Persist a Streamlit UploadedFile under ./uploads and return its path.

    Args:
        uploaded_file: the object returned by st.file_uploader.

    Returns:
        str: filesystem path the uploaded bytes were written to.
    """
    os.makedirs("./uploads", exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join("./uploads", uploaded_file.name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string


st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')

col1, col2 = st.columns([3, 1])
with col1:
    uploaded_file = st.file_uploader("Choose a file")
with col2:
    option = st.selectbox(
        'Select parsing method.',
        ('monkey', 'x2d', 'lxml'))

if uploaded_file is not None:
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")

    saved_file_path = save_uploaded_file(uploaded_file)

    # Parse the PDF with the selected GROBID reader and show the outline.
    monkeyReader = reader.MonkeyReader(option)
    outline = monkeyReader.readOutline(saved_file_path)
    for pre, fill, node in outline:
        st.write("%s%s" % (pre, node.name))

    # read paper content
    essay = monkeyReader.readEssay(saved_file_path)
    for key, values in essay.items():
        st.write(f"{key}: {', '.join(values)}")

    # Load the summarizer/expander BART pair used by the autoencoder.
    Barttokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
    summ_model_path = 'com3dian/Bart-large-paper2slides-summarizer'
    summarizor = BartForConditionalGeneration.from_pretrained(summ_model_path)
    exp_model_path = 'com3dian/Bart-large-paper2slides-expander'
    expandor = BartForConditionalGeneration.from_pretrained(exp_model_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # BUG FIX: the original passed `summarizor` twice and left the freshly
    # loaded `expandor` unused (then deleted it) — the expander is the
    # intended second argument of the autoencoder.
    BartSE = BARTAutoEncoder(summarizor, expandor, device)
    del summarizor, expandor

    # BUG FIX: the original referenced an undefined name `article`
    # (NameError); `essay` — the parsed paper content — is the intended
    # input. TODO(review): confirm Document expects the readEssay dict.
    document = Document(essay, Barttokenizer)
    del Barttokenizer
    length = document.merge(10, 30, BartSE, device)
# Summarize each merged document segment into slide text.
# Guarded so it only runs after a paper has been uploaded and processed
# (the names used here are only bound inside that branch).
if uploaded_file is not None:
    # BUG FIX: the original hard-coded device=0 (first GPU), which raises on
    # CPU-only machines; fall back to -1 (CPU) when CUDA is unavailable.
    summarizor = pipeline(
        "summarization",
        model=summ_model_path,
        device=0 if torch.cuda.is_available() else -1,
    )
    summ_text = summarizor(
        document.segmentation['text'],
        max_length=100,
        min_length=10,
        do_sample=False,
    )
    # The pipeline returns a list of {'summary_text': ...} dicts; keep only
    # the plain summary strings.
    summ_text = [text['summary_text'] for text in summ_text]