Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import os | |
import torch | |
from grobidmonkey import reader | |
from transformers import pipeline | |
from transformers import BartTokenizer, BartModel, BartForConditionalGeneration | |
from transformers import T5Tokenizer, T5ForConditionalGeneration | |
from document import Document | |
from BartSE import BARTAutoEncoder | |
def save_uploaded_file(uploaded_file):
    """Persist an uploaded file under ``./uploads`` and return its path.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getbuffer()`` (the file's bytes).

    Returns:
        str: Path of the written file, ``./uploads/<base name>``.

    Raises:
        ValueError: If the supplied name has no usable base name
            (empty, ``.`` or ``..``).
    """
    upload_dir = "./uploads"
    # The name is chosen by the client: keep only its final path component so
    # a value such as "../../x" cannot be written outside the upload directory.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string
# ---- Page header ----
st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')

# ---- Input widgets: 3:1 split, uploader on the left, parser choice on the right ----
col1, col2 = st.columns([3, 1])

with col1:
    uploaded_file = st.file_uploader("Choose a file")

with col2:
    parsing_methods = ('monkey', 'x2d', 'lxml')
    option = st.selectbox('Select parsing method.', parsing_methods)

# ---- Everything below runs only once a file has been uploaded ----
if uploaded_file is not None:
    # Echo the file name and its size back to the user.
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")

    # The reader is handed a file path, so the upload is written to disk first.
    saved_file_path = save_uploaded_file(uploaded_file)
    monkeyReader = reader.MonkeyReader(option)

    # Show the outline: each item unpacks into (prefix, fill, node); only the
    # prefix and the node's name are displayed.
    outline = monkeyReader.readOutline(saved_file_path)
    for prefix, _fill, entry in outline:
        st.write(f"{prefix!s}{entry.name!s}")

    # Read the paper content and show one line per key, with that key's
    # values joined by ", ".
    essay = monkeyReader.readEssay(saved_file_path)
    for heading, pieces in essay.items():
        joined_pieces = ', '.join(pieces)
        st.write(f"{heading}: {joined_pieces}")
# with st.status("Understanding paper..."): | |
# Barttokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn') | |
# summ_model_path = 'com3dian/Bart-large-paper2slides-summarizer' | |
# summarizor = BartForConditionalGeneration.from_pretrained(summ_model_path) | |
# exp_model_path = 'com3dian/Bart-large-paper2slides-expander' | |
# expandor = BartForConditionalGeneration.from_pretrained(exp_model_path) | |
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
# BartSE = BARTAutoEncoder(summarizor, summarizor, device) | |
# del summarizor, expandor | |
# document = Document(essay, Barttokenizer) | |
# del Barttokenizer | |
# length = document.merge(25, 30, BartSE, device) | |
# with st.status("Generating slides..."): | |
# summarizor = pipeline("summarization", model=summ_model_path, device = device) | |
# summ_text = summarizor(document.segmentation['text'], max_length=100, min_length=10, do_sample=False) | |
# summ_text = [text['summary_text'] for text in summ_text] | |
# for summ in summ_text: | |
# st.write(summ) | |