# doc-to-slides / app.py
# File-viewer page header captured together with the source (kept for reference):
#   com3dian's picture | Update app.py | 0265ddb verified | raw | history blame | 4.91 kB
import streamlit as st
import pandas as pd
import numpy as np
import os
import pickle
import torch
from grobidmonkey import reader
from transformers import pipeline
from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
from transformers import T5Tokenizer, T5ForConditionalGeneration
from document import Document
from BartSE import BARTAutoEncoder
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return where it was saved.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getbuffer()`` (the raw bytes to write).

    Returns:
        The path of the written file, as a string.

    Raises:
        ValueError: If the supplied name has no usable file-name component
            (empty, ``.`` or ``..``).
    """
    upload_dir = "./uploads"
    # The name is chosen by the client, so keep only its final component;
    # otherwise a name such as "../app.py" would be written outside the
    # uploads directory. Backslashes are normalised first so Windows-style
    # paths are stripped on every platform.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string
# Page heading and upload prompt shown at the top of the app
st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')
# col1, col2 = st.columns([3, 1])
# with col1:
# uploaded_file = st.file_uploader("Choose a file")
# with col2:
# option = st.selectbox(
# 'Select parsing method.',
# ('monkey', 'x2d', 'lxml'))
# if uploaded_file is not None:
# st.write(uploaded_file.name)
# bytes_data = uploaded_file.getvalue()
# st.write(len(bytes_data), "bytes")
# saved_file_path = save_uploaded_file(uploaded_file)
# monkeyReader = reader.MonkeyReader(option)
# outline = monkeyReader.readOutline(saved_file_path)
# for pre, fill, node in outline:
# st.write("%s%s" % (pre, node.name))
# # read paper content
# essay = monkeyReader.readEssay(saved_file_path)
# with st.status("Understanding paper..."):
# Barttokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
# summ_model_path = 'com3dian/Bart-large-paper2slides-summarizer'
# summarizor = BartForConditionalGeneration.from_pretrained(summ_model_path)
# exp_model_path = 'com3dian/Bart-large-paper2slides-expander'
# expandor = BartForConditionalGeneration.from_pretrained(exp_model_path)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# BartSE = BARTAutoEncoder(summarizor, summarizor, device)
# del summarizor, expandor
# document = Document(essay, Barttokenizer)
# del Barttokenizer
# length = document.merge(25, 30, BartSE, device)
# with st.status("Generating slides..."):
# summarizor = pipeline("summarization", model=summ_model_path, device = device)
# summ_text = summarizor(document.segmentation['text'], max_length=100, min_length=10, do_sample=False)
# summ_text = [text['summary_text'] for text in summ_text]
# for summ in summ_text:
# st.write(summ)
# Load the slide texts from a pickle file in the working directory.
# NOTE: unpickling can execute arbitrary code, so this file must be trusted.
with open('slides_text.pkl', 'rb') as file:
    _pickled_bytes = file.read()
summ_text = pickle.loads(_pickled_bytes)
# Function to turn each slide text into a Markdown bullet list
def format(text_list):
    """Convert each text into a Markdown bullet list, one bullet per sentence.

    A text is split at periods, each non-empty piece is stripped, and every
    piece becomes a line of the form ``- <sentence>.`` followed by a newline.
    Periods that sit between two digits (``0.95``, ``1.2.3``) are not treated
    as sentence breaks, so numbers stay intact.

    NOTE: the name shadows the builtin ``format``; it is kept because the
    rest of the script calls it by this name.

    Args:
        text_list: Iterable of strings to convert.

    Returns:
        A list with one Markdown string per input text; a text with no
        non-empty sentences yields an empty string.
    """
    import re  # local import: `re` is not among the file's top-level imports

    # Match a period unless it has a digit on BOTH sides.
    sentence_break = re.compile(r'(?<!\d)\.|\.(?!\d)')

    format_list = []
    for text in text_list:
        sentences = (piece.strip() for piece in sentence_break.split(text))
        # Build the Markdown list items, skipping empty fragments
        format_list.append("".join(f"- {sentence}.\n" for sentence in sentences if sentence))
    return format_list
# Seed session state (page index, formatted texts, displayed text), but only
# for keys that are not already present
_state = st.session_state
if 'page_index' not in _state:
    _state['page_index'] = 0
if 'summ_text' not in _state:
    _state['summ_text'] = format(summ_text)
if 'current_text' not in _state:
    _state['current_text'] = _state['summ_text'][_state['page_index']]
# Function to handle page turn
def turn_page(direction):
    """Move one page forward or back and refresh the displayed text.

    Args:
        direction: ``"next"`` to advance or ``"prev"`` to go back. Any other
            value, or a move past either end, leaves the page index unchanged.
    """
    state = st.session_state
    # Bound against the list that is indexed below (the session copy),
    # not the module-level list it was built from.
    last_index = len(state.summ_text) - 1
    if direction == "next" and state.page_index < last_index:
        state.page_index += 1
    elif direction == "prev" and state.page_index > 0:
        state.page_index -= 1
    state.current_text = state.summ_text[state.page_index]
# Callback that stores the text-area content as the current page's text
def update_text():
    """Copy the text-area value into the current page slot and the displayed text."""
    state = st.session_state
    edited = state.text_area_value
    state.summ_text[state.page_index] = edited
    state.current_text = edited
# Editable text box for the current page; update_text runs when it changes
text = st.text_area(
    "Edit Text",
    st.session_state.current_text,
    height=200,
    key="text_area_value",
    on_change=update_text,
)

# Pager row: Previous button, centred page counter, Next button
col1, col2, col3 = st.columns([2.25, 12, 1.7])

# Previous button in the left column
col1.button("Previous", on_click=turn_page, args=("prev",))

# Centre-aligned "Page X of Y" label in the middle column
col2.markdown(
    '<div style="display: flex; justify-content: center; align-items: center; height: 100%;">'
    f'Page {st.session_state.page_index + 1} of {len(summ_text)}'
    '</div>',
    unsafe_allow_html=True,
)

# Next button in the right column
col3.button("Next", on_click=turn_page, args=("next",))

# Render the current page's text as Markdown below the controls
st.markdown(st.session_state.current_text)