Spaces:

ml6team
/

doc-to-slides

Sleeping

File size: 3,839 Bytes

ea5c59c
 
 
8d4620d
6f670c5
7ee71e2
9ec3b13
ea5c59c
0cbdeb5
 
 
 
 
 
 
 
8d4620d
28c51ee
 
8d4620d
 
 
ea5c59c
8d4620d
ea5c59c
1ed0b9b
54f71b8
0ed5911
 
 
 
 
 
 
 
 
230d178
0ed5911
 
 
230d178
0ed5911
 
 
 
 
fda22ce
51b9227
0ed5911
 
0cbdeb5
7ba9533
e514b11
7ba9533
 
 
 
 
 
 
 
e514b11
7ba9533
 
 
0cbdeb5
7ba9533
 
 
 
d11bba3
7ba9533
 
 
6f670c5
77bbf66
 
6f670c5
77bbf66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aeee731
 
7ba9533

import streamlit as st
import pandas as pd
import numpy as np
import os
import pickle
import torch
from grobidmonkey import reader

from transformers import pipeline
from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
from transformers import T5Tokenizer, T5ForConditionalGeneration

from document import Document
from BartSE import BARTAutoEncoder


def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return the saved path.

    Args:
        uploaded_file: Object exposing a ``name`` attribute (the file name
            reported by the client) and a ``getbuffer()`` method returning
            the file's bytes -- presumably the object returned by
            ``st.file_uploader``; confirm against the caller.

    Returns:
        The path of the written file, as a string of the form
        ``./uploads/<file name>``.

    Raises:
        ValueError: If ``uploaded_file.name`` has no usable file-name
            component (empty, ``.`` or ``..``).
    """
    upload_dir = "./uploads"

    # The name comes from the client, so keep only its final path component;
    # otherwise a name such as "../../app.py" would be written outside the
    # uploads directory.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string

# Page header: the app title, then the prompt for the upload section.
st.title("Paper2Slides")

st.subheader("Upload paper in pdf format")

# col1, col2 = st.columns([3, 1])
# with col1:
#     uploaded_file = st.file_uploader("Choose a file")
# with col2:
#     option = st.selectbox(
#         'Select parsing method.',
#         ('monkey', 'x2d', 'lxml'))

# if uploaded_file is not None:
    
#     st.write(uploaded_file.name)
#     bytes_data = uploaded_file.getvalue()
#     st.write(len(bytes_data), "bytes")
    
#     saved_file_path = save_uploaded_file(uploaded_file)
#     monkeyReader = reader.MonkeyReader(option)
#     outline = monkeyReader.readOutline(saved_file_path)
#     for pre, fill, node in outline:
#         st.write("%s%s" % (pre, node.name))
    
    
#     # read paper content
#     essay = monkeyReader.readEssay(saved_file_path)
        
    # with st.status("Understanding paper..."):
        
    #     Barttokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
    #     summ_model_path = 'com3dian/Bart-large-paper2slides-summarizer'
    #     summarizor = BartForConditionalGeneration.from_pretrained(summ_model_path)
    #     exp_model_path = 'com3dian/Bart-large-paper2slides-expander'
    #     expandor = BartForConditionalGeneration.from_pretrained(exp_model_path)
    #     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #     BartSE = BARTAutoEncoder(summarizor, summarizor, device)
    #     del summarizor, expandor
            
    #     document = Document(essay, Barttokenizer)
    #     del Barttokenizer
    #     length = document.merge(25, 30, BartSE, device)

    # with st.status("Generating slides..."):
    #     summarizor = pipeline("summarization", model=summ_model_path, device = device)
    #     summ_text = summarizor(document.segmentation['text'], max_length=100, min_length=10, do_sample=False)
    #     summ_text = [text['summary_text'] for text in summ_text]
        
    #     for summ in summ_text:
    #         st.write(summ)

    
# Load the pre-generated slide texts (one entry per page).
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load a 'slides_text.pkl' that ships with the app, never a
# user-supplied file.
with open('slides_text.pkl', 'rb') as file:
    summ_text = pickle.load(file)

# The page-turner code further down reads and writes `text_list`, which was
# never defined and raised NameError on first render. Bind it to the loaded
# slide texts so both names refer to the same list.
text_list = summ_text

def render_html(text):
    """Return *text* wrapped in a ``<div>`` element, without escaping it."""
    return "<div>{}</div>".format(text)

# Start on the first page unless this session already has a page index stored.
if "page_index" not in st.session_state:
    st.session_state["page_index"] = 0

def turn_page(direction):
    """Move the page index in session state one step, staying in bounds.

    Args:
        direction: ``"next"`` advances unless already on the last page of the
            module-level ``text_list``; ``"prev"`` goes back unless already
            on the first page. Any other value leaves the index unchanged.
    """
    state = st.session_state
    if direction == "next":
        if state.page_index < len(text_list) - 1:
            state.page_index += 1
    elif direction == "prev":
        if state.page_index > 0:
            state.page_index -= 1

# Navigation row: "Previous" on the left, the page counter in the wider
# middle column, "Next" on the right.
col1, col2, col3 = st.columns([1, 2, 1])
with col1:
    st.button(label="Previous", on_click=turn_page, args=("prev",))
with col3:
    st.button(label="Next", on_click=turn_page, args=("next",))
with col2:
    st.write("Page {} of {}".format(st.session_state.page_index + 1, len(text_list)))

# Editable text box pre-filled with the slide text of the current page
current_page = st.session_state.page_index
text = st.text_area(label="Edit Text", value=summ_text[current_page], height=200)

# Preview the (possibly edited) text as raw HTML
preview_html = render_html(text)
st.markdown(preview_html, unsafe_allow_html=True)

# Store the text box contents back into the list entry for this page
text_list[current_page] = text