Spaces:
Sleeping
Sleeping
File size: 4,495 Bytes
ea5c59c 8d4620d 6f670c5 7ee71e2 9ec3b13 ea5c59c 0cbdeb5 8d4620d 28c51ee 8d4620d ea5c59c 8d4620d ea5c59c 1ed0b9b 54f71b8 0ed5911 230d178 0ed5911 230d178 0ed5911 fda22ce 51b9227 0ed5911 0cbdeb5 7ba9533 e514b11 7ba9533 e514b11 7ba9533 0cbdeb5 7ba9533 d11bba3 7ba9533 6f670c5 77bbf66 6f670c5 77bbf66 6d4fa22 bb24853 77bbf66 7af5631 77bbf66 7af5631 dd69d2f 7af5631 77bbf66 2e05f78 77bbf66 312b574 77bbf66 7af5631 77bbf66 380962e 77bbf66 7af5631 77bbf66 312b574 7af5631 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import html
import os
import pickle
import re

import numpy as np
import pandas as pd
import streamlit as st
import torch
from grobidmonkey import reader
from transformers import pipeline
from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
from transformers import T5Tokenizer, T5ForConditionalGeneration

from BartSE import BARTAutoEncoder
from document import Document
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return the saved path.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method (presumably a Streamlit ``UploadedFile``
            -- confirm against the caller).

    Returns:
        str: Path of the written file inside ``./uploads``.

    Raises:
        ValueError: If ``uploaded_file.name`` has no usable file-name part.
    """
    upload_dir = "./uploads"
    # Keep only the final path component so a crafted name such as
    # "../../app.py" cannot make the write land outside the uploads directory.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string
# Page header: the app title followed by the upload prompt.
st.title("Paper2Slides")
st.subheader("Upload paper in pdf format")
# col1, col2 = st.columns([3, 1])
# with col1:
# uploaded_file = st.file_uploader("Choose a file")
# with col2:
# option = st.selectbox(
# 'Select parsing method.',
# ('monkey', 'x2d', 'lxml'))
# if uploaded_file is not None:
# st.write(uploaded_file.name)
# bytes_data = uploaded_file.getvalue()
# st.write(len(bytes_data), "bytes")
# saved_file_path = save_uploaded_file(uploaded_file)
# monkeyReader = reader.MonkeyReader(option)
# outline = monkeyReader.readOutline(saved_file_path)
# for pre, fill, node in outline:
# st.write("%s%s" % (pre, node.name))
# # read paper content
# essay = monkeyReader.readEssay(saved_file_path)
# with st.status("Understanding paper..."):
# Barttokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
# summ_model_path = 'com3dian/Bart-large-paper2slides-summarizer'
# summarizor = BartForConditionalGeneration.from_pretrained(summ_model_path)
# exp_model_path = 'com3dian/Bart-large-paper2slides-expander'
# expandor = BartForConditionalGeneration.from_pretrained(exp_model_path)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# BartSE = BARTAutoEncoder(summarizor, summarizor, device)
# del summarizor, expandor
# document = Document(essay, Barttokenizer)
# del Barttokenizer
# length = document.merge(25, 30, BartSE, device)
# with st.status("Generating slides..."):
# summarizor = pipeline("summarization", model=summ_model_path, device = device)
# summ_text = summarizor(document.segmentation['text'], max_length=100, min_length=10, do_sample=False)
# summ_text = [text['summary_text'] for text in summ_text]
# for summ in summ_text:
# st.write(summ)
# Load the slide texts from the pickle file that sits next to the app.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load a file that ships with the app, never one supplied by a user.
try:
    with open('slides_text.pkl', 'rb') as file:
        summ_text = pickle.load(file)
except (OSError, pickle.UnpicklingError, EOFError) as load_error:
    # Show a readable message instead of a raw traceback, then halt this run:
    # everything below needs summ_text.
    st.error(f"Could not load 'slides_text.pkl': {load_error}")
    st.stop()

# The page turner indexes summ_text[0] on first load, so an empty collection
# would otherwise surface as an IndexError further down.
if not summ_text:
    st.warning("No slide text is available to display.")
    st.stop()
def render_html(text):
    """Render *text* as an HTML unordered list with one item per sentence.

    Sentences are delimited by a period that is followed by whitespace or the
    end of the text, so decimals such as ``95.3`` stay inside one item. Each
    non-blank sentence is stripped, HTML-escaped and emitted with a trailing
    period.

    Args:
        text: Plain text to convert.

    Returns:
        str: ``<ul>...</ul>`` markup; ``<ul></ul>`` when there are no
        non-blank sentences.
    """
    # Split only on sentence-ending periods, not on every "." in the text.
    sentences = re.split(r"\.(?=\s|$)", text)
    # Escape each sentence: the result is rendered as raw HTML, so characters
    # like "<" or "&" in the text must not be interpreted as markup.
    list_items = "".join(
        f"<li>{html.escape(sentence.strip())}.</li>"
        for sentence in sentences
        if sentence.strip()
    )
    # Wrap list items in an unordered list
    return f"<ul>{list_items}</ul>"
# Seed the session state with the page index and the text of that page,
# but only for keys that are not already present.
if "page_index" not in st.session_state:
    st.session_state["page_index"] = 0
if "current_text" not in st.session_state:
    st.session_state["current_text"] = summ_text[st.session_state["page_index"]]
def turn_page(direction):
    """Move the page index one step and refresh the current text.

    Args:
        direction: ``"next"`` advances unless already on the last page;
            ``"prev"`` goes back unless already on the first page. Any other
            value leaves the index unchanged.

    In every case ``current_text`` in the session state is re-read from
    ``summ_text`` at the resulting index.
    """
    state = st.session_state
    last_index = len(summ_text) - 1
    if direction == "next":
        if state.page_index < last_index:
            state.page_index += 1
    elif direction == "prev":
        if state.page_index > 0:
            state.page_index -= 1
    state.current_text = summ_text[state.page_index]
def update_text():
    """Copy the text-area value into ``summ_text`` and the session state.

    Reads ``text_area_value`` from the session state, stores it in
    ``summ_text`` at the current page index, and mirrors it into
    ``current_text``.
    """
    edited_text = st.session_state.text_area_value
    summ_text[st.session_state.page_index] = edited_text
    st.session_state.current_text = edited_text
# Page-turner row: "Previous" button in the first column, "Next" button in the
# third, and the page counter in the wider middle column.
prev_col, counter_col, next_col = st.columns([1, 2, 1])
prev_col.button("Previous", on_click=turn_page, args=("prev",))
next_col.button("Next", on_click=turn_page, args=("next",))
counter_col.write(f"Page {st.session_state.page_index + 1} of {len(summ_text)}")

# Editable text box; update_text runs when its content changes.
text = st.text_area(
    label="Edit Text",
    value=st.session_state.current_text,
    height=200,
    key="text_area_value",
    on_change=update_text,
)

# Bulleted preview of the current text.
# NOTE(review): rendered with unsafe_allow_html=True, so whatever markup
# render_html returns is injected into the page as raw HTML.
st.markdown(render_html(st.session_state.current_text), unsafe_allow_html=True)
|