Spaces:
Sleeping
Sleeping
File size: 6,876 Bytes
ea5c59c 8d4620d 6f670c5 7ee71e2 b0eedcf 9ec3b13 ea5c59c 0cbdeb5 8d4620d 28c51ee 8d4620d ea5c59c 8d4620d ea5c59c 1ed0b9b 54f71b8 0ed5911 230d178 0ed5911 230d178 0ed5911 fda22ce 51b9227 0ed5911 0cbdeb5 7ba9533 e514b11 7ba9533 e514b11 7ba9533 0cbdeb5 7ba9533 d11bba3 7ba9533 09f103b c464fcc 77bbf66 6b99783 44e5e0c 6b99783 77bbf66 7af5631 77bbf66 7af5631 09f103b 6b99783 09f103b 659f042 77bbf66 2e05f78 77bbf66 a23cae0 77bbf66 354eef6 a23cae0 354eef6 7af5631 e7d6973 77bbf66 0265ddb 1da1b0f b17cdba 77bbf66 b17cdba 77bbf66 b17cdba 034b0a5 77bbf66 6b99783 7af5631 b0eedcf 45e4e41 b0eedcf 45e4e41 b0eedcf 45e4e41 354eef6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
import io
import os
import pickle
import re
import tempfile

import markdown
import numpy as np
import pandas as pd
import pdfkit
import streamlit as st
import torch
from grobidmonkey import reader
from transformers import pipeline
from transformers import BartTokenizer, BartModel, BartForConditionalGeneration
from transformers import T5Tokenizer, T5ForConditionalGeneration

from document import Document
from BartSE import BARTAutoEncoder
def save_uploaded_file(uploaded_file, upload_dir="./uploads"):
    """Write an uploaded file to disk and return the path it was saved to.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getbuffer()`` (the file's bytes).
        upload_dir: Directory to store the file in; created if missing.

    Returns:
        The path of the saved file as a string.

    Raises:
        ValueError: If the upload's name contains no usable file name.
    """
    # The name comes from the client, so keep only its final component: a
    # name such as "../../app.py" or "/etc/passwd" must not escape
    # ``upload_dir``. Backslashes are normalised first so Windows-style
    # paths are reduced to their file name on POSIX hosts too.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    os.makedirs(upload_dir, exist_ok=True)  # Create the directory if needed
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string
# Page header shown at the top of the app.
# NOTE(review): the subheader asks for a PDF upload, but the uploader code
# that follows is commented out, so no upload widget is rendered — confirm
# whether the prompt should stay while the pipeline is disabled.
st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')
# col1, col2 = st.columns([3, 1])
# with col1:
# uploaded_file = st.file_uploader("Choose a file")
# with col2:
# option = st.selectbox(
# 'Select parsing method.',
# ('monkey', 'x2d', 'lxml'))
# if uploaded_file is not None:
# st.write(uploaded_file.name)
# bytes_data = uploaded_file.getvalue()
# st.write(len(bytes_data), "bytes")
# saved_file_path = save_uploaded_file(uploaded_file)
# monkeyReader = reader.MonkeyReader(option)
# outline = monkeyReader.readOutline(saved_file_path)
# for pre, fill, node in outline:
# st.write("%s%s" % (pre, node.name))
# # read paper content
# essay = monkeyReader.readEssay(saved_file_path)
# with st.status("Understanding paper..."):
# Barttokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
# summ_model_path = 'com3dian/Bart-large-paper2slides-summarizer'
# summarizor = BartForConditionalGeneration.from_pretrained(summ_model_path)
# exp_model_path = 'com3dian/Bart-large-paper2slides-expander'
# expandor = BartForConditionalGeneration.from_pretrained(exp_model_path)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# BartSE = BARTAutoEncoder(summarizor, summarizor, device)
# del summarizor, expandor
# document = Document(essay, Barttokenizer)
# del Barttokenizer
# length = document.merge(25, 30, BartSE, device)
# with st.status("Generating slides..."):
# summarizor = pipeline("summarization", model=summ_model_path, device = device)
# summ_text = summarizor(document.segmentation['text'], max_length=100, min_length=10, do_sample=False)
# summ_text = [text['summary_text'] for text in summ_text]
# for summ in summ_text:
# st.write(summ)
# Load the slide texts from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code from the file, so
# slides_text.pkl must only ever come from a trusted source.
# Presumably a list of strings (format() calls .split on each item) — confirm.
with open('slides_text.pkl', 'rb') as pickle_file:
    summ_text = pickle.load(pickle_file)
# A period ends a sentence only when followed by whitespace or the end of the
# text, so decimals and version numbers ("92.5%", "v2.0") stay intact.
_SENTENCE_END = re.compile(r"\.(?=\s|$)")


# NOTE: this name shadows the builtin format(); it is kept because other code
# in this file calls it by this name.
def format(text_list):
    """Turn each text into a Markdown bullet list, one bullet per sentence.

    Args:
        text_list: Iterable of strings.

    Returns:
        A list with one string per input text. Each string holds one
        ``"- <sentence>.\\n"`` line per non-empty sentence; a text with no
        sentences yields an empty string.
    """
    format_list = []
    for text in text_list:
        # Split into sentences, dropping surrounding whitespace and blanks.
        sentences = (part.strip() for part in _SENTENCE_END.split(text))
        # Build the Markdown list items (not HTML, despite the older comment).
        format_list.append("".join(f"- {s}.\n" for s in sentences if s))
    return format_list
# Seed the per-session state on the first run only; later reruns keep the
# stored values. Factories are evaluated lazily and in order, because
# 'current_text' depends on the two entries before it.
_SESSION_DEFAULTS = (
    ('page_index', lambda: 0),
    ('summ_text', lambda: format(summ_text)),
    ('current_text', lambda: st.session_state.summ_text[st.session_state.page_index]),
)
for _key, _make_default in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _make_default()
# Function to handle page turn
def turn_page(direction):
    """Move the page index one step and load that page's text.

    Used as the ``on_click`` callback of the Previous/Next buttons.

    Args:
        direction: ``"next"`` or ``"prev"``. Any other value, or a move past
            either end, leaves the page index unchanged.
    """
    state = st.session_state
    # Bound against the list that is indexed below rather than the
    # module-level copy, so the index can never run past the stored pages.
    last_index = len(state.summ_text) - 1
    if direction == "next" and state.page_index < last_index:
        state.page_index += 1
    elif direction == "prev" and state.page_index > 0:
        state.page_index -= 1
    # Always refresh the text so it matches the (possibly unchanged) index.
    state.current_text = state.summ_text[state.page_index]
# Function to update the current text based on text_area changes
def update_text():
    """Store the text area's value as the text of the current page.

    Reads ``text_area_value`` from the session state and writes it both into
    ``summ_text`` at the current ``page_index`` and into ``current_text``.
    """
    state = st.session_state
    edited = state.text_area_value
    state.summ_text[state.page_index] = edited
    state.current_text = edited
# Editable text box for the current page; changes are handled by update_text.
text = st.text_area(
    "Edit Text",
    st.session_state.current_text,
    height=200,
    key="text_area_value",
    on_change=update_text,
)

# Pager row: Previous button | centred page counter | Next button.
col1, col2, col3 = st.columns([2.25, 12, 1.7])

with col1:
    st.button("Previous", on_click=turn_page, args=("prev",))

with col2:
    page_counter_html = (
        '<div style="display: flex; justify-content: center; align-items: center; height: 100%;">'
        + f'Page {st.session_state.page_index + 1} of {len(summ_text)}'
        + '</div>'
    )
    st.markdown(page_counter_html, unsafe_allow_html=True)

with col3:
    st.button("Next", on_click=turn_page, args=("next",))

# Markdown rendering of the current page's text.
st.markdown(st.session_state.current_text)
def render_markdown_to_html(markdown_str):
    """Convert a Markdown string to HTML with ``markdown.markdown``."""
    html = markdown.markdown(markdown_str)
    return html
def create_pdf_from_markdown_strings(markdown_strings):
    """Render each Markdown string to an HTML fragment.

    Despite its name, this function does not produce a PDF: it returns the
    rendered HTML, one fragment per input string.

    Args:
        markdown_strings: Iterable of Markdown strings.

    Returns:
        A list of HTML strings, in the same order as the input.
    """
    # An earlier version also assembled a combined HTML document and a pdfkit
    # options dict here, but neither was returned or used; that dead work has
    # been dropped. The return value is unchanged.
    return [render_markdown_to_html(md) for md in markdown_strings]
def html_to_pdf(html_content, options=None):
    """Render HTML to a temporary PDF file and return the file's path.

    Args:
        html_content: Either a complete HTML string, or an iterable of HTML
            fragments. Fragments are combined into one document with each
            fragment on its own page.
        options: Optional dict of wkhtmltopdf options forwarded to
            ``pdfkit.from_string``. When omitted and fragments are given,
            A4 landscape is used.

    Returns:
        Path of the generated PDF. The file is not deleted automatically;
        the caller is responsible for removing it.

    Raises:
        Exception: Whatever ``pdfkit.from_string`` raises; the temporary
            file is removed before the error propagates.
    """
    if not isinstance(html_content, str):
        # pdfkit.from_string needs a single string, so merge the pages first.
        html_content = _combine_html_pages(html_content)
        if options is None:
            options = _SLIDE_PDF_OPTIONS

    # Only reserve a unique path here; the handle is closed before pdfkit
    # writes to the path so the file is not held open by two writers.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        pdf_path = tmp_file.name

    try:
        pdfkit.from_string(html_content, pdf_path, options=options)
    except Exception:
        # Do not leave an orphaned temp file behind when rendering fails.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        raise
    return pdf_path


# PDF options: landscape orientation and page size, used for page fragments.
_SLIDE_PDF_OPTIONS = {
    'page-size': 'A4',
    'orientation': 'Landscape',
}

# Forces a page break between consecutive fragments.
_PAGE_BREAK = '<div style="page-break-after: always;"></div>'


def _combine_html_pages(html_pages):
    """Wrap HTML fragments in one document, one fragment per printed page."""
    body = _PAGE_BREAK.join(
        f'<div class="page">{page}</div>' for page in html_pages
    )
    return (
        '<html>\n'
        '<head>\n'
        # Declare the encoding so non-ASCII text is not garbled in the PDF.
        '<meta charset="utf-8">\n'
        '<style>\n'
        '.page {\n'
        '    font-size: 16pt; /* Adjust the font size as needed */\n'
        '    margin: 20mm; /* Margins for top, right, bottom, and left */\n'
        '}\n'
        '</style>\n'
        '</head>\n'
        '<body>\n'
        f'{body}'
        '</body></html>'
    )
# Rendered on every rerun so the export reflects the latest edited text.
html_content = create_pdf_from_markdown_strings(st.session_state.summ_text)

if st.button("Download PDF"):
    if html_content:
        pdf_path = html_to_pdf(html_content)
        try:
            # Read the PDF into memory so the download does not depend on
            # the temporary file still existing.
            with open(pdf_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()
        finally:
            # Remove the temporary file even if reading it failed.
            os.remove(pdf_path)
        st.download_button(
            label="Download PDF",
            data=pdf_bytes,
            file_name="converted.pdf",
            mime="application/pdf"
        )
|