# Spaces: Running
import re
from io import BytesIO

import streamlit as st
from docx import Document
from PyPDF2 import PdfReader
# Code points that XML 1.0 does not allow in character data: C0 control
# characters other than tab/newline/carriage return, lone surrogates, and the
# U+FFFE/U+FFFF non-characters. Text pulled out of a PDF can contain them, and
# python-docx (via lxml) rejects such strings with ValueError.
_XML_ILLEGAL_CHARS = re.compile(
    r"[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]"
)


def pdf_to_word(pdf_file) -> BytesIO:
    """Convert a PDF file to a Word file.

    Each page of the PDF becomes one paragraph in the Word document. Pages
    that yield no usable text get a placeholder paragraph instead, so the
    paragraph count always matches the page count.

    Args:
        pdf_file: The PDF source; it is handed to ``PdfReader`` unchanged.

    Returns:
        An in-memory ``BytesIO`` holding the saved ``.docx`` data, rewound to
        position 0 so it can be read or served immediately.
    """
    reader = PdfReader(pdf_file)
    document = Document()
    for page in reader.pages:
        # Extract once per page (extraction is the costly step) and drop
        # characters that would make the whole document fail to build.
        text = _XML_ILLEGAL_CHARS.sub("", page.extract_text() or "")
        if text.strip():
            document.add_paragraph(text)
        else:
            # Nothing but whitespace (or nothing at all) came out of the page.
            document.add_paragraph(
                "[This page contains non-extractable content or images]"
            )
    word_file = BytesIO()
    document.save(word_file)
    # Rewind so consumers start reading from the first byte.
    word_file.seek(0)
    return word_file
# MIME type announced for the generated .docx download.
DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

# Page setup: browser tab title, icon, and a centered layout.
st.set_page_config(
    page_title="PDF to Word Converter",
    page_icon="🖋",
    layout="centered",
)

# Heading plus a one-line usage hint.
st.title("PDF to Word Converter")
st.write("Upload a PDF file, and we will convert it into a Word document for you.")

# Upload widget restricted to PDFs; the value is None until a file is chosen.
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    with st.spinner("Converting PDF to Word..."):
        try:
            docx_stream = pdf_to_word(uploaded_file)
            st.success("Conversion successful!")
            st.download_button(
                label="Download Word file",
                data=docx_stream,
                file_name="converted.docx",
                mime=DOCX_MIME,
            )
        except Exception as err:
            # UI boundary: report any failure to the user rather than crash.
            st.error(f"An error occurred: {err!s}")