from io import BytesIO

import streamlit as st
from docx import Document
from PyPDF2 import PdfReader

# C0 control characters other than tab, newline and carriage return are not
# legal in XML text. python-docx refuses them with a ValueError, so they are
# deleted from extracted page text before it is written to the document.
_XML_ILLEGAL_CONTROLS = dict.fromkeys(
    code for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)
)


def pdf_to_word(pdf_file, password=None):
    """Convert a PDF file to a Word file with optional decryption.

    Each PDF page becomes one paragraph in the Word document. Pages that
    yield no extractable text get a placeholder paragraph instead.

    Args:
        pdf_file: Path or binary file-like object handed to ``PdfReader``.
        password: Password used when the PDF is encrypted. ``None`` or an
            empty string means "no password supplied".

    Returns:
        A ``BytesIO`` holding the ``.docx`` content, positioned at offset 0.

    Raises:
        ValueError: If the PDF is encrypted and no password was supplied, or
            if the supplied password does not decrypt it.
    """
    reader = PdfReader(pdf_file)

    # Decrypt the PDF if it's encrypted
    if reader.is_encrypted:
        if not password:
            raise ValueError("The PDF is encrypted. Please provide a password.")

        decrypt_error = "Failed to decrypt the PDF. Check the password."
        try:
            decrypt_result = reader.decrypt(password)
        except Exception as e:
            raise ValueError(decrypt_error) from e
        # A wrong password is reported through a falsy return value rather
        # than an exception, so the result has to be checked explicitly.
        if not decrypt_result:
            raise ValueError(decrypt_error)

    document = Document()
    for page in reader.pages:
        # Extract once per page; text extraction is the costly step.
        text = (page.extract_text() or "").translate(_XML_ILLEGAL_CONTROLS)
        if text:
            document.add_paragraph(text)
        else:
            document.add_paragraph("[This page contains non-extractable content or images]")

    word_file = BytesIO()
    document.save(word_file)
    word_file.seek(0)  # rewind so the caller reads from the start
    return word_file


# Streamlit app configuration
st.set_page_config(page_title="PDF to Word Converter", page_icon="🖋", layout="centered")

# App header
st.title("PDF to Word Converter")
st.write("Upload a PDF file, and we will convert it into a Word document for you.")

# File uploader
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
password = st.text_input("Enter password (if the PDF is encrypted):", type="password")

if uploaded_file is not None:
    with st.spinner("Converting PDF to Word..."):
        try:
            word_file = pdf_to_word(uploaded_file, password)
            st.success("Conversion successful!")
            st.download_button(
                label="Download Word file",
                data=word_file,
                file_name="converted.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )
        except ValueError as ve:
            st.error(str(ve))
        # NOTE(review): the source available for review was cut off right
        # after this handler (it ended in a dangling `ex`, presumably the
        # start of another `except` clause). Restore any remaining handlers
        # or trailing code here.