AzizWazir committed on
Commit
972cb11
·
verified ·
1 Parent(s): 526c1dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -13
app.py CHANGED
@@ -2,11 +2,9 @@ import streamlit as st
2
  from PyPDF2 import PdfReader
3
  from docx import Document
4
  from io import BytesIO
5
- from pdf2image import convert_from_bytes
6
- import pytesseract
7
 
8
  def pdf_to_word(pdf_file, password=None):
9
- """Convert a PDF file to a Word file with optional decryption and OCR support."""
10
  reader = PdfReader(pdf_file)
11
 
12
  # Decrypt the PDF if it's encrypted
@@ -20,19 +18,12 @@ def pdf_to_word(pdf_file, password=None):
20
  raise ValueError("The PDF is encrypted. Please provide a password.")
21
 
22
  document = Document()
23
-
24
- # Extract text from each page
25
  for page in reader.pages:
26
- if page.extract_text(): # Use PyPDF2 for text extraction
27
  text = page.extract_text()
28
  document.add_paragraph(text)
29
  else:
30
- # Convert the page to an image and use OCR
31
- pdf_bytes = pdf_file.read()
32
- images = convert_from_bytes(pdf_bytes)
33
- for image in images:
34
- text = pytesseract.image_to_string(image)
35
- document.add_paragraph(text)
36
 
37
  word_file = BytesIO()
38
  document.save(word_file)
@@ -62,4 +53,6 @@ if uploaded_file is not None:
62
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
63
  )
64
  except ValueError as ve:
65
- st.error(str(ve
 
 
 
2
  from PyPDF2 import PdfReader
3
  from docx import Document
4
  from io import BytesIO
 
 
5
 
6
  def pdf_to_word(pdf_file, password=None):
7
+ """Convert a PDF file to a Word file with optional decryption."""
8
  reader = PdfReader(pdf_file)
9
 
10
  # Decrypt the PDF if it's encrypted
 
18
  raise ValueError("The PDF is encrypted. Please provide a password.")
19
 
20
  document = Document()
 
 
21
  for page in reader.pages:
22
+ if page.extract_text(): # Ensure text is extracted
23
  text = page.extract_text()
24
  document.add_paragraph(text)
25
  else:
26
+ document.add_paragraph("[This page contains non-extractable content or images]")
 
 
 
 
 
27
 
28
  word_file = BytesIO()
29
  document.save(word_file)
 
53
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
54
  )
55
  except ValueError as ve:
56
+ st.error(str(ve))
57
+ except Exception as e:
58
+ st.error(f"An error occurred: {str(e)}")