File size: 2,049 Bytes
641ff3e 37d982e 641ff3e 37d982e 641ff3e 37d982e 641ff3e 37d982e 641ff3e 37d982e 641ff3e 37d982e 641ff3e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
from pdf2image import convert_from_path
from PIL import Image
import os
def is_pdf_or_image(filename):
    """
    Check if a file name is a PDF or an image file.

    The comparison is case-insensitive and only inspects the end of the name.
    ".jpeg" is accepted alongside ".jpg" so that this check agrees with the
    image extensions that process_file treats as images.

    Args:
        filename (str): The name of the file.

    Returns:
        bool: True if the file name ends with ".pdf", ".jpg", ".jpeg", or
            ".png", False otherwise.
    """
    # str.endswith accepts a tuple, so a single call covers every extension.
    return filename.lower().endswith((".pdf", ".jpg", ".jpeg", ".png"))
def is_pdf(filename):
    """
    Tell whether a file name refers to a PDF, ignoring letter case.

    Args:
        filename (str): The name of the file.

    Returns:
        bool: True when the lowercased name ends with ".pdf", False otherwise.
    """
    lowered_name = filename.lower()
    has_pdf_suffix = lowered_name.endswith(".pdf")
    return has_pdf_suffix
# %%
## Convert pdf to image if necessary
def convert_pdf_to_images(pdf_path):
    """
    Convert a PDF into a list of in-memory images using pdf2image.

    The images are returned directly to the caller. An earlier approach that
    saved every page to disk as a separate PNG file is deprecated and is no
    longer performed here.

    Args:
        pdf_path (str): Path to the PDF file to convert.

    Returns:
        list: The images produced by convert_from_path for the given PDF.
    """
    page_images = convert_from_path(pdf_path)
    print("PDF has been converted to images.")
    return page_images
# %%
def process_file(file_path):
    """
    Load a file as a list of images, chosen by its file extension.

    The extension is compared case-insensitively.

    Args:
        file_path (str): Path to the file to load.

    Returns:
        list: A one-element list holding the opened image for ".jpg",
            ".jpeg" and ".png" files; the result of convert_pdf_to_images
            for ".pdf" files; and [''] for any other extension.
    """
    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    # PDFs are expanded into a set of images.
    if suffix == '.pdf':
        print(f"{file_path} is a PDF file. Converting to image set")
        return convert_pdf_to_images(file_path)

    # Plain image files are opened and wrapped in a list so that both
    # supported input kinds return the same container type.
    if suffix in ('.jpg', '.jpeg', '.png'):
        print(f"{file_path} is an image file.")
        return [Image.open(file_path)]

    # Unsupported input: report it and return the placeholder result.
    print(f"{file_path} is not an image or PDF file.")
    return ['']
|