|
from pdf2image import convert_from_path
|
|
import os
|
|
|
|
def is_pdf(filename: str) -> bool:
    """
    Check if a file name is a PDF.

    The check looks only at the name itself (the file is never opened) and
    is case-insensitive, so "REPORT.PDF" matches as well.

    Args:
        filename (str): The name of the file.

    Returns:
        bool: True if the file name ends with ".pdf", False otherwise.
    """
    # Lower-case first so that ".PDF" / ".Pdf" are accepted too.
    return filename.lower().endswith(".pdf")
|
|
|
|
|
|
|
|
|
|
def convert_pdf_to_images(pdf_path):
    """
    Convert a PDF file to images.

    Args:
        pdf_path (str): Path of the PDF file; passed unchanged to
            pdf2image's ``convert_from_path``.

    Returns:
        list: The value returned by ``convert_from_path(pdf_path)``. Per the
        pdf2image documentation this is a list of PIL image objects, one
        per page, not a list of file paths.
    """
    images = convert_from_path(pdf_path)
    print("PDF has been converted to images.")
    return images
|
|
|
|
|
|
def process_file(file_path):
    """
    Classify a file by its extension and return the images it provides.

    The extension is compared case-insensitively. The file itself is only
    touched in the PDF case, where it is handed to convert_pdf_to_images.

    Args:
        file_path (str): Path of the file to process.

    Returns:
        list: One of
            * ``[file_path]`` when the extension is .jpg, .jpeg, .png or .gif;
            * the result of ``convert_pdf_to_images(file_path)`` when the
              extension is .pdf;
            * ``['']`` for any other extension.

        NOTE(review): the image branch returns a path string while the PDF
        branch returns whatever convert_pdf_to_images produces, so callers
        must be prepared for both element types.
    """
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension in ('.jpg', '.jpeg', '.png', '.gif'):
        print(f"{file_path} is an image file.")
        out_path = [file_path]
    elif file_extension == '.pdf':
        print(f"{file_path} is a PDF file. Converting to image set")
        out_path = convert_pdf_to_images(file_path)
    else:
        print(f"{file_path} is not an image or PDF file.")
        # A single empty string marks "nothing usable" for existing callers.
        out_path = ['']

    return out_path
|
|
|
|
|