# NOTE: page-viewer residue removed here (status "Sleeping", file size 4,724 Bytes, ref 68f98f8, line-number gutter 1-139).
import cv2
import fitz
import numpy as np
from io import BytesIO
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from skimage.measure import label, regionprops
from fastapi import APIRouter, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse
# Router object on which this module's endpoints are registered via @router.post.
router = APIRouter()
def convert_and_process_pdf(pdf_content: bytes, area_threshold: int = 100) -> BytesIO:
    """
    Convert the first page of a PDF to a PNG and apply image enhancement.

    The page is rendered at 300 DPI, binarized with Otsu's threshold, and
    connected dark components whose area is below ``area_threshold`` are
    removed. The result is written as a black-on-white PNG.

    Args:
        pdf_content: The PDF file content as bytes.
        area_threshold: Minimum area a connected component must have to be
            kept (default: 100).

    Returns:
        BytesIO: Enhanced PNG image content, rewound to position 0.

    Raises:
        ValueError: If ``pdf_content`` is empty, the PDF has no pages, or the
            rendered page cannot be decoded as an image.
    """
    if not pdf_content:
        raise ValueError("PDF content is empty")

    # The context manager closes the document even when rendering fails.
    with fitz.open(stream=pdf_content, filetype="pdf") as doc:
        if doc.page_count == 0:
            raise ValueError("PDF has no pages")
        # Render the first page as a PNG byte string.
        pix = doc.load_page(0).get_pixmap(dpi=300)
        png_image = pix.tobytes("png")

    # Load the rendered page with OpenCV.
    np_array = np.frombuffer(png_image, dtype=np.uint8)
    img_bgr = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise ValueError("Rendered page could not be decoded as an image")

    # OpenCV decodes to BGR while rgb2gray expects RGB; swap the channels so
    # the luminance weights are applied to the right colors.
    img_gray = rgb2gray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    # Otsu binarization, inverted so that dark content becomes non-zero
    # foreground for the connected-component labelling below.
    _, img_binary = cv2.threshold(
        (img_gray * 255).astype(np.uint8),
        0,
        255,
        cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
    )

    # Label connected components and keep only those that are large enough.
    label_img = label(img_binary)
    valid_labels = [
        region.label
        for region in regionprops(label_img)
        if region.area >= area_threshold
    ]
    img_filtered = np.isin(label_img, valid_labels)

    # Kept components are drawn black (0) on a white (255) background.
    output_pixels = np.where(img_filtered, 0, 255).astype(np.uint8)

    # Explicit vmin/vmax: without them imsave auto-scales, and a constant
    # image (e.g. nothing survived the filter) would be written all black.
    output_buffer = BytesIO()
    plt.imsave(
        output_buffer,
        output_pixels,
        cmap="gray",
        format="png",
        vmin=0,
        vmax=255,
    )
    output_buffer.seek(0)
    return output_buffer
@router.post("/process-pdf/")
async def process_pdf(
    file: UploadFile = File(...),
    area_threshold: int = 100
):
    """
    Process a PDF file and return an enhanced PNG image.

    Args:
        file: The PDF file to process
        area_threshold: Threshold for area filtering (default: 100)

    Returns:
        StreamingResponse: Enhanced PNG image, sent as an attachment named
        after the uploaded file with an ``_enhanced.png`` suffix

    Raises:
        HTTPException: 400 if the upload is empty; 500 if processing fails
    """
    try:
        # Read PDF file content
        pdf_content = await file.read()
        if not pdf_content:
            raise HTTPException(status_code=400, detail="Uploaded PDF file is empty")

        # Process the PDF and get the enhanced image.
        # NOTE(review): this call is synchronous, so it runs on the event
        # loop for as long as the conversion takes.
        enhanced_image = convert_and_process_pdf(pdf_content, area_threshold)

        # The client-supplied name may be missing or contain characters that
        # are unsafe in a header; reduce it to a conservative ASCII stem.
        stem = (file.filename or "document").rsplit(".", 1)[0]
        safe_stem = "".join(
            ch if ch.isascii() and (ch.isalnum() or ch in "-_. ") else "_"
            for ch in stem
        ).strip() or "document"

        # Return the processed image as a StreamingResponse
        return StreamingResponse(
            enhanced_image,
            media_type="image/png",
            headers={"Content-Disposition": f'attachment; filename="{safe_stem}_enhanced.png"'}
        )
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must not be re-wrapped as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}") from e
@router.post("/process-image/")
async def process_image(
    file: UploadFile = File(...),
    area_threshold: int = 100
):
    """
    Process an image file and return an enhanced image.

    The image is binarized with Otsu's threshold and connected dark
    components whose area is below ``area_threshold`` are removed. The result
    is written as a black-on-white PNG.

    Args:
        file: The image file to process
        area_threshold: Threshold for area filtering (default: 100)

    Returns:
        StreamingResponse: Enhanced image as PNG, sent as an attachment named
        after the uploaded file with an ``_enhanced.png`` suffix

    Raises:
        HTTPException: 400 if the upload is empty or cannot be decoded as an
            image; 500 if processing fails
    """
    try:
        # Read image file content
        image_content = await file.read()
        if not image_content:
            raise HTTPException(status_code=400, detail="Uploaded image file is empty")

        # Decode the upload; imdecode signals an undecodable buffer by returning None.
        np_array = np.frombuffer(image_content, dtype=np.uint8)
        img_bgr = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
        if img_bgr is None:
            raise HTTPException(status_code=400, detail="Uploaded file is not a decodable image")

        # OpenCV decodes to BGR while rgb2gray expects RGB; swap the channels
        # so the luminance weights are applied to the right colors.
        img_gray = rgb2gray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

        # Otsu binarization, inverted so that dark content becomes non-zero
        # foreground for the connected-component labelling below.
        _, img_binary = cv2.threshold(
            (img_gray * 255).astype(np.uint8),
            0,
            255,
            cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
        )

        # Label connected components and keep only those that are large enough.
        label_img = label(img_binary)
        valid_labels = [
            region.label
            for region in regionprops(label_img)
            if region.area >= area_threshold
        ]
        img_filtered = np.isin(label_img, valid_labels)

        # Kept components are drawn black (0) on a white (255) background.
        output_pixels = np.where(img_filtered, 0, 255).astype(np.uint8)

        # Explicit vmin/vmax: without them imsave auto-scales, and a constant
        # image (e.g. nothing survived the filter) would be written all black.
        output_buffer = BytesIO()
        plt.imsave(
            output_buffer,
            output_pixels,
            cmap="gray",
            format="png",
            vmin=0,
            vmax=255,
        )
        output_buffer.seek(0)

        # The client-supplied name may be missing or contain characters that
        # are unsafe in a header; reduce it to a conservative ASCII stem.
        stem = (file.filename or "image").rsplit(".", 1)[0]
        safe_stem = "".join(
            ch if ch.isascii() and (ch.isalnum() or ch in "-_. ") else "_"
            for ch in stem
        ).strip() or "image"

        # Return the processed image as a StreamingResponse
        return StreamingResponse(
            output_buffer,
            media_type="image/png",
            headers={"Content-Disposition": f'attachment; filename="{safe_stem}_enhanced.png"'}
        )
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400s above) must not be re-wrapped as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e
|