File size: 4,724 Bytes
68f98f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from io import BytesIO
from urllib.parse import quote

import cv2
import fitz
import matplotlib.pyplot as plt
import numpy as np
from fastapi import APIRouter, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse
from skimage.color import rgb2gray
from skimage.measure import label, regionprops

# Router on which the /process-pdf/ and /process-image/ endpoints below are registered.
router = APIRouter()

def convert_and_process_pdf(pdf_content: bytes, area_threshold: int = 100) -> BytesIO:
    """
    Render the first page of a PDF and return a de-speckled black-and-white PNG.

    The page is rasterised at 300 DPI, binarised with Otsu's threshold, and
    every connected dark component smaller than ``area_threshold`` pixels is
    erased.

    Args:
        pdf_content: The PDF file content as bytes.
        area_threshold: Minimum area, in pixels, a connected dark component
            must have to be kept (default: 100).
    Returns:
        BytesIO: PNG image data, rewound to offset 0. Kept components are
        black on a white background.
    Raises:
        ValueError: If the PDF contains no pages.
        Errors raised by PyMuPDF while opening or rendering the document
        propagate unchanged.
    """
    # The context manager closes the document even if rendering fails.
    with fitz.open(stream=pdf_content, filetype="pdf") as doc:
        if len(doc) == 0:
            raise ValueError("PDF has no pages")

        # Only the first page is processed.
        pix = doc.load_page(0).get_pixmap(dpi=300)
        png_image = pix.tobytes("png")

    # Load the rendered page with OpenCV
    np_array = np.frombuffer(png_image, dtype=np.uint8)
    img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)

    # cv2.imdecode yields channels in BGR order, so the conversion must be
    # BGR-aware; feeding this array to an RGB-based converter would swap the
    # red and blue luminance weights.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Otsu's threshold, inverted so that dark pixels become foreground (255)
    _, img_binary = cv2.threshold(
        img_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # Label connected components and keep only those that are large enough
    label_img = label(img_binary)
    valid_labels = [
        region.label
        for region in regionprops(label_img)
        if region.area >= area_threshold
    ]
    img_filtered = np.isin(label_img, valid_labels)

    # Save enhanced image to memory. vmin/vmax are pinned because autoscaling
    # a constant array (e.g. a page where nothing survives the filter) would
    # map every pixel to black instead of white.
    output_buffer = BytesIO()
    plt.imsave(
        output_buffer, ~img_filtered, cmap="gray", format="png", vmin=0, vmax=1
    )
    output_buffer.seek(0)
    return output_buffer

@router.post("/process-pdf/")
async def process_pdf(
    file: UploadFile = File(...),
    area_threshold: int = 100
):
    """
    Process a PDF file and return an enhanced PNG image.

    Args:
        file: The PDF file to process
        area_threshold: Threshold for area filtering (default: 100)
    Returns:
        StreamingResponse: Enhanced PNG image, sent as an attachment named
        ``<original stem>_enhanced.png``
    Raises:
        HTTPException: 400 if the upload is empty, 500 if processing fails.
    """
    try:
        # Read PDF file content
        pdf_content = await file.read()
        if not pdf_content:
            raise HTTPException(status_code=400, detail="Uploaded PDF is empty")

        # Process the PDF and get the enhanced image.
        # NOTE: this call is synchronous and runs inside the async handler.
        enhanced_image = convert_and_process_pdf(pdf_content, area_threshold)

        # The upload may carry no filename at all, so fall back to a fixed stem.
        stem = (file.filename or "document").rsplit(".", 1)[0]
        download_name = f"{stem}_enhanced.png"

        # Header values must be latin-1 encodable and an unquoted filename
        # breaks on spaces/semicolons: send a sanitised quoted ASCII name plus
        # the exact name percent-encoded in the RFC 5987 `filename*` form.
        ascii_name = "".join(
            ch if ch.isascii() and (ch.isalnum() or ch in "._-") else "_"
            for ch in download_name
        )
        encoded_name = quote(download_name, safe="")
        content_disposition = (
            f'attachment; filename="{ascii_name}"; '
            f"filename*=UTF-8''{encoded_name}"
        )

        # Return the processed image as a StreamingResponse
        return StreamingResponse(
            enhanced_image,
            media_type="image/png",
            headers={"Content-Disposition": content_disposition}
        )
    except HTTPException:
        # Deliberate client-facing errors must not be rewrapped as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}") from e

@router.post("/process-image/")
async def process_image(
    file: UploadFile = File(...),
    area_threshold: int = 100
):
    """
    Process an image file and return an enhanced image.

    The image is binarised with Otsu's threshold and every connected dark
    component smaller than ``area_threshold`` pixels is erased.

    Args:
        file: The image file to process
        area_threshold: Threshold for area filtering (default: 100)
    Returns:
        StreamingResponse: Enhanced PNG image (kept components black on
        white), sent as an attachment named ``<original stem>_enhanced.png``
    Raises:
        HTTPException: 400 if the upload is empty or cannot be decoded as an
        image, 500 if processing fails.
    """
    try:
        # Read image file content
        image_content = await file.read()
        if not image_content:
            raise HTTPException(status_code=400, detail="Uploaded image is empty")

        # Convert to numpy array and decode
        np_array = np.frombuffer(image_content, dtype=np.uint8)
        img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imdecode signals undecodable data by returning None.
            raise HTTPException(
                status_code=400,
                detail="Uploaded file could not be decoded as an image"
            )

        # cv2.imdecode yields channels in BGR order, so the conversion must be
        # BGR-aware; an RGB-based converter would swap red and blue weights.
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Otsu's threshold, inverted so that dark pixels become foreground (255)
        _, img_binary = cv2.threshold(
            img_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
        )

        # Label connected components and keep only those that are large enough
        label_img = label(img_binary)
        valid_labels = [
            region.label
            for region in regionprops(label_img)
            if region.area >= area_threshold
        ]
        img_filtered = np.isin(label_img, valid_labels)

        # Save enhanced image to memory. vmin/vmax are pinned because
        # autoscaling a constant array (nothing survives the filter) would map
        # every pixel to black instead of white.
        output_buffer = BytesIO()
        plt.imsave(
            output_buffer, ~img_filtered, cmap="gray", format="png", vmin=0, vmax=1
        )
        output_buffer.seek(0)

        # The upload may carry no filename at all, so fall back to a fixed stem.
        stem = (file.filename or "image").rsplit(".", 1)[0]
        download_name = f"{stem}_enhanced.png"

        # Header values must be latin-1 encodable and an unquoted filename
        # breaks on spaces/semicolons: send a sanitised quoted ASCII name plus
        # the exact name percent-encoded in the RFC 5987 `filename*` form.
        ascii_name = "".join(
            ch if ch.isascii() and (ch.isalnum() or ch in "._-") else "_"
            for ch in download_name
        )
        encoded_name = quote(download_name, safe="")
        content_disposition = (
            f'attachment; filename="{ascii_name}"; '
            f"filename*=UTF-8''{encoded_name}"
        )

        # Return the processed image as a StreamingResponse
        return StreamingResponse(
            output_buffer,
            media_type="image/png",
            headers={"Content-Disposition": content_disposition}
        )
    except HTTPException:
        # Deliberate client-facing errors must not be rewrapped as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e