File size: 3,770 Bytes
ddf7acc
6ec889f
b896977
 
 
 
 
 
6ec889f
300310b
6ec889f
 
b896977
 
 
6ec889f
13a649f
 
 
 
 
 
 
b896977
6ec889f
 
 
 
b896977
 
 
 
 
 
6ec889f
b896977
 
6ec889f
b896977
 
 
6ec889f
 
 
 
 
b896977
 
300310b
b896977
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ec889f
 
 
 
 
 
 
 
 
 
 
 
 
 
300310b
b896977
 
 
 
 
 
 
13a649f
b896977
 
 
 
 
 
 
 
 
 
6ec889f
b896977
ddf7acc
b896977
e638a74
b896977
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import gradio as gr
import easyocr
from PIL import Image
import pdf2image
import tempfile
import os
import cv2
import numpy as np
import torch

# Initialize the OCR reader (this will download models on first run)
# Built once at import time and reused by every extraction call below.
# Only English ('en') is requested, and the GPU flag is set from whether
# torch reports CUDA as available.
reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())

def preprocess_image(img):
    """Binarize an image to improve OCR accuracy for handwritten text.

    Args:
        img: A PIL image in any mode, or an array-like that ``np.array``
            turns into an 8-bit grayscale (2-D) or RGB/RGBA (3-D) array.

    Returns:
        A 2-D array produced by adaptive thresholding, in which every pixel
        is either 0 or 255.
    """
    # PIL modes other than L/RGB/RGBA do not convert into the 8-bit gray or
    # RGB(A) arrays the OpenCV calls below require: "P" yields palette
    # indices, "1" a bool array, "LA" two channels, "CMYK" four channels that
    # would be misread as RGBA, and "I"/"F" non-8-bit data. Normalize those to
    # RGB first; inputs without a ``mode`` attribute are passed through as-is.
    mode = getattr(img, "mode", None)
    if mode is not None and mode not in ("L", "RGB", "RGBA"):
        img = img.convert("RGB")

    # Convert PIL Image to numpy array
    img_array = np.array(img)

    # A 2-D array is already grayscale; anything else is treated as RGB(A)
    if img_array.ndim == 2:
        gray = img_array
    else:
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    # Adaptive (per-neighbourhood) thresholding copes with uneven lighting
    # better than one global threshold: Gaussian-weighted 11x11 window, with
    # the constant 2 subtracted from the weighted mean.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Noise removal (close, then open).
    # NOTE: with a 1x1 structuring element both operations leave the image
    # unchanged; a larger kernel (e.g. 2x2) is needed for real speckle
    # removal. Kept as-is so OCR results do not change.
    kernel = np.ones((1, 1), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    return binary

def extract_text_from_image(img):
    """Run EasyOCR on an image and return the recognized text.

    The image is binarized with ``preprocess_image`` before recognition.
    The text of each detection goes on its own line, and surrounding
    whitespace is stripped from the final string.
    """
    cleaned = preprocess_image(img)
    detections = reader.readtext(cleaned)

    # The recognized string of each detection is stored at index 1.
    recognized = (detection[1] for detection in detections)
    return '\n'.join(recognized).strip()

def extract_text_from_pdf(pdf_path):
    """Extract text from every page of a PDF, joined by a page-break marker.

    Pages are rendered to images inside a temporary directory that is
    removed once all pages have been processed.
    """
    page_separator = "\n\n--- Page Break ---\n\n"

    with tempfile.TemporaryDirectory() as scratch_dir:
        pages = pdf2image.convert_from_path(pdf_path, output_folder=scratch_dir)
        # OCR must happen while the rendered page files still exist on disk.
        page_texts = [extract_text_from_image(page) for page in pages]

    return page_separator.join(page_texts)

def process_file(file):
    """Extract text from an uploaded PDF or image and return it for display.

    Args:
        file: The upload to process. May be ``None`` (nothing uploaded), a
            filesystem path (``str`` or ``os.PathLike``), or an object whose
            ``name`` attribute holds the path of the uploaded file.

    Returns:
        The extracted text, or a human-readable message when nothing was
        uploaded, the file extension is unsupported, or processing failed.
        Errors are reported in the returned string rather than raised.
    """
    if file is None:
        return "No file uploaded. Please upload an image or PDF file."

    try:
        # Plain paths are checked first: path objects also expose ``name``,
        # but there it is only the final component, not the full path.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        file_extension = os.path.splitext(file_path)[1].lower()

        if file_extension == ".pdf":
            # Process PDF
            return extract_text_from_pdf(file_path)

        if file_extension in (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"):
            # Process Image; the context manager closes the underlying file
            # handle even when OCR raises.
            with Image.open(file_path) as img:
                return extract_text_from_image(img)

        return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
    except Exception as e:
        # UI boundary: surface the failure as text in the output box.
        return f"Error processing file: {str(e)}"

# Create Gradio interface
# Components are placed in the order they are created inside the nested
# context managers, so statement order here defines the page layout.
with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
    # Page heading and short usage instruction
    gr.Markdown("# Handwritten Text OCR Extraction Tool")
    gr.Markdown("Upload an image or PDF containing handwritten text to extract the content.")
    
    # One row with two columns: upload controls first, results second
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload Image or PDF")
            extract_button = gr.Button("Extract Text")
        
        with gr.Column():
            text_output = gr.Textbox(label="Extracted Text", lines=10, placeholder="Extracted text will appear here...")
    
    # Clicking the button passes the uploaded file to process_file and shows
    # the string it returns in the output textbox
    extract_button.click(fn=process_file, inputs=[file_input], outputs=[text_output])
    
    # Usage notes shown below the controls
    gr.Markdown("### Notes:")
    gr.Markdown("- For best results, ensure the handwriting is clear and the image is well-lit")
    gr.Markdown("- The system works best with dark text on light background")
    gr.Markdown("- The first run may take longer as it downloads the OCR models")
    gr.Markdown("- Multiple page PDFs will show page breaks in the output")

# Launch the app
# Only when executed as a script; importing this module builds `app` but does
# not start the server.
if __name__ == "__main__":
    app.launch()