# Spaces: DeepDiveDev / TransformoDocs-Demo (Sleeping)
# File size: 3,770 Bytes

import gradio as gr
import easyocr
from PIL import Image
import pdf2image
import tempfile
import os
import cv2
import numpy as np
import torch

# Initialize the OCR reader (this will download models on first run).
# It is created once at import time and shared by every OCR call in this
# module. Only English ('en') is loaded, and the GPU is used whenever torch
# reports that CUDA is available.
reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())

def preprocess_image(img, noise_kernel_size=1):
    """Binarize an image to improve OCR accuracy for handwritten text.

    Args:
        img: A PIL image, or array-like data that ``np.array`` turns into
            a 2-D grayscale array or a 3-/4-channel RGB(A) array.
        noise_kernel_size: Side length of the square structuring element
            used for the closing/opening noise-removal passes. With the
            default of 1 both passes are identity operations, so they are
            skipped; pass 2 or more to actually remove speckles.

    Returns:
        The single-channel array produced by ``cv2.adaptiveThreshold``.
    """
    # Only "L", "RGB" and "RGBA" map onto arrays the code below interprets
    # correctly. Palette images ("P") would otherwise have their palette
    # indices treated as gray levels, and modes such as "1", "LA", "CMYK"
    # or 16/32-bit ones either crash cv2 or are misread as RGB(A).
    if isinstance(img, Image.Image) and img.mode not in ("L", "RGB", "RGBA"):
        img = img.convert("L")

    # Convert PIL Image to numpy array
    img_array = np.array(img)

    # A 2-D array is already grayscale; otherwise collapse the colour channels
    if img_array.ndim == 2:
        gray = img_array
    else:
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    # Apply adaptive thresholding for better handling of different lighting conditions
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Noise removal. A 1x1 kernel leaves every pixel unchanged, so the
    # morphology passes only run when a larger kernel is requested.
    if noise_kernel_size > 1:
        kernel = np.ones((noise_kernel_size, noise_kernel_size), np.uint8)
        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    return binary

def extract_text_from_image(img):
    """Run EasyOCR on an image and return the recognized text.

    The image is first binarized by ``preprocess_image``. Each detection
    returned by the reader contributes one line to the result, and leading
    and trailing whitespace is stripped from the combined text.
    """
    cleaned = preprocess_image(img)
    detections = reader.readtext(cleaned)

    # The recognized string is the second field of each detection.
    combined = '\n'.join(detection[1] for detection in detections)
    return combined.strip()

def extract_text_from_pdf(pdf_path):
    """Extract text from all pages of a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The OCR text of every page, in page order, joined by a
        "--- Page Break ---" separator line. A PDF that yields no pages
        gives an empty string.
    """
    # Pages are rendered into a temporary directory that is removed when
    # the ``with`` block exits, so all OCR work must happen inside it.
    with tempfile.TemporaryDirectory() as work_dir:
        pages = pdf2image.convert_from_path(pdf_path, output_folder=work_dir)

        page_texts = []
        for page in pages:
            try:
                page_texts.append(extract_text_from_image(page))
            finally:
                # Release this page's pixel data and file handle right away
                # instead of keeping every decoded page alive until the
                # whole document has been processed.
                page.close()

    return "\n\n--- Page Break ---\n\n".join(page_texts)

def process_file(file):
    """Process the uploaded file (PDF or image) and return its OCR text.

    Args:
        file: The upload handed over by the UI. It may be ``None`` (nothing
            uploaded), a filesystem path (``str`` or ``os.PathLike``), or an
            object exposing the path as a ``name`` attribute.

    Returns:
        The extracted text on success; otherwise a human-readable message
        describing why nothing was extracted. Errors are reported through
        the returned string instead of being raised, so the UI always has
        something to display.
    """
    if file is None:
        return "No file uploaded. Please upload an image or PDF file."

    try:
        # Depending on the Gradio version/configuration the upload arrives
        # either as a plain path or as a tempfile-like wrapper with ``.name``.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        file_extension = os.path.splitext(file_path)[1].lower()

        if file_extension == ".pdf":
            # Process PDF
            return extract_text_from_pdf(file_path)

        if file_extension in (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"):
            # Process Image; the context manager closes the underlying file
            # once OCR is done.
            with Image.open(file_path) as img:
                return extract_text_from_image(img)

        return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
    except Exception as e:
        # Top-level UI boundary: surface the failure as text rather than
        # letting the request crash.
        return f"Error processing file: {str(e)}"

# Create Gradio interface.
# Components are registered in the order they are created inside the
# context managers below, so statement order determines the page layout.
with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
    # Page heading and a one-line usage hint.
    gr.Markdown("# Handwritten Text OCR Extraction Tool")
    gr.Markdown("Upload an image or PDF containing handwritten text to extract the content.")
    
    # One row with two columns: upload controls first, OCR result second.
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload Image or PDF")
            extract_button = gr.Button("Extract Text")
        
        with gr.Column():
            text_output = gr.Textbox(label="Extracted Text", lines=10, placeholder="Extracted text will appear here...")
    
    # Clicking the button passes the uploaded file to process_file and
    # writes the string it returns into the output textbox.
    extract_button.click(fn=process_file, inputs=[file_input], outputs=[text_output])
    
    # Static usage notes shown below the controls.
    gr.Markdown("### Notes:")
    gr.Markdown("- For best results, ensure the handwriting is clear and the image is well-lit")
    gr.Markdown("- The system works best with dark text on light background")
    gr.Markdown("- The first run may take longer as it downloads the OCR models")
    gr.Markdown("- Multiple page PDFs will show page breaks in the output")

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    app.launch()