# Source: Hugging Face Space "Prasada/OCR_Demo" (status: Sleeping).
# File size: 4,147 bytes.

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import json
from paddleocr import PaddleOCR
import gradio as gr
import os

# Initialize PaddleOCR
# One module-level engine is created at import time and reused by
# process_image for every request.
# NOTE(review): use_angle_cls=True presumably enables the text-angle
# classifier (process_image calls ocr.ocr(..., cls=True)) and lang='en'
# presumably selects the English models -- confirm against the installed
# PaddleOCR version.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Function to draw bounding boxes on the image
def draw_boxes_on_image(image, data):
    """Return a copy of ``image`` annotated with OCR boxes and labels.

    Each detection is outlined in green and labelled in red with its text
    and confidence (two decimals), placed just above the box's first point.

    Args:
        image: The picture to annotate, as a PIL image or a NumPy array in
            RGB channel order. Grayscale and RGBA inputs are converted to
            RGB.
        data: Iterable of ``(bounding_box, (text, confidence))`` entries.
            ``bounding_box`` is a sequence of ``(x, y)`` points (lists,
            tuples or an array; a flat ``[x1, y1, x2, y2, ...]`` sequence
            is accepted too). ``None`` or an empty iterable draws nothing.

    Returns:
        A new RGB ``PIL.Image.Image``; the input image is not modified.
    """
    # The caller hands over the Gradio upload (or a PIL image), both of
    # which are already RGB. Applying an OpenCV BGR->RGB swap here would
    # exchange the red and blue channels in the returned picture.
    pil_image = Image.fromarray(np.array(image)).convert("RGB")
    draw = ImageDraw.Draw(pil_image)

    # Define a font (fall back to PIL's built-in one if DejaVuSans is missing)
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except OSError:
        font = ImageFont.load_default()

    # The OCR engine may report "no text found" as None rather than [].
    if data is None:
        data = []

    for item in data:
        bounding_box, (text, confidence) = item

        # reshape(-1, 2) normalises nested lists, tuples, arrays and flat
        # coordinate sequences to an (N, 2) table of integer points.
        box = np.array(bounding_box).astype(int).reshape(-1, 2)
        points = [(int(x), int(y)) for x, y in box]
        if len(points) < 2:
            continue  # nothing to outline

        # Draw the bounding box as a closed polygon, edge by edge
        for start, end in zip(points, points[1:] + points[:1]):
            draw.line([start, end], fill="green", width=2)

        # Draw the text above the bounding box, clamped so labels of boxes
        # touching the top edge stay inside the image.
        label_x, label_y = points[0]
        text_position = (label_x, max(label_y - 20, 0))
        draw.text(text_position, f"{text} ({confidence:.2f})", fill="red", font=font)

    return pil_image

# Function to convert OCR results to JSON
def convert_to_json(results, output_file):
    """Write OCR results to ``output_file`` as a JSON array.

    Each array element is an object with the keys ``"bounding_box"`` (list
    of ``[x, y]`` float pairs), ``"text"`` and ``"confidence"``.

    Args:
        results: Iterable of ``[bounding_box, (text, confidence)]`` entries.
            ``None`` or an empty iterable produces an empty JSON array.
        output_file: Path of the JSON file to create or overwrite.
    """
    if results is None:
        results = []

    json_data = []
    for result in results:
        bounding_box = result[0]
        text = result[1][0]
        confidence = result[1][1]

        json_data.append({
            "bounding_box": [[float(value) for value in point] for point in bounding_box],
            "text": text,
            # float() keeps NumPy scalar confidences JSON-serialisable.
            "confidence": float(confidence),
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)

def _is_word_entry(item):
    """Return True if ``item`` looks like one ``[box, (text, confidence)]`` entry."""
    if not isinstance(item, (list, tuple)) or len(item) != 2:
        return False
    payload = item[1]
    return (
        isinstance(payload, (list, tuple))
        and len(payload) == 2
        and isinstance(payload[0], str)
    )


# Function to identify 'field', 'value' pairs
def identify_field_value_pairs(ocr_results, fields):
    """Map each field label found in the OCR output to the text that follows it.

    A word matches a field when the field name occurs in the word's text,
    ignoring case; only the first matching field (in ``fields`` order) is
    used for a given word. The value is the text of the next entry in the
    same page, so a label that is the last entry yields no value. If a
    field is matched more than once, the last match wins.

    Args:
        ocr_results: Either a list of pages, each page being a list of
            ``[box, (text, confidence)]`` entries (pages may be ``None``
            or empty), or a single such page passed directly.
        fields: Field names to look for.

    Returns:
        A dict mapping each matched field name to the following entry's text.
    """
    if not ocr_results:
        return {}

    # A flat list of word entries is a single page; wrap it so both input
    # shapes go through the same loop below.
    first = next((item for item in ocr_results if item is not None), None)
    lines = [ocr_results] if _is_word_entry(first) else ocr_results

    # Lower-case the field names once instead of once per word.
    lowered_fields = [(field, field.lower()) for field in fields]

    field_value_pairs = {}
    for line in lines:
        if not line:
            continue  # page without any detected text
        # enumerate gives the entry's real position; list.index() would
        # return the first equal entry and mis-resolve repeated labels.
        for index, word_info in enumerate(line):
            text = word_info[1][0].lower()
            for field, needle in lowered_fields:
                if needle in text:
                    # Assuming the value comes immediately after the field
                    value_index = index + 1
                    if value_index < len(line):
                        field_value_pairs[field] = line[value_index][1][0]
                    break
    return field_value_pairs

# Function to process the image and generate outputs
def process_image(image):
    """Run OCR on ``image`` and produce the three outputs shown in the UI.

    Writes ``ocr_results.json`` (every detection) and
    ``field_value_pairs.json`` (matched form fields) to the current working
    directory, overwriting any previous files of the same name.

    Args:
        image: The uploaded picture; anything ``np.array`` can convert to
            a pixel array.

    Returns:
        A tuple ``(annotated_image, ocr_json_path, field_value_json_path)``.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    ocr_results = ocr.ocr(np.array(image), cls=True)

    # Only one image is submitted, so only the first result entry matters.
    # That entry can be None when no text is detected; normalise it to an
    # empty page so the helpers below never iterate over None.
    page = (ocr_results[0] if ocr_results else None) or []
    processed_image = draw_boxes_on_image(image, page)

    # Save OCR results to JSON
    json_path = "ocr_results.json"
    convert_to_json(page, json_path)

    # Identify field-value pairs
    fields = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
              "Mobile Number", "Email", "Address", "Bank Account Details"]
    # identify_field_value_pairs iterates pages and then the entries of each
    # page, so the single page has to be wrapped in a list; passing the bare
    # page makes it treat every detection as a page and fail.
    field_value_pairs = identify_field_value_pairs([page], fields)
    field_value_json_path = "field_value_pairs.json"

    with open(field_value_json_path, 'w', encoding="utf-8") as json_file:
        json.dump(field_value_pairs, json_file, indent=4)

    return processed_image, json_path, field_value_json_path

# Gradio Interface
# Wires process_image to a web UI: one image input and three outputs that
# line up, in order, with the tuple process_image returns (annotated image,
# OCR results JSON path, field-value pairs JSON path).
interface = gr.Interface(
    fn=process_image, 
    inputs="image", 
    outputs=[
        "image", 
        gr.File(label="Download OCR Results JSON"), 
        gr.File(label="Download Field-Value Pairs JSON")
    ],
    title="OCR Web Application",
    description="Upload an image and get OCR results with bounding boxes and two JSON outputs."
)

# Launch the app only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    interface.launch()