File size: 4,147 Bytes
ad14750
 
 
 
 
 
1939ca9
ad14750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61a2cab
1939ca9
 
 
 
ad14750
 
 
 
 
 
 
 
 
 
 
 
 
1939ca9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad14750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1939ca9
ad14750
 
 
 
 
 
1939ca9
ad14750
 
 
 
 
 
 
 
 
 
 
1939ca9
 
ad14750
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import json
from paddleocr import PaddleOCR
import gradio as gr
import os

# Initialize PaddleOCR
# One engine instance is created at import time and reused by every call to
# process_image. It is configured for English text (lang='en') with the
# angle classifier switched on (use_angle_cls=True).
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Function to draw bounding boxes on the image
def draw_boxes_on_image(image, data):
    """Return a copy of ``image`` annotated with OCR boxes and labels.

    Args:
        image: The source picture, either a PIL image or an array accepted
            by ``PIL.Image.fromarray``. Channels are taken to be in RGB
            order, which is what the Gradio image input used in this file
            delivers, so no BGR/RGB swap is applied.
        data: Iterable of ``(bounding_box, (text, confidence))`` entries for
            one image, or ``None`` when nothing was detected.
            ``bounding_box`` holds the polygon corners as ``(x, y)`` pairs.

    Returns:
        A new RGB ``PIL.Image.Image`` with every box outlined in green and
        its text plus confidence written in red above the first corner.
    """
    # convert("RGB") always yields a fresh image, so the caller's pixels are
    # never drawn on, and grayscale/RGBA inputs are normalised as well.
    pil_image = Image.fromarray(np.asarray(image)).convert("RGB")
    draw = ImageDraw.Draw(pil_image)

    # Define a font (fall back to PIL's built-in one if DejaVuSans is missing)
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    if data is None:
        # No detections for this image: return the unannotated copy.
        return pil_image

    for bounding_box, (text, confidence) in data:
        # Accept nested lists, tuples or arrays of corners alike and
        # truncate the coordinates to whole pixels.
        corners = np.asarray(bounding_box).astype(int).reshape(-1, 2)
        points = [tuple(point) for point in corners.tolist()]
        if not points:
            continue

        # Draw the bounding box as a closed outline, one edge at a time
        for start, end in zip(points, points[1:] + points[:1]):
            draw.line([start, end], fill="green", width=2)

        # Draw the text above the bounding box
        first_x, first_y = points[0]
        draw.text(
            (first_x, first_y - 20),
            f"{text} ({confidence:.2f})",
            fill="red",
            font=font,
        )

    return pil_image

# Function to convert OCR results to JSON
def convert_to_json(results, output_file):
    """
    Write OCR results to ``output_file`` as a JSON array.

    Each element of the array is an object with the keys ``bounding_box``
    (list of ``[x, y]`` float pairs), ``text`` and ``confidence``.

    Args:
        results: Iterable of ``[bounding_box, (text, confidence)]`` entries,
            or ``None`` when nothing was detected (an empty array is
            written in that case).
        output_file: The name of the output JSON file.
    """
    if results is None:
        results = []

    json_data = []
    for result in results:
        bounding_box = result[0]
        text = result[1][0]
        confidence = result[1][1]

        json_data.append({
            "bounding_box": [[float(value) for value in point] for point in bounding_box],
            "text": text,
            # float() turns NumPy scalars into plain floats, which json can
            # serialise; plain floats pass through unchanged.
            "confidence": float(confidence),
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)

# Helper to tell a single OCR entry apart from a list of entries
def _is_ocr_entry(item):
    """Return True if ``item`` looks like ``[bounding_box, (text, confidence)]``."""
    if not isinstance(item, (list, tuple)) or len(item) != 2:
        return False
    payload = item[1]
    return (
        isinstance(payload, (list, tuple))
        and len(payload) == 2
        and isinstance(payload[0], str)
    )


# Function to identify 'field', 'value' pairs
def identify_field_value_pairs(ocr_results, fields):
    """Pair each recognised field label with the text that follows it.

    A text entry "matches" a field when the field name occurs in the entry's
    text, compared case-insensitively; only the first matching field in
    ``fields`` is considered for an entry. The value assigned to that field
    is the text of the next entry in the same line. A later match for the
    same field overwrites an earlier one.

    Args:
        ocr_results: Either a list of lines, where each line is a list of
            ``[bounding_box, (text, confidence)]`` entries, or a single such
            line passed directly. ``None``, empty input and ``None`` lines
            are tolerated.
        fields: Iterable of field names to look for.

    Returns:
        Dict mapping each field that was found (and had a following entry)
        to the text of that following entry.
    """
    if not ocr_results:
        return {}

    # Normalise to a list of lines: a flat list of entries counts as one line.
    if any(_is_ocr_entry(item) for item in ocr_results):
        lines = [[item for item in ocr_results if _is_ocr_entry(item)]]
    else:
        lines = [line for line in ocr_results if line]

    # Lower-case the field names once instead of once per OCR entry.
    lowered_fields = [(field, field.lower()) for field in fields]

    field_value_pairs = {}
    for line in lines:
        for index, word_info in enumerate(line):
            text = word_info[1][0].lower()
            matched = next(
                (field for field, lowered in lowered_fields if lowered in text),
                None,
            )
            # Assuming the value comes immediately after the field
            value_index = index + 1
            if matched is not None and value_index < len(line):
                field_value_pairs[matched] = line[value_index][1][0]
    return field_value_pairs

# Function to process the image and generate outputs
def process_image(image):
    """Run OCR on ``image`` and produce the three outputs shown in the UI.

    Args:
        image: The uploaded picture; it is converted with ``np.array``
            before being handed to the OCR engine.

    Returns:
        A tuple ``(annotated_image, json_path, field_value_json_path)``:
        the image with boxes drawn on it, the path of the JSON file holding
        all OCR entries, and the path of the JSON file holding the detected
        field/value pairs.

    Raises:
        ValueError: If no image was supplied.
    """
    if image is None:
        raise ValueError("No image provided.")

    ocr_results = ocr.ocr(np.array(image), cls=True)

    # Only one image is submitted, so only the first result is used. It may
    # be missing or None when no text was detected; treat that as "no entries".
    page = (ocr_results[0] if ocr_results else None) or []

    processed_image = draw_boxes_on_image(image, page)

    # Save OCR results to JSON
    # NOTE: both output files use fixed names in the working directory, so
    # each call overwrites the files written by the previous one.
    json_path = "ocr_results.json"
    convert_to_json(page, json_path)

    # Identify field-value pairs
    fields = [
        "Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature",
        "Tax Status", "Mobile Number", "Email", "Address",
        "Bank Account Details",
    ]
    # identify_field_value_pairs iterates over lines of entries, so the
    # entries of this image are passed as a single line.
    field_value_pairs = identify_field_value_pairs([page], fields)
    field_value_json_path = "field_value_pairs.json"

    with open(field_value_json_path, "w", encoding="utf-8") as json_file:
        json.dump(field_value_pairs, json_file, indent=4)

    return processed_image, json_path, field_value_json_path

# Gradio Interface
# Wires process_image to a web form: one image input and three outputs that
# line up positionally with process_image's return tuple (annotated image,
# OCR results JSON path, field-value pairs JSON path).
interface = gr.Interface(
    fn=process_image, 
    inputs="image", 
    outputs=[
        "image", 
        gr.File(label="Download OCR Results JSON"), 
        gr.File(label="Download Field-Value Pairs JSON")
    ],
    title="OCR Web Application",
    description="Upload an image and get OCR results with bounding boxes and two JSON outputs."
)

# Launch the interface only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    interface.launch()