File size: 4,147 Bytes
ad14750 1939ca9 ad14750 61a2cab 1939ca9 ad14750 1939ca9 ad14750 1939ca9 ad14750 1939ca9 ad14750 1939ca9 ad14750 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import json
import os
import tempfile

import cv2
import gradio as gr
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw, ImageFont
# Initialize PaddleOCR once at import time so every call to process_image
# reuses the same engine (language 'en', angle classifier switched on).
ocr = PaddleOCR(use_angle_cls=True, lang='en')
def draw_boxes_on_image(image, data):
    """Draw OCR bounding boxes and their recognised text on a copy of an image.

    Args:
        image: The picture to annotate, as a PIL image or a NumPy array.
            It is expected in RGB channel order (the order used by PIL
            images and by Gradio's numpy image input), so no channel swap
            is applied.
        data: Iterable of ``[bounding_box, (text, confidence)]`` entries.
            ``bounding_box`` holds the corner points either as nested
            ``[x, y]`` pairs or as a flat ``[x0, y0, x1, y1, ...]`` sequence.
            ``None`` is treated as "nothing to draw".

    Returns:
        A new RGB ``PIL.Image.Image`` with a green outline around every box
        and the red label ``"<text> (<confidence>)"`` above its first corner.
    """
    # The input is already RGB; a BGR->RGB conversion here would exchange the
    # red and blue channels of the returned picture.
    pil_image = Image.fromarray(np.array(image)).convert("RGB")
    draw = ImageDraw.Draw(pil_image)

    # Prefer a scalable font; fall back to PIL's built-in bitmap font when
    # DejaVuSans is not installed.
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    if data is None:
        data = []

    for bounding_box, (text, confidence) in data:
        # Normalise any list/tuple/ndarray layout to an (N, 2) integer array.
        corners = np.asarray(bounding_box, dtype=float).reshape(-1, 2).astype(int)
        points = [(int(x), int(y)) for x, y in corners]
        if len(points) < 2:
            continue

        # Connect consecutive corners and close the outline back to the start.
        for start, end in zip(points, points[1:] + points[:1]):
            draw.line([start, end], fill="green", width=2)

        # Put the label above the first corner, but never above the top edge.
        label_x, label_y = points[0]
        draw.text(
            (label_x, max(label_y - 20, 0)),
            f"{text} ({confidence:.2f})",
            fill="red",
            font=font,
        )
    return pil_image
def convert_to_json(results, output_file):
    """Write OCR results to a JSON file.

    Each entry is stored as an object with the keys ``bounding_box`` (list of
    ``[x, y]`` float pairs), ``text`` and ``confidence``.

    Args:
        results: Iterable of ``[bounding_box, (text, confidence)]`` entries.
            ``None`` is treated as an empty result and produces ``[]``.
        output_file: Path of the JSON file to create or overwrite.
    """
    json_data = []
    for result in results if results is not None else []:
        bounding_box = result[0]
        text, confidence = result[1][0], result[1][1]
        json_data.append({
            "bounding_box": [[float(value) for value in point] for point in bounding_box],
            "text": text,
            # Cast like the coordinates: NumPy scalar types such as float32
            # are not JSON serialisable.
            "confidence": float(confidence),
        })

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)
def _is_word_entry(item):
    """Return True when ``item`` looks like ``[bounding_box, (text, confidence)]``."""
    if not isinstance(item, (list, tuple)) or len(item) != 2:
        return False
    payload = item[1]
    return (
        isinstance(payload, (list, tuple))
        and len(payload) == 2
        and isinstance(payload[0], str)
    )


def identify_field_value_pairs(ocr_results, fields):
    """Pair known field labels with the OCR text that follows them.

    An entry matches a field when the field name occurs, case-insensitively,
    inside the entry's text. The value is assumed to be the text of the very
    next entry on the same page; a label that is the last entry of its page
    yields no value. If a field matches several times, the last match wins.

    Args:
        ocr_results: Either a list of pages, each a list of
            ``[bounding_box, (text, confidence)]`` entries, or a single such
            page. ``None`` / empty input and ``None`` pages are skipped.
        fields: Iterable of field names to look for. For each entry only the
            first matching field (in this order) is used.

    Returns:
        Dict mapping each matched field name to its value text.
    """
    if not ocr_results:
        return {}

    # Accept a single flat page as well as the nested list-of-pages layout.
    pages = [ocr_results] if _is_word_entry(ocr_results[0]) else ocr_results

    # Lower-case the field names once instead of once per OCR entry.
    lowered_fields = [(field, field.lower()) for field in fields]

    field_value_pairs = {}
    for page in pages:
        if not page:
            continue
        for position, word_info in enumerate(page):
            text = word_info[1][0].lower()
            for field, needle in lowered_fields:
                if needle in text:
                    # Assuming the value comes immediately after the field.
                    if position + 1 < len(page):
                        field_value_pairs[field] = page[position + 1][1][0]
                    break
    return field_value_pairs
# Form labels searched for in the OCR text when no explicit list is given.
DEFAULT_FIELDS = (
    "Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature",
    "Tax Status", "Mobile Number", "Email", "Address", "Bank Account Details",
)


def process_image(image, fields=None):
    """Run OCR on an image and produce the annotated image plus two JSON files.

    Args:
        image: The uploaded image (anything ``np.array`` can convert).
        fields: Optional iterable of field labels to pair with values;
            defaults to ``DEFAULT_FIELDS``.

    Returns:
        Tuple ``(annotated_image, ocr_json_path, field_value_json_path)``.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    ocr_results = ocr.ocr(np.array(image), cls=True)
    # A page on which no text was detected can come back as None.
    page = ocr_results[0] if ocr_results and ocr_results[0] else []

    processed_image = draw_boxes_on_image(image, page)

    # Write into a fresh directory per call so concurrent requests cannot
    # overwrite each other's result files.
    output_dir = tempfile.mkdtemp(prefix="ocr_")

    # Save OCR results to JSON
    json_path = os.path.join(output_dir, "ocr_results.json")
    convert_to_json(page, json_path)

    # Identify field-value pairs; the helper expects a list of pages.
    wanted_fields = list(fields) if fields is not None else list(DEFAULT_FIELDS)
    field_value_pairs = identify_field_value_pairs([page], wanted_fields)
    field_value_json_path = os.path.join(output_dir, "field_value_pairs.json")
    with open(field_value_json_path, "w", encoding="utf-8") as json_file:
        json.dump(field_value_pairs, json_file, indent=4)

    return processed_image, json_path, field_value_json_path
# Gradio front end: one image goes in; the annotated image and the two
# downloadable JSON files produced by process_image come out.
_result_components = [
    "image",
    gr.File(label="Download OCR Results JSON"),
    gr.File(label="Download Field-Value Pairs JSON"),
]

interface = gr.Interface(
    title="OCR Web Application",
    description="Upload an image and get OCR results with bounding boxes and two JSON outputs.",
    fn=process_image,
    inputs="image",
    outputs=_result_components,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
|