# Water meter reading demo: YOLO detection, TrOCR text recognition and QR decoding, served with Gradio.
import gradio as gr
from ultralytics import YOLO
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from qreader import QReader
import cv2
import json
import ast
from datetime import datetime


# The TrOCR processor and model are both loaded from this one checkpoint.
_TROCR_CHECKPOINT = "microsoft/trocr-large-stage1"

# Created once at import time and reused by every call to the handler.
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

# QR code detector/decoder, also shared across calls.
qreader = QReader()


def _as_image_path(image_input):
    """Return the filesystem path of the uploaded image as a string."""
    if image_input is None:
        raise ValueError("No image was uploaded")
    if isinstance(image_input, str):
        return image_input
    if hasattr(image_input, "__fspath__"):
        return str(image_input)
    # NOTE(review): some Gradio versions appear to hand over a temp-file
    # wrapper instead of a path string; its path is exposed as `.name`.
    return image_input.name


def _run_ocr(crop):
    """Return the text TrOCR generates for one cropped PIL image."""
    pixel_values = processor(crop, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def _read_qr(image_path):
    """Return the first decoded QR payload found in the image, or None."""
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError("The uploaded file could not be read as an image")
    decoded = qreader.detect_and_decode(
        image=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    )
    # Skip empty/None entries (codes that were found but not decoded).
    return next((payload for payload in decoded if payload), None)


def _save_reading(payload, reading):
    """Write the QR data plus a timestamped reading to '<Address>.json'.

    Returns the path of the written file. Raises ValueError (or SyntaxError
    from the parser) when the payload is not a dict literal with 'Address'.
    """
    # SECURITY: the payload comes from an arbitrary uploaded image.
    # ast.literal_eval only accepts Python literals, so nothing is executed.
    data_dict = ast.literal_eval(payload)
    if not isinstance(data_dict, dict) or 'Address' not in data_dict:
        raise ValueError("QR code data has no 'Address' entry")

    # SECURITY: neutralise path separators so an address such as
    # '../../x' cannot make the file land outside the working directory.
    safe_name = str(data_dict['Address']).replace("/", "_").replace("\\", "_")
    file_path = f"{safe_name}.json"

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    data_dict['Last_Reading'] = {timestamp: reading}

    # Serialise first so a failure cannot leave a truncated file behind.
    serialized = json.dumps(data_dict, indent=4)
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(serialized)
    return file_path


def yolo_and_trocr(image_input, save):
    """Read a water meter photo: detect the reading, OCR it, decode the QR.

    Args:
        image_input: The uploaded image, as a path string, a path-like
            object, or an object exposing the path as ``.name``.
        save: When true, the QR data and the reading are written to a JSON
            file named after the QR code's 'Address' entry.

    Returns:
        A 4-tuple ``(image, reading, qr_text, file_path)`` matching the four
        outputs of the Gradio interface. ``image`` is the annotated PIL image
        and ``file_path`` is None unless a file was saved. On any failure the
        tuple is ``(None, error message, retry hint, None)``; exceptions are
        not propagated to the caller.
    """
    try:
        image_path = _as_image_path(image_input)

        # YOLO instantiated from the trained model.
        # NOTE(review): loaded on every call; Ultralytics advises against
        # sharing one model instance across threads, so check before caching.
        yolo = YOLO('best.pt')

        results = yolo(image_path, conf=0.5, iou=0.7)
        # plot() yields a BGR array; reorder the channels to RGB for PIL.
        res = results[0].plot()[:, :, [2, 1, 0]]
        boxes = results[0].boxes.xyxy
        image = Image.fromarray(res)

        # len() rather than truthiness: boxes is a multi-element array/tensor.
        if len(boxes) == 0:
            raise ValueError("No meter reading was detected in the image")

        # Only the first detection is used, so only that crop is OCR'd.
        # NOTE(review): the crop is taken from the annotated plot, as before;
        # confirm whether the drawn boxes affect recognition.
        text = _run_ocr(image.crop(tuple(boxes[0].tolist())))
        # Insert a decimal point after the first five characters.
        # NOTE(review): assumes five integer digits -- confirm for all meters.
        text = f"{text[:5]}.{text[5:]}"

        # Reading the QR code from the image.
        decoded_text = _read_qr(image_path)
        if decoded_text is None:
            if save:
                raise ValueError(
                    "No QR code detected, so the reading cannot be saved"
                )
            return image, text, "No QR code detected", None

        # Outputting the image, reading, and QR info without saving the data.
        if not save:
            return image, text, decoded_text, None

        file_path = _save_reading(decoded_text, text)
        return image, text, decoded_text, file_path

    except Exception as e:
        # UI boundary: report the problem instead of crashing the app. Four
        # values are returned because the interface declares four outputs.
        return (
            None,
            f"Your input is invalid: {str(e)}",
            "Try Again: Make sure the meter and QR code are clearly captured",
            None,
        )

# Input widgets, in the order of yolo_and_trocr's parameters.
_input_components = [
    gr.File(label="Input: Water Meter Image"),
    gr.Checkbox(label="Save"),
]

# Output widgets, in the order of the values yolo_and_trocr returns.
_output_components = [
    gr.Image(label='Output: Water Meter Photo'),
    gr.Textbox(label="Output: Water Meter Reading"),
    gr.Textbox(label="Output: QR Code Detection"),
    gr.File(label="Output: Saved Data"),
]

# Wire the handler to the widgets.
app = gr.Interface(
    fn=yolo_and_trocr,
    inputs=_input_components,
    outputs=_output_components,
    title="Water Meter Reading with YOLO and OCR",
)

# Start the web UI as soon as the module is run.
app.launch()