Spaces:
Running
Running
| from paddleocr import PaddleOCR | |
| from PIL import Image | |
| from numpy import asarray | |
# Shared PaddleOCR engine, created once and reused by OCR() below.
# Recognition language is English for now; the plan is to switch to Italian.
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
)
def normalize_bbox(bbox, width, height):
    """Rescale a box onto a 0-1000 integer grid relative to the given size.

    Args:
        bbox: Indexable with at least four numbers; items 0 and 2 are
            scaled against ``width``, items 1 and 3 against ``height``.
        width: Divisor applied to items 0 and 2.
        height: Divisor applied to items 1 and 3.

    Returns:
        A list of four ints, each ``int(1000 * (coordinate / divisor))``
        (``int`` truncates toward zero).
    """
    # Divisor for each of the four coordinates, in index order.
    divisors = (width, height, width, height)
    return [
        int(1000 * (bbox[position] / divisor))
        for position, divisor in enumerate(divisors)
    ]
def unnormalize_box(bbox, width, height):
    """Map a box from the 0-1000 grid back to the given width and height.

    Args:
        bbox: Indexable with at least four numbers on the 0-1000 scale;
            items 0 and 2 are scaled by ``width``, items 1 and 3 by
            ``height``.
        width: Multiplier applied to items 0 and 2.
        height: Multiplier applied to items 1 and 3.

    Returns:
        A list of four numbers, each ``dimension * (coordinate / 1000)``.
        Values are not rounded.
    """
    # Multiplier for each of the four coordinates, in index order.
    dimensions = (width, height, width, height)
    return [
        dimension * (bbox[position] / 1000)
        for position, dimension in enumerate(dimensions)
    ]
def OCR(image):
    """Run the shared OCR engine on an image and collect words with boxes.

    Args:
        image: Object exposing ``width`` and ``height`` attributes that
            numpy ``asarray`` can convert (presumably a PIL image --
            confirm against callers).

    Returns:
        A ``(bboxes, words)`` tuple of parallel lists. ``words`` holds the
        non-empty recognized strings. ``bboxes`` holds the matching boxes
        as returned by ``normalize_bbox`` for the image's width and height.
        Both lists are empty when nothing is recognized.
    """
    result = ocr.ocr(asarray(image), cls=True)
    bboxes = []
    words = []
    for page in result:
        # PaddleOCR reports an image with no detected text as None rather
        # than an empty list; iterating it would raise TypeError.
        if page is None:
            continue
        for line in page:
            text = line[1][0]
            if text == "":
                continue
            corners = line[0]
            # Concatenating the first and third corner points gives a flat
            # [x, y, x, y] box. Assumes PaddleOCR orders corners so these
            # are top-left and bottom-right -- TODO confirm for rotated text.
            bboxes.append(
                normalize_bbox(corners[0] + corners[2], image.width, image.height)
            )
            words.append(text)
    return bboxes, words