Spaces:
Runtime error
Runtime error
File size: 5,529 Bytes
f0debc6 53bb8bc f0debc6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# A bit messy, to say the least; a cleaner version will be put in a separate Space.
def _box_top(box):
    """Return the y coordinate of corner 0 of a detection's bounding box."""
    return box[0][0][1]


def _box_x_centroid(box):
    """Return the x midpoint between corner 0 and corner 2 of the bounding box."""
    left = box[0][0][0]
    right = box[0][2][0]
    return (right - left) / 2 + left


def _box_y_centroid(box):
    """Return the y midpoint between corner 0 and corner 2 of the bounding box."""
    top = box[0][0][1]
    bottom = box[0][2][1]
    return (bottom - top) / 2 + top


def _group_into_rows(sorted_boxes):
    """Split detections (already sorted by y centroid) into rows of text.

    A new row starts whenever the previous box's y centroid lies above the
    current box's top edge; otherwise the box joins the current row.
    Every input box ends up in exactly one row.
    """
    rows = []
    for box in sorted_boxes:
        if rows and _box_y_centroid(rows[-1][-1]) >= _box_top(box):
            rows[-1].append(box)
        else:
            rows.append([box])
    return rows


def _clean_ocr_text(text):
    """Normalise OCR output: underscores to spaces, lower-case, 0 -> o next to letters."""
    import re  # local import so the block does not rely on a file-level import

    text = text.replace("_", " ")
    # Collapse any run of spaces (including ones created by the line above).
    text = re.sub(r" {2,}", " ", text)
    text = text.lower()
    # A zero touching a letter is almost always a misread "o". Lookarounds keep
    # the neighbouring letter in place ("0k" -> "ok", "hell0" -> "hello").
    return re.sub(r"(?<=[a-z])0|0(?=[a-z])", "o", text)


def imgOCR_img2text(imgFilename, reader=None):
    """Run OCR on an image and return its text in reading order.

    Detections are sorted top-to-bottom by y centroid, grouped into rows,
    sorted left-to-right by x centroid within each row, joined with single
    spaces and finally cleaned (see ``_clean_ocr_text``).

    Args:
        imgFilename: Image passed straight to ``reader.readtext``.
        reader: Optional object exposing ``readtext(image)`` that returns a
            sequence of ``(bounding_box, text, ...)`` items, where
            ``bounding_box`` is indexable as ``[corner][axis]`` and corners
            0 and 2 are opposite. When omitted, a CPU English
            ``easyocr.Reader`` is created for this call.

    Returns:
        The cleaned, lower-cased text, or ``''`` when nothing is detected.
    """
    if reader is None:
        # Imported lazily so callers that supply their own reader do not need
        # easyocr installed. CPU inference matches the original behaviour.
        import easyocr
        reader = easyocr.Reader(['en'], gpu=False)

    detections = reader.readtext(imgFilename)

    # sorted() leaves the reader's own result list untouched.
    by_row = sorted(detections, key=_box_y_centroid)
    rows = _group_into_rows(by_row)

    words = [
        box[1]
        for row in rows
        for box in sorted(row, key=_box_x_centroid)
    ]
    return _clean_ocr_text(' '.join(words))