File size: 5,529 Bytes
f0debc6
 
 
53bb8bc
 
f0debc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# NOTE: this is a bit messy, to say the least; a cleaner version will be put in a separate space.
def _bbox_x_centroid(detection):
  """Return the X midpoint between corner 0 and corner 2 of a detection's bounding box.

  `detection` is one item of the OCR result: (corners, text, confidence), where
  `corners` is a list of [x, y] points and corners 0 and 2 are opposite corners.
  """
  corners = detection[0]
  x1, x3 = corners[0][0], corners[2][0]
  return ((x3 - x1) / 2) + x1


def _bbox_y_centroid(detection):
  """Return the Y midpoint between corner 0 and corner 2 of a detection's bounding box."""
  corners = detection[0]
  y1, y3 = corners[0][1], corners[2][1]
  return ((y3 - y1) / 2) + y1


def _group_into_rows(sortedDetections):
  """Split detections (already sorted by Y centroid) into rows of text.

  A new row starts whenever the Y of corner 0 of the next box lies strictly
  below the Y centroid of the previous box. Every detection ends up in exactly
  one row; an empty input yields an empty list.
  """
  rows = []
  currentRow = []
  for detection in sortedDetections:
    if currentRow and _bbox_y_centroid(currentRow[-1]) < detection[0][0][1]:
      rows.append(currentRow)
      currentRow = []
    currentRow.append(detection)
  if currentRow:
    # Flush the final row; without this the trailing boxes would be lost.
    rows.append(currentRow)
  return rows


def _clean_ocr_text(text):
  """Normalise OCR output: underscores to spaces, collapse double spaces, lowercase, 0 -> o next to letters."""
  import re  # local import, same style as the easyocr import in the entry point

  text = text.replace("_", " ")   # replace _ with space
  text = text.replace("  ", " ")  # replace double space with single space
  text = text.lower()

  # Zeros adjacent to letters are the major letter/number OCR confusion: turn them into o's.
  text = re.sub(r"([a-z])0", r"\1o", text)  # letter followed by 0  -> letter + o
  text = re.sub(r"0([a-z])", r"o\1", text)  # 0 followed by letter  -> o + letter (keeps the order)
  return text


def imgOCR_img2text(imgFilename):
  """Run EasyOCR on an image and return the detected text in reading order.

  The detections are grouped into rows (top to bottom, by bounding-box Y
  centroid), each row is ordered left to right (by X centroid), the texts are
  joined with single spaces and then cleaned (see `_clean_ocr_text`).

  Args:
    imgFilename: image to read; passed straight to `easyocr.Reader.readtext`.

  Returns:
    The cleaned, lowercased text. For compatibility with the previous
    implementation the string is '' when nothing is detected and otherwise
    ends with a single trailing space.
  """
  import easyocr

  # CPU inference - slower and less accurate than GPU, but works everywhere.
  # Use gpu=True for faster inference when a GPU is available.
  reader = easyocr.Reader(['en'], gpu=False)

  # Doing OCR. Each result item is (bounding box corners, text, confidence).
  bounds2 = reader.readtext(imgFilename)
  print( bounds2 )

  # sorted() returns a new list, so the original OCR result is not altered.
  OCRbox = sorted(bounds2, key=_bbox_y_centroid)

  listOfRows = _group_into_rows(OCRbox)
  for row in listOfRows:
    # Left-to-right reading order (English) within each row.
    row.sort(key=_bbox_x_centroid)

  print( "listOfRows = ", listOfRows)
  print( "len( listOfRows) = " , len( listOfRows) )

  rowOfTextList = [detection[1] for row in listOfRows for detection in row]
  # The previous implementation always appended an empty placeholder row, which
  # makes the result '' for an image without text and adds one trailing space
  # otherwise. Keep that so callers see the same string format.
  rowOfTextList.append('')
  print(rowOfTextList)

  coordinateSortedText = ' '.join(rowOfTextList)
  print(coordinateSortedText)

  cleanedText = _clean_ocr_text(coordinateSortedText)

  print("============================== FINAL ==============================")
  print(cleanedText)

  return cleanedText