pritmanvar-bacancy committed on
Commit 373409e · verified · 1 Parent(s): f932bed

initial commit
.gitattributes CHANGED
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+3.jpg filter=lfs diff=lfs merge=lfs -text
+GrayImages/1.jpg filter=lfs diff=lfs merge=lfs -text
+GrayImages/10.jpg filter=lfs diff=lfs merge=lfs -text
+GrayImages/12.jpg filter=lfs diff=lfs merge=lfs -text
+GrayImages/7.jpg filter=lfs diff=lfs merge=lfs -text
+GrayImages/8.jpg filter=lfs diff=lfs merge=lfs -text
+GrayImages/9.jpg filter=lfs diff=lfs merge=lfs -text
+GrayImages/parcel_img13.png filter=lfs diff=lfs merge=lfs -text
+GrayImages/parcel_img3.png filter=lfs diff=lfs merge=lfs -text
+grey_images/1.jpg filter=lfs diff=lfs merge=lfs -text
+grey_images/10.jpg filter=lfs diff=lfs merge=lfs -text
+grey_images/12.jpg filter=lfs diff=lfs merge=lfs -text
+grey_images/7.jpg filter=lfs diff=lfs merge=lfs -text
+grey_images/8.jpg filter=lfs diff=lfs merge=lfs -text
+grey_images/9.jpg filter=lfs diff=lfs merge=lfs -text
+grey_images/parcel_img3.png filter=lfs diff=lfs merge=lfs -text
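
(These attribute lines are the standard Git LFS tracking rules; they are typically generated with commands along the lines of `git lfs track "GrayImages/*.jpg"`, which appends the matching filter/diff/merge entry shown above.)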
3.jpg ADDED

Git LFS Details

  • SHA256: e31dcf5cb6926537dae945c34b78060847141361f48d7f574643428c66cc4555
  • Pointer size: 132 Bytes
  • Size of remote file: 7.86 MB
App.py ADDED
@@ -0,0 +1,67 @@
+import streamlit as st
+import cv2
+from pipeline import main
+from pathlib import Path
+import pandas as pd
+import os
+from dotenv import load_dotenv
+
+env_path = Path('.') / '.env'
+load_dotenv(dotenv_path=env_path)
+
+path = {
+    'SEG_MODEL_PATH': str(os.getenv('SEG_MODEL_PATH')),
+    'MAIN_FLOW_GRAY_IMG_DIR_PATH': str(os.getenv('MAIN_FLOW_GRAY_IMG_DIR_PATH')),
+    'MAIN_FLOW_INFERENCE_FOLDER': str(os.getenv('MAIN_FLOW_INFERENCE_FOLDER')),
+}
+
+with st.sidebar:
+    st.title("Shipping Label Extraction")
+    data = st.file_uploader(label='Upload Image of Parcel', type=['png', 'jpg', 'jpeg'])
+
+if data:
+    Path('grey_images').mkdir(parents=True, exist_ok=True)
+
+    with open(os.path.join('grey_images', data.name), 'wb') as f:
+        f.write(data.getvalue())
+
+    img = cv2.imread(os.path.join('grey_images', data.name), 0)
+
+    # crop the central region of very large images before processing
+    if img.shape[0] > 1500:
+        height, width = img.shape
+        img = img[height//4:-height//4, width//4:-width//4]
+
+    cv2.imwrite(os.path.join('grey_images', data.name), img)
+
+    # call the main pipeline
+    Output_dict = main(os.path.join('grey_images', data.name))
+    df = pd.DataFrame(Output_dict)
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown("<h3 style='text-align: center;'>Grey Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('grey_images', data.name))
+
+        st.markdown("<h3 style='text-align: center;'>Enhanced Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'enhanced', data.name))
+
+    with col2:
+        st.markdown("<h3 style='text-align: center;'>Detected Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], data.name))
+
+        st.markdown("<h3 style='text-align: center;'>Rotated Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'rotated_image', data.name))
+
+    ocr_data = ""
+    with open(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'ocr_label_data', data.name.split('.')[0] + '.txt'), 'r') as f:
+        ocr_data = f.read()
+    st.header("OCR Text Output")
+    st.text(ocr_data)
+
+    st.header("NER Output")
+    st.table(df)
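
(App.py reads its paths from a `.env` file that is not part of this commit. A minimal sketch of that file, where the key names come from the `os.getenv` calls in this commit but the values are assumptions matching the committed artifacts; the app itself would then be launched with `streamlit run App.py`:)

# Hypothetical .env — key names are real, values are illustrative guesses
SEG_MODEL_PATH=ObjectDetection/best_nano_seg.pt
NER_MODEL_PATH=NER/final_model_2.pt
MAIN_FLOW_GRAY_IMG_DIR_PATH=grey_images
MAIN_FLOW_INFERENCE_FOLDER=inference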
GrayImages/1.jpg ADDED

Git LFS Details

  • SHA256: 9ea4c338418606c239ca1292323d60bc0fda6870b8cbb00b77928a3627f6676f
  • Pointer size: 132 Bytes
  • Size of remote file: 4.01 MB
GrayImages/10.jpg ADDED

Git LFS Details

  • SHA256: 955545c6644e97539b48d9fefdd238877df7e02edc31431e63b83ec09f8519e5
  • Pointer size: 132 Bytes
  • Size of remote file: 3.45 MB
GrayImages/12.jpg ADDED

Git LFS Details

  • SHA256: d841a192f48179d2bab0d36e167b8d401a5df0c82512bd76a546ed876d4bddbe
  • Pointer size: 132 Bytes
  • Size of remote file: 3.1 MB
GrayImages/3.jpg ADDED
GrayImages/7.jpg ADDED

Git LFS Details

  • SHA256: 4d321e7063ce115643da23742873e36e1c4117c90b9a49cbda0b2cbf44afca57
  • Pointer size: 132 Bytes
  • Size of remote file: 1.12 MB
GrayImages/8.jpg ADDED

Git LFS Details

  • SHA256: 2ed29e1bb494858ce5df7a833667423c1bd106097a4a949cfbf9d8086ee180b0
  • Pointer size: 132 Bytes
  • Size of remote file: 3.71 MB
GrayImages/9.jpg ADDED

Git LFS Details

  • SHA256: b6e52892249be2594e0238c372f8912f344fec24fddb9a447c26bb4ac953253d
  • Pointer size: 132 Bytes
  • Size of remote file: 3.33 MB
GrayImages/parcel_img1.png ADDED
GrayImages/parcel_img13.png ADDED

Git LFS Details

  • SHA256: feed2ae0425a3ac6b2ed78e0302c69b4d3573137dd9d0a8e5b6c7c28016c4716
  • Pointer size: 132 Bytes
  • Size of remote file: 1.04 MB
GrayImages/parcel_img15.png ADDED
GrayImages/parcel_img19.png ADDED
GrayImages/parcel_img3.png ADDED

Git LFS Details

  • SHA256: a9e3ed65479c94f8103c93df65e9a43e888811fc110a2c17e30f2df408b2d49c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.15 MB
GrayImages/parcel_img5.png ADDED
NER/final_model_2.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:432a72a5a4c668835a579e9f9fcad6137e0fd6ec72056eaf98a6373abac2852f
+size 414142321
NER/ner_inference.py ADDED
@@ -0,0 +1,96 @@
+from flair.data import Sentence
+from flair.models import SequenceTagger
+# import pathlib
+# temp = pathlib.PosixPath
+# pathlib.PosixPath = pathlib.WindowsPath
+import json
+import os
+from dotenv import load_dotenv
+from pathlib import Path
+env_path = Path('.') / '.env'
+load_dotenv(dotenv_path=env_path)
+import re
+import string
+import nltk
+from nltk.corpus import stopwords
+
+nltk.download('stopwords')
+nltk.download('punkt')
+
+path = {
+    'NER_MODEL_PATH': str(os.getenv('NER_MODEL_PATH')),
+}
+model = SequenceTagger.load(path['NER_MODEL_PATH'])
+
+
+def inference(sent):
+    sent = clean_sent(sent)
+    sentence = Sentence(sent)
+
+    model.predict(sentence)
+    res = sentence.to_dict()
+
+    label_name = {'GCNUM': [], 'TRACK-ID': [], 'Company': ""}
+    for label in res['entities']:
+        # keep the highest-confidence prediction for each entity span
+        sorted_labels = sorted(label['labels'], key=lambda k: k['confidence'], reverse=True)
+        value = sorted_labels[0]['value']
+        if value in ('GCNUM', 'TRACK-ID'):
+            label_name[value].append(label['text'])
+
+    # regex fallback when the model finds no GC number;
+    # G and C are often misread as O or Q by the OCR step
+    if not label_name['GCNUM']:
+        pattern = r'[GOQ][COQ]\d{5}'
+        result = re.findall(pattern, sent, re.IGNORECASE)
+        if result:
+            label_name['GCNUM'].append(result[0])
+
+    # get the company name directly from the sentence, falling back to
+    # tracking-number patterns inside get_company_name
+    label_name['Company'] = get_company_name(sent.lower())
+
+    return label_name
+
+
+def get_company_name(sent):
+    patterns = []
+    patterns.append({'company': "FeDex", 'pattern': re.compile(r"\bfedex\b"), "track_pattern": re.compile(r"\b[0-9]{12}\b")})
+    patterns.append({'company': 'UPS', 'pattern': re.compile(r"\bups\b"), "track_pattern": re.compile(r"\b1Z\b")})
+    patterns.append({'company': 'USPS', 'pattern': re.compile(r"\busps\b"), "track_pattern": re.compile(r"\b42033155\b")})
+    patterns.append({'company': 'onTrack', 'pattern': re.compile(r"\bontrack\b"), "track_pattern": re.compile(r"\bBG[0-9]{5}\b")})
+    patterns.append({'company': 'Lasership', 'pattern': re.compile(r"\b1LS\b"), "track_pattern": re.compile(r"\b1LS\b")})
+    patterns.append({'company': 'Amazon', 'pattern': re.compile(r"\bdmi6 | dm16\b"), "track_pattern": re.compile(r"\bT[B8]A\b")})
+
+    # try the carrier name first, then fall back to tracking-number patterns
+    for pattern in patterns:
+        if pattern['pattern'].search(sent) is not None:
+            return pattern['company']
+
+    for pattern in patterns:
+        if pattern['track_pattern'].search(sent) is not None:
+            return pattern['company']
+    return None
+
+
+def clean_sent(sent):
+    # Remove punctuation
+    sent = sent.translate(str.maketrans('', '', string.punctuation))
+
+    # Tokenize the sentence
+    tokens = nltk.word_tokenize(sent)
+
+    # Remove stop words
+    stop_words = set(stopwords.words('english'))
+    tokens = [token for token in tokens if token.lower() not in stop_words]
+
+    # Join the tokens back into a sentence
+    sent = ' '.join(tokens)
+    return sent
+
+
+# print(inference("CVG2 5.1Lbs 02123 DMIGE Rasheba PierreGC12100 4654SW75TH AVE 33155-4433MIAMIFLUnited TBA305477063112 DMI6 CYCLE 1 MIA1 MIA2 &MI6 DMIG"))
OCR/label_ocr.py ADDED
@@ -0,0 +1,37 @@
+# import libraries
+import numpy as np
+import os
+import cv2
+from dotenv import load_dotenv
+from pathlib import Path
+env_path = Path('.') / '.env'
+
+from paddleocr import PaddleOCR
+ocr = PaddleOCR(use_angle_cls=True, lang='en')  # need to run only once to download and load model into memory
+
+load_dotenv(dotenv_path=env_path)
+path = {
+    'ROTATED_IMAGE_FOLDER_PATH': str(os.getenv('ROTATED_IMAGE_FOLDER_PATH')),
+}
+
+# traverse the rotated images
+for img_name in os.listdir(path["ROTATED_IMAGE_FOLDER_PATH"]):
+
+    # perform OCR
+    file_name = img_name.split(".")[0]
+    result = ocr.ocr(os.path.join(path["ROTATED_IMAGE_FOLDER_PATH"], img_name), cls=True)
+    ocr_output_paddle = []
+    for i in result:
+        ocr_output_paddle.append(" ".join([line[1][0] for line in i]))
+
+    # store the OCR text in the ocr_label_data folder
+    if result is not None:
+        try:
+            Path(os.path.join('runs', 'segment', 'inference', 'ocr_label_data')).mkdir(parents=True, exist_ok=True)
+        except OSError as error:
+            print(error)
+
+        with open(os.path.join('runs', 'segment', 'inference', 'ocr_label_data', file_name + '.txt'), "w+") as f:
+            f.write("\n".join(ocr_output_paddle))
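
(For reference, a hedged sketch of the PaddleOCR result shape that the `line[1][0]` indexing above relies on, as of paddleocr 2.7; the values are illustrative:)

# result holds one entry per image; each entry is a list of
# [box_points, (text, confidence)] pairs, so line[1][0] is the recognized text.
# e.g. result[0][0] -> [[[x1, y1], ...], ('TBA305477063112', 0.98)]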
OCR/label_rotation.py ADDED
@@ -0,0 +1,47 @@
+import cv2
+import os
+import numpy as np
+from rotation_functions import rotate, hoffman_transformation, pytesseractRotate
+from dotenv import load_dotenv
+from pathlib import Path
+env_path = Path('.') / '.env'
+load_dotenv(dotenv_path=env_path)
+
+path = {
+    'ENHANCED_IMAGE_FOLDER_PATH': str(os.getenv('ENHANCED_IMAGE_FOLDER_PATH')),
+}
+
+for img_name in os.listdir(path['ENHANCED_IMAGE_FOLDER_PATH']):
+    img = cv2.imread(os.path.join(path['ENHANCED_IMAGE_FOLDER_PATH'], img_name))
+    original_img = img
+
+    # scale, dilate, and sharpen the image for a better result
+    img = cv2.resize(img, None, fx=2.7, fy=3)
+    kernel = np.ones((2, 2), np.uint8)
+    img = cv2.dilate(img, kernel)
+    sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
+    img = cv2.filter2D(img, -1, sharpen_kernel)
+
+    # apply the Hough-based rotation fix
+    rotated_image, angle = hoffman_transformation(img, True)
+    original_img = rotate(original_img, angle)
+
+    # refine the orientation with Tesseract OSD
+    rotated_image = pytesseractRotate(rotated_image, original_img, 1)
+
+    # save to file
+    if rotated_image is not None:
+        try:
+            Path(os.path.join('runs', 'segment', 'inference', 'rotated_image')).mkdir(parents=True, exist_ok=True)
+        except OSError as error:
+            print(error)
+
+        cv2.imwrite(os.path.join('runs', 'segment', 'inference', 'rotated_image', img_name), rotated_image)
OCR/rotation_functions.py ADDED
@@ -0,0 +1,194 @@
+# method 4: mix of two rotation methods
+import cv2
+import numpy as np
+import os
+import re
+import pytesseract
+from dotenv import load_dotenv
+from pathlib import Path
+env_path = Path('.') / '.env'
+
+load_dotenv(dotenv_path=env_path)
+path = {
+    'TESSERACT_PATH': str(os.getenv('TESSERACT_PATH')),
+}
+pytesseract.pytesseract.tesseract_cmd = path['TESSERACT_PATH']
+
+
+def hoffman_transformation(image, verbose=False):
+    """Estimates the dominant line angle with a Hough transform and rotates the
+    image so the label sits at one of the four right angles (0, 90, 180, 270).
+
+    Args:
+        image (ndarray): image to transform
+        verbose (bool, optional): reserved for plotting intermediate results. Defaults to False.
+
+    Returns:
+        (rotated_image, angle): the rotated image and the applied angle in degrees
+    """
+    # Define our parameters for Canny
+    low_threshold = 50
+    high_threshold = 100
+    kernel = np.ones((8, 8), dtype=np.uint8)
+    eroded_image = cv2.erode(image, kernel=kernel)
+    eroded_image = cv2.dilate(eroded_image, kernel)
+
+    # perform Canny edge detection
+    edges = cv2.Canny(eroded_image, low_threshold, high_threshold)
+    edges = cv2.erode(edges, (50, 50))
+
+    # Define the Hough transform parameters
+    rho = 1
+    theta = np.pi / 180
+    threshold = 60
+    min_line_length = 10
+    max_line_gap = 5
+    line_image = np.copy(image)  # an image copy to draw lines on
+
+    # Run Hough on the edge-detected image
+    lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
+                            min_line_length, max_line_gap)
+
+    # Iterate over the detected lines, draw them, and histogram their angles
+    # rounded to the nearest 10 degrees
+    angles_count = {}
+    final_angle = 0
+    if lines is not None:
+        for line in lines:
+            if line is not None:
+                for x1, y1, x2, y2 in line:
+                    cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 5)
+
+                    if abs(x1 - x2) < 0.000001:
+                        angle = np.pi / 2
+                    else:
+                        angle = np.arctan((y1 - y2) / (x1 - x2))
+                    angle = np.round(angle * 180 / np.pi)
+                    if angle % 10 < 5:
+                        angle = angle - angle % 10
+                    else:
+                        angle = angle + 10 - angle % 10
+                    angles_count[angle] = angles_count.get(angle, 0) + 1
+
+        # the most common rounded angle wins
+        final_angle = max(angles_count, key=angles_count.get)
+
+    line_image = cv2.putText(line_image, str(final_angle), (20, 30), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 3, cv2.LINE_8, False)
+
+    angle = 360 - final_angle
+    angle = -(90 + angle) if angle < -45 else -angle
+
+    # rotate the image by the final angle using a rotation matrix and warpAffine,
+    # expanding the canvas so no corner is clipped
+    h, w = image.shape[:2]
+    (c_x, c_y) = (w // 2, h // 2)
+    matrix = cv2.getRotationMatrix2D((c_x, c_y), angle, 1.0)
+
+    cos = np.abs(matrix[0, 0])
+    sin = np.abs(matrix[0, 1])
+
+    n_w = int((h * sin) + (w * cos))
+    n_h = int((h * cos) + (w * sin))
+
+    matrix[0, 2] += (n_w / 2) - c_x
+    matrix[1, 2] += (n_h / 2) - c_y
+
+    rotated_image = cv2.warpAffine(image, matrix, (n_w, n_h), borderValue=(255, 255, 255))
+    return rotated_image, angle
+
+
+def rotate(image: np.ndarray, angle: float) -> np.ndarray:
+    """Rotates the image by the given angle on an expanded canvas.
+
+    Args:
+        image (np.ndarray): image to rotate
+        angle (float): rotation angle in degrees
+
+    Returns:
+        np.ndarray: the rotated image
+    """
+    h, w = image.shape[:2]
+    (c_x, c_y) = (w // 2, h // 2)
+    matrix = cv2.getRotationMatrix2D((c_x, c_y), angle, 1.0)
+
+    cos = np.abs(matrix[0, 0])
+    sin = np.abs(matrix[0, 1])
+
+    n_w = int((h * sin) + (w * cos))
+    n_h = int((h * cos) + (w * sin))
+
+    matrix[0, 2] += (n_w / 2) - c_x
+    matrix[1, 2] += (n_h / 2) - c_y
+
+    return cv2.warpAffine(image, matrix, (n_w, n_h), borderValue=(255, 255, 255))
+
+
+def pytesseractRotate(image, original_image, grid=3):
+    """Splits the image into a grid, runs pytesseract's OSD (orientation and
+    script detection) on each cell, and rotates the original image to the
+    best-supported 0-degree orientation.
+
+    Args:
+        image (ndarray): enhanced image used for OSD
+        original_image (ndarray): image that is actually rotated and returned
+        grid (int, optional): number of grid cells per side. Defaults to 3.
+
+    Returns:
+        rotated_image (ndarray): the re-oriented original image
+    """
+    h, w = image.shape[:2]
+
+    images_list = []
+    angles_list = {}
+    for i in range(1, grid + 1):
+        for j in range(1, grid + 1):
+            tx, ty = (w // grid) * (j - 1), (h // grid) * (i - 1)
+            bx, by = (w // grid) * j, (h // grid) * i
+
+            img = image[ty:by, tx:bx]
+            images_list.append(img)
+
+    for i in range(len(images_list)):
+        try:
+            result = pytesseract.image_to_osd(images_list[i], config="osd --psm 0 -c min_characters_to_try=200", output_type='dict')
+            pytesseract_angle = result['rotate']
+            orientation_conf = result['orientation_conf']
+            script = result['script']
+            script_conf = result['script_conf']
+
+            script_list = ['Latin', 'Cyrillic']
+
+            # only trust cells whose script detection looks plausible
+            if script in script_list and script_conf > 0:
+                if pytesseract_angle in angles_list:
+                    angles_list[pytesseract_angle].append(orientation_conf)
+                else:
+                    angles_list[pytesseract_angle] = [orientation_conf]
+
+        except Exception as error:
+            print(error)
+
+    # rank candidate angles by (vote count, mean confidence)
+    confidence_list = []
+    for key in angles_list.keys():
+        mean = sum(angles_list[key]) / len(angles_list[key])
+        confidence_list.append((len(angles_list[key]), mean, key))
+
+    confidence_list = sorted(confidence_list)
+
+    final_angle = 360
+    if len(confidence_list) > 0:
+        final_angle -= confidence_list[-1][-1]
+
+    rotated_image = rotate(original_image, final_angle)
+    return rotated_image
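
(A quick, illustrative check of `rotate()`'s expanded-canvas math — not part of the commit, and it assumes the `OCR` package imports resolve from the repo root: the output size is n_w = h·|sin θ| + w·|cos θ| and n_h = h·|cos θ| + w·|sin θ|, so a 90° turn swaps width and height:)

import numpy as np
from OCR.rotation_functions import rotate

img = np.full((400, 600), 255, np.uint8)   # h=400, w=600 blank image
print(rotate(img, 90).shape)               # -> (600, 400): the canvas swaps sides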
ObjectDetection/best_nano_det.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bac3e136b414633b08317ed286f32ba889ca260823e1ddb820a2580df6eb84b1
+size 6245593
ObjectDetection/best_nano_seg.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f62babdbb36264497df9deeab00157570b160f32241e1cdffe9e8228b5357f8
+size 6792867
ObjectDetection/functions.py ADDED
@@ -0,0 +1,124 @@
+import torch
+import cv2
+import numpy as np
+
+def cropBlackBackground(img):
+    """Removes the black background of a label image and returns the portion
+    that contains the label only. Expects a grayscale image.
+
+    Args:
+        img (ndarray): Numpy array representation of the image
+
+    Returns:
+        img (ndarray): Numpy array representation of the cropped image
+    """
+    try:
+        _, binary = cv2.threshold(img, 1, 255, cv2.THRESH_BINARY)
+        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        # keep the bounding box of the largest contour
+        max_area_indx = 0
+        max_area = 0
+        for indx, contour in enumerate(contours):
+            area = cv2.contourArea(contour)
+            if area > max_area:
+                max_area = area
+                max_area_indx = indx
+
+        x, y, w, h = cv2.boundingRect(contours[max_area_indx])
+        img = img[y:y+h, x:x+w]
+        return img
+    except Exception as e:
+        print(e)
+        return None
+
+def enhanceImage(img, block_size: int = 19, constant: int = 5, adaptive_thresold_type="GAUSSIAN", need_to_sharp: bool = True):
+    """Enhances the image by applying adaptive thresholding and filter2D.
+
+    Args:
+        img (ndarray): Numpy array representation of the image
+        block_size (int, optional): Block size for adaptive thresholding. Defaults to 19.
+        constant (int, optional): Constant for adaptive thresholding. Defaults to 5.
+        adaptive_thresold_type (str, optional): "GAUSSIAN" or "MEAN". Defaults to "GAUSSIAN".
+        need_to_sharp (bool, optional): Defaults to True.
+
+    Returns:
+        img (ndarray): Numpy array representation of the enhanced image
+    """
+    try:
+        # block size must be odd
+        if block_size < 2:
+            block_size = 2
+        block_size = block_size + 1 if block_size % 2 == 0 else block_size
+
+        final_img = img
+        if adaptive_thresold_type == "MEAN":
+            final_img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, block_size, constant)
+        else:
+            final_img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, block_size, constant)
+
+        if need_to_sharp:
+            kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
+            final_img = cv2.filter2D(final_img, -1, kernel)
+
+        return final_img
+    except Exception as e:
+        print(e)
+        return None
+
+def generateMask(res, original_img):
+    """Builds a mask from the highest-confidence label segment and crops the
+    original image with it.
+
+    Args:
+        res: Result of YOLO image segmentation for a single image
+        original_img (ndarray): Numpy array representation of the original image
+
+    Returns:
+        tuple (ndarray, ndarray): (crop_img, mask)
+    """
+    try:
+        height, width = original_img.shape
+        masks = res.masks.data
+        boxes = res.boxes.data
+
+        # get index of the box with maximum confidence
+        max_conf_index = res.boxes.conf.argmax()
+
+        # extract classes and keep the masks whose class is 0 (label)
+        clss = boxes[:, 5]
+        label_indices = torch.where(clss == 0)
+        label_masks = masks[label_indices]
+
+        # get the maximum-confidence label's mask
+        max_conf_label_mask = torch.empty(size=(1, label_masks[max_conf_index].shape[0], label_masks[max_conf_index].shape[1]))
+        max_conf_label_mask[0] = label_masks[max_conf_index]
+
+        # scale to 0/255 for visualizing the result
+        label_mask = torch.any(max_conf_label_mask, dim=0).int() * 255
+
+        # resize the final mask back to the original image size
+        final_mask = label_mask.cpu().numpy()
+        height_mask, width_mask = final_mask.shape
+        fy = height / height_mask
+        fx = width / width_mask
+        final_mask = cv2.resize(final_mask, (0, 0), fx=fx, fy=fy, interpolation=cv2.INTER_NEAREST)
+
+        original_img = original_img.astype(np.uint8)
+        final_mask = final_mask.astype(np.uint8)
+
+        # expand the mask boundaries
+        kernel = np.ones((40, 40), np.uint8)
+        expanded_mask = cv2.dilate(final_mask, kernel)
+
+        # crop the image with the expanded mask
+        crop_img = cv2.bitwise_and(original_img, original_img, mask=expanded_mask)
+
+        return crop_img, expanded_mask
+
+    except Exception as e:
+        print(e)
+        return None, None
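
(For illustration, a hedged call into `enhanceImage` — the image path is an assumption; the parameter values shown are the defaults defined above:)

import cv2
from ObjectDetection.functions import enhanceImage

gray = cv2.imread('grey_images/3.jpg', 0)                 # grayscale read; path assumed
enhanced = enhanceImage(gray, block_size=19, constant=5)  # returns None on failure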
ObjectDetection/object_detection.py ADDED
@@ -0,0 +1,65 @@
+import cv2
+from ultralytics import YOLO
+import os
+from dotenv import load_dotenv
+from pathlib import Path
+env_path = Path('.') / '.env'
+load_dotenv(dotenv_path=env_path)
+from functions import cropBlackBackground, enhanceImage, generateMask
+
+path = {
+    'SEG_MODEL_PATH': str(os.getenv('SEG_MODEL_PATH')),
+    'DET_MODEL_PATH': str(os.getenv('DET_MODEL_PATH')),
+    'IMG_DIR_PATH': str(os.getenv('IMG_DIR_PATH')),
+    'INFERENCE_FOLDER': str(os.getenv('INFERENCE_FOLDER')),
+}
+
+# load models
+seg_model = YOLO(path['SEG_MODEL_PATH'])
+det_model = YOLO(path['DET_MODEL_PATH'])
+
+CONF = 0.7
+
+# run detection inference and store the cropped images in the inference folder
+for img in os.listdir(path['IMG_DIR_PATH']):
+    img_file = cv2.imread(os.path.join(path['IMG_DIR_PATH'], img), 0)
+    cv2.imwrite(os.path.join(path['IMG_DIR_PATH'], img), img_file)
+
+    det_model(os.path.join(path['IMG_DIR_PATH'], img), conf=CONF, save=True, save_crop=True, name=path['INFERENCE_FOLDER'], exist_ok=True)
+
+# run segmentation inference and store the masked, cropped, and enhanced images
+for img in os.listdir(path['IMG_DIR_PATH']):
+    img_file = cv2.imread(os.path.join(path['IMG_DIR_PATH'], img), 0)
+    cv2.imwrite(os.path.join(path['IMG_DIR_PATH'], img), img_file)
+
+    result = seg_model(os.path.join(path['IMG_DIR_PATH'], img), save=True, name=path['INFERENCE_FOLDER'], exist_ok=True)
+
+    # read as grayscale, since generateMask expects a single channel
+    original_img = cv2.imread(os.path.join(path['IMG_DIR_PATH'], img), 0)
+
+    for res in result:
+        crop_img, mask = generateMask(res, original_img)
+
+        image = None
+        if crop_img is not None:
+            # crop away the black background and enhance
+            crop_img = cropBlackBackground(crop_img)
+            image = enhanceImage(crop_img)
+
+        # save to file
+        if image is not None:
+            try:
+                Path(os.path.join('runs', 'segment', 'inference', 'crops_seg')).mkdir(parents=True, exist_ok=True)
+                Path(os.path.join('runs', 'segment', 'inference', 'enhanced')).mkdir(parents=True, exist_ok=True)
+                Path(os.path.join('runs', 'segment', 'inference', 'masks')).mkdir(parents=True, exist_ok=True)
+            except OSError as error:
+                print(error)
+
+            cv2.imwrite(os.path.join('runs', 'segment', 'inference', 'masks', img), mask)
+            cv2.imwrite(os.path.join('runs', 'segment', 'inference', 'crops_seg', img), crop_img)
+            cv2.imwrite(os.path.join('runs', 'segment', 'inference', 'enhanced', img), image)
app.py ADDED
@@ -0,0 +1,155 @@
+import streamlit as st
+import cv2
+from pipeline import main
+import pandas as pd
+import os
+from dotenv import load_dotenv
+from pathlib import Path
+from pipeline_functions import object_detection, crop_image, enhance_image, morphological_transform, hoffman_transform, pytesseract_rotate, ocr, ner
+
+env_path = Path('.') / '.env'
+load_dotenv(dotenv_path=env_path)
+
+path = {
+    'SEG_MODEL_PATH': str(os.getenv('SEG_MODEL_PATH')),
+    'MAIN_FLOW_GRAY_IMG_DIR_PATH': str(os.getenv('MAIN_FLOW_GRAY_IMG_DIR_PATH')),
+    'MAIN_FLOW_INFERENCE_FOLDER': str(os.getenv('MAIN_FLOW_INFERENCE_FOLDER')),
+}
+
+with st.sidebar:
+    st.title("Shipping Label Extraction")
+    data = st.file_uploader(label='Upload Image of Parcel', type=['png', 'jpg', 'jpeg'])
+
+if data:
+    Path('grey_images').mkdir(parents=True, exist_ok=True)
+
+    with open(os.path.join('grey_images', data.name), 'wb') as f:
+        f.write(data.getvalue())
+
+    img = cv2.imread(os.path.join('grey_images', data.name), 0)
+
+    if img.shape[0] > 1500:
+        height, width = img.shape
+        img = img[height//4:-height//4, width//4:-width//4]
+
+    cv2.imwrite(os.path.join('grey_images', data.name), img)
+
+    # run the pipeline step by step instead of calling main()
+    # main(os.path.join('grey_images', data.name))
+    file_path = os.path.join('grey_images', data.name)
+    img_name = os.path.basename(file_path)
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown("<h3 style='text-align: center;'>Grey Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('grey_images', data.name))
+
+        # object detection and image enhancement
+        seg_result, img_file = object_detection(file_path)
+        croped_img = crop_image(seg_result, img_file, img_name)
+        image = enhance_image(croped_img, img_name)
+
+        st.markdown("<h3 style='text-align: center;'>Enhanced Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'enhanced', data.name))
+
+    with col2:
+        st.markdown("<h3 style='text-align: center;'>Detected Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], data.name))
+
+        # rotation
+        processed_img = morphological_transform(image)
+        rotated_image, image = hoffman_transform(processed_img, image)
+        img_name = pytesseract_rotate(rotated_image, image, img_name)
+
+        st.markdown("<h3 style='text-align: center;'>Rotated Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'rotated_image', data.name))
+
+    # apply OCR and NER
+    file_name = ocr(img_name)
+    Output_dict = ner(file_name)
+
+    ocr_data = ""
+    with open(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'ocr_label_data', data.name.split('.')[0] + '.txt'), 'r') as f:
+        ocr_data = f.read()
+    st.header("OCR Text Output")
+    st.text(ocr_data)
+
+    st.header("NER Output")
+
+    new_df = pd.DataFrame()
+    new_df['Entity'] = list(Output_dict.keys())
+    new_df['Value'] = list(Output_dict.values())
+    new_df['Value'] = new_df['Value'].astype('str')
+    st.table(new_df)
+
+else:
+    # no upload yet: run the demo image shipped with the repo
+    img_name = '3.jpg'
+    img = cv2.imread(img_name, 0)
+
+    if img.shape[0] > 1500:
+        height, width = img.shape
+        img = img[height//4:-height//4, width//4:-width//4]
+
+    Path('grey_images').mkdir(parents=True, exist_ok=True)
+    cv2.imwrite(os.path.join('grey_images', img_name), img)
+
+    # main(os.path.join('grey_images', img_name))
+    file_path = os.path.join('grey_images', img_name)
+    img_name = os.path.basename(file_path)
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown("<h3 style='text-align: center;'>Grey Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('grey_images', img_name))
+
+        # object detection and image enhancement
+        seg_result, img_file = object_detection(file_path)
+        croped_img = crop_image(seg_result, img_file, img_name)
+        image = enhance_image(croped_img, img_name)
+
+        st.markdown("<h3 style='text-align: center;'>Enhanced Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'enhanced', img_name))
+
+    with col2:
+        st.markdown("<h3 style='text-align: center;'>Detected Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], img_name))
+
+        # rotation
+        processed_img = morphological_transform(image)
+        rotated_image, image = hoffman_transform(processed_img, image)
+        img_name = pytesseract_rotate(rotated_image, image, img_name)
+
+        st.markdown("<h3 style='text-align: center;'>Rotated Image</h3>", unsafe_allow_html=True)
+        st.image(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'rotated_image', img_name))
+
+    # apply OCR and NER
+    file_name = ocr(img_name)
+    Output_dict = ner(file_name)
+
+    ocr_data = ""
+    with open(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'ocr_label_data', img_name.split('.')[0] + '.txt'), 'r') as f:
+        ocr_data = f.read()
+    st.header("OCR Text Output")
+    st.text(ocr_data)
+
+    st.header("NER Output")
+
+    new_df = pd.DataFrame()
+    new_df['Entity'] = list(Output_dict.keys())
+    new_df['Value'] = list(Output_dict.values())
+    new_df['Value'] = new_df['Value'].astype('str')
+    st.table(new_df)
grey_images/1.jpg ADDED

Git LFS Details

  • SHA256: 9452b2a4c4c6184875d3f1b0cbe749eb0d0c57fc84aa1afd052635b51475bbe1
  • Pointer size: 132 Bytes
  • Size of remote file: 4.02 MB
grey_images/10.jpg ADDED

Git LFS Details

  • SHA256: e330ba4249996deb2d2e3477d710ac8938bcf6e244a3a44618e8aa4dbf96edb7
  • Pointer size: 132 Bytes
  • Size of remote file: 3.46 MB
grey_images/12.jpg ADDED

Git LFS Details

  • SHA256: 010d1b9b9684d64a0bd7f7db31312d561942b424d03a2b9d7de565d342000dd1
  • Pointer size: 132 Bytes
  • Size of remote file: 3.1 MB
grey_images/3.jpg ADDED
grey_images/7.jpg ADDED

Git LFS Details

  • SHA256: 6a93ee48b37c456abacfe000103698eb7cacc908d0606c4cd1b0efe8de8b682b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.12 MB
grey_images/8.jpg ADDED

Git LFS Details

  • SHA256: 79f9dd87ffec9add99cd8ac72f7c4f272cf46836dcb846dfc70e4cab08d74b15
  • Pointer size: 132 Bytes
  • Size of remote file: 3.71 MB
grey_images/9.jpg ADDED

Git LFS Details

  • SHA256: 28dd23fb3919d7a16a10e55ecd77d79a09d5084979953379c6bdc19ef5347f34
  • Pointer size: 132 Bytes
  • Size of remote file: 3.34 MB
grey_images/parcel_img0.png ADDED
grey_images/parcel_img1.png ADDED
grey_images/parcel_img19.png ADDED
grey_images/parcel_img3.png ADDED

Git LFS Details

  • SHA256: a9e3ed65479c94f8103c93df65e9a43e888811fc110a2c17e30f2df408b2d49c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.15 MB
grey_images/parcel_img5.png ADDED
notebooks/Shipping_label_NER.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/readme.md ADDED
@@ -0,0 +1 @@
+
notebooks/yolo_shipping_label_detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
pipeline.py ADDED
@@ -0,0 +1,14 @@
+from pipeline_functions import object_detection, crop_image, enhance_image, morphological_transform, hoffman_transform, pytesseract_rotate, ocr, ner
+import os
+
+def main(path):
+    # run the whole pipeline on one image and return the NER entities
+    img_name = os.path.basename(path)
+    seg_result, img_file = object_detection(path)
+    croped_img = crop_image(seg_result, img_file, img_name)
+    image = enhance_image(croped_img, img_name)
+    processed_img = morphological_transform(image)
+    rotated_image, image = hoffman_transform(processed_img, image)
+    img_name = pytesseract_rotate(rotated_image, image, img_name)
+    file_name = ocr(img_name)
+    Output_dict = ner(file_name)
+    return Output_dict
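
(A hedged usage sketch of this headless entry point — not part of the commit; it assumes the repo root as the working directory and uses the sample image shipped with the commit:)

from pipeline import main

entities = main('grey_images/3.jpg')  # returns the NER label dict
print(entities)                       # e.g. {'GCNUM': [...], 'TRACK-ID': [...], 'Company': ...}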
pipeline_functions.py ADDED
@@ -0,0 +1,157 @@
+import cv2
+from ultralytics import YOLO
+import numpy as np
+from paddleocr import PaddleOCR
+
+from ObjectDetection.functions import generateMask, cropBlackBackground, enhanceImage
+from OCR.rotation_functions import hoffman_transformation, rotate, pytesseractRotate
+from NER.ner_inference import inference
+
+import os
+from dotenv import load_dotenv
+from pathlib import Path
+env_path = Path('.') / '.env'
+load_dotenv(dotenv_path=env_path)
+
+path = {
+    'SEG_MODEL_PATH': str(os.getenv('SEG_MODEL_PATH')),
+    'MAIN_FLOW_GRAY_IMG_DIR_PATH': str(os.getenv('MAIN_FLOW_GRAY_IMG_DIR_PATH')),
+    'MAIN_FLOW_INFERENCE_FOLDER': str(os.getenv('MAIN_FLOW_INFERENCE_FOLDER')),
+}
+seg_model = YOLO(path['SEG_MODEL_PATH'])
+
+CONF = 0.7
+
+
+def object_detection(file):
+    print("**************************** PERFORMING_OBJECT_DETECTION **************************** ")
+    img_file = cv2.imread(file, 0)
+    img_name = os.path.basename(file)
+
+    Path(path['MAIN_FLOW_GRAY_IMG_DIR_PATH']).mkdir(parents=True, exist_ok=True)
+    cv2.imwrite(os.path.join(path['MAIN_FLOW_GRAY_IMG_DIR_PATH'], img_name), img_file)
+    result = seg_model(os.path.join(path['MAIN_FLOW_GRAY_IMG_DIR_PATH'], img_name), conf=CONF, save=True, name=path['MAIN_FLOW_INFERENCE_FOLDER'], exist_ok=True)
+
+    return result, img_file
+
+
+def crop_image(seg_result, img_file, img_name):
+    print("**************************** CROPPING_IMAGE **************************** ")
+    for res in seg_result:
+        croped_img, mask = generateMask(res, img_file)
+
+        if croped_img is not None:
+            croped_img = cropBlackBackground(croped_img)
+
+            # save the mask and crop to file
+            try:
+                Path(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'crops_seg')).mkdir(parents=True, exist_ok=True)
+                Path(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'masks')).mkdir(parents=True, exist_ok=True)
+            except OSError as error:
+                print(error)
+
+            cv2.imwrite(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'masks', img_name), mask)
+            cv2.imwrite(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'crops_seg', img_name), croped_img)
+            return croped_img
+    # fall back to the full image when no label was segmented
+    return img_file
+
+
+def enhance_image(croped_img, img_name):
+    print("**************************** ENHANCE_IMAGE **************************** ")
+    image = None
+    if croped_img is not None:
+        image = enhanceImage(croped_img)
+
+    if image is not None:
+        try:
+            Path(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'enhanced')).mkdir(parents=True, exist_ok=True)
+        except OSError as error:
+            print(error)
+
+        cv2.imwrite(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'enhanced', img_name), image)
+
+    return image
+
+
+def morphological_transform(image):
+    print("**************************** APPLY_MORPHOLOGICAL_TRANSFORM **************************** ")
+    # scale, dilate, and sharpen before the rotation estimate
+    processed_img = cv2.resize(image, None, fx=2.7, fy=3)
+    kernel = np.ones((2, 2), np.uint8)
+    processed_img = cv2.dilate(processed_img, kernel)
+    sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
+    processed_img = cv2.filter2D(processed_img, -1, sharpen_kernel)
+
+    return processed_img
+
+
+def hoffman_transform(processed_img, original_img):
+    print("**************************** APPLY_HOFFMAN_TRANSFORM **************************** ")
+    rotated_image, angle = hoffman_transformation(processed_img, True)
+    original_img = rotate(original_img, angle)
+
+    return rotated_image, original_img
+
+
+def pytesseract_rotate(rotated_image, original_img, img_name):
+    print("**************************** APPLY_PYTESSERACT_ROTATION **************************** ")
+    rotated_image = pytesseractRotate(rotated_image, original_img, 1)
+
+    if rotated_image is not None:
+        try:
+            Path(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'rotated_image')).mkdir(parents=True, exist_ok=True)
+        except OSError as error:
+            print(error)
+
+        cv2.imwrite(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'rotated_image', img_name), rotated_image)
+
+    return img_name
+
+
+def ocr(img_name):
+    print("**************************** APPLY_OCR **************************** ")
+    ocr = PaddleOCR(use_angle_cls=True, lang='en')  # need to run only once to download and load model into memory
+    result = ocr.ocr(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'rotated_image', img_name), cls=True)
+
+    ocr_output_paddle = []
+    if result is not None:
+        try:
+            for i in result:
+                ocr_output_paddle.append(" ".join([line[1][0] for line in i]))
+        except TypeError:
+            # PaddleOCR may return [None] when it finds no text
+            pass
+    try:
+        Path(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'ocr_label_data')).mkdir(parents=True, exist_ok=True)
+    except OSError as error:
+        print(error)
+
+    file_name = img_name.split('.')[0] + '.txt'
+    with open(os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'ocr_label_data', file_name), "w+") as f:
+        f.write("\n".join(ocr_output_paddle))
+
+    return file_name
+
+
+def ner(file_name):
+    print("**************************** APPLY_NER **************************** ")
+    ocr_file = os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'ocr_label_data', file_name)
+    with open(ocr_file, 'r') as f:
+        sent = f.read()
+    output_dict = inference(sent)
+
+    return output_dict
requirements.txt ADDED
@@ -0,0 +1,188 @@
+accelerate==0.28.0
+altair==5.2.0
+anyio==4.3.0
+astor==0.8.1
+asttokens==2.4.1
+attrdict==2.0.1
+attrs==23.2.0
+Babel==2.14.0
+bce-python-sdk==0.9.5
+beautifulsoup4==4.12.3
+blinker==1.7.0
+boto3==1.34.73
+botocore==1.34.73
+bpemb==0.3.5
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+comm==0.2.2
+conllu==4.5.3
+contourpy==1.2.0
+cssselect==1.2.0
+cssutils==2.9.0
+cycler==0.12.1
+Cython==3.0.9
+debugpy==1.8.1
+decorator==5.1.1
+Deprecated==1.2.14
+et-xmlfile==1.1.0
+exceptiongroup==1.2.0
+executing==2.0.1
+filelock==3.13.3
+fire==0.6.0
+flair==0.13.1
+Flask==3.0.2
+flask-babel==4.0.0
+fonttools==4.50.0
+fsspec==2024.3.1
+ftfy==6.2.0
+future==1.0.0
+gdown==5.1.0
+gensim==4.3.2
+gitdb==4.0.11
+GitPython==3.1.42
+h11==0.14.0
+httpcore==1.0.4
+httpx==0.27.0
+huggingface-hub==0.22.2
+idna==3.6
+imageio==2.34.0
+imgaug==0.4.0
+importlib_metadata==7.1.0
+importlib_resources==6.4.0
+ipykernel==6.29.4
+ipython==8.18.1
+itsdangerous==2.1.2
+Janome==0.5.0
+jedi==0.19.1
+Jinja2==3.1.3
+jmespath==1.0.1
+joblib==1.3.2
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+jupyter_client==8.6.1
+jupyter_core==5.7.2
+kiwisolver==1.4.5
+langdetect==1.0.9
+lazy_loader==0.3
+lmdb==1.4.1
+lxml==5.1.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.8.3
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+more-itertools==10.2.0
+mpld3==0.5.10
+mpmath==1.3.0
+nest-asyncio==1.6.0
+networkx==3.2.1
+nltk==3.8.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.4.99
+nvidia-nvtx-cu12==12.1.105
+opencv-contrib-python==4.6.0.66
+opencv-python==4.6.0.66
+opencv-python-headless==4.9.0.80
+openpyxl==3.1.2
+opt-einsum==3.3.0
+packaging==23.2
+paddleocr==2.7.2
+paddlepaddle==2.6.1
+pandas==2.2.1
+parso==0.8.3
+pdf2docx==0.5.8
+pexpect==4.9.0
+pillow==10.2.0
+platformdirs==4.2.0
+pptree==3.1
+premailer==3.10.0
+prompt-toolkit==3.0.43
+protobuf==3.20.2
+psutil==5.9.8
+ptyprocess==0.7.0
+pure-eval==0.2.2
+py-cpuinfo==9.0.0
+pyarrow==15.0.2
+pyclipper==1.3.0.post5
+pycryptodome==3.20.0
+pydeck==0.8.1b0
+Pygments==2.17.2
+PyMuPDF==1.24.0
+PyMuPDFb==1.24.0
+pyparsing==3.1.2
+PySocks==1.7.1
+pytesseract==0.3.10
+python-dateutil==2.9.0.post0
+python-docx==1.1.0
+python-dotenv==1.0.1
+pytorch_revgrad==0.2.0
+pytz==2024.1
+PyYAML==6.0.1
+pyzmq==25.1.2
+rapidfuzz==3.7.0
+rarfile==4.1
+referencing==0.34.0
+regex==2023.12.25
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.0
+s3transfer==0.10.1
+safetensors==0.4.2
+scikit-image==0.22.0
+scikit-learn==1.4.1.post1
+scipy==1.12.0
+seaborn==0.13.2
+segtok==1.5.11
+semver==3.0.2
+sentencepiece==0.1.99
+shapely==2.0.3
+six==1.16.0
+smart-open==7.0.4
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.5
+sqlitedict==2.1.0
+stack-data==0.6.3
+streamlit==1.32.2
+sympy==1.12
+tabulate==0.9.0
+tenacity==8.2.3
+termcolor==2.4.0
+thop==0.1.1.post2209072238
+threadpoolctl==3.4.0
+tifffile==2024.2.12
+tokenizers==0.15.2
+toml==0.10.2
+toolz==0.12.1
+torch==2.2.1
+torchvision==0.17.1
+tornado==6.4
+tqdm==4.66.2
+traitlets==5.14.2
+transformer-smaller-training-vocab==0.3.3
+transformers==4.39.2
+triton==2.2.0
+typing_extensions==4.10.0
+tzdata==2024.1
+ultralytics==8.1.34
+urllib3==1.26.18
+visualdl==2.5.3
+watchdog==4.0.0
+wcwidth==0.2.13
+Werkzeug==3.0.1
+Wikipedia-API==0.6.0
+wrapt==1.16.0
+zipp==3.18.1
tesseract ADDED
Binary file (35.1 kB).