SalML committed on
Commit
37dcf5d
·
1 Parent(s): b25395c

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -156
app.py DELETED
@@ -1,156 +0,0 @@
1
- import streamlit as st
2
- from PIL import Image
3
- import os
4
- import TDTSR
5
- import pytesseract
6
- from pytesseract import Output
7
- import pandas as pd
8
- import matplotlib.pyplot as plt
9
- import cv2
10
- import numpy as np
11
- from cv2 import dnn_superres
12
-
13
# Use the Windows installer's default Tesseract location only when it is really
# there. On any other machine the assignment is skipped, so pytesseract keeps
# its own default command and can find the binary through PATH.
_WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(_WINDOWS_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD

# Page-level Streamlit configuration.
st.set_option('deprecation.showPyplotGlobalUse', False)
st.set_page_config(layout='wide')
st.title("Table Detection and Table Structure Recognition")

# Three equally weighted columns. The main loop hands c1 to the detection plot,
# c2 to the cropped-table plot and c3 to the structure plot and result table.
c1, c2, c3 = st.columns((1, 1, 1))
20
-
21
-
22
def PIL_to_cv(pil_img):
    """Convert a PIL image into an OpenCV-style array with BGR channel order.

    The image is first materialised as a NumPy array and then run through the
    RGB -> BGR colour conversion, so it is expected to be an RGB image.
    """
    rgb_pixels = np.array(pil_img)
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    return bgr_pixels
24
-
25
def cv_to_PIL(cv_img):
    """Convert an OpenCV BGR array into a PIL image.

    The channels are reordered BGR -> RGB before the array is wrapped with
    ``Image.fromarray``.
    """
    rgb_pixels = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_pixels)
27
-
28
def pytess(cell_pil_img):
    """Run Tesseract on one cell image and return its text as a single string.

    Args:
        cell_pil_img: image of a single table cell, passed straight to
            ``pytesseract.image_to_data``.

    Returns:
        The recognised words joined by single spaces, or ``''`` when nothing
        was recognised.
    """
    ocr_data = pytesseract.image_to_data(
        cell_pil_img,
        output_type=Output.DICT,
        # Tesseract variables must be passed as "-c name=value"; a bare
        # variable name is interpreted as a config *file* and has no effect.
        config='-c preserve_interword_spaces=1',
    )
    # The 'text' list also holds empty entries for the rows that are not words;
    # joining those would inject runs of spaces into the result.
    words = [word for word in ocr_data['text'] if word.strip()]
    return ' '.join(words).strip()
30
-
31
# Loaded super-resolution networks keyed by model path, so the weights are read
# from disk once instead of once per cell image.
_SR_MODEL_CACHE = {}


def _load_sr_model(model_path):
    """Return the super-resolution network for *model_path*, loading it on first use."""
    if model_path not in _SR_MODEL_CACHE:
        # File names are expected to look like "<Name>_x<scale>.pb",
        # e.g. "LapSRN_x8.pb" -> model name "lapsrn", scale 8.
        stem = os.path.splitext(os.path.basename(model_path))[0]
        model_name, _, scale_tag = stem.partition('_')
        model_scale = int(scale_tag.lstrip('xX'))

        sr = dnn_superres.DnnSuperResImpl_create()
        sr.readModel(model_path)
        sr.setModel(model_name.lower(), model_scale)
        _SR_MODEL_CACHE[model_path] = sr
    return _SR_MODEL_CACHE[model_path]


def super_res(pil_img, model_path="./LapSRN_x8.pb"):
    """Upscale *pil_img* with an OpenCV ``dnn_superres`` model.

    Requires opencv-contrib-python installed without opencv-python.

    Args:
        pil_img: RGB PIL image to upscale.
        model_path: path to the ``.pb`` model file; the model name and the
            scale factor are derived from its file name.

    Returns:
        The upscaled image as a PIL image.

    Raises:
        ValueError: if the scale factor cannot be parsed from the file name.
    """
    sr = _load_sr_model(model_path)
    upscaled = sr.upsample(PIL_to_cv(pil_img))
    return cv_to_PIL(upscaled)
45
-
46
-
47
def sharpen_image(pil_img):
    """Return a sharpened copy of *pil_img*.

    The image is filtered with a 3x3 kernel that has 9 at the centre and -1 in
    the eight surrounding positions. A milder 4-neighbour variant
    ([[0, -1, 0], [-1, 5, -1], [0, -1, 0]]) was tried by the original author
    and left disabled.
    """
    kernel = np.full((3, 3), -1)
    kernel[1, 1] = 9

    bgr = PIL_to_cv(pil_img)
    # -1 is the ddepth argument of cv2.filter2D.
    sharpened = cv2.filter2D(bgr, -1, kernel)
    return cv_to_PIL(sharpened)
57
-
58
-
59
def preprocess_magic(pil_img):
    """Binarise *pil_img* so that white is the majority colour.

    The image is converted to grayscale and thresholded with Otsu's method.
    If the result contains more black than white pixels it is inverted, which
    yields dark text on a light background when the background covers most of
    the image.

    Args:
        pil_img: RGB PIL image.

    Returns:
        An RGB PIL image containing only pure black and pure white pixels.
    """
    cv_img = PIL_to_cv(pil_img)
    grayscale_image = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(grayscale_image, 0, 255, cv2.THRESH_OTSU)

    count_white = np.count_nonzero(binary_image)
    count_black = binary_image.size - count_white

    if count_black > count_white:
        binary_image = 255 - binary_image

    # The binary image has a single channel, so it cannot go through
    # cv_to_PIL (BGR -> RGB needs three channels); expand gray -> RGB instead.
    return Image.fromarray(cv2.cvtColor(binary_image, cv2.COLOR_GRAY2RGB))
74
-
75
-
76
### main code:
# For every sample image: detect tables, recognise each table's row/column
# structure, OCR every cell (after super-resolution) and show the result as a
# DataFrame in the third Streamlit column.
# NOTE(review): the source of this script had lost its indentation; the nesting
# below is reconstructed from the data flow between statements - confirm.
TD_SAMPLES_DIR = 'D:/Jupyter/Multi-Type-TD-TSR/TD_samples/'

for td_sample in os.listdir(TD_SAMPLES_DIR):

    image = Image.open(TD_SAMPLES_DIR + td_sample).convert("RGB")
    model, image, probas, bboxes_scaled = TDTSR.table_detector(image, THRESHOLD_PROBA=0.6)
    TDTSR.plot_results_detection(c1, model, image, probas, bboxes_scaled)
    cropped_img_list = TDTSR.plot_table_detection(c2, model, image, probas, bboxes_scaled)

    for unpadded_table in cropped_img_list:
        # table : pil_img
        table = TDTSR.add_margin(unpadded_table)
        model, image, probas, bboxes_scaled = TDTSR.table_struct_recog(table, THRESHOLD_PROBA=0.6)

        # Plot the recognised structure, then sort the rows/columns and attach
        # their cropped images.
        try:
            rows, cols = TDTSR.plot_structure(c3, model, image, probas, bboxes_scaled, class_to_show=0)
            rows, cols = TDTSR.sort_table_featuresv2(rows, cols)
            # Original author's note: rows and cols are ordered dictionaries
            # with the 5th element of each value tuple being a pil_img.
            rows, cols = TDTSR.individual_table_featuresv2(table, rows, cols)
        except Exception as printableException:
            st.write(td_sample, ' terminated with exception:', printableException)
            # rows/cols are undefined (or left over from the previous table)
            # at this point, so there is nothing valid to OCR for this table.
            continue

        cells_img = TDTSR.object_to_cellsv2(rows, cols)

        # The first entry of cells_img is treated as the header row; every
        # later entry is one data row of cell images.
        headers = []
        body_rows = []
        for n, row_images in enumerate(cells_img.values()):
            if n == 0:
                for idx, header in enumerate(row_images):
                    header_text = pytess(super_res(header))
                    # Give unreadable headers a positional placeholder name.
                    headers.append(header_text or 'empty_col' + str(idx))
            else:
                body_rows.append([pytess(super_res(cell)) for cell in row_images])

        # One DataFrame row per data row. The header row is not counted, which
        # avoids the trailing blank row the table used to end with.
        df = pd.DataFrame("", index=range(len(body_rows)), columns=headers)
        for row_idx, row_texts in enumerate(body_rows):
            # Ignore cells beyond the header width instead of raising.
            for col_idx, text in enumerate(row_texts[:len(headers)]):
                df.iat[row_idx, col_idx] = text

        c3.dataframe(df)
156
-