Spaces:

bachpc
/

table-structure-recognition

Build error

App Files Files Community

bachpc committed on Apr 12, 2023

Commit

aaffb60

1 Parent(s): 8ed655c

Minor changes

Browse files

Files changed (1) hide show

app.py +106 -61

app.py CHANGED Viewed

@@ -29,20 +29,28 @@ structure_model = torch.hub.load('ultralytics/yolov5', 'custom', 'weights/struct
 imgsz = 640
-detection_class_names = ['table', 'table rotated']
 structure_class_names = [
     'table', 'table column', 'table row', 'table column header',
     'table projected row header', 'table spanning cell', 'no object'
 ]
 structure_class_map = {k: v for v, k in enumerate(structure_class_names)}
 structure_class_thresholds = {
-    "table": 0.42,
-    "table column": 0.56,
-    "table row": 0.5,
-    "table column header": 0.38,
-    "table projected row header": 0.27,
-    "table spanning cell": 0.4,
-    "no object": 10
 }
@@ -84,6 +92,9 @@ def crop_image(pil_img, detection_result, padding=30):
         w = result[2]
         h = result[3]
         x1 = int((min_x - w / 2) * width)
         y1 = int((min_y - h / 2) * height)
         x2 = int((min_x + w / 2) * width)
@@ -97,7 +108,7 @@ def crop_image(pil_img, detection_result, padding=30):
         crop_image = image[y1_pad:y2_pad, x1_pad:x2_pad, :]
         crop_image = cv_to_PIL(crop_image)
-        if class_id == 1:  # table rotated
             crop_image = crop_image.rotate(270, expand=True)
         crop_images.append(crop_image)
@@ -180,17 +191,49 @@ def convert_stucture(page_tokens, pil_img, structure_result):
     return table_structures, cells, confidence_score
 def visualize_ocr(pil_img, ocr_result):
-    image = PIL_to_cv(pil_img)
-    for i, res in enumerate(ocr_result):
-        bbox = res['bbox']
-        x1 = int(bbox[0])
-        y1 = int(bbox[1])
-        x2 = int(bbox[2])
-        y2 = int(bbox[3])
-        cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 0, 255))
-        cv2.putText(image, res['text'], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.25, color=(255, 0, 0))
-    return cv_to_PIL(image)
 def get_bbox_decorations(data_type, label):
@@ -231,6 +274,9 @@ def visualize_structure(pil_img, structure_result):
         w = result[2]
         h = result[3]
         x1 = int((min_x - w / 2) * width)
         y1 = int((min_y - h / 2) * height)
         x2 = int((min_x + w / 2) * width)
@@ -238,35 +284,31 @@ def visualize_structure(pil_img, structure_result):
         # print(x1, y1, x2, y2)
         bbox = [x1, y1, x2, y2]
-        if score >= structure_class_thresholds[structure_class_names[class_id]]:
-            #cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
-            #cv2.putText(image, str(i)+'-'+str(class_id), (x1-10, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))
-            color, alpha, linewidth, hatch = get_bbox_decorations('recognition', class_id)
-            # Fill
-            rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
-                                     linewidth=linewidth, alpha=alpha,
-                                     edgecolor='none',facecolor=color,
-                                     linestyle=None)
-            ax.add_patch(rect)
-            # Hatch
-            rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
-                                     linewidth=1, alpha=0.4,
-                                     edgecolor=color, facecolor='none',
-                                     linestyle='--',hatch=hatch)
-            ax.add_patch(rect)
-            # Edge
-            rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
-                                     linewidth=linewidth,
-                                     edgecolor=color, facecolor='none',
-                                     linestyle="--")
-            ax.add_patch(rect)
     plt.xticks([], [])
     plt.yticks([], [])
     legend_elements = []
-    for class_name in structure_class_names:
         color, alpha, linewidth, hatch = get_bbox_decorations('recognition', structure_class_map[class_name])
         legend_elements.append(
             Patch(facecolor=color, edgecolor=color, label=class_name, hatch=hatch, alpha=alpha)
@@ -506,10 +548,7 @@ def cells_to_excel(cells, file_path):
     workbook = xlsxwriter.Workbook(file_path)
     cell_format = workbook.add_format(
-        {
-            'align': 'center',
-            'valign': 'vcenter',
-        }
     )
     worksheet = workbook.add_worksheet(name='Table')
@@ -573,33 +612,35 @@ def main():
             with tabs[1]:
                 st.header('Table Structure Recognition')
-                str_cols = st.columns((len(crop_images), ) * 4)
                 str_cols[0].subheader('Table image')
                 str_cols[1].subheader('OCR result')
                 str_cols[2].subheader('Structure result')
                 str_cols[3].subheader('Cells result')
                 for i, img in enumerate(crop_images):
                     ocr_result = ocr(img)
                     structure_result = table_structure(img)
                     table_structures, cells, confidence_score = convert_stucture(ocr_result, img, structure_result)
                     cells = extract_text_from_cells(cells)
-                    all_cells.append(cells)
-                    html_result = cells_to_html(cells)
-                    #df, csv_result = cells_to_csv(cells)
-                    #print(df)
-                    vis_ocr_img = visualize_ocr(img, ocr_result)
-                    vis_str_img = visualize_structure(img, structure_result)
                     vis_cells_img = visualize_cells(img, cells)
-                    str_cols[0].image(img)
-                    str_cols[1].image(vis_ocr_img)
-                    str_cols[2].image(vis_str_img)
                     str_cols[3].image(vis_cells_img)
-                    st.write('\n')
-                    st.markdown(html_result, unsafe_allow_html=True)
             with tabs[2]:
                 st.header('Extracted Table(s)')
@@ -621,6 +662,10 @@ def main():
                                     file_name=f'output_{idx}.xlsx',
                                 )
 if __name__ == '__main__':
     main()

 imgsz = 640
+detection_class_names = ['table', 'table rotated', 'no object']
 structure_class_names = [
     'table', 'table column', 'table row', 'table column header',
     'table projected row header', 'table spanning cell', 'no object'
 ]
+detection_class_map = {k: v for v, k in enumerate(detection_class_names)}
 structure_class_map = {k: v for v, k in enumerate(structure_class_names)}
+detection_class_thresholds = {
+    'table': 0.5,
+    'table rotated': 0.5,
+    'no object': 10
+}
 structure_class_thresholds = {
+    'table': 0.42,
+    'table column': 0.56,
+    'table row': 0.5,
+    'table column header': 0.38,
+    'table projected row header': 0.27,
+    'table spanning cell': 0.4,
+    'no object': 10
 }
         w = result[2]
         h = result[3]
+        if score < detection_class_thresholds[detection_class_names[class_id]]:
+            continue
         x1 = int((min_x - w / 2) * width)
         y1 = int((min_y - h / 2) * height)
         x2 = int((min_x + w / 2) * width)
         crop_image = image[y1_pad:y2_pad, x1_pad:x2_pad, :]
         crop_image = cv_to_PIL(crop_image)
+        if detection_class_names[class_id] == 'table rotated':
             crop_image = crop_image.rotate(270, expand=True)
         crop_images.append(crop_image)
     return table_structures, cells, confidence_score
+def visualize_image(pil_img):
+    plt.imshow(pil_img, interpolation='lanczos')
+    plt.gcf().set_size_inches(10, 10)
+    plt.axis('off')
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, bbox_inches='tight', dpi=150)
+    plt.close()
+    return PIL.Image.open(img_buf)
 def visualize_ocr(pil_img, ocr_result):
+    plt.imshow(pil_img, interpolation='lanczos')
+    plt.gcf().set_size_inches(20, 20)
+    ax = plt.gca()
+    for i, result in enumerate(ocr_result):
+        bbox = result['bbox']
+        text = result['text']
+        rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1], linewidth=2, edgecolor='red', facecolor='none', linestyle="-")
+        ax.add_patch(rect)
+        ax.text(bbox[0], bbox[3], text, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes, color='blue')
+    plt.xticks([], [])
+    plt.yticks([], [])
+    plt.gcf().set_size_inches(10, 10)
+    plt.axis('off')
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, bbox_inches='tight', dpi=150)
+    plt.close()
+    return PIL.Image.open(img_buf)
+    # image = PIL_to_cv(pil_img)
+    # for i, res in enumerate(ocr_result):
+    #     bbox = res['bbox']
+    #     x1 = int(bbox[0])
+    #     y1 = int(bbox[1])
+    #     x2 = int(bbox[2])
+    #     y2 = int(bbox[3])
+    #     cv2.rectangle(image, (x1, y1), (x2, y2), color=(255, 0, 0))
+    #     cv2.putText(image, res['text'], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.25, color=(0, 0, 255))
+    # return cv_to_PIL(image)
 def get_bbox_decorations(data_type, label):
         w = result[2]
         h = result[3]
+        if score < structure_class_thresholds[structure_class_names[class_id]]:
+            continue
         x1 = int((min_x - w / 2) * width)
         y1 = int((min_y - h / 2) * height)
         x2 = int((min_x + w / 2) * width)
         # print(x1, y1, x2, y2)
         bbox = [x1, y1, x2, y2]
+        color, alpha, linewidth, hatch = get_bbox_decorations('recognition', class_id)
+        # Fill
+        rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
+                                    linewidth=linewidth, alpha=alpha,
+                                    edgecolor='none',facecolor=color,
+                                    linestyle=None)
+        ax.add_patch(rect)
+        # Hatch
+        rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
+                                    linewidth=1, alpha=0.4,
+                                    edgecolor=color, facecolor='none',
+                                    linestyle='--',hatch=hatch)
+        ax.add_patch(rect)
+        # Edge
+        rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
+                                    linewidth=linewidth,
+                                    edgecolor=color, facecolor='none',
+                                    linestyle="--")
+        ax.add_patch(rect)
     plt.xticks([], [])
     plt.yticks([], [])
     legend_elements = []
+    for class_name in structure_class_names[:-1]:
         color, alpha, linewidth, hatch = get_bbox_decorations('recognition', structure_class_map[class_name])
         legend_elements.append(
             Patch(facecolor=color, edgecolor=color, label=class_name, hatch=hatch, alpha=alpha)
     workbook = xlsxwriter.Workbook(file_path)
     cell_format = workbook.add_format(
+        {'align': 'center', 'valign': 'vcenter'}
     )
     worksheet = workbook.add_worksheet(name='Table')
             with tabs[1]:
                 st.header('Table Structure Recognition')
+                str_cols = st.columns(4)
                 str_cols[0].subheader('Table image')
                 str_cols[1].subheader('OCR result')
                 str_cols[2].subheader('Structure result')
                 str_cols[3].subheader('Cells result')
                 for i, img in enumerate(crop_images):
+                    str_cols = st.columns(4)
+                    vis_img = visualize_image(img)
+                    str_cols[0].image(vis_img)
                     ocr_result = ocr(img)
+                    vis_ocr_img = visualize_ocr(img, ocr_result)
+                    str_cols[1].image(vis_ocr_img)
                     structure_result = table_structure(img)
+                    vis_str_img = visualize_structure(img, structure_result)
+                    str_cols[2].image(vis_str_img)
                     table_structures, cells, confidence_score = convert_stucture(ocr_result, img, structure_result)
                     cells = extract_text_from_cells(cells)
                     vis_cells_img = visualize_cells(img, cells)
                     str_cols[3].image(vis_cells_img)
+                    all_cells.append(cells)
+                    #df, csv_result = cells_to_csv(cells)
+                    #print(df)
             with tabs[2]:
                 st.header('Extracted Table(s)')
                                     file_name=f'output_{idx}.xlsx',
                                 )
+                for idx, cells in enumerate(all_cells):
+                    html_result = cells_to_html(cells)
+                    st.subheader(f'HTML Table {idx + 1}')
+                    st.markdown(html_result, unsafe_allow_html=True)
 if __name__ == '__main__':
     main()