Spaces:

bachpc
/

table-structure-recognition

Build error

App Files Community

bachpc committed on Apr 3, 2023

Commit

17ae8b6

1 Parent(s): 87f7012

Fix bug and clean

Browse files

Files changed (1) hide show

app.py +39 -41

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import streamlit as st
 import PIL
 import cv2
 import numpy as np
@@ -9,7 +8,6 @@ import torch
 # import json
 from collections import OrderedDict, defaultdict
 import xml.etree.ElementTree as ET
 from paddleocr import PaddleOCR
 import pytesseract
 from pytesseract import Output
@@ -29,13 +27,13 @@ structure_class_names = [
 ]
 structure_class_map = {k: v for v, k in enumerate(structure_class_names)}
 structure_class_thresholds = {
-    "table": 0.5,
-    "table column": 0.5,
-    "table row": 0.5,
-    "table column header": 0.25,
-    "table projected row header": 0.25,
-    "table spanning cell": 0.25,
-    "no object": 10
 }
@@ -150,7 +148,7 @@ def convert_stucture(page_tokens, pil_img, structure_result):
     try:
         table_bbox = list(table_class_objects[0]['bbox'])
     except:
-        table_bbox = (0,0,1000,1000)
     # print('table_class_objects:', table_class_objects)
     # print('table_bbox:', table_bbox)
@@ -186,17 +184,17 @@ def visualize_structure(pil_img, structure_result):
         min_y = result[1]
         w = result[2]
         h = result[3]
         x1 = int((min_x - w / 2) * width)
         y1 = int((min_y - h / 2) * height)
         x2 = int((min_x + w / 2) * width)
         y2 = int((min_y + h / 2) * height)
         # print(x1, y1, x2, y2)
         if score >= structure_class_thresholds[structure_class_names[class_id]]:
             cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 0, 255))
             #cv2.putText(image, str(i)+'-'+str(class_id), (x1-10, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))
     return cv_to_PIL(image)
@@ -281,12 +279,12 @@ def cells_to_csv(cells):
     else:
         max_header_row = -1
-    table_array = np.empty([num_rows, num_columns], dtype="object")
     if len(cells) > 0:
         for cell in cells:
             for row_num in cell['row_nums']:
                 for column_num in cell['column_nums']:
-                    table_array[row_num, column_num] = cell["cell_text"]
     header = table_array[:max_header_row+1,:]
     flattened_header = []
@@ -301,7 +299,7 @@ def cells_to_html(cells):
     cells = sorted(cells, key=lambda k: min(k['column_nums']))
     cells = sorted(cells, key=lambda k: min(k['row_nums']))
-    table = ET.Element("table")
     current_row = -1
     for cell in cells:
@@ -317,15 +315,15 @@ def cells_to_html(cells):
         if this_row > current_row:
             current_row = this_row
             if cell['header']:
-                cell_tag = "th"
-                row = ET.SubElement(table, "thead")
             else:
-                cell_tag = "td"
-                row = ET.SubElement(table, "tr")
         tcell = ET.SubElement(row, cell_tag, attrib=attrib)
         tcell.text = cell['cell_text']
-    return str(ET.tostring(table, encoding="unicode", short_empty_elements=False))
 # def cells_to_html(cells):
@@ -342,11 +340,11 @@ def cells_to_html(cells):
 #         for cell in r_cells:
 #             rowspan = cell['row_nums'][-1] - cell['row_nums'][0] + 1
 #             colspan = cell['column_nums'][-1] - cell['column_nums'][0] + 1
-#             r_html += f'<td rowspan="{rowspan}" colspan="{colspan}">{escape(cell["text"])}</td>'
 #         html_code += f'<tr>{r_html}</tr>'
 #     html_code = '''<html>
 #                    <head>
-#                    <meta charset="UTF-8">
 #                    <style>
 #                    table, th, td {
 #                      border: 1px solid black;
@@ -355,7 +353,7 @@ def cells_to_html(cells):
 #                    </style>
 #                    </head>
 #                    <body>
-#                    <table frame="hsides" rules="groups" width="100%%">
 #                      %s
 #                    </table>
 #                    </body>
@@ -367,22 +365,22 @@ def cells_to_html(cells):
 def main():
-    st.set_page_config(layout="wide")
-    st.title("Table Structure Recognition Demo")
     st.write('\n')
     cols = st.columns((1, 1))
-    cols[0].subheader("Input page")
-    cols[1].subheader("Table(s) detected")
-    st.sidebar.title("Image upload")
     st.set_option('deprecation.showfileUploaderEncoding', False)
-    filename = st.sidebar.file_uploader("Upload files", type=['png', 'jpeg', 'jpg'])
-    if st.sidebar.button("Analyze image"):
         if filename is None:
-            st.sidebar.write("Please upload an image")
         else:
             print(filename)
@@ -394,31 +392,31 @@ def main():
             cols[1].image(vis_det_img)
             str_cols = st.columns((len(crop_images), ) * 5)
-            str_cols[0].subheader("Table image")
-            str_cols[1].subheader("OCR result")
-            str_cols[2].subheader("Structure result")
-            str_cols[3].subheader("Cells result")
-            str_cols[4].subheader("CSV result")
-            for img in crop_images:
                 ocr_result = ocr(img)
                 structure_result = table_structure(img)
                 table_structures, cells, confidence_score = convert_stucture(ocr_result, img, structure_result)
                 cells = extract_text_from_cells(cells)
                 html_result = cells_to_html(cells)
                 df, csv_result = cells_to_csv(cells)
-                print(df)
                 vis_ocr_img = visualize_ocr(img, ocr_result)
                 vis_str_img = visualize_structure(img, structure_result)
                 vis_cells_img = visualize_cells(img, cells)
                 str_cols[0].image(img)
                 str_cols[1].image(vis_ocr_img)
                 str_cols[2].image(vis_str_img)
                 str_cols[3].image(vis_cells_img)
                 #str_cols[4].dataframe(df)
-                str_cols[4].download_button("Download table", csv_result, "file.csv", "text/csv", key='download-csv')
                 st.markdown(html_result, unsafe_allow_html=True)

 import streamlit as st
 import PIL
 import cv2
 import numpy as np
 # import json
 from collections import OrderedDict, defaultdict
 import xml.etree.ElementTree as ET
 from paddleocr import PaddleOCR
 import pytesseract
 from pytesseract import Output
 ]
 structure_class_map = {k: v for v, k in enumerate(structure_class_names)}
 structure_class_thresholds = {
+    'table': 0.5,
+    'table column': 0.5,
+    'table row': 0.5,
+    'table column header': 0.25,
+    'table projected row header': 0.25,
+    'table spanning cell': 0.25,
+    'no object': 10
 }
     try:
         table_bbox = list(table_class_objects[0]['bbox'])
     except:
+        table_bbox = (0, 0, 1000, 1000)
     # print('table_class_objects:', table_class_objects)
     # print('table_bbox:', table_bbox)
         min_y = result[1]
         w = result[2]
         h = result[3]
         x1 = int((min_x - w / 2) * width)
         y1 = int((min_y - h / 2) * height)
         x2 = int((min_x + w / 2) * width)
         y2 = int((min_y + h / 2) * height)
         # print(x1, y1, x2, y2)
         if score >= structure_class_thresholds[structure_class_names[class_id]]:
             cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 0, 255))
             #cv2.putText(image, str(i)+'-'+str(class_id), (x1-10, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))
     return cv_to_PIL(image)
     else:
         max_header_row = -1
+    table_array = np.empty([num_rows, num_columns], dtype='object')
     if len(cells) > 0:
         for cell in cells:
             for row_num in cell['row_nums']:
                 for column_num in cell['column_nums']:
+                    table_array[row_num, column_num] = cell['cell_text']
     header = table_array[:max_header_row+1,:]
     flattened_header = []
     cells = sorted(cells, key=lambda k: min(k['column_nums']))
     cells = sorted(cells, key=lambda k: min(k['row_nums']))
+    table = ET.Element('table')
     current_row = -1
     for cell in cells:
         if this_row > current_row:
             current_row = this_row
             if cell['header']:
+                cell_tag = 'th'
+                row = ET.SubElement(table, 'thead')
             else:
+                cell_tag = 'td'
+                row = ET.SubElement(table, 'tr')
         tcell = ET.SubElement(row, cell_tag, attrib=attrib)
         tcell.text = cell['cell_text']
+    return str(ET.tostring(table, encoding='unicode', short_empty_elements=False))
 # def cells_to_html(cells):
 #         for cell in r_cells:
 #             rowspan = cell['row_nums'][-1] - cell['row_nums'][0] + 1
 #             colspan = cell['column_nums'][-1] - cell['column_nums'][0] + 1
+#             r_html += f'<td rowspan="{rowspan}" colspan="{colspan}">{escape(cell["text"])}</td>'
 #         html_code += f'<tr>{r_html}</tr>'
 #     html_code = '''<html>
 #                    <head>
+#                    <meta charset='UTF-8'>
 #                    <style>
 #                    table, th, td {
 #                      border: 1px solid black;
 #                    </style>
 #                    </head>
 #                    <body>
+#                    <table frame='hsides' rules='groups' width='100%%'>
 #                      %s
 #                    </table>
 #                    </body>
 def main():
+    st.set_page_config(layout='wide')
+    st.title('Table Extraction Demo')
     st.write('\n')
     cols = st.columns((1, 1))
+    cols[0].subheader('Input page')
+    cols[1].subheader('Table(s) detected')
+    st.sidebar.title('Image upload')
     st.set_option('deprecation.showfileUploaderEncoding', False)
+    filename = st.sidebar.file_uploader('Upload files', type=['png', 'jpeg', 'jpg'])
+    if st.sidebar.button('Analyze image'):
         if filename is None:
+            st.sidebar.write('Please upload an image')
         else:
             print(filename)
             cols[1].image(vis_det_img)
             str_cols = st.columns((len(crop_images), ) * 5)
+            str_cols[0].subheader('Table image')
+            str_cols[1].subheader('OCR result')
+            str_cols[2].subheader('Structure result')
+            str_cols[3].subheader('Cells result')
+            str_cols[4].subheader('CSV result')
+            for i, img in enumerate(crop_images):
                 ocr_result = ocr(img)
                 structure_result = table_structure(img)
                 table_structures, cells, confidence_score = convert_stucture(ocr_result, img, structure_result)
                 cells = extract_text_from_cells(cells)
                 html_result = cells_to_html(cells)
                 df, csv_result = cells_to_csv(cells)
+                #print(df)
                 vis_ocr_img = visualize_ocr(img, ocr_result)
                 vis_str_img = visualize_structure(img, structure_result)
                 vis_cells_img = visualize_cells(img, cells)
                 str_cols[0].image(img)
                 str_cols[1].image(vis_ocr_img)
                 str_cols[2].image(vis_str_img)
                 str_cols[3].image(vis_cells_img)
                 #str_cols[4].dataframe(df)
+                str_cols[4].download_button('Download table', csv_result, f'table-{i}.csv', 'text/csv', key=f'download-csv-{i}')
                 st.markdown(html_result, unsafe_allow_html=True)