Spaces:
Sleeping
Sleeping
version1 (#5)
Browse files- fix table issue (77ca97ca240ccbde6bfc27dfd71ca451cedda871)
- pdf_route.py +4 -6
pdf_route.py
CHANGED
@@ -255,7 +255,7 @@ def create_markdown_file(result, output_file):
|
|
255 |
table_cells = set()
|
256 |
for _, element_type, element in elements:
|
257 |
if element_type == 'paragraph':
|
258 |
-
if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables):
|
259 |
continue
|
260 |
content = element.content.replace(":selected:", "").replace(":unselected:", "")
|
261 |
md_file.write(f"{content}\n\n")
|
@@ -321,7 +321,7 @@ def create_word_file(result, output_file):
|
|
321 |
for _, element_type, element in elements:
|
322 |
if element_type == 'paragraph':
|
323 |
# Skip lines that are part of a table
|
324 |
-
if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables):
|
325 |
continue
|
326 |
content = element.content.replace(":selected:", "").replace(":unselected:", "")
|
327 |
doc.add_paragraph(content)
|
@@ -349,12 +349,10 @@ def format_polygon(polygon):
|
|
349 |
|
350 |
def get_table_max_polygon(table):
|
351 |
# first coordination
|
352 |
-
|
353 |
-
first_coordinate = first_cell.bounding_regions[0].polygon[0]
|
354 |
|
355 |
# last coordination
|
356 |
-
|
357 |
-
last_coordinate = last_cell.bounding_regions[0].polygon[2]
|
358 |
|
359 |
# return max polygon
|
360 |
return [first_coordinate, last_coordinate]
|
|
|
255 |
table_cells = set()
|
256 |
for _, element_type, element in elements:
|
257 |
if element_type == 'paragraph':
|
258 |
+
if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables if table.bounding_regions[0].page_number == page.page_number):
|
259 |
continue
|
260 |
content = element.content.replace(":selected:", "").replace(":unselected:", "")
|
261 |
md_file.write(f"{content}\n\n")
|
|
|
321 |
for _, element_type, element in elements:
|
322 |
if element_type == 'paragraph':
|
323 |
# Skip lines that are part of a table
|
324 |
+
if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables if table.bounding_regions[0].page_number == page.page_number):
|
325 |
continue
|
326 |
content = element.content.replace(":selected:", "").replace(":unselected:", "")
|
327 |
doc.add_paragraph(content)
|
|
|
349 |
|
350 |
def get_table_max_polygon(table):
    """Return two entries of the polygon of the table's first bounding region.

    Only ``table.bounding_regions[0]`` is consulted, so a table that has
    more than one bounding region is described by its first region alone.

    Args:
        table: Object exposing ``bounding_regions``, an indexable collection
            whose first item carries an indexable ``polygon`` attribute.

    Returns:
        A two-item list ``[polygon[0], polygon[2]]``.

    Raises:
        IndexError: Propagated from indexing when list-like
            ``bounding_regions`` is empty or ``polygon`` has fewer than
            three entries.
    """
    # Look the polygon up once; both returned entries come from the same region.
    polygon = table.bounding_regions[0].polygon

    # NOTE(review): presumably entries 0 and 2 are opposite corners of the
    # region (e.g. top-left and bottom-right) -- confirm against the polygon
    # layout of the analysis SDK that produces `table`.
    first_coordinate = polygon[0]
    last_coordinate = polygon[2]

    return [first_coordinate, last_coordinate]
|