jayyai committed on
Commit
c173760
·
verified ·
1 Parent(s): d2fbc21
Files changed (1) hide show
  1. pdf_route.py +4 -6
pdf_route.py CHANGED
@@ -255,7 +255,7 @@ def create_markdown_file(result, output_file):
255
  table_cells = set()
256
  for _, element_type, element in elements:
257
  if element_type == 'paragraph':
258
- if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables):
259
  continue
260
  content = element.content.replace(":selected:", "").replace(":unselected:", "")
261
  md_file.write(f"{content}\n\n")
@@ -321,7 +321,7 @@ def create_word_file(result, output_file):
321
  for _, element_type, element in elements:
322
  if element_type == 'paragraph':
323
  # Skip lines that are part of a table
324
- if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables):
325
  continue
326
  content = element.content.replace(":selected:", "").replace(":unselected:", "")
327
  doc.add_paragraph(content)
@@ -349,12 +349,10 @@ def format_polygon(polygon):
349
 
350
  def get_table_max_polygon(table):
351
  # first coordination
352
- first_cell = table.cells[0]
353
- first_coordinate = first_cell.bounding_regions[0].polygon[0]
354
 
355
  # last coordination
356
- last_cell = table.cells[-1]
357
- last_coordinate = last_cell.bounding_regions[0].polygon[2]
358
 
359
  # return max polygon
360
  return [first_coordinate, last_coordinate]
 
255
  table_cells = set()
256
  for _, element_type, element in elements:
257
  if element_type == 'paragraph':
258
+ if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables if table.bounding_regions[0].page_number == page.page_number):
259
  continue
260
  content = element.content.replace(":selected:", "").replace(":unselected:", "")
261
  md_file.write(f"{content}\n\n")
 
321
  for _, element_type, element in elements:
322
  if element_type == 'paragraph':
323
  # Skip lines that are part of a table
324
+ if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables if table.bounding_regions[0].page_number == page.page_number):
325
  continue
326
  content = element.content.replace(":selected:", "").replace(":unselected:", "")
327
  doc.add_paragraph(content)
 
349
 
350
  def get_table_max_polygon(table):
351
  # first coordination
352
+ first_coordinate = table.bounding_regions[0].polygon[0]
 
353
 
354
  # last coordination
355
+ last_coordinate = table.bounding_regions[0].polygon[2]
 
356
 
357
  # return max polygon
358
  return [first_coordinate, last_coordinate]