Spaces:
Sleeping
Sleeping
version1 (#5)
Browse files
- fix table issue (77ca97ca240ccbde6bfc27dfd71ca451cedda871)
- pdf_route.py +4 -6
pdf_route.py
CHANGED
|
@@ -255,7 +255,7 @@ def create_markdown_file(result, output_file):
|
|
| 255 |
table_cells = set()
|
| 256 |
for _, element_type, element in elements:
|
| 257 |
if element_type == 'paragraph':
|
| 258 |
-
if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables):
|
| 259 |
continue
|
| 260 |
content = element.content.replace(":selected:", "").replace(":unselected:", "")
|
| 261 |
md_file.write(f"{content}\n\n")
|
|
@@ -321,7 +321,7 @@ def create_word_file(result, output_file):
|
|
| 321 |
for _, element_type, element in elements:
|
| 322 |
if element_type == 'paragraph':
|
| 323 |
# Skip lines that are part of a table
|
| 324 |
-
if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables):
|
| 325 |
continue
|
| 326 |
content = element.content.replace(":selected:", "").replace(":unselected:", "")
|
| 327 |
doc.add_paragraph(content)
|
|
@@ -349,12 +349,10 @@ def format_polygon(polygon):
|
|
| 349 |
|
| 350 |
def get_table_max_polygon(table):
|
| 351 |
# first coordination
|
| 352 |
-
|
| 353 |
-
first_coordinate = first_cell.bounding_regions[0].polygon[0]
|
| 354 |
|
| 355 |
# last coordination
|
| 356 |
-
|
| 357 |
-
last_coordinate = last_cell.bounding_regions[0].polygon[2]
|
| 358 |
|
| 359 |
# return max polygon
|
| 360 |
return [first_coordinate, last_coordinate]
|
|
|
|
| 255 |
table_cells = set()
|
| 256 |
for _, element_type, element in elements:
|
| 257 |
if element_type == 'paragraph':
|
| 258 |
+
if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables if table.bounding_regions[0].page_number == page.page_number):
|
| 259 |
continue
|
| 260 |
content = element.content.replace(":selected:", "").replace(":unselected:", "")
|
| 261 |
md_file.write(f"{content}\n\n")
|
|
|
|
| 321 |
for _, element_type, element in elements:
|
| 322 |
if element_type == 'paragraph':
|
| 323 |
# Skip lines that are part of a table
|
| 324 |
+
if any(is_element_inside_table(element, get_table_max_polygon(table)) for table in result.tables if table.bounding_regions[0].page_number == page.page_number):
|
| 325 |
continue
|
| 326 |
content = element.content.replace(":selected:", "").replace(":unselected:", "")
|
| 327 |
doc.add_paragraph(content)
|
|
|
|
| 349 |
|
| 350 |
def get_table_max_polygon(table):
    """Return two corner points taken from a table's first bounding region.

    Args:
        table: Object exposing ``bounding_regions``; only the first region's
            ``polygon`` is read.

    Returns:
        A two-item list ``[polygon[0], polygon[2]]``.

    Raises:
        Whatever indexing raises (``IndexError`` for lists) when the table
        has no bounding regions or the polygon has fewer than three entries.

    NOTE(review): presumably ``polygon`` is a sequence of four corner points
    ordered clockwise from the top-left, which would make the result the
    top-left and bottom-right corners -- confirm against the analysis SDK.
    If the SDK returns a flat ``[x1, y1, x2, y2, ...]`` list instead, these
    two indexes pick two scalars rather than two points.
    """
    # Only the first bounding region is consulted; any further regions of
    # the same table are ignored.
    polygon = table.bounding_regions[0].polygon

    first_coordinate = polygon[0]
    last_coordinate = polygon[2]

    return [first_coordinate, last_coordinate]
|