Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -369,9 +369,35 @@ def create_dataframe(cells_pytess_result: list, max_cols: int, max_rows: int, cs
|
|
| 369 |
|
| 370 |
return df
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
|
| 373 |
def process_image(image):
|
| 374 |
-
TD_THRESHOLD = 0.
|
| 375 |
TSR_THRESHOLD = 0.8
|
| 376 |
padd_top = 100
|
| 377 |
padd_left = 100
|
|
@@ -424,10 +450,8 @@ def process_image(image):
|
|
| 424 |
csv_path = "/content/sample_data/table_" + str(idx)
|
| 425 |
df = create_dataframe(sequential_cell_img_list, max_cols, max_rows, csv_path)
|
| 426 |
result.append(df)
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
res = {"items": res.to_json(orient='records')}
|
| 430 |
-
return res
|
| 431 |
|
| 432 |
|
| 433 |
title = "Interactive demo OCR: microsoft - table-transformer-detection + tesseract"
|
|
|
|
| 369 |
|
| 370 |
return df
|
| 371 |
|
| 372 |
+
def postprocess_dataframes(result_tables):
    """Reduce each extracted table to ``name`` / ``amount`` / ``cost_code`` records.

    For every DataFrame in ``result_tables``:

    * a column whose label starts with "item" (case-insensitive) becomes
      ``name``;
    * a column whose label starts with "total", "amount" or "cost"
      (case-insensitive) becomes ``amount``;
    * if no column matched at all, the first two columns are used
      positionally as ``name`` and ``amount``;
    * an empty-string ``cost_code`` column is always appended.

    When several columns match the same target, the last one wins.

    Args:
        result_tables: iterable of pandas DataFrames, one per detected table.

    Returns:
        dict mapping ``"Table1" + str(idx)`` to the table serialized with
        ``DataFrame.to_json(orient="records")``.
    """
    amount_prefixes = ("total", "amount", "cost")
    res = {}
    # Iterate over the argument; the previous code looped over an undefined
    # name (`result`) instead of the `result_tables` parameter.
    for idx, table_df in enumerate(result_tables):
        result_df = pd.DataFrame()
        for col in table_df.columns:
            # Column labels are not guaranteed to be strings, so coerce
            # before doing the prefix match.
            label = str(col).lower()
            if label.startswith("item"):
                result_df["name"] = table_df[col].copy()
            if label.startswith(amount_prefixes):
                result_df["amount"] = table_df[col].copy()
        print(result_df.columns)
        if len(result_df.columns) == 0:
            # No recognizable header: fall back to positional columns.
            result_df["name"] = table_df.iloc[:, 0].copy()
            result_df["amount"] = table_df.iloc[:, 1].copy()

        result_df["cost_code"] = ""
        res["Table1" + str(idx)] = result_df.to_json(orient="records")
    return res
|
| 397 |
+
|
| 398 |
|
| 399 |
def process_image(image):
|
| 400 |
+
TD_THRESHOLD = 0.7
|
| 401 |
TSR_THRESHOLD = 0.8
|
| 402 |
padd_top = 100
|
| 403 |
padd_left = 100
|
|
|
|
| 450 |
csv_path = "/content/sample_data/table_" + str(idx)
|
| 451 |
df = create_dataframe(sequential_cell_img_list, max_cols, max_rows, csv_path)
|
| 452 |
result.append(df)
|
| 453 |
+
output = postprocess_dataframes(result)
|
| 454 |
+
return output
|
|
|
|
|
|
|
| 455 |
|
| 456 |
|
| 457 |
title = "Interactive demo OCR: microsoft - table-transformer-detection + tesseract"
|