Theo Viel committed
Commit a82013d
Parent(s): 9b6b8b8

post-processing code

Browse files:
- Demo.ipynb +2 -2
- README.md +3 -8
- example-ocr.json +1 -0
- post_processing/__pycache__/page_elt_pp.cpython-312.pyc +0 -0
- post_processing/__pycache__/table_struct_pp.cpython-312.pyc +0 -0
- post_processing/__pycache__/text_pp.cpython-312.pyc +0 -0
- post_processing/__pycache__/wbf.cpython-312.pyc +0 -0
- post_processing/table_struct_pp.py +222 -1
Demo.ipynb CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e799b29f13e7dba664c2e5e9af5866cc10c7fc847fdd32295348cc78cdf9d13f
+size 1085057
README.md CHANGED
@@ -14,7 +14,7 @@ tags:
 - ingestion
 - yolox
 ---
-# Nemoretriever
+# Nemoretriever Table Structure v1
 
 ## **Model Overview**
 
@@ -144,7 +144,7 @@ import matplotlib.pyplot as plt
 from PIL import Image
 
 from model import define_model
-from utils import plot_sample,
+from utils import plot_sample, postprocess_preds_table_structure, reformat_for_plotting
 
 # Load image
 path = "./example.png"
@@ -182,7 +182,7 @@ If you wish to do additional training, [refer to the original repo](https://gith
 3. Advanced post-processing
 
 Additional post-processing might be required to use the model as part of a data extraction pipeline.
-We
+We show how to use the model as part of a table to text pipeline alongside with the [Nemo Retriever OCR](https://huggingface.co/nvidia/nemoretriever-ocr-v1) in the notebook `Demo.ipynb`.
 
 **Disclaimer:**
 We are aware of some issues with the model, and will provide a v2 with improved performance in the future which addresses the following issues:
@@ -240,11 +240,6 @@ The primary evaluation set is a cut of the Azure labels and digital corpora imag
 | row | 76.992 | 81.115 |
 | column | 85.293 | 87.434 |
 
-## Inference:
-
-**Acceleartion Engine**: TensorRT <br>
-**Test hardware**: See [Support Matrix from NIM documentation](https://docs.nvidia.com/nim/ingestion/object-detection/latest/support-matrix.html#)
-
 <!---
 ## Inference:
 
example-ocr.json ADDED
@@ -0,0 +1 @@
+
[{"text": "Judaism", "confidence": 0.9761894491491112, "left": 0.313720703125, "upper": 0.069580078125, "right": 0.41552734375, "lower": 0.04180908203125}, {"text": "Christianity", "confidence": 0.9864673460063902, "left": 0.449462890625, "upper": 0.07061767578125, "right": 0.591796875, "lower": 0.041900634765625}, {"text": "Islam", "confidence": 0.9741032144189129, "left": 0.62353515625, "upper": 0.06451416015625, "right": 0.69091796875, "lower": 0.042205810546875}, {"text": "Buddhism", "confidence": 0.9820241379034715, "left": 0.72412109375, "upper": 0.0650634765625, "right": 0.849609375, "lower": 0.04180908203125}, {"text": "Sikhism", "confidence": 0.970345573215551, "left": 0.880859375, "upper": 0.064697265625, "right": 0.978515625, "lower": 0.0421142578125}, {"text": "Pretrained", "confidence": 0.9936134942698354, "left": 0.0272369384765625, "upper": 0.12359619140625, "right": 0.1575927734375, "lower": 0.09942626953125}, {"text": "MPT", "confidence": 0.9913080780677679, "left": 0.0268707275390625, "upper": 0.1907958984375, "right": 0.08831787109375, "lower": 0.1685791015625}, {"text": "7B", "confidence": 0.9905469329639989, "left": 0.251953125, "upper": 0.1732177734375, "right": 0.28369140625, "lower": 0.153076171875}, {"text": "0.39", "confidence": 0.9985185665573967, "left": 0.34130859375, "upper": 0.1739501953125, "right": 0.3896484375, "lower": 0.1522216796875}, {"text": "0.38", "confidence": 0.9893707907998098, "left": 0.495361328125, "upper": 0.174072265625, "right": 0.54443359375, "lower": 0.15234375}, {"text": "0.31", "confidence": 0.9989534774832673, "left": 0.6328125, "upper": 0.173828125, "right": 0.6796875, "lower": 0.15234375}, {"text": "0.27", "confidence": 0.9956375751248465, "left": 0.76171875, "upper": 0.173583984375, "right": 0.81103515625, "lower": 0.15234375}, {"text": "0.07", "confidence": 0.9942783306227302, "left": 0.91357421875, "upper": 0.173828125, "right": 0.96337890625, "lower": 0.1522216796875}, {"text": "30B", "confidence": 0.972883248728015, "left": 0.239013671875, "upper": 0.2109375, "right": 0.283935546875, "lower": 0.1885986328125}, {"text": "0.33", "confidence": 0.998774187312769, "left": 0.34130859375, "upper": 0.21044921875, "right": 0.3896484375, "lower": 0.188720703125}, {"text": "0.28", "confidence": 0.9836438141378729, "left": 0.49560546875, "upper": 0.210205078125, "right": 0.5439453125, "lower": 0.18896484375}, {"text": "0.20", "confidence": 0.997989728503616, "left": 0.6337890625, "upper": 0.2103271484375, "right": 0.68115234375, "lower": 0.18896484375}, {"text": "0.30", "confidence": 0.9981678680694565, "left": 0.76220703125, "upper": 0.210693359375, "right": 0.81005859375, "lower": 0.1888427734375}, {"text": "0.19", "confidence": 0.9872296722567225, "left": 0.91357421875, "upper": 0.2105712890625, "right": 0.9619140625, "lower": 0.1884765625}, {"text": "7B", "confidence": 0.9876452154809514, "left": 0.252197265625, "upper": 0.2607421875, "right": 0.28369140625, "lower": 0.240234375}, {"text": "0.25", "confidence": 0.9975937481432596, "left": 0.341796875, "upper": 0.26123046875, "right": 0.39013671875, "lower": 0.239501953125}, {"text": "0.35", "confidence": 0.9917687898647339, "left": 0.49560546875, "upper": 0.261474609375, "right": 0.54443359375, "lower": 0.2392578125}, {"text": "0.20", "confidence": 0.9984690982188885, "left": 0.6337890625, "upper": 0.261474609375, "right": 0.68115234375, "lower": 0.2396240234375}, {"text": "0.25", "confidence": 0.997957530803141, "left": 0.76220703125, "upper": 0.26123046875, "right": 0.810546875, 
"lower": 0.2393798828125}, {"text": "0.22", "confidence": 0.9882444380230272, "left": 0.9140625, "upper": 0.26123046875, "right": 0.96240234375, "lower": 0.23974609375}, {"text": "Falcon", "confidence": 0.9981243450662549, "left": 0.0273590087890625, "upper": 0.2783203125, "right": 0.10601806640625, "lower": 0.256591796875}, {"text": "40B", "confidence": 0.9982407007689418, "left": 0.2391357421875, "upper": 0.2978515625, "right": 0.283935546875, "lower": 0.2763671875}, {"text": "0.26", "confidence": 0.9985793226619016, "left": 0.341796875, "upper": 0.2978515625, "right": 0.389892578125, "lower": 0.276123046875}, {"text": "0.28", "confidence": 0.9923507332532226, "left": 0.495849609375, "upper": 0.2978515625, "right": 0.5439453125, "lower": 0.276123046875}, {"text": "0.26", "confidence": 0.9970813602397094, "left": 0.6337890625, "upper": 0.2978515625, "right": 0.68115234375, "lower": 0.276123046875}, {"text": "0.31", "confidence": 0.998671661567936, "left": 0.76220703125, "upper": 0.297607421875, "right": 0.80859375, "lower": 0.276123046875}, {"text": "0.19", "confidence": 0.9882519618141666, "left": 0.9140625, "upper": 0.297607421875, "right": 0.9619140625, "lower": 0.27587890625}, {"text": "7B", "confidence": 0.9918499357919858, "left": 0.252197265625, "upper": 0.3486328125, "right": 0.28369140625, "lower": 0.328369140625}, {"text": "0.37", "confidence": 0.9981141371642034, "left": 0.341552734375, "upper": 0.34912109375, "right": 0.390625, "lower": 0.327392578125}, {"text": "0.30", "confidence": 0.9863860868688986, "left": 0.49560546875, "upper": 0.349609375, "right": 0.5439453125, "lower": 0.32763671875}, {"text": "0.24", "confidence": 0.9976063742143298, "left": 0.63330078125, "upper": 0.348876953125, "right": 0.681640625, "lower": 0.32763671875}, {"text": "0.38", "confidence": 0.9990392507749052, "left": 0.76220703125, "upper": 0.349609375, "right": 0.810546875, "lower": 0.327392578125}, {"text": "0.17", "confidence": 0.9958182080744692, "left": 0.91357421875, "upper": 0.349365234375, "right": 0.96337890625, "lower": 0.32763671875}, {"text": "13B", "confidence": 0.9907532088107024, "left": 0.240234375, "upper": 0.385986328125, "right": 0.2841796875, "lower": 0.364013671875}, {"text": "0.36", "confidence": 0.9990900539759036, "left": 0.341552734375, "upper": 0.385986328125, "right": 0.390380859375, "lower": 0.364013671875}, {"text": "0.26", "confidence": 0.9967458970288499, "left": 0.495849609375, "upper": 0.3857421875, "right": 0.54443359375, "lower": 0.364013671875}, {"text": "0.30", "confidence": 0.9983706373299477, "left": 0.63330078125, "upper": 0.385986328125, "right": 0.681640625, "lower": 0.364013671875}, {"text": "0.37", "confidence": 0.9976059303527898, "left": 0.76220703125, "upper": 0.3857421875, "right": 0.810546875, "lower": 0.363525390625}, {"text": "0.13", "confidence": 0.989847993170092, "left": 0.91357421875, "upper": 0.385986328125, "right": 0.9619140625, "lower": 0.36376953125}, {"text": "LLAMA 1", "confidence": 0.8968659330804545, "left": 0.0275421142578125, "upper": 0.403076171875, "right": 0.125, "lower": 0.381591796875}, {"text": "33B", "confidence": 0.9954893218906407, "left": 0.2391357421875, "upper": 0.422607421875, "right": 0.2841796875, "lower": 0.400634765625}, {"text": "0.35", "confidence": 0.9987272973382014, "left": 0.341796875, "upper": 0.42236328125, "right": 0.3896484375, "lower": 0.400390625}, {"text": "0.27", "confidence": 0.9770261919475443, "left": 0.495361328125, "upper": 0.422119140625, "right": 0.5439453125, "lower": 0.400390625}, {"text": 
"0.29", "confidence": 0.9942585752715364, "left": 0.63330078125, "upper": 0.42236328125, "right": 0.6806640625, "lower": 0.400390625}, {"text": "0.20", "confidence": 0.9972422449309013, "left": 0.76220703125, "upper": 0.42236328125, "right": 0.81005859375, "lower": 0.400634765625}, {"text": "0.18", "confidence": 0.9910378700402914, "left": 0.91357421875, "upper": 0.42236328125, "right": 0.96240234375, "lower": 0.400390625}, {"text": "65B", "confidence": 0.9833687099306082, "left": 0.2391357421875, "upper": 0.458984375, "right": 0.283935546875, "lower": 0.4365234375}, {"text": "0.37", "confidence": 0.9985199995857583, "left": 0.341796875, "upper": 0.458251953125, "right": 0.390380859375, "lower": 0.4365234375}, {"text": "0.27", "confidence": 0.9781837174489308, "left": 0.495361328125, "upper": 0.458251953125, "right": 0.5439453125, "lower": 0.4365234375}, {"text": "0.20", "confidence": 0.9959723809811365, "left": 0.6337890625, "upper": 0.458251953125, "right": 0.68115234375, "lower": 0.436767578125}, {"text": "0.30", "confidence": 0.9881508252015603, "left": 0.76220703125, "upper": 0.458251953125, "right": 0.81005859375, "lower": 0.4365234375}, {"text": "0.19", "confidence": 0.9870585272727334, "left": 0.9140625, "upper": 0.45849609375, "right": 0.9619140625, "lower": 0.43603515625}, {"text": "7B", "confidence": 0.9918677028011398, "left": 0.252197265625, "upper": 0.5087890625, "right": 0.283935546875, "lower": 0.488525390625}, {"text": "0.34", "confidence": 0.9989572858425726, "left": 0.341796875, "upper": 0.509765625, "right": 0.390625, "lower": 0.488037109375}, {"text": "0.28", "confidence": 0.9883034962034247, "left": 0.49560546875, "upper": 0.509765625, "right": 0.54443359375, "lower": 0.488037109375}, {"text": "0.30", "confidence": 0.9986882057244657, "left": 0.63330078125, "upper": 0.509765625, "right": 0.681640625, "lower": 0.488037109375}, {"text": "0.24", "confidence": 0.9975580736636244, "left": 0.76220703125, "upper": 0.509765625, "right": 0.810546875, "lower": 0.488037109375}, {"text": "0.16", "confidence": 0.9966634772643969, "left": 0.91357421875, "upper": 0.509765625, "right": 0.96240234375, "lower": 0.488037109375}, {"text": "13B", "confidence": 0.9669212205137284, "left": 0.2401123046875, "upper": 0.546875, "right": 0.283935546875, "lower": 0.52392578125}, {"text": "0.29", "confidence": 0.9931262832458065, "left": 0.341552734375, "upper": 0.54638671875, "right": 0.389404296875, "lower": 0.52392578125}, {"text": "0.33", "confidence": 0.956158281645661, "left": 0.4951171875, "upper": 0.546875, "right": 0.54345703125, "lower": 0.52392578125}, {"text": "0.35", "confidence": 0.9652406782267843, "left": 0.63330078125, "upper": 0.546875, "right": 0.681640625, "lower": 0.52392578125}, {"text": "0.33", "confidence": 0.9659962979238208, "left": 0.76171875, "upper": 0.54638671875, "right": 0.8095703125, "lower": 0.52392578125}, {"text": "0.19", "confidence": 0.9760530513096785, "left": 0.91357421875, "upper": 0.546875, "right": 0.9619140625, "lower": 0.52392578125}, {"text": "LLAMA 2", "confidence": 0.9230539093920775, "left": 0.027587890625, "upper": 0.56396484375, "right": 0.1273193359375, "lower": 0.5419921875}, {"text": "34B", "confidence": 0.9737839233037625, "left": 0.2391357421875, "upper": 0.5830078125, "right": 0.283935546875, "lower": 0.5615234375}, {"text": "0.31", "confidence": 0.9574870442103371, "left": 0.341796875, "upper": 0.5830078125, "right": 0.388427734375, "lower": 0.56103515625}, {"text": "0.24", "confidence": 0.9459550426122554, "left": 0.49560546875, "upper": 
0.5830078125, "right": 0.54443359375, "lower": 0.5615234375}, {"text": "0.32", "confidence": 0.9652482034330485, "left": 0.63330078125, "upper": 0.5830078125, "right": 0.681640625, "lower": 0.56103515625}, {"text": "0.34", "confidence": 0.9732832840284351, "left": 0.76220703125, "upper": 0.5830078125, "right": 0.810546875, "lower": 0.56103515625}, {"text": "0.28", "confidence": 0.9533701700329483, "left": 0.9140625, "upper": 0.5830078125, "right": 0.9619140625, "lower": 0.56103515625}, {"text": "70B", "confidence": 0.9942666337733455, "left": 0.2384033203125, "upper": 0.6201171875, "right": 0.283935546875, "lower": 0.59814453125}, {"text": "0.42", "confidence": 0.9958264434154595, "left": 0.341796875, "upper": 0.61962890625, "right": 0.39013671875, "lower": 0.5986328125}, {"text": "0.29", "confidence": 0.9821117248018926, "left": 0.495361328125, "upper": 0.6201171875, "right": 0.54345703125, "lower": 0.59814453125}, {"text": "0.34", "confidence": 0.9966167427484915, "left": 0.63330078125, "upper": 0.6201171875, "right": 0.681640625, "lower": 0.59814453125}, {"text": "0.37", "confidence": 0.993297869415919, "left": 0.76220703125, "upper": 0.6201171875, "right": 0.810546875, "lower": 0.59765625}, {"text": "0.20", "confidence": 0.9876292530135815, "left": 0.9140625, "upper": 0.61962890625, "right": 0.96142578125, "lower": 0.59814453125}, {"text": "Fine-tuned", "confidence": 0.9953880606655975, "left": 0.0269317626953125, "upper": 0.67822265625, "right": 0.1640625, "lower": 0.654296875}, {"text": "ChatGPT", "confidence": 0.9969630013721966, "left": 0.0276641845703125, "upper": 0.728515625, "right": 0.1392822265625, "lower": 0.70556640625}, {"text": "0.19", "confidence": 0.9988893534997763, "left": 0.341552734375, "upper": 0.72802734375, "right": 0.390380859375, "lower": 0.70703125}, {"text": "0.16", "confidence": 0.9928872200942891, "left": 0.49560546875, "upper": 0.728515625, "right": 0.54443359375, "lower": 0.70703125}, {"text": "0.21", "confidence": 0.9989062915958875, "left": 0.63330078125, "upper": 0.72802734375, "right": 0.6796875, "lower": 0.70751953125}, {"text": "0.17", "confidence": 0.9985639827258815, "left": 0.76171875, "upper": 0.72802734375, "right": 0.81201171875, "lower": 0.70703125}, {"text": "0.17", "confidence": 0.9965368796068824, "left": 0.91357421875, "upper": 0.728515625, "right": 0.96337890625, "lower": 0.70703125}, {"text": "MPT-instruct", "confidence": 0.9989049972028129, "left": 0.0291900634765625, "upper": 0.76513671875, "right": 0.1865234375, "lower": 0.74365234375}, {"text": "7B", "confidence": 0.984694287266506, "left": 0.252197265625, "upper": 0.76416015625, "right": 0.283447265625, "lower": 0.74365234375}, {"text": "0.35", "confidence": 0.9991214853153084, "left": 0.341552734375, "upper": 0.7646484375, "right": 0.389892578125, "lower": 0.7431640625}, {"text": "0.29", "confidence": 0.974891251290882, "left": 0.495361328125, "upper": 0.76513671875, "right": 0.5439453125, "lower": 0.7431640625}, {"text": "0.33", "confidence": 0.9964501479832892, "left": 0.6328125, "upper": 0.76513671875, "right": 0.68115234375, "lower": 0.7431640625}, {"text": "0.41", "confidence": 0.9985691315320698, "left": 0.76220703125, "upper": 0.7646484375, "right": 0.80810546875, "lower": 0.7431640625}, {"text": "0.14", "confidence": 0.9964564748701925, "left": 0.91357421875, "upper": 0.76513671875, "right": 0.96240234375, "lower": 0.7431640625}, {"text": "Falcon-instruct", "confidence": 0.9571371356184732, "left": 0.02825927734375, "upper": 0.802734375, "right": 0.208740234375, "lower": 
0.77978515625}, {"text": "7B", "confidence": 0.9665162081666453, "left": 0.251953125, "upper": 0.80126953125, "right": 0.283935546875, "lower": 0.7802734375}, {"text": "0.34", "confidence": 0.967693884403376, "left": 0.341552734375, "upper": 0.8017578125, "right": 0.390380859375, "lower": 0.779296875}, {"text": "0.26", "confidence": 0.9520253022323407, "left": 0.49560546875, "upper": 0.8017578125, "right": 0.5439453125, "lower": 0.77978515625}, {"text": "0.30", "confidence": 0.973102993656882, "left": 0.63330078125, "upper": 0.80224609375, "right": 0.68115234375, "lower": 0.77978515625}, {"text": "0.33", "confidence": 0.9718302160365343, "left": 0.76220703125, "upper": 0.8017578125, "right": 0.81005859375, "lower": 0.779296875}, {"text": "0.29", "confidence": 0.9756271335843325, "left": 0.9140625, "upper": 0.8017578125, "right": 0.96142578125, "lower": 0.779296875}, {"text": "7B", "confidence": 0.9913205707632803, "left": 0.252197265625, "upper": 0.85205078125, "right": 0.28369140625, "lower": 0.83154296875}, {"text": "0.55", "confidence": 0.9985149221176529, "left": 0.341796875, "upper": 0.85302734375, "right": 0.389892578125, "lower": 0.8310546875}, {"text": "0.50", "confidence": 0.976761350205919, "left": 0.495849609375, "upper": 0.85302734375, "right": 0.5439453125, "lower": 0.8310546875}, {"text": "0.48", "confidence": 0.9934171281591422, "left": 0.63330078125, "upper": 0.85302734375, "right": 0.681640625, "lower": 0.8310546875}, {"text": "0.45", "confidence": 0.998221423861148, "left": 0.7626953125, "upper": 0.85302734375, "right": 0.810546875, "lower": 0.8310546875}, {"text": "0.62", "confidence": 0.9944353516245061, "left": 0.91357421875, "upper": 0.853515625, "right": 0.96240234375, "lower": 0.83056640625}, {"text": "13B", "confidence": 0.965234102519708, "left": 0.2403564453125, "upper": 0.89013671875, "right": 0.283935546875, "lower": 0.86767578125}, {"text": "0.40", "confidence": 0.976861319747196, "left": 0.341552734375, "upper": 0.8896484375, "right": 0.389892578125, "lower": 0.86767578125}, {"text": "0.50", "confidence": 0.9791019811977275, "left": 0.495361328125, "upper": 0.89013671875, "right": 0.5439453125, "lower": 0.86767578125}, {"text": "0.71", "confidence": 0.9548464227134962, "left": 0.63330078125, "upper": 0.8896484375, "right": 0.6796875, "lower": 0.86767578125}, {"text": "0.40", "confidence": 0.9743567645608693, "left": 0.76220703125, "upper": 0.89013671875, "right": 0.81005859375, "lower": 0.86767578125}, {"text": "0.62", "confidence": 0.9542199227339931, "left": 0.91357421875, "upper": 0.89013671875, "right": 0.9619140625, "lower": 0.86767578125}, {"text": "LLAMA 2-CHAT", "confidence": 0.9861827644038366, "left": 0.027557373046875, "upper": 0.90673828125, "right": 0.1982421875, "lower": 0.88525390625}, {"text": "34B", "confidence": 0.9589954561669843, "left": 0.2391357421875, "upper": 0.92578125, "right": 0.2841796875, "lower": 0.9033203125}, {"text": "0.44", "confidence": 0.9674675641607965, "left": 0.341796875, "upper": 0.92578125, "right": 0.39013671875, "lower": 0.9033203125}, {"text": "0.54", "confidence": 0.9736240446020379, "left": 0.49560546875, "upper": 0.92578125, "right": 0.54443359375, "lower": 0.9033203125}, {"text": "0.63", "confidence": 0.9760913134904838, "left": 0.63330078125, "upper": 0.92578125, "right": 0.68115234375, "lower": 0.9033203125}, {"text": "0.53", "confidence": 0.9796015180729337, "left": 0.76220703125, "upper": 0.92578125, "right": 0.81005859375, "lower": 0.9033203125}, {"text": "0.53", "confidence": 0.9752167869237847, "left": 
0.91357421875, "upper": 0.92626953125, "right": 0.96142578125, "lower": 0.9033203125}, {"text": "70B", "confidence": 0.9937974655878223, "left": 0.238525390625, "upper": 0.96240234375, "right": 0.283935546875, "lower": 0.94091796875}, {"text": "0.47", "confidence": 0.9898123335197536, "left": 0.341796875, "upper": 0.96240234375, "right": 0.390625, "lower": 0.9404296875}, {"text": "0.52", "confidence": 0.9714987532522628, "left": 0.495361328125, "upper": 0.96240234375, "right": 0.54443359375, "lower": 0.93994140625}, {"text": "0.50", "confidence": 0.9964541225349073, "left": 0.63330078125, "upper": 0.962890625, "right": 0.68115234375, "lower": 0.9404296875}, {"text": "0.55", "confidence": 0.980007828744104, "left": 0.76171875, "upper": 0.962890625, "right": 0.810546875, "lower": 0.93994140625}, {"text": "0.50", "confidence": 0.9833386444653252, "left": 0.9140625, "upper": 0.96240234375, "right": 0.9619140625, "lower": 0.9404296875}]
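
The file added above is a flat JSON list of OCR words, each with `text`, `confidence`, and normalized `left`/`upper`/`right`/`lower` coordinates. Below is a minimal loading sketch, not part of this commit: the page size is a placeholder, and since the JSON does not state which of `upper`/`lower` holds the smaller y value, the sketch takes the min/max rather than assuming an orientation.

```python
# Hypothetical loader for the OCR words above -- not part of this commit.
import json

import numpy as np
import pandas as pd

with open("example-ocr.json") as f:
    words = json.load(f)   # list of {"text", "confidence", "left", "upper", "right", "lower"}

df = pd.DataFrame(words)

# Coordinates are normalized to [0, 1]; scale to pixels for a placeholder page size.
width, height = 1024, 1024   # replace with the real image dimensions

# Build [x_min, y_min, x_max, y_max] boxes without assuming which of
# "upper"/"lower" holds the smaller y value.
y_min = np.minimum(df["upper"], df["lower"]) * height
y_max = np.maximum(df["upper"], df["lower"]) * height
df["box"] = [np.array(b) for b in zip(df["left"] * width, y_min, df["right"] * width, y_max)]

print(df[["text", "confidence", "box"]].head())
```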

post_processing/__pycache__/page_elt_pp.cpython-312.pyc ADDED
Binary file (11.6 kB)

post_processing/__pycache__/table_struct_pp.cpython-312.pyc ADDED
Binary file (10.4 kB)

post_processing/__pycache__/text_pp.cpython-312.pyc ADDED
Binary file (10.6 kB)

post_processing/__pycache__/wbf.cpython-312.pyc ADDED
Binary file (14.7 kB)

post_processing/table_struct_pp.py CHANGED
@@ -1 +1,222 @@
-#
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from typing import List, Union, Optional, Literal
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+
+
+def assign_boxes(
+    box: Union[List[float], npt.NDArray[np.float64]],
+    candidate_boxes: npt.NDArray[np.float64],
+    delta: float = 2.0,
+    min_overlap: float = 0.25,
+    mode: Literal["cell", "row", "column"] = "cell",
+) -> npt.NDArray[np.int_]:
+    """
+    Assigns the best candidate boxes to a reference `box` based on overlap.
+
+    If mode is "cell", the overlap is calculated using surface area overlap.
+    If mode is "row", the overlap is calculated using row height overlap.
+    If mode is "column", the overlap is calculated using column width overlap.
+
+    If delta > 1, it will look for multiple matches,
+    using candidates with score >= max_overlap / delta.
+
+    Args:
+        box (list or numpy.ndarray): Reference bounding box [x_min, y_min, x_max, y_max].
+        candidate_boxes (numpy.ndarray [N, 4]): Array of candidate bounding boxes.
+        delta (float, optional): Factor for matches relative to the best overlap. Defaults to 2.0.
+        min_overlap (float, optional): Minimum required overlap for a match. Defaults to 0.25.
+        mode (str, optional): Mode to assign boxes ("cell", "row", or "column"). Defaults to "cell".
+
+    Returns:
+        numpy.ndarray [M]: Indices of the matched boxes sorted by decreasing overlap.
+            Returns an empty array if no matches are found.
+    """
+    if not len(candidate_boxes):
+        return np.array([], dtype=np.int_)
+
+    x0_1, y0_1, x1_1, y1_1 = box
+    x0_2, y0_2, x1_2, y1_2 = (
+        candidate_boxes[:, 0],
+        candidate_boxes[:, 1],
+        candidate_boxes[:, 2],
+        candidate_boxes[:, 3],
+    )
+
+    # Intersection
+    inter_y0 = np.maximum(y0_1, y0_2)
+    inter_y1 = np.minimum(y1_1, y1_2)
+    inter_x0 = np.maximum(x0_1, x0_2)
+    inter_x1 = np.minimum(x1_1, x1_2)
+
+    if mode == "cell":
+        inter_area = np.maximum(0, inter_y1 - inter_y0) * np.maximum(0, inter_x1 - inter_x0)
+        box_area = (y1_1 - y0_1) * (x1_1 - x0_1)
+        overlap = inter_area / (box_area + 1e-6)
+    elif mode == "row":
+        inter_area = np.maximum(0, inter_y1 - inter_y0)
+        box_area = y1_1 - y0_1
+        overlap = inter_area / (box_area + 1e-6)
+    elif mode == "column":
+        inter_area = np.maximum(0, inter_x1 - inter_x0)
+        box_area = x1_1 - x0_1
+        overlap = inter_area / (box_area + 1e-6)
+    else:
+        raise ValueError(f"Invalid mode: {mode}")
+
+    max_overlap = np.max(overlap)
+    if max_overlap <= min_overlap:  # No match
+        return np.array([], dtype=np.int_)
+
+    n = len(np.where(overlap >= (max_overlap / delta))[0]) if delta > 1 else 1
+    matches = np.argsort(-overlap)[:n]
+    return matches
+
+
+def merge_text_in_cell(df_cell: pd.DataFrame) -> pd.DataFrame:
+    """
+    Merges text from multiple rows into a single cell and recalculates its bounding box.
+    Values are sorted by rounded (y, x) coordinates.
+
+    Args:
+        df_cell (pandas.DataFrame): DataFrame containing cells to merge.
+
+    Returns:
+        pandas.DataFrame: Updated DataFrame with merged text and a single bounding box.
+    """
+    boxes = np.stack(df_cell["box"].values)
+
+    df_cell["x"] = (boxes[:, 0] - boxes[:, 0].min()) // 10
+    df_cell["y"] = (boxes[:, 1] - boxes[:, 1].min()) // 10
+    df_cell = df_cell.sort_values(["y", "x"])
+
+    text = " ".join(df_cell["text"].values.tolist())
+    df_cell["text"] = text
+    df_cell = df_cell.head(1)
+    df_cell["box"] = df_cell["cell"]
+    df_cell.drop(["x", "y"], axis=1, inplace=True)
+
+    return df_cell
+
+
+def remove_empty_row(mat: List[List[str]]) -> List[List[str]]:
+    """
+    Remove empty rows from a matrix.
+
+    Args:
+        mat (list[list]): The matrix to remove empty rows from.
+
+    Returns:
+        list[list]: The matrix with empty rows removed.
+    """
+    mat_filter = []
+    for row in mat:
+        if max([len(c) for c in row]):
+            mat_filter.append(row)
+    return mat_filter
+
+
+def build_markdown(
+    df: pd.DataFrame,
+    remove_empty: bool = True,
+    n_rows: Optional[int] = None,
+    repeat_single: bool = False,
+) -> Union[List[List[str]], npt.NDArray[np.str_]]:
+    """
+    Convert a dataframe into a markdown table.
+
+    Args:
+        df (pandas.DataFrame): The dataframe to convert with columns 'col_ids',
+            'row_ids', and 'text'.
+        remove_empty (bool, optional): Whether to remove empty rows & cols. Defaults to True.
+        n_rows (int, optional): Number of rows. Inferred from df if None. Defaults to None.
+        repeat_single (bool, optional): Whether to repeat single element in rows.
+            Defaults to False.
+
+    Returns:
+        list[list[str]] or numpy.ndarray: A list of lists or array representing the markdown table.
+    """
+    df = df.reset_index(drop=True)
+    n_cols = max([np.max(c) for c in df['col_ids'].values])
+    if n_rows is None:
+        n_rows = max([np.max(c) for c in df['row_ids'].values])
+    else:
+        n_rows = max(
+            n_rows - 1,
+            max([np.max(c) for c in df['row_ids'].values])
+        )
+
+    mat = np.empty((n_rows + 1, n_cols + 1), dtype=str).tolist()
+
+    for i in range(len(df)):
+        if isinstance(df["row_ids"][i], int) or isinstance(df["col_ids"][i], int):
+            continue
+        for r in df["row_ids"][i]:
+            for c in df["col_ids"][i]:
+                mat[r][c] = (mat[r][c] + " " + df["text"][i]).strip()
+
+    # Remove empty rows & columns
+    if remove_empty:
+        mat = remove_empty_row(mat)
+        mat = np.array(remove_empty_row(np.array(mat).T.tolist())).T.tolist()
+
+    if repeat_single:
+        new_mat = []
+        for row in mat:
+            if sum([len(c) > 0 for c in row]) == 1:
+                txt = [c for c in row if len(c)][0]
+                new_mat.append([txt for _ in range(len(row))])
+            else:
+                new_mat.append(row)
+        mat = np.array(new_mat)
+
+    return mat
+
+
+def display_markdown(
+    data: List[List[str]], show: bool = True, use_header: bool = True
+) -> str:
+    """
+    Convert a list of lists of strings into a markdown table.
+    If show is True, use_header will be set to True.
+
+    Args:
+        data (list[list[str]]): The table data. The first sublist should contain headers.
+        show (bool, optional): Whether to display the table. Defaults to True.
+        use_header (bool, optional): Whether to use the first sublist as headers. Defaults to True.
+
+    Returns:
+        str: A markdown-formatted table as a string.
+    """
+    if show:
+        use_header = True
+    data = [[re.sub(r'\n', ' ', c) for c in row] for row in data]
+
+    if not len(data):
+        return "EMPTY TABLE"
+
+    max_cols = max(len(row) for row in data)
+    data = [row + [""] * (max_cols - len(row)) for row in data]
+
+    if use_header:
+        header = "| " + " | ".join(data[0]) + " |"
+        separator = "| " + " | ".join(["---"] * max_cols) + " |"
+        body = "\n".join("| " + " | ".join(row) + " |" for row in data[1:])
+        markdown_table = (
+            f"{header}\n{separator}\n{body}" if body else f"{header}\n{separator}"
+        )
+
+        if show:
+            from IPython.display import display, Markdown
+            markdown_table = re.sub(r'\$', r'\\$', markdown_table)
+            markdown_table = re.sub(r'\%', r'\\%', markdown_table)
+            display(Markdown(markdown_table))
+
+    else:
+        markdown_table = "\n".join("| " + " | ".join(row) + " |" for row in data)
+
+    return markdown_table
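
For orientation, here is a small usage sketch, not part of the commit, that exercises the helpers added above on invented toy data. Only the function names and the `text`/`row_ids`/`col_ids` column names come from the file itself; the import path assumes the repository root is on `PYTHONPATH`, and the boxes and table contents are made up for illustration.

```python
# Toy walkthrough of the new post-processing helpers -- invented data, not from Demo.ipynb.
import numpy as np
import pandas as pd

from post_processing.table_struct_pp import assign_boxes, build_markdown, display_markdown

# 1) Assign a word box to candidate cell boxes: the word lies entirely inside
#    the first cell, so only its index is returned.
word = np.array([0.12, 0.12, 0.18, 0.18])   # reference box [x_min, y_min, x_max, y_max]
cells = np.array([
    [0.10, 0.10, 0.30, 0.20],               # fully contains the word
    [0.30, 0.10, 0.50, 0.20],               # does not overlap the word
])
print(assign_boxes(word, cells, mode="cell"))   # -> [0]

# 2) Build and render a 2x2 table from per-fragment row/column assignments.
df = pd.DataFrame({
    "text": ["Model", "Accuracy", "YOLOX", "0.87"],
    "row_ids": [[0], [0], [1], [1]],
    "col_ids": [[0], [1], [0], [1]],
})
mat = build_markdown(df)
print(display_markdown(mat, show=False))
# | Model | Accuracy |
# | --- | --- |
# | YOLOX | 0.87 |
```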