Theo Viel committed
Commit a82013d
Parent(s): 9b6b8b8

post-processing code

Browse files:
- Demo.ipynb +2 -2
- README.md +3 -8
- example-ocr.json +1 -0
- post_processing/__pycache__/page_elt_pp.cpython-312.pyc +0 -0
- post_processing/__pycache__/table_struct_pp.cpython-312.pyc +0 -0
- post_processing/__pycache__/text_pp.cpython-312.pyc +0 -0
- post_processing/__pycache__/wbf.cpython-312.pyc +0 -0
- post_processing/table_struct_pp.py +222 -1
Demo.ipynb CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e799b29f13e7dba664c2e5e9af5866cc10c7fc847fdd32295348cc78cdf9d13f
+size 1085057
README.md CHANGED
@@ -14,7 +14,7 @@ tags:
 - ingestion
 - yolox
 ---
-# Nemoretriever
+# Nemoretriever Table Structure v1
 
 ## **Model Overview**
 
@@ -144,7 +144,7 @@ import matplotlib.pyplot as plt
 from PIL import Image
 
 from model import define_model
-from utils import plot_sample,
+from utils import plot_sample, postprocess_preds_table_structure, reformat_for_plotting
 
 # Load image
 path = "./example.png"
@@ -182,7 +182,7 @@ If you wish to do additional training, [refer to the original repo](https://gith
 3. Advanced post-processing
 
 Additional post-processing might be required to use the model as part of a data extraction pipeline.
-We
+We show how to use the model as part of a table to text pipeline alongside with the [Nemo Retriever OCR](https://huggingface.co/nvidia/nemoretriever-ocr-v1) in the notebook `Demo.ipynb`.
 
 **Disclaimer:**
 We are aware of some issues with the model, and will provide a v2 with improved performance in the future which addresses the following issues:
@@ -240,11 +240,6 @@ The primary evaluation set is a cut of the Azure labels and digital corpora imag
 | row | 76.992 | 81.115 |
 | column | 85.293 | 87.434 |
 
-## Inference:
-
-**Acceleartion Engine**: TensorRT <br>
-**Test hardware**: See [Support Matrix from NIM documentation](https://docs.nvidia.com/nim/ingestion/object-detection/latest/support-matrix.html#)
-
 <!---
 ## Inference:
 
example-ocr.json ADDED
@@ -0,0 +1 @@
+
[{"text": "Judaism", "confidence": 0.9761894491491112, "left": 0.313720703125, "upper": 0.069580078125, "right": 0.41552734375, "lower": 0.04180908203125}, {"text": "Christianity", "confidence": 0.9864673460063902, "left": 0.449462890625, "upper": 0.07061767578125, "right": 0.591796875, "lower": 0.041900634765625}, {"text": "Islam", "confidence": 0.9741032144189129, "left": 0.62353515625, "upper": 0.06451416015625, "right": 0.69091796875, "lower": 0.042205810546875}, {"text": "Buddhism", "confidence": 0.9820241379034715, "left": 0.72412109375, "upper": 0.0650634765625, "right": 0.849609375, "lower": 0.04180908203125}, {"text": "Sikhism", "confidence": 0.970345573215551, "left": 0.880859375, "upper": 0.064697265625, "right": 0.978515625, "lower": 0.0421142578125}, {"text": "Pretrained", "confidence": 0.9936134942698354, "left": 0.0272369384765625, "upper": 0.12359619140625, "right": 0.1575927734375, "lower": 0.09942626953125}, {"text": "MPT", "confidence": 0.9913080780677679, "left": 0.0268707275390625, "upper": 0.1907958984375, "right": 0.08831787109375, "lower": 0.1685791015625}, {"text": "7B", "confidence": 0.9905469329639989, "left": 0.251953125, "upper": 0.1732177734375, "right": 0.28369140625, "lower": 0.153076171875}, {"text": "0.39", "confidence": 0.9985185665573967, "left": 0.34130859375, "upper": 0.1739501953125, "right": 0.3896484375, "lower": 0.1522216796875}, {"text": "0.38", "confidence": 0.9893707907998098, "left": 0.495361328125, "upper": 0.174072265625, "right": 0.54443359375, "lower": 0.15234375}, {"text": "0.31", "confidence": 0.9989534774832673, "left": 0.6328125, "upper": 0.173828125, "right": 0.6796875, "lower": 0.15234375}, {"text": "0.27", "confidence": 0.9956375751248465, "left": 0.76171875, "upper": 0.173583984375, "right": 0.81103515625, "lower": 0.15234375}, {"text": "0.07", "confidence": 0.9942783306227302, "left": 0.91357421875, "upper": 0.173828125, "right": 0.96337890625, "lower": 0.1522216796875}, {"text": "30B", "confidence": 0.972883248728015, "left": 0.239013671875, "upper": 0.2109375, "right": 0.283935546875, "lower": 0.1885986328125}, {"text": "0.33", "confidence": 0.998774187312769, "left": 0.34130859375, "upper": 0.21044921875, "right": 0.3896484375, "lower": 0.188720703125}, {"text": "0.28", "confidence": 0.9836438141378729, "left": 0.49560546875, "upper": 0.210205078125, "right": 0.5439453125, "lower": 0.18896484375}, {"text": "0.20", "confidence": 0.997989728503616, "left": 0.6337890625, "upper": 0.2103271484375, "right": 0.68115234375, "lower": 0.18896484375}, {"text": "0.30", "confidence": 0.9981678680694565, "left": 0.76220703125, "upper": 0.210693359375, "right": 0.81005859375, "lower": 0.1888427734375}, {"text": "0.19", "confidence": 0.9872296722567225, "left": 0.91357421875, "upper": 0.2105712890625, "right": 0.9619140625, "lower": 0.1884765625}, {"text": "7B", "confidence": 0.9876452154809514, "left": 0.252197265625, "upper": 0.2607421875, "right": 0.28369140625, "lower": 0.240234375}, {"text": "0.25", "confidence": 0.9975937481432596, "left": 0.341796875, "upper": 0.26123046875, "right": 0.39013671875, "lower": 0.239501953125}, {"text": "0.35", "confidence": 0.9917687898647339, "left": 0.49560546875, "upper": 0.261474609375, "right": 0.54443359375, "lower": 0.2392578125}, {"text": "0.20", "confidence": 0.9984690982188885, "left": 0.6337890625, "upper": 0.261474609375, "right": 0.68115234375, "lower": 0.2396240234375}, {"text": "0.25", "confidence": 0.997957530803141, "left": 0.76220703125, "upper": 0.26123046875, "right": 0.810546875, 
"lower": 0.2393798828125}, {"text": "0.22", "confidence": 0.9882444380230272, "left": 0.9140625, "upper": 0.26123046875, "right": 0.96240234375, "lower": 0.23974609375}, {"text": "Falcon", "confidence": 0.9981243450662549, "left": 0.0273590087890625, "upper": 0.2783203125, "right": 0.10601806640625, "lower": 0.256591796875}, {"text": "40B", "confidence": 0.9982407007689418, "left": 0.2391357421875, "upper": 0.2978515625, "right": 0.283935546875, "lower": 0.2763671875}, {"text": "0.26", "confidence": 0.9985793226619016, "left": 0.341796875, "upper": 0.2978515625, "right": 0.389892578125, "lower": 0.276123046875}, {"text": "0.28", "confidence": 0.9923507332532226, "left": 0.495849609375, "upper": 0.2978515625, "right": 0.5439453125, "lower": 0.276123046875}, {"text": "0.26", "confidence": 0.9970813602397094, "left": 0.6337890625, "upper": 0.2978515625, "right": 0.68115234375, "lower": 0.276123046875}, {"text": "0.31", "confidence": 0.998671661567936, "left": 0.76220703125, "upper": 0.297607421875, "right": 0.80859375, "lower": 0.276123046875}, {"text": "0.19", "confidence": 0.9882519618141666, "left": 0.9140625, "upper": 0.297607421875, "right": 0.9619140625, "lower": 0.27587890625}, {"text": "7B", "confidence": 0.9918499357919858, "left": 0.252197265625, "upper": 0.3486328125, "right": 0.28369140625, "lower": 0.328369140625}, {"text": "0.37", "confidence": 0.9981141371642034, "left": 0.341552734375, "upper": 0.34912109375, "right": 0.390625, "lower": 0.327392578125}, {"text": "0.30", "confidence": 0.9863860868688986, "left": 0.49560546875, "upper": 0.349609375, "right": 0.5439453125, "lower": 0.32763671875}, {"text": "0.24", "confidence": 0.9976063742143298, "left": 0.63330078125, "upper": 0.348876953125, "right": 0.681640625, "lower": 0.32763671875}, {"text": "0.38", "confidence": 0.9990392507749052, "left": 0.76220703125, "upper": 0.349609375, "right": 0.810546875, "lower": 0.327392578125}, {"text": "0.17", "confidence": 0.9958182080744692, "left": 0.91357421875, "upper": 0.349365234375, "right": 0.96337890625, "lower": 0.32763671875}, {"text": "13B", "confidence": 0.9907532088107024, "left": 0.240234375, "upper": 0.385986328125, "right": 0.2841796875, "lower": 0.364013671875}, {"text": "0.36", "confidence": 0.9990900539759036, "left": 0.341552734375, "upper": 0.385986328125, "right": 0.390380859375, "lower": 0.364013671875}, {"text": "0.26", "confidence": 0.9967458970288499, "left": 0.495849609375, "upper": 0.3857421875, "right": 0.54443359375, "lower": 0.364013671875}, {"text": "0.30", "confidence": 0.9983706373299477, "left": 0.63330078125, "upper": 0.385986328125, "right": 0.681640625, "lower": 0.364013671875}, {"text": "0.37", "confidence": 0.9976059303527898, "left": 0.76220703125, "upper": 0.3857421875, "right": 0.810546875, "lower": 0.363525390625}, {"text": "0.13", "confidence": 0.989847993170092, "left": 0.91357421875, "upper": 0.385986328125, "right": 0.9619140625, "lower": 0.36376953125}, {"text": "LLAMA 1", "confidence": 0.8968659330804545, "left": 0.0275421142578125, "upper": 0.403076171875, "right": 0.125, "lower": 0.381591796875}, {"text": "33B", "confidence": 0.9954893218906407, "left": 0.2391357421875, "upper": 0.422607421875, "right": 0.2841796875, "lower": 0.400634765625}, {"text": "0.35", "confidence": 0.9987272973382014, "left": 0.341796875, "upper": 0.42236328125, "right": 0.3896484375, "lower": 0.400390625}, {"text": "0.27", "confidence": 0.9770261919475443, "left": 0.495361328125, "upper": 0.422119140625, "right": 0.5439453125, "lower": 0.400390625}, {"text": 
"0.29", "confidence": 0.9942585752715364, "left": 0.63330078125, "upper": 0.42236328125, "right": 0.6806640625, "lower": 0.400390625}, {"text": "0.20", "confidence": 0.9972422449309013, "left": 0.76220703125, "upper": 0.42236328125, "right": 0.81005859375, "lower": 0.400634765625}, {"text": "0.18", "confidence": 0.9910378700402914, "left": 0.91357421875, "upper": 0.42236328125, "right": 0.96240234375, "lower": 0.400390625}, {"text": "65B", "confidence": 0.9833687099306082, "left": 0.2391357421875, "upper": 0.458984375, "right": 0.283935546875, "lower": 0.4365234375}, {"text": "0.37", "confidence": 0.9985199995857583, "left": 0.341796875, "upper": 0.458251953125, "right": 0.390380859375, "lower": 0.4365234375}, {"text": "0.27", "confidence": 0.9781837174489308, "left": 0.495361328125, "upper": 0.458251953125, "right": 0.5439453125, "lower": 0.4365234375}, {"text": "0.20", "confidence": 0.9959723809811365, "left": 0.6337890625, "upper": 0.458251953125, "right": 0.68115234375, "lower": 0.436767578125}, {"text": "0.30", "confidence": 0.9881508252015603, "left": 0.76220703125, "upper": 0.458251953125, "right": 0.81005859375, "lower": 0.4365234375}, {"text": "0.19", "confidence": 0.9870585272727334, "left": 0.9140625, "upper": 0.45849609375, "right": 0.9619140625, "lower": 0.43603515625}, {"text": "7B", "confidence": 0.9918677028011398, "left": 0.252197265625, "upper": 0.5087890625, "right": 0.283935546875, "lower": 0.488525390625}, {"text": "0.34", "confidence": 0.9989572858425726, "left": 0.341796875, "upper": 0.509765625, "right": 0.390625, "lower": 0.488037109375}, {"text": "0.28", "confidence": 0.9883034962034247, "left": 0.49560546875, "upper": 0.509765625, "right": 0.54443359375, "lower": 0.488037109375}, {"text": "0.30", "confidence": 0.9986882057244657, "left": 0.63330078125, "upper": 0.509765625, "right": 0.681640625, "lower": 0.488037109375}, {"text": "0.24", "confidence": 0.9975580736636244, "left": 0.76220703125, "upper": 0.509765625, "right": 0.810546875, "lower": 0.488037109375}, {"text": "0.16", "confidence": 0.9966634772643969, "left": 0.91357421875, "upper": 0.509765625, "right": 0.96240234375, "lower": 0.488037109375}, {"text": "13B", "confidence": 0.9669212205137284, "left": 0.2401123046875, "upper": 0.546875, "right": 0.283935546875, "lower": 0.52392578125}, {"text": "0.29", "confidence": 0.9931262832458065, "left": 0.341552734375, "upper": 0.54638671875, "right": 0.389404296875, "lower": 0.52392578125}, {"text": "0.33", "confidence": 0.956158281645661, "left": 0.4951171875, "upper": 0.546875, "right": 0.54345703125, "lower": 0.52392578125}, {"text": "0.35", "confidence": 0.9652406782267843, "left": 0.63330078125, "upper": 0.546875, "right": 0.681640625, "lower": 0.52392578125}, {"text": "0.33", "confidence": 0.9659962979238208, "left": 0.76171875, "upper": 0.54638671875, "right": 0.8095703125, "lower": 0.52392578125}, {"text": "0.19", "confidence": 0.9760530513096785, "left": 0.91357421875, "upper": 0.546875, "right": 0.9619140625, "lower": 0.52392578125}, {"text": "LLAMA 2", "confidence": 0.9230539093920775, "left": 0.027587890625, "upper": 0.56396484375, "right": 0.1273193359375, "lower": 0.5419921875}, {"text": "34B", "confidence": 0.9737839233037625, "left": 0.2391357421875, "upper": 0.5830078125, "right": 0.283935546875, "lower": 0.5615234375}, {"text": "0.31", "confidence": 0.9574870442103371, "left": 0.341796875, "upper": 0.5830078125, "right": 0.388427734375, "lower": 0.56103515625}, {"text": "0.24", "confidence": 0.9459550426122554, "left": 0.49560546875, "upper": 
0.5830078125, "right": 0.54443359375, "lower": 0.5615234375}, {"text": "0.32", "confidence": 0.9652482034330485, "left": 0.63330078125, "upper": 0.5830078125, "right": 0.681640625, "lower": 0.56103515625}, {"text": "0.34", "confidence": 0.9732832840284351, "left": 0.76220703125, "upper": 0.5830078125, "right": 0.810546875, "lower": 0.56103515625}, {"text": "0.28", "confidence": 0.9533701700329483, "left": 0.9140625, "upper": 0.5830078125, "right": 0.9619140625, "lower": 0.56103515625}, {"text": "70B", "confidence": 0.9942666337733455, "left": 0.2384033203125, "upper": 0.6201171875, "right": 0.283935546875, "lower": 0.59814453125}, {"text": "0.42", "confidence": 0.9958264434154595, "left": 0.341796875, "upper": 0.61962890625, "right": 0.39013671875, "lower": 0.5986328125}, {"text": "0.29", "confidence": 0.9821117248018926, "left": 0.495361328125, "upper": 0.6201171875, "right": 0.54345703125, "lower": 0.59814453125}, {"text": "0.34", "confidence": 0.9966167427484915, "left": 0.63330078125, "upper": 0.6201171875, "right": 0.681640625, "lower": 0.59814453125}, {"text": "0.37", "confidence": 0.993297869415919, "left": 0.76220703125, "upper": 0.6201171875, "right": 0.810546875, "lower": 0.59765625}, {"text": "0.20", "confidence": 0.9876292530135815, "left": 0.9140625, "upper": 0.61962890625, "right": 0.96142578125, "lower": 0.59814453125}, {"text": "Fine-tuned", "confidence": 0.9953880606655975, "left": 0.0269317626953125, "upper": 0.67822265625, "right": 0.1640625, "lower": 0.654296875}, {"text": "ChatGPT", "confidence": 0.9969630013721966, "left": 0.0276641845703125, "upper": 0.728515625, "right": 0.1392822265625, "lower": 0.70556640625}, {"text": "0.19", "confidence": 0.9988893534997763, "left": 0.341552734375, "upper": 0.72802734375, "right": 0.390380859375, "lower": 0.70703125}, {"text": "0.16", "confidence": 0.9928872200942891, "left": 0.49560546875, "upper": 0.728515625, "right": 0.54443359375, "lower": 0.70703125}, {"text": "0.21", "confidence": 0.9989062915958875, "left": 0.63330078125, "upper": 0.72802734375, "right": 0.6796875, "lower": 0.70751953125}, {"text": "0.17", "confidence": 0.9985639827258815, "left": 0.76171875, "upper": 0.72802734375, "right": 0.81201171875, "lower": 0.70703125}, {"text": "0.17", "confidence": 0.9965368796068824, "left": 0.91357421875, "upper": 0.728515625, "right": 0.96337890625, "lower": 0.70703125}, {"text": "MPT-instruct", "confidence": 0.9989049972028129, "left": 0.0291900634765625, "upper": 0.76513671875, "right": 0.1865234375, "lower": 0.74365234375}, {"text": "7B", "confidence": 0.984694287266506, "left": 0.252197265625, "upper": 0.76416015625, "right": 0.283447265625, "lower": 0.74365234375}, {"text": "0.35", "confidence": 0.9991214853153084, "left": 0.341552734375, "upper": 0.7646484375, "right": 0.389892578125, "lower": 0.7431640625}, {"text": "0.29", "confidence": 0.974891251290882, "left": 0.495361328125, "upper": 0.76513671875, "right": 0.5439453125, "lower": 0.7431640625}, {"text": "0.33", "confidence": 0.9964501479832892, "left": 0.6328125, "upper": 0.76513671875, "right": 0.68115234375, "lower": 0.7431640625}, {"text": "0.41", "confidence": 0.9985691315320698, "left": 0.76220703125, "upper": 0.7646484375, "right": 0.80810546875, "lower": 0.7431640625}, {"text": "0.14", "confidence": 0.9964564748701925, "left": 0.91357421875, "upper": 0.76513671875, "right": 0.96240234375, "lower": 0.7431640625}, {"text": "Falcon-instruct", "confidence": 0.9571371356184732, "left": 0.02825927734375, "upper": 0.802734375, "right": 0.208740234375, "lower": 
0.77978515625}, {"text": "7B", "confidence": 0.9665162081666453, "left": 0.251953125, "upper": 0.80126953125, "right": 0.283935546875, "lower": 0.7802734375}, {"text": "0.34", "confidence": 0.967693884403376, "left": 0.341552734375, "upper": 0.8017578125, "right": 0.390380859375, "lower": 0.779296875}, {"text": "0.26", "confidence": 0.9520253022323407, "left": 0.49560546875, "upper": 0.8017578125, "right": 0.5439453125, "lower": 0.77978515625}, {"text": "0.30", "confidence": 0.973102993656882, "left": 0.63330078125, "upper": 0.80224609375, "right": 0.68115234375, "lower": 0.77978515625}, {"text": "0.33", "confidence": 0.9718302160365343, "left": 0.76220703125, "upper": 0.8017578125, "right": 0.81005859375, "lower": 0.779296875}, {"text": "0.29", "confidence": 0.9756271335843325, "left": 0.9140625, "upper": 0.8017578125, "right": 0.96142578125, "lower": 0.779296875}, {"text": "7B", "confidence": 0.9913205707632803, "left": 0.252197265625, "upper": 0.85205078125, "right": 0.28369140625, "lower": 0.83154296875}, {"text": "0.55", "confidence": 0.9985149221176529, "left": 0.341796875, "upper": 0.85302734375, "right": 0.389892578125, "lower": 0.8310546875}, {"text": "0.50", "confidence": 0.976761350205919, "left": 0.495849609375, "upper": 0.85302734375, "right": 0.5439453125, "lower": 0.8310546875}, {"text": "0.48", "confidence": 0.9934171281591422, "left": 0.63330078125, "upper": 0.85302734375, "right": 0.681640625, "lower": 0.8310546875}, {"text": "0.45", "confidence": 0.998221423861148, "left": 0.7626953125, "upper": 0.85302734375, "right": 0.810546875, "lower": 0.8310546875}, {"text": "0.62", "confidence": 0.9944353516245061, "left": 0.91357421875, "upper": 0.853515625, "right": 0.96240234375, "lower": 0.83056640625}, {"text": "13B", "confidence": 0.965234102519708, "left": 0.2403564453125, "upper": 0.89013671875, "right": 0.283935546875, "lower": 0.86767578125}, {"text": "0.40", "confidence": 0.976861319747196, "left": 0.341552734375, "upper": 0.8896484375, "right": 0.389892578125, "lower": 0.86767578125}, {"text": "0.50", "confidence": 0.9791019811977275, "left": 0.495361328125, "upper": 0.89013671875, "right": 0.5439453125, "lower": 0.86767578125}, {"text": "0.71", "confidence": 0.9548464227134962, "left": 0.63330078125, "upper": 0.8896484375, "right": 0.6796875, "lower": 0.86767578125}, {"text": "0.40", "confidence": 0.9743567645608693, "left": 0.76220703125, "upper": 0.89013671875, "right": 0.81005859375, "lower": 0.86767578125}, {"text": "0.62", "confidence": 0.9542199227339931, "left": 0.91357421875, "upper": 0.89013671875, "right": 0.9619140625, "lower": 0.86767578125}, {"text": "LLAMA 2-CHAT", "confidence": 0.9861827644038366, "left": 0.027557373046875, "upper": 0.90673828125, "right": 0.1982421875, "lower": 0.88525390625}, {"text": "34B", "confidence": 0.9589954561669843, "left": 0.2391357421875, "upper": 0.92578125, "right": 0.2841796875, "lower": 0.9033203125}, {"text": "0.44", "confidence": 0.9674675641607965, "left": 0.341796875, "upper": 0.92578125, "right": 0.39013671875, "lower": 0.9033203125}, {"text": "0.54", "confidence": 0.9736240446020379, "left": 0.49560546875, "upper": 0.92578125, "right": 0.54443359375, "lower": 0.9033203125}, {"text": "0.63", "confidence": 0.9760913134904838, "left": 0.63330078125, "upper": 0.92578125, "right": 0.68115234375, "lower": 0.9033203125}, {"text": "0.53", "confidence": 0.9796015180729337, "left": 0.76220703125, "upper": 0.92578125, "right": 0.81005859375, "lower": 0.9033203125}, {"text": "0.53", "confidence": 0.9752167869237847, "left": 
0.91357421875, "upper": 0.92626953125, "right": 0.96142578125, "lower": 0.9033203125}, {"text": "70B", "confidence": 0.9937974655878223, "left": 0.238525390625, "upper": 0.96240234375, "right": 0.283935546875, "lower": 0.94091796875}, {"text": "0.47", "confidence": 0.9898123335197536, "left": 0.341796875, "upper": 0.96240234375, "right": 0.390625, "lower": 0.9404296875}, {"text": "0.52", "confidence": 0.9714987532522628, "left": 0.495361328125, "upper": 0.96240234375, "right": 0.54443359375, "lower": 0.93994140625}, {"text": "0.50", "confidence": 0.9964541225349073, "left": 0.63330078125, "upper": 0.962890625, "right": 0.68115234375, "lower": 0.9404296875}, {"text": "0.55", "confidence": 0.980007828744104, "left": 0.76171875, "upper": 0.962890625, "right": 0.810546875, "lower": 0.93994140625}, {"text": "0.50", "confidence": 0.9833386444653252, "left": 0.9140625, "upper": 0.96240234375, "right": 0.9619140625, "lower": 0.9404296875}]
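
The file added above is a flat JSON list of OCR words, each with `text`, `confidence`, and normalized `left`/`upper`/`right`/`lower` coordinates. Below is a minimal loading sketch, not part of this commit: the page size is a placeholder, and since the JSON does not state which of `upper`/`lower` holds the smaller y value, the sketch takes the min/max rather than assuming an orientation.

```python
# Hypothetical loader for the OCR words above -- not part of this commit.
import json

import numpy as np
import pandas as pd

with open("example-ocr.json") as f:
    words = json.load(f)   # list of {"text", "confidence", "left", "upper", "right", "lower"}

df = pd.DataFrame(words)

# Coordinates are normalized to [0, 1]; scale to pixels for a placeholder page size.
width, height = 1024, 1024   # replace with the real image dimensions

# Build [x_min, y_min, x_max, y_max] boxes without assuming which of
# "upper"/"lower" holds the smaller y value.
y_min = np.minimum(df["upper"], df["lower"]) * height
y_max = np.maximum(df["upper"], df["lower"]) * height
df["box"] = [np.array(b) for b in zip(df["left"] * width, y_min, df["right"] * width, y_max)]

print(df[["text", "confidence", "box"]].head())
```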

post_processing/__pycache__/page_elt_pp.cpython-312.pyc ADDED
Binary file (11.6 kB)

post_processing/__pycache__/table_struct_pp.cpython-312.pyc ADDED
Binary file (10.4 kB)

post_processing/__pycache__/text_pp.cpython-312.pyc ADDED
Binary file (10.6 kB)

post_processing/__pycache__/wbf.cpython-312.pyc ADDED
Binary file (14.7 kB)

post_processing/table_struct_pp.py CHANGED
@@ -1 +1,222 @@
-#
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from typing import List, Union, Optional, Literal
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+
+
+def assign_boxes(
+    box: Union[List[float], npt.NDArray[np.float64]],
+    candidate_boxes: npt.NDArray[np.float64],
+    delta: float = 2.0,
+    min_overlap: float = 0.25,
+    mode: Literal["cell", "row", "column"] = "cell",
+) -> npt.NDArray[np.int_]:
+    """
+    Assigns the best candidate boxes to a reference `box` based on overlap.
+
+    If mode is "cell", the overlap is calculated using surface area overlap.
+    If mode is "row", the overlap is calculated using row height overlap.
+    If mode is "column", the overlap is calculated using column width overlap.
+
+    If delta > 1, it will look for multiple matches,
+    using candidates with score >= max_overlap / delta.
+
+    Args:
+        box (list or numpy.ndarray): Reference bounding box [x_min, y_min, x_max, y_max].
+        candidate_boxes (numpy.ndarray [N, 4]): Array of candidate bounding boxes.
+        delta (float, optional): Factor for matches relative to the best overlap. Defaults to 2.0.
+        min_overlap (float, optional): Minimum required overlap for a match. Defaults to 0.25.
+        mode (str, optional): Mode to assign boxes ("cell", "row", or "column"). Defaults to "cell".
+
+    Returns:
+        numpy.ndarray [M]: Indices of the matched boxes sorted by decreasing overlap.
+            Returns an empty array if no matches are found.
+    """
+    if not len(candidate_boxes):
+        return np.array([], dtype=np.int_)
+
+    x0_1, y0_1, x1_1, y1_1 = box
+    x0_2, y0_2, x1_2, y1_2 = (
+        candidate_boxes[:, 0],
+        candidate_boxes[:, 1],
+        candidate_boxes[:, 2],
+        candidate_boxes[:, 3],
+    )
+
+    # Intersection
+    inter_y0 = np.maximum(y0_1, y0_2)
+    inter_y1 = np.minimum(y1_1, y1_2)
+    inter_x0 = np.maximum(x0_1, x0_2)
+    inter_x1 = np.minimum(x1_1, x1_2)
+
+    if mode == "cell":
+        inter_area = np.maximum(0, inter_y1 - inter_y0) * np.maximum(0, inter_x1 - inter_x0)
+        box_area = (y1_1 - y0_1) * (x1_1 - x0_1)
+        overlap = inter_area / (box_area + 1e-6)
+    elif mode == "row":
+        inter_area = np.maximum(0, inter_y1 - inter_y0)
+        box_area = y1_1 - y0_1
+        overlap = inter_area / (box_area + 1e-6)
+    elif mode == "column":
+        inter_area = np.maximum(0, inter_x1 - inter_x0)
+        box_area = x1_1 - x0_1
+        overlap = inter_area / (box_area + 1e-6)
+    else:
+        raise ValueError(f"Invalid mode: {mode}")
+
+    max_overlap = np.max(overlap)
+    if max_overlap <= min_overlap:  # No match
+        return np.array([], dtype=np.int_)
+
+    n = len(np.where(overlap >= (max_overlap / delta))[0]) if delta > 1 else 1
+    matches = np.argsort(-overlap)[:n]
+    return matches
+
+
+def merge_text_in_cell(df_cell: pd.DataFrame) -> pd.DataFrame:
+    """
+    Merges text from multiple rows into a single cell and recalculates its bounding box.
+    Values are sorted by rounded (y, x) coordinates.
+
+    Args:
+        df_cell (pandas.DataFrame): DataFrame containing cells to merge.
+
+    Returns:
+        pandas.DataFrame: Updated DataFrame with merged text and a single bounding box.
+    """
+    boxes = np.stack(df_cell["box"].values)
+
+    df_cell["x"] = (boxes[:, 0] - boxes[:, 0].min()) // 10
+    df_cell["y"] = (boxes[:, 1] - boxes[:, 1].min()) // 10
+    df_cell = df_cell.sort_values(["y", "x"])
+
+    text = " ".join(df_cell["text"].values.tolist())
+    df_cell["text"] = text
+    df_cell = df_cell.head(1)
+    df_cell["box"] = df_cell["cell"]
+    df_cell.drop(["x", "y"], axis=1, inplace=True)
+
+    return df_cell
+
+
+def remove_empty_row(mat: List[List[str]]) -> List[List[str]]:
+    """
+    Remove empty rows from a matrix.
+
+    Args:
+        mat (list[list]): The matrix to remove empty rows from.
+
+    Returns:
+        list[list]: The matrix with empty rows removed.
+    """
+    mat_filter = []
+    for row in mat:
+        if max([len(c) for c in row]):
+            mat_filter.append(row)
+    return mat_filter
+
+
+def build_markdown(
+    df: pd.DataFrame,
+    remove_empty: bool = True,
+    n_rows: Optional[int] = None,
+    repeat_single: bool = False,
+) -> Union[List[List[str]], npt.NDArray[np.str_]]:
+    """
+    Convert a dataframe into a markdown table.
+
+    Args:
+        df (pandas.DataFrame): The dataframe to convert with columns 'col_ids',
+            'row_ids', and 'text'.
+        remove_empty (bool, optional): Whether to remove empty rows & cols. Defaults to True.
+        n_rows (int, optional): Number of rows. Inferred from df if None. Defaults to None.
+        repeat_single (bool, optional): Whether to repeat single element in rows.
+            Defaults to False.
+
+    Returns:
+        list[list[str]] or numpy.ndarray: A list of lists or array representing the markdown table.
+    """
+    df = df.reset_index(drop=True)
+    n_cols = max([np.max(c) for c in df['col_ids'].values])
+    if n_rows is None:
+        n_rows = max([np.max(c) for c in df['row_ids'].values])
+    else:
+        n_rows = max(
+            n_rows - 1,
+            max([np.max(c) for c in df['row_ids'].values])
+        )
+
+    mat = np.empty((n_rows + 1, n_cols + 1), dtype=str).tolist()
+
+    for i in range(len(df)):
+        if isinstance(df["row_ids"][i], int) or isinstance(df["col_ids"][i], int):
+            continue
+        for r in df["row_ids"][i]:
+            for c in df["col_ids"][i]:
+                mat[r][c] = (mat[r][c] + " " + df["text"][i]).strip()
+
+    # Remove empty rows & columns
+    if remove_empty:
+        mat = remove_empty_row(mat)
+        mat = np.array(remove_empty_row(np.array(mat).T.tolist())).T.tolist()
+
+    if repeat_single:
+        new_mat = []
+        for row in mat:
+            if sum([len(c) > 0 for c in row]) == 1:
+                txt = [c for c in row if len(c)][0]
+                new_mat.append([txt for _ in range(len(row))])
+            else:
+                new_mat.append(row)
+        mat = np.array(new_mat)
+
+    return mat
+
+
+def display_markdown(
+    data: List[List[str]], show: bool = True, use_header: bool = True
+) -> str:
+    """
+    Convert a list of lists of strings into a markdown table.
+    If show is True, use_header will be set to True.
+
+    Args:
+        data (list[list[str]]): The table data. The first sublist should contain headers.
+        show (bool, optional): Whether to display the table. Defaults to True.
+        use_header (bool, optional): Whether to use the first sublist as headers. Defaults to True.
+
+    Returns:
+        str: A markdown-formatted table as a string.
+    """
+    if show:
+        use_header = True
+    data = [[re.sub(r'\n', ' ', c) for c in row] for row in data]
+
+    if not len(data):
+        return "EMPTY TABLE"
+
+    max_cols = max(len(row) for row in data)
+    data = [row + [""] * (max_cols - len(row)) for row in data]
+
+    if use_header:
+        header = "| " + " | ".join(data[0]) + " |"
+        separator = "| " + " | ".join(["---"] * max_cols) + " |"
+        body = "\n".join("| " + " | ".join(row) + " |" for row in data[1:])
+        markdown_table = (
+            f"{header}\n{separator}\n{body}" if body else f"{header}\n{separator}"
+        )
+
+        if show:
+            from IPython.display import display, Markdown
+            markdown_table = re.sub(r'\$', r'\\$', markdown_table)
+            markdown_table = re.sub(r'\%', r'\\%', markdown_table)
+            display(Markdown(markdown_table))
+
+    else:
+        markdown_table = "\n".join("| " + " | ".join(row) + " |" for row in data)
+
+    return markdown_table
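
For orientation, here is a small usage sketch, not part of the commit, that exercises the helpers added above on invented toy data. Only the function names and the `text`/`row_ids`/`col_ids` column names come from the file itself; the import path assumes the repository root is on `PYTHONPATH`, and the boxes and table contents are made up for illustration.

```python
# Toy walkthrough of the new post-processing helpers -- invented data, not from Demo.ipynb.
import numpy as np
import pandas as pd

from post_processing.table_struct_pp import assign_boxes, build_markdown, display_markdown

# 1) Assign a word box to candidate cell boxes: the word lies entirely inside
#    the first cell, so only its index is returned.
word = np.array([0.12, 0.12, 0.18, 0.18])   # reference box [x_min, y_min, x_max, y_max]
cells = np.array([
    [0.10, 0.10, 0.30, 0.20],               # fully contains the word
    [0.30, 0.10, 0.50, 0.20],               # does not overlap the word
])
print(assign_boxes(word, cells, mode="cell"))   # -> [0]

# 2) Build and render a 2x2 table from per-fragment row/column assignments.
df = pd.DataFrame({
    "text": ["Model", "Accuracy", "YOLOX", "0.87"],
    "row_ids": [[0], [0], [1], [1]],
    "col_ids": [[0], [1], [0], [1]],
})
mat = build_markdown(df)
print(display_markdown(mat, show=False))
# | Model | Accuracy |
# | --- | --- |
# | YOLOX | 0.87 |
```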