Theo Viel committed
Commit a82013d · 1 Parent(s): 9b6b8b8

post-processing code

Demo.ipynb CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e656cf3a473450457a118dcee7f0c65db9167b9aab09554cb3247f6ee1ebf3ec
- size 779791
+ oid sha256:e799b29f13e7dba664c2e5e9af5866cc10c7fc847fdd32295348cc78cdf9d13f
+ size 1085057
README.md CHANGED
@@ -14,7 +14,7 @@ tags:
  - ingestion
  - yolox
  ---
- # Nemoretriever Graphic Element v1
+ # Nemoretriever Table Structure v1

  ## **Model Overview**

@@ -144,7 +144,7 @@ import matplotlib.pyplot as plt
  from PIL import Image

  from model import define_model
- from utils import plot_sample, postprocess_preds_graphic_element, reformat_for_plotting
+ from utils import plot_sample, postprocess_preds_table_structure, reformat_for_plotting

  # Load image
  path = "./example.png"
@@ -182,7 +182,7 @@ If you wish to do additional training, [refer to the original repo](https://gith
  3. Advanced post-processing

  Additional post-processing might be required to use the model as part of a data extraction pipeline.
- We provide examples in the notebook `Demo.ipynb`.
+ We show how to use the model as part of a table-to-text pipeline alongside the [Nemo Retriever OCR](https://huggingface.co/nvidia/nemoretriever-ocr-v1) in the notebook `Demo.ipynb`.

  **Disclaimer:**
  We are aware of some issues with the model, and will provide a v2 with improved performance in the future which addresses the following issues:
@@ -240,11 +240,6 @@ The primary evaluation set is a cut of the Azure labels and digital corpora imag
  | row | 76.992 | 81.115 |
  | column | 85.293 | 87.434 |

- ## Inference:
-
- **Acceleartion Engine**: TensorRT <br>
- **Test hardware**: See [Support Matrix from NIM documentation](https://docs.nvidia.com/nim/ingestion/object-detection/latest/support-matrix.html#)
-
  <!---
  ## Inference:

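The post-processing note added above points to `Demo.ipynb` for the full table-to-text pipeline. As a rough, minimal sketch of how the pieces in this commit could fit together (the `table_to_markdown` helper below is illustrative and not part of the commit; `row_boxes` and `col_boxes` stand in for the table-structure model's row and column detections and are assumed to share the OCR output's normalized `[x_min, y_min, x_max, y_max]` convention):

```python
# Illustrative glue only; the shipped Demo.ipynb may organize this differently.
import pandas as pd

from post_processing.table_struct_pp import assign_boxes, build_markdown, display_markdown


def table_to_markdown(words, row_boxes, col_boxes):
    """words: parsed OCR output (e.g. example-ocr.json); row_boxes / col_boxes:
    (N, 4) row and column detections from the table-structure model."""
    df = pd.DataFrame(
        {
            "text": [w["text"] for w in words],
            "box": [
                [w["left"], min(w["upper"], w["lower"]),
                 w["right"], max(w["upper"], w["lower"])]
                for w in words
            ],
        }
    )

    # Assign every OCR word to the row(s) and column(s) it overlaps with.
    df["row_ids"] = [assign_boxes(b, row_boxes, mode="row") for b in df["box"]]
    df["col_ids"] = [assign_boxes(b, col_boxes, mode="column") for b in df["box"]]

    # Keep only words that matched at least one row and one column.
    keep = [len(r) > 0 and len(c) > 0 for r, c in zip(df["row_ids"], df["col_ids"])]
    df = df[keep].reset_index(drop=True)

    # Fill the row/column grid and render it as a markdown table.
    grid = build_markdown(df)
    return display_markdown(grid, show=False)
```

The sketch only shows the intended data flow from OCR words to a markdown table; the notebook remains the reference for the actual pipeline.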
example-ocr.json ADDED
@@ -0,0 +1 @@
+ [{"text": "Judaism", "confidence": 0.9761894491491112, "left": 0.313720703125, "upper": 0.069580078125, "right": 0.41552734375, "lower": 0.04180908203125}, {"text": "Christianity", "confidence": 0.9864673460063902, "left": 0.449462890625, "upper": 0.07061767578125, "right": 0.591796875, "lower": 0.041900634765625}, {"text": "Islam", "confidence": 0.9741032144189129, "left": 0.62353515625, "upper": 0.06451416015625, "right": 0.69091796875, "lower": 0.042205810546875}, {"text": "Buddhism", "confidence": 0.9820241379034715, "left": 0.72412109375, "upper": 0.0650634765625, "right": 0.849609375, "lower": 0.04180908203125}, {"text": "Sikhism", "confidence": 0.970345573215551, "left": 0.880859375, "upper": 0.064697265625, "right": 0.978515625, "lower": 0.0421142578125}, {"text": "Pretrained", "confidence": 0.9936134942698354, "left": 0.0272369384765625, "upper": 0.12359619140625, "right": 0.1575927734375, "lower": 0.09942626953125}, {"text": "MPT", "confidence": 0.9913080780677679, "left": 0.0268707275390625, "upper": 0.1907958984375, "right": 0.08831787109375, "lower": 0.1685791015625}, {"text": "7B", "confidence": 0.9905469329639989, "left": 0.251953125, "upper": 0.1732177734375, "right": 0.28369140625, "lower": 0.153076171875}, {"text": "0.39", "confidence": 0.9985185665573967, "left": 0.34130859375, "upper": 0.1739501953125, "right": 0.3896484375, "lower": 0.1522216796875}, {"text": "0.38", "confidence": 0.9893707907998098, "left": 0.495361328125, "upper": 0.174072265625, "right": 0.54443359375, "lower": 0.15234375}, {"text": "0.31", "confidence": 0.9989534774832673, "left": 0.6328125, "upper": 0.173828125, "right": 0.6796875, "lower": 0.15234375}, {"text": "0.27", "confidence": 0.9956375751248465, "left": 0.76171875, "upper": 0.173583984375, "right": 0.81103515625, "lower": 0.15234375}, {"text": "0.07", "confidence": 0.9942783306227302, "left": 0.91357421875, "upper": 0.173828125, "right": 0.96337890625, "lower": 0.1522216796875}, {"text": "30B", "confidence": 0.972883248728015, "left": 0.239013671875, "upper": 0.2109375, "right": 0.283935546875, "lower": 0.1885986328125}, {"text": "0.33", "confidence": 0.998774187312769, "left": 0.34130859375, "upper": 0.21044921875, "right": 0.3896484375, "lower": 0.188720703125}, {"text": "0.28", "confidence": 0.9836438141378729, "left": 0.49560546875, "upper": 0.210205078125, "right": 0.5439453125, "lower": 0.18896484375}, {"text": "0.20", "confidence": 0.997989728503616, "left": 0.6337890625, "upper": 0.2103271484375, "right": 0.68115234375, "lower": 0.18896484375}, {"text": "0.30", "confidence": 0.9981678680694565, "left": 0.76220703125, "upper": 0.210693359375, "right": 0.81005859375, "lower": 0.1888427734375}, {"text": "0.19", "confidence": 0.9872296722567225, "left": 0.91357421875, "upper": 0.2105712890625, "right": 0.9619140625, "lower": 0.1884765625}, {"text": "7B", "confidence": 0.9876452154809514, "left": 0.252197265625, "upper": 0.2607421875, "right": 0.28369140625, "lower": 0.240234375}, {"text": "0.25", "confidence": 0.9975937481432596, "left": 0.341796875, "upper": 0.26123046875, "right": 0.39013671875, "lower": 0.239501953125}, {"text": "0.35", "confidence": 0.9917687898647339, "left": 0.49560546875, "upper": 0.261474609375, "right": 0.54443359375, "lower": 0.2392578125}, {"text": "0.20", "confidence": 0.9984690982188885, "left": 0.6337890625, "upper": 0.261474609375, "right": 0.68115234375, "lower": 0.2396240234375}, {"text": "0.25", "confidence": 0.997957530803141, "left": 0.76220703125, "upper": 0.26123046875, "right": 0.810546875, 
"lower": 0.2393798828125}, {"text": "0.22", "confidence": 0.9882444380230272, "left": 0.9140625, "upper": 0.26123046875, "right": 0.96240234375, "lower": 0.23974609375}, {"text": "Falcon", "confidence": 0.9981243450662549, "left": 0.0273590087890625, "upper": 0.2783203125, "right": 0.10601806640625, "lower": 0.256591796875}, {"text": "40B", "confidence": 0.9982407007689418, "left": 0.2391357421875, "upper": 0.2978515625, "right": 0.283935546875, "lower": 0.2763671875}, {"text": "0.26", "confidence": 0.9985793226619016, "left": 0.341796875, "upper": 0.2978515625, "right": 0.389892578125, "lower": 0.276123046875}, {"text": "0.28", "confidence": 0.9923507332532226, "left": 0.495849609375, "upper": 0.2978515625, "right": 0.5439453125, "lower": 0.276123046875}, {"text": "0.26", "confidence": 0.9970813602397094, "left": 0.6337890625, "upper": 0.2978515625, "right": 0.68115234375, "lower": 0.276123046875}, {"text": "0.31", "confidence": 0.998671661567936, "left": 0.76220703125, "upper": 0.297607421875, "right": 0.80859375, "lower": 0.276123046875}, {"text": "0.19", "confidence": 0.9882519618141666, "left": 0.9140625, "upper": 0.297607421875, "right": 0.9619140625, "lower": 0.27587890625}, {"text": "7B", "confidence": 0.9918499357919858, "left": 0.252197265625, "upper": 0.3486328125, "right": 0.28369140625, "lower": 0.328369140625}, {"text": "0.37", "confidence": 0.9981141371642034, "left": 0.341552734375, "upper": 0.34912109375, "right": 0.390625, "lower": 0.327392578125}, {"text": "0.30", "confidence": 0.9863860868688986, "left": 0.49560546875, "upper": 0.349609375, "right": 0.5439453125, "lower": 0.32763671875}, {"text": "0.24", "confidence": 0.9976063742143298, "left": 0.63330078125, "upper": 0.348876953125, "right": 0.681640625, "lower": 0.32763671875}, {"text": "0.38", "confidence": 0.9990392507749052, "left": 0.76220703125, "upper": 0.349609375, "right": 0.810546875, "lower": 0.327392578125}, {"text": "0.17", "confidence": 0.9958182080744692, "left": 0.91357421875, "upper": 0.349365234375, "right": 0.96337890625, "lower": 0.32763671875}, {"text": "13B", "confidence": 0.9907532088107024, "left": 0.240234375, "upper": 0.385986328125, "right": 0.2841796875, "lower": 0.364013671875}, {"text": "0.36", "confidence": 0.9990900539759036, "left": 0.341552734375, "upper": 0.385986328125, "right": 0.390380859375, "lower": 0.364013671875}, {"text": "0.26", "confidence": 0.9967458970288499, "left": 0.495849609375, "upper": 0.3857421875, "right": 0.54443359375, "lower": 0.364013671875}, {"text": "0.30", "confidence": 0.9983706373299477, "left": 0.63330078125, "upper": 0.385986328125, "right": 0.681640625, "lower": 0.364013671875}, {"text": "0.37", "confidence": 0.9976059303527898, "left": 0.76220703125, "upper": 0.3857421875, "right": 0.810546875, "lower": 0.363525390625}, {"text": "0.13", "confidence": 0.989847993170092, "left": 0.91357421875, "upper": 0.385986328125, "right": 0.9619140625, "lower": 0.36376953125}, {"text": "LLAMA 1", "confidence": 0.8968659330804545, "left": 0.0275421142578125, "upper": 0.403076171875, "right": 0.125, "lower": 0.381591796875}, {"text": "33B", "confidence": 0.9954893218906407, "left": 0.2391357421875, "upper": 0.422607421875, "right": 0.2841796875, "lower": 0.400634765625}, {"text": "0.35", "confidence": 0.9987272973382014, "left": 0.341796875, "upper": 0.42236328125, "right": 0.3896484375, "lower": 0.400390625}, {"text": "0.27", "confidence": 0.9770261919475443, "left": 0.495361328125, "upper": 0.422119140625, "right": 0.5439453125, "lower": 0.400390625}, {"text": 
"0.29", "confidence": 0.9942585752715364, "left": 0.63330078125, "upper": 0.42236328125, "right": 0.6806640625, "lower": 0.400390625}, {"text": "0.20", "confidence": 0.9972422449309013, "left": 0.76220703125, "upper": 0.42236328125, "right": 0.81005859375, "lower": 0.400634765625}, {"text": "0.18", "confidence": 0.9910378700402914, "left": 0.91357421875, "upper": 0.42236328125, "right": 0.96240234375, "lower": 0.400390625}, {"text": "65B", "confidence": 0.9833687099306082, "left": 0.2391357421875, "upper": 0.458984375, "right": 0.283935546875, "lower": 0.4365234375}, {"text": "0.37", "confidence": 0.9985199995857583, "left": 0.341796875, "upper": 0.458251953125, "right": 0.390380859375, "lower": 0.4365234375}, {"text": "0.27", "confidence": 0.9781837174489308, "left": 0.495361328125, "upper": 0.458251953125, "right": 0.5439453125, "lower": 0.4365234375}, {"text": "0.20", "confidence": 0.9959723809811365, "left": 0.6337890625, "upper": 0.458251953125, "right": 0.68115234375, "lower": 0.436767578125}, {"text": "0.30", "confidence": 0.9881508252015603, "left": 0.76220703125, "upper": 0.458251953125, "right": 0.81005859375, "lower": 0.4365234375}, {"text": "0.19", "confidence": 0.9870585272727334, "left": 0.9140625, "upper": 0.45849609375, "right": 0.9619140625, "lower": 0.43603515625}, {"text": "7B", "confidence": 0.9918677028011398, "left": 0.252197265625, "upper": 0.5087890625, "right": 0.283935546875, "lower": 0.488525390625}, {"text": "0.34", "confidence": 0.9989572858425726, "left": 0.341796875, "upper": 0.509765625, "right": 0.390625, "lower": 0.488037109375}, {"text": "0.28", "confidence": 0.9883034962034247, "left": 0.49560546875, "upper": 0.509765625, "right": 0.54443359375, "lower": 0.488037109375}, {"text": "0.30", "confidence": 0.9986882057244657, "left": 0.63330078125, "upper": 0.509765625, "right": 0.681640625, "lower": 0.488037109375}, {"text": "0.24", "confidence": 0.9975580736636244, "left": 0.76220703125, "upper": 0.509765625, "right": 0.810546875, "lower": 0.488037109375}, {"text": "0.16", "confidence": 0.9966634772643969, "left": 0.91357421875, "upper": 0.509765625, "right": 0.96240234375, "lower": 0.488037109375}, {"text": "13B", "confidence": 0.9669212205137284, "left": 0.2401123046875, "upper": 0.546875, "right": 0.283935546875, "lower": 0.52392578125}, {"text": "0.29", "confidence": 0.9931262832458065, "left": 0.341552734375, "upper": 0.54638671875, "right": 0.389404296875, "lower": 0.52392578125}, {"text": "0.33", "confidence": 0.956158281645661, "left": 0.4951171875, "upper": 0.546875, "right": 0.54345703125, "lower": 0.52392578125}, {"text": "0.35", "confidence": 0.9652406782267843, "left": 0.63330078125, "upper": 0.546875, "right": 0.681640625, "lower": 0.52392578125}, {"text": "0.33", "confidence": 0.9659962979238208, "left": 0.76171875, "upper": 0.54638671875, "right": 0.8095703125, "lower": 0.52392578125}, {"text": "0.19", "confidence": 0.9760530513096785, "left": 0.91357421875, "upper": 0.546875, "right": 0.9619140625, "lower": 0.52392578125}, {"text": "LLAMA 2", "confidence": 0.9230539093920775, "left": 0.027587890625, "upper": 0.56396484375, "right": 0.1273193359375, "lower": 0.5419921875}, {"text": "34B", "confidence": 0.9737839233037625, "left": 0.2391357421875, "upper": 0.5830078125, "right": 0.283935546875, "lower": 0.5615234375}, {"text": "0.31", "confidence": 0.9574870442103371, "left": 0.341796875, "upper": 0.5830078125, "right": 0.388427734375, "lower": 0.56103515625}, {"text": "0.24", "confidence": 0.9459550426122554, "left": 0.49560546875, "upper": 
0.5830078125, "right": 0.54443359375, "lower": 0.5615234375}, {"text": "0.32", "confidence": 0.9652482034330485, "left": 0.63330078125, "upper": 0.5830078125, "right": 0.681640625, "lower": 0.56103515625}, {"text": "0.34", "confidence": 0.9732832840284351, "left": 0.76220703125, "upper": 0.5830078125, "right": 0.810546875, "lower": 0.56103515625}, {"text": "0.28", "confidence": 0.9533701700329483, "left": 0.9140625, "upper": 0.5830078125, "right": 0.9619140625, "lower": 0.56103515625}, {"text": "70B", "confidence": 0.9942666337733455, "left": 0.2384033203125, "upper": 0.6201171875, "right": 0.283935546875, "lower": 0.59814453125}, {"text": "0.42", "confidence": 0.9958264434154595, "left": 0.341796875, "upper": 0.61962890625, "right": 0.39013671875, "lower": 0.5986328125}, {"text": "0.29", "confidence": 0.9821117248018926, "left": 0.495361328125, "upper": 0.6201171875, "right": 0.54345703125, "lower": 0.59814453125}, {"text": "0.34", "confidence": 0.9966167427484915, "left": 0.63330078125, "upper": 0.6201171875, "right": 0.681640625, "lower": 0.59814453125}, {"text": "0.37", "confidence": 0.993297869415919, "left": 0.76220703125, "upper": 0.6201171875, "right": 0.810546875, "lower": 0.59765625}, {"text": "0.20", "confidence": 0.9876292530135815, "left": 0.9140625, "upper": 0.61962890625, "right": 0.96142578125, "lower": 0.59814453125}, {"text": "Fine-tuned", "confidence": 0.9953880606655975, "left": 0.0269317626953125, "upper": 0.67822265625, "right": 0.1640625, "lower": 0.654296875}, {"text": "ChatGPT", "confidence": 0.9969630013721966, "left": 0.0276641845703125, "upper": 0.728515625, "right": 0.1392822265625, "lower": 0.70556640625}, {"text": "0.19", "confidence": 0.9988893534997763, "left": 0.341552734375, "upper": 0.72802734375, "right": 0.390380859375, "lower": 0.70703125}, {"text": "0.16", "confidence": 0.9928872200942891, "left": 0.49560546875, "upper": 0.728515625, "right": 0.54443359375, "lower": 0.70703125}, {"text": "0.21", "confidence": 0.9989062915958875, "left": 0.63330078125, "upper": 0.72802734375, "right": 0.6796875, "lower": 0.70751953125}, {"text": "0.17", "confidence": 0.9985639827258815, "left": 0.76171875, "upper": 0.72802734375, "right": 0.81201171875, "lower": 0.70703125}, {"text": "0.17", "confidence": 0.9965368796068824, "left": 0.91357421875, "upper": 0.728515625, "right": 0.96337890625, "lower": 0.70703125}, {"text": "MPT-instruct", "confidence": 0.9989049972028129, "left": 0.0291900634765625, "upper": 0.76513671875, "right": 0.1865234375, "lower": 0.74365234375}, {"text": "7B", "confidence": 0.984694287266506, "left": 0.252197265625, "upper": 0.76416015625, "right": 0.283447265625, "lower": 0.74365234375}, {"text": "0.35", "confidence": 0.9991214853153084, "left": 0.341552734375, "upper": 0.7646484375, "right": 0.389892578125, "lower": 0.7431640625}, {"text": "0.29", "confidence": 0.974891251290882, "left": 0.495361328125, "upper": 0.76513671875, "right": 0.5439453125, "lower": 0.7431640625}, {"text": "0.33", "confidence": 0.9964501479832892, "left": 0.6328125, "upper": 0.76513671875, "right": 0.68115234375, "lower": 0.7431640625}, {"text": "0.41", "confidence": 0.9985691315320698, "left": 0.76220703125, "upper": 0.7646484375, "right": 0.80810546875, "lower": 0.7431640625}, {"text": "0.14", "confidence": 0.9964564748701925, "left": 0.91357421875, "upper": 0.76513671875, "right": 0.96240234375, "lower": 0.7431640625}, {"text": "Falcon-instruct", "confidence": 0.9571371356184732, "left": 0.02825927734375, "upper": 0.802734375, "right": 0.208740234375, "lower": 
0.77978515625}, {"text": "7B", "confidence": 0.9665162081666453, "left": 0.251953125, "upper": 0.80126953125, "right": 0.283935546875, "lower": 0.7802734375}, {"text": "0.34", "confidence": 0.967693884403376, "left": 0.341552734375, "upper": 0.8017578125, "right": 0.390380859375, "lower": 0.779296875}, {"text": "0.26", "confidence": 0.9520253022323407, "left": 0.49560546875, "upper": 0.8017578125, "right": 0.5439453125, "lower": 0.77978515625}, {"text": "0.30", "confidence": 0.973102993656882, "left": 0.63330078125, "upper": 0.80224609375, "right": 0.68115234375, "lower": 0.77978515625}, {"text": "0.33", "confidence": 0.9718302160365343, "left": 0.76220703125, "upper": 0.8017578125, "right": 0.81005859375, "lower": 0.779296875}, {"text": "0.29", "confidence": 0.9756271335843325, "left": 0.9140625, "upper": 0.8017578125, "right": 0.96142578125, "lower": 0.779296875}, {"text": "7B", "confidence": 0.9913205707632803, "left": 0.252197265625, "upper": 0.85205078125, "right": 0.28369140625, "lower": 0.83154296875}, {"text": "0.55", "confidence": 0.9985149221176529, "left": 0.341796875, "upper": 0.85302734375, "right": 0.389892578125, "lower": 0.8310546875}, {"text": "0.50", "confidence": 0.976761350205919, "left": 0.495849609375, "upper": 0.85302734375, "right": 0.5439453125, "lower": 0.8310546875}, {"text": "0.48", "confidence": 0.9934171281591422, "left": 0.63330078125, "upper": 0.85302734375, "right": 0.681640625, "lower": 0.8310546875}, {"text": "0.45", "confidence": 0.998221423861148, "left": 0.7626953125, "upper": 0.85302734375, "right": 0.810546875, "lower": 0.8310546875}, {"text": "0.62", "confidence": 0.9944353516245061, "left": 0.91357421875, "upper": 0.853515625, "right": 0.96240234375, "lower": 0.83056640625}, {"text": "13B", "confidence": 0.965234102519708, "left": 0.2403564453125, "upper": 0.89013671875, "right": 0.283935546875, "lower": 0.86767578125}, {"text": "0.40", "confidence": 0.976861319747196, "left": 0.341552734375, "upper": 0.8896484375, "right": 0.389892578125, "lower": 0.86767578125}, {"text": "0.50", "confidence": 0.9791019811977275, "left": 0.495361328125, "upper": 0.89013671875, "right": 0.5439453125, "lower": 0.86767578125}, {"text": "0.71", "confidence": 0.9548464227134962, "left": 0.63330078125, "upper": 0.8896484375, "right": 0.6796875, "lower": 0.86767578125}, {"text": "0.40", "confidence": 0.9743567645608693, "left": 0.76220703125, "upper": 0.89013671875, "right": 0.81005859375, "lower": 0.86767578125}, {"text": "0.62", "confidence": 0.9542199227339931, "left": 0.91357421875, "upper": 0.89013671875, "right": 0.9619140625, "lower": 0.86767578125}, {"text": "LLAMA 2-CHAT", "confidence": 0.9861827644038366, "left": 0.027557373046875, "upper": 0.90673828125, "right": 0.1982421875, "lower": 0.88525390625}, {"text": "34B", "confidence": 0.9589954561669843, "left": 0.2391357421875, "upper": 0.92578125, "right": 0.2841796875, "lower": 0.9033203125}, {"text": "0.44", "confidence": 0.9674675641607965, "left": 0.341796875, "upper": 0.92578125, "right": 0.39013671875, "lower": 0.9033203125}, {"text": "0.54", "confidence": 0.9736240446020379, "left": 0.49560546875, "upper": 0.92578125, "right": 0.54443359375, "lower": 0.9033203125}, {"text": "0.63", "confidence": 0.9760913134904838, "left": 0.63330078125, "upper": 0.92578125, "right": 0.68115234375, "lower": 0.9033203125}, {"text": "0.53", "confidence": 0.9796015180729337, "left": 0.76220703125, "upper": 0.92578125, "right": 0.81005859375, "lower": 0.9033203125}, {"text": "0.53", "confidence": 0.9752167869237847, "left": 
0.91357421875, "upper": 0.92626953125, "right": 0.96142578125, "lower": 0.9033203125}, {"text": "70B", "confidence": 0.9937974655878223, "left": 0.238525390625, "upper": 0.96240234375, "right": 0.283935546875, "lower": 0.94091796875}, {"text": "0.47", "confidence": 0.9898123335197536, "left": 0.341796875, "upper": 0.96240234375, "right": 0.390625, "lower": 0.9404296875}, {"text": "0.52", "confidence": 0.9714987532522628, "left": 0.495361328125, "upper": 0.96240234375, "right": 0.54443359375, "lower": 0.93994140625}, {"text": "0.50", "confidence": 0.9964541225349073, "left": 0.63330078125, "upper": 0.962890625, "right": 0.68115234375, "lower": 0.9404296875}, {"text": "0.55", "confidence": 0.980007828744104, "left": 0.76171875, "upper": 0.962890625, "right": 0.810546875, "lower": 0.93994140625}, {"text": "0.50", "confidence": 0.9833386444653252, "left": 0.9140625, "upper": 0.96240234375, "right": 0.9619140625, "lower": 0.9404296875}]
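The added `example-ocr.json` is a flat list of word-level OCR results, each with `text`, `confidence`, and page-normalized `left`, `upper`, `right`, `lower` coordinates. A small snippet for inspecting it (the pixel size used for scaling and the vertical-origin convention are assumptions, not part of the commit):

```python
import json

with open("example-ocr.json") as f:
    words = json.load(f)

width, height = 1024, 1024  # assumed page size in pixels, for illustration only

for w in words[:5]:
    x_min = w["left"] * width
    x_max = w["right"] * width
    # "upper" / "lower" are normalized vertical coordinates; take min/max so the
    # box is valid regardless of whether the origin is at the top or the bottom.
    y_min = min(w["upper"], w["lower"]) * height
    y_max = max(w["upper"], w["lower"]) * height
    print(f'{w["text"]!r} ({w["confidence"]:.2f}): '
          f'[{x_min:.0f}, {y_min:.0f}, {x_max:.0f}, {y_max:.0f}]')
```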
post_processing/__pycache__/page_elt_pp.cpython-312.pyc ADDED
Binary file (11.6 kB)

post_processing/__pycache__/table_struct_pp.cpython-312.pyc ADDED
Binary file (10.4 kB)

post_processing/__pycache__/text_pp.cpython-312.pyc ADDED
Binary file (10.6 kB)

post_processing/__pycache__/wbf.cpython-312.pyc ADDED
Binary file (14.7 kB)
post_processing/table_struct_pp.py CHANGED
@@ -1 +1,222 @@
- # TODO
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ import re
+ from typing import List, Union, Optional, Literal
+ import numpy as np
+ import numpy.typing as npt
+ import pandas as pd
+
+
+ def assign_boxes(
+     box: Union[List[float], npt.NDArray[np.float64]],
+     candidate_boxes: npt.NDArray[np.float64],
+     delta: float = 2.0,
+     min_overlap: float = 0.25,
+     mode: Literal["cell", "row", "column"] = "cell",
+ ) -> npt.NDArray[np.int_]:
+     """
+     Assigns the best candidate boxes to a reference `box` based on overlap.
+
+     If mode is "cell", the overlap is calculated using surface area overlap.
+     If mode is "row", the overlap is calculated using row height overlap.
+     If mode is "column", the overlap is calculated using column width overlap.
+
+     If delta > 1, it will look for multiple matches,
+     using candidates with score >= max_overlap / delta.
+
+     Args:
+         box (list or numpy.ndarray): Reference bounding box [x_min, y_min, x_max, y_max].
+         candidate_boxes (numpy.ndarray [N, 4]): Array of candidate bounding boxes.
+         delta (float, optional): Factor for matches relative to the best overlap. Defaults to 2.0.
+         min_overlap (float, optional): Minimum required overlap for a match. Defaults to 0.25.
+         mode (str, optional): Mode to assign boxes ("cell", "row", or "column"). Defaults to "cell".
+
+     Returns:
+         numpy.ndarray [M]: Indices of the matched boxes sorted by decreasing overlap.
+             Returns an empty array if no matches are found.
+     """
+     if not len(candidate_boxes):
+         return np.array([], dtype=np.int_)
+
+     x0_1, y0_1, x1_1, y1_1 = box
+     x0_2, y0_2, x1_2, y1_2 = (
+         candidate_boxes[:, 0],
+         candidate_boxes[:, 1],
+         candidate_boxes[:, 2],
+         candidate_boxes[:, 3],
+     )
+
+     # Intersection
+     inter_y0 = np.maximum(y0_1, y0_2)
+     inter_y1 = np.minimum(y1_1, y1_2)
+     inter_x0 = np.maximum(x0_1, x0_2)
+     inter_x1 = np.minimum(x1_1, x1_2)
+
+     if mode == "cell":
+         inter_area = np.maximum(0, inter_y1 - inter_y0) * np.maximum(0, inter_x1 - inter_x0)
+         box_area = (y1_1 - y0_1) * (x1_1 - x0_1)
+         overlap = inter_area / (box_area + 1e-6)
+     elif mode == "row":
+         inter_area = np.maximum(0, inter_y1 - inter_y0)
+         box_area = y1_1 - y0_1
+         overlap = inter_area / (box_area + 1e-6)
+     elif mode == "column":
+         inter_area = np.maximum(0, inter_x1 - inter_x0)
+         box_area = x1_1 - x0_1
+         overlap = inter_area / (box_area + 1e-6)
+     else:
+         raise ValueError(f"Invalid mode: {mode}")
+
+     max_overlap = np.max(overlap)
+     if max_overlap <= min_overlap:  # No match
+         return np.array([], dtype=np.int_)
+
+     n = len(np.where(overlap >= (max_overlap / delta))[0]) if delta > 1 else 1
+     matches = np.argsort(-overlap)[:n]
+     return matches
+
+
+ def merge_text_in_cell(df_cell: pd.DataFrame) -> pd.DataFrame:
+     """
+     Merges text from multiple rows into a single cell and recalculates its bounding box.
+     Values are sorted by rounded (y, x) coordinates.
+
+     Args:
+         df_cell (pandas.DataFrame): DataFrame containing cells to merge.
+
+     Returns:
+         pandas.DataFrame: Updated DataFrame with merged text and a single bounding box.
+     """
+     boxes = np.stack(df_cell["box"].values)
+
+     df_cell["x"] = (boxes[:, 0] - boxes[:, 0].min()) // 10
+     df_cell["y"] = (boxes[:, 1] - boxes[:, 1].min()) // 10
+     df_cell = df_cell.sort_values(["y", "x"])
+
+     text = " ".join(df_cell["text"].values.tolist())
+     df_cell["text"] = text
+     df_cell = df_cell.head(1)
+     df_cell["box"] = df_cell["cell"]
+     df_cell.drop(["x", "y"], axis=1, inplace=True)
+
+     return df_cell
+
+
+ def remove_empty_row(mat: List[List[str]]) -> List[List[str]]:
+     """
+     Remove empty rows from a matrix.
+
+     Args:
+         mat (list[list]): The matrix to remove empty rows from.
+
+     Returns:
+         list[list]: The matrix with empty rows removed.
+     """
+     mat_filter = []
+     for row in mat:
+         if max([len(c) for c in row]):
+             mat_filter.append(row)
+     return mat_filter
+
+
+ def build_markdown(
+     df: pd.DataFrame,
+     remove_empty: bool = True,
+     n_rows: Optional[int] = None,
+     repeat_single: bool = False,
+ ) -> Union[List[List[str]], npt.NDArray[np.str_]]:
+     """
+     Convert a dataframe into a markdown table.
+
+     Args:
+         df (pandas.DataFrame): The dataframe to convert with columns 'col_ids',
+             'row_ids', and 'text'.
+         remove_empty (bool, optional): Whether to remove empty rows & cols. Defaults to True.
+         n_rows (int, optional): Number of rows. Inferred from df if None. Defaults to None.
+         repeat_single (bool, optional): Whether to repeat single element in rows.
+             Defaults to False.
+
+     Returns:
+         list[list[str]] or numpy.ndarray: A list of lists or array representing the markdown table.
+     """
+     df = df.reset_index(drop=True)
+     n_cols = max([np.max(c) for c in df['col_ids'].values])
+     if n_rows is None:
+         n_rows = max([np.max(c) for c in df['row_ids'].values])
+     else:
+         n_rows = max(
+             n_rows - 1,
+             max([np.max(c) for c in df['row_ids'].values])
+         )
+
+     mat = np.empty((n_rows + 1, n_cols + 1), dtype=str).tolist()
+
+     for i in range(len(df)):
+         if isinstance(df["row_ids"][i], int) or isinstance(df["col_ids"][i], int):
+             continue
+         for r in df["row_ids"][i]:
+             for c in df["col_ids"][i]:
+                 mat[r][c] = (mat[r][c] + " " + df["text"][i]).strip()
+
+     # Remove empty rows & columns
+     if remove_empty:
+         mat = remove_empty_row(mat)
+         mat = np.array(remove_empty_row(np.array(mat).T.tolist())).T.tolist()
+
+     if repeat_single:
+         new_mat = []
+         for row in mat:
+             if sum([len(c) > 0 for c in row]) == 1:
+                 txt = [c for c in row if len(c)][0]
+                 new_mat.append([txt for _ in range(len(row))])
+             else:
+                 new_mat.append(row)
+         mat = np.array(new_mat)
+
+     return mat
+
+
+ def display_markdown(
+     data: List[List[str]], show: bool = True, use_header: bool = True
+ ) -> str:
+     """
+     Convert a list of lists of strings into a markdown table.
+     If show is True, use_header will be set to True.
+
+     Args:
+         data (list[list[str]]): The table data. The first sublist should contain headers.
+         show (bool, optional): Whether to display the table. Defaults to True.
+         use_header (bool, optional): Whether to use the first sublist as headers. Defaults to True.
+
+     Returns:
+         str: A markdown-formatted table as a string.
+     """
+     if show:
+         use_header = True
+     data = [[re.sub(r'\n', ' ', c) for c in row] for row in data]
+
+     if not len(data):
+         return "EMPTY TABLE"
+
+     max_cols = max(len(row) for row in data)
+     data = [row + [""] * (max_cols - len(row)) for row in data]
+
+     if use_header:
+         header = "| " + " | ".join(data[0]) + " |"
+         separator = "| " + " | ".join(["---"] * max_cols) + " |"
+         body = "\n".join("| " + " | ".join(row) + " |" for row in data[1:])
+         markdown_table = (
+             f"{header}\n{separator}\n{body}" if body else f"{header}\n{separator}"
+         )
+
+         if show:
+             from IPython.display import display, Markdown
+             markdown_table = re.sub(r'\$', r'\\$', markdown_table)
+             markdown_table = re.sub(r'\%', r'\\%', markdown_table)
+             display(Markdown(markdown_table))
+
+     else:
+         markdown_table = "\n".join("| " + " | ".join(row) + " |" for row in data)
+
+     return markdown_table
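A small self-contained sanity check of the helpers above, using synthetic boxes rather than model outputs (the 2 x 2 layout and all values are made up for illustration):

```python
import numpy as np
import pandas as pd

from post_processing.table_struct_pp import assign_boxes, build_markdown, display_markdown

# Two row boxes and two column boxes covering a 100 x 100 table.
row_boxes = np.array([[0, 0, 100, 50], [0, 50, 100, 100]], dtype=float)
col_boxes = np.array([[0, 0, 50, 100], [50, 0, 100, 100]], dtype=float)

# Four words, one per cell, as [x_min, y_min, x_max, y_max].
words = pd.DataFrame(
    {
        "text": ["name", "score", "model A", "0.93"],
        "box": [
            [5, 5, 45, 45],    # top-left cell
            [55, 5, 95, 45],   # top-right cell
            [5, 55, 45, 95],   # bottom-left cell
            [55, 55, 95, 95],  # bottom-right cell
        ],
    }
)

# Each word overlaps exactly one row and one column, so it lands in one cell.
words["row_ids"] = [assign_boxes(b, row_boxes, mode="row") for b in words["box"]]
words["col_ids"] = [assign_boxes(b, col_boxes, mode="column") for b in words["box"]]

grid = build_markdown(words)
print(display_markdown(grid, show=False))
# | name | score |
# | --- | --- |
# | model A | 0.93 |
```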