Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

README.md +115 -42
config.json +52 -78
model.onnx +3 -0
model.safetensors +2 -2

README.md CHANGED Viewed

@@ -7,52 +7,39 @@ base_model:
 # EfficientNet-B0 Document Image Classifier
-This is an image classification model based on **Google EfficientNet-B0**, fine-tuned to classify input images into one of the following 39 categories (to be reduced):
-1. **bar_chart**
-2. **bar_code**
-3. **chemistry_structure**
-4. **flow_chart**
-5. **icon**
-6. **line_chart**
-7. **logo**
-8. **geographical_map**
-9. **topographical_map**
-10. **other**
-11. **pie_chart**
-12. **qr_code**
-13. **scatter_plot**
-14. **screenshot_from_manual**
-15. **screenshot_from_computer**
-16. **calendar**
-17. **crossword_puzzle**
-18. **signature**
-19. **stamp**
-20. **photograph**
-21. **engineering_drawing**
-22. **table**
-23. **full_page_image**
-24. **page_thumbnail**
-25. **music**
-26. **illustration**
-27. **treemap**
-28. **radar_chart**
-29. **screenshot_from_mobile**
-30. **sudoku_puzzle**
-31. **box_plot**
-32. **cryptoquote**
-33. **heatmap**
-34. **poster**
-35. **passport**
-36. **legend**
-37. **area_chart**
-38. **astrology_chart**
-39. **book cover**
 ### How to use
-Example of how to classify an image into one of the 39 classes:
 ```python
 import torch
@@ -109,6 +96,92 @@ for idx, probs_image in enumerate(probs_batch):
 ```
 ## Citation
 If you use this model in your work, please cite the following papers:

 # EfficientNet-B0 Document Image Classifier
+This is an image classification model based on **Google EfficientNet-B0**, fine-tuned to classify input images into one of the following 26 categories:
+1. **logo**
+2. **photograph**
+3. **icon**
+4. **engineering_drawing**
+5. **line_chart**
+6. **bar_chart**
+7. **other**
+8. **table**
+9. **flow_chart**
+10. **screenshot_from_computer**
+11. **signature**
+12. **screenshot_from_manual**
+13. **geographical_map**
+14. **pie_chart**
+15. **page_thumbnail**
+16. **stamp**
+17. **music**
+18. **calendar**
+19. **qr_code**
+20. **bar_code**
+21. **full_page_image**
+22. **scatter_plot**
+23. **chemistry_structure**
+24. **topographical_map**
+25. **crossword_puzzle**
+26. **box_plot**
 ### How to use
+Example of how to classify an image into one of the 26 classes using Transformers:
 ```python
 import torch
 ```
+Example of how to classify an image into one of the 26 classes using ONNX Runtime:
+```python
+import onnxruntime
+import numpy as np
+import torchvision.transforms as transforms
+from PIL import Image
+import requests
+LABELS = [
+    "logo",
+    "photograph",
+    "icon",
+    "engineering_drawing",
+    "line_chart",
+    "bar_chart",
+    "other",
+    "table",
+    "flow_chart",
+    "screenshot_from_computer",
+    "signature",
+    "screenshot_from_manual",
+    "geographical_map",
+    "pie_chart",
+    "page_thumbnail",
+    "stamp",
+    "music",
+    "calendar",
+    "qr_code",
+    "bar_code",
+    "full_page_image",
+    "scatter_plot",
+    "chemistry_structure",
+    "topographical_map",
+    "crossword_puzzle",
+    "box_plot"
+]
+urls = [
+    'http://images.cocodataset.org/val2017/000000039769.jpg',
+    'http://images.cocodataset.org/test-stuff2017/000000001750.jpg',
+    'http://images.cocodataset.org/test-stuff2017/000000000001.jpg'
+]
+images = []
+for url in urls:
+    image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
+    images.append(image)
+image_processor = transforms.Compose(
+    [
+        transforms.Resize((224, 224)),
+        transforms.ToTensor(),
+        transforms.Normalize(
+            mean=[0.485, 0.456, 0.406],
+            std=[0.47853944, 0.4732864, 0.47434163],
+        ),
+    ]
+)
+processed_images_onnx = [image_processor(image).unsqueeze(0) for image in images]
+# onnx needs numpy as input
+onnx_inputs = [item.numpy(force=True) for item in processed_images_onnx]
+# pack into a batch
+onnx_inputs = np.concatenate(onnx_inputs, axis=0)
+ort_session = onnxruntime.InferenceSession(
+    "./DocumentFigureClassifier-v2_0-onnx/model.onnx",
+    providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+)
+for item in ort_session.run(None, {'input': onnx_inputs}):
+    for x in iter(item):
+        pred = x.argmax()
+        print(LABELS[pred])
+```
 ## Citation
 If you use this model in your work, please cite the following papers:

config.json CHANGED Viewed

@@ -22,45 +22,32 @@
   "hidden_act": "swish",
   "hidden_dim": 1280,
   "id2label": {
-    "0": "bar_chart",
-    "1": "bar_code",
-    "10": "pie_chart",
-    "11": "qr_code",
-    "12": "scatter_plot",
-    "13": "screenshot_from_manual",
-    "14": "screenshot_from_computer",
-    "15": "calendar",
-    "16": "crossword_puzzle",
-    "17": "signature",
-    "18": "stamp",
-    "19": "photograph",
-    "2": "chemistry_structure",
-    "20": "engineering_drawing",
-    "21": "table",
-    "22": "full_page_image",
-    "23": "page_thumbnail",
-    "24": "music",
-    "25": "illustration",
-    "26": "treemap",
-    "27": "radar_chart",
-    "28": "screenshot_from_mobile",
-    "29": "sudoku_puzzle",
-    "3": "flow_chart",
-    "30": "box_plot",
-    "31": "cryptoquote",
-    "32": "heatmap",
-    "33": "poster",
-    "34": "passport",
-    "35": "legend",
-    "36": "area_chart",
-    "37": "astrology_chart",
-    "38": "book cover",
-    "4": "icon",
-    "5": "line_chart",
-    "6": "logo",
-    "7": "geographical_map",
-    "8": "topographical_map",
-    "9": "other"
   },
   "image_size": 224,
   "in_channels": [
@@ -83,45 +70,32 @@
     3
   ],
   "label2id": {
-    "area_chart": "36",
-    "astrology_chart": "37",
-    "bar_chart": "0",
-    "bar_code": "1",
-    "book cover": "38",
-    "box_plot": "30",
-    "calendar": "15",
-    "chemistry_structure": "2",
-    "crossword_puzzle": "16",
-    "cryptoquote": "31",
-    "engineering_drawing": "20",
-    "flow_chart": "3",
-    "full_page_image": "22",
-    "geographical_map": "7",
-    "heatmap": "32",
-    "icon": "4",
-    "illustration": "25",
-    "legend": "35",
-    "line_chart": "5",
-    "logo": "6",
-    "music": "24",
-    "other": "9",
-    "page_thumbnail": "23",
-    "passport": "34",
-    "photograph": "19",
-    "pie_chart": "10",
-    "poster": "33",
-    "qr_code": "11",
-    "radar_chart": "27",
-    "scatter_plot": "12",
-    "screenshot_from_computer": "14",
-    "screenshot_from_manual": "13",
-    "screenshot_from_mobile": "28",
-    "signature": "17",
-    "stamp": "18",
-    "sudoku_puzzle": "29",
-    "table": "21",
-    "topographical_map": "8",
-    "treemap": "26"
   },
   "model_type": "efficientnet",
   "num_block_repeats": [

   "hidden_act": "swish",
   "hidden_dim": 1280,
   "id2label": {
+    "0": "logo",
+    "1": "photograph",
+    "10": "signature",
+    "11": "screenshot_from_manual",
+    "12": "geographical_map",
+    "13": "pie_chart",
+    "14": "page_thumbnail",
+    "15": "stamp",
+    "16": "music",
+    "17": "calendar",
+    "18": "qr_code",
+    "19": "bar_code",
+    "2": "icon",
+    "20": "full_page_image",
+    "21": "scatter_plot",
+    "22": "chemistry_structure",
+    "23": "topographical_map",
+    "24": "crossword_puzzle",
+    "25": "box_plot",
+    "3": "engineering_drawing",
+    "4": "line_chart",
+    "5": "bar_chart",
+    "6": "other",
+    "7": "table",
+    "8": "flow_chart",
+    "9": "screenshot_from_computer"
   },
   "image_size": 224,
   "in_channels": [
     3
   ],
   "label2id": {
+    "bar_chart": "5",
+    "bar_code": "19",
+    "box_plot": "25",
+    "calendar": "17",
+    "chemistry_structure": "22",
+    "crossword_puzzle": "24",
+    "engineering_drawing": "3",
+    "flow_chart": "8",
+    "full_page_image": "20",
+    "geographical_map": "12",
+    "icon": "2",
+    "line_chart": "4",
+    "logo": "0",
+    "music": "16",
+    "other": "6",
+    "page_thumbnail": "14",
+    "photograph": "1",
+    "pie_chart": "13",
+    "qr_code": "18",
+    "scatter_plot": "21",
+    "screenshot_from_computer": "9",
+    "screenshot_from_manual": "11",
+    "signature": "10",
+    "stamp": "15",
+    "table": "7",
+    "topographical_map": "23"
   },
   "model_type": "efficientnet",
   "num_block_repeats": [

model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:acba68df0a2f149212f5b5082d98a81700c93280e39a73dca095040ef19a583f
+size 16763657

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:441ff87d71573c0aea1f8d00537ae8b2c88baf4885674677f410de08db2bd547
-size 16444820

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8232c0c1e4a25551e496ccaf548e469e321f78997d18c9be7f3af9ccb5d222b
+size 16378200