attilaultzindur
/

garbage_classifier_effnetv2s_ft

@@ -1,91 +1,10 @@
 ---
 pipeline_tag: image-classification
 library_name: torchvision
-tags:
-  - image-classification
-  - efficientnet
-  - efficientnet-v2
-  - garbage
-  - waste-sorting
-metrics:
-  - accuracy
----
-# Garbage Classifier – EfficientNet‑V2‑S
-A fine-tuned EfficientNet‑V2‑S model that recognises **10 waste categories**
-(battery, glass, plastic, etc.) for smart recycling and sorting applications.
-| id | class      |
-| -: | ---------- |
-|  0 | battery    |
-|  1 | biological |
-|  2 | cardboard  |
-|  3 | clothes    |
-|  4 | glass      |
-|  5 | metal      |
-|  6 | paper      |
-|  7 | plastic    |
-|  8 | shoes      |
-|  9 | trash      |
----
-## Quick Start
-```python
-from huggingface_hub import InferenceClient
-client = InferenceClient("attilaultzindur/garbage_classifier_effnetv2s_ft")
-with open("your_image.jpg", "rb") as f:
-    predictions = client.post(data={"inputs": f.read()})
-print(predictions)
-```
-Example output:
-```json
-[
-  {"label": "plastic", "score": 0.997},
-  {"label": "metal",   "score": 0.002},
-  …
-]
-```
----
-## Model Details
-| Field                    | Value                                                     |
-| ------------------------ | --------------------------------------------------------- |
-| Architecture             | EfficientNet‑V2‑S (torchvision)                           |
-| Input size               | `3 × 224 × 224`                                           |
-| Normalisation            | `mean = [0.485, 0.456, 0.406]`, `std = [0.229, 0.224, 0.225]` |
-| Classification head      | `Linear(1280→256) → ReLU → Dropout(0.5) → Linear(256→10)` |
-| Best validation accuracy | **97.6 %** after 20 epochs                                |
-### Training summary
-* **Dataset:** [Garbage Classification v2 (Kaggle)](https://www.kaggle.com/datasets/sumn2u/garbage-classification-v2)
-  split 80 % train / 20 % val
-* **Augmentations:** RandomResizedCrop, ColorJitter, RandomAffine, HorizontalFlip
-* **Optimiser:** Adam, LR = 1e‑4
-* **Frozen layers:** first 70 % of feature blocks
-* **Hardware:** single NVIDIA GPU
----
-## Reproduce
-The full training script is provided in `train_script.py`.
-Run it with the same hyper‑parameters to reproduce the checkpoint.
 ---
-## Licence
-No licence has been specified yet.
-Add an appropriate open‑source licence before using the model in production.

 ---
 pipeline_tag: image-classification
 library_name: torchvision
+tags: [image-classification, efficientnet, garbage]
+metrics: [accuracy]
 ---
+# Garbage Classifier – EfficientNet‑V2‑S
+Fine-tuned model that recognises 10 waste categories.

config.json CHANGED Viewed

@@ -14,5 +14,5 @@
     "shoes",
     "trash"
   ],
-  "library": "torchvision"
 }

     "shoes",
     "trash"
   ],
+  "library_name": "torchvision"
 }

handler.py CHANGED Viewed

@@ -1,37 +1,39 @@
 from typing import Dict, Any
 import io, base64, torch, torchvision
-from safetensors.torch import load_file
 from PIL import Image
 from torchvision import transforms as T
 class EndpointHandler:
-    def __init__(self, path='.'):  # path = repo root in container
         self.labels = ['battery', 'biological', 'cardboard', 'clothes', 'glass', 'metal', 'paper', 'plastic', 'shoes', 'trash']
-        self.model = torchvision.models.efficientnet_v2_s(weights=None)
         nf = self.model.classifier[1].in_features
         self.model.classifier = torch.nn.Sequential(
             torch.nn.Linear(nf, 256),
             torch.nn.ReLU(inplace=True),
             torch.nn.Dropout(0.5),
-            torch.nn.Linear(256, len(self.labels))
         )
-        state = load_file(str(pth), device="cpu")
         self.model.load_state_dict(state)
         self.model.eval()
-        self.trans = T.Compose([
             T.Resize((224, 224)),
             T.ToTensor(),
-            T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
         ])
     def __call__(self, data: Dict[str, Any]):
-        img_bytes = data['inputs']
-        if isinstance(img_bytes, str):  # base64
             img_bytes = base64.b64decode(img_bytes)
-        img = Image.open(io.BytesIO(img_bytes)).convert('RGB')
-        x = self.trans(img).unsqueeze(0)
         with torch.no_grad():
             probs = self.model(x).softmax(1)[0]
         topk = probs.topk(5)
-        return [{'label': self.labels[i], 'score': float(topk.values[j])}
-                for j, i in enumerate(topk.indices)]

 from typing import Dict, Any
 import io, base64, torch, torchvision
 from PIL import Image
 from torchvision import transforms as T
+from safetensors.torch import load_file
 class EndpointHandler:
+    def __init__(self, path: str = "."):
         self.labels = ['battery', 'biological', 'cardboard', 'clothes', 'glass', 'metal', 'paper', 'plastic', 'shoes', 'trash']
+        self.model  = torchvision.models.efficientnet_v2_s(weights=None)
         nf = self.model.classifier[1].in_features
         self.model.classifier = torch.nn.Sequential(
             torch.nn.Linear(nf, 256),
             torch.nn.ReLU(inplace=True),
             torch.nn.Dropout(0.5),
+            torch.nn.Linear(256, len(self.labels)),
         )
+        state = load_file(f"{path}/model.safetensors", device="cpu")
         self.model.load_state_dict(state)
         self.model.eval()
+        self.preprocess = T.Compose([
             T.Resize((224, 224)),
             T.ToTensor(),
+            T.Normalize(mean=[0.485,0.456,0.406],
+                        std =[0.229,0.224,0.225])
         ])
     def __call__(self, data: Dict[str, Any]):
+        img_bytes = data["inputs"]
+        if isinstance(img_bytes, str):         # base64
             img_bytes = base64.b64decode(img_bytes)
+        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+        x   = self.preprocess(img).unsqueeze(0)
         with torch.no_grad():
             probs = self.model(x).softmax(1)[0]
         topk = probs.topk(5)
+        return [{"label": self.labels[i], "score": float(topk.values[j])}
+                for j,i in enumerate(topk.indices)]