Spaces:

HMWCS
/

Gemma3n-challenge-demo

Running on Zero

App Files Files Community

HMWCS committed on Jul 16

Commit

9390992

verified ·

1 Parent(s): bd6d077

Upload 8 files

Browse files

Files changed (8) hide show

app.py +90 -4
classifier.py +250 -0
config.py +25 -0
knowledge_base.py +55 -0
requirements.txt +8 -0
test_images/cardboard1.jpg +0 -0
test_images/glass2.jpg +0 -0
test_images/metal5.jpg +0 -0

app.py CHANGED Viewed

@@ -1,7 +1,93 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+from PIL import Image
+import os
+from classifier import GarbageClassifier
+from config import Config
+# Build the shared configuration and the single classifier instance that the
+# request handler defined below reuses for every call.
+config = Config()
+classifier = GarbageClassifier(config)
+# Load the model eagerly, at import time, so no request has to wait for it.
+# load_model() logs and re-raises on failure, so a loading error stops the
+# script here, before any UI is built.
+print("Loading model...")
+classifier.load_model()
+print("Model loaded successfully!")
+def classify_garbage(image):
+    """Gradio callback: classify the uploaded picture.
+    Args:
+        image: The uploaded image, or ``None`` when nothing is uploaded.
+    Returns:
+        A ``(classification, detailed_text)`` pair of strings. A missing image
+        or any exception is reported through the pair instead of raising.
+    """
+    # Guard clause: nothing to classify.
+    if image is None:
+        return "Please upload an image", "No image provided"
+    try:
+        label, details = classifier.classify_image(image)
+    except Exception as exc:
+        return "Error", f"Classification failed: {exc}"
+    else:
+        return label, details
+def get_example_images(example_dir="test_images", limit=3):
+    """Return paths of example images found in ``example_dir``.
+    Args:
+        example_dir: Directory to scan (not recursive).
+        limit: Maximum number of paths to return.
+    Returns:
+        Up to ``limit`` paths of ``.png``/``.jpg``/``.jpeg`` files (suffix
+        matched case-insensitively), sorted by file name. An empty list when
+        ``example_dir`` is missing or is not a directory.
+    """
+    # isdir rather than exists: a plain file with this name must not reach listdir.
+    if not os.path.isdir(example_dir):
+        return []
+    image_suffixes = (".png", ".jpg", ".jpeg")
+    # os.listdir order is arbitrary, so sort to always offer the same examples.
+    names = sorted(
+        name
+        for name in os.listdir(example_dir)
+        if name.lower().endswith(image_suffixes)
+        and os.path.isfile(os.path.join(example_dir, name))
+    )
+    return [os.path.join(example_dir, name) for name in names[:limit]]
+# Build the Gradio UI. Components are created in the order they appear here,
+# inside nested Row/Column context managers, so statement order is the layout.
+with gr.Blocks(title="Garbage Classification System") as demo:
+    gr.Markdown("# 🗂️ Garbage Classification System")
+    gr.Markdown(
+        "Upload an image to classify garbage into: Recyclable Waste, Food/Kitchen Waste, Hazardous Waste, or Other Waste"
+    )
+    with gr.Row():
+        # First column: image input and the explicit "classify" button.
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="Upload Garbage Image")
+            classify_btn = gr.Button("Classify Garbage", variant="primary", size="lg")
+        # Second column: short label plus the detailed analysis text.
+        with gr.Column():
+            classification_output = gr.Textbox(
+                label="Classification Result",
+                placeholder="Upload an image and click classify",
+            )
+            full_response_output = gr.Textbox(
+                label="Detailed Analysis",
+                placeholder="Detailed reasoning will appear here",
+                lines=10,
+            )
+    # Collapsed accordion with one Markdown line per category description.
+    with gr.Accordion("📋 Garbage Categories Information", open=False):
+        category_info = classifier.get_categories_info()
+        for category, description in category_info.items():
+            gr.Markdown(f"**{category}**: {description}")
+    # Example gallery, only added when get_example_images() returns something.
+    examples = get_example_images()
+    if examples:
+        gr.Examples(examples=examples, inputs=image_input, label="Example Images")
+    # Both events below call the same handler with the same inputs and outputs.
+    classify_btn.click(
+        fn=classify_garbage,
+        inputs=image_input,
+        outputs=[classification_output, full_response_output],
+    )
+    # Auto-classify when the image changes.
+    # NOTE(review): presumably this also fires when the image is cleared or an
+    # example is picked, and pressing the button afterwards repeats the same
+    # inference - confirm the duplicate trigger is intended.
+    image_input.change(
+        fn=classify_garbage,
+        inputs=image_input,
+        outputs=[classification_output, full_response_output],
+    )
+# Start the web server only when run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()

classifier.py ADDED Viewed

	@@ -0,0 +1,250 @@

+from transformers import AutoProcessor, AutoModelForImageTextToText
+from PIL import Image
+import torch
+import logging
+from typing import Union, Tuple
+from config import Config
+from knowledge_base import GarbageClassificationKnowledge
+class GarbageClassifier:
+    """Classify waste photos with an image-text-to-text chat model.
+    The instance owns the Hugging Face processor/model pair (filled in by
+    ``load_model``) and a ``GarbageClassificationKnowledge`` object that
+    supplies the system prompt, the category names and their descriptions.
+    """
+    # Whole-word cues, consulted only when the answer names no category outright.
+    _CATEGORY_KEYWORDS = {
+        "Recyclable Waste": (
+            "recyclable",
+            "recycle",
+            "plastic",
+            "paper",
+            "metal",
+            "glass",
+            "bottle",
+            "can",
+            "aluminum",
+            "cardboard",
+        ),
+        "Food/Kitchen Waste": (
+            "food",
+            "kitchen",
+            "organic",
+            "fruit",
+            "vegetable",
+            "leftovers",
+            "scraps",
+            "peel",
+            "core",
+            "bone",
+        ),
+        "Hazardous Waste": (
+            "hazardous",
+            "dangerous",
+            "toxic",
+            "battery",
+            "chemical",
+            "medicine",
+            "paint",
+            "pharmaceutical",
+        ),
+        "Other Waste": (
+            "other",
+            "general",
+            "trash",
+            "garbage",
+            "waste",
+            "cigarette",
+            "ceramic",
+            "dust",
+        ),
+    }
+    def __init__(self, config: Config = None):
+        """Store settings; the model is not loaded until ``load_model`` runs.
+        Args:
+            config: Settings to use. A default ``Config()`` is built when omitted.
+        """
+        self.config = config or Config()
+        self.knowledge = GarbageClassificationKnowledge()
+        self.processor = None
+        self.model = None
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # Setup logging
+        logging.basicConfig(level=logging.INFO)
+        self.logger = logging.getLogger(__name__)
+    def load_model(self):
+        """Load the processor and the model named by ``config.MODEL_NAME``.
+        Raises:
+            Exception: Whatever ``from_pretrained`` raises, after logging it.
+        """
+        try:
+            self.logger.info("Loading model: %s", self.config.MODEL_NAME)
+            # Send the token to BOTH downloads: a gated repository rejects the
+            # weights just as it rejects the processor files.
+            auth_kwargs = {}
+            if self.config.HF_TOKEN:
+                auth_kwargs["token"] = self.config.HF_TOKEN
+            self.processor = AutoProcessor.from_pretrained(
+                self.config.MODEL_NAME, **auth_kwargs
+            )
+            self.model = AutoModelForImageTextToText.from_pretrained(
+                self.config.MODEL_NAME,
+                torch_dtype=self.config.TORCH_DTYPE,
+                device_map=self.config.DEVICE_MAP,
+                **auth_kwargs,
+            )
+            self.logger.info("Model loaded successfully")
+        except Exception as e:
+            self.logger.error("Error loading model: %s", e)
+            raise
+    def preprocess_image(self, image: Image.Image) -> Image.Image:
+        """Letterbox ``image`` onto a white square canvas.
+        The picture is converted to RGB, scaled so its longer side equals
+        ``config.IMAGE_SIZE`` (512 when the config has no such field) with the
+        aspect ratio kept, and pasted centred on the canvas.
+        Args:
+            image: Source picture.
+        Returns:
+            A new square RGB image.
+        Raises:
+            ValueError: If the image has a zero width or height.
+        """
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        side = getattr(self.config, "IMAGE_SIZE", 512)
+        target_size = (side, side)
+        original_width, original_height = image.size
+        if original_width <= 0 or original_height <= 0:
+            raise ValueError("Image has no pixels")
+        # Scale the longer side to `side`. The shorter side is clamped to at
+        # least one pixel, because resize() rejects a zero dimension and a very
+        # elongated picture would otherwise round down to 0.
+        if original_width > original_height:
+            new_width = side
+            new_height = max(1, int(side * original_height / original_width))
+        else:
+            new_height = side
+            new_width = max(1, int(side * original_width / original_height))
+        image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+        # White background, resized picture centred on it.
+        processed_image = Image.new("RGB", target_size, (255, 255, 255))
+        x_offset = (side - new_width) // 2
+        y_offset = (side - new_height) // 2
+        processed_image.paste(image, (x_offset, y_offset))
+        return processed_image
+    def classify_image(self, image: Union[str, Image.Image]) -> Tuple[str, str]:
+        """Classify the garbage shown in ``image``.
+        Args:
+            image: PIL Image or path to image file
+        Returns:
+            Tuple of (classification_result, full_response). Any failure during
+            processing is logged and reported as
+            ``("Error", "Classification failed: ...")`` instead of raising.
+        Raises:
+            RuntimeError: If ``load_model`` has not been called successfully.
+        """
+        if self.model is None or self.processor is None:
+            raise RuntimeError("Model not loaded. Call load_model() first.")
+        try:
+            if isinstance(image, str):
+                # Copy the pixels out so the file handle is closed right away.
+                with Image.open(image) as opened:
+                    image = opened.copy()
+            elif not isinstance(image, Image.Image):
+                raise ValueError("Image must be a PIL Image or file path")
+            processed_image = self.preprocess_image(image)
+            # System turn carries the classification rules; user turn carries
+            # the picture and the question.
+            messages = [
+                {
+                    "role": "system",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": self.knowledge.get_system_prompt(),
+                        }
+                    ],
+                },
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image", "image": processed_image},
+                        {
+                            "type": "text",
+                            "text": "Please classify the garbage in this image and explain your reasoning.",
+                        },
+                    ],
+                },
+            ]
+            inputs = self.processor.apply_chat_template(
+                messages,
+                add_generation_prompt=True,
+                tokenize=True,
+                return_dict=True,
+                return_tensors="pt",
+            ).to(self.model.device, dtype=self.model.dtype)
+            # Remember the prompt length so only newly generated tokens are decoded.
+            input_len = inputs["input_ids"].shape[-1]
+            outputs = self.model.generate(
+                **inputs,
+                max_new_tokens=self.config.MAX_NEW_TOKENS,
+                disable_compile=True,
+            )
+            response = self.processor.batch_decode(
+                outputs[:, input_len:],
+                skip_special_tokens=True,
+            )[0]
+            classification = self._extract_classification(response)
+            formatted_response = self._format_response(classification, response)
+            return classification, formatted_response
+        except Exception as e:
+            # logger.exception records the traceback through the logging system.
+            self.logger.exception("Error during classification: %s", e)
+            return "Error", f"Classification failed: {str(e)}"
+    def _match_category(self, text: str, categories):
+        """Return the category best supported by lower-cased ``text``, or ``None``.
+        A full category name wins, earliest mention first. Otherwise the
+        category with the most distinct whole-word keyword hits wins, with ties
+        going to the category listed first in ``_CATEGORY_KEYWORDS``.
+        """
+        import re
+        named = [
+            (text.find(category.lower()), category)
+            for category in categories
+            if category.lower() in text
+        ]
+        if named:
+            return min(named, key=lambda pair: pair[0])[1]
+        best_category, best_hits = None, 0
+        for category, keywords in self._CATEGORY_KEYWORDS.items():
+            # \b keeps "can" from matching "cannot" and "other" from matching
+            # "another"; the optional suffix still accepts simple plurals.
+            hits = sum(
+                1
+                for keyword in keywords
+                if re.search(rf"\b{re.escape(keyword)}(?:e?s)?\b", text)
+            )
+            if hits > best_hits:
+                best_category, best_hits = category, hits
+        return best_category
+    def _extract_classification(self, response: str) -> str:
+        """Extract the main classification from the model's response.
+        The text following a ``Classification:`` label is examined first, so a
+        category that is only mentioned in the reasoning cannot override the
+        stated answer. The whole response is the fallback.
+        Returns:
+            A category name, or ``"Unable to classify"`` when nothing matches.
+        """
+        import re
+        if not response:
+            return "Unable to classify"
+        response_lower = response.lower()
+        categories = self.knowledge.get_categories()
+        scopes = []
+        # Accepts "**Classification**: X" and "**Classification:** X" alike.
+        labelled = re.search(r"classification[\s*_]*:[\s*_]*([^\n]+)", response_lower)
+        if labelled:
+            scopes.append(labelled.group(1))
+        scopes.append(response_lower)
+        for scope in scopes:
+            category = self._match_category(scope, categories)
+            if category is not None:
+                return category
+        return "Unable to classify"
+    def _format_response(self, classification: str, full_response: str) -> str:
+        """Return Markdown text holding the classification and the reasoning.
+        A response that already has both ``**Classification**`` and
+        ``**Reasoning**`` markers is returned unchanged.
+        """
+        if not full_response.strip():
+            return f"**Classification**: {classification}\n**Reasoning**: No detailed analysis available."
+        if "**Classification**" in full_response and "**Reasoning**" in full_response:
+            return full_response
+        return f"**Classification**: {classification}\n\n**Reasoning**: {full_response}"
+    def get_categories_info(self):
+        """Return the mapping of category name to description."""
+        return self.knowledge.get_category_descriptions()

config.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import os
+import torch
+from dataclasses import dataclass
+@dataclass
+class Config:
+    """Settings for the garbage classifier: model id, generation, device, auth.
+    All defaults are evaluated once, when this module is imported.
+    """
+    # Hugging Face model id.
+    MODEL_NAME: str = "google/gemma-3n-E2B-it"
+    # Upper bound on the number of tokens generated per request.
+    MAX_NEW_TOKENS: int = 512
+    # dtype the model is loaded with (a torch.dtype, not a string).
+    TORCH_DTYPE: torch.dtype = torch.bfloat16
+    # First GPU when CUDA is available, otherwise CPU.
+    DEVICE_MAP: str = "cuda:0" if torch.cuda.is_available() else "cpu"
+    # Side length, in pixels, used for image preprocessing.
+    IMAGE_SIZE: int = 512
+    # Hugging Face token from the environment; empty string when unset.
+    HF_TOKEN: str = os.getenv("HF_TOKEN", "")

knowledge_base.py ADDED Viewed

	@@ -0,0 +1,55 @@

+class GarbageClassificationKnowledge:
+    """Static reference data: system prompt, category names and descriptions."""
+    # (category name, one-line description) pairs, in display order.
+    _CATEGORY_TABLE = (
+        (
+            "Recyclable Waste",
+            "Items that can be processed and reused, including paper, plastic, metal, glass, and textiles",
+        ),
+        (
+            "Food/Kitchen Waste",
+            "Organic waste from food preparation and consumption",
+        ),
+        (
+            "Hazardous Waste",
+            "Items containing harmful substances that require special disposal",
+        ),
+        (
+            "Other Waste",
+            "Items that don't fit into other categories and go to general waste",
+        ),
+    )
+    @staticmethod
+    def get_system_prompt():
+        """Return the system prompt with the classification rules and reply format."""
+        prompt = """You are a professional garbage classification expert. You need to carefully observe the items in the picture, analyze their materials, properties and uses, and then make accurate judgments according to garbage classification standards.
+Garbage classification standards:
+**Recyclable Waste**:
+- Paper: newspapers, magazines, books, various packaging papers, office paper, advertising flyers, cardboard boxes, copy paper, etc.
+- Plastics: various plastic bags, plastic packaging, disposable plastic food containers and utensils, toothbrushes, cups, water bottles, plastic toys, etc.
+- Metals: aluminum cans, tin cans, toothpaste tubes, metal toys, metal stationery, nails, metal sheets, aluminum foil, etc.
+- Glass: glass bottles, broken glass pieces, mirrors, light bulbs, vacuum flasks, etc.
+- Textiles: old clothing, textile products, shoes, curtains, towels, bags, etc.
+**Food/Kitchen Waste**:
+- Food scraps: rice, noodles, bread, meat, fish, shrimp shells, crab shells, bones, etc.
+- Fruit peels and cores: watermelon rinds, apple cores, orange peels, banana peels, nut shells, etc.
+- Plants: withered branches and leaves, flowers, traditional Chinese medicine residue, etc.
+- Expired food: expired canned food, cookies, candy, etc.
+**Hazardous Waste**:
+- Batteries: dry batteries, rechargeable batteries, button batteries, and all types of batteries
+- Light tubes: energy-saving lamps, fluorescent tubes, incandescent bulbs, LED lights, etc.
+- Pharmaceuticals: expired medicines, medicine packaging, thermometers, blood pressure monitors, etc.
+- Paints: paint, coatings, glue, nail polish, cosmetics, etc.
+- Others: pesticides, cleaning agents, agricultural chemicals, X-ray films, etc.
+**Other Waste**:
+- Contaminated non-recyclable paper: toilet paper, diapers, wet wipes, napkins, etc.
+- Cigarette butts, ceramics, dust, disposable tableware (non-plastic)
+- Large bones, hard shells, hard fruit pits (coconut shells, durian shells, walnut shells, corn cobs, etc.)
+- Hair, pet waste, cat litter, etc.
+Please observe the items in the image carefully according to the above classification standards, provide accurate garbage classification results, and briefly explain the classification reasoning. Format your response as:
+**Classification**: [Category Name]
+**Reasoning**: [Brief explanation of why this item belongs to this category]"""
+        return prompt
+    @staticmethod
+    def get_categories():
+        """Return a fresh list of the four category names, in display order."""
+        return [name for name, _ in GarbageClassificationKnowledge._CATEGORY_TABLE]
+    @staticmethod
+    def get_category_descriptions():
+        """Return a fresh dict mapping each category name to its description."""
+        return dict(GarbageClassificationKnowledge._CATEGORY_TABLE)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+numpy
+pillow
+torch
+torchvision
+transformers >= 4.53
+accelerate
+timm
+gradio

test_images/cardboard1.jpg ADDED Viewed

test_images/glass2.jpg ADDED Viewed

test_images/metal5.jpg ADDED Viewed