Spaces:

HMWCS
/

Gemma3n-challenge-demo

Running on Zero

App Files Files Community

HMWCS commited on Jul 17

Commit

c86f4fa

verified ·

1 Parent(s): 8f8cb24

Update classifier.py

Browse files

Files changed (1) hide show

classifier.py +113 -121

classifier.py CHANGED Viewed

@@ -168,153 +168,139 @@ class GarbageClassifier:
             return "Error", f"Classification failed: {str(e)}"
     def _extract_classification(self, response: str) -> str:
-        """Extract the main classification from the response with enhanced logic"""
         response_lower = response.lower()
-        # Strong indicators that this is NOT garbage - check these first
-        non_garbage_indicators = [
             "unable to classify",
             "cannot classify",
-            "not garbage",
-            "not waste",
-            "not trash",
             "person",
             "people",
             "human",
             "face",
             "man",
             "woman",
             "living",
             "alive",
             "animal",
             "pet",
             "dog",
             "cat",
-            "functioning",
-            "in use",
-            "working",
-            "operational",
-            "furniture",
-            "appliance",
-            "electronic device",
-            "building",
-            "house",
-            "room",
-            "landscape",
-            "vehicle",
-            "car",
-            "truck",
-            "bike",
             "elon musk",
             "celebrity",
             "famous person",
             "portrait",
             "photo of a person",
         ]
-        # Check for explicit statements about not being garbage
-        non_garbage_phrases = [
             "this is not",
-            "this does not appear to be",
             "not intended to be discarded",
             "not something that should be",
             "appears to be a person",
             "shows a person",
             "image of a person",
-            "human being",
-            "living creature",
         ]
-        # First priority: Check for strong non-garbage indicators
         if any(indicator in response_lower for indicator in non_garbage_indicators):
             return "Unable to classify"
-        # Second priority: Check for phrases indicating it's not garbage
-        if any(phrase in response_lower for phrase in non_garbage_phrases):
-            return "Unable to classify"
-        # Third priority: Look for reasoning that explicitly says it's not waste/garbage
-        reasoning_against_waste = [
-            "cannot be classified as waste",
-            "should not be classified as",
-            "not appropriate to classify",
-            "does not belong to any waste category",
-            "is not waste material",
-        ]
-        if any(phrase in response_lower for phrase in reasoning_against_waste):
             return "Unable to classify"
-        # Only if none of the above conditions are met, then look for garbage categories
         categories = self.knowledge.get_categories()
         waste_categories = [cat for cat in categories if cat != "Unable to classify"]
-        # Look for exact category matches
         for category in waste_categories:
             if category.lower() in response_lower:
-                # Double check - make sure the context is positive
-                category_index = response_lower.find(category.lower())
-                context_before = response_lower[
-                    max(0, category_index - 50) : category_index
-                ]
-                context_after = response_lower[category_index : category_index + 50]
-                # If there are negation words around the category, skip it
-                negation_words = [
-                    "not",
-                    "cannot",
-                    "unable",
-                    "doesn't",
-                    "isn't",
-                    "won't",
-                    "shouldn't",
-                ]
-                if any(
-                    neg in context_before or neg in context_after
-                    for neg in negation_words
-                ):
-                    continue
                 return category
-        # Look for key terms only if no explicit non-garbage indicators were found
         category_keywords = {
             "Recyclable Waste": [
                 "recyclable",
                 "recycle",
-                "plastic bottle",
-                "aluminum can",
-                "cardboard box",
-                "glass bottle",
-                "metal can",
             ],
             "Food/Kitchen Waste": [
-                "food scraps",
-                "fruit peel",
-                "vegetable waste",
                 "leftovers",
                 "organic waste",
-                "kitchen waste",
             ],
             "Hazardous Waste": [
                 "battery",
-                "chemical container",
-                "medicine bottle",
-                "paint can",
-                "toxic material",
             ],
-            "Other Waste": ["cigarette butt", "ceramic piece", "dust", "general waste"],
         }
         for category, keywords in category_keywords.items():
             if any(keyword in response_lower for keyword in keywords):
                 return category
-        # Default to "Unable to classify" if nothing clear is found
         return "Unable to classify"
     def _extract_reasoning(self, response: str) -> str:
-        """Extract only the reasoning content, removing all formatting markers"""
         import re
         # Remove all formatting markers
@@ -327,44 +313,50 @@ class GarbageClassifier:
             "**", ""
         )  # Remove remaining ** markers
-        # Split into lines and process
-        lines = cleaned_response.split("\n")
-        reasoning_parts = []
-        for line in lines:
-            line = line.strip()
-            # Skip empty lines and lines that look like classification categories
-            if line and not line in [
-                "Recyclable Waste",
-                "Food/Kitchen Waste",
-                "Hazardous Waste",
-                "Other Waste",
-                "Unable to classify",
-            ]:
-                # Skip lines that are just category names
-                if line not in self.knowledge.get_categories():
-                    reasoning_parts.append(line)
-        # Join the reasoning parts
-        reasoning = " ".join(reasoning_parts).strip()
-        # If we still have structured format markers, try a different approach
-        if reasoning.startswith("Classification:") or reasoning.startswith(
-            "Reasoning:"
-        ):
-            # Split by common patterns and take the reasoning part
-            if "Reasoning:" in reasoning:
-                reasoning = reasoning.split("Reasoning:")[-1].strip()
-            elif reasoning.count(":") >= 1:
-                # Take everything after the first colon if it looks like "Classification: X Reasoning: Y"
-                parts = reasoning.split(":", 1)
-                if len(parts) > 1:
-                    reasoning = parts[1].strip()
-        # Clean up any remaining artifacts
-        reasoning = re.sub(
-            r"^[A-Za-z\s]+:", "", reasoning
-        ).strip()  # Remove "Category:" type prefixes
         return reasoning if reasoning else "Analysis not available"

             return "Error", f"Classification failed: {str(e)}"
     def _extract_classification(self, response: str) -> str:
+        """Extract the main classification from the response"""
         response_lower = response.lower()
+        # First check for explicit "Unable to classify" statements
+        unable_phrases = [
             "unable to classify",
             "cannot classify",
+            "cannot be classified",
+        ]
+        if any(phrase in response_lower for phrase in unable_phrases):
+            return "Unable to classify"
+        # Check for non-garbage items (people, living things, etc.)
+        non_garbage_indicators = [
             "person",
             "people",
             "human",
             "face",
             "man",
             "woman",
+            "boy",
+            "girl",
             "living",
             "alive",
             "animal",
             "pet",
             "dog",
             "cat",
+            "bird",
             "elon musk",
             "celebrity",
             "famous person",
             "portrait",
             "photo of a person",
+            "human being",
         ]
+        # Check for explicit statements about not being garbage/waste
+        non_waste_phrases = [
+            "not garbage",
+            "not waste",
+            "not trash",
             "this is not",
+            "does not appear to be waste",
             "not intended to be discarded",
             "not something that should be",
             "appears to be a person",
             "shows a person",
             "image of a person",
         ]
+        # Only classify as "Unable to classify" if it's clearly not garbage
         if any(indicator in response_lower for indicator in non_garbage_indicators):
             return "Unable to classify"
+        if any(phrase in response_lower for phrase in non_waste_phrases):
             return "Unable to classify"
+        # Now look for waste categories - check exact matches first
         categories = self.knowledge.get_categories()
         waste_categories = [cat for cat in categories if cat != "Unable to classify"]
         for category in waste_categories:
             if category.lower() in response_lower:
                 return category
+        # Look for category keywords
         category_keywords = {
             "Recyclable Waste": [
                 "recyclable",
                 "recycle",
+                "plastic",
+                "paper",
+                "metal",
+                "glass",
+                "aluminum",
+                "foil",
+                "can",
+                "bottle",
+                "cardboard",
+                "tin",
+                "steel",
+                "iron",
+                "copper",
+                "brass",
+                "recyclable material",
             ],
             "Food/Kitchen Waste": [
+                "food",
+                "kitchen",
+                "organic",
+                "fruit",
+                "vegetable",
                 "leftovers",
+                "scraps",
+                "peel",
+                "core",
+                "bone",
+                "food waste",
                 "organic waste",
             ],
             "Hazardous Waste": [
+                "hazardous",
+                "dangerous",
+                "toxic",
                 "battery",
+                "chemical",
+                "medicine",
+                "paint",
+                "pharmaceutical",
+                "hazardous waste",
+            ],
+            "Other Waste": [
+                "cigarette",
+                "ceramic",
+                "dust",
+                "diaper",
+                "tissue",
+                "general waste",
+                "other waste",
             ],
         }
         for category, keywords in category_keywords.items():
             if any(keyword in response_lower for keyword in keywords):
                 return category
+        # If no clear classification found, default to "Unable to classify"
         return "Unable to classify"
     def _extract_reasoning(self, response: str) -> str:
+        """Extract only the reasoning content, removing all formatting markers and classification info"""
         import re
         # Remove all formatting markers
             "**", ""
         )  # Remove remaining ** markers
+        # Remove category names that might appear at the beginning
+        categories = self.knowledge.get_categories()
+        for category in categories:
+            if cleaned_response.strip().startswith(category):
+                cleaned_response = cleaned_response.replace(category, "", 1)
+                break
+        # Split into sentences and clean up
+        sentences = []
+        # Split by common sentence endings
+        parts = re.split(r"[.!?]\s+", cleaned_response)
+        for part in parts:
+            part = part.strip()
+            if not part:
+                continue
+            # Skip parts that are just category names
+            if part in categories:
+                continue
+            # Skip parts that start with category names
+            is_category_line = False
+            for category in categories:
+                if part.startswith(category):
+                    is_category_line = True
+                    break
+            if is_category_line:
+                continue
+            # Clean up the sentence
+            part = re.sub(
+                r"^[A-Za-z\s]+:", "", part
+            ).strip()  # Remove "Category:" type prefixes
+            if part and len(part) > 3:  # Only keep meaningful content
+                sentences.append(part)
+        # Join sentences and ensure proper punctuation
+        reasoning = ". ".join(sentences)
+        if reasoning and not reasoning.endswith((".", "!", "?")):
+            reasoning += "."
         return reasoning if reasoning else "Analysis not available"