Spaces:
Running
on
Zero
Running
on
Zero
fix: fix failed uploads
Browse files- classifier.py +31 -111
- knowledge_base.py +17 -18
classifier.py
CHANGED
|
@@ -224,6 +224,7 @@ class GarbageClassifier:
|
|
| 224 |
|
| 225 |
# Look for explicit confidence scores in the response
|
| 226 |
confidence_patterns = [
|
|
|
|
| 227 |
r'confidence[:\s]*(\d+)',
|
| 228 |
r'confident[:\s]*(\d+)',
|
| 229 |
r'certainty[:\s]*(\d+)',
|
|
@@ -242,139 +243,58 @@ class GarbageClassifier:
|
|
| 242 |
return self._calculate_confidence_heuristic(response_lower, classification)
|
| 243 |
|
| 244 |
def _extract_classification(self, response: str) -> str:
|
| 245 |
-
"""Extract the main classification from the response
|
| 246 |
response_lower = response.lower()
|
| 247 |
|
| 248 |
-
#
|
| 249 |
-
|
| 250 |
-
# 1. Explicit mixed garbage phrases
|
| 251 |
-
explicit_mixed_phrases = [
|
| 252 |
-
"multiple garbage types",
|
| 253 |
-
"multiple different",
|
| 254 |
-
"different types of garbage",
|
| 255 |
-
"various items",
|
| 256 |
-
"mixed items",
|
| 257 |
-
"several different",
|
| 258 |
-
"collection of mixed items",
|
| 259 |
-
"mixture of items",
|
| 260 |
-
"variety of items",
|
| 261 |
-
"separate items",
|
| 262 |
-
"please separate"
|
| 263 |
-
]
|
| 264 |
-
|
| 265 |
-
if any(phrase in response_lower for phrase in explicit_mixed_phrases):
|
| 266 |
-
return "Unable to classify"
|
| 267 |
-
|
| 268 |
-
# 2. Language patterns that indicate multiple items/uncertainty about classification
|
| 269 |
-
uncertainty_patterns = [
|
| 270 |
-
"appears to be containers",
|
| 271 |
-
"what appears to be",
|
| 272 |
-
"including what appears",
|
| 273 |
-
"various colors and textures",
|
| 274 |
-
"don't clearly fall into a single",
|
| 275 |
-
"without further detail",
|
| 276 |
-
"not possible to definitively classify",
|
| 277 |
-
"more information",
|
| 278 |
-
"can't determine",
|
| 279 |
-
"difficult to identify",
|
| 280 |
-
"unclear category",
|
| 281 |
-
"mixed materials"
|
| 282 |
-
]
|
| 283 |
-
|
| 284 |
-
if any(pattern in response_lower for pattern in uncertainty_patterns):
|
| 285 |
-
return "Unable to classify"
|
| 286 |
-
|
| 287 |
-
# 3. Multiple container/item indicators
|
| 288 |
-
multiple_item_indicators = [
|
| 289 |
-
"containers (", "bottles, cans", "bags, and", "items, including",
|
| 290 |
-
"bottles and", "cans and", "containers and", "bags and",
|
| 291 |
-
"plastic bottles, cans", "various containers"
|
| 292 |
-
]
|
| 293 |
-
|
| 294 |
-
if any(indicator in response_lower for indicator in multiple_item_indicators):
|
| 295 |
-
return "Unable to classify"
|
| 296 |
-
|
| 297 |
-
# 4. Count different item types mentioned
|
| 298 |
-
item_types = [
|
| 299 |
-
"bottle", "can", "container", "bag", "box", "wrapper",
|
| 300 |
-
"jar", "cup", "plate", "bowl", "package"
|
| 301 |
-
]
|
| 302 |
-
|
| 303 |
-
item_count = sum(1 for item_type in item_types if item_type in response_lower)
|
| 304 |
-
if item_count >= 3: # If 3+ different container types mentioned, it's mixed
|
| 305 |
-
return "Unable to classify"
|
| 306 |
-
|
| 307 |
-
# ONLY EXCEPTION: Single recyclable container with visible food content
|
| 308 |
-
recyclable_container_indicators = ["container", "bottle", "can", "jar", "box", "wrapper"]
|
| 309 |
-
food_content_indicators = [
|
| 310 |
-
"food residue", "food content", "food inside", "visible food",
|
| 311 |
-
"remains", "leftovers", "scraps inside", "not empty", "not rinsed"
|
| 312 |
-
]
|
| 313 |
-
recyclable_material_indicators = ["plastic", "aluminum", "glass", "metal", "cardboard"]
|
| 314 |
-
|
| 315 |
-
# Check for recycling tip warning
|
| 316 |
-
has_recycling_tip = any(tip in response_lower for tip in [
|
| 317 |
-
"tip: empty and rinse",
|
| 318 |
-
"empty and rinse this container",
|
| 319 |
-
"clean first", "rinse first"
|
| 320 |
-
])
|
| 321 |
-
|
| 322 |
-
# ONLY allow Food/Kitchen classification for single contaminated container
|
| 323 |
-
has_single_container = any(indicator in response_lower for indicator in recyclable_container_indicators)
|
| 324 |
-
has_food_content = any(indicator in response_lower for indicator in food_content_indicators)
|
| 325 |
-
has_recyclable_material = any(indicator in response_lower for indicator in recyclable_material_indicators)
|
| 326 |
-
|
| 327 |
-
# Must be single item (not multiple) and contaminated
|
| 328 |
-
if (has_single_container and has_food_content and
|
| 329 |
-
(has_recyclable_material or has_recycling_tip) and
|
| 330 |
-
item_count <= 1): # Only single container
|
| 331 |
-
return "Food/Kitchen Waste"
|
| 332 |
-
|
| 333 |
-
# Now proceed with normal classification for single, clear items
|
| 334 |
categories = self.knowledge.get_categories()
|
| 335 |
-
waste_categories = [cat for cat in categories if cat != "Unable to classify"]
|
| 336 |
|
| 337 |
-
for category in
|
| 338 |
if category.lower() in response_lower:
|
|
|
|
| 339 |
category_index = response_lower.find(category.lower())
|
| 340 |
-
context_before = response_lower[max(0, category_index -
|
| 341 |
|
| 342 |
-
if not any(neg in context_before[-10:] for neg in ["not", "cannot", "isn't"
|
| 343 |
return category
|
| 344 |
|
| 345 |
-
#
|
| 346 |
-
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
-
|
| 354 |
-
food_indicators = ["food", "fruit", "vegetable", "organic", "kitchen waste", "peel", "core", "scraps"]
|
| 355 |
-
if any(indicator in response_lower for indicator in food_indicators):
|
| 356 |
return "Food/Kitchen Waste"
|
| 357 |
|
| 358 |
-
|
| 359 |
-
hazardous_indicators = ["battery", "chemical", "medicine", "paint", "toxic", "hazardous"]
|
| 360 |
-
if any(indicator in response_lower for indicator in hazardous_indicators):
|
| 361 |
return "Hazardous Waste"
|
| 362 |
|
| 363 |
-
|
| 364 |
-
other_waste_indicators = ["cigarette", "ceramic", "dust", "diaper", "tissue"]
|
| 365 |
-
if any(indicator in response_lower for indicator in other_waste_indicators):
|
| 366 |
return "Other Waste"
|
| 367 |
|
| 368 |
# Non-garbage detection
|
| 369 |
-
|
| 370 |
-
if any(phrase in response_lower for phrase in unable_phrases):
|
| 371 |
return "Unable to classify"
|
| 372 |
|
| 373 |
-
|
| 374 |
-
if any(
|
| 375 |
return "Unable to classify"
|
| 376 |
|
| 377 |
-
# Default
|
| 378 |
return "Unable to classify"
|
| 379 |
|
| 380 |
def _extract_reasoning(self, response: str) -> str:
|
|
|
|
| 224 |
|
| 225 |
# Look for explicit confidence scores in the response
|
| 226 |
confidence_patterns = [
|
| 227 |
+
r'\*\*confidence score\*\*[:\s]*(\d+)', # For **Confidence Score**: format
|
| 228 |
r'confidence[:\s]*(\d+)',
|
| 229 |
r'confident[:\s]*(\d+)',
|
| 230 |
r'certainty[:\s]*(\d+)',
|
|
|
|
| 243 |
return self._calculate_confidence_heuristic(response_lower, classification)
|
| 244 |
|
| 245 |
def _extract_classification(self, response: str) -> str:
|
| 246 |
+
"""Extract the main classification from the response - trust Gemma 3n intelligence more"""
|
| 247 |
response_lower = response.lower()
|
| 248 |
|
| 249 |
+
# Primary: Trust explicit category mentions from Gemma 3n
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
categories = self.knowledge.get_categories()
|
|
|
|
| 251 |
|
| 252 |
+
for category in categories:
|
| 253 |
if category.lower() in response_lower:
|
| 254 |
+
# Simple negation check
|
| 255 |
category_index = response_lower.find(category.lower())
|
| 256 |
+
context_before = response_lower[max(0, category_index - 20):category_index]
|
| 257 |
|
| 258 |
+
if not any(neg in context_before[-10:] for neg in ["not", "cannot", "isn't"]):
|
| 259 |
return category
|
| 260 |
|
| 261 |
+
# Secondary: Look for explicit mixed garbage warnings from model
|
| 262 |
+
mixed_warnings = [
|
| 263 |
+
"multiple garbage types detected",
|
| 264 |
+
"separate items",
|
| 265 |
+
"different garbage types",
|
| 266 |
+
"mixed together"
|
| 267 |
+
]
|
| 268 |
+
|
| 269 |
+
if any(warning in response_lower for warning in mixed_warnings):
|
| 270 |
+
return "Unable to classify"
|
| 271 |
|
| 272 |
+
# Tertiary: Basic material detection (simplified)
|
| 273 |
+
if any(material in response_lower for material in
|
| 274 |
+
["recyclable", "aluminum", "plastic", "glass", "metal", "cardboard"]):
|
| 275 |
+
# Check for contamination
|
| 276 |
+
if any(cont in response_lower for cont in ["obvious food", "substantial residue", "chunks", "liquids"]):
|
| 277 |
+
return "Food/Kitchen Waste"
|
| 278 |
+
return "Recyclable Waste"
|
| 279 |
|
| 280 |
+
if any(food in response_lower for food in ["food", "organic", "kitchen", "fruit", "vegetable"]):
|
|
|
|
|
|
|
| 281 |
return "Food/Kitchen Waste"
|
| 282 |
|
| 283 |
+
if any(hazard in response_lower for hazard in ["battery", "hazardous", "chemical", "toxic"]):
|
|
|
|
|
|
|
| 284 |
return "Hazardous Waste"
|
| 285 |
|
| 286 |
+
if any(other in response_lower for other in ["cigarette", "ceramic", "styrofoam"]):
|
|
|
|
|
|
|
| 287 |
return "Other Waste"
|
| 288 |
|
| 289 |
# Non-garbage detection
|
| 290 |
+
if any(non_garbage in response_lower for non_garbage in ["person", "people", "human", "living", "animal"]):
|
|
|
|
| 291 |
return "Unable to classify"
|
| 292 |
|
| 293 |
+
# Final fallback - let Gemma 3n's reasoning guide us
|
| 294 |
+
if any(unable in response_lower for unable in ["unable to classify", "cannot classify", "not garbage"]):
|
| 295 |
return "Unable to classify"
|
| 296 |
|
| 297 |
+
# Default to Unable to classify if unclear
|
| 298 |
return "Unable to classify"
|
| 299 |
|
| 300 |
def _extract_reasoning(self, response: str) -> str:
|
knowledge_base.py
CHANGED
|
@@ -7,32 +7,33 @@ IMPORTANT: You should ONLY classify items that are actually garbage/waste. If th
|
|
| 7 |
|
| 8 |
**MIXED GARBAGE HANDLING RULES:**
|
| 9 |
|
| 10 |
-
1. **
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
- Classify as "Unable to classify"
|
| 18 |
-
- Include warning: "⚠️ Warning: Multiple garbage types detected. Please separate items for proper classification."
|
| 19 |
|
| 20 |
Garbage classification standards:
|
| 21 |
|
| 22 |
**Recyclable Waste**:
|
| 23 |
-
- Paper: newspapers, magazines, books, various packaging papers, office paper, advertising flyers,
|
| 24 |
- Plastics: clean plastic bottles (#1 PETE, #2 HDPE), clean plastic containers, plastic bags, toothbrushes, cups, water bottles, plastic toys, etc. (NOT styrofoam #6 or heavily coated containers)
|
| 25 |
- Metals: clean aluminum cans, clean tin cans, toothpaste tubes, metal toys, metal stationery, nails, metal sheets, aluminum foil, etc.
|
| 26 |
- Glass: clean glass bottles and jars, broken glass pieces, mirrors, light bulbs, vacuum flasks, etc.
|
| 27 |
- Textiles: old clothing, textile products, shoes, curtains, towels, bags, etc.
|
| 28 |
-
- NOTE:
|
| 29 |
|
| 30 |
**Food/Kitchen Waste**:
|
| 31 |
- Food scraps: rice, noodles, bread, meat, fish, shrimp shells, crab shells, bones, etc.
|
| 32 |
- Fruit peels and cores: watermelon rinds, apple cores, orange peels, banana peels, nut shells, etc.
|
| 33 |
- Plants: withered branches and leaves, flowers, traditional Chinese medicine residue, etc.
|
| 34 |
- Expired food: expired canned food, cookies, candy, etc.
|
| 35 |
-
-
|
| 36 |
|
| 37 |
**Hazardous Waste**:
|
| 38 |
- Batteries: dry batteries, rechargeable batteries, button batteries, and all types of batteries
|
|
@@ -56,9 +57,7 @@ Garbage classification standards:
|
|
| 56 |
- Any item that is not intended to be discarded as waste
|
| 57 |
- Multiple different garbage types mixed together
|
| 58 |
|
| 59 |
-
Please observe the items in the image carefully according to the above classification standards
|
| 60 |
-
|
| 61 |
-
For mixed garbage situations, apply the special handling rules above and include appropriate warnings.
|
| 62 |
|
| 63 |
Format your response EXACTLY as follows:
|
| 64 |
|
|
@@ -79,9 +78,9 @@ Format your response EXACTLY as follows:
|
|
| 79 |
@staticmethod
|
| 80 |
def get_category_descriptions():
|
| 81 |
return {
|
| 82 |
-
"Recyclable Waste": "Items that can be processed and reused, including paper, plastic, metal, glass, and textiles (
|
| 83 |
-
"Food/Kitchen Waste": "Organic waste from food preparation and consumption, including
|
| 84 |
"Hazardous Waste": "Items containing harmful substances that require special disposal",
|
| 85 |
"Other Waste": "Items that don't fit into other categories and go to general waste",
|
| 86 |
-
"Unable to classify": "Items that are not garbage/waste, such as people, living things, functioning objects, or
|
| 87 |
-
}
|
|
|
|
| 7 |
|
| 8 |
**MIXED GARBAGE HANDLING RULES:**
|
| 9 |
|
| 10 |
+
1. **Food Residue Assessment**:
|
| 11 |
+
- OBVIOUSLY VISIBLE FOOD (chunks, liquids, substantial residue): Container goes to "Food/Kitchen Waste" with warning "⚠️ Tip: Empty and rinse this container first, then it can be recycled!"
|
| 12 |
+
- MINOR RESIDUE (grease stains, light film, pizza box grease spots): Container remains "Recyclable Waste"
|
| 13 |
+
|
| 14 |
+
2. **Multiple Different Garbage Types**:
|
| 15 |
+
- If image shows clearly different waste categories mixed together (electronics + organic waste, batteries + food scraps, multiple unrelated garbage types): classify as "Unable to classify" with warning "⚠️ Warning: Multiple garbage types detected. Please separate items for proper classification."
|
| 16 |
+
- Recyclable container with food is the ONLY allowed mixed situation - handle with rule 1 above
|
| 17 |
+
- ALL other mixed scenarios must be classified as "Unable to classify"
|
| 18 |
|
| 19 |
+
STRICTLY ENFORCE: Only recyclable containers with food are permitted mixed classification. Everything else mixed = "Unable to classify" with separation warning.
|
|
|
|
|
|
|
| 20 |
|
| 21 |
Garbage classification standards:
|
| 22 |
|
| 23 |
**Recyclable Waste**:
|
| 24 |
+
- Paper: newspapers, magazines, books, various packaging papers, office paper, advertising flyers, cardboard boxes with light grease stains, copy paper, etc.
|
| 25 |
- Plastics: clean plastic bottles (#1 PETE, #2 HDPE), clean plastic containers, plastic bags, toothbrushes, cups, water bottles, plastic toys, etc. (NOT styrofoam #6 or heavily coated containers)
|
| 26 |
- Metals: clean aluminum cans, clean tin cans, toothpaste tubes, metal toys, metal stationery, nails, metal sheets, aluminum foil, etc.
|
| 27 |
- Glass: clean glass bottles and jars, broken glass pieces, mirrors, light bulbs, vacuum flasks, etc.
|
| 28 |
- Textiles: old clothing, textile products, shoes, curtains, towels, bags, etc.
|
| 29 |
+
- NOTE: Light grease stains or minor residue are acceptable for recycling. Only obvious food content requires cleaning first.
|
| 30 |
|
| 31 |
**Food/Kitchen Waste**:
|
| 32 |
- Food scraps: rice, noodles, bread, meat, fish, shrimp shells, crab shells, bones, etc.
|
| 33 |
- Fruit peels and cores: watermelon rinds, apple cores, orange peels, banana peels, nut shells, etc.
|
| 34 |
- Plants: withered branches and leaves, flowers, traditional Chinese medicine residue, etc.
|
| 35 |
- Expired food: expired canned food, cookies, candy, etc.
|
| 36 |
+
- Containers with obvious food content (chunks, liquids, substantial residue)
|
| 37 |
|
| 38 |
**Hazardous Waste**:
|
| 39 |
- Batteries: dry batteries, rechargeable batteries, button batteries, and all types of batteries
|
|
|
|
| 57 |
- Any item that is not intended to be discarded as waste
|
| 58 |
- Multiple different garbage types mixed together
|
| 59 |
|
| 60 |
+
Please observe the items in the image carefully according to the above classification standards and provide accurate classification results.
|
|
|
|
|
|
|
| 61 |
|
| 62 |
Format your response EXACTLY as follows:
|
| 63 |
|
|
|
|
| 78 |
@staticmethod
|
| 79 |
def get_category_descriptions():
|
| 80 |
return {
|
| 81 |
+
"Recyclable Waste": "Items that can be processed and reused, including paper, plastic, metal, glass, and textiles (light grease stains acceptable)",
|
| 82 |
+
"Food/Kitchen Waste": "Organic waste from food preparation and consumption, including containers with obvious food content",
|
| 83 |
"Hazardous Waste": "Items containing harmful substances that require special disposal",
|
| 84 |
"Other Waste": "Items that don't fit into other categories and go to general waste",
|
| 85 |
+
"Unable to classify": "Items that are not garbage/waste, such as people, living things, functioning objects, or multiple different garbage types mixed together",
|
| 86 |
+
}
|