Spaces:
Running
on
Zero
Running
on
Zero
Update classifier.py
Browse files- classifier.py +82 -116
classifier.py
CHANGED
|
@@ -170,133 +170,99 @@ class GarbageClassifier:
|
|
| 170 |
def _extract_classification(self, response: str) -> str:
|
| 171 |
"""Extract the main classification from the response"""
|
| 172 |
response_lower = response.lower()
|
| 173 |
-
|
| 174 |
-
# First
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
unable_phrases = [
|
| 176 |
"unable to classify",
|
| 177 |
"cannot classify",
|
| 178 |
-
"cannot be classified",
|
|
|
|
| 179 |
]
|
| 180 |
-
|
| 181 |
if any(phrase in response_lower for phrase in unable_phrases):
|
| 182 |
return "Unable to classify"
|
| 183 |
-
|
| 184 |
# Check for non-garbage items (people, living things, etc.)
|
| 185 |
non_garbage_indicators = [
|
| 186 |
-
"person",
|
| 187 |
-
"
|
| 188 |
-
"
|
| 189 |
-
"face",
|
| 190 |
-
"man",
|
| 191 |
-
"woman",
|
| 192 |
-
"boy",
|
| 193 |
-
"girl",
|
| 194 |
-
"living",
|
| 195 |
-
"alive",
|
| 196 |
-
"animal",
|
| 197 |
-
"pet",
|
| 198 |
-
"dog",
|
| 199 |
-
"cat",
|
| 200 |
-
"bird",
|
| 201 |
-
"elon musk",
|
| 202 |
-
"celebrity",
|
| 203 |
-
"famous person",
|
| 204 |
-
"portrait",
|
| 205 |
-
"photo of a person",
|
| 206 |
-
"human being",
|
| 207 |
]
|
| 208 |
-
|
| 209 |
-
# Check for explicit statements about not being garbage/waste
|
| 210 |
-
non_waste_phrases = [
|
| 211 |
-
"not garbage",
|
| 212 |
-
"not waste",
|
| 213 |
-
"not trash",
|
| 214 |
-
"this is not",
|
| 215 |
-
"does not appear to be waste",
|
| 216 |
-
"not intended to be discarded",
|
| 217 |
-
"not something that should be",
|
| 218 |
-
"appears to be a person",
|
| 219 |
-
"shows a person",
|
| 220 |
-
"image of a person",
|
| 221 |
-
]
|
| 222 |
-
|
| 223 |
-
# Only classify as "Unable to classify" if it's clearly not garbage
|
| 224 |
if any(indicator in response_lower for indicator in non_garbage_indicators):
|
| 225 |
return "Unable to classify"
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
# Look for category keywords
|
| 239 |
-
category_keywords = {
|
| 240 |
-
"Recyclable Waste": [
|
| 241 |
-
"recyclable",
|
| 242 |
-
"recycle",
|
| 243 |
-
"plastic",
|
| 244 |
-
"paper",
|
| 245 |
-
"metal",
|
| 246 |
-
"glass",
|
| 247 |
-
"aluminum",
|
| 248 |
-
"foil",
|
| 249 |
-
"can",
|
| 250 |
-
"bottle",
|
| 251 |
-
"cardboard",
|
| 252 |
-
"tin",
|
| 253 |
-
"steel",
|
| 254 |
-
"iron",
|
| 255 |
-
"copper",
|
| 256 |
-
"brass",
|
| 257 |
-
"recyclable material",
|
| 258 |
-
],
|
| 259 |
-
"Food/Kitchen Waste": [
|
| 260 |
-
"food",
|
| 261 |
-
"kitchen",
|
| 262 |
-
"organic",
|
| 263 |
-
"fruit",
|
| 264 |
-
"vegetable",
|
| 265 |
-
"leftovers",
|
| 266 |
-
"scraps",
|
| 267 |
-
"peel",
|
| 268 |
-
"core",
|
| 269 |
-
"bone",
|
| 270 |
-
"food waste",
|
| 271 |
-
"organic waste",
|
| 272 |
-
],
|
| 273 |
-
"Hazardous Waste": [
|
| 274 |
-
"hazardous",
|
| 275 |
-
"dangerous",
|
| 276 |
-
"toxic",
|
| 277 |
-
"battery",
|
| 278 |
-
"chemical",
|
| 279 |
-
"medicine",
|
| 280 |
-
"paint",
|
| 281 |
-
"pharmaceutical",
|
| 282 |
-
"hazardous waste",
|
| 283 |
-
],
|
| 284 |
-
"Other Waste": [
|
| 285 |
-
"cigarette",
|
| 286 |
-
"ceramic",
|
| 287 |
-
"dust",
|
| 288 |
-
"diaper",
|
| 289 |
-
"tissue",
|
| 290 |
-
"general waste",
|
| 291 |
-
"other waste",
|
| 292 |
-
],
|
| 293 |
-
}
|
| 294 |
-
|
| 295 |
-
for category, keywords in category_keywords.items():
|
| 296 |
-
if any(keyword in response_lower for keyword in keywords):
|
| 297 |
-
return category
|
| 298 |
-
|
| 299 |
-
# If no clear classification found, default to "Unable to classify"
|
| 300 |
return "Unable to classify"
|
| 301 |
|
| 302 |
def _extract_reasoning(self, response: str) -> str:
|
|
|
|
| 170 |
def _extract_classification(self, response: str) -> str:
    """Extract the main classification from the response.

    The response is lower-cased and run through these stages; the first
    stage that produces a result wins:

    1. An exact category name from ``self.knowledge.get_categories()``
       (other than "Unable to classify") that is not directly preceded
       by a negation word.
    2. Material/keyword hints for Recyclable, Food/Kitchen, Hazardous and
       Other Waste, checked in that order.
    3. Explicit "unable"/"not waste" phrases, then mentions of people or
       living things -> "Unable to classify".
    4. Generic waste vocabulary -> "Other Waste".
    5. Otherwise "Unable to classify".

    Keywords are matched as whole words (with simple inflections such as
    a plural "s"), not as raw substrings, so "tin" does not fire inside
    "setting", "core" inside "score" or "man" inside "many".

    Args:
        response: Free-text answer to classify.

    Returns:
        A category name from the knowledge base, or one of the literal
        labels "Recyclable Waste", "Food/Kitchen Waste",
        "Hazardous Waste", "Other Waste", "Unable to classify".
    """
    import re  # function-scope import keeps this method self-contained

    response_lower = response.lower()

    def mentions(terms) -> bool:
        """True if any term occurs as a whole word/phrase in the response."""
        alternatives = "|".join(re.escape(term) for term in terms)
        pattern = (
            r"(?<!\w)(?:" + alternatives + r")(?:s|es|d|ed|ing)?(?!\w)"
        )
        return re.search(pattern, response_lower) is not None

    # A negation only counts when it sits right before the category name:
    # a whole negation word followed by at most 7 characters. That is the
    # reach the earlier 10-character window gave the word "not".
    negation_before = re.compile(
        r"(?<!\w)(?:not|cannot|isn't|doesn't)(?!\w).{0,7}\Z", re.DOTALL
    )

    def is_negated(index: int) -> bool:
        """True if the text just before ``index`` ends with a negation."""
        # 20 characters is enough for the longest negation word plus the
        # allowed gap plus one character of look-behind.
        window = response_lower[max(0, index - 20):index]
        return negation_before.search(window) is not None

    # Stage 1: exact category names. Every occurrence is examined so a
    # negated first mention does not hide a later affirmative one.
    for category in self.knowledge.get_categories():
        if category == "Unable to classify":
            continue
        needle = category.lower()
        if not needle:
            continue  # an empty name would match everywhere
        position = response_lower.find(needle)
        while position != -1:
            if not is_negated(position):
                return category
            position = response_lower.find(needle, position + 1)

    # Stage 2: keyword hints, checked in priority order.
    # NOTE: bare "can", "tin", "steel", "iron" and "recycle" are
    # deliberately not listed for recyclables; only the terms below are
    # treated as decisive.
    keyword_rules = (
        (
            "Recyclable Waste",
            (
                "recyclable", "can be recycled",
                "aluminum", "foil", "metal",
                "plastic", "bottle",
                "glass", "cardboard", "paper",
            ),
        ),
        (
            "Food/Kitchen Waste",
            (
                "food", "fruit", "vegetable", "organic", "kitchen waste",
                "peel", "core", "scraps", "leftovers",
            ),
        ),
        (
            "Hazardous Waste",
            ("battery", "chemical", "medicine", "paint", "toxic", "hazardous"),
        ),
        (
            "Other Waste",
            ("cigarette", "ceramic", "dust", "diaper", "tissue", "other waste"),
        ),
    )
    for label, terms in keyword_rules:
        if mentions(terms):
            return label

    # Stage 3a: the response itself says it cannot classify the item.
    unable_phrases = (
        "unable to classify",
        "cannot classify",
        "cannot be classified as waste",
        "not garbage", "not waste", "not trash",
    )
    if mentions(unable_phrases):
        return "Unable to classify"

    # Stage 3b: non-garbage subjects (people, living things, etc.).
    non_garbage_indicators = (
        "person", "people", "human", "face", "man", "woman",
        "living", "alive", "animal", "pet",
        "portrait", "photo of a person",
    )
    if mentions(non_garbage_indicators):
        return "Unable to classify"

    # Stage 4: clearly about waste but no specific category was found.
    # "thrown" is listed explicitly because it is not a regular inflection
    # of "throw".
    waste_words = (
        "waste", "trash", "garbage", "discard", "throw", "thrown", "bin",
    )
    if mentions(waste_words):
        return "Other Waste"

    # Stage 5: nothing recognisable.
    return "Unable to classify"
|
| 267 |
|
| 268 |
def _extract_reasoning(self, response: str) -> str:
|