Spaces:
Running
on
Zero
Running
on
Zero
Update classifier.py
Browse files- classifier.py +113 -121
classifier.py
CHANGED
|
@@ -168,153 +168,139 @@ class GarbageClassifier:
|
|
| 168 |
return "Error", f"Classification failed: {str(e)}"
|
| 169 |
|
| 170 |
def _extract_classification(self, response: str) -> str:
|
| 171 |
-
"""Extract the main classification from the response
|
| 172 |
response_lower = response.lower()
|
| 173 |
|
| 174 |
-
#
|
| 175 |
-
|
| 176 |
"unable to classify",
|
| 177 |
"cannot classify",
|
| 178 |
-
"
|
| 179 |
-
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
"person",
|
| 182 |
"people",
|
| 183 |
"human",
|
| 184 |
"face",
|
| 185 |
"man",
|
| 186 |
"woman",
|
|
|
|
|
|
|
| 187 |
"living",
|
| 188 |
"alive",
|
| 189 |
"animal",
|
| 190 |
"pet",
|
| 191 |
"dog",
|
| 192 |
"cat",
|
| 193 |
-
"
|
| 194 |
-
"in use",
|
| 195 |
-
"working",
|
| 196 |
-
"operational",
|
| 197 |
-
"furniture",
|
| 198 |
-
"appliance",
|
| 199 |
-
"electronic device",
|
| 200 |
-
"building",
|
| 201 |
-
"house",
|
| 202 |
-
"room",
|
| 203 |
-
"landscape",
|
| 204 |
-
"vehicle",
|
| 205 |
-
"car",
|
| 206 |
-
"truck",
|
| 207 |
-
"bike",
|
| 208 |
"elon musk",
|
| 209 |
"celebrity",
|
| 210 |
"famous person",
|
| 211 |
"portrait",
|
| 212 |
"photo of a person",
|
|
|
|
| 213 |
]
|
| 214 |
|
| 215 |
-
# Check for explicit statements about not being garbage
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
| 217 |
"this is not",
|
| 218 |
-
"
|
| 219 |
"not intended to be discarded",
|
| 220 |
"not something that should be",
|
| 221 |
"appears to be a person",
|
| 222 |
"shows a person",
|
| 223 |
"image of a person",
|
| 224 |
-
"human being",
|
| 225 |
-
"living creature",
|
| 226 |
]
|
| 227 |
|
| 228 |
-
#
|
| 229 |
if any(indicator in response_lower for indicator in non_garbage_indicators):
|
| 230 |
return "Unable to classify"
|
| 231 |
|
| 232 |
-
|
| 233 |
-
if any(phrase in response_lower for phrase in non_garbage_phrases):
|
| 234 |
-
return "Unable to classify"
|
| 235 |
-
|
| 236 |
-
# Third priority: Look for reasoning that explicitly says it's not waste/garbage
|
| 237 |
-
reasoning_against_waste = [
|
| 238 |
-
"cannot be classified as waste",
|
| 239 |
-
"should not be classified as",
|
| 240 |
-
"not appropriate to classify",
|
| 241 |
-
"does not belong to any waste category",
|
| 242 |
-
"is not waste material",
|
| 243 |
-
]
|
| 244 |
-
|
| 245 |
-
if any(phrase in response_lower for phrase in reasoning_against_waste):
|
| 246 |
return "Unable to classify"
|
| 247 |
|
| 248 |
-
#
|
| 249 |
categories = self.knowledge.get_categories()
|
| 250 |
waste_categories = [cat for cat in categories if cat != "Unable to classify"]
|
| 251 |
|
| 252 |
-
# Look for exact category matches
|
| 253 |
for category in waste_categories:
|
| 254 |
if category.lower() in response_lower:
|
| 255 |
-
# Double check - make sure the context is positive
|
| 256 |
-
category_index = response_lower.find(category.lower())
|
| 257 |
-
context_before = response_lower[
|
| 258 |
-
max(0, category_index - 50) : category_index
|
| 259 |
-
]
|
| 260 |
-
context_after = response_lower[category_index : category_index + 50]
|
| 261 |
-
|
| 262 |
-
# If there are negation words around the category, skip it
|
| 263 |
-
negation_words = [
|
| 264 |
-
"not",
|
| 265 |
-
"cannot",
|
| 266 |
-
"unable",
|
| 267 |
-
"doesn't",
|
| 268 |
-
"isn't",
|
| 269 |
-
"won't",
|
| 270 |
-
"shouldn't",
|
| 271 |
-
]
|
| 272 |
-
if any(
|
| 273 |
-
neg in context_before or neg in context_after
|
| 274 |
-
for neg in negation_words
|
| 275 |
-
):
|
| 276 |
-
continue
|
| 277 |
-
|
| 278 |
return category
|
| 279 |
|
| 280 |
-
# Look for
|
| 281 |
category_keywords = {
|
| 282 |
"Recyclable Waste": [
|
| 283 |
"recyclable",
|
| 284 |
"recycle",
|
| 285 |
-
"plastic
|
| 286 |
-
"
|
| 287 |
-
"
|
| 288 |
-
"glass
|
| 289 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
],
|
| 291 |
"Food/Kitchen Waste": [
|
| 292 |
-
"food
|
| 293 |
-
"
|
| 294 |
-
"
|
|
|
|
|
|
|
| 295 |
"leftovers",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
"organic waste",
|
| 297 |
-
"kitchen waste",
|
| 298 |
],
|
| 299 |
"Hazardous Waste": [
|
|
|
|
|
|
|
|
|
|
| 300 |
"battery",
|
| 301 |
-
"chemical
|
| 302 |
-
"medicine
|
| 303 |
-
"paint
|
| 304 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
],
|
| 306 |
-
"Other Waste": ["cigarette butt", "ceramic piece", "dust", "general waste"],
|
| 307 |
}
|
| 308 |
|
| 309 |
for category, keywords in category_keywords.items():
|
| 310 |
if any(keyword in response_lower for keyword in keywords):
|
| 311 |
return category
|
| 312 |
|
| 313 |
-
#
|
| 314 |
return "Unable to classify"
|
| 315 |
|
| 316 |
def _extract_reasoning(self, response: str) -> str:
|
| 317 |
-
"""Extract only the reasoning content, removing all formatting markers"""
|
| 318 |
import re
|
| 319 |
|
| 320 |
# Remove all formatting markers
|
|
@@ -327,44 +313,50 @@ class GarbageClassifier:
|
|
| 327 |
"**", ""
|
| 328 |
) # Remove remaining ** markers
|
| 329 |
|
| 330 |
-
#
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
|
| 369 |
return reasoning if reasoning else "Analysis not available"
|
| 370 |
|
|
|
|
| 168 |
return "Error", f"Classification failed: {str(e)}"
|
| 169 |
|
| 170 |
def _extract_classification(self, response: str) -> str:
|
| 171 |
+
"""Extract the main classification from the response"""
|
| 172 |
response_lower = response.lower()
|
| 173 |
|
| 174 |
+
# First check for explicit "Unable to classify" statements
|
| 175 |
+
unable_phrases = [
|
| 176 |
"unable to classify",
|
| 177 |
"cannot classify",
|
| 178 |
+
"cannot be classified",
|
| 179 |
+
]
|
| 180 |
+
|
| 181 |
+
if any(phrase in response_lower for phrase in unable_phrases):
|
| 182 |
+
return "Unable to classify"
|
| 183 |
+
|
| 184 |
+
# Check for non-garbage items (people, living things, etc.)
|
| 185 |
+
non_garbage_indicators = [
|
| 186 |
"person",
|
| 187 |
"people",
|
| 188 |
"human",
|
| 189 |
"face",
|
| 190 |
"man",
|
| 191 |
"woman",
|
| 192 |
+
"boy",
|
| 193 |
+
"girl",
|
| 194 |
"living",
|
| 195 |
"alive",
|
| 196 |
"animal",
|
| 197 |
"pet",
|
| 198 |
"dog",
|
| 199 |
"cat",
|
| 200 |
+
"bird",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
"elon musk",
|
| 202 |
"celebrity",
|
| 203 |
"famous person",
|
| 204 |
"portrait",
|
| 205 |
"photo of a person",
|
| 206 |
+
"human being",
|
| 207 |
]
|
| 208 |
|
| 209 |
+
# Check for explicit statements about not being garbage/waste
|
| 210 |
+
non_waste_phrases = [
|
| 211 |
+
"not garbage",
|
| 212 |
+
"not waste",
|
| 213 |
+
"not trash",
|
| 214 |
"this is not",
|
| 215 |
+
"does not appear to be waste",
|
| 216 |
"not intended to be discarded",
|
| 217 |
"not something that should be",
|
| 218 |
"appears to be a person",
|
| 219 |
"shows a person",
|
| 220 |
"image of a person",
|
|
|
|
|
|
|
| 221 |
]
|
| 222 |
|
| 223 |
+
# Only classify as "Unable to classify" if it's clearly not garbage
|
| 224 |
if any(indicator in response_lower for indicator in non_garbage_indicators):
|
| 225 |
return "Unable to classify"
|
| 226 |
|
| 227 |
+
if any(phrase in response_lower for phrase in non_waste_phrases):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
return "Unable to classify"
|
| 229 |
|
| 230 |
+
# Now look for waste categories - check exact matches first
|
| 231 |
categories = self.knowledge.get_categories()
|
| 232 |
waste_categories = [cat for cat in categories if cat != "Unable to classify"]
|
| 233 |
|
|
|
|
| 234 |
for category in waste_categories:
|
| 235 |
if category.lower() in response_lower:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
return category
|
| 237 |
|
| 238 |
+
# Look for category keywords
|
| 239 |
category_keywords = {
|
| 240 |
"Recyclable Waste": [
|
| 241 |
"recyclable",
|
| 242 |
"recycle",
|
| 243 |
+
"plastic",
|
| 244 |
+
"paper",
|
| 245 |
+
"metal",
|
| 246 |
+
"glass",
|
| 247 |
+
"aluminum",
|
| 248 |
+
"foil",
|
| 249 |
+
"can",
|
| 250 |
+
"bottle",
|
| 251 |
+
"cardboard",
|
| 252 |
+
"tin",
|
| 253 |
+
"steel",
|
| 254 |
+
"iron",
|
| 255 |
+
"copper",
|
| 256 |
+
"brass",
|
| 257 |
+
"recyclable material",
|
| 258 |
],
|
| 259 |
"Food/Kitchen Waste": [
|
| 260 |
+
"food",
|
| 261 |
+
"kitchen",
|
| 262 |
+
"organic",
|
| 263 |
+
"fruit",
|
| 264 |
+
"vegetable",
|
| 265 |
"leftovers",
|
| 266 |
+
"scraps",
|
| 267 |
+
"peel",
|
| 268 |
+
"core",
|
| 269 |
+
"bone",
|
| 270 |
+
"food waste",
|
| 271 |
"organic waste",
|
|
|
|
| 272 |
],
|
| 273 |
"Hazardous Waste": [
|
| 274 |
+
"hazardous",
|
| 275 |
+
"dangerous",
|
| 276 |
+
"toxic",
|
| 277 |
"battery",
|
| 278 |
+
"chemical",
|
| 279 |
+
"medicine",
|
| 280 |
+
"paint",
|
| 281 |
+
"pharmaceutical",
|
| 282 |
+
"hazardous waste",
|
| 283 |
+
],
|
| 284 |
+
"Other Waste": [
|
| 285 |
+
"cigarette",
|
| 286 |
+
"ceramic",
|
| 287 |
+
"dust",
|
| 288 |
+
"diaper",
|
| 289 |
+
"tissue",
|
| 290 |
+
"general waste",
|
| 291 |
+
"other waste",
|
| 292 |
],
|
|
|
|
| 293 |
}
|
| 294 |
|
| 295 |
for category, keywords in category_keywords.items():
|
| 296 |
if any(keyword in response_lower for keyword in keywords):
|
| 297 |
return category
|
| 298 |
|
| 299 |
+
# If no clear classification found, default to "Unable to classify"
|
| 300 |
return "Unable to classify"
|
| 301 |
|
| 302 |
def _extract_reasoning(self, response: str) -> str:
|
| 303 |
+
"""Extract only the reasoning content, removing all formatting markers and classification info"""
|
| 304 |
import re
|
| 305 |
|
| 306 |
# Remove all formatting markers
|
|
|
|
| 313 |
"**", ""
|
| 314 |
) # Remove remaining ** markers
|
| 315 |
|
| 316 |
+
# Remove category names that might appear at the beginning
|
| 317 |
+
categories = self.knowledge.get_categories()
|
| 318 |
+
for category in categories:
|
| 319 |
+
if cleaned_response.strip().startswith(category):
|
| 320 |
+
cleaned_response = cleaned_response.replace(category, "", 1)
|
| 321 |
+
break
|
| 322 |
+
|
| 323 |
+
# Split into sentences and clean up
|
| 324 |
+
sentences = []
|
| 325 |
+
|
| 326 |
+
# Split by common sentence endings
|
| 327 |
+
parts = re.split(r"[.!?]\s+", cleaned_response)
|
| 328 |
+
|
| 329 |
+
for part in parts:
|
| 330 |
+
part = part.strip()
|
| 331 |
+
if not part:
|
| 332 |
+
continue
|
| 333 |
+
|
| 334 |
+
# Skip parts that are just category names
|
| 335 |
+
if part in categories:
|
| 336 |
+
continue
|
| 337 |
+
|
| 338 |
+
# Skip parts that start with category names
|
| 339 |
+
is_category_line = False
|
| 340 |
+
for category in categories:
|
| 341 |
+
if part.startswith(category):
|
| 342 |
+
is_category_line = True
|
| 343 |
+
break
|
| 344 |
+
|
| 345 |
+
if is_category_line:
|
| 346 |
+
continue
|
| 347 |
+
|
| 348 |
+
# Clean up the sentence
|
| 349 |
+
part = re.sub(
|
| 350 |
+
r"^[A-Za-z\s]+:", "", part
|
| 351 |
+
).strip() # Remove "Category:" type prefixes
|
| 352 |
+
|
| 353 |
+
if part and len(part) > 3: # Only keep meaningful content
|
| 354 |
+
sentences.append(part)
|
| 355 |
+
|
| 356 |
+
# Join sentences and ensure proper punctuation
|
| 357 |
+
reasoning = ". ".join(sentences)
|
| 358 |
+
if reasoning and not reasoning.endswith((".", "!", "?")):
|
| 359 |
+
reasoning += "."
|
| 360 |
|
| 361 |
return reasoning if reasoning else "Analysis not available"
|
| 362 |
|