HMWCS committed on
Commit
c86f4fa
·
verified ·
1 Parent(s): 8f8cb24

Update classifier.py

Browse files
Files changed (1) hide show
  1. classifier.py +113 -121
classifier.py CHANGED
@@ -168,153 +168,139 @@ class GarbageClassifier:
168
  return "Error", f"Classification failed: {str(e)}"
169
 
170
  def _extract_classification(self, response: str) -> str:
171
- """Extract the main classification from the response with enhanced logic"""
172
  response_lower = response.lower()
173
 
174
- # Strong indicators that this is NOT garbage - check these first
175
- non_garbage_indicators = [
176
  "unable to classify",
177
  "cannot classify",
178
- "not garbage",
179
- "not waste",
180
- "not trash",
 
 
 
 
 
181
  "person",
182
  "people",
183
  "human",
184
  "face",
185
  "man",
186
  "woman",
 
 
187
  "living",
188
  "alive",
189
  "animal",
190
  "pet",
191
  "dog",
192
  "cat",
193
- "functioning",
194
- "in use",
195
- "working",
196
- "operational",
197
- "furniture",
198
- "appliance",
199
- "electronic device",
200
- "building",
201
- "house",
202
- "room",
203
- "landscape",
204
- "vehicle",
205
- "car",
206
- "truck",
207
- "bike",
208
  "elon musk",
209
  "celebrity",
210
  "famous person",
211
  "portrait",
212
  "photo of a person",
 
213
  ]
214
 
215
- # Check for explicit statements about not being garbage
216
- non_garbage_phrases = [
 
 
 
217
  "this is not",
218
- "this does not appear to be",
219
  "not intended to be discarded",
220
  "not something that should be",
221
  "appears to be a person",
222
  "shows a person",
223
  "image of a person",
224
- "human being",
225
- "living creature",
226
  ]
227
 
228
- # First priority: Check for strong non-garbage indicators
229
  if any(indicator in response_lower for indicator in non_garbage_indicators):
230
  return "Unable to classify"
231
 
232
- # Second priority: Check for phrases indicating it's not garbage
233
- if any(phrase in response_lower for phrase in non_garbage_phrases):
234
- return "Unable to classify"
235
-
236
- # Third priority: Look for reasoning that explicitly says it's not waste/garbage
237
- reasoning_against_waste = [
238
- "cannot be classified as waste",
239
- "should not be classified as",
240
- "not appropriate to classify",
241
- "does not belong to any waste category",
242
- "is not waste material",
243
- ]
244
-
245
- if any(phrase in response_lower for phrase in reasoning_against_waste):
246
  return "Unable to classify"
247
 
248
- # Only if none of the above conditions are met, then look for garbage categories
249
  categories = self.knowledge.get_categories()
250
  waste_categories = [cat for cat in categories if cat != "Unable to classify"]
251
 
252
- # Look for exact category matches
253
  for category in waste_categories:
254
  if category.lower() in response_lower:
255
- # Double check - make sure the context is positive
256
- category_index = response_lower.find(category.lower())
257
- context_before = response_lower[
258
- max(0, category_index - 50) : category_index
259
- ]
260
- context_after = response_lower[category_index : category_index + 50]
261
-
262
- # If there are negation words around the category, skip it
263
- negation_words = [
264
- "not",
265
- "cannot",
266
- "unable",
267
- "doesn't",
268
- "isn't",
269
- "won't",
270
- "shouldn't",
271
- ]
272
- if any(
273
- neg in context_before or neg in context_after
274
- for neg in negation_words
275
- ):
276
- continue
277
-
278
  return category
279
 
280
- # Look for key terms only if no explicit non-garbage indicators were found
281
  category_keywords = {
282
  "Recyclable Waste": [
283
  "recyclable",
284
  "recycle",
285
- "plastic bottle",
286
- "aluminum can",
287
- "cardboard box",
288
- "glass bottle",
289
- "metal can",
 
 
 
 
 
 
 
 
 
 
290
  ],
291
  "Food/Kitchen Waste": [
292
- "food scraps",
293
- "fruit peel",
294
- "vegetable waste",
 
 
295
  "leftovers",
 
 
 
 
 
296
  "organic waste",
297
- "kitchen waste",
298
  ],
299
  "Hazardous Waste": [
 
 
 
300
  "battery",
301
- "chemical container",
302
- "medicine bottle",
303
- "paint can",
304
- "toxic material",
 
 
 
 
 
 
 
 
 
 
305
  ],
306
- "Other Waste": ["cigarette butt", "ceramic piece", "dust", "general waste"],
307
  }
308
 
309
  for category, keywords in category_keywords.items():
310
  if any(keyword in response_lower for keyword in keywords):
311
  return category
312
 
313
- # Default to "Unable to classify" if nothing clear is found
314
  return "Unable to classify"
315
 
316
  def _extract_reasoning(self, response: str) -> str:
317
- """Extract only the reasoning content, removing all formatting markers"""
318
  import re
319
 
320
  # Remove all formatting markers
@@ -327,44 +313,50 @@ class GarbageClassifier:
327
  "**", ""
328
  ) # Remove remaining ** markers
329
 
330
- # Split into lines and process
331
- lines = cleaned_response.split("\n")
332
- reasoning_parts = []
333
-
334
- for line in lines:
335
- line = line.strip()
336
- # Skip empty lines and lines that look like classification categories
337
- if line and not line in [
338
- "Recyclable Waste",
339
- "Food/Kitchen Waste",
340
- "Hazardous Waste",
341
- "Other Waste",
342
- "Unable to classify",
343
- ]:
344
- # Skip lines that are just category names
345
- if line not in self.knowledge.get_categories():
346
- reasoning_parts.append(line)
347
-
348
- # Join the reasoning parts
349
- reasoning = " ".join(reasoning_parts).strip()
350
-
351
- # If we still have structured format markers, try a different approach
352
- if reasoning.startswith("Classification:") or reasoning.startswith(
353
- "Reasoning:"
354
- ):
355
- # Split by common patterns and take the reasoning part
356
- if "Reasoning:" in reasoning:
357
- reasoning = reasoning.split("Reasoning:")[-1].strip()
358
- elif reasoning.count(":") >= 1:
359
- # Take everything after the first colon if it looks like "Classification: X Reasoning: Y"
360
- parts = reasoning.split(":", 1)
361
- if len(parts) > 1:
362
- reasoning = parts[1].strip()
363
-
364
- # Clean up any remaining artifacts
365
- reasoning = re.sub(
366
- r"^[A-Za-z\s]+:", "", reasoning
367
- ).strip() # Remove "Category:" type prefixes
 
 
 
 
 
 
368
 
369
  return reasoning if reasoning else "Analysis not available"
370
 
 
168
  return "Error", f"Classification failed: {str(e)}"
169
 
170
  def _extract_classification(self, response: str) -> str:
171
+ """Extract the main classification from the response"""
172
  response_lower = response.lower()
173
 
174
+ # First check for explicit "Unable to classify" statements
175
+ unable_phrases = [
176
  "unable to classify",
177
  "cannot classify",
178
+ "cannot be classified",
179
+ ]
180
+
181
+ if any(phrase in response_lower for phrase in unable_phrases):
182
+ return "Unable to classify"
183
+
184
+ # Check for non-garbage items (people, living things, etc.)
185
+ non_garbage_indicators = [
186
  "person",
187
  "people",
188
  "human",
189
  "face",
190
  "man",
191
  "woman",
192
+ "boy",
193
+ "girl",
194
  "living",
195
  "alive",
196
  "animal",
197
  "pet",
198
  "dog",
199
  "cat",
200
+ "bird",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  "elon musk",
202
  "celebrity",
203
  "famous person",
204
  "portrait",
205
  "photo of a person",
206
+ "human being",
207
  ]
208
 
209
+ # Check for explicit statements about not being garbage/waste
210
+ non_waste_phrases = [
211
+ "not garbage",
212
+ "not waste",
213
+ "not trash",
214
  "this is not",
215
+ "does not appear to be waste",
216
  "not intended to be discarded",
217
  "not something that should be",
218
  "appears to be a person",
219
  "shows a person",
220
  "image of a person",
 
 
221
  ]
222
 
223
+ # Only classify as "Unable to classify" if it's clearly not garbage
224
  if any(indicator in response_lower for indicator in non_garbage_indicators):
225
  return "Unable to classify"
226
 
227
+ if any(phrase in response_lower for phrase in non_waste_phrases):
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  return "Unable to classify"
229
 
230
+ # Now look for waste categories - check exact matches first
231
  categories = self.knowledge.get_categories()
232
  waste_categories = [cat for cat in categories if cat != "Unable to classify"]
233
 
 
234
  for category in waste_categories:
235
  if category.lower() in response_lower:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  return category
237
 
238
+ # Look for category keywords
239
  category_keywords = {
240
  "Recyclable Waste": [
241
  "recyclable",
242
  "recycle",
243
+ "plastic",
244
+ "paper",
245
+ "metal",
246
+ "glass",
247
+ "aluminum",
248
+ "foil",
249
+ "can",
250
+ "bottle",
251
+ "cardboard",
252
+ "tin",
253
+ "steel",
254
+ "iron",
255
+ "copper",
256
+ "brass",
257
+ "recyclable material",
258
  ],
259
  "Food/Kitchen Waste": [
260
+ "food",
261
+ "kitchen",
262
+ "organic",
263
+ "fruit",
264
+ "vegetable",
265
  "leftovers",
266
+ "scraps",
267
+ "peel",
268
+ "core",
269
+ "bone",
270
+ "food waste",
271
  "organic waste",
 
272
  ],
273
  "Hazardous Waste": [
274
+ "hazardous",
275
+ "dangerous",
276
+ "toxic",
277
  "battery",
278
+ "chemical",
279
+ "medicine",
280
+ "paint",
281
+ "pharmaceutical",
282
+ "hazardous waste",
283
+ ],
284
+ "Other Waste": [
285
+ "cigarette",
286
+ "ceramic",
287
+ "dust",
288
+ "diaper",
289
+ "tissue",
290
+ "general waste",
291
+ "other waste",
292
  ],
 
293
  }
294
 
295
  for category, keywords in category_keywords.items():
296
  if any(keyword in response_lower for keyword in keywords):
297
  return category
298
 
299
+ # If no clear classification found, default to "Unable to classify"
300
  return "Unable to classify"
301
 
302
  def _extract_reasoning(self, response: str) -> str:
303
+ """Extract only the reasoning content, removing all formatting markers and classification info"""
304
  import re
305
 
306
  # Remove all formatting markers
 
313
  "**", ""
314
  ) # Remove remaining ** markers
315
 
316
+ # Remove category names that might appear at the beginning
317
+ categories = self.knowledge.get_categories()
318
+ for category in categories:
319
+ if cleaned_response.strip().startswith(category):
320
+ cleaned_response = cleaned_response.replace(category, "", 1)
321
+ break
322
+
323
+ # Split into sentences and clean up
324
+ sentences = []
325
+
326
+ # Split by common sentence endings
327
+ parts = re.split(r"[.!?]\s+", cleaned_response)
328
+
329
+ for part in parts:
330
+ part = part.strip()
331
+ if not part:
332
+ continue
333
+
334
+ # Skip parts that are just category names
335
+ if part in categories:
336
+ continue
337
+
338
+ # Skip parts that start with category names
339
+ is_category_line = False
340
+ for category in categories:
341
+ if part.startswith(category):
342
+ is_category_line = True
343
+ break
344
+
345
+ if is_category_line:
346
+ continue
347
+
348
+ # Clean up the sentence
349
+ part = re.sub(
350
+ r"^[A-Za-z\s]+:", "", part
351
+ ).strip() # Remove "Category:" type prefixes
352
+
353
+ if part and len(part) > 3: # Only keep meaningful content
354
+ sentences.append(part)
355
+
356
+ # Join sentences and ensure proper punctuation
357
+ reasoning = ". ".join(sentences)
358
+ if reasoning and not reasoning.endswith((".", "!", "?")):
359
+ reasoning += "."
360
 
361
  return reasoning if reasoning else "Analysis not available"
362