HMWCS committed on
Commit
1522e0a
·
verified ·
1 Parent(s): c86f4fa

Update classifier.py

Browse files
Files changed (1) hide show
  1. classifier.py +82 -116
classifier.py CHANGED
@@ -170,133 +170,99 @@ class GarbageClassifier:
170
  def _extract_classification(self, response: str) -> str:
171
  """Extract the main classification from the response"""
172
  response_lower = response.lower()
173
-
174
- # First check for explicit "Unable to classify" statements
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  unable_phrases = [
176
  "unable to classify",
177
  "cannot classify",
178
- "cannot be classified",
 
179
  ]
180
-
181
  if any(phrase in response_lower for phrase in unable_phrases):
182
  return "Unable to classify"
183
-
184
  # Check for non-garbage items (people, living things, etc.)
185
  non_garbage_indicators = [
186
- "person",
187
- "people",
188
- "human",
189
- "face",
190
- "man",
191
- "woman",
192
- "boy",
193
- "girl",
194
- "living",
195
- "alive",
196
- "animal",
197
- "pet",
198
- "dog",
199
- "cat",
200
- "bird",
201
- "elon musk",
202
- "celebrity",
203
- "famous person",
204
- "portrait",
205
- "photo of a person",
206
- "human being",
207
  ]
208
-
209
- # Check for explicit statements about not being garbage/waste
210
- non_waste_phrases = [
211
- "not garbage",
212
- "not waste",
213
- "not trash",
214
- "this is not",
215
- "does not appear to be waste",
216
- "not intended to be discarded",
217
- "not something that should be",
218
- "appears to be a person",
219
- "shows a person",
220
- "image of a person",
221
- ]
222
-
223
- # Only classify as "Unable to classify" if it's clearly not garbage
224
  if any(indicator in response_lower for indicator in non_garbage_indicators):
225
  return "Unable to classify"
226
-
227
- if any(phrase in response_lower for phrase in non_waste_phrases):
228
- return "Unable to classify"
229
-
230
- # Now look for waste categories - check exact matches first
231
- categories = self.knowledge.get_categories()
232
- waste_categories = [cat for cat in categories if cat != "Unable to classify"]
233
-
234
- for category in waste_categories:
235
- if category.lower() in response_lower:
236
- return category
237
-
238
- # Look for category keywords
239
- category_keywords = {
240
- "Recyclable Waste": [
241
- "recyclable",
242
- "recycle",
243
- "plastic",
244
- "paper",
245
- "metal",
246
- "glass",
247
- "aluminum",
248
- "foil",
249
- "can",
250
- "bottle",
251
- "cardboard",
252
- "tin",
253
- "steel",
254
- "iron",
255
- "copper",
256
- "brass",
257
- "recyclable material",
258
- ],
259
- "Food/Kitchen Waste": [
260
- "food",
261
- "kitchen",
262
- "organic",
263
- "fruit",
264
- "vegetable",
265
- "leftovers",
266
- "scraps",
267
- "peel",
268
- "core",
269
- "bone",
270
- "food waste",
271
- "organic waste",
272
- ],
273
- "Hazardous Waste": [
274
- "hazardous",
275
- "dangerous",
276
- "toxic",
277
- "battery",
278
- "chemical",
279
- "medicine",
280
- "paint",
281
- "pharmaceutical",
282
- "hazardous waste",
283
- ],
284
- "Other Waste": [
285
- "cigarette",
286
- "ceramic",
287
- "dust",
288
- "diaper",
289
- "tissue",
290
- "general waste",
291
- "other waste",
292
- ],
293
- }
294
-
295
- for category, keywords in category_keywords.items():
296
- if any(keyword in response_lower for keyword in keywords):
297
- return category
298
-
299
- # If no clear classification found, default to "Unable to classify"
300
  return "Unable to classify"
301
 
302
  def _extract_reasoning(self, response: str) -> str:
 
170
  def _extract_classification(self, response: str) -> str:
171
  """Extract the main classification from the response"""
172
  response_lower = response.lower()
173
+
174
+ # First, look for positive waste category indicators
175
+ # Check exact category matches first
176
+ categories = self.knowledge.get_categories()
177
+ waste_categories = [cat for cat in categories if cat != "Unable to classify"]
178
+
179
+ for category in waste_categories:
180
+ if category.lower() in response_lower:
181
+ # Make sure it's not in a negative context
182
+ category_index = response_lower.find(category.lower())
183
+ context_before = response_lower[max(0, category_index-30):category_index]
184
+
185
+ # Only skip if there's a clear negation right before
186
+ if not any(neg in context_before[-10:] for neg in ["not", "cannot", "isn't", "doesn't"]):
187
+ return category
188
+
189
+ # Look for strong recyclable indicators
190
+ recyclable_indicators = [
191
+ "recyclable", "recycle", "aluminum", "plastic", "glass", "metal",
192
+ "foil", "can", "bottle", "cardboard", "paper", "tin", "steel", "iron"
193
+ ]
194
+
195
+ if any(indicator in response_lower for indicator in recyclable_indicators):
196
+ # Check if it's explicitly said to be recyclable
197
+ recyclable_phrases = [
198
+ "recyclable", "can be recycled", "made of recyclable",
199
+ "recyclable material", "recyclable aluminum", "recyclable plastic"
200
+ ]
201
+ if any(phrase in response_lower for phrase in recyclable_phrases):
202
+ return "Recyclable Waste"
203
+
204
+ # Check for specific materials
205
+ if any(material in response_lower for material in ["aluminum", "foil", "metal"]):
206
+ return "Recyclable Waste"
207
+ if any(material in response_lower for material in ["plastic", "bottle"]):
208
+ return "Recyclable Waste"
209
+ if any(material in response_lower for material in ["glass", "cardboard", "paper"]):
210
+ return "Recyclable Waste"
211
+
212
+ # Look for food waste indicators
213
+ food_indicators = [
214
+ "food", "fruit", "vegetable", "organic", "kitchen waste",
215
+ "peel", "core", "scraps", "leftovers"
216
+ ]
217
+ if any(indicator in response_lower for indicator in food_indicators):
218
+ return "Food/Kitchen Waste"
219
+
220
+ # Look for hazardous waste indicators
221
+ hazardous_indicators = [
222
+ "battery", "chemical", "medicine", "paint", "toxic", "hazardous"
223
+ ]
224
+ if any(indicator in response_lower for indicator in hazardous_indicators):
225
+ return "Hazardous Waste"
226
+
227
+ # Look for other waste indicators
228
+ other_waste_indicators = [
229
+ "cigarette", "ceramic", "dust", "diaper", "tissue", "other waste"
230
+ ]
231
+ if any(indicator in response_lower for indicator in other_waste_indicators):
232
+ return "Other Waste"
233
+
234
+ # Only classify as "Unable to classify" if there are explicit indicators
235
  unable_phrases = [
236
  "unable to classify",
237
  "cannot classify",
238
+ "cannot be classified as waste",
239
+ "not garbage", "not waste", "not trash"
240
  ]
241
+
242
  if any(phrase in response_lower for phrase in unable_phrases):
243
  return "Unable to classify"
244
+
245
  # Check for non-garbage items (people, living things, etc.)
246
  non_garbage_indicators = [
247
+ "person", "people", "human", "face", "man", "woman",
248
+ "living", "alive", "animal", "pet",
249
+ "portrait", "photo of a person"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  ]
251
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  if any(indicator in response_lower for indicator in non_garbage_indicators):
253
  return "Unable to classify"
254
+
255
+ # If we found waste-related content but no clear category, try to infer
256
+ waste_related = any(word in response_lower for word in [
257
+ "waste", "trash", "garbage", "discard", "throw", "bin"
258
+ ])
259
+
260
+ if waste_related:
261
+ # Default to Other Waste if it's clearly waste but unclear category
262
+ return "Other Waste"
263
+
264
+ # If no clear classification found and no clear non-waste indicators,
265
+ # default to "Unable to classify"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  return "Unable to classify"
267
 
268
  def _extract_reasoning(self, response: str) -> str: