Spaces:
Sleeping
Sleeping
Deploy waste classification backend with ML model
Browse files- Dockerfile +31 -0
- backend/Dockerfile +37 -0
- backend/README.md +249 -0
- backend/inference_service.py +288 -0
- backend/requirements.txt +4 -0
- ml/README.md +223 -0
- ml/dataset_prep.py +159 -0
- ml/predict.py +153 -0
- ml/requirements.txt +10 -0
- ml/retrain.py +232 -0
- ml/train.py +326 -0
Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies
|
| 6 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 7 |
+
libglib2.0-0 \
|
| 8 |
+
libsm6 \
|
| 9 |
+
libxext6 \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
# Copy requirements
|
| 13 |
+
COPY backend/requirements.txt /app/backend/requirements.txt
|
| 14 |
+
COPY ml/requirements.txt /app/ml/requirements.txt
|
| 15 |
+
|
| 16 |
+
# Install Python dependencies
|
| 17 |
+
RUN pip install --no-cache-dir -r /app/backend/requirements.txt
|
| 18 |
+
RUN pip install --no-cache-dir -r /app/ml/requirements.txt
|
| 19 |
+
|
| 20 |
+
# Copy code
|
| 21 |
+
COPY backend/ /app/backend/
|
| 22 |
+
COPY ml/ /app/ml/
|
| 23 |
+
|
| 24 |
+
# Create directories
|
| 25 |
+
RUN mkdir -p /app/ml/models /app/ml/data/retraining
|
| 26 |
+
|
| 27 |
+
# Expose port 7860 (Hugging Face requirement)
|
| 28 |
+
EXPOSE 7860
|
| 29 |
+
|
| 30 |
+
# Start FastAPI on port 7860
|
| 31 |
+
CMD ["uvicorn", "backend.inference_service:app", "--host", "0.0.0.0", "--port", "7860"]
|
backend/Dockerfile
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
libglib2.0-0 \
|
| 8 |
+
libsm6 \
|
| 9 |
+
libxext6 \
|
| 10 |
+
libxrender-dev \
|
| 11 |
+
libgomp1 \
|
| 12 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
+
|
| 14 |
+
# Copy ML requirements
|
| 15 |
+
COPY ml/requirements.txt /app/ml/requirements.txt
|
| 16 |
+
RUN pip install --no-cache-dir -r /app/ml/requirements.txt
|
| 17 |
+
|
| 18 |
+
# Copy backend requirements
|
| 19 |
+
COPY backend/requirements.txt /app/backend/requirements.txt
|
| 20 |
+
RUN pip install --no-cache-dir -r /app/backend/requirements.txt
|
| 21 |
+
|
| 22 |
+
# Copy application code
|
| 23 |
+
COPY ml/ /app/ml/
|
| 24 |
+
COPY backend/ /app/backend/
|
| 25 |
+
|
| 26 |
+
# Create directories
|
| 27 |
+
RUN mkdir -p /app/ml/models /app/ml/data/retraining
|
| 28 |
+
|
| 29 |
+
# Expose port
|
| 30 |
+
EXPOSE 8000
|
| 31 |
+
|
| 32 |
+
# Health check
|
| 33 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
| 34 |
+
CMD python -c "import requests; requests.get('http://localhost:8000/health')"
|
| 35 |
+
|
| 36 |
+
# Run application
|
| 37 |
+
CMD ["python", "backend/inference_service.py"]
|
backend/README.md
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend Inference Service
|
| 2 |
+
|
| 3 |
+
FastAPI-based REST API for waste classification inference and feedback collection.
|
| 4 |
+
|
| 5 |
+
## Setup
|
| 6 |
+
|
| 7 |
+
### 1. Install Dependencies
|
| 8 |
+
|
| 9 |
+
\`\`\`bash
|
| 10 |
+
pip install -r backend/requirements.txt
|
| 11 |
+
pip install -r ml/requirements.txt
|
| 12 |
+
\`\`\`
|
| 13 |
+
|
| 14 |
+
### 2. Train or Download Model
|
| 15 |
+
|
| 16 |
+
Ensure you have a trained model at `ml/models/best_model.pth`:
|
| 17 |
+
|
| 18 |
+
\`\`\`bash
|
| 19 |
+
# Train a model
|
| 20 |
+
python ml/train.py
|
| 21 |
+
|
| 22 |
+
# Or download a pretrained model (if available)
|
| 23 |
+
# Place it in ml/models/best_model.pth
|
| 24 |
+
\`\`\`
|
| 25 |
+
|
| 26 |
+
### 3. Start Service
|
| 27 |
+
|
| 28 |
+
\`\`\`bash
|
| 29 |
+
# Development
|
| 30 |
+
python backend/inference_service.py
|
| 31 |
+
|
| 32 |
+
# Production with Gunicorn
|
| 33 |
+
gunicorn backend.inference_service:app -w 4 -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000
|
| 34 |
+
\`\`\`
|
| 35 |
+
|
| 36 |
+
Service will be available at `http://localhost:8000`
|
| 37 |
+
|
| 38 |
+
## API Endpoints
|
| 39 |
+
|
| 40 |
+
### Health Check
|
| 41 |
+
|
| 42 |
+
\`\`\`bash
|
| 43 |
+
GET /
|
| 44 |
+
GET /health
|
| 45 |
+
\`\`\`
|
| 46 |
+
|
| 47 |
+
Response:
|
| 48 |
+
\`\`\`json
|
| 49 |
+
{
|
| 50 |
+
"status": "healthy",
|
| 51 |
+
"model_loaded": true,
|
| 52 |
+
"timestamp": "2024-01-01T00:00:00"
|
| 53 |
+
}
|
| 54 |
+
\`\`\`
|
| 55 |
+
|
| 56 |
+
### Predict
|
| 57 |
+
|
| 58 |
+
\`\`\`bash
|
| 59 |
+
POST /predict
|
| 60 |
+
Content-Type: application/json
|
| 61 |
+
|
| 62 |
+
{
|
| 63 |
+
"image": "data:image/jpeg;base64,/9j/4AAQ..."
|
| 64 |
+
}
|
| 65 |
+
\`\`\`
|
| 66 |
+
|
| 67 |
+
Response:
|
| 68 |
+
\`\`\`json
|
| 69 |
+
{
|
| 70 |
+
"category": "recyclable",
|
| 71 |
+
"confidence": 0.95,
|
| 72 |
+
"probabilities": {
|
| 73 |
+
"recyclable": 0.95,
|
| 74 |
+
"organic": 0.02,
|
| 75 |
+
"wet-waste": 0.01,
|
| 76 |
+
"dry-waste": 0.01,
|
| 77 |
+
"ewaste": 0.005,
|
| 78 |
+
"hazardous": 0.003,
|
| 79 |
+
"landfill": 0.002
|
| 80 |
+
},
|
| 81 |
+
"timestamp": 1704067200000
|
| 82 |
+
}
|
| 83 |
+
\`\`\`
|
| 84 |
+
|
| 85 |
+
### Feedback
|
| 86 |
+
|
| 87 |
+
\`\`\`bash
|
| 88 |
+
POST /feedback
|
| 89 |
+
Content-Type: application/json
|
| 90 |
+
|
| 91 |
+
{
|
| 92 |
+
"image": "data:image/jpeg;base64,/9j/4AAQ...",
|
| 93 |
+
"predicted_category": "recyclable",
|
| 94 |
+
"corrected_category": "organic",
|
| 95 |
+
"confidence": 0.75
|
| 96 |
+
}
|
| 97 |
+
\`\`\`
|
| 98 |
+
|
| 99 |
+
Response:
|
| 100 |
+
\`\`\`json
|
| 101 |
+
{
|
| 102 |
+
"status": "success",
|
| 103 |
+
"message": "Feedback saved for retraining",
|
| 104 |
+
"saved_path": "ml/data/retraining/organic/feedback_20240101_120000.jpg"
|
| 105 |
+
}
|
| 106 |
+
\`\`\`
|
| 107 |
+
|
| 108 |
+
### Trigger Retraining
|
| 109 |
+
|
| 110 |
+
\`\`\`bash
|
| 111 |
+
POST /retrain
|
| 112 |
+
Authorization: Bearer <ADMIN_API_KEY>
|
| 113 |
+
\`\`\`
|
| 114 |
+
|
| 115 |
+
Response:
|
| 116 |
+
\`\`\`json
|
| 117 |
+
{
|
| 118 |
+
"status": "started",
|
| 119 |
+
"message": "Retraining initiated with 150 new samples",
|
| 120 |
+
"feedback_count": 150
|
| 121 |
+
}
|
| 122 |
+
\`\`\`
|
| 123 |
+
|
| 124 |
+
### Retraining Status
|
| 125 |
+
|
| 126 |
+
\`\`\`bash
|
| 127 |
+
GET /retrain/status
|
| 128 |
+
\`\`\`
|
| 129 |
+
|
| 130 |
+
Response:
|
| 131 |
+
\`\`\`json
|
| 132 |
+
{
|
| 133 |
+
"status": "success",
|
| 134 |
+
"total_retrains": 3,
|
| 135 |
+
"events": [...],
|
| 136 |
+
"latest": {
|
| 137 |
+
"version": 3,
|
| 138 |
+
"timestamp": "2024-01-01T00:00:00",
|
| 139 |
+
"accuracy": 92.5,
|
| 140 |
+
"improvement": 2.3,
|
| 141 |
+
"new_samples": 150
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
\`\`\`
|
| 145 |
+
|
| 146 |
+
### Statistics
|
| 147 |
+
|
| 148 |
+
\`\`\`bash
|
| 149 |
+
GET /stats
|
| 150 |
+
\`\`\`
|
| 151 |
+
|
| 152 |
+
Response:
|
| 153 |
+
\`\`\`json
|
| 154 |
+
{
|
| 155 |
+
"model_loaded": true,
|
| 156 |
+
"categories": ["recyclable", "organic", ...],
|
| 157 |
+
"feedback_samples": 150,
|
| 158 |
+
"feedback_by_category": {
|
| 159 |
+
"recyclable": 45,
|
| 160 |
+
"organic": 38,
|
| 161 |
+
...
|
| 162 |
+
}
|
| 163 |
+
}
|
| 164 |
+
\`\`\`
|
| 165 |
+
|
| 166 |
+
## Docker Deployment
|
| 167 |
+
|
| 168 |
+
### Build and Run
|
| 169 |
+
|
| 170 |
+
\`\`\`bash
|
| 171 |
+
# Build image
|
| 172 |
+
docker build -f backend/Dockerfile -t waste-classification-api .
|
| 173 |
+
|
| 174 |
+
# Run container
|
| 175 |
+
docker run -p 8000:8000 \
|
| 176 |
+
-v $(pwd)/ml/models:/app/ml/models \
|
| 177 |
+
-v $(pwd)/ml/data:/app/ml/data \
|
| 178 |
+
waste-classification-api
|
| 179 |
+
\`\`\`
|
| 180 |
+
|
| 181 |
+
### Using Docker Compose
|
| 182 |
+
|
| 183 |
+
\`\`\`bash
|
| 184 |
+
# Start all services
|
| 185 |
+
docker-compose up -d
|
| 186 |
+
|
| 187 |
+
# View logs
|
| 188 |
+
docker-compose logs -f
|
| 189 |
+
|
| 190 |
+
# Stop services
|
| 191 |
+
docker-compose down
|
| 192 |
+
\`\`\`
|
| 193 |
+
|
| 194 |
+
## Environment Variables
|
| 195 |
+
|
| 196 |
+
- `PORT`: Server port (default: 8000)
|
| 197 |
+
- `ADMIN_API_KEY`: Admin key for retraining endpoint
|
| 198 |
+
|
| 199 |
+
## Performance
|
| 200 |
+
|
| 201 |
+
- **Inference Time**: ~50ms per image (CPU)
|
| 202 |
+
- **Throughput**: ~20 requests/second (single worker)
|
| 203 |
+
- **Memory**: ~500MB with model loaded
|
| 204 |
+
- **Scaling**: Deploy multiple workers for higher throughput
|
| 205 |
+
|
| 206 |
+
## Production Deployment
|
| 207 |
+
|
| 208 |
+
### Railway / Render
|
| 209 |
+
|
| 210 |
+
1. Connect your repository
|
| 211 |
+
2. Set build command: `pip install -r backend/requirements.txt -r ml/requirements.txt`
|
| 212 |
+
3. Set start command: `python backend/inference_service.py`
|
| 213 |
+
4. Set environment variables
|
| 214 |
+
5. Deploy
|
| 215 |
+
|
| 216 |
+
### AWS EC2
|
| 217 |
+
|
| 218 |
+
1. Launch EC2 instance (t3.medium or higher)
|
| 219 |
+
2. Install Docker
|
| 220 |
+
3. Clone repository
|
| 221 |
+
4. Run with Docker Compose
|
| 222 |
+
5. Configure security group (port 8000)
|
| 223 |
+
6. Set up SSL with Nginx reverse proxy
|
| 224 |
+
|
| 225 |
+
### Vercel (Not Recommended)
|
| 226 |
+
|
| 227 |
+
FastAPI with ML models exceeds serverless function limits. Use Railway, Render, or AWS EC2 instead.
|
| 228 |
+
|
| 229 |
+
## Monitoring
|
| 230 |
+
|
| 231 |
+
Add application monitoring:
|
| 232 |
+
|
| 233 |
+
\`\`\`python
|
| 234 |
+
from prometheus_fastapi_instrumentator import Instrumentator
|
| 235 |
+
|
| 236 |
+
Instrumentator().instrument(app).expose(app)
|
| 237 |
+
\`\`\`
|
| 238 |
+
|
| 239 |
+
Access metrics at `/metrics`
|
| 240 |
+
|
| 241 |
+
## Security
|
| 242 |
+
|
| 243 |
+
- Add rate limiting with `slowapi`
|
| 244 |
+
- Implement proper authentication
|
| 245 |
+
- Validate image sizes and formats
|
| 246 |
+
- Use HTTPS in production
|
| 247 |
+
- Restrict CORS origins
|
| 248 |
+
- Sanitize file uploads
|
| 249 |
+
\`\`\`
|
backend/inference_service.py
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI inference service for waste classification
|
| 3 |
+
Provides REST API for predictions, feedback collection, and retraining
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
| 7 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
+
from pydantic import BaseModel
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
import base64
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
import json
|
| 13 |
+
import sys
|
| 14 |
+
import os
|
| 15 |
+
|
| 16 |
+
# Add ML directory to path
|
| 17 |
+
sys.path.append(str(Path(__file__).parent.parent))
|
| 18 |
+
|
| 19 |
+
from ml.predict import WasteClassifier
|
| 20 |
+
from ml.retrain import retrain_model
|
| 21 |
+
|
| 22 |
+
app = FastAPI(
|
| 23 |
+
title="AI Waste Segregation API",
|
| 24 |
+
description="ML inference service for waste classification",
|
| 25 |
+
version="1.0.0"
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# CORS middleware
|
| 29 |
+
app.add_middleware(
|
| 30 |
+
CORSMiddleware,
|
| 31 |
+
allow_origins=["*"], # Configure appropriately for production
|
| 32 |
+
allow_credentials=True,
|
| 33 |
+
allow_methods=["*"],
|
| 34 |
+
allow_headers=["*"],
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# Global classifier instance
|
| 38 |
+
classifier = None
|
| 39 |
+
MODEL_PATH = Path(__file__).parent.parent / "ml" / "models" / "best_model.pth"
|
| 40 |
+
RETRAINING_DIR = Path(__file__).parent.parent / "ml" / "data" / "retraining"
|
| 41 |
+
|
| 42 |
+
class PredictionRequest(BaseModel):
|
| 43 |
+
image: str # Base64 encoded image
|
| 44 |
+
|
| 45 |
+
class PredictionResponse(BaseModel):
|
| 46 |
+
category: str
|
| 47 |
+
confidence: float
|
| 48 |
+
probabilities: dict
|
| 49 |
+
timestamp: int
|
| 50 |
+
|
| 51 |
+
class FeedbackRequest(BaseModel):
|
| 52 |
+
image: str
|
| 53 |
+
predicted_category: str
|
| 54 |
+
corrected_category: str
|
| 55 |
+
confidence: float
|
| 56 |
+
|
| 57 |
+
class FeedbackResponse(BaseModel):
|
| 58 |
+
status: str
|
| 59 |
+
message: str
|
| 60 |
+
saved_path: str
|
| 61 |
+
|
| 62 |
+
@app.on_event("startup")
|
| 63 |
+
async def startup_event():
|
| 64 |
+
"""Load ML model on startup"""
|
| 65 |
+
global classifier
|
| 66 |
+
|
| 67 |
+
if not MODEL_PATH.exists():
|
| 68 |
+
print(f"Warning: Model not found at {MODEL_PATH}")
|
| 69 |
+
print("Please train a model first using: python ml/train.py")
|
| 70 |
+
return
|
| 71 |
+
|
| 72 |
+
try:
|
| 73 |
+
classifier = WasteClassifier(str(MODEL_PATH))
|
| 74 |
+
print(f"Model loaded successfully from {MODEL_PATH}")
|
| 75 |
+
except Exception as e:
|
| 76 |
+
print(f"Error loading model: {e}")
|
| 77 |
+
|
| 78 |
+
@app.get("/")
|
| 79 |
+
async def root():
|
| 80 |
+
"""Health check endpoint"""
|
| 81 |
+
return {
|
| 82 |
+
"status": "online",
|
| 83 |
+
"service": "AI Waste Segregation API",
|
| 84 |
+
"model_loaded": classifier is not None,
|
| 85 |
+
"version": "1.0.0"
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
@app.get("/health")
|
| 89 |
+
async def health():
|
| 90 |
+
"""Detailed health check"""
|
| 91 |
+
return {
|
| 92 |
+
"status": "healthy",
|
| 93 |
+
"model_loaded": classifier is not None,
|
| 94 |
+
"model_path": str(MODEL_PATH),
|
| 95 |
+
"timestamp": datetime.now().isoformat()
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
@app.post("/predict", response_model=PredictionResponse)
|
| 99 |
+
async def predict(request: PredictionRequest):
|
| 100 |
+
"""
|
| 101 |
+
Predict waste category from image
|
| 102 |
+
|
| 103 |
+
Args:
|
| 104 |
+
request: PredictionRequest with base64 encoded image
|
| 105 |
+
|
| 106 |
+
Returns:
|
| 107 |
+
PredictionResponse with category, confidence, and probabilities
|
| 108 |
+
"""
|
| 109 |
+
if classifier is None:
|
| 110 |
+
raise HTTPException(
|
| 111 |
+
status_code=503,
|
| 112 |
+
detail="Model not loaded. Please train a model first."
|
| 113 |
+
)
|
| 114 |
+
|
| 115 |
+
try:
|
| 116 |
+
# Perform prediction
|
| 117 |
+
result = classifier.predict(request.image)
|
| 118 |
+
|
| 119 |
+
return PredictionResponse(
|
| 120 |
+
category=result['category'],
|
| 121 |
+
confidence=result['confidence'],
|
| 122 |
+
probabilities=result['probabilities'],
|
| 123 |
+
timestamp=result['timestamp']
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
except Exception as e:
|
| 127 |
+
print(f"Prediction error: {e}")
|
| 128 |
+
raise HTTPException(
|
| 129 |
+
status_code=500,
|
| 130 |
+
detail=f"Prediction failed: {str(e)}"
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
@app.post("/feedback", response_model=FeedbackResponse)
|
| 134 |
+
async def save_feedback(request: FeedbackRequest):
|
| 135 |
+
"""
|
| 136 |
+
Save user feedback for continuous learning
|
| 137 |
+
|
| 138 |
+
Args:
|
| 139 |
+
request: FeedbackRequest with image and corrected category
|
| 140 |
+
|
| 141 |
+
Returns:
|
| 142 |
+
FeedbackResponse with save status
|
| 143 |
+
"""
|
| 144 |
+
try:
|
| 145 |
+
# Create retraining directory for corrected category
|
| 146 |
+
category_dir = RETRAINING_DIR / request.corrected_category
|
| 147 |
+
category_dir.mkdir(parents=True, exist_ok=True)
|
| 148 |
+
|
| 149 |
+
# Generate unique filename
|
| 150 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
| 151 |
+
filename = f"feedback_{timestamp}.jpg"
|
| 152 |
+
filepath = category_dir / filename
|
| 153 |
+
|
| 154 |
+
# Decode and save image
|
| 155 |
+
if request.image.startswith('data:image'):
|
| 156 |
+
image_data = request.image.split(',')[1]
|
| 157 |
+
else:
|
| 158 |
+
image_data = request.image
|
| 159 |
+
|
| 160 |
+
image_bytes = base64.b64decode(image_data)
|
| 161 |
+
|
| 162 |
+
with open(filepath, 'wb') as f:
|
| 163 |
+
f.write(image_bytes)
|
| 164 |
+
|
| 165 |
+
# Save metadata
|
| 166 |
+
metadata = {
|
| 167 |
+
'timestamp': timestamp,
|
| 168 |
+
'predicted_category': request.predicted_category,
|
| 169 |
+
'corrected_category': request.corrected_category,
|
| 170 |
+
'confidence': request.confidence,
|
| 171 |
+
'saved_at': datetime.now().isoformat()
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
metadata_path = category_dir / f"feedback_{timestamp}.json"
|
| 175 |
+
with open(metadata_path, 'w') as f:
|
| 176 |
+
json.dump(metadata, f, indent=2)
|
| 177 |
+
|
| 178 |
+
print(f"Feedback saved: {request.predicted_category} -> {request.corrected_category}")
|
| 179 |
+
|
| 180 |
+
return FeedbackResponse(
|
| 181 |
+
status="success",
|
| 182 |
+
message="Feedback saved for retraining",
|
| 183 |
+
saved_path=str(filepath)
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
except Exception as e:
|
| 187 |
+
print(f"Feedback save error: {e}")
|
| 188 |
+
raise HTTPException(
|
| 189 |
+
status_code=500,
|
| 190 |
+
detail=f"Failed to save feedback: {str(e)}"
|
| 191 |
+
)
|
| 192 |
+
|
| 193 |
+
@app.post("/retrain")
|
| 194 |
+
async def trigger_retrain(background_tasks: BackgroundTasks):
|
| 195 |
+
"""
|
| 196 |
+
Trigger model retraining with accumulated feedback
|
| 197 |
+
Runs as background task to avoid timeout
|
| 198 |
+
"""
|
| 199 |
+
|
| 200 |
+
# Check if there's feedback to retrain on
|
| 201 |
+
if not RETRAINING_DIR.exists():
|
| 202 |
+
raise HTTPException(
|
| 203 |
+
status_code=400,
|
| 204 |
+
detail="No feedback data available for retraining"
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
feedback_count = sum(1 for _ in RETRAINING_DIR.rglob('*.jpg'))
|
| 208 |
+
|
| 209 |
+
if feedback_count == 0:
|
| 210 |
+
raise HTTPException(
|
| 211 |
+
status_code=400,
|
| 212 |
+
detail="No feedback samples found for retraining"
|
| 213 |
+
)
|
| 214 |
+
|
| 215 |
+
# Add retraining to background tasks
|
| 216 |
+
background_tasks.add_task(retrain_model)
|
| 217 |
+
|
| 218 |
+
return {
|
| 219 |
+
"status": "started",
|
| 220 |
+
"message": f"Retraining initiated with {feedback_count} new samples",
|
| 221 |
+
"feedback_count": feedback_count
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
@app.get("/retrain/status")
|
| 225 |
+
async def get_retrain_status():
|
| 226 |
+
"""Get retraining history and status"""
|
| 227 |
+
|
| 228 |
+
log_file = Path(__file__).parent.parent / "ml" / "models" / "retraining_log.json"
|
| 229 |
+
|
| 230 |
+
if not log_file.exists():
|
| 231 |
+
return {
|
| 232 |
+
"status": "no_history",
|
| 233 |
+
"message": "No retraining history available",
|
| 234 |
+
"events": []
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
try:
|
| 238 |
+
with open(log_file, 'r') as f:
|
| 239 |
+
log = json.load(f)
|
| 240 |
+
|
| 241 |
+
return {
|
| 242 |
+
"status": "success",
|
| 243 |
+
"total_retrains": len(log),
|
| 244 |
+
"events": log[-10:], # Last 10 events
|
| 245 |
+
"latest": log[-1] if log else None
|
| 246 |
+
}
|
| 247 |
+
except Exception as e:
|
| 248 |
+
raise HTTPException(
|
| 249 |
+
status_code=500,
|
| 250 |
+
detail=f"Failed to read retraining log: {str(e)}"
|
| 251 |
+
)
|
| 252 |
+
|
| 253 |
+
@app.get("/stats")
|
| 254 |
+
async def get_stats():
|
| 255 |
+
"""Get system statistics"""
|
| 256 |
+
|
| 257 |
+
# Count feedback samples
|
| 258 |
+
feedback_count = 0
|
| 259 |
+
feedback_by_category = {}
|
| 260 |
+
|
| 261 |
+
if RETRAINING_DIR.exists():
|
| 262 |
+
for category in classifier.categories if classifier else []:
|
| 263 |
+
category_dir = RETRAINING_DIR / category
|
| 264 |
+
if category_dir.exists():
|
| 265 |
+
count = len(list(category_dir.glob('*.jpg')))
|
| 266 |
+
feedback_by_category[category] = count
|
| 267 |
+
feedback_count += count
|
| 268 |
+
|
| 269 |
+
return {
|
| 270 |
+
"model_loaded": classifier is not None,
|
| 271 |
+
"categories": classifier.categories if classifier else [],
|
| 272 |
+
"feedback_samples": feedback_count,
|
| 273 |
+
"feedback_by_category": feedback_by_category,
|
| 274 |
+
"model_path": str(MODEL_PATH),
|
| 275 |
+
"model_exists": MODEL_PATH.exists()
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
if __name__ == "__main__":
|
| 279 |
+
import uvicorn
|
| 280 |
+
|
| 281 |
+
port = int(os.getenv("PORT", 7860))
|
| 282 |
+
|
| 283 |
+
uvicorn.run(
|
| 284 |
+
"inference_service:app",
|
| 285 |
+
host="0.0.0.0",
|
| 286 |
+
port=port,
|
| 287 |
+
reload=True
|
| 288 |
+
)
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi>=0.104.0
|
| 2 |
+
uvicorn[standard]>=0.24.0
|
| 3 |
+
pydantic>=2.4.0
|
| 4 |
+
python-multipart>=0.0.6
|
ml/README.md
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ML Training Pipeline
|
| 2 |
+
|
| 3 |
+
Complete machine learning pipeline for waste classification using PyTorch and EfficientNet-B0.
|
| 4 |
+
|
| 5 |
+
## Setup
|
| 6 |
+
|
| 7 |
+
### 1. Install Dependencies
|
| 8 |
+
|
| 9 |
+
\`\`\`bash
|
| 10 |
+
pip install -r ml/requirements.txt
|
| 11 |
+
\`\`\`
|
| 12 |
+
|
| 13 |
+
### 2. Prepare Dataset
|
| 14 |
+
|
| 15 |
+
#### Option A: Use Public Datasets
|
| 16 |
+
|
| 17 |
+
\`\`\`bash
|
| 18 |
+
# View available datasets
|
| 19 |
+
python ml/dataset_prep.py info
|
| 20 |
+
|
| 21 |
+
# Download datasets from sources in DATASET_SOURCES.txt
|
| 22 |
+
# Extract to ml/data/raw/ with category folders
|
| 23 |
+
|
| 24 |
+
# Organize dataset into train/val/test splits
|
| 25 |
+
python ml/dataset_prep.py
|
| 26 |
+
\`\`\`
|
| 27 |
+
|
| 28 |
+
#### Option B: Use Custom Data
|
| 29 |
+
|
| 30 |
+
Place your images in:
|
| 31 |
+
\`\`\`
|
| 32 |
+
ml/data/raw/
|
| 33 |
+
recyclable/
|
| 34 |
+
organic/
|
| 35 |
+
wet-waste/
|
| 36 |
+
dry-waste/
|
| 37 |
+
ewaste/
|
| 38 |
+
hazardous/
|
| 39 |
+
landfill/
|
| 40 |
+
\`\`\`
|
| 41 |
+
|
| 42 |
+
Then run:
|
| 43 |
+
\`\`\`bash
|
| 44 |
+
python ml/dataset_prep.py
|
| 45 |
+
\`\`\`
|
| 46 |
+
|
| 47 |
+
## Training
|
| 48 |
+
|
| 49 |
+
### Initial Training
|
| 50 |
+
|
| 51 |
+
Train from scratch with pretrained EfficientNet-B0:
|
| 52 |
+
|
| 53 |
+
\`\`\`bash
|
| 54 |
+
python ml/train.py
|
| 55 |
+
\`\`\`
|
| 56 |
+
|
| 57 |
+
Training will:
|
| 58 |
+
- Use transfer learning with ImageNet pretrained weights
|
| 59 |
+
- Apply data augmentation for better generalization
|
| 60 |
+
- Save best model to `ml/models/best_model.pth`
|
| 61 |
+
- Generate confusion matrix
|
| 62 |
+
- Log training history
|
| 63 |
+
|
| 64 |
+
### Model Architecture
|
| 65 |
+
|
| 66 |
+
- **Base**: EfficientNet-B0 (pretrained on ImageNet)
|
| 67 |
+
- **Input**: 224x224 RGB images
|
| 68 |
+
- **Output**: 7 waste categories
|
| 69 |
+
- **Parameters**: ~5.3M
|
| 70 |
+
- **Inference Time**: ~50ms on CPU
|
| 71 |
+
|
| 72 |
+
### Why EfficientNet-B0?
|
| 73 |
+
|
| 74 |
+
1. **Accuracy**: State-of-the-art performance
|
| 75 |
+
2. **Speed**: Optimized for mobile/edge devices
|
| 76 |
+
3. **Size**: Compact model (~20MB)
|
| 77 |
+
4. **Efficiency**: Best accuracy-to-parameters ratio
|
| 78 |
+
|
| 79 |
+
## Inference
|
| 80 |
+
|
| 81 |
+
### Python Inference
|
| 82 |
+
|
| 83 |
+
\`\`\`python
|
| 84 |
+
from ml.predict import WasteClassifier
|
| 85 |
+
|
| 86 |
+
classifier = WasteClassifier('ml/models/best_model.pth')
|
| 87 |
+
|
| 88 |
+
# From file path
|
| 89 |
+
result = classifier.predict('image.jpg')
|
| 90 |
+
|
| 91 |
+
# From base64
|
| 92 |
+
result = classifier.predict('data:image/jpeg;base64,...')
|
| 93 |
+
|
| 94 |
+
print(result)
|
| 95 |
+
# {
|
| 96 |
+
# 'category': 'recyclable',
|
| 97 |
+
# 'confidence': 0.95,
|
| 98 |
+
# 'probabilities': {...},
|
| 99 |
+
# 'timestamp': 1234567890
|
| 100 |
+
# }
|
| 101 |
+
\`\`\`
|
| 102 |
+
|
| 103 |
+
### Export to ONNX
|
| 104 |
+
|
| 105 |
+
For production deployment:
|
| 106 |
+
|
| 107 |
+
\`\`\`bash
|
| 108 |
+
python -c "from ml.predict import export_to_onnx; export_to_onnx()"
|
| 109 |
+
\`\`\`
|
| 110 |
+
|
| 111 |
+
## Continuous Learning
|
| 112 |
+
|
| 113 |
+
### Collect Feedback
|
| 114 |
+
|
| 115 |
+
User corrections are saved to:
|
| 116 |
+
\`\`\`
|
| 117 |
+
ml/data/retraining/
|
| 118 |
+
recyclable/
|
| 119 |
+
organic/
|
| 120 |
+
...
|
| 121 |
+
\`\`\`
|
| 122 |
+
|
| 123 |
+
### Retrain Model
|
| 124 |
+
|
| 125 |
+
Fine-tune model with new samples:
|
| 126 |
+
|
| 127 |
+
\`\`\`bash
|
| 128 |
+
python ml/retrain.py
|
| 129 |
+
\`\`\`
|
| 130 |
+
|
| 131 |
+
Retraining will:
|
| 132 |
+
1. Add new samples to training set
|
| 133 |
+
2. Fine-tune existing model (lower learning rate)
|
| 134 |
+
3. Evaluate improvement
|
| 135 |
+
4. Promote model if accuracy improves by >1%
|
| 136 |
+
5. Version models (v1, v2, v3, ...)
|
| 137 |
+
6. Archive retraining samples
|
| 138 |
+
7. Log retraining events
|
| 139 |
+
|
| 140 |
+
### Automated Retraining
|
| 141 |
+
|
| 142 |
+
Set up a cron job or scheduled task:
|
| 143 |
+
|
| 144 |
+
\`\`\`bash
|
| 145 |
+
# Weekly retraining
|
| 146 |
+
0 2 * * 0 python ml/retrain.py
|
| 147 |
+
\`\`\`
|
| 148 |
+
|
| 149 |
+
## Model Versioning
|
| 150 |
+
|
| 151 |
+
Models are versioned automatically:
|
| 152 |
+
- `best_model.pth` - Current production model
|
| 153 |
+
- `model_v1.pth` - Version 1 (archived)
|
| 154 |
+
- `model_v2.pth` - Version 2 (archived)
|
| 155 |
+
- `best_model_backup_*.pth` - Backup before promotion
|
| 156 |
+
|
| 157 |
+
## Evaluation Metrics
|
| 158 |
+
|
| 159 |
+
- **Accuracy**: Overall classification accuracy
|
| 160 |
+
- **F1 Score (Macro)**: Average F1 across all categories
|
| 161 |
+
- **F1 Score (Weighted)**: Weighted by class frequency
|
| 162 |
+
- **Confusion Matrix**: Per-category performance
|
| 163 |
+
|
| 164 |
+
## Dataset Requirements
|
| 165 |
+
|
| 166 |
+
### Minimum Samples per Category
|
| 167 |
+
|
| 168 |
+
- Training: 500+ images per category
|
| 169 |
+
- Validation: 100+ images per category
|
| 170 |
+
- Test: 100+ images per category
|
| 171 |
+
|
| 172 |
+
### Image Quality
|
| 173 |
+
|
| 174 |
+
- Resolution: 640x480 or higher
|
| 175 |
+
- Format: JPG or PNG
|
| 176 |
+
- Lighting: Various conditions
|
| 177 |
+
- Backgrounds: Real-world environments
|
| 178 |
+
- Variety: Different angles, distances, overlaps
|
| 179 |
+
|
| 180 |
+
## Performance Optimization
|
| 181 |
+
|
| 182 |
+
### CPU Inference
|
| 183 |
+
|
| 184 |
+
- Uses optimized EfficientNet-B0
|
| 185 |
+
- Inference time: ~50ms per image
|
| 186 |
+
- No GPU required for deployment
|
| 187 |
+
|
| 188 |
+
### GPU Training
|
| 189 |
+
|
| 190 |
+
- Trains 10-20x faster on GPU
|
| 191 |
+
- Automatically detects CUDA availability
|
| 192 |
+
- Falls back to CPU if no GPU
|
| 193 |
+
|
| 194 |
+
## Troubleshooting
|
| 195 |
+
|
| 196 |
+
### Low Accuracy
|
| 197 |
+
|
| 198 |
+
1. Add more diverse training data
|
| 199 |
+
2. Balance dataset (equal samples per category)
|
| 200 |
+
3. Increase training epochs
|
| 201 |
+
4. Adjust learning rate
|
| 202 |
+
|
| 203 |
+
### Overfitting
|
| 204 |
+
|
| 205 |
+
1. Increase dropout rate
|
| 206 |
+
2. Add more data augmentation
|
| 207 |
+
3. Use early stopping (already enabled)
|
| 208 |
+
4. Collect more training data
|
| 209 |
+
|
| 210 |
+
### Class Confusion
|
| 211 |
+
|
| 212 |
+
1. Check confusion matrix
|
| 213 |
+
2. Add more examples for confused classes
|
| 214 |
+
3. Ensure clear visual differences
|
| 215 |
+
4. Review mislabeled data
|
| 216 |
+
|
| 217 |
+
## Next Steps
|
| 218 |
+
|
| 219 |
+
1. **Collect Data**: Gather Indian waste images
|
| 220 |
+
2. **Initial Training**: Train base model
|
| 221 |
+
3. **Deploy**: Integrate with backend API
|
| 222 |
+
4. **Monitor**: Track prediction accuracy
|
| 223 |
+
5. **Improve**: Continuous learning pipeline
|
ml/dataset_prep.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Dataset preparation and organization script
|
| 3 |
+
Helps structure your data for training
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import shutil
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from sklearn.model_selection import train_test_split
|
| 10 |
+
import random
|
| 11 |
+
|
| 12 |
+
CATEGORIES = [
|
| 13 |
+
'recyclable',
|
| 14 |
+
'organic',
|
| 15 |
+
'wet-waste',
|
| 16 |
+
'dry-waste',
|
| 17 |
+
'ewaste',
|
| 18 |
+
'hazardous',
|
| 19 |
+
'landfill'
|
| 20 |
+
]
|
| 21 |
+
|
| 22 |
+
def organize_dataset(raw_data_dir='ml/data/raw',
|
| 23 |
+
processed_dir='ml/data/processed',
|
| 24 |
+
test_split=0.15,
|
| 25 |
+
val_split=0.15):
|
| 26 |
+
"""
|
| 27 |
+
Organize raw images into train/val/test splits
|
| 28 |
+
|
| 29 |
+
Expected raw structure:
|
| 30 |
+
ml/data/raw/
|
| 31 |
+
recyclable/
|
| 32 |
+
img1.jpg
|
| 33 |
+
img2.jpg
|
| 34 |
+
organic/
|
| 35 |
+
img1.jpg
|
| 36 |
+
...
|
| 37 |
+
|
| 38 |
+
Output structure:
|
| 39 |
+
ml/data/processed/
|
| 40 |
+
train/
|
| 41 |
+
recyclable/
|
| 42 |
+
organic/
|
| 43 |
+
...
|
| 44 |
+
val/
|
| 45 |
+
...
|
| 46 |
+
test/
|
| 47 |
+
...
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
raw_path = Path(raw_data_dir)
|
| 51 |
+
processed_path = Path(processed_dir)
|
| 52 |
+
|
| 53 |
+
# Create output directories
|
| 54 |
+
for split in ['train', 'val', 'test']:
|
| 55 |
+
for category in CATEGORIES:
|
| 56 |
+
(processed_path / split / category).mkdir(parents=True, exist_ok=True)
|
| 57 |
+
|
| 58 |
+
print("Organizing dataset...")
|
| 59 |
+
|
| 60 |
+
total_images = 0
|
| 61 |
+
|
| 62 |
+
for category in CATEGORIES:
|
| 63 |
+
category_path = raw_path / category
|
| 64 |
+
|
| 65 |
+
if not category_path.exists():
|
| 66 |
+
print(f"Warning: {category} directory not found, skipping...")
|
| 67 |
+
continue
|
| 68 |
+
|
| 69 |
+
# Get all images
|
| 70 |
+
images = []
|
| 71 |
+
for ext in ['*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG']:
|
| 72 |
+
images.extend(list(category_path.glob(ext)))
|
| 73 |
+
|
| 74 |
+
if len(images) == 0:
|
| 75 |
+
print(f"Warning: No images found for {category}")
|
| 76 |
+
continue
|
| 77 |
+
|
| 78 |
+
# Shuffle
|
| 79 |
+
random.shuffle(images)
|
| 80 |
+
|
| 81 |
+
# Split
|
| 82 |
+
train_val, test = train_test_split(images, test_size=test_split, random_state=42)
|
| 83 |
+
train, val = train_test_split(train_val, test_size=val_split/(1-test_split), random_state=42)
|
| 84 |
+
|
| 85 |
+
# Copy files
|
| 86 |
+
for img in train:
|
| 87 |
+
shutil.copy(img, processed_path / 'train' / category / img.name)
|
| 88 |
+
|
| 89 |
+
for img in val:
|
| 90 |
+
shutil.copy(img, processed_path / 'val' / category / img.name)
|
| 91 |
+
|
| 92 |
+
for img in test:
|
| 93 |
+
shutil.copy(img, processed_path / 'test' / category / img.name)
|
| 94 |
+
|
| 95 |
+
total_images += len(images)
|
| 96 |
+
print(f"{category}: {len(train)} train, {len(val)} val, {len(test)} test")
|
| 97 |
+
|
| 98 |
+
print(f"\nDataset organized successfully!")
|
| 99 |
+
print(f"Total images: {total_images}")
|
| 100 |
+
print(f"Train: {len(list((processed_path / 'train').rglob('*.jpg'))) + len(list((processed_path / 'train').rglob('*.png')))}")
|
| 101 |
+
print(f"Val: {len(list((processed_path / 'val').rglob('*.jpg'))) + len(list((processed_path / 'val').rglob('*.png')))}")
|
| 102 |
+
print(f"Test: {len(list((processed_path / 'test').rglob('*.jpg'))) + len(list((processed_path / 'test').rglob('*.png')))}")
|
| 103 |
+
|
| 104 |
+
def download_sample_datasets():
|
| 105 |
+
"""
|
| 106 |
+
Instructions for downloading public waste classification datasets
|
| 107 |
+
"""
|
| 108 |
+
|
| 109 |
+
datasets = """
|
| 110 |
+
PUBLIC WASTE CLASSIFICATION DATASETS:
|
| 111 |
+
|
| 112 |
+
1. Kaggle - Waste Classification Data
|
| 113 |
+
URL: https://www.kaggle.com/datasets/techsash/waste-classification-data
|
| 114 |
+
Categories: Organic, Recyclable
|
| 115 |
+
Size: ~25k images
|
| 116 |
+
|
| 117 |
+
2. TrashNet Dataset
|
| 118 |
+
URL: https://github.com/garythung/trashnet
|
| 119 |
+
Categories: Glass, Paper, Cardboard, Plastic, Metal, Trash
|
| 120 |
+
Size: ~2.5k images
|
| 121 |
+
|
| 122 |
+
3. Waste Pictures Dataset (Kaggle)
|
| 123 |
+
URL: https://www.kaggle.com/datasets/wangziang/waste-pictures
|
| 124 |
+
Categories: Multiple waste types
|
| 125 |
+
Size: ~20k images
|
| 126 |
+
|
| 127 |
+
4. TACO Dataset (Trash Annotations in Context)
|
| 128 |
+
URL: http://tacodataset.org/
|
| 129 |
+
Categories: 60 categories of litter
|
| 130 |
+
Size: ~1.5k images with annotations
|
| 131 |
+
|
| 132 |
+
SETUP INSTRUCTIONS:
|
| 133 |
+
|
| 134 |
+
1. Download one or more datasets from above
|
| 135 |
+
2. Extract to ml/data/raw/
|
| 136 |
+
3. Organize by category (recyclable, organic, etc.)
|
| 137 |
+
4. Run: python ml/dataset_prep.py
|
| 138 |
+
|
| 139 |
+
For Indian waste types, you can:
|
| 140 |
+
- Capture your own images using the webcam interface
|
| 141 |
+
- Map categories from public datasets to Indian categories
|
| 142 |
+
- Combine multiple datasets for better coverage
|
| 143 |
+
"""
|
| 144 |
+
|
| 145 |
+
print(datasets)
|
| 146 |
+
|
| 147 |
+
# Save to file
|
| 148 |
+
with open('ml/DATASET_SOURCES.txt', 'w') as f:
|
| 149 |
+
f.write(datasets)
|
| 150 |
+
|
| 151 |
+
print("\nDataset sources saved to ml/DATASET_SOURCES.txt")
|
| 152 |
+
|
| 153 |
+
if __name__ == "__main__":
|
| 154 |
+
import sys
|
| 155 |
+
|
| 156 |
+
if len(sys.argv) > 1 and sys.argv[1] == 'info':
|
| 157 |
+
download_sample_datasets()
|
| 158 |
+
else:
|
| 159 |
+
organize_dataset()
|
ml/predict.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Inference script for waste classification
|
| 3 |
+
Optimized for CPU with fast preprocessing
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
from torchvision import transforms, models
|
| 9 |
+
from PIL import Image
|
| 10 |
+
import numpy as np
|
| 11 |
+
import base64
|
| 12 |
+
from io import BytesIO
|
| 13 |
+
import json
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
class WasteClassifier:
|
| 17 |
+
"""Waste classification inference class"""
|
| 18 |
+
|
| 19 |
+
def __init__(self, model_path='ml/models/best_model.pth', device=None):
|
| 20 |
+
self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 21 |
+
|
| 22 |
+
# Load checkpoint
|
| 23 |
+
checkpoint = torch.load(model_path, map_location=self.device)
|
| 24 |
+
self.categories = checkpoint['categories']
|
| 25 |
+
|
| 26 |
+
# Create model
|
| 27 |
+
self.model = models.efficientnet_b0(pretrained=False)
|
| 28 |
+
num_features = self.model.classifier[1].in_features
|
| 29 |
+
self.model.classifier = torch.nn.Sequential(
|
| 30 |
+
torch.nn.Dropout(p=0.3),
|
| 31 |
+
torch.nn.Linear(num_features, len(self.categories))
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
# Load weights
|
| 35 |
+
self.model.load_state_dict(checkpoint['model_state_dict'])
|
| 36 |
+
self.model.to(self.device)
|
| 37 |
+
self.model.eval()
|
| 38 |
+
|
| 39 |
+
# Setup transforms
|
| 40 |
+
self.transform = transforms.Compose([
|
| 41 |
+
transforms.Resize((224, 224)),
|
| 42 |
+
transforms.ToTensor(),
|
| 43 |
+
transforms.Normalize(mean=[0.485, 0.456, 0.406],
|
| 44 |
+
std=[0.229, 0.224, 0.225])
|
| 45 |
+
])
|
| 46 |
+
|
| 47 |
+
print(f"Model loaded successfully on {self.device}")
|
| 48 |
+
print(f"Categories: {self.categories}")
|
| 49 |
+
|
| 50 |
+
def preprocess_image(self, image_input):
|
| 51 |
+
"""
|
| 52 |
+
Preprocess image from various input formats
|
| 53 |
+
Accepts: PIL Image, file path, base64 string, or numpy array
|
| 54 |
+
"""
|
| 55 |
+
if isinstance(image_input, str):
|
| 56 |
+
if image_input.startswith('data:image'):
|
| 57 |
+
# Base64 encoded image
|
| 58 |
+
image_data = image_input.split(',')[1]
|
| 59 |
+
image_bytes = base64.b64decode(image_data)
|
| 60 |
+
image = Image.open(BytesIO(image_bytes)).convert('RGB')
|
| 61 |
+
else:
|
| 62 |
+
# File path
|
| 63 |
+
image = Image.open(image_input).convert('RGB')
|
| 64 |
+
elif isinstance(image_input, np.ndarray):
|
| 65 |
+
image = Image.fromarray(image_input).convert('RGB')
|
| 66 |
+
elif isinstance(image_input, Image.Image):
|
| 67 |
+
image = image_input.convert('RGB')
|
| 68 |
+
else:
|
| 69 |
+
raise ValueError(f"Unsupported image input type: {type(image_input)}")
|
| 70 |
+
|
| 71 |
+
return self.transform(image).unsqueeze(0)
|
| 72 |
+
|
| 73 |
+
def predict(self, image_input):
|
| 74 |
+
"""
|
| 75 |
+
Predict waste category for input image
|
| 76 |
+
|
| 77 |
+
Returns:
|
| 78 |
+
dict: {
|
| 79 |
+
'category': str,
|
| 80 |
+
'confidence': float,
|
| 81 |
+
'probabilities': dict
|
| 82 |
+
}
|
| 83 |
+
"""
|
| 84 |
+
# Preprocess
|
| 85 |
+
image_tensor = self.preprocess_image(image_input).to(self.device)
|
| 86 |
+
|
| 87 |
+
# Inference
|
| 88 |
+
with torch.no_grad():
|
| 89 |
+
outputs = self.model(image_tensor)
|
| 90 |
+
probabilities = F.softmax(outputs, dim=1)
|
| 91 |
+
confidence, predicted_idx = torch.max(probabilities, 1)
|
| 92 |
+
|
| 93 |
+
# Format results
|
| 94 |
+
predicted_category = self.categories[predicted_idx.item()]
|
| 95 |
+
confidence_score = confidence.item()
|
| 96 |
+
|
| 97 |
+
# Get all probabilities
|
| 98 |
+
prob_dict = {
|
| 99 |
+
category: float(prob)
|
| 100 |
+
for category, prob in zip(self.categories, probabilities[0].cpu().numpy())
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
return {
|
| 104 |
+
'category': predicted_category,
|
| 105 |
+
'confidence': confidence_score,
|
| 106 |
+
'probabilities': prob_dict,
|
| 107 |
+
'timestamp': int(np.datetime64('now').astype(int) / 1000000)
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
def predict_batch(self, image_inputs):
|
| 111 |
+
"""Predict for multiple images"""
|
| 112 |
+
results = []
|
| 113 |
+
for image_input in image_inputs:
|
| 114 |
+
results.append(self.predict(image_input))
|
| 115 |
+
return results
|
| 116 |
+
|
| 117 |
+
def export_to_onnx(model_path='ml/models/best_model.pth',
|
| 118 |
+
output_path='ml/models/model.onnx'):
|
| 119 |
+
"""Export PyTorch model to ONNX format for deployment"""
|
| 120 |
+
|
| 121 |
+
classifier = WasteClassifier(model_path)
|
| 122 |
+
|
| 123 |
+
# Create dummy input
|
| 124 |
+
dummy_input = torch.randn(1, 3, 224, 224).to(classifier.device)
|
| 125 |
+
|
| 126 |
+
# Export
|
| 127 |
+
torch.onnx.export(
|
| 128 |
+
classifier.model,
|
| 129 |
+
dummy_input,
|
| 130 |
+
output_path,
|
| 131 |
+
export_params=True,
|
| 132 |
+
opset_version=12,
|
| 133 |
+
do_constant_folding=True,
|
| 134 |
+
input_names=['input'],
|
| 135 |
+
output_names=['output'],
|
| 136 |
+
dynamic_axes={
|
| 137 |
+
'input': {0: 'batch_size'},
|
| 138 |
+
'output': {0: 'batch_size'}
|
| 139 |
+
}
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
print(f"Model exported to ONNX: {output_path}")
|
| 143 |
+
|
| 144 |
+
if __name__ == "__main__":
|
| 145 |
+
# Test inference
|
| 146 |
+
classifier = WasteClassifier()
|
| 147 |
+
|
| 148 |
+
# Example usage
|
| 149 |
+
test_image = "ml/data/processed/test/recyclable/sample.jpg"
|
| 150 |
+
if Path(test_image).exists():
|
| 151 |
+
result = classifier.predict(test_image)
|
| 152 |
+
print("\nPrediction Result:")
|
| 153 |
+
print(json.dumps(result, indent=2))
|
ml/requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.0.0
|
| 2 |
+
torchvision>=0.15.0
|
| 3 |
+
pillow>=9.0.0
|
| 4 |
+
numpy>=1.24.0
|
| 5 |
+
scikit-learn>=1.3.0
|
| 6 |
+
matplotlib>=3.7.0
|
| 7 |
+
seaborn>=0.12.0
|
| 8 |
+
tqdm>=4.65.0
|
| 9 |
+
onnx>=1.14.0
|
| 10 |
+
onnxruntime>=1.15.0
|
ml/retrain.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Continuous learning script for model improvement
|
| 3 |
+
Fine-tunes existing model with new corrected samples
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.optim as optim
|
| 9 |
+
from torch.utils.data import DataLoader
|
| 10 |
+
from torchvision import models
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import shutil
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
import json
|
| 15 |
+
|
| 16 |
+
from .train import WasteDataset, get_transforms, validate, CATEGORIES, CONFIG
|
| 17 |
+
|
| 18 |
+
def get_model_version():
|
| 19 |
+
"""Get next model version number"""
|
| 20 |
+
model_dir = Path(CONFIG['model_dir'])
|
| 21 |
+
existing_versions = list(model_dir.glob('model_v*.pth'))
|
| 22 |
+
|
| 23 |
+
if not existing_versions:
|
| 24 |
+
return 1
|
| 25 |
+
|
| 26 |
+
versions = [int(p.stem.split('_v')[1]) for p in existing_versions]
|
| 27 |
+
return max(versions) + 1
|
| 28 |
+
|
| 29 |
+
def prepare_retraining_data():
|
| 30 |
+
"""Organize retraining data into proper structure"""
|
| 31 |
+
|
| 32 |
+
retraining_dir = Path('ml/data/retraining')
|
| 33 |
+
processed_dir = Path(CONFIG['data_dir'])
|
| 34 |
+
|
| 35 |
+
if not retraining_dir.exists():
|
| 36 |
+
print("No retraining data found")
|
| 37 |
+
return 0
|
| 38 |
+
|
| 39 |
+
# Count new samples
|
| 40 |
+
new_samples = 0
|
| 41 |
+
|
| 42 |
+
for category in CATEGORIES:
|
| 43 |
+
category_dir = retraining_dir / category
|
| 44 |
+
if category_dir.exists():
|
| 45 |
+
images = list(category_dir.glob('*.jpg')) + list(category_dir.glob('*.png'))
|
| 46 |
+
new_samples += len(images)
|
| 47 |
+
|
| 48 |
+
# Copy to training set
|
| 49 |
+
target_dir = processed_dir / 'train' / category
|
| 50 |
+
target_dir.mkdir(parents=True, exist_ok=True)
|
| 51 |
+
|
| 52 |
+
for img_path in images:
|
| 53 |
+
target_path = target_dir / f"retrain_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{img_path.name}"
|
| 54 |
+
shutil.copy(img_path, target_path)
|
| 55 |
+
|
| 56 |
+
print(f"Added {new_samples} new samples to training set")
|
| 57 |
+
return new_samples
|
| 58 |
+
|
| 59 |
+
def retrain_model(base_model_path='ml/models/best_model.pth',
|
| 60 |
+
num_epochs=10,
|
| 61 |
+
learning_rate=0.0001):
|
| 62 |
+
"""
|
| 63 |
+
Fine-tune existing model with new data
|
| 64 |
+
Uses lower learning rate for incremental learning
|
| 65 |
+
"""
|
| 66 |
+
|
| 67 |
+
print("Starting retraining process...")
|
| 68 |
+
|
| 69 |
+
# Prepare new data
|
| 70 |
+
new_samples = prepare_retraining_data()
|
| 71 |
+
|
| 72 |
+
if new_samples == 0:
|
| 73 |
+
print("No new samples to train on")
|
| 74 |
+
return None
|
| 75 |
+
|
| 76 |
+
# Setup device
|
| 77 |
+
device = torch.device(CONFIG['device'])
|
| 78 |
+
print(f"Using device: {device}")
|
| 79 |
+
|
| 80 |
+
# Load base model
|
| 81 |
+
checkpoint = torch.load(base_model_path, map_location=device)
|
| 82 |
+
model = models.efficientnet_b0(pretrained=False)
|
| 83 |
+
num_features = model.classifier[1].in_features
|
| 84 |
+
model.classifier = nn.Sequential(
|
| 85 |
+
nn.Dropout(p=0.3),
|
| 86 |
+
nn.Linear(num_features, CONFIG['num_classes'])
|
| 87 |
+
)
|
| 88 |
+
model.load_state_dict(checkpoint['model_state_dict'])
|
| 89 |
+
model.to(device)
|
| 90 |
+
|
| 91 |
+
print(f"Loaded base model with accuracy: {checkpoint['accuracy']:.2f}%")
|
| 92 |
+
|
| 93 |
+
# Create datasets with updated data
|
| 94 |
+
train_dataset = WasteDataset(
|
| 95 |
+
CONFIG['data_dir'],
|
| 96 |
+
split='train',
|
| 97 |
+
transform=get_transforms('train')
|
| 98 |
+
)
|
| 99 |
+
val_dataset = WasteDataset(
|
| 100 |
+
CONFIG['data_dir'],
|
| 101 |
+
split='val',
|
| 102 |
+
transform=get_transforms('val')
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
train_loader = DataLoader(
|
| 106 |
+
train_dataset,
|
| 107 |
+
batch_size=CONFIG['batch_size'],
|
| 108 |
+
shuffle=True,
|
| 109 |
+
num_workers=4
|
| 110 |
+
)
|
| 111 |
+
val_loader = DataLoader(
|
| 112 |
+
val_dataset,
|
| 113 |
+
batch_size=CONFIG['batch_size'],
|
| 114 |
+
shuffle=False,
|
| 115 |
+
num_workers=4
|
| 116 |
+
)
|
| 117 |
+
|
| 118 |
+
# Setup training
|
| 119 |
+
criterion = nn.CrossEntropyLoss()
|
| 120 |
+
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
|
| 121 |
+
|
| 122 |
+
best_acc = checkpoint['accuracy']
|
| 123 |
+
improvement_threshold = 1.0 # Must improve by at least 1%
|
| 124 |
+
|
| 125 |
+
# Fine-tuning loop
|
| 126 |
+
for epoch in range(num_epochs):
|
| 127 |
+
print(f"\nRetraining Epoch {epoch+1}/{num_epochs}")
|
| 128 |
+
print("-" * 50)
|
| 129 |
+
|
| 130 |
+
# Train
|
| 131 |
+
model.train()
|
| 132 |
+
for images, labels in train_loader:
|
| 133 |
+
images, labels = images.to(device), labels.to(device)
|
| 134 |
+
|
| 135 |
+
optimizer.zero_grad()
|
| 136 |
+
outputs = model(images)
|
| 137 |
+
loss = criterion(outputs, labels)
|
| 138 |
+
loss.backward()
|
| 139 |
+
optimizer.step()
|
| 140 |
+
|
| 141 |
+
# Validate
|
| 142 |
+
val_loss, val_acc, f1_macro, f1_weighted, val_preds, val_labels = validate(
|
| 143 |
+
model, val_loader, criterion, device
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
print(f"Val Acc: {val_acc:.2f}% | F1 Macro: {f1_macro:.4f}")
|
| 147 |
+
|
| 148 |
+
# Check improvement
|
| 149 |
+
if val_acc > best_acc:
|
| 150 |
+
improvement = val_acc - best_acc
|
| 151 |
+
best_acc = val_acc
|
| 152 |
+
|
| 153 |
+
# Save improved model
|
| 154 |
+
version = get_model_version()
|
| 155 |
+
new_model_path = f"{CONFIG['model_dir']}/model_v{version}.pth"
|
| 156 |
+
|
| 157 |
+
torch.save({
|
| 158 |
+
'epoch': epoch,
|
| 159 |
+
'model_state_dict': model.state_dict(),
|
| 160 |
+
'optimizer_state_dict': optimizer.state_dict(),
|
| 161 |
+
'accuracy': val_acc,
|
| 162 |
+
'f1_macro': f1_macro,
|
| 163 |
+
'f1_weighted': f1_weighted,
|
| 164 |
+
'categories': CATEGORIES,
|
| 165 |
+
'config': CONFIG,
|
| 166 |
+
'base_model': base_model_path,
|
| 167 |
+
'new_samples': new_samples,
|
| 168 |
+
'improvement': improvement,
|
| 169 |
+
'retrain_date': datetime.now().isoformat()
|
| 170 |
+
}, new_model_path)
|
| 171 |
+
|
| 172 |
+
print(f"✓ Improved model saved as v{version} (+{improvement:.2f}%)")
|
| 173 |
+
|
| 174 |
+
# If significant improvement, promote to production
|
| 175 |
+
if improvement >= improvement_threshold:
|
| 176 |
+
production_path = f"{CONFIG['model_dir']}/best_model.pth"
|
| 177 |
+
|
| 178 |
+
# Backup old production model
|
| 179 |
+
if Path(production_path).exists():
|
| 180 |
+
backup_path = f"{CONFIG['model_dir']}/best_model_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pth"
|
| 181 |
+
shutil.copy(production_path, backup_path)
|
| 182 |
+
|
| 183 |
+
# Promote new model
|
| 184 |
+
shutil.copy(new_model_path, production_path)
|
| 185 |
+
print(f"✓ Model promoted to production!")
|
| 186 |
+
|
| 187 |
+
# Log retraining event
|
| 188 |
+
log_retraining_event(version, val_acc, improvement, new_samples)
|
| 189 |
+
|
| 190 |
+
# Clean up retraining directory
|
| 191 |
+
retraining_dir = Path('ml/data/retraining')
|
| 192 |
+
archive_dir = Path('ml/data/retraining_archive') / datetime.now().strftime('%Y%m%d_%H%M%S')
|
| 193 |
+
archive_dir.mkdir(parents=True, exist_ok=True)
|
| 194 |
+
|
| 195 |
+
for category in CATEGORIES:
|
| 196 |
+
category_dir = retraining_dir / category
|
| 197 |
+
if category_dir.exists():
|
| 198 |
+
shutil.move(str(category_dir), str(archive_dir / category))
|
| 199 |
+
|
| 200 |
+
print(f"\nRetraining complete! Final accuracy: {best_acc:.2f}%")
|
| 201 |
+
return model
|
| 202 |
+
|
| 203 |
+
def log_retraining_event(version, accuracy, improvement, new_samples):
|
| 204 |
+
"""Log retraining events for monitoring"""
|
| 205 |
+
|
| 206 |
+
log_file = Path(CONFIG['model_dir']) / 'retraining_log.json'
|
| 207 |
+
|
| 208 |
+
event = {
|
| 209 |
+
'version': version,
|
| 210 |
+
'timestamp': datetime.now().isoformat(),
|
| 211 |
+
'accuracy': accuracy,
|
| 212 |
+
'improvement': improvement,
|
| 213 |
+
'new_samples': new_samples
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
# Load existing log
|
| 217 |
+
if log_file.exists():
|
| 218 |
+
with open(log_file, 'r') as f:
|
| 219 |
+
log = json.load(f)
|
| 220 |
+
else:
|
| 221 |
+
log = []
|
| 222 |
+
|
| 223 |
+
log.append(event)
|
| 224 |
+
|
| 225 |
+
# Save updated log
|
| 226 |
+
with open(log_file, 'w') as f:
|
| 227 |
+
json.dump(log, f, indent=2)
|
| 228 |
+
|
| 229 |
+
print(f"Retraining event logged")
|
| 230 |
+
|
| 231 |
+
if __name__ == "__main__":
|
| 232 |
+
retrain_model()
|
ml/train.py
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Training script for waste classification model
|
| 3 |
+
Uses transfer learning with EfficientNet-B0 for optimal accuracy and speed
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.optim as optim
|
| 9 |
+
from torch.utils.data import DataLoader, Dataset
|
| 10 |
+
from torchvision import transforms, models
|
| 11 |
+
from PIL import Image
|
| 12 |
+
import os
|
| 13 |
+
import json
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from tqdm import tqdm
|
| 16 |
+
import numpy as np
|
| 17 |
+
from sklearn.metrics import confusion_matrix, f1_score, classification_report
|
| 18 |
+
import matplotlib.pyplot as plt
|
| 19 |
+
import seaborn as sns
|
| 20 |
+
|
| 21 |
+
# Configuration
|
| 22 |
+
CONFIG = {
|
| 23 |
+
'data_dir': 'ml/data/processed',
|
| 24 |
+
'model_dir': 'ml/models',
|
| 25 |
+
'batch_size': 32,
|
| 26 |
+
'num_epochs': 50,
|
| 27 |
+
'learning_rate': 0.001,
|
| 28 |
+
'image_size': 224,
|
| 29 |
+
'num_classes': 7,
|
| 30 |
+
'early_stopping_patience': 7,
|
| 31 |
+
'device': 'cuda' if torch.cuda.is_available() else 'cpu',
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
# Waste categories mapping
|
| 35 |
+
CATEGORIES = [
|
| 36 |
+
'recyclable',
|
| 37 |
+
'organic',
|
| 38 |
+
'wet-waste',
|
| 39 |
+
'dry-waste',
|
| 40 |
+
'ewaste',
|
| 41 |
+
'hazardous',
|
| 42 |
+
'landfill'
|
| 43 |
+
]
|
| 44 |
+
|
| 45 |
+
class WasteDataset(Dataset):
|
| 46 |
+
"""Custom dataset for waste classification"""
|
| 47 |
+
|
| 48 |
+
def __init__(self, data_dir, split='train', transform=None):
|
| 49 |
+
self.data_dir = Path(data_dir) / split
|
| 50 |
+
self.transform = transform
|
| 51 |
+
self.samples = []
|
| 52 |
+
|
| 53 |
+
# Load all images and labels
|
| 54 |
+
for category_idx, category in enumerate(CATEGORIES):
|
| 55 |
+
category_path = self.data_dir / category
|
| 56 |
+
if category_path.exists():
|
| 57 |
+
for img_path in category_path.glob('*.jpg'):
|
| 58 |
+
self.samples.append((str(img_path), category_idx))
|
| 59 |
+
for img_path in category_path.glob('*.png'):
|
| 60 |
+
self.samples.append((str(img_path), category_idx))
|
| 61 |
+
|
| 62 |
+
print(f"Loaded {len(self.samples)} samples for {split} split")
|
| 63 |
+
|
| 64 |
+
def __len__(self):
|
| 65 |
+
return len(self.samples)
|
| 66 |
+
|
| 67 |
+
def __getitem__(self, idx):
|
| 68 |
+
img_path, label = self.samples[idx]
|
| 69 |
+
image = Image.open(img_path).convert('RGB')
|
| 70 |
+
|
| 71 |
+
if self.transform:
|
| 72 |
+
image = self.transform(image)
|
| 73 |
+
|
| 74 |
+
return image, label
|
| 75 |
+
|
| 76 |
+
def get_transforms(split='train'):
|
| 77 |
+
"""Get data augmentation transforms"""
|
| 78 |
+
|
| 79 |
+
if split == 'train':
|
| 80 |
+
return transforms.Compose([
|
| 81 |
+
transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
|
| 82 |
+
transforms.RandomHorizontalFlip(p=0.5),
|
| 83 |
+
transforms.RandomRotation(15),
|
| 84 |
+
transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
|
| 85 |
+
transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
|
| 86 |
+
transforms.ToTensor(),
|
| 87 |
+
transforms.Normalize(mean=[0.485, 0.456, 0.406],
|
| 88 |
+
std=[0.229, 0.224, 0.225])
|
| 89 |
+
])
|
| 90 |
+
else:
|
| 91 |
+
return transforms.Compose([
|
| 92 |
+
transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
|
| 93 |
+
transforms.ToTensor(),
|
| 94 |
+
transforms.Normalize(mean=[0.485, 0.456, 0.406],
|
| 95 |
+
std=[0.229, 0.224, 0.225])
|
| 96 |
+
])
|
| 97 |
+
|
| 98 |
+
def create_model(num_classes):
|
| 99 |
+
"""
|
| 100 |
+
Create EfficientNet-B0 model with pretrained weights
|
| 101 |
+
EfficientNet provides excellent accuracy with low latency
|
| 102 |
+
"""
|
| 103 |
+
model = models.efficientnet_b0(pretrained=True)
|
| 104 |
+
|
| 105 |
+
# Freeze early layers
|
| 106 |
+
for param in model.features[:5].parameters():
|
| 107 |
+
param.requires_grad = False
|
| 108 |
+
|
| 109 |
+
# Replace classifier
|
| 110 |
+
num_features = model.classifier[1].in_features
|
| 111 |
+
model.classifier = nn.Sequential(
|
| 112 |
+
nn.Dropout(p=0.3),
|
| 113 |
+
nn.Linear(num_features, num_classes)
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
return model
|
| 117 |
+
|
| 118 |
+
def train_epoch(model, dataloader, criterion, optimizer, device):
|
| 119 |
+
"""Train for one epoch"""
|
| 120 |
+
model.train()
|
| 121 |
+
running_loss = 0.0
|
| 122 |
+
correct = 0
|
| 123 |
+
total = 0
|
| 124 |
+
|
| 125 |
+
pbar = tqdm(dataloader, desc='Training')
|
| 126 |
+
for images, labels in pbar:
|
| 127 |
+
images, labels = images.to(device), labels.to(device)
|
| 128 |
+
|
| 129 |
+
optimizer.zero_grad()
|
| 130 |
+
outputs = model(images)
|
| 131 |
+
loss = criterion(outputs, labels)
|
| 132 |
+
loss.backward()
|
| 133 |
+
optimizer.step()
|
| 134 |
+
|
| 135 |
+
running_loss += loss.item()
|
| 136 |
+
_, predicted = outputs.max(1)
|
| 137 |
+
total += labels.size(0)
|
| 138 |
+
correct += predicted.eq(labels).sum().item()
|
| 139 |
+
|
| 140 |
+
pbar.set_postfix({
|
| 141 |
+
'loss': f'{running_loss/len(pbar):.4f}',
|
| 142 |
+
'acc': f'{100.*correct/total:.2f}%'
|
| 143 |
+
})
|
| 144 |
+
|
| 145 |
+
return running_loss / len(dataloader), 100. * correct / total
|
| 146 |
+
|
| 147 |
+
def validate(model, dataloader, criterion, device):
|
| 148 |
+
"""Validate the model"""
|
| 149 |
+
model.eval()
|
| 150 |
+
running_loss = 0.0
|
| 151 |
+
correct = 0
|
| 152 |
+
total = 0
|
| 153 |
+
all_preds = []
|
| 154 |
+
all_labels = []
|
| 155 |
+
|
| 156 |
+
with torch.no_grad():
|
| 157 |
+
for images, labels in tqdm(dataloader, desc='Validating'):
|
| 158 |
+
images, labels = images.to(device), labels.to(device)
|
| 159 |
+
|
| 160 |
+
outputs = model(images)
|
| 161 |
+
loss = criterion(outputs, labels)
|
| 162 |
+
|
| 163 |
+
running_loss += loss.item()
|
| 164 |
+
_, predicted = outputs.max(1)
|
| 165 |
+
total += labels.size(0)
|
| 166 |
+
correct += predicted.eq(labels).sum().item()
|
| 167 |
+
|
| 168 |
+
all_preds.extend(predicted.cpu().numpy())
|
| 169 |
+
all_labels.extend(labels.cpu().numpy())
|
| 170 |
+
|
| 171 |
+
accuracy = 100. * correct / total
|
| 172 |
+
avg_loss = running_loss / len(dataloader)
|
| 173 |
+
|
| 174 |
+
# Calculate F1 scores
|
| 175 |
+
f1_macro = f1_score(all_labels, all_preds, average='macro')
|
| 176 |
+
f1_weighted = f1_score(all_labels, all_preds, average='weighted')
|
| 177 |
+
|
| 178 |
+
return avg_loss, accuracy, f1_macro, f1_weighted, all_preds, all_labels
|
| 179 |
+
|
| 180 |
+
def plot_confusion_matrix(y_true, y_pred, save_path):
|
| 181 |
+
"""Plot and save confusion matrix"""
|
| 182 |
+
cm = confusion_matrix(y_true, y_pred)
|
| 183 |
+
|
| 184 |
+
plt.figure(figsize=(10, 8))
|
| 185 |
+
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
|
| 186 |
+
xticklabels=CATEGORIES, yticklabels=CATEGORIES)
|
| 187 |
+
plt.title('Confusion Matrix')
|
| 188 |
+
plt.ylabel('True Label')
|
| 189 |
+
plt.xlabel('Predicted Label')
|
| 190 |
+
plt.tight_layout()
|
| 191 |
+
plt.savefig(save_path)
|
| 192 |
+
plt.close()
|
| 193 |
+
|
| 194 |
+
print(f"Confusion matrix saved to {save_path}")
|
| 195 |
+
|
| 196 |
+
def train_model():
|
| 197 |
+
"""Main training function"""
|
| 198 |
+
|
| 199 |
+
# Create directories
|
| 200 |
+
Path(CONFIG['model_dir']).mkdir(parents=True, exist_ok=True)
|
| 201 |
+
|
| 202 |
+
# Setup device
|
| 203 |
+
device = torch.device(CONFIG['device'])
|
| 204 |
+
print(f"Using device: {device}")
|
| 205 |
+
|
| 206 |
+
# Create datasets
|
| 207 |
+
train_dataset = WasteDataset(
|
| 208 |
+
CONFIG['data_dir'],
|
| 209 |
+
split='train',
|
| 210 |
+
transform=get_transforms('train')
|
| 211 |
+
)
|
| 212 |
+
val_dataset = WasteDataset(
|
| 213 |
+
CONFIG['data_dir'],
|
| 214 |
+
split='val',
|
| 215 |
+
transform=get_transforms('val')
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
+
# Create dataloaders
|
| 219 |
+
train_loader = DataLoader(
|
| 220 |
+
train_dataset,
|
| 221 |
+
batch_size=CONFIG['batch_size'],
|
| 222 |
+
shuffle=True,
|
| 223 |
+
num_workers=4,
|
| 224 |
+
pin_memory=True
|
| 225 |
+
)
|
| 226 |
+
val_loader = DataLoader(
|
| 227 |
+
val_dataset,
|
| 228 |
+
batch_size=CONFIG['batch_size'],
|
| 229 |
+
shuffle=False,
|
| 230 |
+
num_workers=4,
|
| 231 |
+
pin_memory=True
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
# Create model
|
| 235 |
+
model = create_model(CONFIG['num_classes']).to(device)
|
| 236 |
+
print(f"Model created with {sum(p.numel() for p in model.parameters())} parameters")
|
| 237 |
+
|
| 238 |
+
# Loss and optimizer
|
| 239 |
+
criterion = nn.CrossEntropyLoss()
|
| 240 |
+
optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])
|
| 241 |
+
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
|
| 242 |
+
optimizer, mode='max', factor=0.5, patience=3, verbose=True
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
# Training loop
|
| 246 |
+
best_acc = 0.0
|
| 247 |
+
patience_counter = 0
|
| 248 |
+
history = {
|
| 249 |
+
'train_loss': [], 'train_acc': [],
|
| 250 |
+
'val_loss': [], 'val_acc': [],
|
| 251 |
+
'val_f1_macro': [], 'val_f1_weighted': []
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
for epoch in range(CONFIG['num_epochs']):
|
| 255 |
+
print(f"\nEpoch {epoch+1}/{CONFIG['num_epochs']}")
|
| 256 |
+
print("-" * 50)
|
| 257 |
+
|
| 258 |
+
# Train
|
| 259 |
+
train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
|
| 260 |
+
|
| 261 |
+
# Validate
|
| 262 |
+
val_loss, val_acc, f1_macro, f1_weighted, val_preds, val_labels = validate(
|
| 263 |
+
model, val_loader, criterion, device
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
# Update scheduler
|
| 267 |
+
scheduler.step(val_acc)
|
| 268 |
+
|
| 269 |
+
# Save history
|
| 270 |
+
history['train_loss'].append(train_loss)
|
| 271 |
+
history['train_acc'].append(train_acc)
|
| 272 |
+
history['val_loss'].append(val_loss)
|
| 273 |
+
history['val_acc'].append(val_acc)
|
| 274 |
+
history['val_f1_macro'].append(f1_macro)
|
| 275 |
+
history['val_f1_weighted'].append(f1_weighted)
|
| 276 |
+
|
| 277 |
+
print(f"\nTrain Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
|
| 278 |
+
print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
|
| 279 |
+
print(f"F1 Macro: {f1_macro:.4f} | F1 Weighted: {f1_weighted:.4f}")
|
| 280 |
+
|
| 281 |
+
# Save best model
|
| 282 |
+
if val_acc > best_acc:
|
| 283 |
+
best_acc = val_acc
|
| 284 |
+
patience_counter = 0
|
| 285 |
+
|
| 286 |
+
torch.save({
|
| 287 |
+
'epoch': epoch,
|
| 288 |
+
'model_state_dict': model.state_dict(),
|
| 289 |
+
'optimizer_state_dict': optimizer.state_dict(),
|
| 290 |
+
'accuracy': val_acc,
|
| 291 |
+
'f1_macro': f1_macro,
|
| 292 |
+
'f1_weighted': f1_weighted,
|
| 293 |
+
'categories': CATEGORIES,
|
| 294 |
+
'config': CONFIG
|
| 295 |
+
}, f"{CONFIG['model_dir']}/best_model.pth")
|
| 296 |
+
|
| 297 |
+
print(f"✓ Best model saved with accuracy: {best_acc:.2f}%")
|
| 298 |
+
|
| 299 |
+
# Save confusion matrix for best model
|
| 300 |
+
plot_confusion_matrix(
|
| 301 |
+
val_labels,
|
| 302 |
+
val_preds,
|
| 303 |
+
f"{CONFIG['model_dir']}/confusion_matrix.png"
|
| 304 |
+
)
|
| 305 |
+
else:
|
| 306 |
+
patience_counter += 1
|
| 307 |
+
|
| 308 |
+
# Early stopping
|
| 309 |
+
if patience_counter >= CONFIG['early_stopping_patience']:
|
| 310 |
+
print(f"\nEarly stopping triggered after {epoch+1} epochs")
|
| 311 |
+
break
|
| 312 |
+
|
| 313 |
+
# Save training history
|
| 314 |
+
with open(f"{CONFIG['model_dir']}/training_history.json", 'w') as f:
|
| 315 |
+
json.dump(history, f, indent=2)
|
| 316 |
+
|
| 317 |
+
# Generate classification report
|
| 318 |
+
print("\nClassification Report:")
|
| 319 |
+
print(classification_report(val_labels, val_preds, target_names=CATEGORIES))
|
| 320 |
+
|
| 321 |
+
print(f"\nTraining complete! Best validation accuracy: {best_acc:.2f}%")
|
| 322 |
+
|
| 323 |
+
return model, history
|
| 324 |
+
|
| 325 |
+
if __name__ == "__main__":
|
| 326 |
+
train_model()
|