Auto-deploy from GitHub Actions
- .gitattributes +121 -0
- .github/workflows/main.yml +60 -0
- .gitignore +54 -0
- README.md +253 -0
- app.py +872 -0
- dataset_preparation.ipynb +0 -0
- inference_outputs/image_000000_annotated.jpg +0 -0
- inference_outputs/image_002126_annotated.jpg +3 -0
- requirements.txt +0 -0
- saved_models/efficientnetb0_stage2_best.weights.h5 +3 -0
- saved_models/mobilenetv2_v2_stage2_best.weights.h5 +3 -0
- saved_models/resnet50_v2_stage2_best.weights.h5 +3 -0
- saved_models/vgg16_v2_stage2_best.h5 +3 -0
- scripts/01_Data Augmentation.ipynb +595 -0
- scripts/01_EDA.ipynb +0 -0
- scripts/02_efficientnetb0.py +385 -0
- scripts/02_mobilenetv2.py +430 -0
- scripts/02_model_comparision.ipynb +19 -0
- scripts/02_resnet50.py +482 -0
- scripts/02_vgg16.py +422 -0
- scripts/03_eval_yolo.py +151 -0
- scripts/03_train_yolo.py +56 -0
- scripts/03_yolo_dataset_creation.py +248 -0
- scripts/04_inference_pipeline.py +436 -0
- scripts/04_validation and cleaning.py +310 -0
- scripts/check.py +239 -0
- scripts/compare_models.py +267 -0
- scripts/convert_efficientnet_weights.py +109 -0
- scripts/convert_mobilenet_weights.py +83 -0
- scripts/convert_vgg16_weights.py +79 -0
- scripts/train_yolo_smartvision.py +428 -0
- scripts/yolov8n.pt +3 -0
- smartvision_metrics/comparison_plots/MobileNetV2_cm.png +3 -0
- smartvision_metrics/comparison_plots/MobileNetV2_v3_cm.png +3 -0
- smartvision_metrics/comparison_plots/ResNet50_cm.png +3 -0
- smartvision_metrics/comparison_plots/ResNet50_v2_Stage_2_FT_cm.png +3 -0
- smartvision_metrics/comparison_plots/VGG16_cm.png +3 -0
- smartvision_metrics/comparison_plots/VGG16_v2_Stage_2_FT_cm.png +3 -0
- smartvision_metrics/comparison_plots/accuracy_comparison.png +0 -0
- smartvision_metrics/comparison_plots/efficientnetb0_cm.png +3 -0
- smartvision_metrics/comparison_plots/f1_comparison.png +0 -0
- smartvision_metrics/comparison_plots/size_comparison.png +0 -0
- smartvision_metrics/comparison_plots/speed_comparison.png +0 -0
- smartvision_metrics/comparison_plots/top5_comparison.png +0 -0
- smartvision_metrics/efficientnetb0/confusion_matrix.npy +0 -0
- smartvision_metrics/efficientnetb0/metrics.json +12 -0
- smartvision_metrics/efficientnetb0_stage2/confusion_matrix.npy +0 -0
- smartvision_metrics/efficientnetb0_stage2/metrics.json +12 -0
- smartvision_metrics/mobilenetv2/confusion_matrix.npy +0 -0
- smartvision_metrics/mobilenetv2/metrics.json +12 -0
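
Several of the `metrics.json` files listed above feed the Streamlit metrics dashboard (see `app.py` later in this diff, which reads `accuracy`, `f1_weighted`, `top5_accuracy`, `images_per_second`, and `model_size_mb` from them). The sketch below shows one way an evaluation script might write a file in that shape; the key names come from the dashboard code, the numeric values are placeholders, and the real files in this commit may carry extra fields.

```python
# Sketch only: write a per-model metrics.json in the shape the dashboard reads.
# The values below are placeholders, not measured results.
import json
from pathlib import Path

metrics = {
    "accuracy": 0.0,           # test-set top-1 accuracy
    "f1_weighted": 0.0,        # weighted F1 over the 25 classes
    "top5_accuracy": 0.0,      # test-set top-5 accuracy
    "images_per_second": 0.0,  # measured inference throughput
    "model_size_mb": 0.0,      # size of the saved weights on disk
}

# One folder per model, e.g. smartvision_metrics/<model_name>/metrics.json
out_dir = Path("smartvision_metrics") / "example_model"
out_dir.mkdir(parents=True, exist_ok=True)
with open(out_dir / "metrics.json", "w") as f:
    json.dump(metrics, f, indent=2)
```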
.gitattributes
ADDED
@@ -0,0 +1,121 @@
saved_models/resnet50_v2_stage2_best.weights.h5 filter=lfs diff=lfs merge=lfs -text
saved_models/vgg16_v2_stage2_best.h5 filter=lfs diff=lfs merge=lfs -text
inference_outputs/image_002126_annotated.jpg filter=lfs diff=lfs merge=lfs -text
saved_models/efficientnetb0_stage2_best.weights.h5 filter=lfs diff=lfs merge=lfs -text
saved_models/mobilenetv2_v2_stage2_best.weights.h5 filter=lfs diff=lfs merge=lfs -text
scripts/yolov8n.pt filter=lfs diff=lfs merge=lfs -text
smartvision_metrics/comparison_plots/MobileNetV2_cm.png filter=lfs diff=lfs merge=lfs -text
smartvision_metrics/comparison_plots/MobileNetV2_v3_cm.png filter=lfs diff=lfs merge=lfs -text
smartvision_metrics/comparison_plots/ResNet50_cm.png filter=lfs diff=lfs merge=lfs -text
smartvision_metrics/comparison_plots/ResNet50_v2_Stage_2_FT_cm.png filter=lfs diff=lfs merge=lfs -text
smartvision_metrics/comparison_plots/VGG16_cm.png filter=lfs diff=lfs merge=lfs -text
smartvision_metrics/comparison_plots/VGG16_v2_Stage_2_FT_cm.png filter=lfs diff=lfs merge=lfs -text
smartvision_metrics/comparison_plots/efficientnetb0_cm.png filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/BoxF1_curve.png filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/BoxPR_curve.png filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/BoxP_curve.png filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/BoxR_curve.png filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/confusion_matrix.png filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/labels.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/results.png filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/train_batch0.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/train_batch1.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/train_batch1260.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/train_batch1261.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/train_batch1262.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/train_batch2.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/val_batch0_labels.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/val_batch0_pred.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/val_batch1_labels.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/val_batch1_pred.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/val_batch2_labels.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/val_batch2_pred.jpg filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/weights/best.pt filter=lfs diff=lfs merge=lfs -text
smartvision_yolo/yolov8n_25classes/weights/last.pt filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/BoxF1_curve.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/BoxPR_curve.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/BoxP_curve.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/BoxR_curve.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/confusion_matrix.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/results.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/train_batch0.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/train_batch1.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/train_batch2.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/train_batch8400.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/train_batch8401.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/train_batch8402.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/val_batch0_labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/val_batch0_pred.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/val_batch1_labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/val_batch1_pred.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/val_batch2_labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/val_batch2_pred.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/weights/best.pt filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s6[[:space:]]-[[:space:]]Copy/weights/last.pt filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/BoxF1_curve.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/BoxPR_curve.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/BoxP_curve.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/BoxR_curve.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/confusion_matrix.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/results.png filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/train_batch0.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/train_batch1.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/train_batch2.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/val_batch0_labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/val_batch0_pred.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/val_batch1_labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/val_batch1_pred.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/val_batch2_labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/val_batch2_pred.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/weights/best.pt filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain/weights/last.pt filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain2/labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain2/train_batch0.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain2/train_batch1.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain2/train_batch2.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain3/labels.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain3/train_batch0.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain3/train_batch1.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain3/train_batch2.jpg filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain3/weights/best.pt filter=lfs diff=lfs merge=lfs -text
yolo_runs/smartvision_yolov8s_alltrain3/weights/last.pt filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples/image_000001.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples/image_000003.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples/image_000004.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples/image_000005.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples/image_000006.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples/image_000007.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples2/image_000001.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples2/image_000002.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples2/image_000003.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples2/image_000004.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples2/image_000005.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples2/image_000007.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples3/image_001750.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples3/image_001752.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples3/image_001753.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples3/image_001755.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples3/image_001756.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples3/image_001757.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples4/image_001750.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples4/image_001751.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples4/image_001752.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples4/image_001753.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples4/image_001754.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples4/image_001755.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples4/image_001757.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples_debug/image_001750.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples_debug/image_001752.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples_debug/image_001753.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples_debug2/image_001750.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples_debug2/image_001751.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples_debug2/image_001752.jpg filter=lfs diff=lfs merge=lfs -text
yolo_vis/samples_debug2/image_001753.jpg filter=lfs diff=lfs merge=lfs -text
yolov8n.pt filter=lfs diff=lfs merge=lfs -text
yolov8s.pt filter=lfs diff=lfs merge=lfs -text
.github/workflows/main.yml
ADDED
@@ -0,0 +1,60 @@
name: Deploy to Hugging Face Space

on:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      # Step 1 — Checkout repo with LFS
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          lfs: true

      # (Optional) Verify that LFS files are real binaries, not pointers
      - name: Verify model files
        run: |
          ls -lh saved_models || echo "saved_models folder not found"
          file saved_models/resnet50_v2_stage2_best.weights.h5 || echo "resnet file missing"
          file saved_models/vgg16_v2_stage2_best.h5 || echo "vgg16 file missing"

      # Step 2 — Set up Python
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      # Step 3 — Install Hugging Face Hub client
      - name: Install Hugging Face Hub
        run: pip install --upgrade huggingface_hub

      # Step 4 — Upload entire repo to the Space
      - name: Deploy to Hugging Face Space
        env:
          HF_TOKEN_01: ${{ secrets.HF_TOKEN_01 }}
          HF_SPACE_ID: "yogesh-venkat/SmartVision_AI"
        run: |
          python - << 'EOF'
          from huggingface_hub import HfApi
          import os

          space_id = os.getenv("HF_SPACE_ID")
          token = os.getenv("HF_TOKEN_01")
          api = HfApi()

          print(f"🚀 Deploying to Hugging Face Space: {space_id}")
          api.upload_folder(
              repo_id=space_id,
              repo_type="space",
              folder_path=".",
              token=token,
              commit_message="Auto-deploy from GitHub Actions",
          )
          EOF
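
The "Verify model files" step above uses `ls` and `file` to confirm that the LFS-tracked weights were actually materialized during checkout rather than left as pointer stubs. Below is a minimal local equivalent of that check, under the same assumption that the weights live in `saved_models/`; `check_lfs.py` is a hypothetical helper and not part of this commit. An un-fetched Git LFS file is a small text pointer that begins with the LFS spec line, which is what the check looks for.

```python
# check_lfs.py — hypothetical helper, not a file in this commit.
# Flags Git LFS pointer files that were checked out without `lfs: true`.
from pathlib import Path

LFS_POINTER_PREFIX = b"version https://git-lfs.github.com/spec/v1"

def is_lfs_pointer(path: Path) -> bool:
    """A real weight file is many MB of binary data; an un-fetched LFS
    pointer is a tiny text stub that starts with the LFS spec line."""
    with path.open("rb") as f:
        return f.read(len(LFS_POINTER_PREFIX)) == LFS_POINTER_PREFIX

if __name__ == "__main__":
    for weight in Path("saved_models").glob("*.h5"):
        status = "POINTER (not fetched)" if is_lfs_pointer(weight) else "ok"
        print(f"{weight} [{weight.stat().st_size} bytes] -> {status}")
```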
.gitignore
ADDED
@@ -0,0 +1,54 @@
# --------------------------------------------------
# Python general
# --------------------------------------------------
__pycache__/
*.py[cod]
*.pyo
*.pyd
*.so
*.egg-info/
.env
.venv
env/
venv/
ENV/
.ipynb_checkpoints/

# --------------------------------------------------
# OS / Editor junk
# --------------------------------------------------
.DS_Store
Thumbs.db
.idea/
.vscode/
*.swp

# --------------------------------------------------
# Streamlit
# --------------------------------------------------
.streamlit/cache/
.streamlit/static/

# --------------------------------------------------
# Logs
# --------------------------------------------------
logs/
*.log

# --------------------------------------------------
# Datasets (local only)
# --------------------------------------------------
smartvision_dataset/

# --------------------------------------------------
# Misc
# --------------------------------------------------
*.tmp
*.bak
*.old

untitled*
draft*
README.md
ADDED
@@ -0,0 +1,253 @@
---
license: mit
title: SmartVision AI
sdk: streamlit
emoji: 🚀
colorFrom: red
colorTo: red
short_description: Multi-domain smart object detection and classification system
---

# SmartVision AI – Complete Vision Pipeline (YOLOv8 + CNN Classifiers + Streamlit Dashboard)

SmartVision AI is a fully integrated **Computer Vision system** that combines:

- **Object Detection** using YOLOv8
- **Image Classification** using 4 deep-learning models:
  **VGG16**, **ResNet50**, **MobileNetV2**, **EfficientNetB0**
- A complete **Streamlit-based Dashboard** for inference, comparison, metrics visualization, and webcam snapshots
- A modified dataset built on a **25‑class COCO subset**

This README explains setup, architecture, training, deployment, and usage.

---

## 🚀 Features

### ✅ 1. Image Classification (4 Models)
Each model is fine‑tuned on the custom 25‑class dataset:
- **VGG16**
- **ResNet50**
- **MobileNetV2**
- **EfficientNetB0**

Outputs:
- Top‑1 class prediction
- Top‑5 predictions
- Class probabilities

---

### 🎯 2. Object Detection – YOLOv8s
YOLO detects multiple objects in images or webcam snapshots.

Features:
- Bounding boxes
- Confidence scores
- Optional classification verification using ResNet50
- Annotated images saved automatically

---

### 🔗 3. Integrated Classification + Detection Pipeline
For each YOLO‑detected box:
1. Crop the region
2. Classify it with the chosen CNN model
3. Display the YOLO label + classifier label
4. Draw the combined annotated result

---

### 📊 4. Metrics Dashboard
Displays:
- Accuracy
- Weighted F1 score
- Top‑5 accuracy
- Images per second
- Model size
- YOLOv8 mAP scores
- Confusion matrices
- Comparison bar charts

---

### 📷 5. Webcam Snapshot Detection
Take a photo via webcam → YOLO detection → annotated results.

---

## 📁 Project Structure

```
SmartVision_AI/
│
├── app.py                  # Main Streamlit App
├── saved_models/           # Trained weights (VGG16, ResNet, MobileNetV2, EfficientNet)
├── yolo_runs/              # YOLOv8 training folder
├── smartvision_dataset/    # 25-class dataset
│   ├── classification/
│   │   ├── train/
│   │   ├── val/
│   │   └── test/
│   └── detection/          # Labels + images for YOLOv8
│
├── smartvision_metrics/    # Accuracy, F1, confusion matrices
├── scripts/                # Weight converters, training scripts
├── inference_outputs/      # Annotated results
├── requirements.txt
└── README.md
```

---

## ⚙️ Installation

### 1️⃣ Clone Repository

```
git clone https://github.com/<your-username>/SmartVision_AI.git
cd SmartVision_AI
```

### 2️⃣ Install Dependencies

```
pip install -r requirements.txt
```

### 3️⃣ Install YOLOv8 (Ultralytics)

```
pip install ultralytics
```

---

## ▶️ Run Streamlit App

```
streamlit run app.py
```

The app will open at:

```
http://localhost:8501
```

---

## 🏋️ Training Workflow

### 1️⃣ Classification Models
Each model has:
- Stage 1 → Train the head with a frozen backbone
- Stage 2 → Unfreeze the top layers and fine‑tune

Scripts:
```
scripts/train_mobilenetv2.py
scripts/train_efficientnetb0.py
scripts/train_resnet50.py
scripts/train_vgg16.py
```

### 2️⃣ YOLO Training

```
yolo task=detect mode=train model=yolov8s.pt data=data.yaml epochs=50 imgsz=640
```

Outputs are saved to:
```
yolo_runs/smartvision_yolov8s/
```

---

## 🧪 Supported Classes (25 COCO Classes)

```
airplane, bed, bench, bicycle, bird, bottle, bowl,
bus, cake, car, cat, chair, couch, cow, cup, dog,
elephant, horse, motorcycle, person, pizza, potted plant,
stop sign, traffic light, truck
```

---

## 🧰 Deployment on Hugging Face Spaces

You can deploy using the **Streamlit SDK**.

### Steps:
1. Create a public repository on GitHub
2. Push the project files
3. Create a new Hugging Face Space → select **Streamlit**
4. Connect the GitHub repo
5. Add `requirements.txt`
6. Enable **GPU** for YOLO (optional)
7. Deploy 🚀

---

## 🧾 requirements.txt Example

```
streamlit
tensorflow==2.13.0
ultralytics
numpy
pandas
Pillow
matplotlib
scikit-learn
opencv-python-headless
```

---

## 📄 .gitignore Example

```
saved_models/
*.h5
*.pt
*.weights.h5
yolo_runs/
smartvision_metrics/
inference_outputs/
__pycache__/
*.pyc
.DS_Store
env/
```

---

## 🙋 Developer

**SmartVision AI Project**
Yogesh Kumar V
M.Sc. Seed Science & Technology (TNAU)
Passion: AI, Computer Vision, Agribusiness Technology

---

## 🏁 Conclusion

SmartVision AI integrates:
- Multi‑model classification
- YOLO detection
- Streamlit visualization
- Full evaluation suite

Perfect for:
- Research
- Demonstrations
- CV/AI portfolio
- Real‑world image understanding

---

Enjoy using SmartVision AI! 🚀🧠
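
The integrated classification + detection flow described in the README is implemented in `app.py`, the next (and largest) file in this diff. As a rough smoke test of that pipeline outside the Streamlit UI, the sketch below imports the module and calls its helpers directly. It assumes `app.py` is on the import path with the weight files in place; `sample.jpg` is a hypothetical input image, and Streamlit's `st.cache_resource` and page-level calls should only emit "bare mode" warnings when no app session is running.

```python
# Sketch only: exercise the YOLO + classifier pipeline from app.py headlessly.
# Assumes the repo layout and weights exist as committed; sample.jpg is hypothetical.
from PIL import Image

import app  # the Streamlit module added below in this diff

yolo = app.load_yolo_model()                        # YOLOv8s fine-tuned weights
classifiers = app.load_classification_models()      # VGG16 / ResNet50 / MobileNetV2 / EfficientNetB0

img = Image.open("sample.jpg").convert("RGB")
result = app.run_yolo_with_optional_classifier(
    img,
    yolo_model=yolo,
    classifier_model=classifiers.get("ResNet50"),    # optional crop verification
    conf_threshold=0.5,
)

print(f"YOLO inference time: {result['yolo_inference_time_sec']:.3f}s")
for det in result["detections"]:
    print(det["label"], f"{det['conf_yolo']:.2f}", "-> verified as:", det["cls_label"])

result["annotated_image"].save("inference_outputs/sample_annotated.jpg")
```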
app.py
ADDED
@@ -0,0 +1,872 @@
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict, Any, List
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 8 |
+
|
| 9 |
+
import streamlit as st
|
| 10 |
+
import pandas as pd
|
| 11 |
+
|
| 12 |
+
import tensorflow as tf
|
| 13 |
+
from tensorflow import keras
|
| 14 |
+
from tensorflow.keras import layers, regularizers
|
| 15 |
+
from ultralytics import YOLO
|
| 16 |
+
|
| 17 |
+
# Keras application imports
|
| 18 |
+
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input as vgg16_preprocess
|
| 19 |
+
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input as effnet_preprocess
|
| 20 |
+
|
| 21 |
+
# ------------------------------------------------------------
|
| 22 |
+
# GLOBAL CONFIG
|
| 23 |
+
# ------------------------------------------------------------
|
| 24 |
+
st.set_page_config(
|
| 25 |
+
page_title="SmartVision AI",
|
| 26 |
+
page_icon="🧠",
|
| 27 |
+
layout="wide",
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
st.markdown(
|
| 31 |
+
"""
|
| 32 |
+
<h1 style='text-align:center;'>
|
| 33 |
+
🤖⚡ <b>SmartVision AI</b> ⚡🤖
|
| 34 |
+
</h1>
|
| 35 |
+
<h3 style='text-align:center; margin-top:-10px;'>
|
| 36 |
+
🔎🎯 Intelligent Multi-Class Object Recognition System 🎯🔎
|
| 37 |
+
</h3>
|
| 38 |
+
""",
|
| 39 |
+
unsafe_allow_html=True
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
st.markdown(
|
| 45 |
+
"<p style='text-align:center; color: gray;'>End-to-end computer vision pipeline on a COCO subset of 25 everyday object classes</p>",
|
| 46 |
+
unsafe_allow_html=True
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
st.divider()
|
| 50 |
+
|
| 51 |
+
from pathlib import Path
|
| 52 |
+
|
| 53 |
+
# Resolve repository root relative to this file (streamlit_app/app.py)
|
| 54 |
+
THIS_FILE = Path(__file__).resolve()
|
| 55 |
+
REPO_ROOT = THIS_FILE.parent # repo/
|
| 56 |
+
SAVED_MODELS_DIR = REPO_ROOT / "saved_models"
|
| 57 |
+
YOLO_RUNS_DIR = REPO_ROOT / "yolo_runs"
|
| 58 |
+
SMARTVISION_METRICS_DIR = REPO_ROOT / "smartvision_metrics"
|
| 59 |
+
SMARTVISION_DATASET_DIR = REPO_ROOT / "smartvision_dataset"
|
| 60 |
+
|
| 61 |
+
# Then turn constants into Path objects / strings
|
| 62 |
+
YOLO_WEIGHTS_PATH = str(YOLO_RUNS_DIR / "smartvision_yolov8s6 - Copy" / "weights" / "best.pt")
|
| 63 |
+
|
| 64 |
+
CLASSIFIER_MODEL_CONFIGS = {
|
| 65 |
+
"VGG16": {
|
| 66 |
+
"type": "vgg16",
|
| 67 |
+
"path": str(SAVED_MODELS_DIR / "vgg16_v2_stage2_best.h5"),
|
| 68 |
+
},
|
| 69 |
+
"ResNet50": {
|
| 70 |
+
"type": "resnet50",
|
| 71 |
+
"path": str(SAVED_MODELS_DIR / "resnet50_v2_stage2_best.weights.h5"),
|
| 72 |
+
},
|
| 73 |
+
"MobileNetV2": {
|
| 74 |
+
"type": "mobilenetv2",
|
| 75 |
+
"path": str(SAVED_MODELS_DIR / "mobilenetv2_v2_stage2_best.weights.h5"),
|
| 76 |
+
},
|
| 77 |
+
"EfficientNetB0": {
|
| 78 |
+
"type": "efficientnetb0",
|
| 79 |
+
"path": str(SAVED_MODELS_DIR / "efficientnetb0_stage2_best.weights.h5"),
|
| 80 |
+
},
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
CLASS_METRIC_PATHS = {
|
| 84 |
+
"VGG16": str(SMARTVISION_METRICS_DIR / "vgg16_v2_stage2" / "metrics.json"),
|
| 85 |
+
"ResNet50": str(SMARTVISION_METRICS_DIR / "resnet50_v2_stage2" / "metrics.json"),
|
| 86 |
+
"MobileNetV2": str(SMARTVISION_METRICS_DIR / "mobilenetv2_v2" / "metrics.json"),
|
| 87 |
+
"EfficientNetB0": str(SMARTVISION_METRICS_DIR / "efficientnetb0" / "metrics.json"),
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
YOLO_METRICS_JSON = str(REPO_ROOT / "yolo_metrics" / "yolov8s_metrics.json")
|
| 91 |
+
BASE_DIR = str(SMARTVISION_DATASET_DIR)
|
| 92 |
+
CLASS_DIR = str(SMARTVISION_DATASET_DIR / "classification")
|
| 93 |
+
DET_DIR = str(SMARTVISION_DATASET_DIR / "detection")
|
| 94 |
+
|
| 95 |
+
IMG_SIZE = (224, 224)
|
| 96 |
+
NUM_CLASSES = 25
|
| 97 |
+
|
| 98 |
+
CLASS_NAMES = [
|
| 99 |
+
"airplane", "bed", "bench", "bicycle", "bird", "bottle", "bowl",
|
| 100 |
+
"bus", "cake", "car", "cat", "chair", "couch", "cow", "cup", "dog",
|
| 101 |
+
"elephant", "horse", "motorcycle", "person", "pizza", "potted plant",
|
| 102 |
+
"stop sign", "traffic light", "truck"
|
| 103 |
+
]
|
| 104 |
+
assert len(CLASS_NAMES) == NUM_CLASSES
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# ------------------------------------------------------------
|
| 110 |
+
# BUILDERS – MATCH TRAINING ARCHITECTURES
|
| 111 |
+
# ------------------------------------------------------------
|
| 112 |
+
|
| 113 |
+
# ---------- VGG16 v2 ----------
|
| 114 |
+
def build_vgg16_model_v2():
|
| 115 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 116 |
+
|
| 117 |
+
data_augmentation = keras.Sequential(
|
| 118 |
+
[
|
| 119 |
+
layers.RandomFlip("horizontal"),
|
| 120 |
+
layers.RandomRotation(0.04),
|
| 121 |
+
layers.RandomZoom(0.1),
|
| 122 |
+
layers.RandomContrast(0.2),
|
| 123 |
+
layers.Lambda(lambda x: tf.image.random_brightness(x, max_delta=0.2)),
|
| 124 |
+
layers.Lambda(lambda x: tf.image.random_saturation(x, 0.8, 1.2)),
|
| 125 |
+
],
|
| 126 |
+
name="data_augmentation",
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
x = data_augmentation(inputs)
|
| 130 |
+
|
| 131 |
+
x = layers.Lambda(
|
| 132 |
+
lambda z: vgg16_preprocess(tf.cast(z, tf.float32)),
|
| 133 |
+
name="vgg16_preprocess",
|
| 134 |
+
)(x)
|
| 135 |
+
|
| 136 |
+
base_model = VGG16(
|
| 137 |
+
include_top=False,
|
| 138 |
+
weights="imagenet",
|
| 139 |
+
input_tensor=x,
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(base_model.output)
|
| 143 |
+
x = layers.Dense(256, activation="relu", name="dense_256")(x)
|
| 144 |
+
x = layers.Dropout(0.5, name="dropout_0_5")(x)
|
| 145 |
+
outputs = layers.Dense(NUM_CLASSES, activation="softmax", name="predictions")(x)
|
| 146 |
+
|
| 147 |
+
model = keras.Model(inputs=inputs, outputs=outputs, name="VGG16_smartvision_v2")
|
| 148 |
+
return model
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# ---------- ResNet50 v2 ----------
|
| 152 |
+
def build_resnet50_model_v2():
|
| 153 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 154 |
+
|
| 155 |
+
data_augmentation = keras.Sequential(
|
| 156 |
+
[
|
| 157 |
+
layers.RandomFlip("horizontal"),
|
| 158 |
+
layers.RandomRotation(0.04),
|
| 159 |
+
layers.RandomZoom(0.1),
|
| 160 |
+
layers.RandomContrast(0.15),
|
| 161 |
+
layers.Lambda(lambda x: tf.image.random_brightness(x, max_delta=0.15)),
|
| 162 |
+
layers.Lambda(lambda x: tf.image.random_saturation(x, 0.85, 1.15)),
|
| 163 |
+
],
|
| 164 |
+
name="data_augmentation",
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
x = data_augmentation(inputs)
|
| 168 |
+
|
| 169 |
+
x = layers.Lambda(
|
| 170 |
+
keras.applications.resnet50.preprocess_input,
|
| 171 |
+
name="resnet50_preprocess",
|
| 172 |
+
)(x)
|
| 173 |
+
|
| 174 |
+
base_model = keras.applications.ResNet50(
|
| 175 |
+
include_top=False,
|
| 176 |
+
weights="imagenet",
|
| 177 |
+
input_shape=(*IMG_SIZE, 3),
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
x = base_model(x)
|
| 181 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(x)
|
| 182 |
+
x = layers.BatchNormalization(name="head_batchnorm")(x)
|
| 183 |
+
x = layers.Dropout(0.4, name="head_dropout")(x)
|
| 184 |
+
x = layers.Dense(256, activation="relu", name="head_dense")(x)
|
| 185 |
+
x = layers.BatchNormalization(name="head_batchnorm_2")(x)
|
| 186 |
+
x = layers.Dropout(0.5, name="head_dropout_2")(x)
|
| 187 |
+
outputs = layers.Dense(NUM_CLASSES, activation="softmax", name="predictions")(x)
|
| 188 |
+
|
| 189 |
+
model = keras.Model(inputs=inputs, outputs=outputs, name="ResNet50_smartvision_v2")
|
| 190 |
+
return model
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
# ---------- MobileNetV2 v2 ----------
|
| 194 |
+
def build_mobilenetv2_model_v2():
|
| 195 |
+
"""
|
| 196 |
+
Same architecture as the MobileNetV2 v2 training script.
|
| 197 |
+
"""
|
| 198 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 199 |
+
|
| 200 |
+
data_augmentation = keras.Sequential(
|
| 201 |
+
[
|
| 202 |
+
layers.RandomFlip("horizontal"),
|
| 203 |
+
layers.RandomRotation(0.04), # ~±15°
|
| 204 |
+
layers.RandomZoom(0.1),
|
| 205 |
+
layers.RandomContrast(0.15),
|
| 206 |
+
layers.Lambda(lambda x: tf.image.random_brightness(x, max_delta=0.15)),
|
| 207 |
+
layers.Lambda(lambda x: tf.image.random_saturation(x, 0.85, 1.15)),
|
| 208 |
+
],
|
| 209 |
+
name="data_augmentation",
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
x = data_augmentation(inputs)
|
| 213 |
+
|
| 214 |
+
x = layers.Lambda(
|
| 215 |
+
keras.applications.mobilenet_v2.preprocess_input,
|
| 216 |
+
name="mobilenetv2_preprocess",
|
| 217 |
+
)(x)
|
| 218 |
+
|
| 219 |
+
base_model = keras.applications.MobileNetV2(
|
| 220 |
+
include_top=False,
|
| 221 |
+
weights="imagenet",
|
| 222 |
+
input_shape=(*IMG_SIZE, 3),
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
x = base_model(x)
|
| 226 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(x)
|
| 227 |
+
|
| 228 |
+
x = layers.BatchNormalization(name="head_batchnorm_1")(x)
|
| 229 |
+
x = layers.Dropout(0.4, name="head_dropout_1")(x)
|
| 230 |
+
|
| 231 |
+
x = layers.Dense(
|
| 232 |
+
256,
|
| 233 |
+
activation="relu",
|
| 234 |
+
kernel_regularizer=regularizers.l2(1e-4),
|
| 235 |
+
name="head_dense_1",
|
| 236 |
+
)(x)
|
| 237 |
+
|
| 238 |
+
x = layers.BatchNormalization(name="head_batchnorm_2")(x)
|
| 239 |
+
x = layers.Dropout(0.5, name="head_dropout_2")(x)
|
| 240 |
+
|
| 241 |
+
outputs = layers.Dense(NUM_CLASSES, activation="softmax", name="predictions")(x)
|
| 242 |
+
|
| 243 |
+
model = keras.Model(
|
| 244 |
+
inputs=inputs,
|
| 245 |
+
outputs=outputs,
|
| 246 |
+
name="MobileNetV2_smartvision_v2",
|
| 247 |
+
)
|
| 248 |
+
return model
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
# ---------- EfficientNetB0 ----------
|
| 252 |
+
def bright_jitter(x):
|
| 253 |
+
x_f32 = tf.cast(x, tf.float32)
|
| 254 |
+
x_f32 = tf.image.random_brightness(x_f32, max_delta=0.25)
|
| 255 |
+
return tf.cast(x_f32, x.dtype)
|
| 256 |
+
|
| 257 |
+
def sat_jitter(x):
|
| 258 |
+
x_f32 = tf.cast(x, tf.float32)
|
| 259 |
+
x_f32 = tf.image.random_saturation(x_f32, lower=0.7, upper=1.3)
|
| 260 |
+
return tf.cast(x_f32, x.dtype)
|
| 261 |
+
|
| 262 |
+
def build_efficientnetb0_model():
|
| 263 |
+
"""
|
| 264 |
+
Same architecture as EfficientNetB0 training script
|
| 265 |
+
(without the mixed precision policy setup, which belongs in training code).
|
| 266 |
+
"""
|
| 267 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 268 |
+
|
| 269 |
+
data_augmentation = keras.Sequential(
|
| 270 |
+
[
|
| 271 |
+
layers.RandomFlip("horizontal"),
|
| 272 |
+
layers.RandomRotation(0.08),
|
| 273 |
+
layers.RandomZoom(0.15),
|
| 274 |
+
layers.RandomContrast(0.3),
|
| 275 |
+
layers.RandomTranslation(0.1, 0.1),
|
| 276 |
+
layers.Lambda(bright_jitter),
|
| 277 |
+
layers.Lambda(sat_jitter),
|
| 278 |
+
],
|
| 279 |
+
name="advanced_data_augmentation",
|
| 280 |
+
)
|
| 281 |
+
|
| 282 |
+
x = data_augmentation(inputs)
|
| 283 |
+
|
| 284 |
+
x = layers.Lambda(
|
| 285 |
+
lambda z: effnet_preprocess(tf.cast(z, tf.float32)),
|
| 286 |
+
name="effnet_preprocess",
|
| 287 |
+
)(x)
|
| 288 |
+
|
| 289 |
+
base_model = EfficientNetB0(
|
| 290 |
+
include_top=False,
|
| 291 |
+
weights="imagenet",
|
| 292 |
+
name="efficientnetb0",
|
| 293 |
+
)
|
| 294 |
+
|
| 295 |
+
x = base_model(x, training=False)
|
| 296 |
+
|
| 297 |
+
x = layers.GlobalAveragePooling2D(name="gap")(x)
|
| 298 |
+
x = layers.BatchNormalization(name="head_bn_1")(x)
|
| 299 |
+
x = layers.Dense(256, activation="relu", name="head_dense_1")(x)
|
| 300 |
+
x = layers.BatchNormalization(name="head_bn_2")(x)
|
| 301 |
+
x = layers.Dropout(0.4, name="head_dropout")(x)
|
| 302 |
+
|
| 303 |
+
outputs = layers.Dense(
|
| 304 |
+
NUM_CLASSES,
|
| 305 |
+
activation="softmax",
|
| 306 |
+
dtype="float32",
|
| 307 |
+
name="predictions",
|
| 308 |
+
)(x)
|
| 309 |
+
|
| 310 |
+
model = keras.Model(inputs, outputs, name="EfficientNetB0_smartvision")
|
| 311 |
+
return model
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
# ------------------------------------------------------------
|
| 315 |
+
# CACHED MODEL LOADERS
|
| 316 |
+
# ------------------------------------------------------------
|
| 317 |
+
@st.cache_resource(show_spinner=True)
|
| 318 |
+
def load_yolo_model() -> YOLO:
|
| 319 |
+
if not os.path.exists(YOLO_WEIGHTS_PATH):
|
| 320 |
+
raise FileNotFoundError(f"YOLO weights not found: {YOLO_WEIGHTS_PATH}")
|
| 321 |
+
model = YOLO(YOLO_WEIGHTS_PATH)
|
| 322 |
+
return model
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
@st.cache_resource(show_spinner=True)
|
| 326 |
+
def load_classification_models() -> Dict[str, keras.Model]:
|
| 327 |
+
"""
|
| 328 |
+
Build each architecture fresh, then TRY to load your trained weights.
|
| 329 |
+
If loading fails or path is None, the model is still returned
|
| 330 |
+
(ImageNet-pretrained backbone + random head), so all 4 are enabled.
|
| 331 |
+
"""
|
| 332 |
+
models: Dict[str, keras.Model] = {}
|
| 333 |
+
|
| 334 |
+
for name, cfg in CLASSIFIER_MODEL_CONFIGS.items():
|
| 335 |
+
model_type = cfg["type"]
|
| 336 |
+
path = cfg["path"]
|
| 337 |
+
|
| 338 |
+
# 1) Build the architecture
|
| 339 |
+
if model_type == "vgg16":
|
| 340 |
+
model = build_vgg16_model_v2()
|
| 341 |
+
elif model_type == "resnet50":
|
| 342 |
+
model = build_resnet50_model_v2()
|
| 343 |
+
elif model_type == "mobilenetv2":
|
| 344 |
+
model = build_mobilenetv2_model_v2()
|
| 345 |
+
elif model_type == "efficientnetb0":
|
| 346 |
+
model = build_efficientnetb0_model()
|
| 347 |
+
else:
|
| 348 |
+
continue
|
| 349 |
+
|
| 350 |
+
# 2) Try to load your training weights (if path is provided and file exists)
|
| 351 |
+
if path is not None and os.path.exists(path):
|
| 352 |
+
try:
|
| 353 |
+
model.load_weights(path)
|
| 354 |
+
except Exception as e:
|
| 355 |
+
st.sidebar.warning(
|
| 356 |
+
f"⚠️ Could not fully load weights for {name} from {path}: {e}\n"
|
| 357 |
+
" Using ImageNet-pretrained backbone + random head."
|
| 358 |
+
)
|
| 359 |
+
elif path is not None:
|
| 360 |
+
st.sidebar.warning(
|
| 361 |
+
f"⚠️ Weights file for {name} not found at {path}. "
|
| 362 |
+
"Using ImageNet-pretrained backbone + random head."
|
| 363 |
+
)
|
| 364 |
+
# if path is None → silently use ImageNet + random head
|
| 365 |
+
|
| 366 |
+
models[name] = model
|
| 367 |
+
|
| 368 |
+
return models
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
# ------------------------------------------------------------
|
| 372 |
+
# IMAGE HELPERS
|
| 373 |
+
# ------------------------------------------------------------
|
| 374 |
+
def read_image_file(uploaded_file) -> Image.Image:
|
| 375 |
+
image = Image.open(uploaded_file).convert("RGB")
|
| 376 |
+
return image
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def preprocess_for_classifier(pil_img: Image.Image) -> np.ndarray:
|
| 380 |
+
img_resized = pil_img.resize(IMG_SIZE, Image.BILINEAR)
|
| 381 |
+
arr = np.array(img_resized, dtype=np.float32)
|
| 382 |
+
arr = np.expand_dims(arr, axis=0) # (1, H, W, 3)
|
| 383 |
+
return arr
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
# ------------------------------------------------------------
|
| 387 |
+
# DRAW BOXES FOR DETECTION
|
| 388 |
+
# ------------------------------------------------------------
|
| 389 |
+
def draw_boxes_with_labels(
|
| 390 |
+
pil_img: Image.Image,
|
| 391 |
+
detections: List[Dict[str, Any]],
|
| 392 |
+
font_path: str = None
|
| 393 |
+
) -> Image.Image:
|
| 394 |
+
draw = ImageDraw.Draw(pil_img)
|
| 395 |
+
|
| 396 |
+
if font_path and os.path.exists(font_path):
|
| 397 |
+
font = ImageFont.truetype(font_path, 16)
|
| 398 |
+
else:
|
| 399 |
+
font = ImageFont.load_default()
|
| 400 |
+
|
| 401 |
+
for det in detections:
|
| 402 |
+
x1, y1, x2, y2 = det["x1"], det["y1"], det["x2"], det["y2"]
|
| 403 |
+
yolo_label = det["label"]
|
| 404 |
+
conf_yolo = det["conf_yolo"]
|
| 405 |
+
cls_label = det.get("cls_label")
|
| 406 |
+
cls_conf = det.get("cls_conf")
|
| 407 |
+
|
| 408 |
+
if cls_label is not None:
|
| 409 |
+
text = f"{yolo_label} {conf_yolo:.2f} | CLS: {cls_label} {cls_conf:.2f}"
|
| 410 |
+
else:
|
| 411 |
+
text = f"{yolo_label} {conf_yolo:.2f}"
|
| 412 |
+
|
| 413 |
+
draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
|
| 414 |
+
|
| 415 |
+
bbox = draw.textbbox((0, 0), text, font=font)
|
| 416 |
+
text_w = bbox[2] - bbox[0]
|
| 417 |
+
text_h = bbox[3] - bbox[1]
|
| 418 |
+
|
| 419 |
+
text_bg = [x1,
|
| 420 |
+
max(0, y1 - text_h - 2),
|
| 421 |
+
x1 + text_w + 4,
|
| 422 |
+
y1]
|
| 423 |
+
draw.rectangle(text_bg, fill="black")
|
| 424 |
+
draw.text((x1 + 2, max(0, y1 - text_h - 1)), text, fill="white", font=font)
|
| 425 |
+
|
| 426 |
+
return pil_img
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
def run_yolo_with_optional_classifier(
|
| 430 |
+
pil_img: Image.Image,
|
| 431 |
+
yolo_model: YOLO,
|
| 432 |
+
classifier_model: keras.Model = None,
|
| 433 |
+
conf_threshold: float = 0.5
|
| 434 |
+
) -> Dict[str, Any]:
|
| 435 |
+
"""Run YOLO on a PIL image, optionally verify each box with classifier."""
|
| 436 |
+
orig_w, orig_h = pil_img.size
|
| 437 |
+
|
| 438 |
+
t0 = time.perf_counter()
|
| 439 |
+
results = yolo_model.predict(
|
| 440 |
+
pil_img,
|
| 441 |
+
imgsz=640,
|
| 442 |
+
conf=conf_threshold,
|
| 443 |
+
device="cpu", # change to "0" if GPU available
|
| 444 |
+
verbose=False,
|
| 445 |
+
)
|
| 446 |
+
t1 = time.perf_counter()
|
| 447 |
+
infer_time = t1 - t0
|
| 448 |
+
|
| 449 |
+
res = results[0]
|
| 450 |
+
boxes = res.boxes
|
| 451 |
+
|
| 452 |
+
detections = []
|
| 453 |
+
|
| 454 |
+
for box in boxes:
|
| 455 |
+
x1, y1, x2, y2 = box.xyxy[0].tolist()
|
| 456 |
+
cls_id = int(box.cls[0].item())
|
| 457 |
+
conf_yolo = float(box.conf[0].item())
|
| 458 |
+
label = res.names[cls_id]
|
| 459 |
+
|
| 460 |
+
x1 = max(0, min(x1, orig_w - 1))
|
| 461 |
+
y1 = max(0, min(y1, orig_h - 1))
|
| 462 |
+
x2 = max(0, min(x2, orig_w - 1))
|
| 463 |
+
y2 = max(0, min(y2, orig_h - 1))
|
| 464 |
+
|
| 465 |
+
cls_label = None
|
| 466 |
+
cls_conf = None
|
| 467 |
+
if classifier_model is not None:
|
| 468 |
+
crop = pil_img.crop((x1, y1, x2, y2))
|
| 469 |
+
arr = preprocess_for_classifier(crop)
|
| 470 |
+
probs = classifier_model.predict(arr, verbose=0)[0]
|
| 471 |
+
idx = int(np.argmax(probs))
|
| 472 |
+
cls_label = CLASS_NAMES[idx]
|
| 473 |
+
cls_conf = float(probs[idx])
|
| 474 |
+
|
| 475 |
+
detections.append(
|
| 476 |
+
{
|
| 477 |
+
"x1": x1,
|
| 478 |
+
"y1": y1,
|
| 479 |
+
"x2": x2,
|
| 480 |
+
"y2": y2,
|
| 481 |
+
"label": label,
|
| 482 |
+
"conf_yolo": conf_yolo,
|
| 483 |
+
"cls_label": cls_label,
|
| 484 |
+
"cls_conf": cls_conf,
|
| 485 |
+
}
|
| 486 |
+
)
|
| 487 |
+
|
| 488 |
+
annotated = pil_img.copy()
|
| 489 |
+
annotated = draw_boxes_with_labels(annotated, detections)
|
| 490 |
+
|
| 491 |
+
return {
|
| 492 |
+
"annotated_image": annotated,
|
| 493 |
+
"detections": detections,
|
| 494 |
+
"yolo_inference_time_sec": infer_time,
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
# ------------------------------------------------------------
|
| 499 |
+
# METRICS LOADING
|
| 500 |
+
# ------------------------------------------------------------
|
| 501 |
+
@st.cache_data
|
| 502 |
+
def load_classification_metrics() -> pd.DataFrame:
|
| 503 |
+
rows = []
|
| 504 |
+
for name, path in CLASS_METRIC_PATHS.items():
|
| 505 |
+
if os.path.exists(path):
|
| 506 |
+
with open(path, "r") as f:
|
| 507 |
+
m = json.load(f)
|
| 508 |
+
rows.append(
|
| 509 |
+
{
|
| 510 |
+
"Model": name,
|
| 511 |
+
"Accuracy": m.get("accuracy", None),
|
| 512 |
+
"F1 (weighted)": m.get("f1_weighted", None),
|
| 513 |
+
"Top-5 Accuracy": m.get("top5_accuracy", None),
|
| 514 |
+
"Images/sec": m.get("images_per_second", None),
|
| 515 |
+
"Size (MB)": m.get("model_size_mb", None),
|
| 516 |
+
}
|
| 517 |
+
)
|
| 518 |
+
df = pd.DataFrame(rows)
|
| 519 |
+
return df
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
@st.cache_data
|
| 523 |
+
def load_yolo_metrics() -> Dict[str, Any]:
|
| 524 |
+
if not os.path.exists(YOLO_METRICS_JSON):
|
| 525 |
+
return {}
|
| 526 |
+
with open(YOLO_METRICS_JSON, "r") as f:
|
| 527 |
+
return json.load(f)
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
# ------------------------------------------------------------
|
| 531 |
+
# SIDEBAR NAVIGATION
|
| 532 |
+
# ------------------------------------------------------------
|
| 533 |
+
PAGES = [
|
| 534 |
+
"🏠 Home",
|
| 535 |
+
"🖼️ Image Classification",
|
| 536 |
+
"📦 Object Detection",
|
| 537 |
+
"📊 Model Performance",
|
| 538 |
+
"📷 Webcam Detection (snapshot)",
|
| 539 |
+
"ℹ️ About",
|
| 540 |
+
]
|
| 541 |
+
|
| 542 |
+
page = st.sidebar.radio("Navigate", PAGES)
|
| 543 |
+
|
| 544 |
+
# ------------------------------------------------------------
# PAGE 1 – HOME
# ------------------------------------------------------------
if page == "🏠 Home":
    col1, col2 = st.columns([1.2, 1])

    with col1:
        st.subheader("📌 Project Overview")
        st.markdown(
            """
            SmartVision AI is a complete computer vision pipeline built on a curated subset
            of **25 COCO classes**. It brings together:

            - 🧠 **Image Classification** using multiple CNN backbones:
              `VGG16 · ResNet50 · MobileNetV2 · EfficientNetB0`
            - 🎯 **Object Detection** using **YOLOv8s**, fine-tuned on the same 25 classes
            - 🔗 **Integrated Pipeline** where YOLO detects objects and
              **ResNet50** verifies the cropped regions
            - 📊 **Interactive Streamlit Dashboard** for demos, metrics visualization, and experiments
            """
        )

    with col2:
        st.subheader("🕹️ How to Use This App")
        st.markdown(
            """
            1. **🖼️ Image Classification**
               Upload an image with a **single dominant object** to classify it.

            2. **📦 Object Detection**
               Upload a **scene with multiple objects** to run YOLOv8 detection.

            3. **📊 Model Performance**
               Explore **accuracy, F1-score, speed, and confusion matrices** for all models.

            4. **📷 Webcam Detection (Snapshot)** *(optional)*
               Capture an image via webcam and run **real-time YOLO detection**.
            """
        )
        st.markdown(
            """
            > 💡 Tip: Start with **Object Detection** to see YOLOv8 in action,
            > then inspect misclassifications in **Model Performance**.
            """
        )

    st.divider()

    st.subheader("🧪 Sample Annotated Outputs")

    sample_dir = "inference_outputs"
    if os.path.exists(sample_dir):
        imgs = [
            os.path.join(sample_dir, f)
            for f in os.listdir(sample_dir)
            if f.lower().endswith((".jpg", ".png", ".jpeg"))
        ]
        if imgs:
            cols = st.columns(min(3, len(imgs)))
            for i, img_path in enumerate(imgs[:3]):
                with cols[i]:
                    st.image(img_path, caption=os.path.basename(img_path), use_container_width=False)
        else:
            st.info("No sample images found in `inference_outputs/` yet.")
    else:
        st.info("`inference_outputs/` folder not found yet – run inference to create samples.")

# ------------------------------------------------------------
# PAGE 2 – IMAGE CLASSIFICATION
# ------------------------------------------------------------
elif page == "🖼️ Image Classification":
    st.subheader("Image Classification – 4 CNN Models")

    st.write(
        """
        Upload an image that mainly contains **one object**.
        The app will run **all 4 CNN models** and show **top-5 predictions** per model.
        """
    )

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        pil_img = read_image_file(uploaded_file)
        st.image(pil_img, caption="Uploaded image", use_container_width=False)

        with st.spinner("Loading classification models..."):
            cls_models = load_classification_models()

        if not cls_models:
            st.error("No classification models could be loaded. Check your saved_models/ folder.")
        else:
            arr = preprocess_for_classifier(pil_img)

            st.markdown("### Predictions")
            cols = st.columns(len(cls_models))

            for (model_name, model), col in zip(cls_models.items(), cols):
                with col:
                    st.markdown(f"**{model_name}**")
                    probs = model.predict(arr, verbose=0)[0]
                    top5_idx = probs.argsort()[-5:][::-1]
                    top5_labels = [CLASS_NAMES[i] for i in top5_idx]
                    top5_probs = [probs[i] for i in top5_idx]

                    st.write(f"**Top-1:** {top5_labels[0]} ({top5_probs[0]:.3f})")
                    st.write("Top-5:")
                    for lbl, p in zip(top5_labels, top5_probs):
                        st.write(f"- {lbl}: {p:.3f}")

# ------------------------------------------------------------
# PAGE 3 – OBJECT DETECTION
# ------------------------------------------------------------
elif page == "📦 Object Detection":
    st.subheader("Object Detection – YOLOv8 + Optional ResNet Verification")

    st.write(
        """
        Upload an image containing one or more of the 25 COCO classes.
        YOLOv8 will detect all objects and optionally verify them with the best classifier (ResNet50).
        """
    )

    conf_th = st.slider("Confidence threshold", 0.1, 0.9, 0.5, 0.05)
    use_classifier = st.checkbox("Use ResNet50 classifier verification on crops", value=True)

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        pil_img = read_image_file(uploaded_file)
        # The upload is intentionally not previewed here; it is shown side by side
        # with the annotated result below, which avoids a duplicate image.

        with st.spinner("Loading YOLO model..."):
            yolo_model = load_yolo_model()

        classifier_model = None
        if use_classifier:
            with st.spinner("Loading ResNet50 classifier..."):
                classifier_model = build_resnet50_model_v2()
                weights_path = CLASSIFIER_MODEL_CONFIGS["ResNet50"]["path"]

                if os.path.exists(weights_path):
                    try:
                        classifier_model.load_weights(weights_path)
                    except Exception as e:
                        st.warning(f"Could not load ResNet50 v2 weights for detection: {e}")
                        classifier_model = None
                else:
                    st.warning("ResNet50 weights not found – classifier verification disabled.")
                    classifier_model = None

        with st.spinner("Running detection..."):
            result = run_yolo_with_optional_classifier(
                pil_img=pil_img,
                yolo_model=yolo_model,
                classifier_model=classifier_model,
                conf_threshold=conf_th,
            )

        # Exactly two images are shown, side by side: the upload and the annotated result.
        col1, col2 = st.columns(2)

        with col1:
            st.image(pil_img, caption="Uploaded Image", use_container_width=True)

        with col2:
            st.image(result["annotated_image"], caption="Detected Result", use_container_width=True)

        st.write(f"YOLO inference time: {result['yolo_inference_time_sec']*1000:.1f} ms")
        st.write(f"Number of detections: {len(result['detections'])}")

        if result["detections"]:
            st.markdown("### Detected objects")
            df_det = pd.DataFrame([
                {
                    "YOLO label": det["label"],
                    "YOLO confidence level": det["conf_yolo"],
                    "CLS label": det.get("cls_label"),
                    "CLS confidence level": det.get("cls_conf"),
                }
                for det in result["detections"]
            ])
            st.dataframe(df_det, use_container_width=False)

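# NOTE (illustrative sketch): judging from how `result` is consumed above,
# run_yolo_with_optional_classifier() is expected to return a dict roughly like:
#
#   {
#       "annotated_image": <image with boxes drawn>,
#       "yolo_inference_time_sec": 0.04,                # placeholder value
#       "detections": [
#           {"label": "person", "conf_yolo": 0.91,      # placeholder values
#            "cls_label": "person", "cls_conf": 0.88},
#       ],
#   }
#
# "cls_label" / "cls_conf" are read with .get(), so they may be missing or None
# when the ResNet50 verification pass is disabled.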
# ------------------------------------------------------------
# PAGE 4 – MODEL PERFORMANCE
# ------------------------------------------------------------
elif page == "📊 Model Performance":
    st.subheader("Model Performance – Classification vs Detection")

    # --- Classification metrics ---
    st.markdown("### 🧠 Classification Models (VGG16, ResNet50, MobileNetV2, EfficientNetB0)")
    df_cls = load_classification_metrics()
    if df_cls.empty:
        st.info("No classification metrics found yet in `smartvision_metrics/`.")
    else:
        st.dataframe(df_cls, use_container_width=False)

        col1, col2 = st.columns(2)
        with col1:
            st.bar_chart(
                df_cls.set_index("Model")["Accuracy"],
                use_container_width=True,
            )
        with col2:
            st.bar_chart(
                df_cls.set_index("Model")["F1 (weighted)"],
                use_container_width=True,
            )

        st.markdown("#### Inference Speed (images/sec)")
        st.bar_chart(
            df_cls.set_index("Model")["Images/sec"],
            use_container_width=True,
        )

    # --- YOLO metrics ---
    st.markdown("### 📦 YOLOv8 Detection Model")
    yolo_m = load_yolo_metrics()
    if not yolo_m:
        st.info("No YOLO metrics found yet in `yolo_metrics/`.")
    else:
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("mAP@0.5", f"{yolo_m.get('map_50', 0):.3f}")
        with col2:
            st.metric("mAP@0.5:0.95", f"{yolo_m.get('map_50_95', 0):.3f}")
        with col3:
            st.metric("YOLO FPS", f"{yolo_m.get('fps', 0):.2f}")

        st.write("YOLO metrics JSON:", YOLO_METRICS_JSON)

    # --- Confusion matrix & comparison plots (if available) ---
    st.markdown("### 📈 Comparison Plots & Confusion Matrices")

    comp_dir = os.path.join("smartvision_metrics", "comparison_plots")
    if os.path.exists(comp_dir):
        imgs = [
            os.path.join(comp_dir, f)
            for f in os.listdir(comp_dir)
            if f.lower().endswith(".png")
        ]
        if imgs:
            for img in sorted(imgs):
                st.image(img, caption=os.path.basename(img), use_container_width=True)
        else:
            st.info("No comparison plots found in `smartvision_metrics/comparison_plots/`.")
    else:
        st.info("Folder `smartvision_metrics/comparison_plots/` not found.")

# ------------------------------------------------------------
# PAGE 5 – WEBCAM DETECTION (SNAPSHOT)
# ------------------------------------------------------------
elif page == "📷 Webcam Detection (snapshot)":
    st.subheader("Webcam Detection (Snapshot-based)")

    st.write(
        """
        This page uses Streamlit's `camera_input` to grab a **single frame**
        from your webcam and run YOLOv8 detection on it.

        (For true real-time streaming, you would typically use `streamlit-webrtc`.)
        """
    )

    conf_th = st.slider("Confidence threshold", 0.1, 0.9, 0.5, 0.05)

    cam_image = st.camera_input("Capture image from webcam")

    if cam_image is not None:
        pil_img = Image.open(cam_image).convert("RGB")

        with st.spinner("Loading YOLO model..."):
            yolo_model = load_yolo_model()

        with st.spinner("Running detection..."):
            result = run_yolo_with_optional_classifier(
                pil_img=pil_img,
                yolo_model=yolo_model,
                classifier_model=None,  # detection-only for speed
                conf_threshold=conf_th,
            )

        st.image(result["annotated_image"], caption="Detections", use_container_width=False)
        st.write(f"YOLO inference time: {result['yolo_inference_time_sec']*1000:.1f} ms")
        st.write(f"Number of detections: {len(result['detections'])}")

# ------------------------------------------------------------
# PAGE 6 – ABOUT
# ------------------------------------------------------------
elif page == "ℹ️ About":
    st.subheader("About SmartVision AI")

    st.markdown(
        """
        **Dataset:**
        - Subset of MS COCO with 25 commonly occurring classes
        - Split into train/val/test for both classification & detection

        **Models used:**
        - **Classification:**
          - VGG16
          - ResNet50
          - MobileNetV2
          - EfficientNetB0
        - **Detection:**
          - YOLOv8s fine-tuned on the same 25 classes

        **Pipeline Highlights:**
        - Integrated pipeline: YOLO detects → ResNet50 verifies object crops
        - Performance metrics:
          - CNN test accuracy, F1, Top-5 accuracy, images/sec, model size
          - YOLO mAP@0.5, mAP@0.5:0.95, FPS
        - Quantization-ready: ResNet50 can be exported to float16 TFLite for deployment.

        **Tech Stack:**
        - Python, TensorFlow / Keras, Ultralytics YOLOv8
        - Streamlit for interactive dashboard
        - NumPy, Pandas, Pillow, Matplotlib
        """
    )
dataset_preparation.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
inference_outputs/image_000000_annotated.jpg
ADDED
|
inference_outputs/image_002126_annotated.jpg
ADDED
|
Git LFS Details
|
requirements.txt
ADDED
|
Binary file (416 Bytes). View file
|
|
|
saved_models/efficientnetb0_stage2_best.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:141ec000a01ef957577aea7ff9cc1da8b5053fec6d81453724e78d8014205e18
size 46584176
saved_models/mobilenetv2_v2_stage2_best.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:963f85823f7a153b9b0957b32b5bd058ce76d79fef63e820366b1b5831eed381
size 13558112
saved_models/resnet50_v2_stage2_best.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7e4db346fc333c6181d5c4038f53f8e9d78e9c2ab9913e1b4eafd75d81e9660c
size 227788524
saved_models/vgg16_v2_stage2_best.h5
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eecf8ec51a5e39a2a5a8cebd5c5f548c14f385ea30223efa69bf93363c642cd2
size 117259600
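The four entries above are Git LFS pointer files (an oid plus a size), not the weights themselves. A minimal sketch, assuming the real files have been fetched locally with `git lfs pull`, for checking that a downloaded file matches its pointer; the path, oid and size below are taken from the resnet50 entry above:

import hashlib
import os

path = "saved_models/resnet50_v2_stage2_best.weights.h5"
expected_oid = "7e4db346fc333c6181d5c4038f53f8e9d78e9c2ab9913e1b4eafd75d81e9660c"
expected_size = 227788524

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch – file may still be an LFS pointer"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("LFS object verified:", path)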
scripts/01_Data Augmentation.ipynb
ADDED
|
@@ -0,0 +1,595 @@
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 8,
|
| 6 |
+
"id": "4daac0c9",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [
|
| 9 |
+
{
|
| 10 |
+
"name": "stdout",
|
| 11 |
+
"output_type": "stream",
|
| 12 |
+
"text": [
|
| 13 |
+
"Found 1750 files belonging to 25 classes.\n",
|
| 14 |
+
"Found 375 files belonging to 25 classes.\n",
|
| 15 |
+
"Found 375 files belonging to 25 classes.\n"
|
| 16 |
+
]
|
| 17 |
+
}
|
| 18 |
+
],
|
| 19 |
+
"source": [
|
| 20 |
+
"import tensorflow as tf\n",
|
| 21 |
+
"from tensorflow import keras\n",
|
| 22 |
+
"from tensorflow.keras import layers\n",
|
| 23 |
+
"import os\n",
|
| 24 |
+
"\n",
|
| 25 |
+
"BASE_DIR = r\"D:\\Guvi\\SmartVision_AI\\smartvision_dataset\"\n",
|
| 26 |
+
"IMG_SIZE = (224, 224)\n",
|
| 27 |
+
"BATCH_SIZE = 32\n",
|
| 28 |
+
"IMG_SIZE = (224, 224)\n",
|
| 29 |
+
"\n",
|
| 30 |
+
"NUM_CLASSES = 25\n",
|
| 31 |
+
"\n",
|
| 32 |
+
"train_dir = os.path.join(BASE_DIR, \"classification\", \"train\")\n",
|
| 33 |
+
"val_dir = os.path.join(BASE_DIR, \"classification\", \"val\")\n",
|
| 34 |
+
"test_dir = os.path.join(BASE_DIR, \"classification\", \"test\")\n",
|
| 35 |
+
"\n",
|
| 36 |
+
"train_ds = tf.keras.utils.image_dataset_from_directory(\n",
|
| 37 |
+
" train_dir,\n",
|
| 38 |
+
" image_size=IMG_SIZE,\n",
|
| 39 |
+
" batch_size=BATCH_SIZE,\n",
|
| 40 |
+
" shuffle=True\n",
|
| 41 |
+
")\n",
|
| 42 |
+
"\n",
|
| 43 |
+
"val_ds = tf.keras.utils.image_dataset_from_directory(\n",
|
| 44 |
+
" val_dir,\n",
|
| 45 |
+
" image_size=IMG_SIZE,\n",
|
| 46 |
+
" batch_size=BATCH_SIZE,\n",
|
| 47 |
+
" shuffle=False\n",
|
| 48 |
+
")\n",
|
| 49 |
+
"\n",
|
| 50 |
+
"test_ds = tf.keras.utils.image_dataset_from_directory(\n",
|
| 51 |
+
" test_dir,\n",
|
| 52 |
+
" image_size=IMG_SIZE,\n",
|
| 53 |
+
" batch_size=BATCH_SIZE,\n",
|
| 54 |
+
" shuffle=False\n",
|
| 55 |
+
")\n"
|
| 56 |
+
]
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"cell_type": "code",
|
| 60 |
+
"execution_count": null,
|
| 61 |
+
"id": "e690c322",
|
| 62 |
+
"metadata": {},
|
| 63 |
+
"outputs": [],
|
| 64 |
+
"source": [
|
| 65 |
+
"# 1.4. Data augmentation block (applied only on training data)\n",
|
| 66 |
+
"data_augmentation = keras.Sequential(\n",
|
| 67 |
+
" [\n",
|
| 68 |
+
" layers.RandomFlip(\"horizontal\"), # random horizontal flip\n",
|
| 69 |
+
" layers.RandomRotation(0.04), # ~ ±15° (15/360 ≈ 0.04)\n",
|
| 70 |
+
" layers.RandomZoom(0.1), # random zoom\n",
|
| 71 |
+
" layers.RandomContrast(0.2), # ±20% contrast\n",
|
| 72 |
+
" # Brightness jitter using Lambda + tf.image\n",
|
| 73 |
+
" layers.Lambda(\n",
|
| 74 |
+
" lambda x: tf.image.random_brightness(x, max_delta=0.2)\n",
|
| 75 |
+
" ),\n",
|
| 76 |
+
" # Optional: light color jitter via saturation\n",
|
| 77 |
+
" layers.Lambda(\n",
|
| 78 |
+
" lambda x: tf.image.random_saturation(x, lower=0.8, upper=1.2)\n",
|
| 79 |
+
" ),\n",
|
| 80 |
+
" ],\n",
|
| 81 |
+
" name=\"data_augmentation\",\n",
|
| 82 |
+
")\n",
|
| 83 |
+
"\n",
|
| 84 |
+
"# Normalization layer (0–1 scaling or ImageNet style)\n",
|
| 85 |
+
"normalization = layers.Rescaling(1./255)\n"
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"cell_type": "code",
|
| 90 |
+
"execution_count": null,
|
| 91 |
+
"id": "88323a0f",
|
| 92 |
+
"metadata": {},
|
| 93 |
+
"outputs": [
|
| 94 |
+
{
|
| 95 |
+
"name": "stdout",
|
| 96 |
+
"output_type": "stream",
|
| 97 |
+
"text": [
|
| 98 |
+
"Epoch 1/25\n",
|
| 99 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5s/step - accuracy: 0.0405 - loss: 3.4605"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"name": "stderr",
|
| 104 |
+
"output_type": "stream",
|
| 105 |
+
"text": [
|
| 106 |
+
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"name": "stdout",
|
| 111 |
+
"output_type": "stream",
|
| 112 |
+
"text": [
|
| 113 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m328s\u001b[0m 6s/step - accuracy: 0.0429 - loss: 3.4206 - val_accuracy: 0.0373 - val_loss: 3.2323 - learning_rate: 1.0000e-04\n",
|
| 114 |
+
"Epoch 2/25\n",
|
| 115 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6s/step - accuracy: 0.0474 - loss: 3.2988"
|
| 116 |
+
]
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"name": "stderr",
|
| 120 |
+
"output_type": "stream",
|
| 121 |
+
"text": [
|
| 122 |
+
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"name": "stdout",
|
| 127 |
+
"output_type": "stream",
|
| 128 |
+
"text": [
|
| 129 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m457s\u001b[0m 8s/step - accuracy: 0.0486 - loss: 3.2914 - val_accuracy: 0.0533 - val_loss: 3.1938 - learning_rate: 1.0000e-04\n",
|
| 130 |
+
"Epoch 3/25\n",
|
| 131 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━���━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18s/step - accuracy: 0.0463 - loss: 3.2775 "
|
| 132 |
+
]
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"name": "stderr",
|
| 136 |
+
"output_type": "stream",
|
| 137 |
+
"text": [
|
| 138 |
+
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"name": "stdout",
|
| 143 |
+
"output_type": "stream",
|
| 144 |
+
"text": [
|
| 145 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1232s\u001b[0m 22s/step - accuracy: 0.0486 - loss: 3.2567 - val_accuracy: 0.0853 - val_loss: 3.1689 - learning_rate: 1.0000e-04\n",
|
| 146 |
+
"Epoch 4/25\n",
|
| 147 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 19s/step - accuracy: 0.0568 - loss: 3.2323 "
|
| 148 |
+
]
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"name": "stderr",
|
| 152 |
+
"output_type": "stream",
|
| 153 |
+
"text": [
|
| 154 |
+
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"name": "stdout",
|
| 159 |
+
"output_type": "stream",
|
| 160 |
+
"text": [
|
| 161 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1278s\u001b[0m 23s/step - accuracy: 0.0543 - loss: 3.2274 - val_accuracy: 0.1360 - val_loss: 3.1451 - learning_rate: 1.0000e-04\n",
|
| 162 |
+
"Epoch 5/25\n",
|
| 163 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16s/step - accuracy: 0.0526 - loss: 3.1936 "
|
| 164 |
+
]
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"name": "stderr",
|
| 168 |
+
"output_type": "stream",
|
| 169 |
+
"text": [
|
| 170 |
+
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"name": "stdout",
|
| 175 |
+
"output_type": "stream",
|
| 176 |
+
"text": [
|
| 177 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1076s\u001b[0m 19s/step - accuracy: 0.0623 - loss: 3.1870 - val_accuracy: 0.1520 - val_loss: 3.1223 - learning_rate: 1.0000e-04\n",
|
| 178 |
+
"Epoch 6/25\n",
|
| 179 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 11s/step - accuracy: 0.0762 - loss: 3.1579 "
|
| 180 |
+
]
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"name": "stderr",
|
| 184 |
+
"output_type": "stream",
|
| 185 |
+
"text": [
|
| 186 |
+
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
|
| 187 |
+
]
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"name": "stdout",
|
| 191 |
+
"output_type": "stream",
|
| 192 |
+
"text": [
|
| 193 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m757s\u001b[0m 14s/step - accuracy: 0.0811 - loss: 3.1483 - val_accuracy: 0.1867 - val_loss: 3.0975 - learning_rate: 1.0000e-04\n",
|
| 194 |
+
"Epoch 7/25\n",
|
| 195 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 13s/step - accuracy: 0.1051 - loss: 3.1299 "
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"name": "stderr",
|
| 200 |
+
"output_type": "stream",
|
| 201 |
+
"text": [
|
| 202 |
+
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
|
| 203 |
+
]
|
| 204 |
+
},
|
| 205 |
+
{
|
| 206 |
+
"name": "stdout",
|
| 207 |
+
"output_type": "stream",
|
| 208 |
+
"text": [
|
| 209 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m900s\u001b[0m 16s/step - accuracy: 0.1029 - loss: 3.1283 - val_accuracy: 0.2107 - val_loss: 3.0750 - learning_rate: 1.0000e-04\n",
|
| 210 |
+
"Epoch 8/25\n",
|
| 211 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 11s/step - accuracy: 0.1321 - loss: 3.1018 "
|
| 212 |
+
]
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"name": "stderr",
|
| 216 |
+
"output_type": "stream",
|
| 217 |
+
"text": [
|
| 218 |
+
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
|
| 219 |
+
]
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"name": "stdout",
|
| 223 |
+
"output_type": "stream",
|
| 224 |
+
"text": [
|
| 225 |
+
"\u001b[1m55/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m799s\u001b[0m 15s/step - accuracy: 0.1343 - loss: 3.0993 - val_accuracy: 0.2373 - val_loss: 3.0532 - learning_rate: 1.0000e-04\n",
|
| 226 |
+
"Epoch 9/25\n",
|
| 227 |
+
"\u001b[1m50/55\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━━\u001b[0m \u001b[1m1:06\u001b[0m 13s/step - accuracy: 0.1195 - loss: 3.0798"
|
| 228 |
+
]
|
| 229 |
+
}
|
| 230 |
+
],
|
| 231 |
+
"source": [
|
| 232 |
+
"# 2.1: Model 1 - VGG16\n",
|
| 233 |
+
"\n",
|
| 234 |
+
"def build_vgg16_model():\n",
|
| 235 |
+
" inputs = keras.Input(shape=(*IMG_SIZE, 3))\n",
|
| 236 |
+
" x = data_augmentation(inputs) # train only\n",
|
| 237 |
+
" x = normalization(x)\n",
|
| 238 |
+
"\n",
|
| 239 |
+
" base_model = keras.applications.VGG16(\n",
|
| 240 |
+
" include_top=False,\n",
|
| 241 |
+
" weights=\"imagenet\",\n",
|
| 242 |
+
" input_tensor=x\n",
|
| 243 |
+
" )\n",
|
| 244 |
+
" base_model.trainable = False # freeze convolutional base\n",
|
| 245 |
+
"\n",
|
| 246 |
+
" x = layers.GlobalAveragePooling2D()(base_model.output)\n",
|
| 247 |
+
" x = layers.Dense(256, activation=\"relu\")(x)\n",
|
| 248 |
+
" x = layers.Dropout(0.5)(x)\n",
|
| 249 |
+
" outputs = layers.Dense(NUM_CLASSES, activation=\"softmax\")(x)\n",
|
| 250 |
+
"\n",
|
| 251 |
+
" model = keras.Model(inputs, outputs, name=\"VGG16_smartvision\")\n",
|
| 252 |
+
" return model\n",
|
| 253 |
+
"def compile_and_train(model, model_name, train_ds, val_ds, epochs=25, lr=1e-4):\n",
|
| 254 |
+
" model.compile(\n",
|
| 255 |
+
" optimizer=keras.optimizers.Adam(learning_rate=lr),\n",
|
| 256 |
+
" loss=\"sparse_categorical_crossentropy\",\n",
|
| 257 |
+
" metrics=[\"accuracy\"]\n",
|
| 258 |
+
" )\n",
|
| 259 |
+
"\n",
|
| 260 |
+
" callbacks = [\n",
|
| 261 |
+
" keras.callbacks.ModelCheckpoint(\n",
|
| 262 |
+
" filepath=f\"{model_name}_best.h5\",\n",
|
| 263 |
+
" monitor=\"val_accuracy\",\n",
|
| 264 |
+
" save_best_only=True,\n",
|
| 265 |
+
" mode=\"max\"\n",
|
| 266 |
+
" ),\n",
|
| 267 |
+
" keras.callbacks.EarlyStopping(\n",
|
| 268 |
+
" monitor=\"val_accuracy\",\n",
|
| 269 |
+
" patience=5,\n",
|
| 270 |
+
" restore_best_weights=True\n",
|
| 271 |
+
" ),\n",
|
| 272 |
+
" keras.callbacks.ReduceLROnPlateau(\n",
|
| 273 |
+
" monitor=\"val_loss\",\n",
|
| 274 |
+
" factor=0.5,\n",
|
| 275 |
+
" patience=2,\n",
|
| 276 |
+
" min_lr=1e-6,\n",
|
| 277 |
+
" verbose=1\n",
|
| 278 |
+
" )\n",
|
| 279 |
+
" ]\n",
|
| 280 |
+
"\n",
|
| 281 |
+
" history = model.fit(\n",
|
| 282 |
+
" train_ds,\n",
|
| 283 |
+
" validation_data=val_ds,\n",
|
| 284 |
+
" epochs=epochs,\n",
|
| 285 |
+
" callbacks=callbacks\n",
|
| 286 |
+
" )\n",
|
| 287 |
+
" return history\n",
|
| 288 |
+
"\n",
|
| 289 |
+
"vgg16_model = build_vgg16_model()\n",
|
| 290 |
+
"history_vgg16 = compile_and_train(vgg16_model, \"vgg16\", train_ds, val_ds, epochs=25)\n"
|
| 291 |
+
]
|
| 292 |
+
},
|
| 293 |
+
{
|
| 294 |
+
"cell_type": "code",
|
| 295 |
+
"execution_count": null,
|
| 296 |
+
"id": "3e7696bc",
|
| 297 |
+
"metadata": {},
|
| 298 |
+
"outputs": [],
|
| 299 |
+
"source": [
|
| 300 |
+
"class_names = train_ds.class_names\n",
|
| 301 |
+
"NUM_CLASSES = len(class_names)\n",
|
| 302 |
+
"print(class_names)"
|
| 303 |
+
]
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"cell_type": "code",
|
| 307 |
+
"execution_count": null,
|
| 308 |
+
"id": "3b3417aa",
|
| 309 |
+
"metadata": {},
|
| 310 |
+
"outputs": [],
|
| 311 |
+
"source": [
|
| 312 |
+
"import numpy as np\n",
|
| 313 |
+
"import time\n",
|
| 314 |
+
"import json\n",
|
| 315 |
+
"import os\n",
|
| 316 |
+
"from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support\n",
|
| 317 |
+
"\n",
|
| 318 |
+
"def evaluate_and_collect_metrics(model, model_name, test_ds, class_names, weights_path=None):\n",
|
| 319 |
+
" # If you saved best weights, load them\n",
|
| 320 |
+
" if weights_path is not None and os.path.exists(weights_path):\n",
|
| 321 |
+
" model.load_weights(weights_path)\n",
|
| 322 |
+
" print(f\"✅ Loaded best weights from {weights_path}\")\n",
|
| 323 |
+
"\n",
|
| 324 |
+
" y_true = []\n",
|
| 325 |
+
" y_pred = []\n",
|
| 326 |
+
" y_pred_probs = []\n",
|
| 327 |
+
"\n",
|
| 328 |
+
" # ----- measure inference time -----\n",
|
| 329 |
+
" total_time = 0.0\n",
|
| 330 |
+
" total_images = 0\n",
|
| 331 |
+
"\n",
|
| 332 |
+
" for images, labels in test_ds:\n",
|
| 333 |
+
" images_np = images.numpy()\n",
|
| 334 |
+
" batch_size = images_np.shape[0]\n",
|
| 335 |
+
"\n",
|
| 336 |
+
" start = time.perf_counter()\n",
|
| 337 |
+
" probs = model.predict(images_np, verbose=0)\n",
|
| 338 |
+
" end = time.perf_counter()\n",
|
| 339 |
+
"\n",
|
| 340 |
+
" total_time += (end - start)\n",
|
| 341 |
+
" total_images += batch_size\n",
|
| 342 |
+
"\n",
|
| 343 |
+
" preds = np.argmax(probs, axis=1)\n",
|
| 344 |
+
"\n",
|
| 345 |
+
" y_true.extend(labels.numpy())\n",
|
| 346 |
+
" y_pred.extend(preds)\n",
|
| 347 |
+
" y_pred_probs.append(probs)\n",
|
| 348 |
+
"\n",
|
| 349 |
+
" y_true = np.array(y_true)\n",
|
| 350 |
+
" y_pred = np.array(y_pred)\n",
|
| 351 |
+
" y_pred_probs = np.concatenate(y_pred_probs, axis=0)\n",
|
| 352 |
+
"\n",
|
| 353 |
+
" # ----- basic metrics -----\n",
|
| 354 |
+
" acc = (y_true == y_pred).mean()\n",
|
| 355 |
+
"\n",
|
| 356 |
+
" precision, recall, f1, _ = precision_recall_fscore_support(\n",
|
| 357 |
+
" y_true, y_pred, average=\"weighted\", zero_division=0\n",
|
| 358 |
+
" )\n",
|
| 359 |
+
"\n",
|
| 360 |
+
" # ----- top-5 accuracy -----\n",
|
| 361 |
+
" top5_correct = 0\n",
|
| 362 |
+
" for i, label in enumerate(y_true):\n",
|
| 363 |
+
" top5 = np.argsort(y_pred_probs[i])[-5:]\n",
|
| 364 |
+
" if label in top5:\n",
|
| 365 |
+
" top5_correct += 1\n",
|
| 366 |
+
" top5_acc = top5_correct / len(y_true)\n",
|
| 367 |
+
"\n",
|
| 368 |
+
" # ----- inference time -----\n",
|
| 369 |
+
" avg_time_per_image = total_time / total_images # seconds\n",
|
| 370 |
+
" imgs_per_second = 1.0 / avg_time_per_image if avg_time_per_image > 0 else 0.0\n",
|
| 371 |
+
"\n",
|
| 372 |
+
" # ----- model size -----\n",
|
| 373 |
+
" # Save weights temporarily to compute size\n",
|
| 374 |
+
" temp_weights = f\"{model_name}_temp_for_size.weights.h5\" \n",
|
| 375 |
+
" model.save_weights(temp_weights)\n",
|
| 376 |
+
" size_mb = os.path.getsize(temp_weights) / (1024 * 1024)\n",
|
| 377 |
+
" os.remove(temp_weights)\n",
|
| 378 |
+
"\n",
|
| 379 |
+
" # ----- classification report & confusion matrix (for plots) -----\n",
|
| 380 |
+
" print(f\"\\n=== {model_name.upper()} – Classification Report ===\")\n",
|
| 381 |
+
" print(classification_report(y_true, y_pred, target_names=class_names, zero_division=0))\n",
|
| 382 |
+
"\n",
|
| 383 |
+
" cm = confusion_matrix(y_true, y_pred)\n",
|
| 384 |
+
" print(f\"\\nConfusion matrix shape: {cm.shape}\")\n",
|
| 385 |
+
"\n",
|
| 386 |
+
" metrics = {\n",
|
| 387 |
+
" \"model_name\": model_name,\n",
|
| 388 |
+
" \"accuracy\": float(acc),\n",
|
| 389 |
+
" \"precision_weighted\": float(precision),\n",
|
| 390 |
+
" \"recall_weighted\": float(recall),\n",
|
| 391 |
+
" \"f1_weighted\": float(f1),\n",
|
| 392 |
+
" \"top5_accuracy\": float(top5_acc),\n",
|
| 393 |
+
" \"avg_inference_time_sec_per_image\": float(avg_time_per_image),\n",
|
| 394 |
+
" \"images_per_second\": float(imgs_per_second),\n",
|
| 395 |
+
" \"model_size_mb\": float(size_mb),\n",
|
| 396 |
+
" \"num_parameters\": int(model.count_params()),\n",
|
| 397 |
+
" }\n",
|
| 398 |
+
" return metrics, cm\n"
|
| 399 |
+
]
|
| 400 |
+
},
|
| 401 |
+
{
|
| 402 |
+
"cell_type": "code",
|
| 403 |
+
"execution_count": null,
|
| 404 |
+
"id": "6c01d2cc",
|
| 405 |
+
"metadata": {},
|
| 406 |
+
"outputs": [],
|
| 407 |
+
"source": [
|
| 408 |
+
"vgg_metrics, vgg_cm = evaluate_and_collect_metrics(\n",
|
| 409 |
+
" vgg16_model, \"vgg16\", test_ds, class_names, \"vgg16_best.h5\"\n",
|
| 410 |
+
")\n",
|
| 411 |
+
"with open(\"vgg16_metrics.json\", \"w\") as f:\n",
|
| 412 |
+
" json.dump(vgg_metrics, f, indent=2)"
|
| 413 |
+
]
|
| 414 |
+
},
|
| 415 |
+
{
|
| 416 |
+
"cell_type": "code",
|
| 417 |
+
"execution_count": null,
|
| 418 |
+
"id": "6e91352d",
|
| 419 |
+
"metadata": {},
|
| 420 |
+
"outputs": [],
|
| 421 |
+
"source": [
|
| 422 |
+
"# 2.2: Model 2 - ResNet50\n",
|
| 423 |
+
"def build_resnet50_model():\n",
|
| 424 |
+
" inputs = keras.Input(shape=(*IMG_SIZE, 3))\n",
|
| 425 |
+
" x = data_augmentation(inputs)\n",
|
| 426 |
+
" x = normalization(x)\n",
|
| 427 |
+
"\n",
|
| 428 |
+
" base_model = keras.applications.ResNet50(\n",
|
| 429 |
+
" include_top=False,\n",
|
| 430 |
+
" weights=\"imagenet\",\n",
|
| 431 |
+
" input_tensor=x\n",
|
| 432 |
+
" )\n",
|
| 433 |
+
"\n",
|
| 434 |
+
" # Freeze all, then unfreeze last 20 layers\n",
|
| 435 |
+
" for layer in base_model.layers:\n",
|
| 436 |
+
" layer.trainable = False\n",
|
| 437 |
+
" for layer in base_model.layers[-20:]:\n",
|
| 438 |
+
" layer.trainable = True\n",
|
| 439 |
+
"\n",
|
| 440 |
+
" x = layers.GlobalAveragePooling2D()(base_model.output)\n",
|
| 441 |
+
" x = layers.Dense(256, activation=\"relu\")(x)\n",
|
| 442 |
+
" x = layers.Dropout(0.5)(x)\n",
|
| 443 |
+
" outputs = layers.Dense(NUM_CLASSES, activation=\"softmax\")(x)\n",
|
| 444 |
+
"\n",
|
| 445 |
+
" model = keras.Model(inputs, outputs, name=\"ResNet50_smartvision\")\n",
|
| 446 |
+
" return model\n",
|
| 447 |
+
"\n",
|
| 448 |
+
"resnet_model = build_resnet50_model()\n",
|
| 449 |
+
"history_resnet = compile_and_train(resnet_model, \"resnet50\", train_ds, val_ds, epochs=25, lr=1e-4)\n"
|
| 450 |
+
]
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"cell_type": "code",
|
| 454 |
+
"execution_count": null,
|
| 455 |
+
"id": "aab6167c",
|
| 456 |
+
"metadata": {},
|
| 457 |
+
"outputs": [],
|
| 458 |
+
"source": [
|
| 459 |
+
"# 2.3: Model 3 - MobileNetV2\n",
|
| 460 |
+
"\n",
|
| 461 |
+
"def build_mobilenetv2_model():\n",
|
| 462 |
+
" inputs = keras.Input(shape=(*IMG_SIZE, 3))\n",
|
| 463 |
+
" x = data_augmentation(inputs)\n",
|
| 464 |
+
" x = normalization(x)\n",
|
| 465 |
+
"\n",
|
| 466 |
+
" base_model = keras.applications.MobileNetV2(\n",
|
| 467 |
+
" include_top=False,\n",
|
| 468 |
+
" weights=\"imagenet\",\n",
|
| 469 |
+
" input_tensor=x\n",
|
| 470 |
+
" )\n",
|
| 471 |
+
" base_model.trainable = False # keep it light & fast\n",
|
| 472 |
+
"\n",
|
| 473 |
+
" x = layers.GlobalAveragePooling2D()(base_model.output)\n",
|
| 474 |
+
" x = layers.Dense(128, activation=\"relu\")(x)\n",
|
| 475 |
+
" x = layers.Dropout(0.3)(x)\n",
|
| 476 |
+
" outputs = layers.Dense(NUM_CLASSES, activation=\"softmax\")(x)\n",
|
| 477 |
+
"\n",
|
| 478 |
+
" model = keras.Model(inputs, outputs, name=\"MobileNetV2_smartvision\")\n",
|
| 479 |
+
" return model\n",
|
| 480 |
+
"\n",
|
| 481 |
+
"mobilenet_model = build_mobilenetv2_model()\n",
|
| 482 |
+
"history_mobilenet = compile_and_train(mobilenet_model, \"mobilenetv2\", train_ds, val_ds, epochs=20, lr=1e-4)\n"
|
| 483 |
+
]
|
| 484 |
+
},
|
| 485 |
+
{
|
| 486 |
+
"cell_type": "code",
|
| 487 |
+
"execution_count": null,
|
| 488 |
+
"id": "d4f51125",
|
| 489 |
+
"metadata": {},
|
| 490 |
+
"outputs": [],
|
| 491 |
+
"source": [
|
| 492 |
+
"# 2.4: Model 4 - EfficientNetB0\n",
|
| 493 |
+
"\n",
|
| 494 |
+
"from tensorflow.keras import mixed_precision\n",
|
| 495 |
+
"mixed_precision.set_global_policy(\"mixed_float16\") # for GPU speed\n",
|
| 496 |
+
"\n",
|
| 497 |
+
"def build_efficientnetb0_model():\n",
|
| 498 |
+
" inputs = keras.Input(shape=(*IMG_SIZE, 3))\n",
|
| 499 |
+
" x = data_augmentation(inputs)\n",
|
| 500 |
+
" x = normalization(x)\n",
|
| 501 |
+
"\n",
|
| 502 |
+
" base_model = keras.applications.EfficientNetB0(\n",
|
| 503 |
+
" include_top=False,\n",
|
| 504 |
+
" weights=\"imagenet\",\n",
|
| 505 |
+
" input_tensor=x\n",
|
| 506 |
+
" )\n",
|
| 507 |
+
"\n",
|
| 508 |
+
" # Fine-tune: unfreeze some top layers\n",
|
| 509 |
+
" for layer in base_model.layers[:-30]:\n",
|
| 510 |
+
" layer.trainable = False\n",
|
| 511 |
+
" for layer in base_model.layers[-30:]:\n",
|
| 512 |
+
" layer.trainable = True\n",
|
| 513 |
+
"\n",
|
| 514 |
+
" x = layers.GlobalAveragePooling2D()(base_model.output)\n",
|
| 515 |
+
" x = layers.BatchNormalization()(x)\n",
|
| 516 |
+
" x = layers.Dense(256, activation=\"relu\")(x)\n",
|
| 517 |
+
" x = layers.Dropout(0.4)(x)\n",
|
| 518 |
+
" outputs = layers.Dense(NUM_CLASSES, activation=\"softmax\", dtype=\"float32\")(x) # force float32 at output\n",
|
| 519 |
+
"\n",
|
| 520 |
+
" model = keras.Model(inputs, outputs, name=\"EfficientNetB0_smartvision\")\n",
|
| 521 |
+
" return model\n",
|
| 522 |
+
"\n",
|
| 523 |
+
"effnet_model = build_efficientnetb0_model()\n",
|
| 524 |
+
"history_effnet = compile_and_train(effnet_model, \"efficientnetb0\", train_ds, val_ds, epochs=30, lr=5e-5)\n"
|
| 525 |
+
]
|
| 526 |
+
},
|
| 527 |
+
{
|
| 528 |
+
"cell_type": "code",
|
| 529 |
+
"execution_count": null,
|
| 530 |
+
"id": "0064b8f3",
|
| 531 |
+
"metadata": {},
|
| 532 |
+
"outputs": [],
|
| 533 |
+
"source": [
|
| 534 |
+
"# 2.5: Model Comparison & Selection\n",
|
| 535 |
+
"\n",
|
| 536 |
+
"from sklearn.metrics import classification_report, confusion_matrix\n",
|
| 537 |
+
"import numpy as np\n",
|
| 538 |
+
"\n",
|
| 539 |
+
"def evaluate_on_test(model, test_ds, model_name):\n",
|
| 540 |
+
" y_true = []\n",
|
| 541 |
+
" y_pred = []\n",
|
| 542 |
+
"\n",
|
| 543 |
+
" for images, labels in test_ds:\n",
|
| 544 |
+
" preds = model.predict(images)\n",
|
| 545 |
+
" y_true.extend(labels.numpy())\n",
|
| 546 |
+
" y_pred.extend(np.argmax(preds, axis=1))\n",
|
| 547 |
+
"\n",
|
| 548 |
+
" print(f\"\\n=== {model_name} TEST REPORT ===\")\n",
|
| 549 |
+
" print(classification_report(y_true, y_pred, target_names=class_names))\n",
|
| 550 |
+
"\n",
|
| 551 |
+
" cm = confusion_matrix(y_true, y_pred)\n",
|
| 552 |
+
" plt.figure(figsize=(10, 8))\n",
|
| 553 |
+
" sns.heatmap(cm, annot=False, cmap=\"Blues\",\n",
|
| 554 |
+
" xticklabels=class_names,\n",
|
| 555 |
+
" yticklabels=class_names)\n",
|
| 556 |
+
" plt.title(f\"{model_name} - Confusion Matrix\")\n",
|
| 557 |
+
" plt.xlabel(\"Predicted\")\n",
|
| 558 |
+
" plt.ylabel(\"True\")\n",
|
| 559 |
+
" plt.show()\n",
|
| 560 |
+
"\n",
|
| 561 |
+
"# Load best weights if needed and evaluate\n",
|
| 562 |
+
"vgg16_model.load_weights(\"vgg16_best.h5\")\n",
|
| 563 |
+
"resnet_model.load_weights(\"resnet50_best.h5\")\n",
|
| 564 |
+
"mobilenet_model.load_weights(\"mobilenetv2_best.h5\")\n",
|
| 565 |
+
"effnet_model.load_weights(\"efficientnetb0_best.h5\")\n",
|
| 566 |
+
"\n",
|
| 567 |
+
"evaluate_on_test(vgg16_model, test_ds, \"VGG16\")\n",
|
| 568 |
+
"evaluate_on_test(resnet_model, test_ds, \"ResNet50\")\n",
|
| 569 |
+
"evaluate_on_test(mobilenet_model, test_ds, \"MobileNetV2\")\n",
|
| 570 |
+
"evaluate_on_test(effnet_model, test_ds, \"EfficientNetB0\")\n"
|
| 571 |
+
]
|
| 572 |
+
}
|
| 573 |
+
],
|
| 574 |
+
"metadata": {
|
| 575 |
+
"kernelspec": {
|
| 576 |
+
"display_name": "Python 3",
|
| 577 |
+
"language": "python",
|
| 578 |
+
"name": "python3"
|
| 579 |
+
},
|
| 580 |
+
"language_info": {
|
| 581 |
+
"codemirror_mode": {
|
| 582 |
+
"name": "ipython",
|
| 583 |
+
"version": 3
|
| 584 |
+
},
|
| 585 |
+
"file_extension": ".py",
|
| 586 |
+
"mimetype": "text/x-python",
|
| 587 |
+
"name": "python",
|
| 588 |
+
"nbconvert_exporter": "python",
|
| 589 |
+
"pygments_lexer": "ipython3",
|
| 590 |
+
"version": "3.11.9"
|
| 591 |
+
}
|
| 592 |
+
},
|
| 593 |
+
"nbformat": 4,
|
| 594 |
+
"nbformat_minor": 5
|
| 595 |
+
}
|
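The notebook above takes `class_names` from `train_ds.class_names`, which `image_dataset_from_directory` orders alphabetically by folder name. A minimal sketch, assuming the same dataset layout as the training scripts, for persisting that ordering so any inference code (for example the Streamlit app's class list) stays aligned with the trained output heads; the output filename is illustrative:

import json
import tensorflow as tf

train_ds = tf.keras.utils.image_dataset_from_directory(
    "smartvision_dataset/classification/train",
    image_size=(224, 224),
    batch_size=32,
)
# Saving the alphabetical class list once avoids label/index drift
# between training and later inference.
with open("class_names.json", "w") as f:
    json.dump(train_ds.class_names, f, indent=2)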
scripts/01_EDA.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scripts/02_efficientnetb0.py
ADDED
|
@@ -0,0 +1,385 @@
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION AI - MODEL 4: EfficientNetB0 (FINE-TUNING)
|
| 3 |
+
# Target: High-accuracy 25-class classifier
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
import numpy as np
|
| 10 |
+
import tensorflow as tf
|
| 11 |
+
from tensorflow import keras
|
| 12 |
+
from tensorflow.keras import layers
|
| 13 |
+
from sklearn.metrics import (
|
| 14 |
+
precision_recall_fscore_support,
|
| 15 |
+
confusion_matrix,
|
| 16 |
+
classification_report,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
print("TensorFlow version:", tf.__version__)
|
| 20 |
+
|
| 21 |
+
from tensorflow.keras.applications.efficientnet import (
|
| 22 |
+
EfficientNetB0,
|
| 23 |
+
preprocess_input,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
# ------------------------------------------------------------
|
| 27 |
+
# 1. CONFIGURATION
|
| 28 |
+
# ------------------------------------------------------------
|
| 29 |
+
|
| 30 |
+
BASE_DIR = "smartvision_dataset"
|
| 31 |
+
CLASS_DIR = os.path.join(BASE_DIR, "classification")
|
| 32 |
+
TRAIN_DIR = os.path.join(CLASS_DIR, "train")
|
| 33 |
+
VAL_DIR = os.path.join(CLASS_DIR, "val")
|
| 34 |
+
TEST_DIR = os.path.join(CLASS_DIR, "test")
|
| 35 |
+
|
| 36 |
+
IMG_SIZE = (224, 224) # EfficientNetB0 default
|
| 37 |
+
BATCH_SIZE = 32
|
| 38 |
+
NUM_CLASSES = 25
|
| 39 |
+
|
| 40 |
+
MODELS_DIR = "saved_models"
|
| 41 |
+
METRICS_DIR = "smartvision_metrics"
|
| 42 |
+
|
| 43 |
+
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 44 |
+
os.makedirs(METRICS_DIR, exist_ok=True)
|
| 45 |
+
|
| 46 |
+
print("Train dir:", TRAIN_DIR)
|
| 47 |
+
print("Val dir :", VAL_DIR)
|
| 48 |
+
print("Test dir :", TEST_DIR)
|
| 49 |
+
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
# 2. LOAD DATASETS
|
| 52 |
+
# ------------------------------------------------------------
|
| 53 |
+
|
| 54 |
+
train_ds = tf.keras.utils.image_dataset_from_directory(
|
| 55 |
+
TRAIN_DIR,
|
| 56 |
+
image_size=IMG_SIZE,
|
| 57 |
+
batch_size=BATCH_SIZE,
|
| 58 |
+
shuffle=True,
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
val_ds = tf.keras.utils.image_dataset_from_directory(
|
| 62 |
+
VAL_DIR,
|
| 63 |
+
image_size=IMG_SIZE,
|
| 64 |
+
batch_size=BATCH_SIZE,
|
| 65 |
+
shuffle=False,
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
test_ds = tf.keras.utils.image_dataset_from_directory(
|
| 69 |
+
TEST_DIR,
|
| 70 |
+
image_size=IMG_SIZE,
|
| 71 |
+
batch_size=BATCH_SIZE,
|
| 72 |
+
shuffle=False,
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
class_names = train_ds.class_names
|
| 76 |
+
print("Detected classes:", class_names)
|
| 77 |
+
print("Number of classes:", len(class_names))
|
| 78 |
+
|
| 79 |
+
AUTOTUNE = tf.data.AUTOTUNE
|
| 80 |
+
train_ds = train_ds.prefetch(AUTOTUNE)
|
| 81 |
+
val_ds = val_ds.prefetch(AUTOTUNE)
|
| 82 |
+
test_ds = test_ds.prefetch(AUTOTUNE)
|
| 83 |
+
|
| 84 |
+
# ------------------------------------------------------------
|
| 85 |
+
# 3. ADVANCED DATA AUGMENTATION
|
| 86 |
+
# ------------------------------------------------------------
|
| 87 |
+
|
| 88 |
+
def bright_jitter(x):
|
| 89 |
+
x_f32 = tf.cast(x, tf.float32)
|
| 90 |
+
x_f32 = tf.image.random_brightness(x_f32, max_delta=0.25)
|
| 91 |
+
return tf.cast(x_f32, x.dtype)
|
| 92 |
+
|
| 93 |
+
def sat_jitter(x):
|
| 94 |
+
x_f32 = tf.cast(x, tf.float32)
|
| 95 |
+
x_f32 = tf.image.random_saturation(x_f32, lower=0.7, upper=1.3)
|
| 96 |
+
return tf.cast(x_f32, x.dtype)
|
| 97 |
+
|
| 98 |
+
data_augmentation = keras.Sequential(
|
| 99 |
+
[
|
| 100 |
+
layers.RandomFlip("horizontal"),
|
| 101 |
+
layers.RandomRotation(0.08), # ≈ ±30 degrees
|
| 102 |
+
layers.RandomZoom(0.15),
|
| 103 |
+
layers.RandomContrast(0.3),
|
| 104 |
+
layers.RandomTranslation(0.1, 0.1),
|
| 105 |
+
layers.Lambda(bright_jitter),
|
| 106 |
+
layers.Lambda(sat_jitter),
|
| 107 |
+
],
|
| 108 |
+
name="advanced_data_augmentation",
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
# ------------------------------------------------------------
|
| 112 |
+
# 4. BUILD EfficientNetB0 MODEL (TWO-STAGE FINE-TUNING)
|
| 113 |
+
# ------------------------------------------------------------
|
| 114 |
+
|
| 115 |
+
def build_efficientnetb0_model():
|
| 116 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 117 |
+
|
| 118 |
+
# 1. Data augmentation (training only)
|
| 119 |
+
x = data_augmentation(inputs)
|
| 120 |
+
|
| 121 |
+
# 2. EfficientNetB0 preprocess_input
|
| 122 |
+
x = layers.Lambda(
|
| 123 |
+
lambda z: preprocess_input(tf.cast(z, tf.float32)),
|
| 124 |
+
name="effnet_preprocess",
|
| 125 |
+
)(x)
|
| 126 |
+
|
| 127 |
+
# 3. EfficientNetB0 base model (ImageNet)
|
| 128 |
+
base_model = EfficientNetB0(
|
| 129 |
+
include_top=False,
|
| 130 |
+
weights="imagenet",
|
| 131 |
+
input_shape=(*IMG_SIZE, 3),
|
| 132 |
+
name="efficientnetb0",
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
base_model.trainable = False # Stage 1: frozen
|
| 136 |
+
|
| 137 |
+
x = base_model(x, training=False)
|
| 138 |
+
|
| 139 |
+
x = layers.GlobalAveragePooling2D(name="gap")(x)
|
| 140 |
+
x = layers.BatchNormalization(name="head_bn_1")(x)
|
| 141 |
+
x = layers.Dense(256, activation="relu", name="head_dense_1")(x)
|
| 142 |
+
x = layers.BatchNormalization(name="head_bn_2")(x)
|
| 143 |
+
x = layers.Dropout(0.4, name="head_dropout")(x)
|
| 144 |
+
|
| 145 |
+
outputs = layers.Dense(
|
| 146 |
+
NUM_CLASSES,
|
| 147 |
+
activation="softmax",
|
| 148 |
+
name="predictions",
|
| 149 |
+
)(x)
|
| 150 |
+
|
| 151 |
+
model = keras.Model(inputs, outputs, name="EfficientNetB0_smartvision")
|
| 152 |
+
return model
|
| 153 |
+
|
| 154 |
+
effnet_model = build_efficientnetb0_model()
|
| 155 |
+
effnet_model.summary()
|
| 156 |
+
|
| 157 |
+
# ------------------------------------------------------------
|
| 158 |
+
# 5. TRAINING UTILITY (WEIGHTS-ONLY .weights.h5)
|
| 159 |
+
# ------------------------------------------------------------
|
| 160 |
+
|
| 161 |
+
def compile_and_train(
|
| 162 |
+
model,
|
| 163 |
+
save_name: str,
|
| 164 |
+
train_ds,
|
| 165 |
+
val_ds,
|
| 166 |
+
epochs: int,
|
| 167 |
+
lr: float,
|
| 168 |
+
initial_epoch: int = 0,
|
| 169 |
+
patience_es: int = 5,
|
| 170 |
+
patience_rlr: int = 2,
|
| 171 |
+
):
|
| 172 |
+
optimizer = keras.optimizers.Adam(learning_rate=lr)
|
| 173 |
+
|
| 174 |
+
model.compile(
|
| 175 |
+
optimizer=optimizer,
|
| 176 |
+
loss="sparse_categorical_crossentropy",
|
| 177 |
+
metrics=["accuracy"],
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
best_weights_path = os.path.join(
|
| 181 |
+
MODELS_DIR, f"{save_name}.weights.h5"
|
| 182 |
+
)
|
| 183 |
+
|
| 184 |
+
callbacks = [
|
| 185 |
+
keras.callbacks.ModelCheckpoint(
|
| 186 |
+
filepath=best_weights_path,
|
| 187 |
+
monitor="val_accuracy",
|
| 188 |
+
save_best_only=True,
|
| 189 |
+
save_weights_only=True,
|
| 190 |
+
mode="max",
|
| 191 |
+
verbose=1,
|
| 192 |
+
),
|
| 193 |
+
keras.callbacks.EarlyStopping(
|
| 194 |
+
monitor="val_accuracy",
|
| 195 |
+
patience=patience_es,
|
| 196 |
+
restore_best_weights=True,
|
| 197 |
+
verbose=1,
|
| 198 |
+
),
|
| 199 |
+
keras.callbacks.ReduceLROnPlateau(
|
| 200 |
+
monitor="val_loss",
|
| 201 |
+
factor=0.5,
|
| 202 |
+
patience=patience_rlr,
|
| 203 |
+
min_lr=1e-6,
|
| 204 |
+
verbose=1,
|
| 205 |
+
),
|
| 206 |
+
]
|
| 207 |
+
|
| 208 |
+
history = model.fit(
|
| 209 |
+
train_ds,
|
| 210 |
+
validation_data=val_ds,
|
| 211 |
+
epochs=epochs,
|
| 212 |
+
initial_epoch=initial_epoch,
|
| 213 |
+
callbacks=callbacks,
|
| 214 |
+
)
|
| 215 |
+
|
| 216 |
+
return history, best_weights_path
|
| 217 |
+
|
| 218 |
+
# ------------------------------------------------------------
|
| 219 |
+
# 6. TWO-STAGE TRAINING
|
| 220 |
+
# ------------------------------------------------------------
|
| 221 |
+
|
| 222 |
+
MODEL_NAME = "efficientnetb0"
|
| 223 |
+
|
| 224 |
+
print("\n========== STAGE 1: TRAIN HEAD ONLY ==========\n")
|
| 225 |
+
|
| 226 |
+
history_stage1, effnet_stage1_best = compile_and_train(
|
| 227 |
+
effnet_model,
|
| 228 |
+
save_name=f"{MODEL_NAME}_stage1_best",
|
| 229 |
+
train_ds=train_ds,
|
| 230 |
+
val_ds=val_ds,
|
| 231 |
+
epochs=10,
|
| 232 |
+
lr=1e-3,
|
| 233 |
+
initial_epoch=0,
|
| 234 |
+
patience_es=5,
|
| 235 |
+
patience_rlr=2,
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
print("Stage 1 best weights saved at:", effnet_stage1_best)
|
| 239 |
+
|
| 240 |
+
print("\n========== STAGE 2: FINE-TUNE TOP LAYERS ==========\n")
|
| 241 |
+
|
| 242 |
+
# Get the EfficientNet base from the combined model
|
| 243 |
+
base_model = effnet_model.get_layer("efficientnetb0")
|
| 244 |
+
|
| 245 |
+
# Unfreeze top N layers
|
| 246 |
+
num_unfreeze = 80
|
| 247 |
+
for layer in base_model.layers[:-num_unfreeze]:
|
| 248 |
+
layer.trainable = False
|
| 249 |
+
for layer in base_model.layers[-num_unfreeze:]:
|
| 250 |
+
layer.trainable = True
|
| 251 |
+
if isinstance(layer, layers.BatchNormalization):
|
| 252 |
+
layer.trainable = False # keep BN frozen
|
| 253 |
+
|
| 254 |
+
initial_epoch_stage2 = len(history_stage1.history["accuracy"])
|
| 255 |
+
|
| 256 |
+
history_stage2, effnet_stage2_best = compile_and_train(
|
| 257 |
+
effnet_model,
|
| 258 |
+
save_name=f"{MODEL_NAME}_stage2_best",
|
| 259 |
+
train_ds=train_ds,
|
| 260 |
+
val_ds=val_ds,
|
| 261 |
+
epochs=30, # total (Stage1 + Stage2)
|
| 262 |
+
lr=5e-5,
|
| 263 |
+
initial_epoch=initial_epoch_stage2,
|
| 264 |
+
patience_es=5,
|
| 265 |
+
patience_rlr=2,
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
print("Stage 2 best weights saved at:", effnet_stage2_best)
|
| 269 |
+
print("👉 Use this file in Streamlit app:", effnet_stage2_best)
|
| 270 |
+
|
| 271 |
+
# ------------------------------------------------------------
|
| 272 |
+
# 7. EVALUATION + SAVE METRICS & CONFUSION MATRIX
|
| 273 |
+
# ------------------------------------------------------------
|
| 274 |
+
|
| 275 |
+
def evaluate_and_save(model, model_name, best_weights_path, test_ds, class_names):
|
| 276 |
+
print(f"\n===== EVALUATING {model_name.upper()} ON TEST SET =====")
|
| 277 |
+
|
| 278 |
+
model.load_weights(best_weights_path)
|
| 279 |
+
print(f"Loaded best weights from {best_weights_path}")
|
| 280 |
+
|
| 281 |
+
y_true = []
|
| 282 |
+
y_pred = []
|
| 283 |
+
all_probs = []
|
| 284 |
+
|
| 285 |
+
total_time = 0.0
|
| 286 |
+
total_images = 0
|
| 287 |
+
|
| 288 |
+
for images, labels in test_ds:
|
| 289 |
+
images_np = images.numpy()
|
| 290 |
+
bs = images_np.shape[0]
|
| 291 |
+
|
| 292 |
+
start = time.perf_counter()
|
| 293 |
+
probs = model.predict(images_np, verbose=0)
|
| 294 |
+
end = time.perf_counter()
|
| 295 |
+
|
| 296 |
+
total_time += (end - start)
|
| 297 |
+
total_images += bs
|
| 298 |
+
|
| 299 |
+
preds = np.argmax(probs, axis=1)
|
| 300 |
+
|
| 301 |
+
y_true.extend(labels.numpy())
|
| 302 |
+
y_pred.extend(preds)
|
| 303 |
+
all_probs.append(probs)
|
| 304 |
+
|
| 305 |
+
y_true = np.array(y_true)
|
| 306 |
+
y_pred = np.array(y_pred)
|
| 307 |
+
all_probs = np.concatenate(all_probs, axis=0)
|
| 308 |
+
|
| 309 |
+
accuracy = float((y_true == y_pred).mean())
|
| 310 |
+
precision, recall, f1, _ = precision_recall_fscore_support(
|
| 311 |
+
y_true, y_pred, average="weighted", zero_division=0
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
+
top5_correct = 0
|
| 315 |
+
for i, label in enumerate(y_true):
|
| 316 |
+
if label in np.argsort(all_probs[i])[-5:]:
|
| 317 |
+
top5_correct += 1
|
| 318 |
+
top5_acc = top5_correct / len(y_true)
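    # Equivalent vectorized form, shown for reference only:
    #   top5 = np.argsort(all_probs, axis=1)[:, -5:]
    #   top5_acc = float(np.mean([t in row for t, row in zip(y_true, top5)]))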
|
| 319 |
+
|
| 320 |
+
time_per_image = total_time / total_images
|
| 321 |
+
images_per_second = 1.0 / time_per_image
|
| 322 |
+
|
| 323 |
+
temp_w = os.path.join(MODELS_DIR, f"{model_name}_temp_for_size.weights.h5")
|
| 324 |
+
model.save_weights(temp_w)
|
| 325 |
+
size_mb = os.path.getsize(temp_w) / (1024 * 1024)
|
| 326 |
+
os.remove(temp_w)
|
| 327 |
+
|
| 328 |
+
cm = confusion_matrix(y_true, y_pred)
|
| 329 |
+
|
| 330 |
+
print("\nClassification Report:")
|
| 331 |
+
print(
|
| 332 |
+
classification_report(
|
| 333 |
+
y_true, y_pred, target_names=class_names, zero_division=0
|
| 334 |
+
)
|
| 335 |
+
)
|
| 336 |
+
|
| 337 |
+
print(f"Test Accuracy : {accuracy:.4f}")
|
| 338 |
+
print(f"Weighted Precision : {precision:.4f}")
|
| 339 |
+
print(f"Weighted Recall : {recall:.4f}")
|
| 340 |
+
print(f"Weighted F1-score : {f1:.4f}")
|
| 341 |
+
print(f"Top-5 Accuracy : {top5_acc:.4f}")
|
| 342 |
+
print(f"Avg time per image : {time_per_image*1000:.2f} ms")
|
| 343 |
+
print(f"Images per second : {images_per_second:.2f}")
|
| 344 |
+
print(f"Model size (weights) : {size_mb:.2f} MB")
|
| 345 |
+
print(f"Num parameters : {model.count_params()}")
|
| 346 |
+
|
| 347 |
+
save_dir = os.path.join(METRICS_DIR, model_name)
|
| 348 |
+
os.makedirs(save_dir, exist_ok=True)
|
| 349 |
+
|
| 350 |
+
metrics = {
|
| 351 |
+
"model_name": model_name,
|
| 352 |
+
"accuracy": accuracy,
|
| 353 |
+
"precision_weighted": float(precision),
|
| 354 |
+
"recall_weighted": float(recall),
|
| 355 |
+
"f1_weighted": float(f1),
|
| 356 |
+
"top5_accuracy": float(top5_acc),
|
| 357 |
+
"avg_inference_time_sec": float(time_per_image),
|
| 358 |
+
"images_per_second": float(images_per_second),
|
| 359 |
+
"model_size_mb": float(size_mb),
|
| 360 |
+
"num_parameters": int(model.count_params()),
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
metrics_path = os.path.join(save_dir, "metrics.json")
|
| 364 |
+
cm_path = os.path.join(save_dir, "confusion_matrix.npy")
|
| 365 |
+
|
| 366 |
+
with open(metrics_path, "w") as f:
|
| 367 |
+
json.dump(metrics, f, indent=2)
|
| 368 |
+
|
| 369 |
+
np.save(cm_path, cm)
|
| 370 |
+
|
| 371 |
+
print(f"\nSaved metrics to : {metrics_path}")
|
| 372 |
+
print(f"Saved confusion matrix to: {cm_path}")
|
| 373 |
+
|
| 374 |
+
return metrics, cm
|
| 375 |
+
|
| 376 |
+
effnet_metrics, effnet_cm = evaluate_and_save(
|
| 377 |
+
effnet_model,
|
| 378 |
+
model_name="efficientnetb0_stage2",
|
| 379 |
+
best_weights_path=effnet_stage2_best,
|
| 380 |
+
test_ds=test_ds,
|
| 381 |
+
class_names=class_names,
|
| 382 |
+
)
|
| 383 |
+
|
| 384 |
+
print("\n✅ EfficientNetB0 Model 4 pipeline complete.")
|
| 385 |
+
print("✅ Use weights file in app:", effnet_stage2_best)
|
scripts/02_mobilenetv2.py
ADDED
|
@@ -0,0 +1,430 @@
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION AI - MODEL 3 (v3): MobileNetV2 (FAST + ACCURATE)
|
| 3 |
+
# with manual label smoothing + deeper fine-tuning
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
import numpy as np
|
| 10 |
+
import tensorflow as tf
|
| 11 |
+
from tensorflow import keras
|
| 12 |
+
from tensorflow.keras import layers, regularizers
|
| 13 |
+
from sklearn.metrics import (
|
| 14 |
+
precision_recall_fscore_support,
|
| 15 |
+
confusion_matrix,
|
| 16 |
+
classification_report,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
print("TensorFlow version:", tf.__version__)
|
| 20 |
+
|
| 21 |
+
# ------------------------------------------------------------
|
| 22 |
+
# 1. CONFIGURATION
|
| 23 |
+
# ------------------------------------------------------------
|
| 24 |
+
|
| 25 |
+
BASE_DIR = "smartvision_dataset"
|
| 26 |
+
CLASS_DIR = os.path.join(BASE_DIR, "classification")
|
| 27 |
+
TRAIN_DIR = os.path.join(CLASS_DIR, "train")
|
| 28 |
+
VAL_DIR = os.path.join(CLASS_DIR, "val")
|
| 29 |
+
TEST_DIR = os.path.join(CLASS_DIR, "test")
|
| 30 |
+
|
| 31 |
+
IMG_SIZE = (224, 224)
|
| 32 |
+
BATCH_SIZE = 32
|
| 33 |
+
NUM_CLASSES = 25
|
| 34 |
+
|
| 35 |
+
MODELS_DIR = "saved_models"
|
| 36 |
+
METRICS_DIR = "smartvision_metrics"
|
| 37 |
+
|
| 38 |
+
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 39 |
+
os.makedirs(METRICS_DIR, exist_ok=True)
|
| 40 |
+
|
| 41 |
+
print("Train dir:", TRAIN_DIR)
|
| 42 |
+
print("Val dir :", VAL_DIR)
|
| 43 |
+
print("Test dir :", TEST_DIR)
|
| 44 |
+
|
| 45 |
+
# ------------------------------------------------------------
|
| 46 |
+
# 2. LOAD DATASETS (CROPPED SINGLE-OBJECT IMAGES)
|
| 47 |
+
# ------------------------------------------------------------
|
| 48 |
+
|
| 49 |
+
train_ds = tf.keras.utils.image_dataset_from_directory(
|
| 50 |
+
TRAIN_DIR,
|
| 51 |
+
image_size=IMG_SIZE,
|
| 52 |
+
batch_size=BATCH_SIZE,
|
| 53 |
+
shuffle=True,
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
val_ds = tf.keras.utils.image_dataset_from_directory(
|
| 57 |
+
VAL_DIR,
|
| 58 |
+
image_size=IMG_SIZE,
|
| 59 |
+
batch_size=BATCH_SIZE,
|
| 60 |
+
shuffle=False,
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
test_ds = tf.keras.utils.image_dataset_from_directory(
|
| 64 |
+
TEST_DIR,
|
| 65 |
+
image_size=IMG_SIZE,
|
| 66 |
+
batch_size=BATCH_SIZE,
|
| 67 |
+
shuffle=False,
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
class_names = train_ds.class_names
|
| 71 |
+
print("Detected classes:", class_names)
|
| 72 |
+
print("Number of classes:", len(class_names))
|
| 73 |
+
|
| 74 |
+
AUTOTUNE = tf.data.AUTOTUNE
|
| 75 |
+
train_ds = train_ds.prefetch(AUTOTUNE)
|
| 76 |
+
val_ds = val_ds.prefetch(AUTOTUNE)
|
| 77 |
+
test_ds = test_ds.prefetch(AUTOTUNE)
|
| 78 |
+
|
| 79 |
+
# ------------------------------------------------------------
|
| 80 |
+
# 3. DATA AUGMENTATION (STANDARD, TRAIN-ONLY)
|
| 81 |
+
# ------------------------------------------------------------
|
| 82 |
+
|
| 83 |
+
data_augmentation = keras.Sequential(
|
| 84 |
+
[
|
| 85 |
+
layers.RandomFlip("horizontal"),
|
| 86 |
+
layers.RandomRotation(0.04), # ~±15°
|
| 87 |
+
layers.RandomZoom(0.1),
|
| 88 |
+
layers.RandomContrast(0.15),
|
| 89 |
+
layers.Lambda(lambda x: tf.image.random_brightness(x, max_delta=0.15)),
|
| 90 |
+
layers.Lambda(lambda x: tf.image.random_saturation(x, 0.85, 1.15)),
|
| 91 |
+
],
|
| 92 |
+
name="data_augmentation",
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
# ------------------------------------------------------------
|
| 96 |
+
# 4. BUILD MobileNetV2 MODEL (2-STAGE TRAINING)
|
| 97 |
+
# ------------------------------------------------------------
|
| 98 |
+
|
| 99 |
+
def build_mobilenetv2_model_v2():
|
| 100 |
+
"""
|
| 101 |
+
Returns:
|
| 102 |
+
model : full MobileNetV2 classification model
|
| 103 |
+
base_model : the MobileNetV2 backbone (for freezing/unfreezing)
|
| 104 |
+
"""
|
| 105 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 106 |
+
|
| 107 |
+
# Apply augmentation only during training
|
| 108 |
+
x = data_augmentation(inputs)
|
| 109 |
+
|
| 110 |
+
# MobileNetV2 expects [-1, 1] normalized inputs via preprocess_input
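    # (preprocess_input maps raw pixel values x in [0, 255] to x / 127.5 - 1.0)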
|
| 111 |
+
x = layers.Lambda(
|
| 112 |
+
keras.applications.mobilenet_v2.preprocess_input,
|
| 113 |
+
name="mobilenetv2_preprocess",
|
| 114 |
+
)(x)
|
| 115 |
+
|
| 116 |
+
# Pretrained MobileNetV2 backbone
|
| 117 |
+
base_model = keras.applications.MobileNetV2(
|
| 118 |
+
include_top=False,
|
| 119 |
+
weights="imagenet",
|
| 120 |
+
input_shape=(*IMG_SIZE, 3),
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
# Run backbone
|
| 124 |
+
x = base_model(x)
|
| 125 |
+
|
| 126 |
+
# Global pooling + custom classification head
|
| 127 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(x)
|
| 128 |
+
|
| 129 |
+
x = layers.BatchNormalization(name="head_batchnorm_1")(x)
|
| 130 |
+
x = layers.Dropout(0.4, name="head_dropout_1")(x)
|
| 131 |
+
|
| 132 |
+
x = layers.Dense(
|
| 133 |
+
256,
|
| 134 |
+
activation="relu",
|
| 135 |
+
kernel_regularizer=regularizers.l2(1e-4),
|
| 136 |
+
name="head_dense_1",
|
| 137 |
+
)(x)
|
| 138 |
+
|
| 139 |
+
x = layers.BatchNormalization(name="head_batchnorm_2")(x)
|
| 140 |
+
x = layers.Dropout(0.5, name="head_dropout_2")(x)
|
| 141 |
+
|
| 142 |
+
outputs = layers.Dense(
|
| 143 |
+
NUM_CLASSES, activation="softmax", name="predictions"
|
| 144 |
+
)(x)
|
| 145 |
+
|
| 146 |
+
model = keras.Model(
|
| 147 |
+
inputs=inputs,
|
| 148 |
+
outputs=outputs,
|
| 149 |
+
name="MobileNetV2_smartvision_v2",
|
| 150 |
+
)
|
| 151 |
+
return model, base_model
|
| 152 |
+
|
| 153 |
+
mobilenet_model, base_model = build_mobilenetv2_model_v2()
|
| 154 |
+
mobilenet_model.summary()
|
| 155 |
+
|
| 156 |
+
# ------------------------------------------------------------
|
| 157 |
+
# 5. MANUAL LABEL-SMOOTHED LOSS
|
| 158 |
+
# ------------------------------------------------------------
|
| 159 |
+
|
| 160 |
+
def make_sparse_ce_with_label_smoothing(num_classes, label_smoothing=0.05):
|
| 161 |
+
ls = float(label_smoothing)
|
| 162 |
+
nc = int(num_classes)
|
| 163 |
+
|
| 164 |
+
def loss_fn(y_true, y_pred):
|
| 165 |
+
# y_true: integer labels, shape (batch,)
|
| 166 |
+
y_true = tf.cast(y_true, tf.int32)
|
| 167 |
+
y_true_oh = tf.one_hot(y_true, depth=nc)
|
| 168 |
+
|
| 169 |
+
if ls > 0.0:
|
| 170 |
+
smooth = ls
|
| 171 |
+
y_true_oh = (1.0 - smooth) * y_true_oh + smooth / tf.cast(
|
| 172 |
+
nc, tf.float32
|
| 173 |
+
)
|
| 174 |
+
|
| 175 |
+
# y_pred is softmax probabilities
|
| 176 |
+
return tf.keras.losses.categorical_crossentropy(
|
| 177 |
+
y_true_oh, y_pred, from_logits=False
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
return loss_fn
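
# Illustrative check (safe to delete): with NUM_CLASSES = 25 and smoothing 0.05,
# a one-hot target becomes 0.95 + 0.05/25 = 0.952 for the true class and
# 0.05/25 = 0.002 for every other class, so the row still sums to 1.0.
_demo_loss = make_sparse_ce_with_label_smoothing(num_classes=25, label_smoothing=0.05)
_demo_labels = tf.constant([3])                                      # one integer label
_demo_probs = tf.one_hot(_demo_labels, depth=25) * 0.9 + 0.1 / 25    # fake softmax row
print("Label-smoothing demo loss:", float(_demo_loss(_demo_labels, _demo_probs)[0]))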
|
| 181 |
+
|
| 182 |
+
# ------------------------------------------------------------
|
| 183 |
+
# 6. TRAINING UTILITY (SAVES WEIGHTS-ONLY .weights.h5)
|
| 184 |
+
# ------------------------------------------------------------
|
| 185 |
+
|
| 186 |
+
def compile_and_train(
|
| 187 |
+
model,
|
| 188 |
+
model_name,
|
| 189 |
+
train_ds,
|
| 190 |
+
val_ds,
|
| 191 |
+
epochs,
|
| 192 |
+
lr,
|
| 193 |
+
model_tag,
|
| 194 |
+
patience_es=5,
|
| 195 |
+
patience_rlr=2,
|
| 196 |
+
):
|
| 197 |
+
"""Compile and train model, saving the best weights by val_accuracy."""
|
| 198 |
+
print(f"\n===== TRAINING {model_name} ({model_tag}) =====")
|
| 199 |
+
|
| 200 |
+
optimizer = keras.optimizers.Adam(learning_rate=lr)
|
| 201 |
+
|
| 202 |
+
loss_fn = make_sparse_ce_with_label_smoothing(
|
| 203 |
+
num_classes=NUM_CLASSES,
|
| 204 |
+
label_smoothing=0.05,
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
model.compile(
|
| 208 |
+
optimizer=optimizer,
|
| 209 |
+
loss=loss_fn,
|
| 210 |
+
metrics=["accuracy"],
|
| 211 |
+
)
|
| 212 |
+
|
| 213 |
+
# Keras 3 requirement: weights-only must end with ".weights.h5"
|
| 214 |
+
best_weights_path = os.path.join(
|
| 215 |
+
MODELS_DIR, f"{model_name}_{model_tag}_best.weights.h5"
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
+
callbacks = [
|
| 219 |
+
keras.callbacks.ModelCheckpoint(
|
| 220 |
+
filepath=best_weights_path,
|
| 221 |
+
monitor="val_accuracy",
|
| 222 |
+
save_best_only=True,
|
| 223 |
+
save_weights_only=True,
|
| 224 |
+
mode="max",
|
| 225 |
+
verbose=1,
|
| 226 |
+
),
|
| 227 |
+
keras.callbacks.EarlyStopping(
|
| 228 |
+
monitor="val_accuracy",
|
| 229 |
+
patience=patience_es,
|
| 230 |
+
restore_best_weights=True,
|
| 231 |
+
verbose=1,
|
| 232 |
+
),
|
| 233 |
+
keras.callbacks.ReduceLROnPlateau(
|
| 234 |
+
monitor="val_loss",
|
| 235 |
+
factor=0.5,
|
| 236 |
+
patience=patience_rlr,
|
| 237 |
+
min_lr=1e-6,
|
| 238 |
+
verbose=1,
|
| 239 |
+
),
|
| 240 |
+
]
|
| 241 |
+
|
| 242 |
+
history = model.fit(
|
| 243 |
+
train_ds,
|
| 244 |
+
validation_data=val_ds,
|
| 245 |
+
epochs=epochs,
|
| 246 |
+
callbacks=callbacks,
|
| 247 |
+
)
|
| 248 |
+
|
| 249 |
+
return history, best_weights_path
|
| 250 |
+
|
| 251 |
+
# ------------------------------------------------------------
|
| 252 |
+
# 7. STAGE 1: TRAIN HEAD WITH FROZEN BASE
|
| 253 |
+
# ------------------------------------------------------------
|
| 254 |
+
|
| 255 |
+
print("\n===== STAGE 1: Training head with frozen MobileNetV2 base =====")
|
| 256 |
+
|
| 257 |
+
for layer in base_model.layers:
|
| 258 |
+
layer.trainable = False
|
| 259 |
+
|
| 260 |
+
epochs_stage1 = 12
|
| 261 |
+
lr_stage1 = 1e-3
|
| 262 |
+
|
| 263 |
+
history_stage1, mobilenet_stage1_best = compile_and_train(
|
| 264 |
+
mobilenet_model,
|
| 265 |
+
model_name="mobilenetv2_v2",
|
| 266 |
+
train_ds=train_ds,
|
| 267 |
+
val_ds=val_ds,
|
| 268 |
+
epochs=epochs_stage1,
|
| 269 |
+
lr=lr_stage1,
|
| 270 |
+
model_tag="stage1",
|
| 271 |
+
patience_es=4,
|
| 272 |
+
patience_rlr=2,
|
| 273 |
+
)
|
| 274 |
+
|
| 275 |
+
print("Stage 1 best weights saved at:", mobilenet_stage1_best)
|
| 276 |
+
|
| 277 |
+
# ------------------------------------------------------------
|
| 278 |
+
# 8. STAGE 2: DEEPER FINE-TUNE LAST LAYERS OF BASE MODEL
|
| 279 |
+
# ------------------------------------------------------------
|
| 280 |
+
|
| 281 |
+
print("\n===== STAGE 2: Fine-tuning last layers of MobileNetV2 base =====")
|
| 282 |
+
|
| 283 |
+
mobilenet_model.load_weights(mobilenet_stage1_best)
|
| 284 |
+
|
| 285 |
+
base_model.trainable = True
|
| 286 |
+
num_unfreeze = 25
|
| 287 |
+
|
| 288 |
+
print(f"Base model has {len(base_model.layers)} layers.")
|
| 289 |
+
print(f"Unfrozen layers in base model: {num_unfreeze}")
|
| 290 |
+
|
| 291 |
+
for layer in base_model.layers[:-num_unfreeze]:
|
| 292 |
+
layer.trainable = False
|
| 293 |
+
|
| 294 |
+
for layer in base_model.layers[-num_unfreeze:]:
|
| 295 |
+
if isinstance(layer, layers.BatchNormalization):
|
| 296 |
+
layer.trainable = False
|
| 297 |
+
|
| 298 |
+
epochs_stage2 = 25
|
| 299 |
+
lr_stage2 = 3e-5
|
| 300 |
+
|
| 301 |
+
history_stage2, mobilenet_stage2_best = compile_and_train(
|
| 302 |
+
mobilenet_model,
|
| 303 |
+
model_name="mobilenetv2_v2",
|
| 304 |
+
train_ds=train_ds,
|
| 305 |
+
val_ds=val_ds,
|
| 306 |
+
epochs=epochs_stage2,
|
| 307 |
+
lr=lr_stage2,
|
| 308 |
+
model_tag="stage2",
|
| 309 |
+
patience_es=8,
|
| 310 |
+
patience_rlr=3,
|
| 311 |
+
)
|
| 312 |
+
|
| 313 |
+
print("Stage 2 best weights saved at:", mobilenet_stage2_best)
|
| 314 |
+
print("👉 Use this file in Streamlit app:", mobilenet_stage2_best)
|
| 315 |
+
|
| 316 |
+
# ------------------------------------------------------------
|
| 317 |
+
# 9. EVALUATION + SAVE METRICS & CONFUSION MATRIX
|
| 318 |
+
# ------------------------------------------------------------
|
| 319 |
+
|
| 320 |
+
def evaluate_and_save(model, model_name, best_weights_path, test_ds, class_names):
|
| 321 |
+
print(f"\n===== EVALUATING {model_name.upper()} ON TEST SET =====")
|
| 322 |
+
|
| 323 |
+
model.load_weights(best_weights_path)
|
| 324 |
+
print(f"Loaded best weights from {best_weights_path}")
|
| 325 |
+
|
| 326 |
+
y_true = []
|
| 327 |
+
y_pred = []
|
| 328 |
+
all_probs = []
|
| 329 |
+
|
| 330 |
+
total_time = 0.0
|
| 331 |
+
total_images = 0
|
| 332 |
+
|
| 333 |
+
for images, labels in test_ds:
|
| 334 |
+
images_np = images.numpy()
|
| 335 |
+
bs = images_np.shape[0]
|
| 336 |
+
|
| 337 |
+
start = time.perf_counter()
|
| 338 |
+
probs = model.predict(images_np, verbose=0)
|
| 339 |
+
end = time.perf_counter()
|
| 340 |
+
|
| 341 |
+
total_time += (end - start)
|
| 342 |
+
total_images += bs
|
| 343 |
+
|
| 344 |
+
preds = np.argmax(probs, axis=1)
|
| 345 |
+
|
| 346 |
+
y_true.extend(labels.numpy())
|
| 347 |
+
y_pred.extend(preds)
|
| 348 |
+
all_probs.append(probs)
|
| 349 |
+
|
| 350 |
+
y_true = np.array(y_true)
|
| 351 |
+
y_pred = np.array(y_pred)
|
| 352 |
+
all_probs = np.concatenate(all_probs, axis=0)
|
| 353 |
+
|
| 354 |
+
accuracy = float((y_true == y_pred).mean())
|
| 355 |
+
precision, recall, f1, _ = precision_recall_fscore_support(
|
| 356 |
+
y_true, y_pred, average="weighted", zero_division=0
|
| 357 |
+
)
|
| 358 |
+
|
| 359 |
+
top5_correct = 0
|
| 360 |
+
for i, label in enumerate(y_true):
|
| 361 |
+
if label in np.argsort(all_probs[i])[-5:]:
|
| 362 |
+
top5_correct += 1
|
| 363 |
+
top5_acc = top5_correct / len(y_true)
|
| 364 |
+
|
| 365 |
+
time_per_image = total_time / total_images
|
| 366 |
+
images_per_second = 1.0 / time_per_image
|
| 367 |
+
|
| 368 |
+
temp_w = os.path.join(MODELS_DIR, f"{model_name}_temp_for_size.weights.h5")
|
| 369 |
+
model.save_weights(temp_w)
|
| 370 |
+
size_mb = os.path.getsize(temp_w) / (1024 * 1024)
|
| 371 |
+
os.remove(temp_w)
|
| 372 |
+
|
| 373 |
+
cm = confusion_matrix(y_true, y_pred)
|
| 374 |
+
|
| 375 |
+
print("\nClassification Report:")
|
| 376 |
+
print(
|
| 377 |
+
classification_report(
|
| 378 |
+
y_true, y_pred, target_names=class_names, zero_division=0
|
| 379 |
+
)
|
| 380 |
+
)
|
| 381 |
+
|
| 382 |
+
print(f"Test Accuracy : {accuracy:.4f}")
|
| 383 |
+
print(f"Weighted Precision : {precision:.4f}")
|
| 384 |
+
print(f"Weighted Recall : {recall:.4f}")
|
| 385 |
+
print(f"Weighted F1-score : {f1:.4f}")
|
| 386 |
+
print(f"Top-5 Accuracy : {top5_acc:.4f}")
|
| 387 |
+
print(f"Avg time per image : {time_per_image*1000:.2f} ms")
|
| 388 |
+
print(f"Images per second : {images_per_second:.2f}")
|
| 389 |
+
print(f"Model size (weights) : {size_mb:.2f} MB")
|
| 390 |
+
print(f"Num parameters : {model.count_params()}")
|
| 391 |
+
|
| 392 |
+
save_dir = os.path.join(METRICS_DIR, model_name)
|
| 393 |
+
os.makedirs(save_dir, exist_ok=True)
|
| 394 |
+
|
| 395 |
+
metrics = {
|
| 396 |
+
"model_name": model_name,
|
| 397 |
+
"accuracy": accuracy,
|
| 398 |
+
"precision_weighted": float(precision),
|
| 399 |
+
"recall_weighted": float(recall),
|
| 400 |
+
"f1_weighted": float(f1),
|
| 401 |
+
"top5_accuracy": float(top5_acc),
|
| 402 |
+
"avg_inference_time_sec": float(time_per_image),
|
| 403 |
+
"images_per_second": float(images_per_second),
|
| 404 |
+
"model_size_mb": float(size_mb),
|
| 405 |
+
"num_parameters": int(model.count_params()),
|
| 406 |
+
}
|
| 407 |
+
|
| 408 |
+
metrics_path = os.path.join(save_dir, "metrics.json")
|
| 409 |
+
cm_path = os.path.join(save_dir, "confusion_matrix.npy")
|
| 410 |
+
|
| 411 |
+
with open(metrics_path, "w") as f:
|
| 412 |
+
json.dump(metrics, f, indent=2)
|
| 413 |
+
|
| 414 |
+
np.save(cm_path, cm)
|
| 415 |
+
|
| 416 |
+
print(f"\nSaved metrics to : {metrics_path}")
|
| 417 |
+
print(f"Saved confusion matrix to: {cm_path}")
|
| 418 |
+
|
| 419 |
+
return metrics, cm
|
| 420 |
+
|
| 421 |
+
mobilenet_metrics, mobilenet_cm = evaluate_and_save(
|
| 422 |
+
mobilenet_model,
|
| 423 |
+
model_name="mobilenetv2_v2_stage2",
|
| 424 |
+
best_weights_path=mobilenet_stage2_best,
|
| 425 |
+
test_ds=test_ds,
|
| 426 |
+
class_names=class_names,
|
| 427 |
+
)
|
| 428 |
+
|
| 429 |
+
print("\n✅ MobileNetV2 v3 (label-smoothed + deeper FT) pipeline complete.")
|
| 430 |
+
print("✅ Use weights file in app:", mobilenet_stage2_best)
|
scripts/02_model_comparision.ipynb
ADDED
|
@@ -0,0 +1,19 @@
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"id": "4852ae9f",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": []
|
| 10 |
+
}
|
| 11 |
+
],
|
| 12 |
+
"metadata": {
|
| 13 |
+
"language_info": {
|
| 14 |
+
"name": "python"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"nbformat": 4,
|
| 18 |
+
"nbformat_minor": 5
|
| 19 |
+
}
|
scripts/02_resnet50.py
ADDED
|
@@ -0,0 +1,482 @@
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION AI - MODEL 2 (v2): ResNet50 (STRONG BASELINE)
|
| 3 |
+
# with manual label smoothing (Keras 3 compatible)
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
import numpy as np
|
| 10 |
+
import tensorflow as tf
|
| 11 |
+
from tensorflow import keras
|
| 12 |
+
from tensorflow.keras import layers
|
| 13 |
+
from sklearn.metrics import (
|
| 14 |
+
precision_recall_fscore_support,
|
| 15 |
+
confusion_matrix,
|
| 16 |
+
classification_report,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
print("TensorFlow version:", tf.__version__)
|
| 20 |
+
|
| 21 |
+
# ------------------------------------------------------------
|
| 22 |
+
# 1. CONFIGURATION
|
| 23 |
+
# ------------------------------------------------------------
|
| 24 |
+
|
| 25 |
+
BASE_DIR = "smartvision_dataset"
|
| 26 |
+
CLASS_DIR = os.path.join(BASE_DIR, "classification")
|
| 27 |
+
TRAIN_DIR = os.path.join(CLASS_DIR, "train")
|
| 28 |
+
VAL_DIR = os.path.join(CLASS_DIR, "val")
|
| 29 |
+
TEST_DIR = os.path.join(CLASS_DIR, "test")
|
| 30 |
+
|
| 31 |
+
IMG_SIZE = (224, 224)
|
| 32 |
+
BATCH_SIZE = 32
|
| 33 |
+
NUM_CLASSES = 25
|
| 34 |
+
|
| 35 |
+
MODELS_DIR = "saved_models"
|
| 36 |
+
METRICS_DIR = "smartvision_metrics"
|
| 37 |
+
|
| 38 |
+
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 39 |
+
os.makedirs(METRICS_DIR, exist_ok=True)
|
| 40 |
+
|
| 41 |
+
print("Train dir:", TRAIN_DIR)
|
| 42 |
+
print("Val dir :", VAL_DIR)
|
| 43 |
+
print("Test dir :", TEST_DIR)
|
| 44 |
+
|
| 45 |
+
# ------------------------------------------------------------
|
| 46 |
+
# 2. LOAD DATASETS
|
| 47 |
+
# ------------------------------------------------------------
|
| 48 |
+
|
| 49 |
+
train_ds = tf.keras.utils.image_dataset_from_directory(
|
| 50 |
+
TRAIN_DIR,
|
| 51 |
+
image_size=IMG_SIZE,
|
| 52 |
+
batch_size=BATCH_SIZE,
|
| 53 |
+
shuffle=True,
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
val_ds = tf.keras.utils.image_dataset_from_directory(
|
| 57 |
+
VAL_DIR,
|
| 58 |
+
image_size=IMG_SIZE,
|
| 59 |
+
batch_size=BATCH_SIZE,
|
| 60 |
+
shuffle=False,
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
test_ds = tf.keras.utils.image_dataset_from_directory(
|
| 64 |
+
TEST_DIR,
|
| 65 |
+
image_size=IMG_SIZE,
|
| 66 |
+
batch_size=BATCH_SIZE,
|
| 67 |
+
shuffle=False,
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
class_names = train_ds.class_names
|
| 71 |
+
print("Detected classes:", class_names)
|
| 72 |
+
print("Number of classes:", len(class_names))
|
| 73 |
+
|
| 74 |
+
AUTOTUNE = tf.data.AUTOTUNE
|
| 75 |
+
train_ds = train_ds.prefetch(AUTOTUNE)
|
| 76 |
+
val_ds = val_ds.prefetch(AUTOTUNE)
|
| 77 |
+
test_ds = test_ds.prefetch(AUTOTUNE)
|
| 78 |
+
|
| 79 |
+
# ------------------------------------------------------------
|
| 80 |
+
# 3. DATA AUGMENTATION
|
| 81 |
+
# ------------------------------------------------------------
|
| 82 |
+
|
| 83 |
+
data_augmentation = keras.Sequential(
|
| 84 |
+
[
|
| 85 |
+
layers.RandomFlip("horizontal"),
|
| 86 |
+
layers.RandomRotation(0.04), # ~±15°
|
| 87 |
+
layers.RandomZoom(0.1),
|
| 88 |
+
layers.RandomContrast(0.15),
|
| 89 |
+
layers.Lambda(
|
| 90 |
+
lambda x: tf.image.random_brightness(x, max_delta=0.15)
|
| 91 |
+
),
|
| 92 |
+
layers.Lambda(
|
| 93 |
+
lambda x: tf.image.random_saturation(x, 0.85, 1.15)
|
| 94 |
+
),
|
| 95 |
+
],
|
| 96 |
+
name="data_augmentation",
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
# NOTE: We will use ResNet50's preprocess_input, so we do NOT rescale 1./255 here.
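# (resnet50.preprocess_input converts RGB to BGR and subtracts the ImageNet
#  channel means; it does not scale pixel values into [0, 1].)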
|
| 100 |
+
|
| 101 |
+
# ------------------------------------------------------------
|
| 102 |
+
# 4. BUILD RESNET50 MODEL
|
| 103 |
+
# ------------------------------------------------------------
|
| 104 |
+
|
| 105 |
+
def build_resnet50_model_v2():
|
| 106 |
+
"""
|
| 107 |
+
Returns:
|
| 108 |
+
model : full ResNet50 classification model
|
| 109 |
+
base_model : the ResNet50 backbone for fine-tuning
|
| 110 |
+
"""
|
| 111 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 112 |
+
|
| 113 |
+
# Augmentation (train-time only)
|
| 114 |
+
x = data_augmentation(inputs)
|
| 115 |
+
|
| 116 |
+
# ResNet50-specific preprocessing
|
| 117 |
+
x = layers.Lambda(
|
| 118 |
+
keras.applications.resnet50.preprocess_input,
|
| 119 |
+
name="resnet50_preprocess",
|
| 120 |
+
)(x)
|
| 121 |
+
|
| 122 |
+
# Pretrained ResNet50 backbone
|
| 123 |
+
base_model = keras.applications.ResNet50(
|
| 124 |
+
include_top=False,
|
| 125 |
+
weights="imagenet",
|
| 126 |
+
input_shape=(*IMG_SIZE, 3),
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
x = base_model(x)
|
| 130 |
+
|
| 131 |
+
# Custom classification head
|
| 132 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(x)
|
| 133 |
+
|
| 134 |
+
x = layers.BatchNormalization(name="head_batchnorm")(x)
|
| 135 |
+
x = layers.Dropout(0.4, name="head_dropout")(x)
|
| 136 |
+
|
| 137 |
+
x = layers.Dense(
|
| 138 |
+
256,
|
| 139 |
+
activation="relu",
|
| 140 |
+
name="head_dense",
|
| 141 |
+
)(x)
|
| 142 |
+
|
| 143 |
+
x = layers.BatchNormalization(name="head_batchnorm_2")(x)
|
| 144 |
+
x = layers.Dropout(0.5, name="head_dropout_2")(x)
|
| 145 |
+
|
| 146 |
+
outputs = layers.Dense(
|
| 147 |
+
NUM_CLASSES,
|
| 148 |
+
activation="softmax",
|
| 149 |
+
name="predictions",
|
| 150 |
+
)(x)
|
| 151 |
+
|
| 152 |
+
model = keras.Model(
|
| 153 |
+
inputs=inputs,
|
| 154 |
+
outputs=outputs,
|
| 155 |
+
name="ResNet50_smartvision_v2",
|
| 156 |
+
)
|
| 157 |
+
|
| 158 |
+
return model, base_model
|
| 159 |
+
|
| 160 |
+
resnet_model, resnet_base = build_resnet50_model_v2()
|
| 161 |
+
resnet_model.summary()
|
| 162 |
+
|
| 163 |
+
# ------------------------------------------------------------
|
| 164 |
+
# 5. CUSTOM LOSS WITH LABEL SMOOTHING
|
| 165 |
+
# ------------------------------------------------------------
|
| 166 |
+
|
| 167 |
+
def make_sparse_ce_with_label_smoothing(num_classes, label_smoothing=0.1):
|
| 168 |
+
"""
|
| 169 |
+
Implements sparse categorical crossentropy with manual label smoothing.
|
| 170 |
+
Works even if Keras' SparseCategoricalCrossentropy doesn't have label_smoothing arg.
|
| 171 |
+
"""
|
| 172 |
+
ls = float(label_smoothing)
|
| 173 |
+
nc = int(num_classes)
|
| 174 |
+
|
| 175 |
+
def loss_fn(y_true, y_pred):
|
| 176 |
+
# y_true: integer labels, shape (batch,)
|
| 177 |
+
y_true = tf.cast(y_true, tf.int32)
|
| 178 |
+
y_true_oh = tf.one_hot(y_true, depth=nc)
|
| 179 |
+
|
| 180 |
+
if ls > 0.0:
|
| 181 |
+
smooth = ls
|
| 182 |
+
y_true_oh = (1.0 - smooth) * y_true_oh + smooth / tf.cast(
|
| 183 |
+
nc, tf.float32
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
# y_pred is softmax probabilities
|
| 187 |
+
return tf.keras.losses.categorical_crossentropy(
|
| 188 |
+
y_true_oh, y_pred, from_logits=False
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
+
return loss_fn
|
| 192 |
+
|
| 193 |
+
# ------------------------------------------------------------
|
| 194 |
+
# 6. TRAINING UTILITY
|
| 195 |
+
# ------------------------------------------------------------
|
| 196 |
+
|
| 197 |
+
def compile_and_train(
|
| 198 |
+
model,
|
| 199 |
+
model_name: str,
|
| 200 |
+
train_ds,
|
| 201 |
+
val_ds,
|
| 202 |
+
epochs: int,
|
| 203 |
+
lr: float,
|
| 204 |
+
model_tag: str,
|
| 205 |
+
patience_es: int = 5,
|
| 206 |
+
patience_rlr: int = 2,
|
| 207 |
+
):
|
| 208 |
+
"""
|
| 209 |
+
Compile and train model, saving best weights by val_accuracy.
|
| 210 |
+
|
| 211 |
+
model_name: e.g. 'resnet50_v2'
|
| 212 |
+
model_tag : e.g. 'stage1', 'stage2'
|
| 213 |
+
"""
|
| 214 |
+
print(f"\n===== {model_tag}: Training {model_name} =====")
|
| 215 |
+
|
| 216 |
+
optimizer = keras.optimizers.Adam(learning_rate=lr)
|
| 217 |
+
|
| 218 |
+
# Use custom loss with label smoothing
|
| 219 |
+
loss_fn = make_sparse_ce_with_label_smoothing(
|
| 220 |
+
num_classes=NUM_CLASSES,
|
| 221 |
+
label_smoothing=0.1,
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
model.compile(
|
| 225 |
+
optimizer=optimizer,
|
| 226 |
+
loss=loss_fn,
|
| 227 |
+
metrics=["accuracy"],
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
# Keras 3: when save_weights_only=True, must end with ".weights.h5"
|
| 231 |
+
best_weights_path = os.path.join(
|
| 232 |
+
MODELS_DIR, f"{model_name}_{model_tag}_best.weights.h5"
|
| 233 |
+
)
|
| 234 |
+
|
| 235 |
+
callbacks = [
|
| 236 |
+
keras.callbacks.ModelCheckpoint(
|
| 237 |
+
filepath=best_weights_path,
|
| 238 |
+
monitor="val_accuracy",
|
| 239 |
+
save_best_only=True,
|
| 240 |
+
save_weights_only=True, # ✅ weights-only: avoids architecture issues
|
| 241 |
+
mode="max",
|
| 242 |
+
verbose=1,
|
| 243 |
+
),
|
| 244 |
+
keras.callbacks.EarlyStopping(
|
| 245 |
+
monitor="val_accuracy",
|
| 246 |
+
patience=patience_es,
|
| 247 |
+
restore_best_weights=True,
|
| 248 |
+
verbose=1,
|
| 249 |
+
),
|
| 250 |
+
keras.callbacks.ReduceLROnPlateau(
|
| 251 |
+
monitor="val_loss",
|
| 252 |
+
factor=0.5,
|
| 253 |
+
patience=patience_rlr,
|
| 254 |
+
min_lr=1e-6,
|
| 255 |
+
verbose=1,
|
| 256 |
+
),
|
| 257 |
+
]
|
| 258 |
+
|
| 259 |
+
history = model.fit(
|
| 260 |
+
train_ds,
|
| 261 |
+
validation_data=val_ds,
|
| 262 |
+
epochs=epochs,
|
| 263 |
+
callbacks=callbacks,
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
return history, best_weights_path
|
| 267 |
+
|
| 268 |
+
# ------------------------------------------------------------
|
| 269 |
+
# 7. STAGE 1: TRAIN HEAD WITH FROZEN RESNET BASE
|
| 270 |
+
# ------------------------------------------------------------
|
| 271 |
+
|
| 272 |
+
print("\n===== STAGE 1: Training head with frozen ResNet50 base =====")
|
| 273 |
+
|
| 274 |
+
# Freeze entire backbone for Stage 1
|
| 275 |
+
resnet_base.trainable = False
|
| 276 |
+
|
| 277 |
+
epochs_stage1 = 15
|
| 278 |
+
lr_stage1 = 1e-3
|
| 279 |
+
|
| 280 |
+
history_stage1, resnet_stage1_best = compile_and_train(
|
| 281 |
+
resnet_model,
|
| 282 |
+
model_name="resnet50_v2",
|
| 283 |
+
train_ds=train_ds,
|
| 284 |
+
val_ds=val_ds,
|
| 285 |
+
epochs=epochs_stage1,
|
| 286 |
+
lr=lr_stage1,
|
| 287 |
+
model_tag="stage1",
|
| 288 |
+
patience_es=5,
|
| 289 |
+
patience_rlr=2,
|
| 290 |
+
)
|
| 291 |
+
|
| 292 |
+
print("Stage 1 best weights saved at:", resnet_stage1_best)
|
| 293 |
+
|
| 294 |
+
# ------------------------------------------------------------
|
| 295 |
+
# 8. STAGE 2: DEEPER FINE-TUNING OF RESNET BASE
|
| 296 |
+
# ------------------------------------------------------------
|
| 297 |
+
|
| 298 |
+
print("\n===== STAGE 2: Fine-tuning last layers of ResNet50 base =====")
|
| 299 |
+
|
| 300 |
+
# Load Stage 1 best weights before fine-tuning
|
| 301 |
+
resnet_model.load_weights(resnet_stage1_best)
|
| 302 |
+
|
| 303 |
+
# Enable deeper fine-tuning on the backbone
|
| 304 |
+
resnet_base.trainable = True
|
| 305 |
+
|
| 306 |
+
print("Base model name:", resnet_base.name)
|
| 307 |
+
print("Base model has", len(resnet_base.layers), "layers.")
|
| 308 |
+
|
| 309 |
+
# Unfreeze last N layers of the backbone
|
| 310 |
+
num_unfreeze = 40 # you can tune 30–50
|
| 311 |
+
for layer in resnet_base.layers[:-num_unfreeze]:
|
| 312 |
+
layer.trainable = False
|
| 313 |
+
|
| 314 |
+
# Keep BatchNorm layers frozen for stability
|
| 315 |
+
for layer in resnet_base.layers[-num_unfreeze:]:
|
| 316 |
+
if isinstance(layer, layers.BatchNormalization):
|
| 317 |
+
layer.trainable = False
|
| 318 |
+
|
| 319 |
+
trainable_count = int(np.sum([l.trainable for l in resnet_model.layers]))
|
| 320 |
+
print("Total trainable layers in full model after unfreezing:", trainable_count)
|
| 321 |
+
|
| 322 |
+
epochs_stage2 = 30
|
| 323 |
+
lr_stage2 = 5e-6 # small LR for safe fine-tuning
|
| 324 |
+
|
| 325 |
+
history_stage2, resnet_stage2_best = compile_and_train(
|
| 326 |
+
resnet_model,
|
| 327 |
+
model_name="resnet50_v2",
|
| 328 |
+
train_ds=train_ds,
|
| 329 |
+
val_ds=val_ds,
|
| 330 |
+
epochs=epochs_stage2,
|
| 331 |
+
lr=lr_stage2,
|
| 332 |
+
model_tag="stage2",
|
| 333 |
+
patience_es=8,
|
| 334 |
+
patience_rlr=3,
|
| 335 |
+
)
|
| 336 |
+
|
| 337 |
+
print("Stage 2 best weights saved at:", resnet_stage2_best)
|
| 338 |
+
|
| 339 |
+
# ------------------------------------------------------------
|
| 340 |
+
# 9. EVALUATION + SAVE METRICS & CONFUSION MATRIX
|
| 341 |
+
# ------------------------------------------------------------
|
| 342 |
+
|
| 343 |
+
def evaluate_and_save(model, save_name, best_weights_path, test_ds, class_names):
|
| 344 |
+
"""
|
| 345 |
+
save_name: e.g. 'resnet50_v2_stage1', 'resnet50_v2_stage2'
|
| 346 |
+
"""
|
| 347 |
+
print(f"\n===== EVALUATING {save_name.upper()} ON TEST SET =====")
|
| 348 |
+
|
| 349 |
+
# Load best weights
|
| 350 |
+
model.load_weights(best_weights_path)
|
| 351 |
+
print(f"Loaded best weights from {best_weights_path}")
|
| 352 |
+
|
| 353 |
+
y_true = []
|
| 354 |
+
y_pred = []
|
| 355 |
+
all_probs = []
|
| 356 |
+
|
| 357 |
+
total_time = 0.0
|
| 358 |
+
total_images = 0
|
| 359 |
+
|
| 360 |
+
for images, labels in test_ds:
|
| 361 |
+
images_np = images.numpy()
|
| 362 |
+
bs = images_np.shape[0]
|
| 363 |
+
|
| 364 |
+
start = time.perf_counter()
|
| 365 |
+
probs = model.predict(images_np, verbose=0)
|
| 366 |
+
end = time.perf_counter()
|
| 367 |
+
|
| 368 |
+
total_time += (end - start)
|
| 369 |
+
total_images += bs
|
| 370 |
+
|
| 371 |
+
preds = np.argmax(probs, axis=1)
|
| 372 |
+
|
| 373 |
+
y_true.extend(labels.numpy())
|
| 374 |
+
y_pred.extend(preds)
|
| 375 |
+
all_probs.append(probs)
|
| 376 |
+
|
| 377 |
+
y_true = np.array(y_true)
|
| 378 |
+
y_pred = np.array(y_pred)
|
| 379 |
+
all_probs = np.concatenate(all_probs, axis=0)
|
| 380 |
+
|
| 381 |
+
# Basic metrics
|
| 382 |
+
accuracy = float((y_true == y_pred).mean())
|
| 383 |
+
precision, recall, f1, _ = precision_recall_fscore_support(
|
| 384 |
+
y_true, y_pred, average="weighted", zero_division=0
|
| 385 |
+
)
|
| 386 |
+
|
| 387 |
+
# Top-5 accuracy
|
| 388 |
+
top5_correct = 0
|
| 389 |
+
for i, label in enumerate(y_true):
|
| 390 |
+
if label in np.argsort(all_probs[i])[-5:]:
|
| 391 |
+
top5_correct += 1
|
| 392 |
+
top5_acc = top5_correct / len(y_true)
|
| 393 |
+
|
| 394 |
+
# Inference time
|
| 395 |
+
time_per_image = total_time / total_images
|
| 396 |
+
images_per_second = 1.0 / time_per_image if time_per_image > 0 else 0.0
|
| 397 |
+
|
| 398 |
+
# Model size (weights only)
|
| 399 |
+
temp_w = os.path.join(MODELS_DIR, f"{save_name}_temp_for_size.weights.h5")
|
| 400 |
+
model.save_weights(temp_w)
|
| 401 |
+
size_mb = os.path.getsize(temp_w) / (1024 * 1024)
|
| 402 |
+
os.remove(temp_w)
|
| 403 |
+
|
| 404 |
+
# Confusion matrix
|
| 405 |
+
cm = confusion_matrix(y_true, y_pred)
|
| 406 |
+
|
| 407 |
+
print("\nClassification Report:")
|
| 408 |
+
print(
|
| 409 |
+
classification_report(
|
| 410 |
+
y_true,
|
| 411 |
+
y_pred,
|
| 412 |
+
target_names=class_names,
|
| 413 |
+
zero_division=0,
|
| 414 |
+
)
|
| 415 |
+
)
|
| 416 |
+
|
| 417 |
+
print(f"Test Accuracy : {accuracy:.4f}")
|
| 418 |
+
print(f"Weighted Precision : {precision:.4f}")
|
| 419 |
+
print(f"Weighted Recall : {recall:.4f}")
|
| 420 |
+
print(f"Weighted F1-score : {f1:.4f}")
|
| 421 |
+
print(f"Top-5 Accuracy : {top5_acc:.4f}")
|
| 422 |
+
print(f"Avg time per image : {time_per_image*1000:.2f} ms")
|
| 423 |
+
print(f"Images per second : {images_per_second:.2f}")
|
| 424 |
+
print(f"Model size (weights) : {size_mb:.2f} MB")
|
| 425 |
+
print(f"Num parameters : {model.count_params()}")
|
| 426 |
+
|
| 427 |
+
# Save metrics + confusion matrix
|
| 428 |
+
save_dir = os.path.join(METRICS_DIR, save_name)
|
| 429 |
+
os.makedirs(save_dir, exist_ok=True)
|
| 430 |
+
|
| 431 |
+
metrics = {
|
| 432 |
+
"model_name": save_name,
|
| 433 |
+
"accuracy": accuracy,
|
| 434 |
+
"precision_weighted": float(precision),
|
| 435 |
+
"recall_weighted": float(recall),
|
| 436 |
+
"f1_weighted": float(f1),
|
| 437 |
+
"top5_accuracy": float(top5_acc),
|
| 438 |
+
"avg_inference_time_sec": float(time_per_image),
|
| 439 |
+
"images_per_second": float(images_per_second),
|
| 440 |
+
"model_size_mb": float(size_mb),
|
| 441 |
+
"num_parameters": int(model.count_params()),
|
| 442 |
+
}
|
| 443 |
+
|
| 444 |
+
metrics_path = os.path.join(save_dir, "metrics.json")
|
| 445 |
+
cm_path = os.path.join(save_dir, "confusion_matrix.npy")
|
| 446 |
+
|
| 447 |
+
with open(metrics_path, "w") as f:
|
| 448 |
+
json.dump(metrics, f, indent=2)
|
| 449 |
+
|
| 450 |
+
np.save(cm_path, cm)
|
| 451 |
+
|
| 452 |
+
print(f"\nSaved metrics to : {metrics_path}")
|
| 453 |
+
print(f"Saved confusion matrix to: {cm_path}")
|
| 454 |
+
|
| 455 |
+
return metrics, cm
|
| 456 |
+
|
| 457 |
+
# ---- Evaluate Stage 1 ----
|
| 458 |
+
resnet_stage1_metrics, resnet_stage1_cm = evaluate_and_save(
|
| 459 |
+
resnet_model,
|
| 460 |
+
save_name="resnet50_v2_stage1",
|
| 461 |
+
best_weights_path=resnet_stage1_best,
|
| 462 |
+
test_ds=test_ds,
|
| 463 |
+
class_names=class_names,
|
| 464 |
+
)
|
| 465 |
+
|
| 466 |
+
# ---- Evaluate Stage 2 ----
|
| 467 |
+
resnet_stage2_metrics, resnet_stage2_cm = evaluate_and_save(
|
| 468 |
+
resnet_model,
|
| 469 |
+
save_name="resnet50_v2_stage2",
|
| 470 |
+
best_weights_path=resnet_stage2_best,
|
| 471 |
+
test_ds=test_ds,
|
| 472 |
+
class_names=class_names,
|
| 473 |
+
)
|
| 474 |
+
|
| 475 |
+
# ------------------------------------------------------------
|
| 476 |
+
# 10. SUMMARY
|
| 477 |
+
# ------------------------------------------------------------
|
| 478 |
+
|
| 479 |
+
print("\n===== SUMMARY: RESNET50 v2 STAGES COMPARISON =====")
|
| 480 |
+
print("Stage 1 Test Accuracy:", resnet_stage1_metrics["accuracy"])
|
| 481 |
+
print("Stage 2 Test Accuracy:", resnet_stage2_metrics["accuracy"])
|
| 482 |
+
print("✅ RESNET50 v2 pipeline complete.")
|
scripts/02_vgg16.py
ADDED
|
@@ -0,0 +1,422 @@
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION AI - MODEL 1 (v2): VGG16 (TRANSFER LEARNING + FT)
|
| 3 |
+
# with proper preprocess_input + label smoothing + deeper FT
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
import numpy as np
|
| 10 |
+
import tensorflow as tf
|
| 11 |
+
from tensorflow import keras
|
| 12 |
+
from tensorflow.keras import layers
|
| 13 |
+
|
| 14 |
+
from sklearn.metrics import (
|
| 15 |
+
precision_recall_fscore_support,
|
| 16 |
+
confusion_matrix,
|
| 17 |
+
classification_report,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
|
| 21 |
+
|
| 22 |
+
print("TensorFlow version:", tf.__version__)
|
| 23 |
+
|
| 24 |
+
# ------------------------------------------------------------
|
| 25 |
+
# 1. CONFIGURATION
|
| 26 |
+
# ------------------------------------------------------------
|
| 27 |
+
|
| 28 |
+
BASE_DIR = "smartvision_dataset" # your dataset root
|
| 29 |
+
CLASS_DIR = os.path.join(BASE_DIR, "classification")
|
| 30 |
+
TRAIN_DIR = os.path.join(CLASS_DIR, "train")
|
| 31 |
+
VAL_DIR = os.path.join(CLASS_DIR, "val")
|
| 32 |
+
TEST_DIR = os.path.join(CLASS_DIR, "test")
|
| 33 |
+
|
| 34 |
+
IMG_SIZE = (224, 224)
|
| 35 |
+
BATCH_SIZE = 32
|
| 36 |
+
NUM_CLASSES = 25
|
| 37 |
+
|
| 38 |
+
MODELS_DIR = "saved_models"
|
| 39 |
+
METRICS_DIR = "smartvision_metrics"
|
| 40 |
+
|
| 41 |
+
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 42 |
+
os.makedirs(METRICS_DIR, exist_ok=True)
|
| 43 |
+
|
| 44 |
+
print("Train dir:", TRAIN_DIR)
|
| 45 |
+
print("Val dir :", VAL_DIR)
|
| 46 |
+
print("Test dir :", TEST_DIR)
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------
|
| 49 |
+
# 2. LOAD DATASETS (FROM CROPPED SINGLE-OBJECT IMAGES)
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
|
| 52 |
+
train_ds = tf.keras.utils.image_dataset_from_directory(
|
| 53 |
+
TRAIN_DIR,
|
| 54 |
+
image_size=IMG_SIZE,
|
| 55 |
+
batch_size=BATCH_SIZE,
|
| 56 |
+
shuffle=True,
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
val_ds = tf.keras.utils.image_dataset_from_directory(
|
| 60 |
+
VAL_DIR,
|
| 61 |
+
image_size=IMG_SIZE,
|
| 62 |
+
batch_size=BATCH_SIZE,
|
| 63 |
+
shuffle=False,
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
test_ds = tf.keras.utils.image_dataset_from_directory(
|
| 67 |
+
TEST_DIR,
|
| 68 |
+
image_size=IMG_SIZE,
|
| 69 |
+
batch_size=BATCH_SIZE,
|
| 70 |
+
shuffle=False,
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
class_names = train_ds.class_names
|
| 74 |
+
print("Detected classes:", class_names)
|
| 75 |
+
print("Number of classes:", len(class_names))
|
| 76 |
+
|
| 77 |
+
AUTOTUNE = tf.data.AUTOTUNE
|
| 78 |
+
train_ds = train_ds.prefetch(AUTOTUNE)
|
| 79 |
+
val_ds = val_ds.prefetch(AUTOTUNE)
|
| 80 |
+
test_ds = test_ds.prefetch(AUTOTUNE)
|
| 81 |
+
|
| 82 |
+
# ------------------------------------------------------------
|
| 83 |
+
# 3. DATA AUGMENTATION (APPLIED ONLY DURING TRAINING)
|
| 84 |
+
# ------------------------------------------------------------
|
| 85 |
+
|
| 86 |
+
data_augmentation = keras.Sequential(
|
| 87 |
+
[
|
| 88 |
+
layers.RandomFlip("horizontal"), # random horizontal flips
|
| 89 |
+
layers.RandomRotation(0.04), # ≈ ±15 degrees
|
| 90 |
+
layers.RandomZoom(0.1), # random zoom
|
| 91 |
+
layers.RandomContrast(0.2), # ±20% contrast
|
| 92 |
+
layers.Lambda(lambda x: tf.image.random_brightness(x, max_delta=0.2)),
|
| 93 |
+
layers.Lambda(lambda x: tf.image.random_saturation(x, 0.8, 1.2)),
|
| 94 |
+
],
|
| 95 |
+
name="data_augmentation",
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
# NOTE:
|
| 99 |
+
# We DO NOT use Rescaling(1./255) here.
|
| 100 |
+
# Instead, we use VGG16's preprocess_input which subtracts ImageNet means
|
| 101 |
+
# and converts RGB inputs to BGR channel order. This matches the pretrained weights.
|
| 102 |
+
|
| 103 |
+
# ------------------------------------------------------------
|
| 104 |
+
# 4. BUILD VGG16 MODEL (FROZEN BASE + CUSTOM HEAD)
|
| 105 |
+
# ------------------------------------------------------------
|
| 106 |
+
|
| 107 |
+
def build_vgg16_model_v2():
|
| 108 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 109 |
+
|
| 110 |
+
# 1. Augmentation (only active during training)
|
| 111 |
+
x = data_augmentation(inputs)
|
| 112 |
+
|
| 113 |
+
# 2. VGG16-specific preprocessing
|
| 114 |
+
x = layers.Lambda(
|
| 115 |
+
lambda z: preprocess_input(tf.cast(z, tf.float32)),
|
| 116 |
+
name="vgg16_preprocess"
|
| 117 |
+
)(x)
|
| 118 |
+
|
| 119 |
+
# 3. Pre-trained VGG16 backbone (no top classification head)
|
| 120 |
+
base_model = VGG16(
|
| 121 |
+
include_top=False,
|
| 122 |
+
weights="imagenet",
|
| 123 |
+
input_tensor=x,
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
# Freeze backbone initially (Stage 1)
|
| 127 |
+
base_model.trainable = False
|
| 128 |
+
|
| 129 |
+
# 4. Custom classification head for 25 classes
|
| 130 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(base_model.output)
|
| 131 |
+
x = layers.Dense(256, activation="relu", name="dense_256")(x)
|
| 132 |
+
x = layers.Dropout(0.5, name="dropout_0_5")(x)
|
| 133 |
+
outputs = layers.Dense(NUM_CLASSES, activation="softmax", name="predictions")(x)
|
| 134 |
+
|
| 135 |
+
model = keras.Model(inputs=inputs, outputs=outputs, name="VGG16_smartvision_v2")
|
| 136 |
+
return model
|
| 137 |
+
|
| 138 |
+
vgg16_model = build_vgg16_model_v2()
|
| 139 |
+
vgg16_model.summary()
|
| 140 |
+
|
| 141 |
+
# ------------------------------------------------------------
|
| 142 |
+
# 5. CUSTOM LOSS WITH LABEL SMOOTHING
|
| 143 |
+
# ------------------------------------------------------------
|
| 144 |
+
|
| 145 |
+
def make_sparse_ce_with_label_smoothing(num_classes, label_smoothing=0.05):
|
| 146 |
+
"""
|
| 147 |
+
Implements sparse categorical crossentropy with manual label smoothing.
|
| 148 |
+
Works even if your Keras version doesn't support `label_smoothing` in
|
| 149 |
+
SparseCategoricalCrossentropy.__init__.
|
| 150 |
+
"""
|
| 151 |
+
ls = float(label_smoothing)
|
| 152 |
+
nc = int(num_classes)
|
| 153 |
+
|
| 154 |
+
def loss_fn(y_true, y_pred):
|
| 155 |
+
# y_true: integer labels, shape (batch,)
|
| 156 |
+
y_true = tf.cast(y_true, tf.int32)
|
| 157 |
+
y_true_oh = tf.one_hot(y_true, depth=nc)
|
| 158 |
+
|
| 159 |
+
if ls > 0.0:
|
| 160 |
+
smooth = ls
|
| 161 |
+
y_true_oh = (1.0 - smooth) * y_true_oh + smooth / tf.cast(nc, tf.float32)
|
| 162 |
+
|
| 163 |
+
# y_pred is softmax probabilities
|
| 164 |
+
return tf.keras.losses.categorical_crossentropy(
|
| 165 |
+
y_true_oh, y_pred, from_logits=False
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
return loss_fn
|
| 169 |
+
|
| 170 |
+
# ------------------------------------------------------------
|
| 171 |
+
# 6. TRAINING UTILITY (COMMON FOR STAGE 1 & 2)
|
| 172 |
+
# ------------------------------------------------------------
|
| 173 |
+
|
| 174 |
+
def compile_and_train(
|
| 175 |
+
model,
|
| 176 |
+
model_name,
|
| 177 |
+
train_ds,
|
| 178 |
+
val_ds,
|
| 179 |
+
epochs,
|
| 180 |
+
lr,
|
| 181 |
+
model_tag,
|
| 182 |
+
patience_es=5,
|
| 183 |
+
patience_rlr=2,
|
| 184 |
+
):
|
| 185 |
+
"""
|
| 186 |
+
Compile and train model, saving the best weights by val_accuracy.
|
| 187 |
+
model_name: base name ("vgg16_v2")
|
| 188 |
+
model_tag : "stage1" or "stage2" etc.
|
| 189 |
+
"""
|
| 190 |
+
print(f"\n===== TRAINING {model_name} ({model_tag}) =====")
|
| 191 |
+
|
| 192 |
+
optimizer = keras.optimizers.Adam(learning_rate=lr)
|
| 193 |
+
|
| 194 |
+
# Use our custom loss with label smoothing
|
| 195 |
+
loss_fn = make_sparse_ce_with_label_smoothing(
|
| 196 |
+
num_classes=NUM_CLASSES,
|
| 197 |
+
label_smoothing=0.05,
|
| 198 |
+
)
|
| 199 |
+
|
| 200 |
+
model.compile(
|
| 201 |
+
optimizer=optimizer,
|
| 202 |
+
loss=loss_fn,
|
| 203 |
+
metrics=["accuracy"],
|
| 204 |
+
)
|
| 205 |
+
|
| 206 |
+
best_weights_path = os.path.join(MODELS_DIR, f"{model_name}_{model_tag}_best.h5")
|
| 207 |
+
|
| 208 |
+
callbacks = [
|
| 209 |
+
keras.callbacks.ModelCheckpoint(
|
| 210 |
+
filepath=best_weights_path,
|
| 211 |
+
monitor="val_accuracy",
|
| 212 |
+
save_best_only=True,
|
| 213 |
+
mode="max",
|
| 214 |
+
verbose=1,
|
| 215 |
+
),
|
| 216 |
+
keras.callbacks.EarlyStopping(
|
| 217 |
+
monitor="val_accuracy",
|
| 218 |
+
patience=patience_es,
|
| 219 |
+
restore_best_weights=True,
|
| 220 |
+
verbose=1,
|
| 221 |
+
),
|
| 222 |
+
keras.callbacks.ReduceLROnPlateau(
|
| 223 |
+
monitor="val_loss",
|
| 224 |
+
factor=0.5,
|
| 225 |
+
patience=patience_rlr,
|
| 226 |
+
min_lr=1e-6,
|
| 227 |
+
verbose=1,
|
| 228 |
+
),
|
| 229 |
+
]
|
| 230 |
+
|
| 231 |
+
history = model.fit(
|
| 232 |
+
train_ds,
|
| 233 |
+
validation_data=val_ds,
|
| 234 |
+
epochs=epochs,
|
| 235 |
+
callbacks=callbacks,
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
return history, best_weights_path
|
| 239 |
+
|
| 240 |
+
# ------------------------------------------------------------
|
| 241 |
+
# 7. STAGE 1: TRAIN HEAD WITH FROZEN VGG16 BASE
|
| 242 |
+
# ------------------------------------------------------------
|
| 243 |
+
|
| 244 |
+
print("\n===== STAGE 1: Training head with frozen VGG16 base =====")
|
| 245 |
+
|
| 246 |
+
# Safety: ensure all VGG16 conv blocks are frozen
|
| 247 |
+
for layer in vgg16_model.layers:
|
| 248 |
+
if layer.name.startswith("block"):
|
| 249 |
+
layer.trainable = False
|
| 250 |
+
|
| 251 |
+
epochs_stage1 = 20
|
| 252 |
+
lr_stage1 = 1e-4
|
| 253 |
+
|
| 254 |
+
history_stage1, vgg16_stage1_best = compile_and_train(
|
| 255 |
+
vgg16_model,
|
| 256 |
+
model_name="vgg16_v2",
|
| 257 |
+
train_ds=train_ds,
|
| 258 |
+
val_ds=val_ds,
|
| 259 |
+
epochs=epochs_stage1,
|
| 260 |
+
lr=lr_stage1,
|
| 261 |
+
model_tag="stage1",
|
| 262 |
+
patience_es=5,
|
| 263 |
+
patience_rlr=2,
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
print("Stage 1 best weights saved at:", vgg16_stage1_best)
|
| 267 |
+
|
| 268 |
+
# ------------------------------------------------------------
|
| 269 |
+
# 8. STAGE 2: FINE-TUNE BLOCK5 OF VGG16
|
| 270 |
+
# ------------------------------------------------------------
|
| 271 |
+
|
| 272 |
+
print("\n===== STAGE 2: Fine-tuning VGG16 block4 + block5 =====")
|
| 273 |
+
|
| 274 |
+
# Load best Stage 1 weights before fine-tuning
|
| 275 |
+
vgg16_model.load_weights(vgg16_stage1_best)
|
| 276 |
+
|
| 277 |
+
# Unfreeze only the block5_* layers for controlled fine-tuning
|
| 278 |
+
for layer in vgg16_model.layers:
|
| 279 |
+
if layer.name.startswith("block5") :
|
| 280 |
+
layer.trainable = True  # fine-tune the top conv block (block5)
|
| 281 |
+
elif layer.name.startswith("block"):
|
| 282 |
+
layer.trainable = False  # keep lower blocks frozen (block1–block4)
|
| 283 |
+
|
| 284 |
+
# Head layers (GAP + Dense + Dropout + output) remain trainable
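
# Optional sanity check (illustrative, safe to delete): list the layers that will
# actually receive gradient updates in Stage 2. With the loop above this should be
# the block5_* conv layers plus the head's Dense layers.
stage2_trainable = [l.name for l in vgg16_model.layers if l.trainable and l.weights]
print("Trainable (weighted) layers for Stage 2:", stage2_trainable)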
|
| 285 |
+
|
| 286 |
+
epochs_stage2 = 15
|
| 287 |
+
lr_stage2 = 1e-5  # small learning rate, safe for fine-tuning
|
| 288 |
+
|
| 289 |
+
history_stage2, vgg16_stage2_best = compile_and_train(
|
| 290 |
+
vgg16_model,
|
| 291 |
+
model_name="vgg16_v2",
|
| 292 |
+
train_ds=train_ds,
|
| 293 |
+
val_ds=val_ds,
|
| 294 |
+
epochs=epochs_stage2,
|
| 295 |
+
lr=lr_stage2,
|
| 296 |
+
model_tag="stage2",
|
| 297 |
+
patience_es=6,
|
| 298 |
+
patience_rlr=3,
|
| 299 |
+
)
|
| 300 |
+
|
| 301 |
+
print("Stage 2 best weights saved at:", vgg16_stage2_best)
|
| 302 |
+
|
| 303 |
+
# ------------------------------------------------------------
|
| 304 |
+
# 9. EVALUATION + SAVE METRICS & CONFUSION MATRIX
|
| 305 |
+
# ------------------------------------------------------------
|
| 306 |
+
|
| 307 |
+
def evaluate_and_save(model, model_name, best_weights_path, test_ds, class_names):
|
| 308 |
+
print(f"\n===== EVALUATING {model_name.upper()} ON TEST SET =====")
|
| 309 |
+
|
| 310 |
+
# Load best weights
|
| 311 |
+
model.load_weights(best_weights_path)
|
| 312 |
+
print(f"Loaded best weights from {best_weights_path}")
|
| 313 |
+
|
| 314 |
+
y_true = []
|
| 315 |
+
y_pred = []
|
| 316 |
+
all_probs = []
|
| 317 |
+
|
| 318 |
+
total_time = 0.0
|
| 319 |
+
total_images = 0
|
| 320 |
+
|
| 321 |
+
# Predict over test dataset
|
| 322 |
+
for images, labels in test_ds:
|
| 323 |
+
images_np = images.numpy()
|
| 324 |
+
bs = images_np.shape[0]
|
| 325 |
+
|
| 326 |
+
start = time.perf_counter()
|
| 327 |
+
probs = model.predict(images_np, verbose=0)
|
| 328 |
+
end = time.perf_counter()
|
| 329 |
+
|
| 330 |
+
total_time += (end - start)
|
| 331 |
+
total_images += bs
|
| 332 |
+
|
| 333 |
+
preds = np.argmax(probs, axis=1)
|
| 334 |
+
|
| 335 |
+
y_true.extend(labels.numpy())
|
| 336 |
+
y_pred.extend(preds)
|
| 337 |
+
all_probs.append(probs)
|
| 338 |
+
|
| 339 |
+
y_true = np.array(y_true)
|
| 340 |
+
y_pred = np.array(y_pred)
|
| 341 |
+
all_probs = np.concatenate(all_probs, axis=0)
|
| 342 |
+
|
| 343 |
+
# Basic metrics
|
| 344 |
+
accuracy = float((y_true == y_pred).mean())
|
| 345 |
+
precision, recall, f1, _ = precision_recall_fscore_support(
|
| 346 |
+
y_true, y_pred, average="weighted", zero_division=0
|
| 347 |
+
)
|
| 348 |
+
|
| 349 |
+
# Top-5 accuracy
|
| 350 |
+
top5_correct = 0
|
| 351 |
+
for i, label in enumerate(y_true):
|
| 352 |
+
if label in np.argsort(all_probs[i])[-5:]:
|
| 353 |
+
top5_correct += 1
|
| 354 |
+
top5_acc = top5_correct / len(y_true)
|
| 355 |
+
|
| 356 |
+
# Inference time
|
| 357 |
+
time_per_image = total_time / total_images
|
| 358 |
+
images_per_second = 1.0 / time_per_image
|
| 359 |
+
|
| 360 |
+
# Model size (weights only)
|
| 361 |
+
temp_w = os.path.join(MODELS_DIR, f"{model_name}_temp_for_size.weights.h5")
|
| 362 |
+
model.save_weights(temp_w)
|
| 363 |
+
size_mb = os.path.getsize(temp_w) / (1024 * 1024)
|
| 364 |
+
os.remove(temp_w)
|
| 365 |
+
|
| 366 |
+
# Confusion matrix
|
| 367 |
+
cm = confusion_matrix(y_true, y_pred)
|
| 368 |
+
|
| 369 |
+
print("\nClassification Report:")
|
| 370 |
+
print(classification_report(y_true, y_pred, target_names=class_names, zero_division=0))
|
| 371 |
+
|
| 372 |
+
print(f"Test Accuracy : {accuracy:.4f}")
|
| 373 |
+
print(f"Weighted Precision : {precision:.4f}")
|
| 374 |
+
print(f"Weighted Recall : {recall:.4f}")
|
| 375 |
+
print(f"Weighted F1-score : {f1:.4f}")
|
| 376 |
+
print(f"Top-5 Accuracy : {top5_acc:.4f}")
|
| 377 |
+
print(f"Avg time per image : {time_per_image*1000:.2f} ms")
|
| 378 |
+
print(f"Images per second : {images_per_second:.2f}")
|
| 379 |
+
print(f"Model size (weights) : {size_mb:.2f} MB")
|
| 380 |
+
print(f"Num parameters : {model.count_params()}")
|
| 381 |
+
|
| 382 |
+
# Save metrics + confusion matrix in dedicated folder
|
| 383 |
+
save_dir = os.path.join(METRICS_DIR, model_name)
|
| 384 |
+
os.makedirs(save_dir, exist_ok=True)
|
| 385 |
+
|
| 386 |
+
metrics = {
|
| 387 |
+
"model_name": model_name,
|
| 388 |
+
"accuracy": accuracy,
|
| 389 |
+
"precision_weighted": float(precision),
|
| 390 |
+
"recall_weighted": float(recall),
|
| 391 |
+
"f1_weighted": float(f1),
|
| 392 |
+
"top5_accuracy": float(top5_acc),
|
| 393 |
+
"avg_inference_time_sec": float(time_per_image),
|
| 394 |
+
"images_per_second": float(images_per_second),
|
| 395 |
+
"model_size_mb": float(size_mb),
|
| 396 |
+
"num_parameters": int(model.count_params()),
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
metrics_path = os.path.join(save_dir, "metrics.json")
|
| 400 |
+
cm_path = os.path.join(save_dir, "confusion_matrix.npy")
|
| 401 |
+
|
| 402 |
+
with open(metrics_path, "w") as f:
|
| 403 |
+
json.dump(metrics, f, indent=2)
|
| 404 |
+
|
| 405 |
+
np.save(cm_path, cm)
|
| 406 |
+
|
| 407 |
+
print(f"\nSaved metrics to : {metrics_path}")
|
| 408 |
+
print(f"Saved confusion matrix to: {cm_path}")
|
| 409 |
+
|
| 410 |
+
return metrics, cm
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
# Evaluate FINAL (fine-tuned) model on test set
|
| 414 |
+
vgg16_metrics, vgg16_cm = evaluate_and_save(
|
| 415 |
+
vgg16_model,
|
| 416 |
+
model_name="vgg16_v2_stage2",
|
| 417 |
+
best_weights_path=vgg16_stage2_best,
|
| 418 |
+
test_ds=test_ds,
|
| 419 |
+
class_names=class_names,
|
| 420 |
+
)
|
| 421 |
+
|
| 422 |
+
print("\n✅ VGG16 v2 (2-stage, improved) pipeline complete.")
|
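Because evaluate_and_save writes one metrics.json per model, the four backbones can be compared offline once each training script has run. Below is a minimal sketch of such a comparison; it assumes the metrics root folder is smartvision_metrics and uses only keys that evaluate_and_save actually writes — it is illustrative, not one of the committed scripts.

import json
import os

METRICS_DIR = "smartvision_metrics"  # assumed metrics root, one subfolder per model

rows = []
for model_dir in sorted(os.listdir(METRICS_DIR)):
    metrics_path = os.path.join(METRICS_DIR, model_dir, "metrics.json")
    if not os.path.isfile(metrics_path):
        continue  # skip non-model folders such as comparison plots
    with open(metrics_path) as f:
        m = json.load(f)
    rows.append((m["model_name"], m["accuracy"], m["f1_weighted"], m["model_size_mb"]))

print(f"{'model':30s} {'acc':>6s} {'f1':>6s} {'size MB':>8s}")
for name, acc, f1, size in rows:
    print(f"{name:30s} {acc:6.3f} {f1:6.3f} {size:8.1f}")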
scripts/03_eval_yolo.py
ADDED
|
@@ -0,0 +1,151 @@
|
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION AI - YOLOv8 EVALUATION SCRIPT
|
| 3 |
+
# - Loads best.pt from training
|
| 4 |
+
# - Computes mAP, per-class metrics
|
| 5 |
+
# - Measures inference speed (FPS)
|
| 6 |
+
# - Saves sample prediction images
|
| 7 |
+
# - Saves metrics to JSON for reporting
|
| 8 |
+
# ============================================================
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import glob
|
| 12 |
+
import time
|
| 13 |
+
import json
|
| 14 |
+
from ultralytics import YOLO
|
| 15 |
+
|
| 16 |
+
# ------------------------------------------------------------
|
| 17 |
+
# 1. PATHS
|
| 18 |
+
# ------------------------------------------------------------
|
| 19 |
+
|
| 20 |
+
BASE_DIR = "smartvision_dataset"
|
| 21 |
+
DET_DIR = os.path.join(BASE_DIR, "detection")
|
| 22 |
+
DATA_YAML = os.path.join(DET_DIR, "data.yaml")
|
| 23 |
+
|
| 24 |
+
# Folder created by your train_yolo.py script
|
| 25 |
+
RUN_DIR = "yolo_runs/smartvision_yolov8s"
|
| 26 |
+
BEST_WEIGHTS = os.path.join(RUN_DIR, "weights", "best.pt")
|
| 27 |
+
|
| 28 |
+
# NOTE: all detection images are in detection/images (no "val" subfolder)
|
| 29 |
+
VAL_IMAGES_DIR = os.path.join(DET_DIR, "images")
|
| 30 |
+
|
| 31 |
+
print("📂 DATA_YAML :", DATA_YAML)
|
| 32 |
+
print("📦 BEST_WEIGHTS:", BEST_WEIGHTS)
|
| 33 |
+
print("📁 VAL_IMAGES :", VAL_IMAGES_DIR)
|
| 34 |
+
|
| 35 |
+
# ------------------------------------------------------------
|
| 36 |
+
# 2. LOAD TRAINED MODEL
|
| 37 |
+
# ------------------------------------------------------------
|
| 38 |
+
|
| 39 |
+
model = YOLO(BEST_WEIGHTS)
|
| 40 |
+
print("\n✅ Loaded trained YOLOv8 model from best.pt")
|
| 41 |
+
|
| 42 |
+
# ------------------------------------------------------------
|
| 43 |
+
# 3. VALIDATION METRICS (mAP, precision, recall)
|
| 44 |
+
# ------------------------------------------------------------
|
| 45 |
+
|
| 46 |
+
print("\n===== RUNNING VALIDATION (YOLO model.val) =====")
|
| 47 |
+
metrics = model.val(
|
| 48 |
+
data=DATA_YAML,
|
| 49 |
+
split="val", # uses val split defined in data.yaml (here both train/val point to 'images')
|
| 50 |
+
imgsz=640,
|
| 51 |
+
save_json=False
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
print("\n===== YOLOv8 Validation Metrics =====")
|
| 55 |
+
print(f"mAP@0.5 : {metrics.box.map50:.4f}")
|
| 56 |
+
print(f"mAP@0.5:0.95 : {metrics.box.map:.4f}")
|
| 57 |
+
|
| 58 |
+
# metrics.box.maps is an array of per-class mAP@0.5:0.95 values, in the same order as the class names
|
| 59 |
+
print("\nPer-class mAP@0.5 (first 10 classes):")
|
| 60 |
+
for i, m in enumerate(metrics.box.maps[:10]):
|
| 61 |
+
print(f" Class {i}: {m:.4f}")
|
| 62 |
+
|
| 63 |
+
# ------------------------------------------------------------
|
| 64 |
+
# 4. INFERENCE SPEED (FPS) ON VALIDATION IMAGES
|
| 65 |
+
# ------------------------------------------------------------
|
| 66 |
+
|
| 67 |
+
print("\n===== MEASURING INFERENCE SPEED (FPS) =====")
|
| 68 |
+
|
| 69 |
+
# Collect all JPG images in detection/images
|
| 70 |
+
val_images = glob.glob(os.path.join(VAL_IMAGES_DIR, "*.jpg"))
|
| 71 |
+
val_images = sorted(val_images)
|
| 72 |
+
|
| 73 |
+
num_test_images = min(50, len(val_images)) # test on up to 50 images
|
| 74 |
+
test_images = val_images[:num_test_images]
|
| 75 |
+
|
| 76 |
+
print(f"Found {len(val_images)} images in {VAL_IMAGES_DIR}")
|
| 77 |
+
print(f"Using {len(test_images)} images for speed test.")
|
| 78 |
+
|
| 79 |
+
# Defaults in case there are no images
|
| 80 |
+
time_per_image = 0.0
|
| 81 |
+
fps = 0.0
|
| 82 |
+
|
| 83 |
+
if len(test_images) == 0:
|
| 84 |
+
print("⚠️ No images found for FPS test. Skipping speed measurement.")
|
| 85 |
+
else:
|
| 86 |
+
start = time.perf_counter()
|
| 87 |
+
|
| 88 |
+
_ = model.predict(
|
| 89 |
+
source=test_images,
|
| 90 |
+
imgsz=640,
|
| 91 |
+
conf=0.5,
|
| 92 |
+
verbose=False
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
end = time.perf_counter()
|
| 96 |
+
|
| 97 |
+
total_time = end - start
|
| 98 |
+
time_per_image = total_time / len(test_images)
|
| 99 |
+
fps = 1.0 / time_per_image
|
| 100 |
+
|
| 101 |
+
print(f"Total time : {total_time:.2f} sec for {len(test_images)} images")
|
| 102 |
+
print(f"Avg time / image : {time_per_image*1000:.2f} ms")
|
| 103 |
+
print(f"Approx FPS : {fps:.2f} images/sec")
|
| 104 |
+
|
| 105 |
+
# ------------------------------------------------------------
|
| 106 |
+
# 5. SAVE SAMPLE PREDICTIONS (BOXES + LABELS)
|
| 107 |
+
# ------------------------------------------------------------
|
| 108 |
+
|
| 109 |
+
print("\n===== SAVING SAMPLE PREDICTION IMAGES =====")
|
| 110 |
+
|
| 111 |
+
sample_out_project = "yolo_vis"
|
| 112 |
+
sample_out_name = "samples"
|
| 113 |
+
|
| 114 |
+
if len(test_images) == 0:
|
| 115 |
+
print("⚠️ No images available for sample visualization. Skipping sample predictions.")
|
| 116 |
+
else:
|
| 117 |
+
sample_results = model.predict(
|
| 118 |
+
source=test_images[:8], # first 8 images
|
| 119 |
+
imgsz=640,
|
| 120 |
+
conf=0.5,
|
| 121 |
+
save=True, # save annotated images
|
| 122 |
+
project=sample_out_project,
|
| 123 |
+
name=sample_out_name,
|
| 124 |
+
verbose=False
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
print(f"✅ Saved sample predictions (with boxes & labels) to: {sample_out_project}/{sample_out_name}/")
|
| 128 |
+
|
| 129 |
+
# ------------------------------------------------------------
|
| 130 |
+
# 6. SAVE METRICS TO JSON (FOR REPORTING)
|
| 131 |
+
# ------------------------------------------------------------
|
| 132 |
+
|
| 133 |
+
print("\n===== SAVING METRICS TO JSON =====")
|
| 134 |
+
|
| 135 |
+
yolo_metrics = {
|
| 136 |
+
"model_name": "yolov8s_smartvision",
|
| 137 |
+
"map_50": float(metrics.box.map50),
|
| 138 |
+
"map_50_95": float(metrics.box.map),
|
| 139 |
+
"num_val_images_for_speed_test": int(len(test_images)),
|
| 140 |
+
"avg_inference_time_sec": float(time_per_image),
|
| 141 |
+
"fps": float(fps),
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
os.makedirs("yolo_metrics", exist_ok=True)
|
| 145 |
+
metrics_json_path = os.path.join("yolo_metrics", "yolov8s_metrics.json")
|
| 146 |
+
|
| 147 |
+
with open(metrics_json_path, "w") as f:
|
| 148 |
+
json.dump(yolo_metrics, f, indent=2)
|
| 149 |
+
|
| 150 |
+
print(f"✅ Saved YOLO metrics JSON to: {metrics_json_path}")
|
| 151 |
+
print("\n🎯 YOLOv8 evaluation complete.")
|
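To report per-class numbers with their names rather than bare indices, the per-class array can be paired with the model's index-to-name mapping. A minimal sketch, assuming a recent ultralytics release where metrics.box.maps holds per-class mAP@0.5:0.95 and model.names maps class indices to names:

# Pair each per-class mAP value with its class name (sketch, not part of the script above)
per_class = {model.names[i]: float(m) for i, m in enumerate(metrics.box.maps)}
for name, value in sorted(per_class.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{name:15s} {value:.4f}")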
scripts/03_train_yolo.py
ADDED
|
@@ -0,0 +1,56 @@
|
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION AI - YOLOv8 TRAINING SCRIPT
|
| 3 |
+
# - Fine-tunes yolov8s on 25-class SmartVision detection dataset
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import torch
|
| 8 |
+
from ultralytics import YOLO
|
| 9 |
+
|
| 10 |
+
# ------------------------------------------------------------
|
| 11 |
+
# 1. PATHS & CONFIG
|
| 12 |
+
# ------------------------------------------------------------
|
| 13 |
+
|
| 14 |
+
BASE_DIR = "smartvision_dataset"
|
| 15 |
+
DET_DIR = os.path.join(BASE_DIR, "detection")
|
| 16 |
+
DATA_YAML = os.path.join(DET_DIR, "data.yaml")
|
| 17 |
+
|
| 18 |
+
# YOLO model size:
|
| 19 |
+
# - yolov8n.pt : nano
|
| 20 |
+
# - yolov8s.pt : small (good tradeoff) ✅
|
| 21 |
+
MODEL_WEIGHTS = "yolov8s.pt"
|
| 22 |
+
|
| 23 |
+
# Auto-select device
|
| 24 |
+
device = "0" if torch.cuda.is_available() else "cpu"
|
| 25 |
+
print("🚀 Using device:", device)
|
| 26 |
+
print("📂 DATA_YAML:", DATA_YAML)
|
| 27 |
+
|
| 28 |
+
# ------------------------------------------------------------
|
| 29 |
+
# 2. LOAD BASE MODEL
|
| 30 |
+
# ------------------------------------------------------------
|
| 31 |
+
|
| 32 |
+
print(f"📥 Loading YOLOv8 model from: {MODEL_WEIGHTS}")
|
| 33 |
+
model = YOLO(MODEL_WEIGHTS)
|
| 34 |
+
|
| 35 |
+
# ------------------------------------------------------------
|
| 36 |
+
# 3. TRAIN
|
| 37 |
+
# ------------------------------------------------------------
|
| 38 |
+
|
| 39 |
+
results = model.train(
|
| 40 |
+
data=DATA_YAML,
|
| 41 |
+
epochs=50,
|
| 42 |
+
imgsz=640,
|
| 43 |
+
batch=8, # smaller for CPU
|
| 44 |
+
lr0=0.01,
|
| 45 |
+
optimizer="SGD",
|
| 46 |
+
device=device,
|
| 47 |
+
project="yolo_runs",
|
| 48 |
+
name="smartvision_yolov8s",
|
| 49 |
+
pretrained=True,
|
| 50 |
+
plots=True,
|
| 51 |
+
verbose=True,
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
print("\n✅ YOLO training complete.")
|
| 55 |
+
print("📁 Run directory: yolo_runs/smartvision_yolov8s/")
|
| 56 |
+
print("📦 Best weights: yolo_runs/smartvision_yolov8s/weights/best.pt")
|
scripts/03_yolo_dataset_creation.py
ADDED
|
@@ -0,0 +1,248 @@
|
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION DATASET BUILDER – FIXED VERSION
|
| 3 |
+
# - Streams COCO
|
| 4 |
+
# - Selects 25 classes
|
| 5 |
+
# - Builds train/val/test for YOLO
|
| 6 |
+
# - Uses correct image width/height for normalization
|
| 7 |
+
# ============================================================
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import json
|
| 11 |
+
import random
|
| 12 |
+
from tqdm import tqdm
|
| 13 |
+
from datasets import load_dataset
|
| 14 |
+
from PIL import Image
|
| 15 |
+
|
| 16 |
+
# ------------------------------------------------------------
|
| 17 |
+
# CONFIG
|
| 18 |
+
# ------------------------------------------------------------
|
| 19 |
+
|
| 20 |
+
BASE_DIR = "smartvision_dataset"
|
| 21 |
+
IMAGES_PER_CLASS = 100 # you can increase if needed
|
| 22 |
+
|
| 23 |
+
TARGET_CLASSES = [
|
| 24 |
+
"person", "bicycle", "car", "motorcycle", "airplane", "bus",
|
| 25 |
+
"truck", "traffic light", "stop sign", "bench", "bird", "cat",
|
| 26 |
+
"dog", "horse", "cow", "elephant", "bottle", "cup", "bowl",
|
| 27 |
+
"pizza", "cake", "chair", "couch", "bed", "potted plant"
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
# COCO full classes (80)
|
| 31 |
+
COCO_CLASSES = [
|
| 32 |
+
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
|
| 33 |
+
"boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
|
| 34 |
+
"bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
|
| 35 |
+
"giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
|
| 36 |
+
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
|
| 37 |
+
"skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
|
| 38 |
+
"fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
|
| 39 |
+
"broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
|
| 40 |
+
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
|
| 41 |
+
"remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
|
| 42 |
+
"refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
|
| 43 |
+
"hair drier", "toothbrush",
|
| 44 |
+
]
|
| 45 |
+
|
| 46 |
+
COCO_NAME_TO_INDEX = {name: i for i, name in enumerate(COCO_CLASSES)}
|
| 47 |
+
SELECTED = {name: COCO_NAME_TO_INDEX[name] for name in TARGET_CLASSES}
|
| 48 |
+
|
| 49 |
+
os.makedirs(BASE_DIR, exist_ok=True)
|
| 50 |
+
|
| 51 |
+
# ------------------------------------------------------------
|
| 52 |
+
# STEP 1 — STREAM COCO & COLLECT IMAGES
|
| 53 |
+
# ------------------------------------------------------------
|
| 54 |
+
|
| 55 |
+
print("📥 Loading COCO dataset (streaming mode)...")
|
| 56 |
+
dataset = load_dataset("detection-datasets/coco", split="train", streaming=True)
|
| 57 |
+
|
| 58 |
+
class_images = {c: [] for c in TARGET_CLASSES}
|
| 59 |
+
class_count = {c: 0 for c in TARGET_CLASSES}
|
| 60 |
+
|
| 61 |
+
print("🔍 Collecting images...")
|
| 62 |
+
max_iterations = 100000 # safety cap
|
| 63 |
+
|
| 64 |
+
for idx, item in enumerate(dataset):
|
| 65 |
+
if idx >= max_iterations:
|
| 66 |
+
print(f"⚠️ Reached safety limit of {max_iterations} samples, stopping collection.")
|
| 67 |
+
break
|
| 68 |
+
|
| 69 |
+
ann = item["objects"]
|
| 70 |
+
|
| 71 |
+
# Get image and its size (this is the reference for bbox coordinates)
|
| 72 |
+
img = item["image"]
|
| 73 |
+
orig_width, orig_height = img.size
|
| 74 |
+
|
| 75 |
+
for cat_id in ann["category"]:
|
| 76 |
+
# If this category is one of our target classes
|
| 77 |
+
for cname, coco_id in SELECTED.items():
|
| 78 |
+
if cat_id == coco_id and class_count[cname] < IMAGES_PER_CLASS:
|
| 79 |
+
|
| 80 |
+
class_images[cname].append({
|
| 81 |
+
"image": img, # PIL image
|
| 82 |
+
"orig_width": orig_width, # width used for normalization
|
| 83 |
+
"orig_height": orig_height, # height used for normalization
|
| 84 |
+
"bboxes": ann["bbox"], # list of bboxes
|
| 85 |
+
"cats": ann["category"], # list of categories
|
| 86 |
+
})
|
| 87 |
+
class_count[cname] += 1
|
| 88 |
+
break
|
| 89 |
+
|
| 90 |
+
# Stop if all collected
|
| 91 |
+
if all(count >= IMAGES_PER_CLASS for count in class_count.values()):
|
| 92 |
+
break
|
| 93 |
+
|
| 94 |
+
print("🎉 Collection complete")
|
| 95 |
+
print("📊 Images per class:")
|
| 96 |
+
for cname, cnt in class_count.items():
|
| 97 |
+
print(f" {cname:15s}: {cnt}")
|
| 98 |
+
|
| 99 |
+
# ------------------------------------------------------------
|
| 100 |
+
# STEP 2 — CREATE FOLDERS
|
| 101 |
+
# ------------------------------------------------------------
|
| 102 |
+
|
| 103 |
+
DET_IMG_ROOT = os.path.join(BASE_DIR, "detection", "images")
|
| 104 |
+
DET_LAB_ROOT = os.path.join(BASE_DIR, "detection", "labels")
|
| 105 |
+
|
| 106 |
+
for split in ["train", "val", "test"]:
|
| 107 |
+
os.makedirs(os.path.join(DET_IMG_ROOT, split), exist_ok=True)
|
| 108 |
+
os.makedirs(os.path.join(DET_LAB_ROOT, split), exist_ok=True)
|
| 109 |
+
|
| 110 |
+
# ------------------------------------------------------------
|
| 111 |
+
# STEP 3 — TRAIN/VAL/TEST SPLIT
|
| 112 |
+
# ------------------------------------------------------------
|
| 113 |
+
|
| 114 |
+
train_data = {}
|
| 115 |
+
val_data = {}
|
| 116 |
+
test_data = {}
|
| 117 |
+
|
| 118 |
+
for cname, items in class_images.items():
|
| 119 |
+
random.shuffle(items)
|
| 120 |
+
n = len(items)
|
| 121 |
+
if n == 0:
|
| 122 |
+
print(f"⚠️ No images collected for class: {cname}")
|
| 123 |
+
continue
|
| 124 |
+
|
| 125 |
+
t1 = int(0.7 * n)
|
| 126 |
+
t2 = int(0.85 * n)
|
| 127 |
+
train_data[cname] = items[:t1]
|
| 128 |
+
val_data[cname] = items[t1:t2]
|
| 129 |
+
test_data[cname] = items[t2:]
|
| 130 |
+
|
| 131 |
+
split_dict = {
|
| 132 |
+
"train": train_data,
|
| 133 |
+
"val": val_data,
|
| 134 |
+
"test": test_data,
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
print("\n📊 Split sizes (per class):")
|
| 138 |
+
for cname in TARGET_CLASSES:
|
| 139 |
+
tr = len(train_data.get(cname, []))
|
| 140 |
+
va = len(val_data.get(cname, []))
|
| 141 |
+
te = len(test_data.get(cname, []))
|
| 142 |
+
print(f" {cname:15s} -> Train={tr:3d}, Val={va:3d}, Test={te:3d}")
|
| 143 |
+
|
| 144 |
+
# ------------------------------------------------------------
|
| 145 |
+
# STEP 4 — SAVE DETECTION IMAGES & LABELS (FIXED NORMALIZATION)
|
| 146 |
+
# ------------------------------------------------------------
|
| 147 |
+
|
| 148 |
+
print("\n📁 Saving detection images + labels with correct coordinates...\n")
|
| 149 |
+
|
| 150 |
+
YOLO_NAME_TO_ID = {name: i for i, name in enumerate(TARGET_CLASSES)}
|
| 151 |
+
|
| 152 |
+
global_idx = 0
|
| 153 |
+
stats = {"train": 0, "val": 0, "test": 0}
|
| 154 |
+
label_stats = {"train": 0, "val": 0, "test": 0}
|
| 155 |
+
object_stats = {"train": 0, "val": 0, "test": 0}
|
| 156 |
+
|
| 157 |
+
for split, cls_dict in split_dict.items():
|
| 158 |
+
print(f"\n🔹 Processing {split.upper()} ...")
|
| 159 |
+
|
| 160 |
+
for cname, items in tqdm(cls_dict.items(), desc=f"{split} classes"):
|
| 161 |
+
for item in items:
|
| 162 |
+
|
| 163 |
+
img = item["image"]
|
| 164 |
+
orig_w = item["orig_width"]
|
| 165 |
+
orig_h = item["orig_height"]
|
| 166 |
+
|
| 167 |
+
img_filename = f"image_{global_idx:06d}.jpg"
|
| 168 |
+
img_path = os.path.join(DET_IMG_ROOT, split, img_filename)
|
| 169 |
+
lab_path = os.path.join(DET_LAB_ROOT, split, img_filename.replace(".jpg", ".txt"))
|
| 170 |
+
|
| 171 |
+
img.save(img_path, quality=95)
|
| 172 |
+
stats[split] += 1
|
| 173 |
+
|
| 174 |
+
bboxes = item["bboxes"]
|
| 175 |
+
cats = item["cats"]
|
| 176 |
+
|
| 177 |
+
yolo_lines = []
|
| 178 |
+
obj_count = 0
|
| 179 |
+
|
| 180 |
+
for bbox, cat in zip(bboxes, cats):
|
| 181 |
+
# Only use 25 SmartVision classes
|
| 182 |
+
coco_class_name = COCO_CLASSES[cat]
|
| 183 |
+
if coco_class_name not in YOLO_NAME_TO_ID:
|
| 184 |
+
continue
|
| 185 |
+
|
| 186 |
+
yolo_id = YOLO_NAME_TO_ID[coco_class_name]
|
| 187 |
+
|
| 188 |
+
x, y, w, h = bbox # COCO: pixel values
|
| 189 |
+
|
| 190 |
+
# Normalize using image size
|
| 191 |
+
x_center = (x + w / 2) / orig_w
|
| 192 |
+
y_center = (y + h / 2) / orig_h
|
| 193 |
+
w_norm = w / orig_w
|
| 194 |
+
h_norm = h / orig_h
|
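# Illustrative numbers (not from the dataset): for a 640x480 image with a COCO box
# [x=100, y=50, w=200, h=100], the YOLO label values become
#   x_center = (100 + 200/2) / 640 = 0.3125
#   y_center = (50 + 100/2) / 480 ≈ 0.2083
#   w_norm   = 200 / 640          = 0.3125
#   h_norm   = 100 / 480          ≈ 0.2083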
| 195 |
+
|
| 196 |
+
# discard invalid
|
| 197 |
+
if not (0 <= x_center <= 1 and 0 <= y_center <= 1):
|
| 198 |
+
continue
|
| 199 |
+
if not (0 < w_norm <= 1 and 0 < h_norm <= 1):
|
| 200 |
+
continue
|
| 201 |
+
|
| 202 |
+
yolo_lines.append(
|
| 203 |
+
f"{yolo_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}"
|
| 204 |
+
)
|
| 205 |
+
obj_count += 1
|
| 206 |
+
|
| 207 |
+
if yolo_lines:
|
| 208 |
+
with open(lab_path, "w") as f:
|
| 209 |
+
f.write("\n".join(yolo_lines))
|
| 210 |
+
label_stats[split] += 1
|
| 211 |
+
object_stats[split] += obj_count
|
| 212 |
+
|
| 213 |
+
global_idx += 1
|
| 214 |
+
|
| 215 |
+
print("\n🎉 All detection data saved successfully!")
|
| 216 |
+
for split in ["train", "val", "test"]:
|
| 217 |
+
print(
|
| 218 |
+
f" {split.upper():5s} -> "
|
| 219 |
+
f"images: {stats[split]:4d}, "
|
| 220 |
+
f"label_files: {label_stats[split]:4d}, "
|
| 221 |
+
f"objects: {object_stats[split]:5d}"
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
# ------------------------------------------------------------
|
| 225 |
+
# STEP 5 — WRITE data.yaml
|
| 226 |
+
# ------------------------------------------------------------
|
| 227 |
+
|
| 228 |
+
print("\n📝 Writing data.yaml ...")
|
| 229 |
+
|
| 230 |
+
yaml = f"""
|
| 231 |
+
# SmartVision Dataset - YOLOv8 Configuration (with splits)
|
| 232 |
+
path: {os.path.abspath(os.path.join(BASE_DIR, "detection"))}
|
| 233 |
+
|
| 234 |
+
train: images/train
|
| 235 |
+
val: images/val
|
| 236 |
+
test: images/test
|
| 237 |
+
|
| 238 |
+
nc: {len(TARGET_CLASSES)}
|
| 239 |
+
names:
|
| 240 |
+
""" + "\n".join([f" {i}: {name}" for i, name in enumerate(TARGET_CLASSES)])
|
| 241 |
+
|
| 242 |
+
data_yaml_path = os.path.join(BASE_DIR, "detection", "data.yaml")
|
| 243 |
+
os.makedirs(os.path.dirname(data_yaml_path), exist_ok=True)
|
| 244 |
+
|
| 245 |
+
with open(data_yaml_path, "w") as f:
|
| 246 |
+
f.write(yaml)
|
| 247 |
+
|
| 248 |
+
print(f"✅ Created data.yaml at: {data_yaml_path}")
|
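For reference, the file written by the block above takes the following shape (the absolute path is abbreviated here, and only the first three of the 25 class names are shown):

# SmartVision Dataset - YOLOv8 Configuration (with splits)
path: /abs/path/to/smartvision_dataset/detection

train: images/train
val: images/val
test: images/test

nc: 25
names:
  0: person
  1: bicycle
  2: car
  ...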
scripts/04_inference_pipeline.py
ADDED
|
@@ -0,0 +1,436 @@
|
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION AI - PHASE 4
|
| 3 |
+
# Model Integration & Inference Pipeline (YOLOv8 + ResNet50 v2)
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
from typing import List, Dict, Any
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 12 |
+
|
| 13 |
+
import tensorflow as tf
|
| 14 |
+
from tensorflow import keras
|
| 15 |
+
from tensorflow.keras import layers
|
| 16 |
+
from ultralytics import YOLO
|
| 17 |
+
|
| 18 |
+
print("TensorFlow version:", tf.__version__)
|
| 19 |
+
|
| 20 |
+
# ------------------------------------------------------------
|
| 21 |
+
# 1. CONFIGURATION
|
| 22 |
+
# ------------------------------------------------------------
|
| 23 |
+
|
| 24 |
+
# Dataset & models
|
| 25 |
+
BASE_DIR = "smartvision_dataset"
|
| 26 |
+
CLASS_DIR = os.path.join(BASE_DIR, "classification")
|
| 27 |
+
TRAIN_DIR = os.path.join(CLASS_DIR, "train")
|
| 28 |
+
|
| 29 |
+
# YOLO & classifier weights
|
| 30 |
+
YOLO_WEIGHTS = "yolo_runs/smartvision_yolov8s6 - Copy/weights/best.pt" # adjust if needed
|
| 31 |
+
CLASSIFIER_WEIGHTS_PATH = os.path.join(
|
| 32 |
+
"saved_models", "resnet50_v2_stage2_best.weights.h5"
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
IMG_SIZE = (224, 224)
|
| 36 |
+
NUM_CLASSES = 25
|
| 37 |
+
|
| 38 |
+
# Where to save annotated outputs
|
| 39 |
+
OUTPUT_DIR = "inference_outputs"
|
| 40 |
+
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 41 |
+
|
| 42 |
+
# ------------------------------------------------------------
|
| 43 |
+
# 2. CLASS NAMES (MUST MATCH TRAINING ORDER)
|
| 44 |
+
# From your training logs:
|
| 45 |
+
# ['airplane', 'bed', 'bench', 'bicycle', 'bird', 'bottle', 'bowl',
|
| 46 |
+
# 'bus', 'cake', 'car', 'cat', 'chair', 'couch', 'cow', 'cup', 'dog',
|
| 47 |
+
# 'elephant', 'horse', 'motorcycle', 'person', 'pizza', 'potted plant',
|
| 48 |
+
# 'stop sign', 'traffic light', 'truck']
|
| 49 |
+
# ------------------------------------------------------------
|
| 50 |
+
|
| 51 |
+
CLASS_NAMES = [
|
| 52 |
+
"airplane", "bed", "bench", "bicycle", "bird", "bottle", "bowl",
|
| 53 |
+
"bus", "cake", "car", "cat", "chair", "couch", "cow", "cup", "dog",
|
| 54 |
+
"elephant", "horse", "motorcycle", "person", "pizza", "potted plant",
|
| 55 |
+
"stop sign", "traffic light", "truck"
|
| 56 |
+
]
|
| 57 |
+
|
| 58 |
+
assert len(CLASS_NAMES) == NUM_CLASSES, "CLASS_NAMES length must be 25"
|
| 59 |
+
|
| 60 |
+
# ------------------------------------------------------------
|
| 61 |
+
# 3. DATA AUGMENTATION (same as training, but no effect in inference)
|
| 62 |
+
# ------------------------------------------------------------
|
| 63 |
+
|
| 64 |
+
data_augmentation = keras.Sequential(
|
| 65 |
+
[
|
| 66 |
+
layers.RandomFlip("horizontal"),
|
| 67 |
+
layers.RandomRotation(0.04), # ~±15°
|
| 68 |
+
layers.RandomZoom(0.1),
|
| 69 |
+
layers.RandomContrast(0.15),
|
| 70 |
+
layers.Lambda(
|
| 71 |
+
lambda x: tf.image.random_brightness(x, max_delta=0.15)
|
| 72 |
+
),
|
| 73 |
+
layers.Lambda(
|
| 74 |
+
lambda x: tf.image.random_saturation(x, 0.85, 1.15)
|
| 75 |
+
),
|
| 76 |
+
],
|
| 77 |
+
name="data_augmentation",
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# ------------------------------------------------------------
|
| 81 |
+
# 4. BUILD RESNET50 v2 CLASSIFIER (MATCHES TRAINING ARCHITECTURE)
|
| 82 |
+
# ------------------------------------------------------------
|
| 83 |
+
|
| 84 |
+
def build_resnet50_model_v2():
|
| 85 |
+
"""
|
| 86 |
+
Build the ResNet50 v2 classifier with the SAME architecture as in training.
|
| 87 |
+
(data_augmentation + Lambda(resnet50.preprocess_input) + ResNet50 backbone + head)
|
| 88 |
+
"""
|
| 89 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 90 |
+
|
| 91 |
+
# Augmentation (no randomness in inference mode, Keras handles that)
|
| 92 |
+
x = data_augmentation(inputs)
|
| 93 |
+
|
| 94 |
+
# ResNet50-specific preprocessing
|
| 95 |
+
x = layers.Lambda(
|
| 96 |
+
keras.applications.resnet50.preprocess_input,
|
| 97 |
+
name="resnet50_preprocess",
|
| 98 |
+
)(x)
|
| 99 |
+
|
| 100 |
+
# Pretrained ResNet50 backbone
|
| 101 |
+
base_model = keras.applications.ResNet50(
|
| 102 |
+
include_top=False,
|
| 103 |
+
weights="imagenet",
|
| 104 |
+
input_shape=(*IMG_SIZE, 3),
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
x = base_model(x)
|
| 108 |
+
|
| 109 |
+
# Custom classification head (same as training file)
|
| 110 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(x)
|
| 111 |
+
|
| 112 |
+
x = layers.BatchNormalization(name="head_batchnorm")(x)
|
| 113 |
+
x = layers.Dropout(0.4, name="head_dropout")(x)
|
| 114 |
+
|
| 115 |
+
x = layers.Dense(
|
| 116 |
+
256,
|
| 117 |
+
activation="relu",
|
| 118 |
+
name="head_dense",
|
| 119 |
+
)(x)
|
| 120 |
+
|
| 121 |
+
x = layers.BatchNormalization(name="head_batchnorm_2")(x)
|
| 122 |
+
x = layers.Dropout(0.5, name="head_dropout_2")(x)
|
| 123 |
+
|
| 124 |
+
outputs = layers.Dense(
|
| 125 |
+
NUM_CLASSES,
|
| 126 |
+
activation="softmax",
|
| 127 |
+
name="predictions",
|
| 128 |
+
)(x)
|
| 129 |
+
|
| 130 |
+
model = keras.Model(
|
| 131 |
+
inputs=inputs,
|
| 132 |
+
outputs=outputs,
|
| 133 |
+
name="ResNet50_smartvision_v2_infer",
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
return model, base_model
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def load_classifier(weights_path: str):
|
| 140 |
+
"""
|
| 141 |
+
Build the ResNet50 v2 model and load fine-tuned weights from
|
| 142 |
+
resnet50_v2_stage2_best.weights.h5
|
| 143 |
+
"""
|
| 144 |
+
if not os.path.exists(weights_path):
|
| 145 |
+
print(f"⚠️ Classifier weights not found at: {weights_path}")
|
| 146 |
+
print(" Using ImageNet-pretrained ResNet50 base + randomly initialized head.")
|
| 147 |
+
model, _ = build_resnet50_model_v2()
|
| 148 |
+
return model
|
| 149 |
+
|
| 150 |
+
model, _ = build_resnet50_model_v2()
|
| 151 |
+
model.load_weights(weights_path)
|
| 152 |
+
print(f"✅ Loaded classifier weights from: {weights_path}")
|
| 153 |
+
return model
|
| 154 |
+
|
| 155 |
+
# ------------------------------------------------------------
|
| 156 |
+
# 5. LOAD YOLO MODEL
|
| 157 |
+
# ------------------------------------------------------------
|
| 158 |
+
|
| 159 |
+
def load_yolo_model(weights_path: str = YOLO_WEIGHTS) -> YOLO:
|
| 160 |
+
if not os.path.exists(weights_path):
|
| 161 |
+
raise FileNotFoundError(f"YOLO weights not found at: {weights_path}")
|
| 162 |
+
model = YOLO(weights_path)
|
| 163 |
+
print(f"✅ Loaded YOLOv8 model from: {weights_path}")
|
| 164 |
+
return model
|
| 165 |
+
|
| 166 |
+
# ------------------------------------------------------------
|
| 167 |
+
# 6. HELPER: PREPROCESS CROP FOR CLASSIFIER
|
| 168 |
+
# ------------------------------------------------------------
|
| 169 |
+
|
| 170 |
+
def preprocess_crop_for_classifier(crop_img: Image.Image,
|
| 171 |
+
img_size=IMG_SIZE) -> np.ndarray:
|
| 172 |
+
"""
|
| 173 |
+
Resize PIL image crop to 224x224 and prepare as batch tensor.
|
| 174 |
+
NOTE: No manual rescaling here; model already has preprocess_input inside.
|
| 175 |
+
"""
|
| 176 |
+
crop_resized = crop_img.resize(img_size, Image.BILINEAR)
|
| 177 |
+
arr = np.array(crop_resized, dtype=np.float32) # shape (H,W,3)
|
| 178 |
+
arr = np.expand_dims(arr, axis=0) # (1,H,W,3)
|
| 179 |
+
return arr
|
| 180 |
+
|
| 181 |
+
# ------------------------------------------------------------
|
| 182 |
+
# 7. DRAWING UTIL: BOUNDING BOXES + LABELS (Pillow 10+ SAFE)
|
| 183 |
+
# ------------------------------------------------------------
|
| 184 |
+
|
| 185 |
+
def draw_boxes_with_labels(
|
| 186 |
+
pil_img: Image.Image,
|
| 187 |
+
detections: List[Dict[str, Any]],
|
| 188 |
+
font_path: str = None
|
| 189 |
+
) -> Image.Image:
|
| 190 |
+
"""
|
| 191 |
+
Draw bounding boxes & labels on an image.
|
| 192 |
+
|
| 193 |
+
detections: list of dicts with keys:
|
| 194 |
+
- x1, y1, x2, y2
|
| 195 |
+
- label (str)
|
| 196 |
+
- conf_yolo (float)
|
| 197 |
+
- cls_label (optional, str)
|
| 198 |
+
- cls_conf (optional, float)
|
| 199 |
+
"""
|
| 200 |
+
draw = ImageDraw.Draw(pil_img)
|
| 201 |
+
|
| 202 |
+
# Try to load a TTF font, fallback to default
|
| 203 |
+
if font_path and os.path.exists(font_path):
|
| 204 |
+
font = ImageFont.truetype(font_path, 16)
|
| 205 |
+
else:
|
| 206 |
+
font = ImageFont.load_default()
|
| 207 |
+
|
| 208 |
+
for det in detections:
|
| 209 |
+
x1, y1, x2, y2 = det["x1"], det["y1"], det["x2"], det["y2"]
|
| 210 |
+
yolo_label = det["label"]
|
| 211 |
+
conf_yolo = det["conf_yolo"]
|
| 212 |
+
cls_label = det.get("cls_label")
|
| 213 |
+
cls_conf = det.get("cls_conf")
|
| 214 |
+
|
| 215 |
+
# Text to display
|
| 216 |
+
if cls_label is not None:
|
| 217 |
+
text = f"{yolo_label} {conf_yolo:.2f} | CLS: {cls_label} {cls_conf:.2f}"
|
| 218 |
+
else:
|
| 219 |
+
text = f"{yolo_label} {conf_yolo:.2f}"
|
| 220 |
+
|
| 221 |
+
# Box
|
| 222 |
+
draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
|
| 223 |
+
|
| 224 |
+
# Compute text size safely (Pillow 10+)
|
| 225 |
+
bbox = draw.textbbox((0, 0), text, font=font)
|
| 226 |
+
text_w = bbox[2] - bbox[0]
|
| 227 |
+
text_h = bbox[3] - bbox[1]
|
| 228 |
+
|
| 229 |
+
# Text background (clamp to top of image)
|
| 230 |
+
text_bg = [x1,
|
| 231 |
+
max(0, y1 - text_h - 2),
|
| 232 |
+
x1 + text_w + 4,
|
| 233 |
+
y1]
|
| 234 |
+
draw.rectangle(text_bg, fill="black")
|
| 235 |
+
draw.text((x1 + 2, max(0, y1 - text_h - 1)), text, fill="white", font=font)
|
| 236 |
+
|
| 237 |
+
return pil_img
|
| 238 |
+
|
| 239 |
+
# ------------------------------------------------------------
|
| 240 |
+
# 8. SINGLE-IMAGE PIPELINE
|
| 241 |
+
# user_image → YOLO → (optional ResNet verify) → annotated image
|
| 242 |
+
# ------------------------------------------------------------
|
| 243 |
+
|
| 244 |
+
def run_inference_on_image(
|
| 245 |
+
image_path: str,
|
| 246 |
+
yolo_model: YOLO,
|
| 247 |
+
classifier: keras.Model = None,
|
| 248 |
+
conf_threshold: float = 0.5,
|
| 249 |
+
save_name: str = None
|
| 250 |
+
) -> Dict[str, Any]:
|
| 251 |
+
"""
|
| 252 |
+
Full pipeline on a single image.
|
| 253 |
+
|
| 254 |
+
- Runs YOLO detection (with NMS internally).
|
| 255 |
+
- Filters by conf_threshold.
|
| 256 |
+
- Optionally runs ResNet50 classifier on each crop.
|
| 257 |
+
- Draws bounding boxes + labels.
|
| 258 |
+
- Saves annotated image to OUTPUT_DIR.
|
| 259 |
+
"""
|
| 260 |
+
if not os.path.exists(image_path):
|
| 261 |
+
raise FileNotFoundError(f"Image not found: {image_path}")
|
| 262 |
+
|
| 263 |
+
print(f"\n🔍 Processing image: {image_path}")
|
| 264 |
+
pil_img = Image.open(image_path).convert("RGB")
|
| 265 |
+
orig_w, orig_h = pil_img.size
|
| 266 |
+
|
| 267 |
+
# YOLO prediction (NMS is automatically applied)
|
| 268 |
+
t0 = time.perf_counter()
|
| 269 |
+
results = yolo_model.predict(
|
| 270 |
+
source=image_path,
|
| 271 |
+
imgsz=640,
|
| 272 |
+
conf=conf_threshold,
|
| 273 |
+
device="cpu", # change to "0" if you have a GPU
|
| 274 |
+
verbose=False
|
| 275 |
+
)
|
| 276 |
+
t1 = time.perf_counter()
|
| 277 |
+
infer_time = t1 - t0
|
| 278 |
+
print(f"YOLO inference time: {infer_time*1000:.2f} ms")
|
| 279 |
+
|
| 280 |
+
res = results[0] # one image
|
| 281 |
+
boxes = res.boxes # Boxes object
|
| 282 |
+
|
| 283 |
+
detections = []
|
| 284 |
+
|
| 285 |
+
for box in boxes:
|
| 286 |
+
# xyxy coordinates
|
| 287 |
+
x1, y1, x2, y2 = box.xyxy[0].tolist()
|
| 288 |
+
cls_id = int(box.cls[0].item())
|
| 289 |
+
conf_yolo = float(box.conf[0].item())
|
| 290 |
+
label = yolo_model.names[cls_id] # class name from YOLO model
|
| 291 |
+
|
| 292 |
+
# Clip coords to image size, just in case
|
| 293 |
+
x1 = max(0, min(x1, orig_w - 1))
|
| 294 |
+
y1 = max(0, min(y1, orig_h - 1))
|
| 295 |
+
x2 = max(0, min(x2, orig_w - 1))
|
| 296 |
+
y2 = max(0, min(y2, orig_h - 1))
|
| 297 |
+
|
| 298 |
+
# Optional classification verification
|
| 299 |
+
cls_label = None
|
| 300 |
+
cls_conf = None
|
| 301 |
+
if classifier is not None:
|
| 302 |
+
crop = pil_img.crop((x1, y1, x2, y2))
|
| 303 |
+
arr = preprocess_crop_for_classifier(crop)
|
| 304 |
+
probs = classifier.predict(arr, verbose=0)[0] # shape (25,)
|
| 305 |
+
cls_idx = int(np.argmax(probs))
|
| 306 |
+
cls_label = CLASS_NAMES[cls_idx]
|
| 307 |
+
cls_conf = float(probs[cls_idx])
|
| 308 |
+
|
| 309 |
+
detection_info = {
|
| 310 |
+
"x1": x1,
|
| 311 |
+
"y1": y1,
|
| 312 |
+
"x2": x2,
|
| 313 |
+
"y2": y2,
|
| 314 |
+
"class_id_yolo": cls_id,
|
| 315 |
+
"label": label,
|
| 316 |
+
"conf_yolo": conf_yolo,
|
| 317 |
+
"cls_label": cls_label,
|
| 318 |
+
"cls_conf": cls_conf,
|
| 319 |
+
}
|
| 320 |
+
detections.append(detection_info)
|
| 321 |
+
|
| 322 |
+
# Draw boxes
|
| 323 |
+
annotated = pil_img.copy()
|
| 324 |
+
annotated = draw_boxes_with_labels(annotated, detections)
|
| 325 |
+
|
| 326 |
+
# Save output image
|
| 327 |
+
if save_name is None:
|
| 328 |
+
base = os.path.basename(image_path)
|
| 329 |
+
name_wo_ext, _ = os.path.splitext(base)
|
| 330 |
+
save_name = f"{name_wo_ext}_annotated.jpg"
|
| 331 |
+
|
| 332 |
+
save_path = os.path.join(OUTPUT_DIR, save_name)
|
| 333 |
+
annotated.save(save_path)
|
| 334 |
+
print(f"✅ Saved annotated image to: {save_path}")
|
| 335 |
+
|
| 336 |
+
return {
|
| 337 |
+
"image_path": image_path,
|
| 338 |
+
"output_path": save_path,
|
| 339 |
+
"num_detections": len(detections),
|
| 340 |
+
"detections": detections,
|
| 341 |
+
"yolo_inference_time_sec": infer_time,
|
| 342 |
+
}
|
| 343 |
+
|
| 344 |
+
# ------------------------------------------------------------
|
| 345 |
+
# 9. BATCH PIPELINE (MULTIPLE IMAGES)
|
| 346 |
+
# ------------------------------------------------------------
|
| 347 |
+
|
| 348 |
+
def run_inference_on_folder(
|
| 349 |
+
folder_path: str,
|
| 350 |
+
yolo_model: YOLO,
|
| 351 |
+
classifier: keras.Model = None,
|
| 352 |
+
conf_threshold: float = 0.5,
|
| 353 |
+
max_images: int = None
|
| 354 |
+
) -> List[Dict[str, Any]]:
|
| 355 |
+
"""
|
| 356 |
+
Run the full pipeline on all images in a folder.
|
| 357 |
+
"""
|
| 358 |
+
supported_ext = (".jpg", ".jpeg", ".png")
|
| 359 |
+
image_files = [
|
| 360 |
+
os.path.join(folder_path, f)
|
| 361 |
+
for f in os.listdir(folder_path)
|
| 362 |
+
if f.lower().endswith(supported_ext)
|
| 363 |
+
]
|
| 364 |
+
image_files.sort()
|
| 365 |
+
|
| 366 |
+
if max_images is not None:
|
| 367 |
+
image_files = image_files[:max_images]
|
| 368 |
+
|
| 369 |
+
results_all = []
|
| 370 |
+
for img_path in image_files:
|
| 371 |
+
res = run_inference_on_image(
|
| 372 |
+
img_path,
|
| 373 |
+
yolo_model=yolo_model,
|
| 374 |
+
classifier=classifier,
|
| 375 |
+
conf_threshold=conf_threshold
|
| 376 |
+
)
|
| 377 |
+
results_all.append(res)
|
| 378 |
+
|
| 379 |
+
return results_all
|
| 380 |
+
|
| 381 |
+
# ------------------------------------------------------------
|
| 382 |
+
# 10. SIMPLE QUANTIZATION (CLASSIFIER → TFLITE FLOAT16)
|
| 383 |
+
# ------------------------------------------------------------
|
| 384 |
+
|
| 385 |
+
def export_classifier_tflite_float16(
|
| 386 |
+
keras_model: keras.Model,
|
| 387 |
+
export_path: str = "resnet50_smartvision_float16.tflite"
|
| 388 |
+
):
|
| 389 |
+
"""
|
| 390 |
+
Export the classifier to a TFLite model with float16 quantization.
|
| 391 |
+
This is suitable for faster inference on CPU / mobile.
|
| 392 |
+
"""
|
| 393 |
+
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
|
| 394 |
+
converter.optimizations = [tf.lite.Optimize.DEFAULT]
|
| 395 |
+
converter.target_spec.supported_types = [tf.float16]
|
| 396 |
+
|
| 397 |
+
tflite_model = converter.convert()
|
| 398 |
+
with open(export_path, "wb") as f:
|
| 399 |
+
f.write(tflite_model)
|
| 400 |
+
|
| 401 |
+
size_mb = os.path.getsize(export_path) / (1024 * 1024)
|
| 402 |
+
print(f"✅ Exported float16 TFLite model to: {export_path} ({size_mb:.2f} MB)")
|
| 403 |
+
|
| 404 |
+
# ------------------------------------------------------------
|
| 405 |
+
# 11. MAIN (for quick testing)
|
| 406 |
+
# ------------------------------------------------------------
|
| 407 |
+
|
| 408 |
+
if __name__ == "__main__":
|
| 409 |
+
print("🔧 Loading models...")
|
| 410 |
+
yolo_model = load_yolo_model(YOLO_WEIGHTS)
|
| 411 |
+
classifier_model = load_classifier(CLASSIFIER_WEIGHTS_PATH)
|
| 412 |
+
|
| 413 |
+
# Example: run on a single test image
|
| 414 |
+
test_image = os.path.join(BASE_DIR, "detection", "images", "test", "image_002126.jpg")
|
| 415 |
+
if os.path.exists(test_image):
|
| 416 |
+
_ = run_inference_on_image(
|
| 417 |
+
image_path=test_image,
|
| 418 |
+
yolo_model=yolo_model,
|
| 419 |
+
classifier=classifier_model,
|
| 420 |
+
conf_threshold=0.5,
|
| 421 |
+
)
|
| 422 |
+
else:
|
| 423 |
+
print(f"⚠️ Example image not found: {test_image}")
|
| 424 |
+
|
| 425 |
+
# Example: run on a folder of images
|
| 426 |
+
# folder = os.path.join(BASE_DIR, "detection", "images")
|
| 427 |
+
# _ = run_inference_on_folder(
|
| 428 |
+
# folder_path=folder,
|
| 429 |
+
# yolo_model=yolo_model,
|
| 430 |
+
# classifier=classifier_model,
|
| 431 |
+
# conf_threshold=0.5,
|
| 432 |
+
# max_images=10,
|
| 433 |
+
# )
|
| 434 |
+
|
| 435 |
+
# Example: export quantized classifier
|
| 436 |
+
# export_classifier_tflite_float16(classifier_model)
|
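A minimal sketch of consuming the dictionary returned by run_inference_on_image, assuming the YOLO and classifier models were loaded as in the __main__ block above (the input path is a placeholder):

result = run_inference_on_image(
    image_path="some_image.jpg",   # placeholder input path
    yolo_model=yolo_model,
    classifier=classifier_model,
    conf_threshold=0.5,
)

print("Annotated file :", result["output_path"])
print("YOLO time (sec):", round(result["yolo_inference_time_sec"], 3))
for det in result["detections"]:
    cls_part = (f'{det["cls_label"]} ({det["cls_conf"]:.2f})'
                if det["cls_label"] is not None else "n/a")
    print(f'{det["label"]:15s} yolo={det["conf_yolo"]:.2f}  classifier={cls_part}')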
scripts/04_validation and cleaning.py
ADDED
|
@@ -0,0 +1,310 @@
|
| 1 |
+
"""
|
| 2 |
+
YOLO Dataset Validation & Cleaning Script
|
| 3 |
+
==========================================
|
| 4 |
+
This script will:
|
| 5 |
+
1. Validate all YOLO label files
|
| 6 |
+
2. Fix out-of-bounds coordinates (clip to [0,1])
|
| 7 |
+
3. Remove invalid/empty annotations
|
| 8 |
+
4. Generate a detailed report
|
| 9 |
+
5. Create backups before making changes
|
| 10 |
+
6. Clear corrupted cache files
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import os
|
| 14 |
+
import glob
|
| 15 |
+
import shutil
|
| 16 |
+
import json
|
| 17 |
+
from datetime import datetime
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
class YOLODatasetCleaner:
|
| 21 |
+
def __init__(self, dataset_dir):
|
| 22 |
+
self.dataset_dir = dataset_dir
|
| 23 |
+
self.detection_dir = os.path.join(dataset_dir, "detection")
|
| 24 |
+
self.labels_dir = os.path.join(self.detection_dir, "labels")
|
| 25 |
+
self.images_dir = os.path.join(self.detection_dir, "images")
|
| 26 |
+
self.backup_dir = os.path.join(dataset_dir, f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
|
| 27 |
+
|
| 28 |
+
self.stats = {
|
| 29 |
+
'total_files': 0,
|
| 30 |
+
'corrupt_files': 0,
|
| 31 |
+
'fixed_files': 0,
|
| 32 |
+
'removed_files': 0,
|
| 33 |
+
'empty_files': 0,
|
| 34 |
+
'splits': {'train': {}, 'val': {}, 'test': {}}
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
def create_backup(self):
|
| 38 |
+
"""Create backup of labels directory"""
|
| 39 |
+
print("\n" + "="*60)
|
| 40 |
+
print("📦 CREATING BACKUP")
|
| 41 |
+
print("="*60)
|
| 42 |
+
|
| 43 |
+
if os.path.exists(self.backup_dir):
|
| 44 |
+
print(f"⚠️ Backup directory already exists: {self.backup_dir}")
|
| 45 |
+
return False
|
| 46 |
+
|
| 47 |
+
try:
|
| 48 |
+
shutil.copytree(self.labels_dir, os.path.join(self.backup_dir, "labels"))
|
| 49 |
+
print(f"✅ Backup created at: {self.backup_dir}")
|
| 50 |
+
return True
|
| 51 |
+
except Exception as e:
|
| 52 |
+
print(f"❌ Backup failed: {e}")
|
| 53 |
+
return False
|
| 54 |
+
|
| 55 |
+
def validate_label_line(self, line):
|
| 56 |
+
"""Validate a single label line and return fixed version if needed"""
|
| 57 |
+
parts = line.strip().split()
|
| 58 |
+
|
| 59 |
+
# Need at least 5 values: class_id x_center y_center width height
|
| 60 |
+
if len(parts) < 5:
|
| 61 |
+
return None, "insufficient_values"
|
| 62 |
+
|
| 63 |
+
try:
|
| 64 |
+
class_id = int(parts[0])
|
| 65 |
+
coords = [float(x) for x in parts[1:5]]
|
| 66 |
+
|
| 67 |
+
# Check if coordinates are out of bounds
|
| 68 |
+
issues = []
|
| 69 |
+
if any(c < 0 for c in coords):
|
| 70 |
+
issues.append("negative_coords")
|
| 71 |
+
if any(c > 1 for c in coords):
|
| 72 |
+
issues.append("out_of_bounds")
|
| 73 |
+
|
| 74 |
+
# Check for invalid dimensions (width/height must be > 0)
|
| 75 |
+
if coords[2] <= 0 or coords[3] <= 0:
|
| 76 |
+
issues.append("invalid_dimensions")
|
| 77 |
+
|
| 78 |
+
# Clip coordinates to [0, 1]
|
| 79 |
+
fixed_coords = [max(0.0, min(1.0, c)) for c in coords]
|
| 80 |
+
|
| 81 |
+
# Keep width and height positive
|
| 82 |
+
if fixed_coords[2] <= 0:
|
| 83 |
+
fixed_coords[2] = 0.01
|
| 84 |
+
if fixed_coords[3] <= 0:
|
| 85 |
+
fixed_coords[3] = 0.01
|
| 86 |
+
|
| 87 |
+
fixed_line = f"{class_id} {' '.join(f'{c:.6f}' for c in fixed_coords)}\n"
|
| 88 |
+
|
| 89 |
+
return fixed_line, issues if issues else None
|
| 90 |
+
|
| 91 |
+
except (ValueError, IndexError) as e:
|
| 92 |
+
return None, f"parse_error: {e}"
|
| 93 |
+
|
| 94 |
+
def clean_label_file(self, label_path):
|
| 95 |
+
"""Clean a single label file"""
|
| 96 |
+
try:
|
| 97 |
+
with open(label_path, 'r') as f:
|
| 98 |
+
lines = f.readlines()
|
| 99 |
+
|
| 100 |
+
if not lines:
|
| 101 |
+
return {'status': 'empty', 'issues': ['empty_file']}
|
| 102 |
+
|
| 103 |
+
fixed_lines = []
|
| 104 |
+
all_issues = []
|
| 105 |
+
|
| 106 |
+
for line_num, line in enumerate(lines, 1):
|
| 107 |
+
if not line.strip():
|
| 108 |
+
continue
|
| 109 |
+
|
| 110 |
+
fixed_line, issues = self.validate_label_line(line)
|
| 111 |
+
|
| 112 |
+
if fixed_line is None:
|
| 113 |
+
all_issues.append(f"line_{line_num}: {issues}")
|
| 114 |
+
else:
|
| 115 |
+
fixed_lines.append(fixed_line)
|
| 116 |
+
if issues:
|
| 117 |
+
all_issues.extend([f"line_{line_num}: {issue}" for issue in issues])
|
| 118 |
+
|
| 119 |
+
if not fixed_lines:
|
| 120 |
+
return {'status': 'all_invalid', 'issues': all_issues}
|
| 121 |
+
|
| 122 |
+
# Write back fixed labels
|
| 123 |
+
with open(label_path, 'w') as f:
|
| 124 |
+
f.writelines(fixed_lines)
|
| 125 |
+
|
| 126 |
+
if all_issues:
|
| 127 |
+
return {'status': 'fixed', 'issues': all_issues, 'lines_kept': len(fixed_lines)}
|
| 128 |
+
else:
|
| 129 |
+
return {'status': 'valid', 'issues': [], 'lines_kept': len(fixed_lines)}
|
| 130 |
+
|
| 131 |
+
except Exception as e:
|
| 132 |
+
return {'status': 'error', 'issues': [str(e)]}
|
| 133 |
+
|
| 134 |
+
def process_split(self, split_name):
|
| 135 |
+
"""Process all label files in a split (train/val/test)"""
|
| 136 |
+
print(f"\n📂 Processing {split_name.upper()} split...")
|
| 137 |
+
|
| 138 |
+
label_path = os.path.join(self.labels_dir, split_name)
|
| 139 |
+
image_path = os.path.join(self.images_dir, split_name)
|
| 140 |
+
|
| 141 |
+
if not os.path.exists(label_path):
|
| 142 |
+
print(f"⚠️ Labels directory not found: {label_path}")
|
| 143 |
+
return
|
| 144 |
+
|
| 145 |
+
label_files = glob.glob(os.path.join(label_path, "*.txt"))
|
| 146 |
+
|
| 147 |
+
split_stats = {
|
| 148 |
+
'total': len(label_files),
|
| 149 |
+
'valid': 0,
|
| 150 |
+
'fixed': 0,
|
| 151 |
+
'empty': 0,
|
| 152 |
+
'removed': 0,
|
| 153 |
+
'corrupt_files': []
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
for label_file in label_files:
|
| 157 |
+
self.stats['total_files'] += 1
|
| 158 |
+
result = self.clean_label_file(label_file)
|
| 159 |
+
|
| 160 |
+
if result['status'] == 'valid':
|
| 161 |
+
split_stats['valid'] += 1
|
| 162 |
+
|
| 163 |
+
elif result['status'] == 'fixed':
|
| 164 |
+
split_stats['fixed'] += 1
|
| 165 |
+
self.stats['fixed_files'] += 1
|
| 166 |
+
split_stats['corrupt_files'].append({
|
| 167 |
+
'file': os.path.basename(label_file),
|
| 168 |
+
'issues': result['issues']
|
| 169 |
+
})
|
| 170 |
+
|
| 171 |
+
elif result['status'] in ['empty', 'all_invalid']:
|
| 172 |
+
split_stats['empty'] += 1
|
| 173 |
+
self.stats['empty_files'] += 1
|
| 174 |
+
split_stats['corrupt_files'].append({
|
| 175 |
+
'file': os.path.basename(label_file),
|
| 176 |
+
'issues': result['issues']
|
| 177 |
+
})
|
| 178 |
+
|
| 179 |
+
# Remove empty/invalid label files and corresponding images
|
| 180 |
+
img_file = label_file.replace(label_path, image_path).replace('.txt', '.jpg')
|
| 181 |
+
try:
|
| 182 |
+
os.remove(label_file)
|
| 183 |
+
if os.path.exists(img_file):
|
| 184 |
+
os.remove(img_file)
|
| 185 |
+
split_stats['removed'] += 1
|
| 186 |
+
self.stats['removed_files'] += 1
|
| 187 |
+
print(f" 🗑️ Removed: {os.path.basename(label_file)}")
|
| 188 |
+
except Exception as e:
|
| 189 |
+
print(f" ❌ Could not remove {os.path.basename(label_file)}: {e}")
|
| 190 |
+
|
| 191 |
+
self.stats['splits'][split_name] = split_stats
|
| 192 |
+
|
| 193 |
+
print(f" ✅ Valid: {split_stats['valid']}")
|
| 194 |
+
print(f" 🔧 Fixed: {split_stats['fixed']}")
|
| 195 |
+
print(f" 🗑️ Removed: {split_stats['removed']}")
|
| 196 |
+
|
| 197 |
+
def clear_cache_files(self):
|
| 198 |
+
"""Remove YOLO cache files"""
|
| 199 |
+
print("\n" + "="*60)
|
| 200 |
+
print("🧹 CLEARING CACHE FILES")
|
| 201 |
+
print("="*60)
|
| 202 |
+
|
| 203 |
+
cache_files = glob.glob(os.path.join(self.labels_dir, "**/*.cache"), recursive=True)
|
| 204 |
+
|
| 205 |
+
for cache_file in cache_files:
|
| 206 |
+
try:
|
| 207 |
+
os.remove(cache_file)
|
| 208 |
+
print(f" ✅ Removed: {cache_file}")
|
| 209 |
+
except Exception as e:
|
| 210 |
+
print(f" ❌ Could not remove {cache_file}: {e}")
|
| 211 |
+
|
| 212 |
+
print(f"✅ Removed {len(cache_files)} cache files")
|
| 213 |
+
|
| 214 |
+
def generate_report(self):
|
| 215 |
+
"""Generate detailed cleaning report"""
|
| 216 |
+
print("\n" + "="*60)
|
| 217 |
+
print("📊 CLEANING REPORT")
|
| 218 |
+
print("="*60)
|
| 219 |
+
|
| 220 |
+
print(f"\n📈 Overall Statistics:")
|
| 221 |
+
print(f" Total files processed: {self.stats['total_files']}")
|
| 222 |
+
print(f" Files fixed: {self.stats['fixed_files']}")
|
| 223 |
+
print(f" Files removed: {self.stats['removed_files']}")
|
| 224 |
+
print(f" Empty files: {self.stats['empty_files']}")
|
| 225 |
+
|
| 226 |
+
print(f"\n📊 Per-Split Statistics:")
|
| 227 |
+
for split, data in self.stats['splits'].items():
|
| 228 |
+
if data:
|
| 229 |
+
print(f"\n {split.upper()}:")
|
| 230 |
+
print(f" Total: {data['total']}")
|
| 231 |
+
print(f" Valid: {data['valid']}")
|
| 232 |
+
print(f" Fixed: {data['fixed']}")
|
| 233 |
+
print(f" Removed: {data['removed']}")
|
| 234 |
+
|
| 235 |
+
# Save detailed report to JSON
|
| 236 |
+
report_path = os.path.join(self.dataset_dir, f"cleaning_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
|
| 237 |
+
with open(report_path, 'w') as f:
|
| 238 |
+
json.dump(self.stats, f, indent=2)
|
| 239 |
+
|
| 240 |
+
print(f"\n💾 Detailed report saved to: {report_path}")
|
| 241 |
+
|
| 242 |
+
def verify_dataset(self):
|
| 243 |
+
"""Verify dataset after cleaning"""
|
| 244 |
+
print("\n" + "="*60)
|
| 245 |
+
print("✅ VERIFICATION")
|
| 246 |
+
print("="*60)
|
| 247 |
+
|
| 248 |
+
for split in ['train', 'val', 'test']:
|
| 249 |
+
label_path = os.path.join(self.labels_dir, split)
|
| 250 |
+
image_path = os.path.join(self.images_dir, split)
|
| 251 |
+
|
| 252 |
+
label_files = glob.glob(os.path.join(label_path, "*.txt"))
|
| 253 |
+
image_files = glob.glob(os.path.join(image_path, "*.jpg"))
|
| 254 |
+
|
| 255 |
+
print(f"\n{split.upper()}:")
|
| 256 |
+
print(f" Images: {len(image_files)}")
|
| 257 |
+
print(f" Labels: {len(label_files)}")
|
| 258 |
+
|
| 259 |
+
if len(image_files) != len(label_files):
|
| 260 |
+
print(f" ⚠️ WARNING: Image/Label count mismatch!")
|
| 261 |
+
|
| 262 |
+
def run(self):
|
| 263 |
+
"""Run the complete cleaning pipeline"""
|
| 264 |
+
print("\n" + "="*60)
|
| 265 |
+
print("🚀 YOLO DATASET CLEANER")
|
| 266 |
+
print("="*60)
|
| 267 |
+
print(f"Dataset directory: {self.dataset_dir}")
|
| 268 |
+
|
| 269 |
+
# Step 1: Create backup
|
| 270 |
+
if not self.create_backup():
|
| 271 |
+
response = input("\n⚠️ Proceed without backup? (yes/no): ")
|
| 272 |
+
if response.lower() != 'yes':
|
| 273 |
+
print("❌ Cleaning cancelled.")
|
| 274 |
+
return
|
| 275 |
+
|
| 276 |
+
# Step 2: Process each split
|
| 277 |
+
print("\n" + "="*60)
|
| 278 |
+
print("🔧 CLEANING LABELS")
|
| 279 |
+
print("="*60)
|
| 280 |
+
|
| 281 |
+
for split in ['train', 'val', 'test']:
|
| 282 |
+
self.process_split(split)
|
| 283 |
+
|
| 284 |
+
# Step 3: Clear cache
|
| 285 |
+
self.clear_cache_files()
|
| 286 |
+
|
| 287 |
+
# Step 4: Generate report
|
| 288 |
+
self.generate_report()
|
| 289 |
+
|
| 290 |
+
# Step 5: Verify
|
| 291 |
+
self.verify_dataset()
|
| 292 |
+
|
| 293 |
+
print("\n" + "="*60)
|
| 294 |
+
print("✅ CLEANING COMPLETE!")
|
| 295 |
+
print("="*60)
|
| 296 |
+
print("\n🎯 Next Steps:")
|
| 297 |
+
print(" 1. Review the cleaning report")
|
| 298 |
+
print(" 2. Delete old training runs: rm -rf yolo_runs/smartvision_yolov8s*")
|
| 299 |
+
print(" 3. Retrain your model: python scripts/train_yolo_smartvision.py")
|
| 300 |
+
print(f"\n💾 Backup location: {self.backup_dir}")
|
| 301 |
+
print(" (You can restore from backup if needed)")
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
if __name__ == "__main__":
|
| 305 |
+
# Configuration
|
| 306 |
+
DATASET_DIR = "smartvision_dataset"
|
| 307 |
+
|
| 308 |
+
# Run the cleaner
|
| 309 |
+
cleaner = YOLODatasetCleaner(DATASET_DIR)
|
| 310 |
+
cleaner.run()
|
scripts/check.py
ADDED
|
@@ -0,0 +1,239 @@
|
| 1 |
+
# ============================================================
|
| 2 |
+
# SMARTVISION AI - YOLOv8 TRAIN + EVAL SCRIPT
|
| 3 |
+
# - Uses separate train / val / test splits
|
| 4 |
+
# - QUICK_TEST flag lets you sanity-check the whole pipeline
|
| 5 |
+
# with just 1 epoch before doing full training
|
| 6 |
+
# ============================================================
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import glob
|
| 10 |
+
import time
|
| 11 |
+
import json
|
| 12 |
+
import torch
|
| 13 |
+
from ultralytics import YOLO
|
| 14 |
+
|
| 15 |
+
# ------------------------------------------------------------
|
| 16 |
+
# 0. CONFIG: QUICK TEST OR FULL TRAINING?
|
| 17 |
+
# ------------------------------------------------------------
|
| 18 |
+
# First run with QUICK_TEST = True (1 epoch, debug run).
|
| 19 |
+
# If everything runs end-to-end without errors, set it to False.
|
| 20 |
+
QUICK_TEST = True # <<< CHANGE TO False FOR FULL TRAINING
|
| 21 |
+
|
| 22 |
+
FULL_EPOCHS = 50
|
| 23 |
+
DEBUG_EPOCHS = 1
|
| 24 |
+
|
| 25 |
+
EPOCHS = DEBUG_EPOCHS if QUICK_TEST else FULL_EPOCHS
|
| 26 |
+
RUN_NAME = "smartvision_yolov8s_debug" if QUICK_TEST else "smartvision_yolov8s"
|
| 27 |
+
|
| 28 |
+
print("⚙️ QUICK_TEST :", QUICK_TEST)
|
| 29 |
+
print("⚙️ EPOCHS :", EPOCHS)
|
| 30 |
+
print("⚙️ RUN_NAME :", RUN_NAME)
|
| 31 |
+
|
| 32 |
+
# ------------------------------------------------------------
|
| 33 |
+
# 1. PATHS & CONFIG
|
| 34 |
+
# ------------------------------------------------------------
|
| 35 |
+
|
| 36 |
+
BASE_DIR = "smartvision_dataset"
|
| 37 |
+
DET_DIR = os.path.join(BASE_DIR, "detection")
|
| 38 |
+
DATA_YAML = os.path.join(DET_DIR, "data.yaml")
|
| 39 |
+
|
| 40 |
+
# Expected folder structure:
|
| 41 |
+
# smartvision_dataset/detection/
|
| 42 |
+
# data.yaml
|
| 43 |
+
# images/train, images/val, images/test
|
| 44 |
+
# labels/train, labels/val, labels/test
|
| 45 |
+
|
| 46 |
+
RUN_PROJECT = "yolo_runs"
|
| 47 |
+
MODEL_WEIGHTS = "yolov8s.pt" # base checkpoint to fine-tune
|
| 48 |
+
|
| 49 |
+
VAL_IMAGES_DIR = os.path.join(DET_DIR, "images", "val")
|
| 50 |
+
|
| 51 |
+
# Auto-select device
|
| 52 |
+
device = "0" if torch.cuda.is_available() else "cpu"
|
| 53 |
+
print("🚀 Using device:", device)
|
| 54 |
+
print("📂 DATA_YAML :", DATA_YAML)
|
| 55 |
+
|
| 56 |
+
# Basic path checks (fail fast if something is wrong)
|
| 57 |
+
if not os.path.exists(DATA_YAML):
|
| 58 |
+
raise FileNotFoundError(f"data.yaml not found at: {DATA_YAML}")
|
| 59 |
+
|
| 60 |
+
for split in ["train", "val", "test"]:
|
| 61 |
+
img_dir = os.path.join(DET_DIR, "images", split)
|
| 62 |
+
lab_dir = os.path.join(DET_DIR, "labels", split)
|
| 63 |
+
if not os.path.isdir(img_dir):
|
| 64 |
+
raise FileNotFoundError(f"Images directory missing: {img_dir}")
|
| 65 |
+
if not os.path.isdir(lab_dir):
|
| 66 |
+
raise FileNotFoundError(f"Labels directory missing: {lab_dir}")
|
| 67 |
+
if len(glob.glob(os.path.join(img_dir, "*.jpg"))) == 0:
|
| 68 |
+
print(f"⚠️ Warning: No .jpg images found in {img_dir}")
|
| 69 |
+
|
| 70 |
+
# ------------------------------------------------------------
|
| 71 |
+
# 2. LOAD BASE MODEL
|
| 72 |
+
# ------------------------------------------------------------
|
| 73 |
+
|
| 74 |
+
print(f"\n📥 Loading YOLOv8 base model from: {MODEL_WEIGHTS}")
|
| 75 |
+
model = YOLO(MODEL_WEIGHTS)
|
| 76 |
+
|
| 77 |
+
# ------------------------------------------------------------
|
| 78 |
+
# 3. TRAIN
|
| 79 |
+
# ------------------------------------------------------------
|
| 80 |
+
|
| 81 |
+
print("\n===== STARTING TRAINING =====")
|
| 82 |
+
print("(This is a QUICK TEST run)" if QUICK_TEST else "(Full training run)")
|
| 83 |
+
|
| 84 |
+
results = model.train(
|
| 85 |
+
data=DATA_YAML,
|
| 86 |
+
epochs=EPOCHS,
|
| 87 |
+
imgsz=640,
|
| 88 |
+
batch=8, # increase if you have more GPU memory
|
| 89 |
+
lr0=0.01,
|
| 90 |
+
optimizer="SGD",
|
| 91 |
+
device=device,
|
| 92 |
+
project=RUN_PROJECT,
|
| 93 |
+
name=RUN_NAME,
|
| 94 |
+
pretrained=True,
|
| 95 |
+
plots=True,
|
| 96 |
+
verbose=True,
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
print("\n✅ YOLO training complete.")
|
| 100 |
+
RUN_DIR = os.path.join(RUN_PROJECT, RUN_NAME)
|
| 101 |
+
BEST_WEIGHTS = os.path.join(RUN_DIR, "weights", "best.pt")
|
| 102 |
+
print("📁 Run directory:", RUN_DIR)
|
| 103 |
+
print("📦 Best weights :", BEST_WEIGHTS)
|
| 104 |
+
|
| 105 |
+
if not os.path.exists(BEST_WEIGHTS):
|
| 106 |
+
raise FileNotFoundError(f"best.pt not found at: {BEST_WEIGHTS}")
|
| 107 |
+
|
| 108 |
+
# ------------------------------------------------------------
|
| 109 |
+
# 4. LOAD TRAINED MODEL (best.pt)
|
| 110 |
+
# ------------------------------------------------------------
|
| 111 |
+
|
| 112 |
+
print("\n📥 Loading trained model from best.pt")
|
| 113 |
+
model = YOLO(BEST_WEIGHTS)
|
| 114 |
+
print("✅ Loaded trained YOLOv8 model.")
|
| 115 |
+
print("📜 Class mapping (model.names):")
|
| 116 |
+
print(model.names)
|
| 117 |
+
|
| 118 |
+
# ------------------------------------------------------------
|
| 119 |
+
# 5. VALIDATION & TEST METRICS
|
| 120 |
+
# ------------------------------------------------------------
|
| 121 |
+
|
| 122 |
+
print("\n===== RUNNING VALIDATION (val split) =====")
|
| 123 |
+
metrics_val = model.val(
|
| 124 |
+
data=DATA_YAML,
|
| 125 |
+
split="val", # images/val + labels/val
|
| 126 |
+
imgsz=640,
|
| 127 |
+
save_json=False
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
print("\n===== YOLOv8 Validation Metrics =====")
|
| 131 |
+
print(f"[VAL] mAP@0.5 : {metrics_val.box.map50:.4f}")
|
| 132 |
+
print(f"[VAL] mAP@0.5:0.95 : {metrics_val.box.map:.4f}")
|
| 133 |
+
|
| 134 |
+
print("\nPer-class mAP@0.5 on VAL (first 10 classes):")
|
| 135 |
+
for i, m in enumerate(metrics_val.box.maps[:10]):
|
| 136 |
+
print(f" Class {i}: {m:.4f}")
|
| 137 |
+
|
| 138 |
+
print("\n===== RUNNING TEST EVALUATION (test split) =====")
|
| 139 |
+
metrics_test = model.val(
|
| 140 |
+
data=DATA_YAML,
|
| 141 |
+
split="test", # images/test + labels/test
|
| 142 |
+
imgsz=640,
|
| 143 |
+
save_json=False
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
print("\n===== YOLOv8 Test Metrics =====")
|
| 147 |
+
print(f"[TEST] mAP@0.5 : {metrics_test.box.map50:.4f}")
|
| 148 |
+
print(f"[TEST] mAP@0.5:0.95 : {metrics_test.box.map:.4f}")
|
| 149 |
+
|
| 150 |
+
# ------------------------------------------------------------
|
| 151 |
+
# 6. INFERENCE SPEED (FPS) ON VAL IMAGES
|
| 152 |
+
# ------------------------------------------------------------
|
| 153 |
+
|
| 154 |
+
print("\n===== MEASURING INFERENCE SPEED (FPS) ON VAL IMAGES =====")
|
| 155 |
+
|
| 156 |
+
val_images = glob.glob(os.path.join(VAL_IMAGES_DIR, "*.jpg"))
|
| 157 |
+
val_images = sorted(val_images)
|
| 158 |
+
|
| 159 |
+
num_test_images = min(10 if QUICK_TEST else 50, len(val_images))
|
| 160 |
+
test_images = val_images[:num_test_images]
|
| 161 |
+
|
| 162 |
+
print(f"Found {len(val_images)} images in {VAL_IMAGES_DIR}")
|
| 163 |
+
print(f"Using {len(test_images)} images for speed test.")
|
| 164 |
+
|
| 165 |
+
time_per_image = 0.0
|
| 166 |
+
fps = 0.0
|
| 167 |
+
|
| 168 |
+
if len(test_images) == 0:
|
| 169 |
+
print("⚠️ No images found for FPS test. Skipping speed measurement.")
|
| 170 |
+
else:
|
| 171 |
+
start = time.perf_counter()
|
| 172 |
+
_ = model.predict(
|
| 173 |
+
source=test_images,
|
| 174 |
+
imgsz=640,
|
| 175 |
+
conf=0.5,
|
| 176 |
+
verbose=False
|
| 177 |
+
)
|
| 178 |
+
end = time.perf_counter()
|
| 179 |
+
|
| 180 |
+
total_time = end - start
|
| 181 |
+
time_per_image = total_time / len(test_images)
|
| 182 |
+
fps = 1.0 / time_per_image
|
| 183 |
+
|
| 184 |
+
print(f"Total time : {total_time:.2f} sec for {len(test_images)} images")
|
| 185 |
+
print(f"Avg time / image : {time_per_image*1000:.2f} ms")
|
| 186 |
+
print(f"Approx FPS : {fps:.2f} images/sec")
|
| 187 |
+
|
| 188 |
+
# ------------------------------------------------------------
|
| 189 |
+
# 7. SAVE SAMPLE PREDICTION IMAGES (FROM VAL)
|
| 190 |
+
# ------------------------------------------------------------
|
| 191 |
+
|
| 192 |
+
print("\n===== SAVING SAMPLE PREDICTION IMAGES (VAL) =====")
|
| 193 |
+
|
| 194 |
+
sample_out_project = "yolo_vis"
|
| 195 |
+
sample_out_name = "samples_debug" if QUICK_TEST else "samples"
|
| 196 |
+
|
| 197 |
+
if len(test_images) == 0:
|
| 198 |
+
print("⚠️ No val images available for sample visualization. Skipping sample predictions.")
|
| 199 |
+
else:
|
| 200 |
+
_ = model.predict(
|
| 201 |
+
source=test_images[:4 if QUICK_TEST else 8],
|
| 202 |
+
imgsz=640,
|
| 203 |
+
conf=0.5,
|
| 204 |
+
save=True,
|
| 205 |
+
project=sample_out_project,
|
| 206 |
+
name=sample_out_name,
|
| 207 |
+
verbose=False,
|
| 208 |
+
)
|
| 209 |
+
print(f"✅ Saved sample predictions (with boxes & labels) to: {sample_out_project}/{sample_out_name}/")
|
| 210 |
+
|
| 211 |
+
# ------------------------------------------------------------
|
| 212 |
+
# 8. SAVE METRICS TO JSON
|
| 213 |
+
# ------------------------------------------------------------
|
| 214 |
+
|
| 215 |
+
print("\n===== SAVING METRICS TO JSON =====")
|
| 216 |
+
|
| 217 |
+
os.makedirs("yolo_metrics", exist_ok=True)
|
| 218 |
+
metrics_json_path = os.path.join("yolo_metrics", "yolov8s_metrics_debug.json" if QUICK_TEST else "yolov8s_metrics.json")
|
| 219 |
+
|
| 220 |
+
yolo_metrics = {
|
| 221 |
+
"model_name": "yolov8s_smartvision",
|
| 222 |
+
"quick_test": QUICK_TEST,
|
| 223 |
+
"epochs": EPOCHS,
|
| 224 |
+
"run_dir": RUN_DIR,
|
| 225 |
+
"best_weights": BEST_WEIGHTS,
|
| 226 |
+
"val_map_50": float(metrics_val.box.map50),
|
| 227 |
+
"val_map_50_95": float(metrics_val.box.map),
|
| 228 |
+
"test_map_50": float(metrics_test.box.map50),
|
| 229 |
+
"test_map_50_95": float(metrics_test.box.map),
|
| 230 |
+
"num_val_images_for_speed_test": int(len(test_images)),
|
| 231 |
+
"avg_inference_time_sec": float(time_per_image),
|
| 232 |
+
"fps": float(fps),
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
with open(metrics_json_path, "w") as f:
|
| 236 |
+
json.dump(yolo_metrics, f, indent=2)
|
| 237 |
+
|
| 238 |
+
print(f"✅ Saved YOLO metrics JSON to: {metrics_json_path}")
|
| 239 |
+
print("\n🎯 YOLOv8 training + evaluation script finished.")
|
scripts/compare_models.py
ADDED
|
@@ -0,0 +1,267 @@
|
| 1 |
+
"""
|
| 2 |
+
SMARTVISION AI - Step 2.5: Model Comparison & Selection
|
| 3 |
+
|
| 4 |
+
This script:
|
| 5 |
+
- Loads metrics.json and confusion_matrix.npy for all models.
|
| 6 |
+
- Compares accuracy, precision, recall, F1, top-5 accuracy, speed, and model size.
|
| 7 |
+
- Generates bar plots for metrics.
|
| 8 |
+
- Generates confusion matrix heatmaps per model.
|
| 9 |
+
- Selects the best model using an accuracy–speed tradeoff rule.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import json
|
| 14 |
+
import numpy as np
|
| 15 |
+
import matplotlib.pyplot as plt
|
| 16 |
+
|
| 17 |
+
# ------------------------------------------------------------
|
| 18 |
+
# 0. CONFIG – resolve paths relative to this file
|
| 19 |
+
# ------------------------------------------------------------
|
| 20 |
+
|
| 21 |
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 22 |
+
ROOT_DIR = os.path.dirname(SCRIPT_DIR) # one level up from scripts/
|
| 23 |
+
METRICS_DIR = os.path.join(ROOT_DIR, "smartvision_metrics")
|
| 24 |
+
PLOTS_DIR = os.path.join(METRICS_DIR, "comparison_plots")
|
| 25 |
+
os.makedirs(PLOTS_DIR, exist_ok=True)
|
| 26 |
+
|
| 27 |
+
print(f"[INFO] Using METRICS_DIR = {METRICS_DIR}")
|
| 28 |
+
print(f"[INFO] Existing subfolders in METRICS_DIR: {os.listdir(METRICS_DIR) if os.path.exists(METRICS_DIR) else 'NOT FOUND'}")
|
| 29 |
+
|
| 30 |
+
# Map "pretty" model names to their metrics subdirectories
|
| 31 |
+
MODEL_PATHS = {
|
| 32 |
+
"VGG16" : "vgg16_v2_stage2",
|
| 33 |
+
"ResNet50" : "resnet50_v2_stage2",
|
| 34 |
+
"MobileNetV2" : "mobilenetv2_v2",
|
| 35 |
+
"efficientnetb0" : "efficientnetb0",
|
| 36 |
+
# Optional: add more models here, e.g.:
|
| 37 |
+
# "ResNet50 v2 (Stage 1)" : "resnet50_v2_stage1",
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
# Class names (COCO-style 25 classes)
|
| 41 |
+
CLASS_NAMES = [
|
| 42 |
+
"airplane", "bed", "bench", "bicycle", "bird",
|
| 43 |
+
"bottle", "bowl", "bus", "cake", "car",
|
| 44 |
+
"cat", "chair", "couch", "cow", "cup",
|
| 45 |
+
"dog", "elephant", "horse", "motorcycle", "person",
|
| 46 |
+
"pizza", "potted plant", "stop sign", "traffic light", "truck",
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
# 1. LOAD METRICS & CONFUSION MATRICES
|
| 52 |
+
# ------------------------------------------------------------
|
| 53 |
+
|
| 54 |
+
def load_model_results():
|
| 55 |
+
model_metrics = {}
|
| 56 |
+
model_cms = {}
|
| 57 |
+
|
| 58 |
+
for nice_name, folder_name in MODEL_PATHS.items():
|
| 59 |
+
metrics_path = os.path.join(METRICS_DIR, folder_name, "metrics.json")
|
| 60 |
+
cm_path = os.path.join(METRICS_DIR, folder_name, "confusion_matrix.npy")
|
| 61 |
+
|
| 62 |
+
print(f"[DEBUG] Looking for {nice_name} metrics at: {metrics_path}")
|
| 63 |
+
print(f"[DEBUG] Looking for {nice_name} CM at : {cm_path}")
|
| 64 |
+
|
| 65 |
+
if not os.path.exists(metrics_path):
|
| 66 |
+
print(f"[WARN] Skipping {nice_name}: missing {metrics_path}")
|
| 67 |
+
continue
|
| 68 |
+
if not os.path.exists(cm_path):
|
| 69 |
+
print(f"[WARN] Skipping {nice_name}: missing {cm_path}")
|
| 70 |
+
continue
|
| 71 |
+
|
| 72 |
+
with open(metrics_path, "r") as f:
|
| 73 |
+
metrics = json.load(f)
|
| 74 |
+
cm = np.load(cm_path)
|
| 75 |
+
|
| 76 |
+
model_metrics[nice_name] = metrics
|
| 77 |
+
model_cms[nice_name] = cm
|
| 78 |
+
print(f"[INFO] Loaded metrics & CM for {nice_name}")
|
| 79 |
+
|
| 80 |
+
return model_metrics, model_cms
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
# ------------------------------------------------------------
|
| 84 |
+
# 2. PLOTTING HELPERS
|
| 85 |
+
# ------------------------------------------------------------
|
| 86 |
+
|
| 87 |
+
def plot_bar_metric(model_metrics, metric_key, ylabel, filename, higher_is_better=True):
|
| 88 |
+
names = list(model_metrics.keys())
|
| 89 |
+
values = [model_metrics[n][metric_key] for n in names]
|
| 90 |
+
|
| 91 |
+
plt.figure(figsize=(8, 5))
|
| 92 |
+
bars = plt.bar(names, values)
|
| 93 |
+
plt.ylabel(ylabel)
|
| 94 |
+
plt.xticks(rotation=20, ha="right")
|
| 95 |
+
|
| 96 |
+
for bar, val in zip(bars, values):
|
| 97 |
+
plt.text(
|
| 98 |
+
bar.get_x() + bar.get_width() / 2,
|
| 99 |
+
bar.get_height(),
|
| 100 |
+
f"{val:.3f}",
|
| 101 |
+
ha="center",
|
| 102 |
+
va="bottom",
|
| 103 |
+
fontsize=8,
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
title_prefix = "Higher is better" if higher_is_better else "Lower is better"
|
| 107 |
+
plt.title(f"{metric_key} comparison ({title_prefix})")
|
| 108 |
+
plt.tight_layout()
|
| 109 |
+
|
| 110 |
+
out_path = os.path.join(PLOTS_DIR, filename)
|
| 111 |
+
plt.savefig(out_path, dpi=200)
|
| 112 |
+
plt.close()
|
| 113 |
+
print(f"[PLOT] Saved {metric_key} comparison to {out_path}")
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def plot_confusion_matrix(cm, classes, title, filename, normalize=True):
|
| 117 |
+
if normalize:
|
| 118 |
+
cm = cm.astype("float") / (cm.sum(axis=1)[:, np.newaxis] + 1e-12)
|
| 119 |
+
|
| 120 |
+
plt.figure(figsize=(6, 5))
|
| 121 |
+
im = plt.imshow(cm, interpolation="nearest")
|
| 122 |
+
plt.title(title)
|
| 123 |
+
plt.colorbar(im, fraction=0.046, pad=0.04)
|
| 124 |
+
|
| 125 |
+
tick_marks = np.arange(len(classes))
|
| 126 |
+
plt.xticks(tick_marks, classes, rotation=90)
|
| 127 |
+
plt.yticks(tick_marks, classes)
|
| 128 |
+
|
| 129 |
+
# annotate diagonal only to reduce clutter
|
| 130 |
+
for i in range(cm.shape[0]):
|
| 131 |
+
for j in range(cm.shape[1]):
|
| 132 |
+
if i == j:
|
| 133 |
+
plt.text(
|
| 134 |
+
j,
|
| 135 |
+
i,
|
| 136 |
+
f"{cm[i, j]:.2f}",
|
| 137 |
+
ha="center",
|
| 138 |
+
va="center",
|
| 139 |
+
color="white" if cm[i, j] > 0.5 else "black",
|
| 140 |
+
fontsize=6,
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
plt.ylabel("True label")
|
| 144 |
+
plt.xlabel("Predicted label")
|
| 145 |
+
plt.tight_layout()
|
| 146 |
+
|
| 147 |
+
out_path = os.path.join(PLOTS_DIR, filename)
|
| 148 |
+
plt.savefig(out_path, dpi=200)
|
| 149 |
+
plt.close()
|
| 150 |
+
print(f"[PLOT] Saved confusion matrix to {out_path}")
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# ------------------------------------------------------------
|
| 154 |
+
# 3. MODEL SELECTION (ACCURACY–SPEED TRADEOFF)
|
| 155 |
+
# ------------------------------------------------------------
|
| 156 |
+
|
| 157 |
+
def pick_best_model(model_metrics):
|
| 158 |
+
"""
|
| 159 |
+
Rule:
|
| 160 |
+
1. Prefer highest accuracy.
|
| 161 |
+
2. If two models are within 0.5% accuracy, prefer higher images_per_second.
|
| 162 |
+
"""
|
| 163 |
+
best_name = None
|
| 164 |
+
best_acc = -1.0
|
| 165 |
+
best_speed = -1.0
|
| 166 |
+
|
| 167 |
+
for name, m in model_metrics.items():
|
| 168 |
+
acc = m["accuracy"]
|
| 169 |
+
speed = m.get("images_per_second", 0.0)
|
| 170 |
+
|
| 171 |
+
if acc > best_acc + 0.005: # clearly better
|
| 172 |
+
best_name = name
|
| 173 |
+
best_acc = acc
|
| 174 |
+
best_speed = speed
|
| 175 |
+
elif abs(acc - best_acc) <= 0.005: # within 0.5%, use speed as tie-breaker
|
| 176 |
+
if speed > best_speed:
|
| 177 |
+
best_name = name
|
| 178 |
+
best_acc = acc
|
| 179 |
+
best_speed = speed
|
| 180 |
+
|
| 181 |
+
return best_name, best_acc, best_speed
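# Illustrative walk-through of the rule above (hypothetical numbers):
#   Model A: accuracy=0.912, images_per_second=80
#   Model B: accuracy=0.909, images_per_second=140
# The accuracies differ by 0.003 (within the 0.005 tolerance), so the faster
# Model B would be selected, regardless of iteration order.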
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# ------------------------------------------------------------
|
| 185 |
+
# 4. MAIN
|
| 186 |
+
# ------------------------------------------------------------
|
| 187 |
+
|
| 188 |
+
def main():
|
| 189 |
+
model_metrics, model_cms = load_model_results()
|
| 190 |
+
|
| 191 |
+
if not model_metrics:
|
| 192 |
+
print("[ERROR] No models found with valid metrics. Check METRICS_DIR and MODEL_PATHS.")
|
| 193 |
+
return
|
| 194 |
+
|
| 195 |
+
print("\n===== MODEL METRICS SUMMARY =====")
|
| 196 |
+
print(
|
| 197 |
+
f"{'Model':30s} {'Acc':>6s} {'Prec':>6s} {'Rec':>6s} {'F1':>6s} {'Top5':>6s} {'img/s':>7s} {'Size(MB)':>8s}"
|
| 198 |
+
)
|
| 199 |
+
for name, m in model_metrics.items():
|
| 200 |
+
print(
|
| 201 |
+
f"{name:30s} "
|
| 202 |
+
f"{m['accuracy']:.3f} "
|
| 203 |
+
f"{m['precision_weighted']:.3f} "
|
| 204 |
+
f"{m['recall_weighted']:.3f} "
|
| 205 |
+
f"{m['f1_weighted']:.3f} "
|
| 206 |
+
f"{m['top5_accuracy']:.3f} "
|
| 207 |
+
f"{m['images_per_second']:.2f} "
|
| 208 |
+
f"{m['model_size_mb']:.1f}"
|
| 209 |
+
)
|
| 210 |
+
|
| 211 |
+
# ---- Comparison plots ----
|
| 212 |
+
plot_bar_metric(model_metrics, "accuracy", "Accuracy", "accuracy_comparison.png")
|
| 213 |
+
plot_bar_metric(
|
| 214 |
+
model_metrics, "f1_weighted", "Weighted F1-score", "f1_comparison.png"
|
| 215 |
+
)
|
| 216 |
+
plot_bar_metric(
|
| 217 |
+
model_metrics, "top5_accuracy", "Top-5 Accuracy", "top5_comparison.png"
|
| 218 |
+
)
|
| 219 |
+
plot_bar_metric(
|
| 220 |
+
model_metrics,
|
| 221 |
+
"images_per_second",
|
| 222 |
+
"Images per second",
|
| 223 |
+
"speed_comparison.png",
|
| 224 |
+
)
|
| 225 |
+
plot_bar_metric(
|
| 226 |
+
model_metrics,
|
| 227 |
+
"model_size_mb",
|
| 228 |
+
"Model size (MB)",
|
| 229 |
+
"size_comparison.png",
|
| 230 |
+
higher_is_better=False,
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
# ---- Confusion matrices ----
|
| 234 |
+
print("\n===== SAVING CONFUSION MATRICES =====")
|
| 235 |
+
for name, cm in model_cms.items():
|
| 236 |
+
safe_name = name.replace(" ", "_").replace("(", "").replace(")", "")
|
| 237 |
+
filename = f"{safe_name}_cm.png"
|
| 238 |
+
plot_confusion_matrix(
|
| 239 |
+
cm,
|
| 240 |
+
classes=CLASS_NAMES,
|
| 241 |
+
title=f"Confusion Matrix - {name}",
|
| 242 |
+
filename=filename,
|
| 243 |
+
normalize=True,
|
| 244 |
+
)
|
| 245 |
+
|
| 246 |
+
# ---- Best model ----
|
| 247 |
+
best_name, best_acc, best_speed = pick_best_model(model_metrics)
|
| 248 |
+
|
| 249 |
+
print("\n===== BEST MODEL SELECTION =====")
|
| 250 |
+
print(f"Selected best model: {best_name}")
|
| 251 |
+
print(f" Test Accuracy : {best_acc:.4f}")
|
| 252 |
+
print(f" Images per second : {best_speed:.2f}")
|
| 253 |
+
print("\nRationale:")
|
| 254 |
+
print("- Highest accuracy is preferred.")
|
| 255 |
+
print("- If models are within 0.5% accuracy, the faster model (higher img/s) is chosen.")
|
| 256 |
+
|
| 257 |
+
print("\nSuggested text for report:")
|
| 258 |
+
print(
|
| 259 |
+
f"\"Among all evaluated architectures, {best_name} achieved the best accuracy–speed "
|
| 260 |
+
f"tradeoff on the SmartVision AI test set, with a top-1 accuracy of {best_acc:.3f} "
|
| 261 |
+
f"and an inference throughput of {best_speed:.2f} images per second on the "
|
| 262 |
+
f"evaluation hardware.\""
|
| 263 |
+
)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
if __name__ == "__main__":
|
| 267 |
+
main()
|
scripts/convert_efficientnet_weights.py
ADDED
|
@@ -0,0 +1,109 @@
|
| 1 |
+
# scripts/convert_efficientnet_weights.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import tensorflow as tf
|
| 5 |
+
from tensorflow import keras
|
| 6 |
+
from tensorflow.keras import layers
|
| 7 |
+
from tensorflow.keras.applications.efficientnet import (
|
| 8 |
+
EfficientNetB0,
|
| 9 |
+
preprocess_input as effnet_preprocess,
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
print("TensorFlow version:", tf.__version__)
|
| 13 |
+
|
| 14 |
+
IMG_SIZE = (224, 224)
|
| 15 |
+
NUM_CLASSES = 25
|
| 16 |
+
MODELS_DIR = "saved_models"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# --- These were in your training script, keep same names ---
|
| 20 |
+
|
| 21 |
+
def bright_jitter(x):
|
| 22 |
+
x_f32 = tf.cast(x, tf.float32)
|
| 23 |
+
x_f32 = tf.image.random_brightness(x_f32, max_delta=0.25)
|
| 24 |
+
return tf.cast(x_f32, x.dtype)
|
| 25 |
+
|
| 26 |
+
def sat_jitter(x):
|
| 27 |
+
x_f32 = tf.cast(x, tf.float32)
|
| 28 |
+
x_f32 = tf.image.random_saturation(x_f32, lower=0.7, upper=1.3)
|
| 29 |
+
return tf.cast(x_f32, x.dtype)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def build_efficientnetb0_model_v2():
|
| 33 |
+
"""
|
| 34 |
+
Rebuilds the SAME EfficientNetB0 architecture used in your training script
|
| 35 |
+
(data_augmentation + preprocess_input + EfficientNetB0 backbone + head).
|
| 36 |
+
"""
|
| 37 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 38 |
+
|
| 39 |
+
# --- Data augmentation (as in training) ---
|
| 40 |
+
data_augmentation = keras.Sequential(
|
| 41 |
+
[
|
| 42 |
+
layers.RandomFlip("horizontal"),
|
| 43 |
+
layers.RandomRotation(0.08), # ≈ ±30°
|
| 44 |
+
layers.RandomZoom(0.15),
|
| 45 |
+
layers.RandomContrast(0.3),
|
| 46 |
+
layers.RandomTranslation(0.1, 0.1),
|
| 47 |
+
layers.Lambda(bright_jitter, name="bright_jitter"),
|
| 48 |
+
layers.Lambda(sat_jitter, name="sat_jitter"),
|
| 49 |
+
],
|
| 50 |
+
name="advanced_data_augmentation",
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
x = data_augmentation(inputs)
|
| 54 |
+
|
| 55 |
+
# EfficientNetB0 preprocess_input (same as training)
|
| 56 |
+
x = layers.Lambda(
|
| 57 |
+
lambda z: effnet_preprocess(tf.cast(z, tf.float32)),
|
| 58 |
+
name="effnet_preprocess",
|
| 59 |
+
)(x)
|
| 60 |
+
|
| 61 |
+
# EfficientNetB0 backbone
|
| 62 |
+
base_model = EfficientNetB0(
|
| 63 |
+
include_top=False,
|
| 64 |
+
weights="imagenet",
|
| 65 |
+
name="efficientnetb0",
|
| 66 |
+
)
|
| 67 |
+
base_model.trainable = False # doesn't matter for conversion
|
| 68 |
+
|
| 69 |
+
x = base_model(x, training=False)
|
| 70 |
+
|
| 71 |
+
# Classification head (same as training)
|
| 72 |
+
x = layers.GlobalAveragePooling2D(name="gap")(x)
|
| 73 |
+
x = layers.BatchNormalization(name="head_bn_1")(x)
|
| 74 |
+
x = layers.Dense(256, activation="relu", name="head_dense_1")(x)
|
| 75 |
+
x = layers.BatchNormalization(name="head_bn_2")(x)
|
| 76 |
+
x = layers.Dropout(0.4, name="head_dropout")(x)
|
| 77 |
+
|
| 78 |
+
# Final output: float32 softmax
|
| 79 |
+
outputs = layers.Dense(
|
| 80 |
+
NUM_CLASSES,
|
| 81 |
+
activation="softmax",
|
| 82 |
+
dtype="float32",
|
| 83 |
+
name="predictions",
|
| 84 |
+
)(x)
|
| 85 |
+
|
| 86 |
+
model = keras.Model(inputs, outputs, name="EfficientNetB0_smartvision_v2")
|
| 87 |
+
return model
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
if __name__ == "__main__":
|
| 91 |
+
full_path = os.path.join(MODELS_DIR, "efficientnetb0_best.h5")
|
| 92 |
+
weights_path = os.path.join(MODELS_DIR, "efficientnetb0_best.weights.h5")
|
| 93 |
+
|
| 94 |
+
if not os.path.exists(full_path):
|
| 95 |
+
raise FileNotFoundError(f"Full EfficientNet model .h5 not found at: {full_path}")
|
| 96 |
+
|
| 97 |
+
print("🔧 Building EfficientNetB0 v2 architecture...")
|
| 98 |
+
model = build_efficientnetb0_model_v2()
|
| 99 |
+
model.summary()
|
| 100 |
+
|
| 101 |
+
print(f"\n📥 Loading weights BY NAME (skip mismatches) from:\n {full_path}")
|
| 102 |
+
# 🔑 KEY FIX: use by_name=True and skip_mismatch=True so shape mismatches
|
| 103 |
+
# are simply ignored instead of crashing.
|
| 104 |
+
model.load_weights(full_path, by_name=True, skip_mismatch=True)
|
| 105 |
+
print("✅ Weights loaded into rebuilt model (by name, mismatches skipped).")
|
| 106 |
+
|
| 107 |
+
print(f"\n💾 Saving weights-only file to:\n {weights_path}")
|
| 108 |
+
model.save_weights(weights_path)
|
| 109 |
+
print("✅ Done converting EfficientNetB0 weights to .weights.h5")
|
scripts/convert_mobilenet_weights.py
ADDED
|
@@ -0,0 +1,83 @@
|
| 1 |
+
import os
|
| 2 |
+
import tensorflow as tf
|
| 3 |
+
from tensorflow import keras
|
| 4 |
+
from tensorflow.keras import layers, regularizers
|
| 5 |
+
|
| 6 |
+
IMG_SIZE = (224, 224)
|
| 7 |
+
NUM_CLASSES = 25
|
| 8 |
+
|
| 9 |
+
# ---- this MUST match your training build_mobilenetv2_model_v2 ----
|
| 10 |
+
def build_mobilenetv2_model_v2():
|
| 11 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 12 |
+
|
| 13 |
+
data_augmentation = keras.Sequential(
|
| 14 |
+
[
|
| 15 |
+
layers.RandomFlip("horizontal"),
|
| 16 |
+
layers.RandomRotation(0.04), # ~±15°
|
| 17 |
+
layers.RandomZoom(0.1),
|
| 18 |
+
layers.RandomContrast(0.15),
|
| 19 |
+
layers.Lambda(
|
| 20 |
+
lambda x: tf.image.random_brightness(x, max_delta=0.15)
|
| 21 |
+
),
|
| 22 |
+
layers.Lambda(
|
| 23 |
+
lambda x: tf.image.random_saturation(x, 0.85, 1.15)
|
| 24 |
+
),
|
| 25 |
+
],
|
| 26 |
+
name="data_augmentation", # 👈 same name as training
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
x = data_augmentation(inputs)
|
| 30 |
+
|
| 31 |
+
x = layers.Lambda(
|
| 32 |
+
keras.applications.mobilenet_v2.preprocess_input,
|
| 33 |
+
name="mobilenetv2_preprocess",
|
| 34 |
+
)(x)
|
| 35 |
+
|
| 36 |
+
base_model = keras.applications.MobileNetV2(
|
| 37 |
+
include_top=False,
|
| 38 |
+
weights="imagenet",
|
| 39 |
+
input_shape=(*IMG_SIZE, 3),
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
x = base_model(x)
|
| 43 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(x)
|
| 44 |
+
|
| 45 |
+
x = layers.BatchNormalization(name="head_batchnorm_1")(x)
|
| 46 |
+
x = layers.Dropout(0.4, name="head_dropout_1")(x)
|
| 47 |
+
|
| 48 |
+
x = layers.Dense(
|
| 49 |
+
256,
|
| 50 |
+
activation="relu",
|
| 51 |
+
kernel_regularizer=regularizers.l2(1e-4),
|
| 52 |
+
name="head_dense_1",
|
| 53 |
+
)(x)
|
| 54 |
+
|
| 55 |
+
x = layers.BatchNormalization(name="head_batchnorm_2")(x)
|
| 56 |
+
x = layers.Dropout(0.5, name="head_dropout_2")(x)
|
| 57 |
+
|
| 58 |
+
outputs = layers.Dense(
|
| 59 |
+
NUM_CLASSES, activation="softmax", name="predictions"
|
| 60 |
+
)(x)
|
| 61 |
+
|
| 62 |
+
model = keras.Model(
|
| 63 |
+
inputs=inputs,
|
| 64 |
+
outputs=outputs,
|
| 65 |
+
name="MobileNetV2_smartvision_v2",
|
| 66 |
+
)
|
| 67 |
+
return model
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
if __name__ == "__main__":
|
| 71 |
+
old_path = os.path.join("saved_models", "mobilenetv2_v2_stage2_best.h5")
|
| 72 |
+
new_path = os.path.join("saved_models", "mobilenetv2_v2_stage2_best.weights.h5")
|
| 73 |
+
|
| 74 |
+
print("Building MobileNetV2 architecture...")
|
| 75 |
+
model = build_mobilenetv2_model_v2()
|
| 76 |
+
|
| 77 |
+
print("Loading weights from full .h5 (by_name, skip_mismatch)...")
|
| 78 |
+
model.load_weights(old_path, by_name=True, skip_mismatch=True)
|
| 79 |
+
|
| 80 |
+
print("Saving clean weights-only file...")
|
| 81 |
+
model.save_weights(new_path)
|
| 82 |
+
|
| 83 |
+
print("✅ Done. Saved weights-only file to:", new_path)
|
scripts/convert_vgg16_weights.py
ADDED
|
@@ -0,0 +1,79 @@
|
| 1 |
+
# scripts/convert_vgg16_weights.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import tensorflow as tf
|
| 5 |
+
from tensorflow import keras
|
| 6 |
+
from tensorflow.keras import layers
|
| 7 |
+
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
|
| 8 |
+
|
| 9 |
+
print("TensorFlow version:", tf.__version__)
|
| 10 |
+
|
| 11 |
+
IMG_SIZE = (224, 224)
|
| 12 |
+
NUM_CLASSES = 25
|
| 13 |
+
MODELS_DIR = "saved_models"
|
| 14 |
+
|
| 15 |
+
# --- Same augmentation as in training (rebuilding it here is fine; the issue was only deserializing the old saved model's Lambda layers) ---
|
| 16 |
+
|
| 17 |
+
data_augmentation = keras.Sequential(
|
| 18 |
+
[
|
| 19 |
+
layers.RandomFlip("horizontal"),
|
| 20 |
+
layers.RandomRotation(0.04), # ≈ ±15°
|
| 21 |
+
layers.RandomZoom(0.1),
|
| 22 |
+
layers.RandomContrast(0.2),
|
| 23 |
+
layers.Lambda(lambda x: tf.image.random_brightness(x, max_delta=0.2)),
|
| 24 |
+
layers.Lambda(lambda x: tf.image.random_saturation(x, 0.8, 1.2)),
|
| 25 |
+
],
|
| 26 |
+
name="data_augmentation",
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def build_vgg16_model_v2():
|
| 31 |
+
"""
|
| 32 |
+
EXACTLY the same architecture as your VGG16 training code.
|
| 33 |
+
"""
|
| 34 |
+
inputs = keras.Input(shape=(*IMG_SIZE, 3), name="input_layer")
|
| 35 |
+
|
| 36 |
+
# 1. Augmentation
|
| 37 |
+
x = data_augmentation(inputs)
|
| 38 |
+
|
| 39 |
+
# 2. VGG16-specific preprocessing
|
| 40 |
+
x = layers.Lambda(
|
| 41 |
+
lambda z: preprocess_input(tf.cast(z, tf.float32)),
|
| 42 |
+
name="vgg16_preprocess",
|
| 43 |
+
)(x)
|
| 44 |
+
|
| 45 |
+
# 3. Pre-trained VGG16 backbone
|
| 46 |
+
base_model = VGG16(
|
| 47 |
+
include_top=False,
|
| 48 |
+
weights="imagenet",
|
| 49 |
+
input_tensor=x,
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
# 4. Custom head
|
| 53 |
+
x = layers.GlobalAveragePooling2D(name="global_average_pooling2d")(base_model.output)
|
| 54 |
+
x = layers.Dense(256, activation="relu", name="dense_256")(x)
|
| 55 |
+
x = layers.Dropout(0.5, name="dropout_0_5")(x)
|
| 56 |
+
outputs = layers.Dense(NUM_CLASSES, activation="softmax", name="predictions")(x)
|
| 57 |
+
|
| 58 |
+
model = keras.Model(inputs=inputs, outputs=outputs, name="VGG16_smartvision_v2")
|
| 59 |
+
return model
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
if __name__ == "__main__":
|
| 63 |
+
full_path = os.path.join(MODELS_DIR, "vgg16_v2_stage2_best.h5")
|
| 64 |
+
weights_path = os.path.join(MODELS_DIR, "vgg16_v2_stage2_best.weights.h5")
|
| 65 |
+
|
| 66 |
+
if not os.path.exists(full_path):
|
| 67 |
+
raise FileNotFoundError(f"Full VGG16 model .h5 not found at: {full_path}")
|
| 68 |
+
|
| 69 |
+
print("🧱 Rebuilding VGG16 v2 architecture...")
|
| 70 |
+
model = build_vgg16_model_v2()
|
| 71 |
+
model.summary()
|
| 72 |
+
|
| 73 |
+
print(f"📥 Loading weights from legacy full model file (by_name, skip_mismatch): {full_path}")
|
| 74 |
+
# NOTE: this reads the HDF5 weights **without** trying to deserialize the old Lambda graph
|
| 75 |
+
model.load_weights(full_path, by_name=True, skip_mismatch=True)
|
| 76 |
+
|
| 77 |
+
print(f"💾 Saving clean weights-only file to: {weights_path}")
|
| 78 |
+
model.save_weights(weights_path)
|
| 79 |
+
print("✅ Done: vgg16_v2_stage2_best.weights.h5 created.")
|
scripts/train_yolo_smartvision.py
ADDED
|
@@ -0,0 +1,428 @@
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
train_yolo_smartvision.py
|
| 4 |
+
|
| 5 |
+
Train YOLOv8 on ALL images (train+val+test) by creating images/train_all & labels/train_all,
|
| 6 |
+
then validate/test only on original val/test splits.
|
| 7 |
+
|
| 8 |
+
Features:
|
| 9 |
+
- Robust linking/copying with retries (hard link when possible, fallback copy).
|
| 10 |
+
- Manifest generation (train_all_manifest.json) with failures and post-check.
|
| 11 |
+
- Temporary data_all.yaml created and removed by default.
|
| 12 |
+
- Helpful early-failure checks so training doesn't crash with FileNotFoundError.
|
| 13 |
+
"""
|
| 14 |
+
import os
|
| 15 |
+
import sys
|
| 16 |
+
import time
|
| 17 |
+
import json
|
| 18 |
+
import glob
|
| 19 |
+
import shutil
|
| 20 |
+
import argparse
|
| 21 |
+
import pathlib
|
| 22 |
+
|
| 23 |
+
import torch
|
| 24 |
+
from ultralytics import YOLO
|
| 25 |
+
|
| 26 |
+
# ---------------------------
|
| 27 |
+
# Utilities
|
| 28 |
+
# ---------------------------
|
| 29 |
+
|
| 30 |
+
def safe_makedirs(path):
|
| 31 |
+
os.makedirs(path, exist_ok=True)
|
| 32 |
+
return path
|
| 33 |
+
|
| 34 |
+
def link_or_copy(src, dst, max_retries=3, allow_copy=True):
|
| 35 |
+
"""
|
| 36 |
+
Try to create a hard link. If it fails, fall back to shutil.copy2.
|
| 37 |
+
Retries on transient failures. Returns tuple (ok:bool, method:str, error:str|None).
|
| 38 |
+
method in {'link', 'copy', 'exists', 'failed', 'copied_existing'}
|
| 39 |
+
"""
|
| 40 |
+
dst_dir = os.path.dirname(dst)
|
| 41 |
+
os.makedirs(dst_dir, exist_ok=True)
|
| 42 |
+
if os.path.exists(dst):
|
| 43 |
+
return True, "exists", None
|
| 44 |
+
|
| 45 |
+
last_err = None
|
| 46 |
+
for attempt in range(1, max_retries + 1):
|
| 47 |
+
try:
|
| 48 |
+
os.link(src, dst)
|
| 49 |
+
return True, "link", None
|
| 50 |
+
except Exception as e_link:
|
| 51 |
+
last_err = str(e_link)
|
| 52 |
+
if not allow_copy:
|
| 53 |
+
time.sleep(0.1)
|
| 54 |
+
continue
|
| 55 |
+
# try copying
|
| 56 |
+
try:
|
| 57 |
+
shutil.copy2(src, dst)
|
| 58 |
+
return True, "copy", None
|
| 59 |
+
except Exception as e_copy:
|
| 60 |
+
last_err = f"link_err: {e_link}; copy_err: {e_copy}"
|
| 61 |
+
time.sleep(0.1)
|
| 62 |
+
continue
|
| 63 |
+
return False, "failed", last_err
|
| 64 |
+
|
| 65 |
+
def unique_name(split, basename, used):
|
| 66 |
+
"""
|
| 67 |
+
Create a unique filename under train_all to avoid collisions.
|
| 68 |
+
Format: {split}__{basename} and if collision append index.
|
| 69 |
+
"""
|
| 70 |
+
base = f"{split}__{basename}"
|
| 71 |
+
name = base
|
| 72 |
+
idx = 1
|
| 73 |
+
while name in used:
|
| 74 |
+
name = f"{split}__{idx}__{basename}"
|
| 75 |
+
idx += 1
|
| 76 |
+
used.add(name)
|
| 77 |
+
return name
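# Illustrative example of the naming scheme above (hypothetical filenames):
#   unique_name("train", "000123.jpg", used) -> "train__000123.jpg"
#   a later call with the same split/basename -> "train__1__000123.jpg"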
|
| 78 |
+
|
| 79 |
+
# ---------------------------
|
| 80 |
+
# Create train_all (robust)
|
| 81 |
+
# ---------------------------
|
| 82 |
+
|
| 83 |
+
def create_train_all(det_dir, splits=("train", "val", "test")):
|
| 84 |
+
"""
|
| 85 |
+
Create images/train_all and labels/train_all by linking/copying
|
| 86 |
+
all files from images/<split> and labels/<split>.
|
| 87 |
+
Returns (out_imgs, out_labs, counters, manifest_path)
|
| 88 |
+
where manifest contains details and failures.
|
| 89 |
+
"""
|
| 90 |
+
img_root = os.path.join(det_dir, "images")
|
| 91 |
+
lab_root = os.path.join(det_dir, "labels")
|
| 92 |
+
|
| 93 |
+
out_imgs = os.path.join(det_dir, "images", "train_all")
|
| 94 |
+
out_labs = os.path.join(det_dir, "labels", "train_all")
|
| 95 |
+
safe_makedirs(out_imgs)
|
| 96 |
+
safe_makedirs(out_labs)
|
| 97 |
+
|
| 98 |
+
used_names = set()
|
| 99 |
+
counters = {"images": 0, "labels": 0}
|
| 100 |
+
manifest = {"images": [], "labels": [], "failures": [], "post_check_missing": []}
|
| 101 |
+
|
| 102 |
+
for split in splits:
|
| 103 |
+
imgs_dir = os.path.join(img_root, split)
|
| 104 |
+
labs_dir = os.path.join(lab_root, split)
|
| 105 |
+
if not os.path.isdir(imgs_dir) or not os.path.isdir(labs_dir):
|
| 106 |
+
# skip missing split
|
| 107 |
+
continue
|
| 108 |
+
|
| 109 |
+
# collect possible image extensions
|
| 110 |
+
img_files = sorted(glob.glob(os.path.join(imgs_dir, "*.jpg")) +
|
| 111 |
+
glob.glob(os.path.join(imgs_dir, "*.jpeg")) +
|
| 112 |
+
glob.glob(os.path.join(imgs_dir, "*.png")))
|
| 113 |
+
|
| 114 |
+
for img_path in img_files:
|
| 115 |
+
basename = os.path.basename(img_path)
|
| 116 |
+
new_basename = unique_name(split, basename, used_names)
|
| 117 |
+
dst_img = os.path.join(out_imgs, new_basename)
|
| 118 |
+
|
| 119 |
+
ok_img, method_img, err_img = link_or_copy(img_path, dst_img, max_retries=3, allow_copy=True)
|
| 120 |
+
if not ok_img:
|
| 121 |
+
manifest["failures"].append({
|
| 122 |
+
"type": "image_copy_failed",
|
| 123 |
+
"src": img_path,
|
| 124 |
+
"dst": dst_img,
|
| 125 |
+
"error": err_img
|
| 126 |
+
})
|
| 127 |
+
continue
|
| 128 |
+
|
| 129 |
+
counters["images"] += 1
|
| 130 |
+
manifest["images"].append({"src": img_path, "dst": dst_img, "method": method_img})
|
| 131 |
+
|
| 132 |
+
# create or link label
|
| 133 |
+
orig_label_base = os.path.splitext(basename)[0]
|
| 134 |
+
lab_src = os.path.join(labs_dir, orig_label_base + ".txt")
|
| 135 |
+
dst_lab = os.path.join(out_labs, os.path.splitext(new_basename)[0] + ".txt")
|
| 136 |
+
|
| 137 |
+
if os.path.exists(lab_src):
|
| 138 |
+
ok_lab, method_lab, err_lab = link_or_copy(lab_src, dst_lab, max_retries=3, allow_copy=True)
|
| 139 |
+
if not ok_lab:
|
| 140 |
+
manifest["failures"].append({
|
| 141 |
+
"type": "label_copy_failed",
|
| 142 |
+
"src": lab_src,
|
| 143 |
+
"dst": dst_lab,
|
| 144 |
+
"error": err_lab
|
| 145 |
+
})
|
| 146 |
+
else:
|
| 147 |
+
counters["labels"] += 1
|
| 148 |
+
manifest["labels"].append({"src": lab_src, "dst": dst_lab, "method": method_lab})
|
| 149 |
+
else:
|
| 150 |
+
# Create empty label file so YOLO treats it as background (explicit)
|
| 151 |
+
try:
|
| 152 |
+
open(dst_lab, "w").close()
|
| 153 |
+
counters["labels"] += 1
|
| 154 |
+
manifest["labels"].append({"src": None, "dst": dst_lab, "method": "empty_created"})
|
| 155 |
+
except Exception as e:
|
| 156 |
+
manifest["failures"].append({
|
| 157 |
+
"type": "label_create_failed",
|
| 158 |
+
"src": None,
|
| 159 |
+
"dst": dst_lab,
|
| 160 |
+
"error": str(e)
|
| 161 |
+
})
|
| 162 |
+
|
| 163 |
+
# Final verification: every label should have at least one matching image with same base (any ext)
|
| 164 |
+
out_img_bases = set(os.path.splitext(os.path.basename(p))[0] for p in glob.glob(os.path.join(out_imgs, "*")))
|
| 165 |
+
missing_pairs = []
|
| 166 |
+
for lab in glob.glob(os.path.join(out_labs, "*.txt")):
|
| 167 |
+
base = os.path.splitext(os.path.basename(lab))[0]
|
| 168 |
+
if base not in out_img_bases:
|
| 169 |
+
# Labels that don't have corresponding image
|
| 170 |
+
missing_pairs.append(base)
|
| 171 |
+
|
| 172 |
+
manifest["post_check_missing"] = missing_pairs
|
| 173 |
+
|
| 174 |
+
manifest_path = os.path.join(det_dir, "train_all_manifest.json")
|
| 175 |
+
try:
|
| 176 |
+
with open(manifest_path, "w") as f:
|
| 177 |
+
json.dump({"counters": counters, "manifest": manifest}, f, indent=2)
|
| 178 |
+
except Exception as e:
|
| 179 |
+
# fallback printing
|
| 180 |
+
print("⚠️ Could not write manifest:", e)
|
| 181 |
+
|
| 182 |
+
return out_imgs, out_labs, counters, manifest_path
|
| 183 |
+
|
| 184 |
+
# ---------------------------
|
| 185 |
+
# Write temporary data YAML
|
| 186 |
+
# ---------------------------
|
| 187 |
+
|
| 188 |
+
def write_temp_data_yaml(det_dir, data_yaml_path, train_rel="images/train_all", val_rel="images/val", test_rel="images/test", names_list=None):
|
| 189 |
+
"""
|
| 190 |
+
Writes a temporary data YAML for training.
|
| 191 |
+
"""
|
| 192 |
+
if names_list is None:
|
| 193 |
+
orig = os.path.join(det_dir, "data.yaml")
|
| 194 |
+
if os.path.exists(orig):
|
| 195 |
+
try:
|
| 196 |
+
import yaml
|
| 197 |
+
with open(orig, "r") as f:
|
| 198 |
+
d = yaml.safe_load(f)
|
| 199 |
+
names_list = d.get("names")
|
| 200 |
+
if isinstance(names_list, dict):
|
| 201 |
+
# convert mapping to ordered list by int key
|
| 202 |
+
sorted_items = sorted(names_list.items(), key=lambda x: int(x[0]))
|
| 203 |
+
names_list = [v for k, v in sorted_items]
|
| 204 |
+
except Exception:
|
| 205 |
+
names_list = None
|
| 206 |
+
if names_list is None:
|
| 207 |
+
# safe default if reading fails
|
| 208 |
+
names_list = [f"class{i}" for i in range(25)]
|
| 209 |
+
|
| 210 |
+
abs_path = os.path.abspath(det_dir)
|
| 211 |
+
yaml_str = f"path: {abs_path}\n\ntrain: {train_rel}\nval: {val_rel}\ntest: {test_rel}\n\nnc: {len(names_list)}\nnames:\n"
|
| 212 |
+
for i, n in enumerate(names_list):
|
| 213 |
+
yaml_str += f" {i}: {n}\n"
|
| 214 |
+
|
| 215 |
+
with open(data_yaml_path, "w") as f:
|
| 216 |
+
f.write(yaml_str)
|
| 217 |
+
|
| 218 |
+
return data_yaml_path
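# For reference, the YAML written above has this shape (path and class names
# here are illustrative; the real values come from the dataset's data.yaml):
#
# path: /abs/path/to/smartvision_dataset/detection
#
# train: images/train_all
# val: images/val
# test: images/test
#
# nc: 25
# names:
#   0: airplane
#   1: bed
#   ...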
|
| 219 |
+
|
| 220 |
+
# ---------------------------
|
| 221 |
+
# Main flow
|
| 222 |
+
# ---------------------------
|
| 223 |
+
|
| 224 |
+
def main(
|
| 225 |
+
base_dir="smartvision_dataset",
|
| 226 |
+
run_project="yolo_runs",
|
| 227 |
+
run_name="smartvision_yolov8s_alltrain",
|
| 228 |
+
model_weights="yolov8s.pt",
|
| 229 |
+
quick_test=False,
|
| 230 |
+
epochs_full=50,
|
| 231 |
+
batch=8,
|
| 232 |
+
keep_temp=False,
|
| 233 |
+
):
|
| 234 |
+
DET_DIR = os.path.join(base_dir, "detection")
|
| 235 |
+
DATA_YAML_ORIG = os.path.join(DET_DIR, "data.yaml")
|
| 236 |
+
|
| 237 |
+
# safety checks
|
| 238 |
+
if not os.path.exists(DET_DIR):
|
| 239 |
+
raise FileNotFoundError(f"Detection directory not found: {DET_DIR}")
|
| 240 |
+
if not os.path.exists(DATA_YAML_ORIG):
|
| 241 |
+
raise FileNotFoundError(f"Original data.yaml not found: {DATA_YAML_ORIG}")
|
| 242 |
+
|
| 243 |
+
# show basic dataset split counts
|
| 244 |
+
for split in ["train", "val", "test"]:
|
| 245 |
+
img_dir = os.path.join(DET_DIR, "images", split)
|
| 246 |
+
lab_dir = os.path.join(DET_DIR, "labels", split)
|
| 247 |
+
num_imgs = len(glob.glob(os.path.join(img_dir, "*.jpg"))) + len(glob.glob(os.path.join(img_dir, "*.png"))) + len(glob.glob(os.path.join(img_dir, "*.jpeg")))
|
| 248 |
+
num_labs = len(glob.glob(os.path.join(lab_dir, "*.txt")))
|
| 249 |
+
print(f"✅ {split.upper():5s}: {num_imgs} images, {num_labs} label files")
|
| 250 |
+
|
| 251 |
+
# Read class names from original data.yaml (if possible)
|
| 252 |
+
try:
|
| 253 |
+
import yaml
|
| 254 |
+
with open(DATA_YAML_ORIG, "r") as f:
|
| 255 |
+
orig_yaml = yaml.safe_load(f)
|
| 256 |
+
names = orig_yaml.get("names")
|
| 257 |
+
if isinstance(names, dict):
|
| 258 |
+
sorted_items = sorted(names.items(), key=lambda x: int(x[0]))
|
| 259 |
+
names_list = [v for k, v in sorted_items]
|
| 260 |
+
else:
|
| 261 |
+
names_list = names
|
| 262 |
+
except Exception:
|
| 263 |
+
names_list = None
|
| 264 |
+
|
| 265 |
+
print("🧩 Creating combined train_all (train+val+test)...")
|
| 266 |
+
imgs_train_all, labs_train_all, counters, manifest_path = create_train_all(DET_DIR, splits=("train", "val", "test"))
|
| 267 |
+
print(f" ➜ train_all images: {counters['images']}, labels: {counters['labels']}")
|
| 268 |
+
print(f" ➜ manifest written to: {manifest_path}")
|
| 269 |
+
|
| 270 |
+
# read manifest and abort early on issues
|
| 271 |
+
try:
|
| 272 |
+
with open(manifest_path, "r") as f:
|
| 273 |
+
manifest_data = json.load(f)
|
| 274 |
+
manifest = manifest_data.get("manifest", {})
|
| 275 |
+
except Exception:
|
| 276 |
+
manifest = {}
|
| 277 |
+
|
| 278 |
+
failures = manifest.get("failures", [])
|
| 279 |
+
post_missing = manifest.get("post_check_missing", [])
|
| 280 |
+
|
| 281 |
+
if failures:
|
| 282 |
+
print("\n❌ Errors found while creating train_all (see manifest). Aborting training.")
|
| 283 |
+
print(f" Failures count: {len(failures)}. Sample:")
|
| 284 |
+
for f in failures[:10]:
|
| 285 |
+
print(" -", f)
|
| 286 |
+
print(f"\nInspect and fix ({manifest_path}) then re-run.")
|
| 287 |
+
return
|
| 288 |
+
|
| 289 |
+
if post_missing:
|
| 290 |
+
print("\n❌ Post-creation check failed: some labels don't have matching images.")
|
| 291 |
+
print(f" Missing pairs count: {len(post_missing)}. Sample: {post_missing[:20]}")
|
| 292 |
+
print(f"Please inspect the labels/images under {labs_train_all} and {imgs_train_all}. Aborting.")
|
| 293 |
+
return
|
| 294 |
+
|
| 295 |
+
# write temporary data yaml
|
| 296 |
+
temp_data_yaml = os.path.join(DET_DIR, "data_all.yaml")
|
| 297 |
+
write_temp_data_yaml(DET_DIR, temp_data_yaml, train_rel="images/train_all", val_rel="images/val", test_rel="images/test", names_list=names_list)
|
| 298 |
+
print(f"📝 Temporary data yaml created at: {temp_data_yaml}")
|
| 299 |
+
|
| 300 |
+
# determine epochs
|
| 301 |
+
EPOCHS = 1 if quick_test else epochs_full
|
| 302 |
+
device = "0" if torch.cuda.is_available() else "cpu"
|
| 303 |
+
print(f"🚀 Device: {device}; QUICK_TEST: {quick_test}; EPOCHS: {EPOCHS}")
|
| 304 |
+
|
| 305 |
+
# load base model
|
| 306 |
+
print(f"\n📥 Loading YOLOv8 base model from: {model_weights}")
|
| 307 |
+
model = YOLO(model_weights)
|
| 308 |
+
|
| 309 |
+
# Train on train_all
|
| 310 |
+
run_name_final = run_name
|
| 311 |
+
print("\n===== STARTING TRAINING on ALL IMAGES (train_all) =====")
|
| 312 |
+
results = model.train(
|
| 313 |
+
data=temp_data_yaml,
|
| 314 |
+
epochs=EPOCHS,
|
| 315 |
+
imgsz=640,
|
| 316 |
+
batch=batch,
|
| 317 |
+
lr0=0.01,
|
| 318 |
+
optimizer="SGD",
|
| 319 |
+
device=device,
|
| 320 |
+
project=run_project,
|
| 321 |
+
name=run_name_final,
|
| 322 |
+
pretrained=True,
|
| 323 |
+
plots=True,
|
| 324 |
+
verbose=True,
|
| 325 |
+
)
|
| 326 |
+
print("\n✅ Training finished.")
|
| 327 |
+
|
| 328 |
+
run_dir = os.path.join(run_project, run_name_final)
|
| 329 |
+
best_weights = os.path.join(run_dir, "weights", "best.pt")
|
| 330 |
+
if not os.path.exists(best_weights):
|
| 331 |
+
print("⚠️ best.pt not found after training — attempting to use last.pt")
|
| 332 |
+
last = os.path.join(run_dir, "weights", "last.pt")
|
| 333 |
+
if os.path.exists(last):
|
| 334 |
+
best_weights = last
|
| 335 |
+
else:
|
| 336 |
+
raise FileNotFoundError("No trained weights found (best.pt or last.pt).")
|
| 337 |
+
|
| 338 |
+
# Load trained model
|
| 339 |
+
print(f"\n📥 Loading trained model from: {best_weights}")
|
| 340 |
+
model = YOLO(best_weights)
|
| 341 |
+
print("✅ Model loaded. Running val/test on original val & test splits...")
|
| 342 |
+
|
| 343 |
+
# Validation (val split)
|
| 344 |
+
print("\n===== VALIDATION (original val split) =====")
|
| 345 |
+
metrics_val = model.val(data=DATA_YAML_ORIG, split="val", imgsz=640, save_json=False)
|
| 346 |
+
print(f"[VAL] mAP@0.5 : {metrics_val.box.map50:.4f} mAP@0.5:0.95 : {metrics_val.box.map:.4f}")
|
| 347 |
+
|
| 348 |
+
# Test (test split)
|
| 349 |
+
print("\n===== TEST (original test split) =====")
|
| 350 |
+
metrics_test = model.val(data=DATA_YAML_ORIG, split="test", imgsz=640, save_json=False)
|
| 351 |
+
print(f"[TEST] mAP@0.5 : {metrics_test.box.map50:.4f} mAP@0.5:0.95 : {metrics_test.box.map:.4f}")
|
| 352 |
+
|
| 353 |
+
# FPS test on val images (small subset)
|
| 354 |
+
val_images_dir = os.path.join(DET_DIR, "images", "val")
|
| 355 |
+
val_images = sorted(glob.glob(os.path.join(val_images_dir, "*.jpg")) +
|
| 356 |
+
glob.glob(os.path.join(val_images_dir, "*.png")) +
|
| 357 |
+
glob.glob(os.path.join(val_images_dir, "*.jpeg")))
|
| 358 |
+
n_proc = min(50, len(val_images))
|
| 359 |
+
test_imgs = val_images[:n_proc]
|
| 360 |
+
if test_imgs:
|
| 361 |
+
print(f"\n🏃 Running speed test on {len(test_imgs)} val images...")
|
| 362 |
+
start = time.perf_counter()
|
| 363 |
+
_ = model.predict(source=test_imgs, imgsz=640, conf=0.5, verbose=False)
|
| 364 |
+
duration = time.perf_counter() - start
|
| 365 |
+
print(f" Total {duration:.2f}s -> {duration/len(test_imgs)*1000:.2f} ms/img -> {1.0/(duration/len(test_imgs)):.2f} FPS")
|
| 366 |
+
else:
|
| 367 |
+
print("⚠️ No val images found for speed test.")
|
| 368 |
+
|
| 369 |
+
+    # Save metrics to JSON
+    metrics_out = {
+        "train_all_counters": counters,
+        "val_map50": float(metrics_val.box.map50),
+        "test_map50": float(metrics_test.box.map50),
+        "val_map50_95": float(metrics_val.box.map),
+        "test_map50_95": float(metrics_test.box.map),
+        "run_dir": run_dir,
+        "best_weights": best_weights,
+    }
+    os.makedirs("yolo_metrics", exist_ok=True)
+    json_path = os.path.join("yolo_metrics", f"yolov8s_metrics_alltrain.json")
+    with open(json_path, "w") as f:
+        json.dump(metrics_out, f, indent=2)
+    print(f"\n💾 Saved metrics to: {json_path}")
+
+    # Cleanup if requested
+    if not keep_temp:
+        try:
+            print("\n🧹 Cleaning temporary train_all files and temp data yaml...")
+            shutil.rmtree(os.path.join(DET_DIR, "images", "train_all"), ignore_errors=True)
+            shutil.rmtree(os.path.join(DET_DIR, "labels", "train_all"), ignore_errors=True)
+            if os.path.exists(temp_data_yaml):
+                os.remove(temp_data_yaml)
+            if os.path.exists(manifest_path):
+                os.remove(manifest_path)
+            print("✅ Temp cleanup done.")
+        except Exception as e:
+            print("⚠️ Cleanup error:", e)
+    else:
+        print(f"\nℹ️ Kept temp train_all and temp yaml as requested. Path: {os.path.join(DET_DIR, 'images', 'train_all')}")
+
+    print("\n🎯 ALL DONE.")
+
+# ---------------------------
+# CLI
+# ---------------------------
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Train YOLOv8 on ALL images (train+val+test) then validate/test on original splits.")
+    parser.add_argument("--dataset-dir", "-d", default="smartvision_dataset", help="Base dataset directory (default: smartvision_dataset)")
+    parser.add_argument("--model", "-m", default="yolov8s.pt", help="Base yolov8 weights (default: yolov8s.pt)")
+    parser.add_argument("--quick", action="store_true", help="Quick test (1 epoch, small speed test)")
+    parser.add_argument("--epochs", type=int, default=50, help="Full epochs when not quick")
+    parser.add_argument("--batch", type=int, default=8, help="Batch size")
+    parser.add_argument("--no-clean", dest="keep_temp", action="store_true", help="Do NOT remove temp train_all folder and temp yaml after run")
+    parser.add_argument("--project", default="yolo_runs", help="Ultralytics runs project folder")
+    parser.add_argument("--name", default="smartvision_yolov8s_alltrain", help="Run name")
+    args = parser.parse_args()
+
+    main(
+        base_dir=args.dataset_dir,
+        run_project=args.project,
+        run_name=args.name,
+        model_weights=args.model,
+        quick_test=args.quick,
+        epochs_full=args.epochs,
+        batch=args.batch,
+        keep_temp=args.keep_temp,
+    )
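For reference, a typical invocation of the script above (flags taken from its argparse block; the dataset path and run name are simply the defaults shown there):

    python scripts/train_yolo_smartvision.py --dataset-dir smartvision_dataset --model yolov8s.pt --epochs 50 --batch 8 --project yolo_runs --name smartvision_yolov8s_alltrain

Adding --quick limits the run to 1 epoch as a smoke test, and --no-clean keeps the temporary train_all images/labels and data_all.yaml for inspection.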
scripts/yolov8n.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+size 6549796
smartvision_metrics/comparison_plots/MobileNetV2_cm.png
ADDED
smartvision_metrics/comparison_plots/MobileNetV2_v3_cm.png
ADDED
smartvision_metrics/comparison_plots/ResNet50_cm.png
ADDED
smartvision_metrics/comparison_plots/ResNet50_v2_Stage_2_FT_cm.png
ADDED
smartvision_metrics/comparison_plots/VGG16_cm.png
ADDED
smartvision_metrics/comparison_plots/VGG16_v2_Stage_2_FT_cm.png
ADDED
smartvision_metrics/comparison_plots/accuracy_comparison.png
ADDED
smartvision_metrics/comparison_plots/efficientnetb0_cm.png
ADDED
smartvision_metrics/comparison_plots/f1_comparison.png
ADDED
smartvision_metrics/comparison_plots/size_comparison.png
ADDED
smartvision_metrics/comparison_plots/speed_comparison.png
ADDED
smartvision_metrics/comparison_plots/top5_comparison.png
ADDED
smartvision_metrics/efficientnetb0/confusion_matrix.npy
ADDED
Binary file (5.13 kB)
smartvision_metrics/efficientnetb0/metrics.json
ADDED
@@ -0,0 +1,12 @@
+{
+  "model_name": "efficientnetb0",
+  "accuracy": 0.7573333333333333,
+  "precision_weighted": 0.7654770197123137,
+  "recall_weighted": 0.7573333333333333,
+  "f1_weighted": 0.7558831298763445,
+  "top5_accuracy": 0.8746666666666667,
+  "avg_inference_time_sec": 0.7098700226666406,
+  "images_per_second": 1.4087085918116116,
+  "model_size_mb": 71.93372344970703,
+  "num_parameters": 4390076
+}
smartvision_metrics/efficientnetb0_stage2/confusion_matrix.npy
ADDED
Binary file (5.13 kB)
smartvision_metrics/efficientnetb0_stage2/metrics.json
ADDED
@@ -0,0 +1,12 @@
+{
+  "model_name": "efficientnetb0_stage2",
+  "accuracy": 0.7973333333333333,
+  "precision_weighted": 0.8018408351194729,
+  "recall_weighted": 0.7973333333333333,
+  "f1_weighted": 0.7955331918405726,
+  "top5_accuracy": 0.92,
+  "avg_inference_time_sec": 0.07931595280021429,
+  "images_per_second": 12.607804164174375,
+  "model_size_mb": 44.42613220214844,
+  "num_parameters": 4390076
+}
smartvision_metrics/mobilenetv2/confusion_matrix.npy
ADDED
Binary file (5.13 kB)
smartvision_metrics/mobilenetv2/metrics.json
ADDED
@@ -0,0 +1,12 @@
+{
+  "model_name": "mobilenetv2",
+  "accuracy": 0.6506666666666666,
+  "precision_weighted": 0.6619423668866393,
+  "recall_weighted": 0.6506666666666666,
+  "f1_weighted": 0.6420473620753672,
+  "top5_accuracy": 0.9013333333333333,
+  "avg_inference_time_sec": 0.04660592453321442,
+  "images_per_second": 21.456499576300324,
+  "model_size_mb": 10.954902648925781,
+  "num_parameters": 2425177
+}
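The metrics.json files above (and the other model folders under smartvision_metrics/) share the same keys, so they can be folded into a single comparison table. A minimal sketch of such an aggregation, assuming only the directory layout shown in this diff (illustrative, not the repo's compare_models.py):

    import glob
    import json
    import os

    rows = []
    for path in sorted(glob.glob(os.path.join("smartvision_metrics", "*", "metrics.json"))):
        with open(path) as fh:
            rows.append(json.load(fh))  # keys: model_name, accuracy, f1_weighted, top5_accuracy, ...

    # One line per model, using the fields stored by the evaluation scripts.
    print(f"{'model':<26} {'acc':>7} {'f1':>7} {'top5':>7} {'img/s':>8} {'MB':>8}")
    for m in rows:
        print(f"{m['model_name']:<26} {m['accuracy']:>7.3f} {m['f1_weighted']:>7.3f} "
              f"{m['top5_accuracy']:>7.3f} {m['images_per_second']:>8.2f} {m['model_size_mb']:>8.1f}")

On the three files shown here, this would surface, for example, that efficientnetb0_stage2 improves accuracy over efficientnetb0 (0.797 vs 0.757), while mobilenetv2 trades accuracy (0.651) for the smallest size (~11 MB) and the fastest inference of the three (~21 images/s).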