In [8]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os

BASE_DIR = r"D:\Guvi\SmartVision_AI\smartvision_dataset"
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
IMG_SIZE = (224, 224)

NUM_CLASSES = 25

train_dir = os.path.join(BASE_DIR, "classification", "train")
val_dir = os.path.join(BASE_DIR, "classification", "val")
test_dir = os.path.join(BASE_DIR, "classification", "test")

train_ds = tf.keras.utils.image_dataset_from_directory(
 train_dir,
 image_size=IMG_SIZE,
 batch_size=BATCH_SIZE,
 shuffle=True
)

val_ds = tf.keras.utils.image_dataset_from_directory(
 val_dir,
 image_size=IMG_SIZE,
 batch_size=BATCH_SIZE,
 shuffle=False
)

test_ds = tf.keras.utils.image_dataset_from_directory(
 test_dir,
 image_size=IMG_SIZE,
 batch_size=BATCH_SIZE,
 shuffle=False
)


Found 1750 files belonging to 25 classes.
Found 375 files belonging to 25 classes.
Found 375 files belonging to 25 classes.


In [None]:
# 1.4. Data augmentation block (applied only on training data)
data_augmentation = keras.Sequential(
 [
 layers.RandomFlip("horizontal"), # random horizontal flip
 layers.RandomRotation(0.04), # ~ ±15° (15/360 ≈ 0.04)
 layers.RandomZoom(0.1), # random zoom
 layers.RandomContrast(0.2), # ±20% contrast
 # Brightness jitter using Lambda + tf.image
 layers.Lambda(
 lambda x: tf.image.random_brightness(x, max_delta=0.2)
 ),
 # Optional: light color jitter via saturation
 layers.Lambda(
 lambda x: tf.image.random_saturation(x, lower=0.8, upper=1.2)
 ),
 ],
 name="data_augmentation",
)

# Normalization layer (0–1 scaling or ImageNet style)
normalization = layers.Rescaling(1./255)


In [None]:
# 2.1: Model 1 - VGG16

def build_vgg16_model():
 inputs = keras.Input(shape=(*IMG_SIZE, 3))
 x = data_augmentation(inputs) # train only
 x = normalization(x)

 base_model = keras.applications.VGG16(
 include_top=False,
 weights="imagenet",
 input_tensor=x
 )
 base_model.trainable = False # freeze convolutional base

 x = layers.GlobalAveragePooling2D()(base_model.output)
 x = layers.Dense(256, activation="relu")(x)
 x = layers.Dropout(0.5)(x)
 outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

 model = keras.Model(inputs, outputs, name="VGG16_smartvision")
 return model
def compile_and_train(model, model_name, train_ds, val_ds, epochs=25, lr=1e-4):
 model.compile(
 optimizer=keras.optimizers.Adam(learning_rate=lr),
 loss="sparse_categorical_crossentropy",
 metrics=["accuracy"]
 )

 callbacks = [
 keras.callbacks.ModelCheckpoint(
 filepath=f"{model_name}_best.h5",
 monitor="val_accuracy",
 save_best_only=True,
 mode="max"
 ),
 keras.callbacks.EarlyStopping(
 monitor="val_accuracy",
 patience=5,
 restore_best_weights=True
 ),
 keras.callbacks.ReduceLROnPlateau(
 monitor="val_loss",
 factor=0.5,
 patience=2,
 min_lr=1e-6,
 verbose=1
 )
 ]

 history = model.fit(
 train_ds,
 validation_data=val_ds,
 epochs=epochs,
 callbacks=callbacks
 )
 return history

vgg16_model = build_vgg16_model()
history_vgg16 = compile_and_train(vgg16_model, "vgg16", train_ds, val_ds, epochs=25)


Epoch 1/25
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.0405 - loss: 3.4605



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m328s[0m 6s/step - accuracy: 0.0429 - loss: 3.4206 - val_accuracy: 0.0373 - val_loss: 3.2323 - learning_rate: 1.0000e-04
Epoch 2/25
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.0474 - loss: 3.2988



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m457s[0m 8s/step - accuracy: 0.0486 - loss: 3.2914 - val_accuracy: 0.0533 - val_loss: 3.1938 - learning_rate: 1.0000e-04
Epoch 3/25
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18s/step - accuracy: 0.0463 - loss: 3.2775 



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1232s[0m 22s/step - accuracy: 0.0486 - loss: 3.2567 - val_accuracy: 0.0853 - val_loss: 3.1689 - learning_rate: 1.0000e-04
Epoch 4/25
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19s/step - accuracy: 0.0568 - loss: 3.2323 



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1278s[0m 23s/step - accuracy: 0.0543 - loss: 3.2274 - val_accuracy: 0.1360 - val_loss: 3.1451 - learning_rate: 1.0000e-04
Epoch 5/25
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16s/step - accuracy: 0.0526 - loss: 3.1936 



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1076s[0m 19s/step - accuracy: 0.0623 - loss: 3.1870 - val_accuracy: 0.1520 - val_loss: 3.1223 - learning_rate: 1.0000e-04
Epoch 6/25
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - accuracy: 0.0762 - loss: 3.1579 



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m757s[0m 14s/step - accuracy: 0.0811 - loss: 3.1483 - val_accuracy: 0.1867 - val_loss: 3.0975 - learning_rate: 1.0000e-04
Epoch 7/25
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13s/step - accuracy: 0.1051 - loss: 3.1299 



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m900s[0m 16s/step - accuracy: 0.1029 - loss: 3.1283 - val_accuracy: 0.2107 - val_loss: 3.0750 - learning_rate: 1.0000e-04
Epoch 8/25
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - accuracy: 0.1321 - loss: 3.1018 



[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m799s[0m 15s/step - accuracy: 0.1343 - loss: 3.0993 - val_accuracy: 0.2373 - val_loss: 3.0532 - learning_rate: 1.0000e-04
Epoch 9/25
[1m50/55[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m1:06[0m 13s/step - accuracy: 0.1195 - loss: 3.0798

In [None]:
class_names = train_ds.class_names
NUM_CLASSES = len(class_names)
print(class_names)

In [None]:
import numpy as np
import time
import json
import os
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support

def evaluate_and_collect_metrics(model, model_name, test_ds, class_names, weights_path=None):
 # If you saved best weights, load them
 if weights_path is not None and os.path.exists(weights_path):
 model.load_weights(weights_path)
 print(f"✅ Loaded best weights from {weights_path}")

 y_true = []
 y_pred = []
 y_pred_probs = []

 # ----- measure inference time -----
 total_time = 0.0
 total_images = 0

 for images, labels in test_ds:
 images_np = images.numpy()
 batch_size = images_np.shape[0]

 start = time.perf_counter()
 probs = model.predict(images_np, verbose=0)
 end = time.perf_counter()

 total_time += (end - start)
 total_images += batch_size

 preds = np.argmax(probs, axis=1)

 y_true.extend(labels.numpy())
 y_pred.extend(preds)
 y_pred_probs.append(probs)

 y_true = np.array(y_true)
 y_pred = np.array(y_pred)
 y_pred_probs = np.concatenate(y_pred_probs, axis=0)

 # ----- basic metrics -----
 acc = (y_true == y_pred).mean()

 precision, recall, f1, _ = precision_recall_fscore_support(
 y_true, y_pred, average="weighted", zero_division=0
 )

 # ----- top-5 accuracy -----
 top5_correct = 0
 for i, label in enumerate(y_true):
 top5 = np.argsort(y_pred_probs[i])[-5:]
 if label in top5:
 top5_correct += 1
 top5_acc = top5_correct / len(y_true)

 # ----- inference time -----
 avg_time_per_image = total_time / total_images # seconds
 imgs_per_second = 1.0 / avg_time_per_image if avg_time_per_image > 0 else 0.0

 # ----- model size -----
 # Save weights temporarily to compute size
 temp_weights = f"{model_name}_temp_for_size.weights.h5" 
 model.save_weights(temp_weights)
 size_mb = os.path.getsize(temp_weights) / (1024 * 1024)
 os.remove(temp_weights)

 # ----- classification report & confusion matrix (for plots) -----
 print(f"\n=== {model_name.upper()} – Classification Report ===")
 print(classification_report(y_true, y_pred, target_names=class_names, zero_division=0))

 cm = confusion_matrix(y_true, y_pred)
 print(f"\nConfusion matrix shape: {cm.shape}")

 metrics = {
 "model_name": model_name,
 "accuracy": float(acc),
 "precision_weighted": float(precision),
 "recall_weighted": float(recall),
 "f1_weighted": float(f1),
 "top5_accuracy": float(top5_acc),
 "avg_inference_time_sec_per_image": float(avg_time_per_image),
 "images_per_second": float(imgs_per_second),
 "model_size_mb": float(size_mb),
 "num_parameters": int(model.count_params()),
 }
 return metrics, cm


In [None]:
vgg_metrics, vgg_cm = evaluate_and_collect_metrics(
 vgg16_model, "vgg16", test_ds, class_names, "vgg16_best.h5"
)
with open("vgg16_metrics.json", "w") as f:
 json.dump(vgg_metrics, f, indent=2)

In [None]:
# 2.2: Model 2 - ResNet50
def build_resnet50_model():
 inputs = keras.Input(shape=(*IMG_SIZE, 3))
 x = data_augmentation(inputs)
 x = normalization(x)

 base_model = keras.applications.ResNet50(
 include_top=False,
 weights="imagenet",
 input_tensor=x
 )

 # Freeze all, then unfreeze last 20 layers
 for layer in base_model.layers:
 layer.trainable = False
 for layer in base_model.layers[-20:]:
 layer.trainable = True

 x = layers.GlobalAveragePooling2D()(base_model.output)
 x = layers.Dense(256, activation="relu")(x)
 x = layers.Dropout(0.5)(x)
 outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

 model = keras.Model(inputs, outputs, name="ResNet50_smartvision")
 return model

resnet_model = build_resnet50_model()
history_resnet = compile_and_train(resnet_model, "resnet50", train_ds, val_ds, epochs=25, lr=1e-4)


In [None]:
# 2.3: Model 3 - MobileNetV2

def build_mobilenetv2_model():
 inputs = keras.Input(shape=(*IMG_SIZE, 3))
 x = data_augmentation(inputs)
 x = normalization(x)

 base_model = keras.applications.MobileNetV2(
 include_top=False,
 weights="imagenet",
 input_tensor=x
 )
 base_model.trainable = False # keep it light & fast

 x = layers.GlobalAveragePooling2D()(base_model.output)
 x = layers.Dense(128, activation="relu")(x)
 x = layers.Dropout(0.3)(x)
 outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

 model = keras.Model(inputs, outputs, name="MobileNetV2_smartvision")
 return model

mobilenet_model = build_mobilenetv2_model()
history_mobilenet = compile_and_train(mobilenet_model, "mobilenetv2", train_ds, val_ds, epochs=20, lr=1e-4)


In [None]:
# 2.4: Model 4 - EfficientNetB0

from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16") # for GPU speed

def build_efficientnetb0_model():
 inputs = keras.Input(shape=(*IMG_SIZE, 3))
 x = data_augmentation(inputs)
 x = normalization(x)

 base_model = keras.applications.EfficientNetB0(
 include_top=False,
 weights="imagenet",
 input_tensor=x
 )

 # Fine-tune: unfreeze some top layers
 for layer in base_model.layers[:-30]:
 layer.trainable = False
 for layer in base_model.layers[-30:]:
 layer.trainable = True

 x = layers.GlobalAveragePooling2D()(base_model.output)
 x = layers.BatchNormalization()(x)
 x = layers.Dense(256, activation="relu")(x)
 x = layers.Dropout(0.4)(x)
 outputs = layers.Dense(NUM_CLASSES, activation="softmax", dtype="float32")(x) # force float32 at output

 model = keras.Model(inputs, outputs, name="EfficientNetB0_smartvision")
 return model

effnet_model = build_efficientnetb0_model()
history_effnet = compile_and_train(effnet_model, "efficientnetb0", train_ds, val_ds, epochs=30, lr=5e-5)


In [None]:
# 2.5: Model Comparison & Selection

from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

def evaluate_on_test(model, test_ds, model_name):
 y_true = []
 y_pred = []

 for images, labels in test_ds:
 preds = model.predict(images)
 y_true.extend(labels.numpy())
 y_pred.extend(np.argmax(preds, axis=1))

 print(f"\n=== {model_name} TEST REPORT ===")
 print(classification_report(y_true, y_pred, target_names=class_names))

 cm = confusion_matrix(y_true, y_pred)
 plt.figure(figsize=(10, 8))
 sns.heatmap(cm, annot=False, cmap="Blues",
 xticklabels=class_names,
 yticklabels=class_names)
 plt.title(f"{model_name} - Confusion Matrix")
 plt.xlabel("Predicted")
 plt.ylabel("True")
 plt.show()

# Load best weights if needed and evaluate
vgg16_model.load_weights("vgg16_best.h5")
resnet_model.load_weights("resnet50_best.h5")
mobilenet_model.load_weights("mobilenetv2_best.h5")
effnet_model.load_weights("efficientnetb0_best.h5")

evaluate_on_test(vgg16_model, test_ds, "VGG16")
evaluate_on_test(resnet_model, test_ds, "ResNet50")
evaluate_on_test(mobilenet_model, test_ds, "MobileNetV2")
evaluate_on_test(effnet_model, test_ds, "EfficientNetB0")
