---
license: mit
language:
- en
metrics:
- accuracy
tags:
- code
---
| |
|
# ResNet Cat-Dog Classifier

This repository contains a ResNet-based convolutional neural network trained to classify images as either cats or dogs. The model achieves an accuracy of 90.27% on a held-out test set and is obtained by fine-tuning ImageNet-pretrained weights (transfer learning) on a cats-vs-dogs dataset. It uses PyTorch for training and inference.
| |
|
## Model Details

### Architecture
- Backbone: ResNet-18
- Input size: 128x128 RGB images
- Output: binary classification (Cat or Dog)

### Training Details
- Dataset: Kaggle Cats and Dogs dataset
- Loss function: cross-entropy loss
- Optimizer: Adam
- Learning rate: 0.001
- Epochs: 15
- Batch size: 32

### Performance
- Accuracy: 90.27% on test images
- Training time: approximately 1 hour on an NVIDIA RTX 3050 Ti
| |
|
| |
|
## Results

<!-- NOTE(review): the results image markdown was lost in extraction; re-add it, e.g. ![Results](results.png) -->

## Usage

### Installation
- Dependencies: PyTorch, TorchVision, matplotlib
| |
|
| |
|
| |
|
### Inference

```python
import torch
from torchvision.models import resnet18
from PIL import Image
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Build a ResNet-18 and replace the 1000-class ImageNet head with a
# 2-class head (index 0 = Cat, index 1 = Dog; see predict_image below).
model = resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)

# Load the trained model state_dict.
# map_location='cpu' makes loading work on CPU-only machines even when the
# checkpoint was saved from a CUDA device; the state_dict load is unaffected.
model_path = 'cat_dog_classifier.pth'
model.load_state_dict(torch.load(model_path, map_location='cpu'))
model.eval()
| | |
# Reference — architecture summary as printed by `print(model)`.
# (Kept as a no-op string so this snippet remains valid Python; the original
# README embedded it as an HTML comment inside the code fence.)
"""
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer3): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=512, out_features=2, bias=True)
)
"""
# Preprocessing pipeline — must mirror the transforms used at training time:
# resize to the model's 128x128 input, convert to a tensor, then normalize
# with the standard ImageNet channel statistics.
_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_steps)
| | |
def load_image(image_path):
    """Load the image at *image_path* and return a preprocessed 4-D tensor.

    The image is forced to RGB so that grayscale or RGBA files do not break
    the 3-channel Normalize step; it is then transformed and given a leading
    batch dimension, yielding a (1, 3, 128, 128) tensor.
    """
    image = Image.open(image_path).convert("RGB")  # ensure exactly 3 channels
    image = transform(image)
    image = image.unsqueeze(0)  # add batch dimension
    return image
| | |
def predict_image(model, image_path):
    """Run the classifier on the image at *image_path*.

    Returns "Cat" for class index 0 and "Dog" for class index 1.
    """
    batch = load_image(image_path)
    model.eval()
    with torch.no_grad():
        logits = model(batch)
        pred_idx = logits.argmax(dim=1)
    return "Cat" if pred_idx.item() == 0 else "Dog"
| | |
def plot_image(image_path, prediction):
    """Display the image at *image_path* with *prediction* as the title."""
    img = Image.open(image_path)
    plt.imshow(img)
    plt.axis('off')
    plt.title(f'Predicted: {prediction}')
    plt.show()
| | |
# Example: classify a single image and visualize the result.
img_file = "path.jpeg"
label = predict_image(model, img_file)
print(f'The predicted class for the image is: {label}')
plot_image(img_file, label)
# Example output:
# The predicted class for the image is: Cat
```