Motivation

During my 2023 deep learning class, we were asked to build a slightly modified version of VGG16 with PyTorch. The program was originally written in a Jupyter notebook.

What is the VGG16 architecture?

VGG stands for Visual Geometry Group, a research group at the University of Oxford. VGG16 is a well-known convolutional neural network used for image classification. Introduced in a 2014 paper, it gained recognition for its deep stack of convolutional layers, its small 3x3 filters, and its uniform architecture.
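
For reference (this summary is my own, not part of the assignment), the convolutional part of VGG16, configuration D in the original paper, can be sketched as the layer plan below: each number is the output-channel count of a 3x3 convolution, and "M" marks a 2x2 max-pool.

# Thirteen 3x3 convolutions in five blocks, each block ending in a 2x2 max-pool;
# three fully connected layers (4096, 4096, n_classes) follow after flattening.
VGG16_CONV_PLAN = [64, 64, "M",
                   128, 128, "M",
                   256, 256, 256, "M",
                   512, 512, 512, "M",
                   512, 512, 512, "M"]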

Implementing the VGG16 architecture

This is a simple implementation of the VGG16 model. It does not train the model, but it has some cute helper methods to visualize the filters of the first and last convolution layers, as well as the last feature map of the network.

import torch
import torch.nn as nn
import torch.nn.init as init
device = "cuda" if torch.cuda.is_available() else "cpu"
class VGG16(nn.Module):
  """This class implements the VGG-16 architecture in PyTorch"""

  def __init__(self, activation_str="relu"):
    """
      Constructor for the VGG16 class.

      activation_str: string, default "relu"
        Activation function to use.
    """
    super(VGG16, self).__init__()

    self.n_classes = 10
    self.activation_str = activation_str

    self.conv_layer_1 = nn.Conv2d(in_channels = 3, out_channels = 64, kernel_size = 3, padding = "same")
    self.conv_layer_2 = nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, padding = "same")

    self.conv_layer_3 = nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, padding = "same")
    self.conv_layer_4 = nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, padding = "same")

    self.conv_layer_5 = nn.Conv2d(in_channels = 128, out_channels = 256, kernel_size = 3, padding = "same")
    self.conv_layer_6 = nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, padding = "same")
    self.conv_layer_7 = nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, padding = "same")

    self.conv_layer_8 = nn.Conv2d(in_channels = 256, out_channels = 512, kernel_size = 3, padding = "same")
    self.conv_layer_9 = nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = "same")
    self.conv_layer_10 = nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = "same")

    self.conv_layer_11 = nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = "same")
    self.conv_layer_12 = nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = "same")
    self.conv_layer_13 = nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = "same")

    # Add 2D batch normalization after every convolutional layer
    self.conv_layer_1_bn  = nn.BatchNorm2d(num_features = 64)
    self.conv_layer_2_bn  = nn.BatchNorm2d(num_features = 64)
    self.conv_layer_3_bn  = nn.BatchNorm2d(num_features = 128)
    self.conv_layer_4_bn  = nn.BatchNorm2d(num_features = 128)
    self.conv_layer_5_bn  = nn.BatchNorm2d(num_features = 256)
    self.conv_layer_6_bn  = nn.BatchNorm2d(num_features = 256)
    self.conv_layer_7_bn  = nn.BatchNorm2d(num_features = 256)
    self.conv_layer_8_bn  = nn.BatchNorm2d(num_features = 512)
    self.conv_layer_9_bn  = nn.BatchNorm2d(num_features = 512)
    self.conv_layer_10_bn = nn.BatchNorm2d(num_features = 512)
    self.conv_layer_11_bn = nn.BatchNorm2d(num_features = 512)
    self.conv_layer_12_bn = nn.BatchNorm2d(num_features = 512)
    self.conv_layer_13_bn = nn.BatchNorm2d(num_features = 512)

    self.max_pool_layer_1 = nn.MaxPool2d(kernel_size=2, stride = 2)
    self.max_pool_layer_2 = nn.MaxPool2d(kernel_size=2, stride = 2)
    self.max_pool_layer_3 = nn.MaxPool2d(kernel_size=2, stride = 2)
    self.max_pool_layer_4 = nn.MaxPool2d(kernel_size=2, stride = 2)
    self.max_pool_layer_5 = nn.MaxPool2d(kernel_size=2, stride = 2)

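    # Classifier head. in_features = 25088 = 512 * 7 * 7, which assumes
    # 224x224 RGB inputs (five 2x2 max-pools reduce 224 to 7).
    # The original VGG16 uses 1000 ImageNet classes; here n_classes is 10.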
    self.fc_1 = nn.Linear(in_features = 25088, out_features = 4096)
    self.fc_2 = nn.Linear(in_features = 4096, out_features = 4096)
    self.fc_3 = nn.Linear(in_features = 4096, out_features = self.n_classes)

    # Initialize the weights of each trainable layer of the network using xavier_uniform initialization
    def xavier_init(layer):
      init.xavier_uniform_(layer.weight)

    xavier_init(self.conv_layer_1)
    xavier_init(self.conv_layer_2)
    xavier_init(self.conv_layer_3)
    xavier_init(self.conv_layer_4)
    xavier_init(self.conv_layer_5)
    xavier_init(self.conv_layer_6)
    xavier_init(self.conv_layer_7)
    xavier_init(self.conv_layer_8)
    xavier_init(self.conv_layer_9)
    xavier_init(self.conv_layer_10)
    xavier_init(self.conv_layer_11)
    xavier_init(self.conv_layer_12)
    xavier_init(self.conv_layer_13)

    xavier_init(self.fc_1)
    xavier_init(self.fc_2)
    xavier_init(self.fc_3)

  def activation(self, input):
    """
      input: Tensor
        Input on which the activation is applied.

      Output: Result of activation function applied on input.
        E.g. if self.activation_str is "relu", return relu(input).
    """
    if self.activation_str == "relu":
      return torch.relu(input)
    elif self.activation_str == "tanh":
      return torch.tanh(input)
    else:
      raise ValueError(f"Invalid activation: {self.activation_str}")


  def get_first_conv_layer_filters(self):
    """
      Outputs: Returns the filters in the first convolution layer.
    """
    return self.conv_layer_1.weight.detach().clone().cpu().numpy()

  def get_last_conv_layer_filters(self):
    """
      Outputs: Returns the filters in the last convolution layer.
    """
    return self.conv_layer_13.weight.detach().clone().cpu().numpy()

  def forward(self, x):
    """
      x: Tensor
        Input to the network.

      Outputs: Returns the output of the forward pass of the network.
    """
    x = self.conv_layer_1(x)
    x = self.conv_layer_1_bn(x)
    x = self.activation(x)


    x = self.conv_layer_2(x)
    x = self.conv_layer_2_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_1(x)

    x = self.conv_layer_3(x)
    x = self.conv_layer_3_bn(x)
    x = self.activation(x)

    x = self.conv_layer_4(x)
    x = self.conv_layer_4_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_2(x)

    x = self.conv_layer_5(x)
    x = self.conv_layer_5_bn(x)
    x = self.activation(x)

    x = self.conv_layer_6(x)
    x = self.conv_layer_6_bn(x)
    x = self.activation(x)

    x = self.conv_layer_7(x)
    x = self.conv_layer_7_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_3(x)

    x = self.conv_layer_8(x)
    x = self.conv_layer_8_bn(x)
    x = self.activation(x)

    x = self.conv_layer_9(x)
    x = self.conv_layer_9_bn(x)
    x = self.activation(x)

    x = self.conv_layer_10(x)
    x = self.conv_layer_10_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_4(x)

    x = self.conv_layer_11(x)
    x = self.conv_layer_11_bn(x)
    x = self.activation(x)

    x = self.conv_layer_12(x)
    x = self.conv_layer_12_bn(x)
    x = self.activation(x)

    x = self.conv_layer_13(x)
    x = self.conv_layer_13_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_5(x)

    x = torch.flatten(x, start_dim = 1)
    x = self.fc_1(x)
    x = self.activation(x)
    x = self.fc_2(x)
    x = self.activation(x)
    x = self.fc_3(x)
    # Softmax turns the class scores into probabilities. If the model were
    # trained with nn.CrossEntropyLoss, this softmax would normally be omitted,
    # since that loss applies log-softmax internally.
    softmax = nn.Softmax(dim = 1)
    o = softmax(x)

    return o

  def last_conv(self, x):
    """
      x: Tensor
        Input to the network.

      Outputs: Returns the output of the last convolution of the network. Useful
      to visualize the last feature map.
    """
    x = self.conv_layer_1(x)
    x = self.conv_layer_1_bn(x)
    x = self.activation(x)


    x = self.conv_layer_2(x)
    x = self.conv_layer_2_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_1(x)

    x = self.conv_layer_3(x)
    x = self.conv_layer_3_bn(x)
    x = self.activation(x)

    x = self.conv_layer_4(x)
    x = self.conv_layer_4_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_2(x)

    x = self.conv_layer_5(x)
    x = self.conv_layer_5_bn(x)
    x = self.activation(x)

    x = self.conv_layer_6(x)
    x = self.conv_layer_6_bn(x)
    x = self.activation(x)

    x = self.conv_layer_7(x)
    x = self.conv_layer_7_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_3(x)

    x = self.conv_layer_8(x)
    x = self.conv_layer_8_bn(x)
    x = self.activation(x)

    x = self.conv_layer_9(x)
    x = self.conv_layer_9_bn(x)
    x = self.activation(x)

    x = self.conv_layer_10(x)
    x = self.conv_layer_10_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_4(x)

    x = self.conv_layer_11(x)
    x = self.conv_layer_11_bn(x)
    x = self.activation(x)

    x = self.conv_layer_12(x)
    x = self.conv_layer_12_bn(x)
    x = self.activation(x)

    x = self.conv_layer_13(x)
    x = self.conv_layer_13_bn(x)
    x = self.activation(x)

    x = self.max_pool_layer_5(x)
    return x
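
Finally, here is a minimal usage sketch (my own addition, not part of the original notebook) showing how the model and its helper methods might be exercised: build the network, push a random 224x224 batch through it, then pull out the first-layer filters and the last feature map for plotting. The 224x224 input size and the matplotlib plotting code are assumptions on my part.

import torch
import matplotlib.pyplot as plt

model = VGG16(activation_str="relu").to(device)
model.eval()

# Dummy batch: one RGB image of 224x224 (assumed input size, since 25088 = 512 * 7 * 7).
x = torch.randn(1, 3, 224, 224, device=device)

with torch.no_grad():
  probs = model(x)                # shape (1, 10): class probabilities
  last_fmap = model.last_conv(x)  # shape (1, 512, 7, 7): last feature map

# First-layer filters, returned as a numpy array of shape (64, 3, 3, 3).
filters = model.get_first_conv_layer_filters()

# Plot the first 8 filters (averaged over input channels) and 8 feature maps.
fig, axes = plt.subplots(2, 8, figsize=(16, 4))
for i in range(8):
  axes[0, i].imshow(filters[i].mean(axis=0), cmap="gray")
  axes[0, i].axis("off")
  axes[1, i].imshow(last_fmap[0, i].cpu().numpy(), cmap="viridis")
  axes[1, i].axis("off")
plt.show()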