Convolution Neural Network from Scratch

This is a two-part series. In this part we will walk through the forward pass of a convolutional neural network. (Detailed explanations are kept short because the code itself is commented.)

Richa Kaur

Nov 8, 2020 6 min read Computer Vision

Image credit: [sourced from google]

Import Libraries & Data

import numpy as np
import matplotlib.pyplot as plt

import skimage.data  
# Load the sample "chelsea" image that ships with scikit-image.
img = skimage.data.chelsea()
# Optional grayscale conversion, left disabled so the image keeps its colour
# channels. NOTE(review): enabling it may also need `import skimage.color`.
#img = skimage.color.rgb2gray(img)
# Print the array shape, then display the picture.
print(img.shape)
plt.imshow(img)

(300, 451, 3)





<matplotlib.image.AxesImage at 0x1f452f43088>

png

Architecture

Functions

zero padding
convolution operation
forward pass in convolution
pooling operation forward
backward pass in convolution
pooling operation backward

Zero padding

def zero_pad(X, pad):
    """
    Surround every image in a batch with a border of zeros.

    Only the height and width axes grow; the batch axis and the channel
    axis are left untouched.

    Argument:
    X -- numpy array of shape (m, n_H, n_W, n_C) holding a batch of m images
    pad -- integer, number of zero rows/columns added on each side of the
           height and width axes

    Returns:
    X_pad -- padded batch of shape (m, n_H + 2*pad, n_W + 2*pad, n_C)
    """
    untouched = (0, 0)
    border = (pad, pad)
    # One (before, after) pair per axis: batch, height, width, channels.
    pad_widths = (untouched, border, border, untouched)

    return np.pad(X, pad_widths, mode="constant", constant_values=0)

# Add a leading batch axis so the single image looks like a batch of one.
x = img[np.newaxis, ...]
x_pad = zero_pad(x, pad=20)
print(x.shape, x_pad.shape)

# Original and padded image side by side.
fig, axs = plt.subplots(nrows=1, ncols=2)
axs[0].imshow(x[0])
axs[0].set_title('X')
axs[1].imshow(x_pad[0])
axs[1].set_title('X_pad')

(1, 300, 451, 3) (1, 340, 491, 3)





Text(0.5, 1.0, 'X_pad')

png

Convolution operation

def conv_single_step(a_slice_prev, W, b):
    """
    Apply one filter defined by parameters W on a single slice (a_slice_prev) of the output activation
    of the previous layer.

    Arguments:
    a_slice_prev -- slice of input data of shape (f, f, n_C_prev)
    W -- Weight parameters contained in a window - matrix of shape (f, f, n_C_prev)
    b -- Bias parameter of the filter - a single value, given either as an array
         holding exactly one element (e.g. shape (1, 1, 1)) or as a plain scalar

    Returns:
    Z -- a scalar value, the result of convolving the sliding window (W, b) on a slice x of the input data

    Raises:
    ValueError -- if b holds more than one element
    """
    # Element-wise product of the window with the filter weights ...
    s = np.multiply(a_slice_prev, W)

    # ... summed over every position and channel.
    Z = np.sum(s)

    # The np.float alias was removed from NumPy (1.24), so the single bias
    # value is extracted with .item(), which also rejects multi-element arrays.
    Z = Z + float(np.asarray(b).item())

    return Z

# Hand-crafted 3x3 edge kernels: one reacts to left/right intensity changes,
# the other to top/bottom changes.
vertical_edges = [[-1, 0, 1],
                  [-1, 0, 1],
                  [-1, 0, 1]]
horizontal_edges = [[1, 1, 1],
                    [0, 0, 0],
                    [-1, -1, -1]]

# One kernel per input channel, stacked along the last axis.
l1_filter = np.zeros((3, 3, 3))
l1_filter[..., 0] = vertical_edges
l1_filter[..., 1] = horizontal_edges
l1_filter[..., 2] = horizontal_edges

# Try the filter on a single 3x3 patch of the image.
np.random.seed(1)
a_slice_prev = img[20:23, 20:23, :]
plt.imshow(a_slice_prev)
W = l1_filter
b = np.random.randn(1, 1, 1)

Z = conv_single_step(a_slice_prev, W, b)
print("Z =", Z)

Z = -19.37565463633676

png

Convolution forward operation

def conv_forward(A_prev, W, b, hparameters):
    """
    Implements the forward propagation for a convolution layer followed by a ReLU activation

    Arguments:
    A_prev -- output activations of the previous layer,
        numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- Weights, numpy array of shape (f, f, n_C_prev, n_C)
    b -- Biases, numpy array of shape (1, 1, 1, n_C)
    hparameters -- python dictionary containing "stride" and "pad"

    Returns:
    A -- ReLU-activated conv output, numpy array of shape (m, n_H, n_W, n_C)
    cache -- tuple (A_prev, W, b, hparameters) of values needed for the conv_backward() function

    Raises:
    ValueError -- if the channel count of A_prev differs from the channel count of W
    """
    m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
    f, f, n_C_filter, n_C = W.shape
    if n_C_filter != n_C_prev:
        raise ValueError(
            "W expects %d input channels but A_prev has %d" % (n_C_filter, n_C_prev)
        )
    stride = hparameters["stride"]
    pad = hparameters["pad"]

    # step 1: output height/width after the convolution
    n_H = 1 + int((n_H_prev + 2 * pad - f) / stride)
    n_W = 1 + int((n_W_prev + 2 * pad - f) / stride)

    # step 2: pad the whole batch once
    A_prev_pad = zero_pad(A_prev, pad)

    # Pre-activation output volume
    Z = np.zeros((m, n_H, n_W, n_C))

    # step 3: slide every filter over every image of the batch
    for i in range(m):
        a_prev_pad = A_prev_pad[i]  # the i-th padded image

        for h in range(n_H):  # vertical position in the output
            vert_start = h * stride
            vert_end = vert_start + f

            for w in range(n_W):  # horizontal position in the output
                horiz_start = w * stride
                horiz_end = horiz_start + f

                # The window is identical for every filter, so slice it once.
                a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                for c in range(n_C):  # one output channel per filter
                    weights = W[:, :, :, c]
                    bias = b[:, :, :, c]
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, weights, bias)

    # ReLU activation applied to the whole volume at once
    A = np.where(Z > 0, Z, 0)

    # Build the cache and return only after ALL examples have been processed;
    # returning from inside the batch loop would leave every example after the
    # first one as zeros.
    cache = (A_prev, W, b, hparameters)

    return A, cache

# Convolve the entire image.
# Use the same 3-channel edge filter three times so the output has 3 channels
# and can be displayed as an RGB picture.
filter_stack = np.zeros((3, 3, 3, 3))
for i in range(3):
    filter_stack[:, :, :, i] = l1_filter

A_prev = np.expand_dims(img, axis=0)  # batch of one image: (1, height, width, 3)
W = filter_stack  # (f=3, f=3, n_C_prev=3, n_C=3): three 3x3 filters over 3 input channels
b = np.random.randn(1, 1, 1, 3)  # one bias per filter
hparameters = {"pad": 1,
               "stride": 2}

A, cache_conv = conv_forward(A_prev, W, b, hparameters)
# Show the activated output of the layer. (Z at this point is still the scalar
# from the single-slice demo, so it cannot be indexed as an image.)
plt.imshow(A[0, :, :, :])

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).





<matplotlib.image.AxesImage at 0x1f4536ad1c8>

png

A CONV layer should also apply an activation to each output value, so each position takes the following two steps:

Convolve the window to get back one output neuron Z[i, h, w, c] = …
Apply activation A[i, h, w, c] = activation(Z[i, h, w, c])

Pooling forward

def pool_forward(A_prev, hparameters, mode = "max"):
    """
    Implements the forward pass of the pooling layer

    Each channel is pooled independently: every output value is the maximum
    (or the mean) of one f x f window taken from the same channel of the input.

    Arguments:
    A_prev -- Input data, numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    hparameters -- python dictionary containing "f" and "stride"
    mode -- the pooling mode you would like to use, defined as a string ("max" or "average")

    Returns:
    A -- output of the pool layer, a numpy array of shape (m, n_H, n_W, n_C)
    cache -- cache used in the backward pass of the pooling layer, contains the input and hparameters

    Raises:
    ValueError -- if mode is neither "max" nor "average"
    """
    pooling_ops = {"max": np.max, "average": np.mean}
    if mode not in pooling_ops:
        # Fail loudly instead of silently returning an all-zero volume.
        raise ValueError('mode must be "max" or "average", got %r' % (mode,))
    reduce_window = pooling_ops[mode]

    m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
    stride = hparameters["stride"]
    f = hparameters["f"]

    # step 1: output height/width after pooling; the channel count is unchanged
    n_H = int(1 + (n_H_prev - f) / stride)
    n_W = int(1 + (n_W_prev - f) / stride)
    n_C = n_C_prev

    # Output volume
    A = np.zeros((m, n_H, n_W, n_C))

    # step 2: slide the window over the height and width axes
    for h in range(n_H):  # vertical position in the output
        vert_start = h * stride
        vert_end = vert_start + f

        for w in range(n_W):  # horizontal position in the output
            horiz_start = w * stride
            horiz_end = horiz_start + f

            # Window for every example and every channel at once:
            # shape (m, f, f, n_C).
            window = A_prev[:, vert_start:vert_end, horiz_start:horiz_end, :]

            # Reduce only over the two spatial axes so that examples and
            # channels stay separate (pooling must not mix channels).
            A[:, h, w, :] = reduce_window(window, axis=(1, 2))

    # Returned only after the whole batch has been pooled.
    cache = (A_prev, hparameters)

    return A, cache

# Max pooling with a 3x3 window moved one pixel at a time.
np.random.seed(1)
A_prev = A  # the pooling input is the output of the convolution layer
hparameters = {"stride": 1, "f": 3}

A, cache = pool_forward(A_prev, hparameters)
print("mode = max")
print()
print(A.shape)

# Pooling input (left) next to the pooled result (right).
fig, axs = plt.subplots(nrows=1, ncols=2)
axs[0].imshow(A_prev[0])
axs[0].set_title('A prev')
axs[1].imshow(A[0])
axs[1].set_title('max pool')

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


mode = max

(1, 148, 224, 3)





Text(0.5, 1.0, 'max pool')

png


# Same input and hyper-parameters as before, but averaging each window
# instead of taking its maximum.
A, cache = pool_forward(A_prev, hparameters, mode="average")
print("mode = average")

# Pooling input (left) next to the pooled result (right).
fig, axs = plt.subplots(nrows=1, ncols=2)
axs[0].imshow(A_prev[0])
axs[0].set_title('A prev')
axs[1].imshow(A[0])
axs[1].set_title('average pool')

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


mode = average





Text(0.5, 1.0, 'average pool')

png

# Max pooling again, this time moving the 3x3 window two pixels per step.
np.random.seed(1)

hparameters = {"stride": 2, "f": 3}

A, cache = pool_forward(A_prev, hparameters)
print("mode = max")
print()

# Pooling input (left) next to the pooled result (right).
fig, axs = plt.subplots(nrows=1, ncols=2)
axs[0].imshow(A_prev[0])
axs[0].set_title('A prev')
axs[1].imshow(A[0])
axs[1].set_title('max pool')

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


mode = max






Text(0.5, 1.0, 'max pool')

png

Put it all together (CNN)

We will see this in the second part of the post.

jupyter Computer Vision

Richa Kaur

Business Analyst

Business Analyst by day, Machine Learning hobbyist by night

Convolution Neural Network from Scratch

Import Libraries & Data

Architecture

Functions

Zero padding

Convolution operation

Convolution forward operation

Pooling forward

Put it all together (CNN)

Richa Kaur

Business Analyst

Related