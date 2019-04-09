Subscribe to Hacker Noon's best tech stories, delivered at noon
DeepLearning Enthusiast. Data Science Writer @marktechpost.com
We import `numpy` to evaluate the matrix multiplication and dot product between two vectors, `matplotlib` to visualize the data, and from the `sklearn` package we import functions to generate data and evaluate the network performance.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from tqdm import tqdm_notebook
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_blobs
# Custom colour map (red -> yellow -> green) shared by all scatter plots.
my_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ["red", "yellow", "green"]
)

# Toy data set: 1000 two-feature observations spread over 4 labelled blobs.
data, labels = make_blobs(n_samples=1000, centers=4, n_features=2, random_state=0)
print(data.shape, labels.shape)

# Plot the four-class data, coloured by label.
plt.scatter(data[:, 0], data[:, 1], c=labels, cmap=my_cmap)
plt.show()

# Reduce the problem to two classes: even labels become 0, odd labels become 1.
# The four-class labels are kept in labels_orig for later use.
labels_orig = labels
labels = labels_orig % 2
plt.scatter(data[:, 0], data[:, 1], c=labels, cmap=my_cmap)
plt.show()

# Stratified train/validation split of the binary-labelled data.
X_train, X_val, Y_train, Y_val = train_test_split(
    data, labels, stratify=labels, random_state=0
)
print(X_train.shape, X_val.shape)
We use `make_blobs` to generate blobs of points with a Gaussian distribution. I have generated 1000 data points in 2D space with four blobs (`centers=4`) as a multi-class classification prediction problem. Each data point has two inputs and a class label of 0, 1, 2 or 3. The code present in Line 9, 10 helps to visualize the data using a scatter plot. We can see that there are 4 centers present and the data is linearly separable (almost).
labels_orig = labels
labels = np.mod(labels_orig, 2)
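Next, we use the `train_test_split` function to split the data for `training` and `validation` in the ratio of 75:25 (no `test_size` is passed, so the `train_test_split` default of 25% validation data applies).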
class SigmoidNeuron:
    """A single sigmoid neuron trained with full-batch gradient descent.

    The neuron outputs ``sigmoid(x . w^T + b)``.  ``fit`` can minimise either
    the mean squared error (``"mse"``) or the cross entropy (``"ce"``) between
    that output and binary labels.
    """

    # initialisation
    def __init__(self):
        # Both parameters are created by ``fit``, which sizes ``w`` from the
        # second dimension of its input.
        self.w = None
        self.b = None

    # forward pass
    def perceptron(self, x):
        """Return the pre-activation: dot product of ``x`` and ``w`` plus ``b``."""
        return np.dot(x, self.w.T) + self.b

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
        return 1.0 / (1.0 + np.exp(-x))

    # gradients of the mean squared error loss
    def grad_w_mse(self, x, y):
        """Return the weight gradient of the MSE loss for one sample ``(x, y)``."""
        y_pred = self.sigmoid(self.perceptron(x))
        return (y_pred - y) * y_pred * (1 - y_pred) * x

    def grad_b_mse(self, x, y):
        """Return the bias gradient of the MSE loss for one sample ``(x, y)``."""
        y_pred = self.sigmoid(self.perceptron(x))
        return (y_pred - y) * y_pred * (1 - y_pred)

    # gradients of the cross entropy loss
    def grad_w_ce(self, x, y):
        """Return the weight gradient of the cross entropy loss for one sample.

        Raises:
            ValueError: if ``y`` is neither 0 nor 1.
        """
        if y not in (0, 1):
            raise ValueError("y should be 0 or 1")
        # For y == 0 this is y_pred * x, for y == 1 it is -(1 - y_pred) * x.
        return (self.sigmoid(self.perceptron(x)) - y) * x

    def grad_b_ce(self, x, y):
        """Return the bias gradient of the cross entropy loss for one sample.

        Raises:
            ValueError: if ``y`` is neither 0 nor 1.
        """
        if y not in (0, 1):
            raise ValueError("y should be 0 or 1")
        # For y == 0 this is y_pred, for y == 1 it is -(1 - y_pred).
        return self.sigmoid(self.perceptron(x)) - y

    # model fit method
    def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True,
            loss_fn="mse", display_loss=False):
        """Train the neuron with full-batch gradient descent.

        Args:
            X: 2-D array of inputs, one sample per row.
            Y: labels, iterated in step with the rows of ``X``.
            epochs: number of passes over the data.
            learning_rate: step-size multiplier applied to each update.
            initialise: when true, draw fresh random weights and reset the
                bias to 0 before training; pass False to continue training
                the current parameters.
            loss_fn: ``"mse"`` or ``"ce"``; selects which gradients are used.
            display_loss: when true, record the loss after every epoch and
                plot it once training has finished.

        Raises:
            ValueError: if ``loss_fn`` is not ``"mse"`` or ``"ce"``.
        """
        # Reject unknown losses up front; previously they were accepted and
        # the loop ran without ever updating the parameters.
        if loss_fn == "mse":
            grad_w, grad_b = self.grad_w_mse, self.grad_b_mse
        elif loss_fn == "ce":
            grad_w, grad_b = self.grad_w_ce, self.grad_b_ce
        else:
            raise ValueError('loss_fn should be "mse" or "ce"')

        # initialise w, b
        if initialise:
            self.w = np.random.randn(1, X.shape[1])
            self.b = 0

        if display_loss:
            if loss_fn == "mse":
                loss_metric = mean_squared_error
            else:
                # log_loss is not among the file-level imports; without this
                # local import the "ce" loss curve raised NameError.
                from sklearn.metrics import log_loss
                loss_metric = log_loss
            loss = []

        for _ in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            # Sum the per-sample gradients over the whole data set.
            dw = 0
            db = 0
            for x, y in zip(X, Y):
                dw += grad_w(x, y)
                db += grad_b(x, y)

            # NOTE(review): X.shape[1] is the number of input features, not
            # the number of samples, so the summed gradient is not averaged
            # over the data set.  Left unchanged because switching to
            # X.shape[0] rescales every update and the learning rates passed
            # by existing callers would presumably need retuning.
            m = X.shape[1]
            self.w -= learning_rate * dw/m
            self.b -= learning_rate * db/m

            if display_loss:
                Y_pred = self.sigmoid(self.perceptron(X))
                loss.append(loss_metric(Y, Y_pred))

        if display_loss:
            plt.plot(loss)
            plt.xlabel('Epochs')
            if loss_fn == "mse":
                plt.ylabel('Mean Squared Error')
            elif loss_fn == "ce":
                plt.ylabel('Log Loss')
            plt.show()

    def predict(self, X):
        """Return the neuron's output for every row of ``X`` as a numpy array."""
        return np.array([self.sigmoid(self.perceptron(x)) for x in X])
In the `SigmoidNeuron` class we have 9 functions; I will walk you through these functions one by one and explain what they are doing.
def __init__(self):
self.w = None
self.b = None
The `__init__` function (constructor function) initializes the parameters of the sigmoid neuron, the weights w and the bias b, to None.
#forward pass
def perceptron(self, x):
return np.dot(x, self.w.T) + self.b
def sigmoid(self, x):
return 1.0/(1.0 + np.exp(-x))
Next, we have the `perceptron` and `sigmoid` functions, which characterize the forward pass. In the case of a sigmoid neuron, the forward pass involves two steps:
`perceptron` — Computes the dot product between the input x & weights w and adds bias b
`sigmoid` — Takes the output of perceptron and applies the sigmoid (logistic) function on top of it.
#updating the gradients using mean squared error loss
def grad_w_mse(self, x, y):
.....
def grad_b_mse(self, x, y):
.....
#updating the gradients using cross entropy loss
def grad_w_ce(self, x, y):
.....
def grad_b_ce(self, x, y):
.....
def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, loss_fn="mse", display_loss=False):
.....
return
`X` — Inputs
`Y` — Labels
`epochs` — Number of epochs we will allow our algorithm to iterate over the data, default value set to 1
`learning_rate` — The magnitude of change for our weights during each step through our training data, default value set to 1
`initialise` — Whether to randomly initialize the parameters of the model or not. If it is set to True the weights will be initialized; you can set it to False if you want to retrain the trained model.
`loss_fn` — To select the loss function for the algorithm to update the parameters. It can be “mse” or “ce”
`display_loss` — Boolean variable indicating whether to show the decrease of loss for each epoch
In the `fit` method, we go through the data passed through parameters X and Y and compute the update values for the parameters using either the mean squared error loss or the cross entropy loss. Once we have the update values, we go and update the weights and bias terms (Line 49–62).
def predict(self, X):
The `predict` function takes the inputs `X` as an argument, which it expects to be a `numpy` array. In the predict function, we compute the forward pass of each input with the trained model and send back a numpy `array` which contains the predicted value of each input.
# Build the neuron and train it on the binary data (default loss: "mse").
sn = SigmoidNeuron()
sn.fit(X_train, Y_train, epochs=1000, learning_rate=0.5, display_loss=True)

# Threshold the predicted values at 0.5 to obtain hard 0/1 labels
# for the training split ...
Y_pred_train = sn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype("int").ravel()
# ... and for the validation split.
Y_pred_val = sn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype("int").ravel()

# Accuracy on both splits.
accuracy_train = accuracy_score(Y_pred_binarised_train, Y_train)
accuracy_val = accuracy_score(Y_pred_binarised_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))

# Scatter plot of the training predictions.  |prediction - label| is 1 for a
# misclassified point and 0 otherwise, so mistakes get the larger marker.
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=Y_pred_binarised_train,
    cmap=my_cmap,
    s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2),
)
plt.show()
We train the sigmoid neuron by calling the `fit` method on the training data with 1000 epochs and the learning rate set to 0.5 (these values are arbitrary, not the optimal values for this data; you can play around with these values and find the best number of epochs and the learning rate). By default, the loss function is set to mean squared error loss but you can change it to cross entropy loss as well.
#visualizing the results
plt.scatter(X_train[:,0], X_train[:,1], c=Y_pred_binarised_train, cmap=my_cmap, s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2))
plt.show()
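The scatter plot is drawn with `matplotlib.pyplot`. The function takes the first and second features as its two inputs; for the color I have used `Y_pred_binarised_train` and defined a custom ‘cmap’ for visualization. As you can see, the size of each point is different in the plot below. The size is set by `s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2)`: the absolute difference between the predicted and the true label is 1 for a misclassified point and 0 for a correct one, so misclassified points are drawn larger.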
class FirstFFNetwork:
    """Hand-wired feed-forward network: 2 inputs, 2 hidden sigmoid neurons and
    1 sigmoid output neuron, trained with full-batch gradient descent on the
    mean squared error.
    """

    # Names of the trainable parameters; the gradient of each one is stored by
    # ``grad`` in the attribute named "d" + <parameter name>.
    _PARAM_NAMES = ("w1", "w2", "w3", "w4", "w5", "w6", "b1", "b2", "b3")

    def __init__(self):
        self._reset_parameters()

    def _reset_parameters(self):
        """Draw the six weights at random (w1..w6, in that order) and zero the biases."""
        for weight in self._PARAM_NAMES[:6]:
            setattr(self, weight, np.random.randn())
        for bias in self._PARAM_NAMES[6:]:
            setattr(self, bias, 0)

    def sigmoid(self, x):
        """Return the logistic function of ``x``."""
        return 1.0/(1.0 + np.exp(-x))

    def forward_pass(self, x):
        """Run one two-feature sample through the network and return its output.

        Every pre-activation (``a*``) and activation (``h*``) is kept on the
        instance so that ``grad`` can reuse it.
        """
        self.x1, self.x2 = x
        # hidden neuron 1
        self.a1 = self.w1*self.x1 + self.w2*self.x2 + self.b1
        self.h1 = self.sigmoid(self.a1)
        # hidden neuron 2
        self.a2 = self.w3*self.x1 + self.w4*self.x2 + self.b2
        self.h2 = self.sigmoid(self.a2)
        # output neuron
        self.a3 = self.w5*self.h1 + self.w6*self.h2 + self.b3
        self.h3 = self.sigmoid(self.a3)
        return self.h3

    def grad(self, x, y):
        """Back-propagate one sample and store the gradients as ``dw*`` / ``db*``."""
        self.forward_pass(x)

        # Factor shared by every gradient: output error times the derivative
        # of the output sigmoid.
        out_delta = (self.h3-y) * self.h3*(1-self.h3)
        self.dw5 = out_delta * self.h1
        self.dw6 = out_delta * self.h2
        self.db3 = out_delta

        # Error reaching hidden neuron 1 (through w5).
        hidden1_delta = out_delta * self.w5 * self.h1*(1-self.h1)
        self.dw1 = hidden1_delta * self.x1
        self.dw2 = hidden1_delta * self.x2
        self.db1 = hidden1_delta

        # Error reaching hidden neuron 2 (through w6).
        hidden2_delta = out_delta * self.w6 * self.h2*(1-self.h2)
        self.dw3 = hidden2_delta * self.x1
        self.dw4 = hidden2_delta * self.x2
        self.db2 = hidden2_delta

    def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False):
        """Train the network on ``X`` / ``Y``.

        Args:
            X: 2-D array of inputs, one two-feature sample per row.
            Y: labels, iterated in step with the rows of ``X``.
            epochs: number of passes over the data.
            learning_rate: step-size multiplier applied to each update.
            initialise: when true, re-draw the weights and zero the biases
                before training.
            display_loss: when true, record the mean squared error after each
                epoch and plot it at the end.
        """
        if initialise:
            self._reset_parameters()

        if display_loss:
            loss = {}

        for epoch in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            # Sum of the per-sample gradients, one entry per parameter.
            totals = dict.fromkeys(self._PARAM_NAMES, 0)
            for x, y in zip(X, Y):
                self.grad(x, y)
                for name in self._PARAM_NAMES:
                    totals[name] += getattr(self, "d" + name)

            # NOTE(review): X.shape[1] is the number of input features, not
            # the number of samples.
            m = X.shape[1]
            for name in self._PARAM_NAMES:
                updated = getattr(self, name) - learning_rate * totals[name] / m
                setattr(self, name, updated)

            if display_loss:
                Y_pred = self.predict(X)
                loss[epoch] = mean_squared_error(Y_pred, Y)

        if display_loss:
            plt.plot(loss.values())
            plt.xlabel('Epochs')
            plt.ylabel('Mean Squared Error')
            plt.show()

    def predict(self, X):
        """Return the network output for every sample in ``X`` as a numpy array."""
        return np.array([self.forward_pass(x) for x in X])
In the `FirstFFNetwork` class we have 6 functions; we will go over these functions one by one.
def __init__(self):
.....
The `__init__` function initializes all the parameters of the network, including weights and biases. Unlike the sigmoid neuron, where we have only two parameters, in the neural network we have 9 parameters to be initialized. All the 6 weights are initialized randomly and the 3 biases are set to zero.
def sigmoid(self, x):
return 1.0/(1.0 + np.exp(-x))
def forward_pass(self, x):
#forward pass - preactivation and activation
self.x1, self.x2 = x
self.a1 = self.w1*self.x1 + self.w2*self.x2 + self.b1
self.h1 = self.sigmoid(self.a1)
self.a2 = self.w3*self.x1 + self.w4*self.x2 + self.b2
self.h2 = self.sigmoid(self.a2)
self.a3 = self.w5*self.h1 + self.w6*self.h2 + self.b3
self.h3 = self.sigmoid(self.a3)
return self.h3
Pre-activation represented by ‘a’: It is a weighted sum of inputs plus the bias.
Activation represented by ‘h’: Activation function is Sigmoid function.
a₁ = w₁ * x₁ + w₂ * x₂ + b₁
h₁ = sigmoid(a₁)
a₃ = w₅ * h₁ + w₆ * h₂ + b₃
def grad(self, x, y):
#back propagation
......
Next, we have the `grad` function, which takes inputs x and y as arguments and computes the forward pass. Based on the forward pass, it computes the partial derivatives of the loss function, which is the mean squared error loss in this case, with respect to each of the weights and biases.
Note: In this post, I am not explaining how do we arrive at these partial derivatives for the parameters. Just consider this function as a black box for now, in my next article I will explain how do we compute these partial derivatives in backpropagation.
def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False):
......
Then, we have the `fit` function, similar to the sigmoid neuron. In this function, we iterate through each data point, compute the partial derivatives by calling the `grad` function and store those values in a new variable for each parameter (Line 63–75). Then, we go ahead and update the values of all the parameters (Line 77–87). We also have the `display_loss` condition; if set to `True` it will display the plot of network loss variation across all the epochs.
def predict(self, X):
#predicting the results on unseen data
.....
# Build the 2-2-1 network and train it on the binary data.
ffn = FirstFFNetwork()
ffn.fit(X_train, Y_train, epochs=2000, learning_rate=.01, display_loss=True)

# Predictions on both splits, thresholded at 0.5 to obtain 0/1 labels.
Y_pred_train = ffn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype("int").ravel()
Y_pred_val = ffn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype("int").ravel()

# Model performance.
accuracy_train = accuracy_score(Y_pred_binarised_train, Y_train)
accuracy_val = accuracy_score(Y_pred_binarised_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))

# Scatter plot of the training predictions; misclassified points
# (|prediction - label| == 1) get the larger marker.
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=Y_pred_binarised_train,
    cmap=my_cmap,
    s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2),
)
plt.show()
#visualize the predictions
plt.scatter(X_train[:,0], X_train[:,1], c=Y_pred_binarised_train, cmap=my_cmap, s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2))
plt.show()
Note: In this case, I am considering the network for binary classification only.
W(Layer number)(Neuron number in the layer)(Input number)
b(Layer number)(Bias number associated for that input)
a(Layer number) (Input number)
class FFSNNetwork:
    """Feed-forward network of sigmoid neurons with a configurable stack of
    hidden layers and a single sigmoid output neuron.

    Layers are numbered from 1; ``W[k]`` and ``B[k]`` hold the weights and
    biases feeding layer ``k``, and ``sizes`` lists the width of every layer
    starting with the input.
    """

    def __init__(self, n_inputs, hidden_sizes=[2]):
        # ``hidden_sizes`` is only read (concatenated into a new list), never
        # mutated, so the shared default list is not modified.
        self.nx = n_inputs
        self.ny = 1
        self.nh = len(hidden_sizes)
        self.sizes = [self.nx] + hidden_sizes + [self.ny]
        self.W = {}
        self.B = {}
        self._reset_parameters()

    def _reset_parameters(self):
        """Draw random weights and zero biases for every layer, first to last."""
        for layer in range(1, self.nh + 2):
            fan_in, fan_out = self.sizes[layer - 1], self.sizes[layer]
            self.W[layer] = np.random.randn(fan_in, fan_out)
            self.B[layer] = np.zeros((1, fan_out))

    def sigmoid(self, x):
        """Return the logistic function of ``x``."""
        return 1.0/(1.0 + np.exp(-x))

    def forward_pass(self, x):
        """Propagate one sample through all layers and return the final output.

        Pre-activations are stored in ``self.A`` and activations in ``self.H``;
        ``H[0]`` is the input reshaped to a single row.
        """
        self.A, self.H = {}, {}
        self.H[0] = x.reshape(1, -1)
        for layer in range(1, self.nh + 2):
            self.A[layer] = np.matmul(self.H[layer - 1], self.W[layer]) + self.B[layer]
            self.H[layer] = self.sigmoid(self.A[layer])
        return self.H[self.nh + 1]

    def grad_sigmoid(self, x):
        """Return ``x * (1 - x)``: the sigmoid derivative written in terms of
        the sigmoid's output ``x``."""
        return x*(1-x)

    def grad(self, x, y):
        """Back-propagate one sample; gradients are stored in ``dW`` and ``dB``."""
        self.forward_pass(x)
        self.dW, self.dB, self.dH, self.dA = {}, {}, {}, {}
        last = self.nh + 1
        # The output delta is (prediction - target), with no sigmoid-derivative
        # factor applied at the output layer.
        self.dA[last] = (self.H[last] - y)
        for k in reversed(range(1, last + 1)):
            self.dW[k] = np.matmul(self.H[k-1].T, self.dA[k])
            self.dB[k] = self.dA[k]
            # Push the error back to the previous layer's activations, then
            # through that layer's sigmoid.
            self.dH[k-1] = np.matmul(self.dA[k], self.W[k].T)
            self.dA[k-1] = np.multiply(self.dH[k-1], self.grad_sigmoid(self.H[k-1]))

    def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False):
        """Train the network with full-batch gradient descent.

        Args:
            X: 2-D array of inputs, one sample per row.
            Y: labels, iterated in step with the rows of ``X``.
            epochs: number of passes over the data.
            learning_rate: step-size multiplier applied to each update.
            initialise: when true, re-draw all weights and zero all biases
                before training.
            display_loss: when true, record the mean squared error after each
                epoch and plot it at the end.
        """
        if initialise:
            self._reset_parameters()

        if display_loss:
            loss = {}

        layers = range(1, self.nh + 2)
        for e in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            # Accumulators for the summed per-sample gradients.
            dW = {k: np.zeros((self.sizes[k-1], self.sizes[k])) for k in layers}
            dB = {k: np.zeros((1, self.sizes[k])) for k in layers}
            for x, y in zip(X, Y):
                self.grad(x, y)
                for k in layers:
                    dW[k] += self.dW[k]
                    dB[k] += self.dB[k]

            # NOTE(review): X.shape[1] is the number of input features, not
            # the number of samples.
            m = X.shape[1]
            for k in layers:
                self.W[k] -= learning_rate * dW[k] / m
                self.B[k] -= learning_rate * dB[k] / m

            if display_loss:
                Y_pred = self.predict(X)
                loss[e] = mean_squared_error(Y_pred, Y)

        if display_loss:
            plt.plot(loss.values())
            plt.xlabel('Epochs')
            plt.ylabel('Mean Squared Error')
            plt.show()

    def predict(self, X):
        """Return the network output for each row of ``X`` with singleton
        dimensions squeezed out."""
        outputs = [self.forward_pass(x) for x in X]
        return np.array(outputs).squeeze()
def __init__(self, n_inputs, hidden_sizes=[2]):
#intialize the inputs
self.nx = n_inputs
self.ny = 1 #one final neuron for binary classification.
self.nh = len(hidden_sizes)
self.sizes = [self.nx] + hidden_sizes + [self.ny]
.....
function takes a few arguments,
__init__
— Number of inputs going into the network.
n_inputs
— Expects a list of integers, represents the number of neurons present in the hidden layer.
hidden_sizes
def forward_pass(self, x):
self.A = {}
self.H = {}
self.H[0] = x.reshape(1, -1)
....
In the `forward_pass` function, we have initialized two dictionaries A and H, and instead of representing the inputs as X I am representing them as H₀ so that we can save them in the post-activation dictionary H. Then, we loop through all the layers, compute the pre-activation & post-activation values and store them in their respective dictionaries. The post-activation output of the final layer is the predicted value of our network. The function returns this value so that we can use it to calculate the loss of the network.
def grad_sigmoid(self, x):
return x*(1-x)
def grad(self, x, y):
self.forward_pass(x)
.....
def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False):
# initialise w, b
if initialise:
for i in range(self.nh+1):
self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
self.B[i+1] = np.zeros((1, self.sizes[i+1]))
Next, we have the `fit` function, which is essentially the same, but here we loop through each of the inputs and update the weights and biases in a generalized fashion rather than updating the individual parameters.
def predict(self, X):
#predicting the results on unseen data
.....
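Finally, the `predict` function calls the `forward_pass` function on each of the inputs. To train the generic network, we instantiate the `FFSNNetwork` class and then call the `fit` method on the training data with 1000 epochs and the learning rate set to 0.001.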
# Generic network with two hidden layers (2 neurons, then 3 neurons).
ffsnn = FFSNNetwork(2, [2, 3])
ffsnn.fit(X_train, Y_train, epochs=1000, learning_rate=.001, display_loss=True)

# Predictions on both splits, thresholded at 0.5 to obtain 0/1 labels.
Y_pred_train = ffsnn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype("int").ravel()
Y_pred_val = ffsnn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype("int").ravel()

# Accuracy on both splits.
accuracy_train = accuracy_score(Y_pred_binarised_train, Y_train)
accuracy_val = accuracy_score(Y_pred_binarised_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))

# Scatter plot of the training predictions; misclassified points
# (|prediction - label| == 1) get the larger marker.
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=Y_pred_binarised_train,
    cmap=my_cmap,
    s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2),
)
plt.show()
#visualize the predictions
plt.scatter(X_train[:,0], X_train[:,1], c=Y_pred_binarised_train, cmap=my_cmap, s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2))
plt.show()
# Multi-class setup: split again, this time on the original four-class labels
# (labels_orig) rather than the binarised ones.
X_train, X_val, Y_train, Y_val = train_test_split(
    data, labels_orig, stratify=labels_orig, random_state=0
)
print(X_train.shape, X_val.shape, labels_orig.shape)

# One-hot encode the labels:
# 0 -> (1, 0, 0, 0), 1 -> (0, 1, 0, 0), 2 -> (0, 0, 1, 0), 3 -> (0, 0, 0, 1)
enc = OneHotEncoder()
y_OH_train = enc.fit_transform(np.expand_dims(Y_train, 1)).toarray()
# Encode the validation labels with the categories learned from the training
# labels.  Re-fitting on the validation split (fit_transform) would derive the
# columns from whatever classes happen to appear there, so the two encodings
# would only line up by coincidence.
y_OH_val = enc.transform(np.expand_dims(Y_val, 1)).toarray()
print(y_OH_train.shape, y_OH_val.shape)
To feed the four-class labels to the network, we one-hot encode them by applying `sklearn.OneHotEncoder` on the training and validation labels.
class FFSN_MultiClass:
    """Feed-forward network for multi-class classification.

    Hidden layers use the sigmoid activation and the output layer uses
    softmax.  Layers are numbered from 1; ``W[k]`` and ``B[k]`` hold the
    weights and biases feeding layer ``k``, and ``sizes`` lists the width of
    every layer starting with the input.
    """

    def __init__(self, n_inputs, n_outputs, hidden_sizes=[3]):
        # ``hidden_sizes`` is copied into a new list and never mutated, so the
        # shared default is safe; list() also lets callers pass a tuple.
        hidden_sizes = list(hidden_sizes)
        self.nx = n_inputs
        self.ny = n_outputs
        self.nh = len(hidden_sizes)
        self.sizes = [self.nx] + hidden_sizes + [self.ny]
        self.W = {}
        self.B = {}
        self._reset_parameters()

    def _reset_parameters(self):
        """Draw random weights and zero biases for every layer, first to last."""
        for i in range(self.nh + 1):
            self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
            self.B[i+1] = np.zeros((1, self.sizes[i+1]))

    def sigmoid(self, x):
        """Return the logistic function of ``x``."""
        return 1.0/(1.0 + np.exp(-x))

    def softmax(self, x):
        """Return the softmax of ``x`` along its last axis.

        The row maximum is subtracted before exponentiating.  This leaves the
        result unchanged mathematically but keeps ``exp`` from overflowing to
        ``inf`` (and the output from becoming ``nan``) for large logits.
        """
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def forward_pass(self, x):
        """Propagate one sample through the network and return the class
        probabilities as a ``(1, n_outputs)`` array.

        Pre-activations are stored in ``self.A`` and activations in ``self.H``;
        ``H[0]`` is the input reshaped to a single row.
        """
        self.A = {}
        self.H = {}
        self.H[0] = x.reshape(1, -1)
        # hidden layers: sigmoid
        for i in range(self.nh):
            self.A[i+1] = np.matmul(self.H[i], self.W[i+1]) + self.B[i+1]
            self.H[i+1] = self.sigmoid(self.A[i+1])
        # output layer: softmax
        last = self.nh + 1
        self.A[last] = np.matmul(self.H[self.nh], self.W[last]) + self.B[last]
        self.H[last] = self.softmax(self.A[last])
        return self.H[last]

    def predict(self, X):
        """Return the class probabilities for each row of ``X`` with singleton
        dimensions squeezed out."""
        return np.array([self.forward_pass(x) for x in X]).squeeze()

    def grad_sigmoid(self, x):
        """Return ``x * (1 - x)``: the sigmoid derivative written in terms of
        the sigmoid's output ``x``."""
        return x*(1-x)

    def cross_entropy(self, label, pred):
        """Return the mean cross entropy between one-hot ``label`` and ``pred``.

        The probability assigned to the true class is picked out by
        multiplying with the one-hot label and summing over the last axis.
        It is clipped away from zero so that a sample whose true class got
        probability 0 contributes a large finite loss instead of being
        dropped from the mean.
        """
        true_class_prob = np.sum(np.multiply(pred, label), axis=-1)
        true_class_prob = np.clip(true_class_prob, 1e-15, None)
        return np.mean(-np.log(true_class_prob))

    def grad(self, x, y):
        """Back-propagate one sample; gradients are stored in ``dW`` and ``dB``."""
        self.forward_pass(x)
        self.dW = {}
        self.dB = {}
        self.dH = {}
        self.dA = {}
        L = self.nh + 1
        # Output delta: predicted probabilities minus the one-hot target.
        self.dA[L] = (self.H[L] - y)
        for k in range(L, 0, -1):
            self.dW[k] = np.matmul(self.H[k-1].T, self.dA[k])
            self.dB[k] = self.dA[k]
            # Push the error back to the previous layer's activations, then
            # through that layer's sigmoid.
            self.dH[k-1] = np.matmul(self.dA[k], self.W[k].T)
            self.dA[k-1] = np.multiply(self.dH[k-1], self.grad_sigmoid(self.H[k-1]))

    def fit(self, X, Y, epochs=100, initialize=True, learning_rate=0.01, display_loss=False):
        """Train the network with full-batch gradient descent.

        Args:
            X: 2-D array of inputs, one sample per row.
            Y: one-hot encoded labels, one row per sample.
            epochs: number of passes over the data.
            initialize: when true, re-draw all weights and zero all biases
                before training.  The default is the boolean ``True``; it used
                to be the string ``'True'``, which was merely truthy.
            learning_rate: step-size multiplier applied to each update.
            display_loss: when true, record the cross entropy after each epoch
                and plot it at the end.
        """
        if display_loss:
            loss = []

        if initialize:
            self._reset_parameters()

        for _ in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            # Accumulators for the summed per-sample gradients.
            dW = {}
            dB = {}
            for i in range(self.nh+1):
                dW[i+1] = np.zeros((self.sizes[i], self.sizes[i+1]))
                dB[i+1] = np.zeros((1, self.sizes[i+1]))
            for x, y in zip(X, Y):
                self.grad(x, y)
                for i in range(self.nh+1):
                    dW[i+1] += self.dW[i+1]
                    dB[i+1] += self.dB[i+1]

            # NOTE(review): X.shape[1] is the number of input features, not
            # the number of samples, so the summed gradient is not averaged
            # over the data set.  Left unchanged because switching to
            # X.shape[0] rescales every update and the learning rates passed
            # by existing callers would presumably need retuning.
            m = X.shape[1]
            for i in range(self.nh+1):
                self.W[i+1] -= learning_rate * (dW[i+1]/m)
                self.B[i+1] -= learning_rate * (dB[i+1]/m)

            if display_loss:
                Y_pred = self.predict(X)
                loss.append(self.cross_entropy(Y, Y_pred))

        if display_loss:
            plt.plot(loss)
            plt.xlabel('Epochs')
            plt.ylabel('CE')
            plt.show()
In the `forward_pass` function, the hidden layers use the sigmoid activation while the output layer applies softmax:
def forward_pass(self, x):
self.A = {}
self.H = {}
self.H[0] = x.reshape(1, -1)
for i in range(self.nh):
self.A[i+1] = np.matmul(self.H[i], self.W[i+1]) + self.B[i+1]
self.H[i+1] = self.sigmoid(self.A[i+1])
self.A[self.nh+1] = np.matmul(self.H[self.nh], self.W[self.nh+1]) + self.B[self.nh+1]
self.H[self.nh+1] = self.softmax(self.A[self.nh+1])
return self.H[self.nh+1]
def cross_entropy(self,label,pred):
yl=np.multiply(pred,label)
yl=yl[yl!=0]
yl=-np.log(yl)
yl=np.mean(yl)
return yl
# Multi-class network: 2 inputs, 4 output classes, hidden layers of 2 and 3 neurons.
ffsn_multi = FFSN_MultiClass(2, 4, [2, 3])
ffsn_multi.fit(X_train, y_OH_train, epochs=2000, learning_rate=.005, display_loss=True)

# The predicted label is the class with the highest probability.
Y_pred_train = ffsn_multi.predict(X_train)
Y_pred_train = np.argmax(Y_pred_train, 1)
Y_pred_val = ffsn_multi.predict(X_val)
Y_pred_val = np.argmax(Y_pred_val, 1)

# Accuracy on both splits.
accuracy_train = accuracy_score(Y_pred_train, Y_train)
accuracy_val = accuracy_score(Y_pred_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))

# Scatter plot of the training predictions.  sign(prediction - label) is
# non-zero only for a misclassified point, so mistakes get the larger marker.
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=Y_pred_train,
    cmap=my_cmap,
    s=15*(np.abs(np.sign(Y_pred_train-Y_train))+.1),
)
plt.show()
We use the `argmax` function to get the label with the highest probability. Using that label we can plot our four-class scatter plot and compare it with the scatter plot of the actual input data.
LEARN BY CODING
In this post, we used the `make_blobs` function to generate toy data, and we have seen that `make_blobs` generates linearly separable data. If you want to generate some complex non-linearly separable data to train your feedforward neural network, you can use the `make_moons` function from the `sklearn` package.