TensorFlow?
Tensors are represented as n-dimensional arrays of base datatypes.
- The shape of a Tensor: the number of dimensions and the size of each dimension.
- The rank of a Tensor: the number of dimensions, also called the Tensor's order or degree.
import tensorflow as tf
import mitdeeplearning as mdl
import numpy as np
import matplotlib.pyplot as plt
sport = tf.constant("Tennis", tf.string) # 0-d
numbers = tf.constant([3.141, 1.414, 2.718], tf.float64) # 1-d
matrix = tf.constant([[1, 2, 3], [4, 5, 6]], tf.float64) # 2-d
images = tf.zeros([10, 256, 256, 3]) # 4-d
assert tf.rank(images).numpy() == 4
assert tf.shape(images).numpy().tolist() == [10, 256, 256, 3]
# slicing to access subtensors within a higher-rank Tensor
row_vector = matrix[1] # [4. 5. 6.]
column_vector = matrix[:,2] # [3. 6.]
scalar = matrix[1, 2] # 6.0
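To tie this back to the definitions above, here is a quick sketch printing the rank and shape of the tensors just defined (expected output shown in comments):
print(tf.rank(sport).numpy(), sport.shape)     # 0 ()
print(tf.rank(numbers).numpy(), numbers.shape) # 1 (3,)
print(tf.rank(matrix).numpy(), matrix.shape)   # 2 (2, 3)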
Computations on Tensors
a = tf.constant(15)
b = tf.constant(61)
c1 = tf.add(a, b)
c2 = a + b # tf overloads the "+" operator
print(c1, c2)
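Both print tf.Tensor(76, shape=(), dtype=int32): with eager execution the addition 15 + 61 = 76 is computed immediately.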
def func(a, b):
    c = tf.add(a, b)
    d = tf.subtract(b, 1)
    e = tf.multiply(c, d)
    return e
a, b = 1.5, 2.5
e_out = func(a, b)
print(e_out) # a single scalar value
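With a = 1.5 and b = 2.5, this computes c = a + b = 4.0, d = b - 1 = 1.5, and e = c * d = 6.0, so the printed tensor holds the scalar 6.0.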
Neural networks
$$ \begin{aligned} y &= \sigma(Wx + b) \\ W &: \text{a matrix of weights} \\ b &: \text{a bias} \\ x &: \text{input} \\ \sigma &: \text{sigmoid activation function} \\ y &: \text{output} \end{aligned} $$
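To make the equation concrete, here is a minimal sketch computing a single perceptron output with raw TensorFlow ops (the 2-input / 3-output sizes and random weights are illustrative assumptions; with a row-vector input we compute xW rather than Wx):
x = tf.constant([[1., 2.]])          # input, shape (1, 2)
W = tf.random.normal([2, 3])         # weight matrix
b = tf.zeros([1, 3])                 # bias
y = tf.sigmoid(tf.matmul(x, W) + b)  # y = sigma(xW + b)
print(y.numpy())                     # three values in (0, 1)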
Tensors can flow through abstract types called Layers, the building blocks of neural networks.
Layers implement:
- Common neural network operations
- Weight updates
- Loss computation
- Inter-layer connectivity
class OurDenseLayer(tf.keras.layers.Layer):
    def __init__(self, n_output_nodes):
        super(OurDenseLayer, self).__init__()
        self.n_output_nodes = n_output_nodes

    def build(self, input_shape):
        d = int(input_shape[-1])
        self.W = self.add_weight("weight", shape=[d, self.n_output_nodes])
        self.b = self.add_weight("bias", shape=[1, self.n_output_nodes])

    def call(self, x):
        z = tf.add(tf.matmul(x, self.W), self.b)
        y = tf.sigmoid(z)
        return y
# layer parameters are initialized randomly,
# so seed the RNG so the output can be validated
tf.random.set_seed(1)
layer = OurDenseLayer(3)
layer.build((1, 2))
x_input = tf.constant([[1, 2.]], shape=(1,2))
y = layer.call(x_input)
print(y.numpy())
mdl.test_custom_dense_layer_output(y)
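Calling the layer object directly is the more idiomatic equivalent of call(); it runs build() automatically on first use, so the manual layer.build((1, 2)) above is then optional:
y2 = layer(x_input)  # same computation as layer.call(x_input)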
TensorFlow defines a number of commonly used Layers, such as Dense. Instead of a single custom Layer, we can use the Sequential model from Keras with a single Dense layer to define the network.
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
n_output_nodes = 3
model = Sequential()
dense_layer = tf.keras.layers.Dense(n_output_nodes, activation='sigmoid')
model.add(dense_layer)
x_input = tf.constant([[1, 2.]], shape=(1,2))
model_output = model(x_input).numpy()
print(model_output)
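Because the model has now been called on an input, the Dense layer's parameters are built; a quick sketch for inspecting their shapes (which follow from the (1, 2) input and 3 output nodes):
W, b = dense_layer.get_weights()  # kernel and bias of the Dense layer
print(W.shape, b.shape)           # (2, 3) (3,)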
By subclassing the Model class, we can define custom neural networks.
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
class SubclassModel(tf.keras.Model):
    def __init__(self, n_output_nodes):
        super(SubclassModel, self).__init__()
        self.dense_layer = Dense(n_output_nodes, activation='sigmoid')

    def call(self, inputs):
        return self.dense_layer(inputs)
n_output_nodes = 3
model = SubclassModel(n_output_nodes)
x_input = tf.constant([1,2.], shape=(1,2))
print(model.call(x_input))
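As with layers, the idiomatic way to invoke the model is to call the object directly rather than .call():
print(model(x_input).numpy())  # same output as model.call(x_input)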
# Suppose under some instances we want our network
# to simply output the input, without any perturbation.
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
class IdentityModel(tf.keras.Model):
    def __init__(self, n_output_nodes):
        super(IdentityModel, self).__init__()
        self.dense_layer = Dense(n_output_nodes, activation='sigmoid')

    def call(self, inputs, isidentity=False):
        x = self.dense_layer(inputs)
        if isidentity:
            return inputs
        return x
n_output_nodes = 3
model = IdentityModel(n_output_nodes)
x_input = tf.constant([1,2.], shape=(1,2))
out_activate = model.call(x_input, False)
out_identity = model.call(x_input, True)
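Printing both outputs shows the difference (the exact activation values depend on the random initialization):
print("Network output: {}; Identity output: {}".format(
    out_activate.numpy(), out_identity.numpy()))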
Automatic differentiation
Use tf.GradientTape to trace operations for computing gradients.
All forward-pass operations are recorded to a tape; the tape is then played backwards to compute the gradient and is discarded after the computation. If multiple gradients need to be computed over the same computation, use a persistent gradient tape (see the sketch after the example below).
Define $y = x^2$ and compute the gradient:
x = tf.Variable(3.0)
with tf.GradientTape() as tape:
    y = x * x
dy_dx = tape.gradient(y, x)
assert dy_dx.numpy() == 6.0
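As mentioned above, a persistent tape lets us compute multiple gradients from the same forward pass; a minimal sketch:
x = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    y = x * x    # y = x^2
    z = y * y    # z = x^4
dy_dx = tape.gradient(y, x)  # 6.0
dz_dx = tape.gradient(z, x)  # 108.0, since dz/dx = 4x^3 at x = 3
del tape  # release the tape's resources once finished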
In training neural networks, we use differentiation and stochastic gradient descent (SGD) to optimize a loss function. Use automatic differentiation and SGD to find the minimum of the function below. We know that $x_{min} = x_f$, but we use GradientTape to solve the problem numerically.
$$ \begin{aligned} L &= (x - x_f)^2 \\ x_f &: \text{ a variable for a desired value we are trying to optimize for} \\ L &: \text{ a loss that we are trying to minimize} \end{aligned} $$
x = tf.Variable([tf.random.normal([1])])
print("Initializing x = {}".format(x.numpy()))
learning_rate = 1e-2
history = []
x_f = 4
# We will run SGD for a number of iterations. At each iteration, we compute the loss,
# compute the derivative of the loss with respect to x, and perform the SGD update.
for i in range(500):
    with tf.GradientTape() as tape:
        loss = tf.pow(x - x_f, 2)  # i.e., (x - x_f)**2
    # loss minimization using gradient tape
    grad = tape.gradient(loss, x) # compute the derivative of the loss with respect to x
    new_x = x - learning_rate*grad # sgd update
    x.assign(new_x) # update the value of x
    history.append(x.numpy()[0])
plt.plot(history)
plt.plot([0, 500], [x_f, x_f])
plt.legend(('Predicted', 'True'))
plt.xlabel('Iteration')
plt.ylabel('x value')
GradientTape provides an extremely flexible framework for automatic differentiation. In order to backpropagate errors through a neural network, we track forward passes on the Tape, use this information to determine the gradients, and then use these gradients for optimization using SGD.
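Putting those three steps together, here is a minimal sketch of that pattern for a hypothetical model and batch of data (the layer size, loss, and learning rate are illustrative assumptions):
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
x_batch = tf.random.normal([8, 2])   # hypothetical inputs
y_batch = tf.random.normal([8, 1])   # hypothetical targets

with tf.GradientTape() as tape:
    preds = model(x_batch)                        # forward pass tracked on the tape
    loss = tf.reduce_mean((preds - y_batch)**2)   # mean squared error
grads = tape.gradient(loss, model.trainable_variables)            # backpropagation
optimizer.apply_gradients(zip(grads, model.trainable_variables))  # SGD update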