TensorFlow

[1]:
%matplotlib inline
[2]:
import warnings
warnings.simplefilter('ignore', RuntimeWarning)
[3]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
[4]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
[5]:
import tensorflow as tf
[6]:
import tensorflow_probability as tfp
tfd = tfp.distributions

Working with tensors

Tensors behave almost exactly like numpy arrays.

[7]:
tf.constant([1., 2., 3.])
[7]:
<tf.Tensor: shape=(3,), dtype=float32, numpy=array([1., 2., 3.], dtype=float32)>
[8]:
x = tf.Variable([[1.,2.,3.], [4.,5.,6.]])
[9]:
x.shape
[9]:
TensorShape([2, 3])
[10]:
x.dtype
[10]:
tf.float32

Conversion to numpy

[11]:
x.numpy()
[11]:
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)
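
Conversion is often implicit as well: numpy functions accept tensors directly. A quick sketch (not in the original notebook):

np.mean(x)  # numpy converts the tensor automatically; 3.5 here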

Indexing

[12]:
x[:, :2]
[12]:
<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1., 2.],
       [4., 5.]], dtype=float32)>

Assignment

[13]:
x[0,:].assign([3.,2.,1.])
[13]:
<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[3., 2., 1.],
       [4., 5., 6.]], dtype=float32)>
[14]:
x
[14]:
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[3., 2., 1.],
       [4., 5., 6.]], dtype=float32)>

Reductions

[15]:
tf.reduce_mean(x, axis=0)
[15]:
<tf.Tensor: shape=(3,), dtype=float32, numpy=array([3.5, 3.5, 3.5], dtype=float32)>
[16]:
tf.reduce_sum(x, axis=1)
[16]:
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 6., 15.], dtype=float32)>

Broadcasting

[17]:
x + 10
[17]:
<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[13., 12., 11.],
       [14., 15., 16.]], dtype=float32)>
[18]:
x * 10
[18]:
<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[30., 20., 10.],
       [40., 50., 60.]], dtype=float32)>
[19]:
x - tf.reduce_mean(x, axis=1)[:, tf.newaxis]
[19]:
<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 1.,  0., -1.],
       [-1.,  0.,  1.]], dtype=float32)>

Matrix operations

[20]:
x @ tf.transpose(x)
[20]:
<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[14., 28.],
       [28., 77.]], dtype=float32)>

Ufuncs

[21]:
tf.exp(x)
[21]:
<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 20.085537 ,   7.389056 ,   2.7182817],
       [ 54.59815  , 148.41316  , 403.4288   ]], dtype=float32)>
[22]:
tf.sqrt(x)
[22]:
<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1.7320508, 1.4142135, 1.       ],
       [2.       , 2.236068 , 2.4494898]], dtype=float32)>

Random numbers

[23]:
X = tf.random.normal(shape=(10,4))
y = tf.random.normal(shape=(10,1))
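
For reproducible draws, you can set a global seed before sampling (a small aside, not in the original notebook):

tf.random.set_seed(0)  # makes subsequent tf.random calls deterministic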

Linear algebra

[24]:
tf.linalg.lstsq(X, y)
[24]:
<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 0.02415883],
       [ 0.17781891],
       [-0.20745884],
       [-0.30115527]], dtype=float32)>

Vectorization

[25]:
X = tf.random.normal(shape=(1000,10,4))
y = tf.random.normal(shape=(1000,10,1))
[26]:
tf.linalg.lstsq(X, y)
[26]:
<tf.Tensor: shape=(1000, 4, 1), dtype=float32, numpy=
array([[[ 0.19148365],
        [-0.51689374],
        [-0.17029501],
        [ 0.484788  ]],

       [[ 0.7028206 ],
        [-0.15227778],
        [ 0.6967405 ],
        [-0.60269237]],

       [[-0.25489303],
        [ 0.20810236],
        [ 0.88173383],
        [ 0.28963062]],

       ...,

       [[-0.15725346],
        [ 0.49795693],
        [ 0.13796304],
        [-0.11143823]],

       [[-0.00428388],
        [ 0.6222656 ],
        [ 0.02911544],
        [-0.56122893]],

       [[ 0.3179135 ],
        [-0.41116422],
        [-0.16914578],
        [ 0.5184275 ]]], dtype=float32)>
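
lstsq broadcasts over the leading batch dimension, solving each of the 1000 least-squares problems independently; hence the (1000, 4, 1) result, one solution per system. A quick shape check (not in the original notebook):

tf.linalg.lstsq(X, y).shape  # TensorShape([1000, 4, 1])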

Automatic differentiation

[27]:
def f(x,y):
    return x**2 + 2*y**2 + 3*x*y

Gradient

[28]:
x, y = tf.Variable(1.0), tf.Variable(2.0)
[29]:
with tf.GradientTape() as tape:
    z = f(x, y)
[30]:
tape.gradient(z, [x,y])
[30]:
[<tf.Tensor: shape=(), dtype=float32, numpy=8.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=11.0>]
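
As a check, the analytic gradient of f is (2x + 3y, 4y + 3x), which at (x, y) = (1, 2) gives (8, 11), matching the tape (a quick verification, not in the original notebook):

2*1.0 + 3*2.0, 4*2.0 + 3*1.0  # (8.0, 11.0)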

Hessian

[31]:
with tf.GradientTape(persistent=True) as H_tape:
    with tf.GradientTape() as J_tape:
        z = f(x, y)
    Js = J_tape.gradient(z, [x,y])
Hs = [H_tape.gradient(J, [x,y]) for J in Js]
del H_tape
[32]:
np.array(Hs)
[32]:
array([[2., 3.],
       [3., 4.]], dtype=float32)
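
Since f is quadratic, its Hessian is constant: f_xx = 2, f_yy = 4, and f_xy = 3, in agreement with the array above.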

TensorFlow Probability

Distributions

[33]:
[str(x).split('.')[-1][:-2] for x in tfd.distribution.Distribution.__subclasses__()]
[33]:
['Autoregressive',
 'BatchReshape',
 'Bates',
 'Bernoulli',
 'Beta',
 'Gamma',
 'Binomial',
 'BetaBinomial',
 'JointDistribution',
 'JointDistribution',
 '_Cast',
 'Blockwise',
 'Categorical',
 'Cauchy',
 'Chi2',
 'TransformedDistribution',
 'LKJ',
 'CholeskyLKJ',
 'ContinuousBernoulli',
 '_BaseDeterministic',
 '_BaseDeterministic',
 'Dirichlet',
 'Multinomial',
 'DirichletMultinomial',
 'DoublesidedMaxwell',
 'Empirical',
 'FiniteDiscrete',
 'GammaGamma',
 'Normal',
 'Sample',
 'GaussianProcess',
 'GeneralizedNormal',
 'GeneralizedPareto',
 'Geometric',
 'Uniform',
 'HalfCauchy',
 'HalfNormal',
 'StudentT',
 'HalfStudentT',
 'HiddenMarkovModel',
 'Horseshoe',
 'Independent',
 'InverseGamma',
 'InverseGaussian',
 'Laplace',
 'LinearGaussianStateSpaceModel',
 'Logistic',
 'Mixture',
 'MixtureSameFamily',
 'MultivariateStudentTLinearOperator',
 'NegativeBinomial',
 'OneHotCategorical',
 'OrderedLogistic',
 'Pareto',
 'PERT',
 'QuantizedDistribution',
 'Poisson',
 '_TensorCoercible',
 'PixelCNN',
 'PlackettLuce',
 'PoissonLogNormalQuadratureCompound',
 'SphericalUniform',
 'VonMisesFisher',
 'PowerSpherical',
 'ProbitBernoulli',
 'RelaxedBernoulli',
 'ExpRelaxedOneHotCategorical',
 'StudentTProcess',
 'Triangular',
 'TruncatedCauchy',
 'TruncatedNormal',
 'VonMises',
 'WishartLinearOperator',
 'Zipf',
 'DeterministicEmpirical']
[34]:
dist = tfd.Normal(loc=100, scale=15)
[35]:
x = dist.sample((3,4))
x
[35]:
<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[ 90.82311 , 115.852325,  69.85233 , 111.310234],
       [114.064575, 113.6998  , 104.08606 , 115.32353 ],
       [ 98.11167 , 104.219315, 112.88289 , 106.38903 ]], dtype=float32)>
[36]:
n = 100
xs = dist.sample(n)
plt.hist(xs, density=True)
xp = tf.linspace(50., 150., 100)
plt.plot(xp, dist.prob(xp))
pass
../_images/notebooks_B11_TF2_Building_Blocks_52_0.png

Broadcasting

[37]:
dist = tfd.Normal(loc=[3,4,5,6], scale=0.5)
[38]:
dist.sample(5)
[38]:
<tf.Tensor: shape=(5, 4), dtype=float32, numpy=
array([[3.4214985, 4.020046 , 5.4378996, 7.129439 ],
       [2.4713418, 3.6216328, 5.1785645, 6.5690327],
       [3.2009451, 3.6195786, 5.032829 , 5.546834 ],
       [2.6353314, 3.8272653, 4.5734615, 6.2040973],
       [3.0592232, 4.0541725, 4.9947314, 5.419991 ]], dtype=float32)>
[39]:
xp = tf.linspace(0., 9., 100)[:, tf.newaxis]
plt.plot(np.tile(xp, dist.batch_shape), dist.prob(xp))
pass
../_images/notebooks_B11_TF2_Building_Blocks_56_0.png
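
Here dist is a batch of four independent normals rather than a single multivariate distribution, and most methods evaluate once per batch member. A quick sketch (not in the original notebook):

dist.batch_shape       # TensorShape([4])
dist.log_prob(3.5)     # four log-densities, one per batch member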

Mixtures

[40]:
gmm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(
        probs=[0.4, 0.1, 0.2, 0.3]
    ),
    components_distribution=tfd.Normal(
      loc=[3., 4., 5., 6.],
      scale=[0.1, 0.5, 0.5, .1])
)
[41]:
n = 10000
xs = gmm.sample(n)
[42]:
sns.distplot(xs)
pass
../_images/notebooks_B11_TF2_Building_Blocks_60_0.png
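
The mixture mean is the probability-weighted average of the component means: 0.4*3 + 0.1*4 + 0.2*5 + 0.3*6 = 4.4. A quick check (not in the original notebook):

gmm.mean()  # approximately 4.4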

Transformations

[43]:
[x for x in dir(tfp.bijectors) if x[0].isupper()]
[43]:
['AbsoluteValue',
 'Affine',
 'AffineLinearOperator',
 'AffineScalar',
 'AutoregressiveNetwork',
 'BatchNormalization',
 'Bijector',
 'Blockwise',
 'Chain',
 'CholeskyOuterProduct',
 'CholeskyToInvCholesky',
 'CorrelationCholesky',
 'Cumsum',
 'DiscreteCosineTransform',
 'Exp',
 'Expm1',
 'FFJORD',
 'FillScaleTriL',
 'FillTriangular',
 'FrechetCDF',
 'GeneralizedExtremeValueCDF',
 'GeneralizedPareto',
 'GompertzCDF',
 'GumbelCDF',
 'Identity',
 'Inline',
 'Invert',
 'IteratedSigmoidCentered',
 'KumaraswamyCDF',
 'LambertWTail',
 'Log',
 'Log1p',
 'MaskedAutoregressiveFlow',
 'MatrixInverseTriL',
 'MatvecLU',
 'MoyalCDF',
 'NormalCDF',
 'Ordered',
 'Pad',
 'Permute',
 'PowerTransform',
 'RationalQuadraticSpline',
 'RealNVP',
 'Reciprocal',
 'Reshape',
 'Scale',
 'ScaleMatvecDiag',
 'ScaleMatvecLU',
 'ScaleMatvecLinearOperator',
 'ScaleMatvecTriL',
 'ScaleTriL',
 'Shift',
 'ShiftedGompertzCDF',
 'Sigmoid',
 'Sinh',
 'SinhArcsinh',
 'SoftClip',
 'Softfloor',
 'SoftmaxCentered',
 'Softplus',
 'Softsign',
 'Split',
 'Square',
 'Tanh',
 'TransformDiagonal',
 'Transpose',
 'WeibullCDF']
[44]:
lognormal = tfp.bijectors.Exp()(tfd.Normal(0, 0.5))
[45]:
xs = lognormal.sample(1000)
sns.distplot(xs)
xp = np.linspace(tf.reduce_min(xs), tf.reduce_max(xs), 100)
plt.plot(xp, tfd.LogNormal(loc=0, scale=0.5).prob(xp))
pass
../_images/notebooks_B11_TF2_Building_Blocks_64_0.png
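
Applying the Exp bijector to a normal produces a TransformedDistribution equivalent to tfd.LogNormal with the same parameters, which the plot above illustrates. A direct check (a sketch, not in the original notebook):

pts = tf.constant([0.5, 1.0, 2.0])
lognormal.log_prob(pts)                        # log-density under the transformed distribution
tfd.LogNormal(loc=0., scale=0.5).log_prob(pts) # should agree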

Regression

[46]:
xs = tf.Variable([0., 1., 2., 5., 6., 8.])
ys = tf.sin(xs) + tfd.Normal(loc=0, scale=0.5).sample(xs.shape[0])
[47]:
xs.shape, ys.shape
[47]:
(TensorShape([6]), TensorShape([6]))
[48]:
xs.numpy()
[48]:
array([0., 1., 2., 5., 6., 8.], dtype=float32)
[49]:
ys.numpy()
[49]:
array([-0.21832775,  0.49554497,  1.124055  , -0.30666602, -0.6614609 ,
        1.8780737 ], dtype=float32)
[50]:
xp = tf.linspace(-1., 9., 100)[:, None]
plt.scatter(xs.numpy(), ys.numpy())
plt.plot(xp, tf.sin(xp))
pass
../_images/notebooks_B11_TF2_Building_Blocks_70_0.png
[51]:
kernel = tfp.math.psd_kernels.ExponentiatedQuadratic(length_scale=1.5)
reg = tfd.GaussianProcessRegressionModel(
    kernel, xp[:, tf.newaxis], xs[:, tf.newaxis], ys
)
[52]:
ub, lb = reg.mean() + [2*reg.stddev(), -2*reg.stddev()]
plt.fill_between(np.ravel(xp), np.ravel(ub), np.ravel(lb), alpha=0.2)
plt.plot(xp, reg.mean(), c='red', linewidth=2)
plt.scatter(xs[:], ys[:], s=50, c='k')
pass
../_images/notebooks_B11_TF2_Building_Blocks_72_0.png
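
Posterior function draws can also be sampled directly from the regression model. A sketch using the reg object defined above (not in the original notebook):

samples = reg.sample(5)  # five draws from the GP posterior
plt.plot(np.ravel(xp), tf.reshape(samples, (5, -1)).numpy().T, alpha=0.5)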

TensorFlow Data

TensorFlow provides a data API so that it can work seamlessly with large data sets that may not fit into memory. This results in TensorFlow Dataset (TFDS) objects that handle multi-threading, queuing, batching, and pre-fetching.

You can think of a TFDS as a smart generator over data. Generally, you first create a TFDS from in-memory data using from_tensor_slices, or from data in the file system or a relational database. You then apply transformations to process the data before handing it off to, say, a deep learning method.
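
A minimal sketch of this pattern, with illustrative values (each step is explained in the sections that follow):

raw = np.arange(10, dtype='float32')
pipeline = (tf.data.Dataset.from_tensor_slices(raw)  # create a dataset from in-memory data
            .map(lambda x: x / 10.0)                 # preprocess each element
            .shuffle(buffer_size=10)                 # randomize order
            .batch(4)                                # group into mini-batches
            .prefetch(1))                            # overlap loading with computation
for batch in pipeline:
    print(batch.shape)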

Using from_tensor_slices

You can pass in a list, dict, numpy array, or TensorFlow tensor.

[53]:
x = np.arange(6)
ds = tf.data.Dataset.from_tensor_slices(x)
ds
[53]:
<TensorSliceDataset shapes: (), types: tf.int64>
[54]:
for item in ds.take(3):
    print(item)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
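
A dict of equal-length arrays also works; each element is then a dict of scalar tensors. A sketch (not in the original notebook):

features = {'a': np.arange(3), 'b': np.arange(3) * 10}
for item in tf.data.Dataset.from_tensor_slices(features).take(1):
    print(item)  # {'a': <tf.Tensor ... 0>, 'b': <tf.Tensor ... 0>}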

Transformations

Once you have a TFDS, you can chain its transformation methods to process the data. We will cover functional programming next week, but most of this should be comprehensible even without a deep understanding of it.

[55]:
ds = ds.map(lambda x: x**2).repeat(3)
[56]:
for item in ds.take(3):
    print(item)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
[57]:
ds = ds.shuffle(buffer_size=4, seed=0).batch(5)
[58]:
for item in ds.take(3):
    print(item)
tf.Tensor([ 0  9  4  0 25], shape=(5,), dtype=int64)
tf.Tensor([ 1  9  1 16  0], shape=(5,), dtype=int64)
tf.Tensor([16  4  1 16  4], shape=(5,), dtype=int64)
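
Other transformations chain in the same way, for example filter (a sketch, not in the original notebook):

evens = tf.data.Dataset.range(10).filter(lambda x: x % 2 == 0)
print([int(v) for v in evens])  # [0, 2, 4, 6, 8]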

Prefetching is an optimization that preloads upcoming data in parallel while the current batch is being consumed.

[59]:
ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
[59]:
<PrefetchDataset shapes: (None,), types: tf.int64>

Reading from files

You can also read from CSV files, text files, or a SQLite database, and apply transformations in the same way.

[60]:
ds = tf.data.experimental.CsvDataset(
    'data/X_train_unscaled.csv',
    record_defaults=[tf.float32]*10,
    header=True
)
[61]:
for item in ds.take(1):
    print(item)
(<tf.Tensor: shape=(), dtype=float32, numpy=1.0>, <tf.Tensor: shape=(), dtype=float32, numpy=1.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=52.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=30.5>, <tf.Tensor: shape=(), dtype=float32, numpy=1.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)
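
Plain text files can be streamed line by line with TextLineDataset; the file path here is hypothetical (a sketch, not in the original notebook):

ds_txt = tf.data.TextLineDataset('data/example.txt')  # hypothetical file
for line in ds_txt.take(2):
    print(line)  # each element is a tf.string scalar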