TensorFlow

In [1]:
%matplotlib inline
In [2]:
import warnings
warnings.simplefilter('ignore', RuntimeWarning)
In [3]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

We work with TF1 but ask it to emulate TF2 behavior.

In [4]:
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
In [5]:
%%capture
import tensorflow_probability as tfp
tfd = tfp.distributions

Working with tensors

Tensors behave almost exactly like numpy arrays.

In [6]:
tf.constant([1., 2., 3.])
Out[6]:
<tf.Tensor: id=0, shape=(3,), dtype=float32, numpy=array([1., 2., 3.], dtype=float32)>
In [7]:
x = tf.Variable([[1.,2.,3.], [4.,5.,6.]])
In [8]:
x.shape
Out[8]:
TensorShape([2, 3])
In [9]:
x.dtype
Out[9]:
tf.float32

Conversion to numpy

In [10]:
x.numpy()
Out[10]:
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)

Indexing

In [11]:
x[:, :2]
Out[11]:
<tf.Tensor: id=16, shape=(2, 2), dtype=float32, numpy=
array([[1., 2.],
       [4., 5.]], dtype=float32)>

Assignment

In [12]:
x[0,:].assign([3.,2.,1.])
Out[12]:
<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[3., 2., 1.],
       [4., 5., 6.]], dtype=float32)>
In [13]:
x
Out[13]:
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[3., 2., 1.],
       [4., 5., 6.]], dtype=float32)>

Reductions

In [14]:
tf.reduce_mean(x, axis=0)
Out[14]:
<tf.Tensor: id=30, shape=(3,), dtype=float32, numpy=array([3.5, 3.5, 3.5], dtype=float32)>
In [15]:
tf.reduce_sum(x, axis=1)
Out[15]:
<tf.Tensor: id=34, shape=(2,), dtype=float32, numpy=array([ 6., 15.], dtype=float32)>
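As a quick check of the axis convention: axis=0 reduces down the columns and axis=1 reduces across the rows, so for x = [[3, 2, 1], [4, 5, 6]] the column means are (3.5, 3.5, 3.5) and the row sums are (6, 15), matching the outputs above.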

Broadcasting

In [16]:
x + 10
Out[16]:
<tf.Tensor: id=38, shape=(2, 3), dtype=float32, numpy=
array([[13., 12., 11.],
       [14., 15., 16.]], dtype=float32)>
In [17]:
x * 10
Out[17]:
<tf.Tensor: id=42, shape=(2, 3), dtype=float32, numpy=
array([[30., 20., 10.],
       [40., 50., 60.]], dtype=float32)>
In [18]:
x - tf.reduce_mean(x, axis=1)[:, tf.newaxis]
Out[18]:
<tf.Tensor: id=52, shape=(2, 3), dtype=float32, numpy=
array([[ 1.,  0., -1.],
       [-1.,  0.,  1.]], dtype=float32)>

Matrix operations

In [19]:
x @ tf.transpose(x)
Out[19]:
<tf.Tensor: id=58, shape=(2, 2), dtype=float32, numpy=
array([[14., 28.],
       [28., 77.]], dtype=float32)>

Ufuncs

In [20]:
tf.exp(x)
Out[20]:
<tf.Tensor: id=61, shape=(2, 3), dtype=float32, numpy=
array([[ 20.085537 ,   7.389056 ,   2.7182817],
       [ 54.59815  , 148.41316  , 403.4288   ]], dtype=float32)>
In [21]:
tf.sqrt(x)
Out[21]:
<tf.Tensor: id=64, shape=(2, 3), dtype=float32, numpy=
array([[1.7320508, 1.4142135, 1.       ],
       [2.       , 2.236068 , 2.4494898]], dtype=float32)>

Random numbers

In [22]:
X = tf.random.normal(shape=(10,4))
y = tf.random.normal(shape=(10,1))

Linear algebra

In [23]:
tf.linalg.lstsq(X, y)
Out[23]:
<tf.Tensor: id=82, shape=(4, 1), dtype=float32, numpy=
array([[-0.20034605],
       [-0.18362193],
       [-0.77986723],
       [-0.558746  ]], dtype=float32)>
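tf.linalg.lstsq returns the ordinary least squares solution, i.e. the w minimizing ||Xw − y||²; for a full-rank X this is w = (XᵀX)⁻¹Xᵀy.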

Vectorization

In [24]:
X = tf.random.normal(shape=(1000,10,4))
y = tf.random.normal(shape=(1000,10,1))
In [25]:
tf.linalg.lstsq(X, y)
Out[25]:
<tf.Tensor: id=100, shape=(1000, 4, 1), dtype=float32, numpy=
array([[[ 0.04171363],
        [ 0.8832583 ],
        [ 0.5841526 ],
        [-0.48581767]],

       [[-0.73950046],
        [-0.22255968],
        [ 0.7190923 ],
        [ 0.5090183 ]],

       [[ 0.74239963],
        [-0.5741371 ],
        [-0.19040915],
        [ 0.1790382 ]],

       ...,

       [[-0.21469031],
        [-0.5195575 ],
        [-0.08611798],
        [ 0.2624016 ]],

       [[ 0.01139675],
        [ 0.3803665 ],
        [-0.1892877 ],
        [ 0.27254012]],

       [[ 0.21505316],
        [ 0.43938717],
        [-0.4075264 ],
        [-0.12740411]]], dtype=float32)>

Automatic differentiation

In [26]:
def f(x,y):
    return x**2 + 2*y**2 + 3*x*y

Gradient

In [27]:
x, y = tf.Variable(1.0), tf.Variable(2.0)
In [28]:
with tf.GradientTape() as tape:
    z = f(x, y)
In [29]:
tape.gradient(z, [x,y])
Out[29]:
[<tf.Tensor: id=186, shape=(), dtype=float32, numpy=8.0>,
 <tf.Tensor: id=187, shape=(), dtype=float32, numpy=11.0>]
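These agree with the analytic gradient ∂z/∂x = 2x + 3y and ∂z/∂y = 4y + 3x, which at (x, y) = (1, 2) evaluate to 8 and 11.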

Hessian

In [30]:
with tf.GradientTape(persistent=True) as H_tape:
    with tf.GradientTape() as J_tape:
        z = f(x, y)
    Js = J_tape.gradient(z, [x,y])
Hs = [H_tape.gradient(J, [x,y]) for J in Js]
del H_tape
In [31]:
np.array(Hs)
Out[31]:
array([[2., 3.],
       [3., 4.]], dtype=float32)
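This is the analytic Hessian of f: ∂²f/∂x² = 2, ∂²f/∂y² = 4, and ∂²f/∂x∂y = ∂²f/∂y∂x = 3.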

Keras

In [32]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
In [33]:
data = fetch_california_housing()
In [34]:
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)
In [35]:
y_train.min(), y_train.max()
Out[35]:
(0.14999, 5.00001)
In [36]:
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)
In [37]:
import tensorflow.keras as keras
In [38]:
Dense = keras.layers.Dense

We can treat a deep learning model as a black box with a collection of unknown parameters. For example, when the output is a Dense layer with a single node, the entire network is just performing some form of regression. Hence we can replace a linear regression model with such a neural network model and run MCMC or VI as usual.
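As a minimal sketch of that idea (not part of the original notebook), a Sequential model whose only layer is a single Dense node with no activation computes ŷ = Xw + b, i.e. ordinary linear regression; the model built below simply inserts one hidden layer in front of it.

# Hypothetical sketch: a single linear Dense node is just linear regression
linreg = keras.models.Sequential([
    Dense(1, input_shape=X_train.shape[1:])
])
linreg.compile(loss="mse", optimizer="sgd")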

In [39]:
model = keras.models.Sequential([
    Dense(30,
          activation='elu',
          input_shape=X_train.shape[1:]),
    Dense(1)
])
In [40]:
model.compile(loss="mse", optimizer="nadam", metrics=["mae"])
In [41]:
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense (Dense)                (None, 30)                270
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 31
=================================================================
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________
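The parameter counts follow from the layer shapes: the California housing data has 8 features, so the hidden layer has 8 × 30 weights + 30 biases = 270 parameters and the output layer has 30 × 1 weights + 1 bias = 31, for 301 in total.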
In [42]:
model.layers
Out[42]:
[<tensorflow.python.keras.layers.core.Dense at 0x1a2fad3cc0>,
 <tensorflow.python.keras.layers.core.Dense at 0x1a2fc2b080>]
In [43]:
model.layers[0].name
Out[43]:
'dense'
In [44]:
model.layers[0].activation
Out[44]:
<function tensorflow.python.keras.activations.elu(x, alpha=1.0)>
In [45]:
hist = model.fit(X_train_s,
                 y_train,
                 epochs=10,
                 validation_split=0.2)
Train on 12384 samples, validate on 3096 samples
Epoch 1/10
12384/12384 [==============================] - 1s 118us/sample - loss: 1.8582 - mae: 0.9963 - val_loss: 0.5784 - val_mae: 0.5518
Epoch 2/10
12384/12384 [==============================] - 1s 73us/sample - loss: 0.5320 - mae: 0.5257 - val_loss: 0.4855 - val_mae: 0.5058
Epoch 3/10
12384/12384 [==============================] - 1s 73us/sample - loss: 0.4773 - mae: 0.4980 - val_loss: 0.4502 - val_mae: 0.4890
Epoch 4/10
12384/12384 [==============================] - 1s 78us/sample - loss: 0.4565 - mae: 0.4889 - val_loss: 0.4334 - val_mae: 0.4801
Epoch 5/10
12384/12384 [==============================] - 1s 79us/sample - loss: 0.4521 - mae: 0.4853 - val_loss: 0.4253 - val_mae: 0.4713
Epoch 6/10
12384/12384 [==============================] - 1s 77us/sample - loss: 0.4385 - mae: 0.4791 - val_loss: 0.4205 - val_mae: 0.4684
Epoch 7/10
12384/12384 [==============================] - 1s 79us/sample - loss: 0.4371 - mae: 0.4756 - val_loss: 0.4212 - val_mae: 0.4678
Epoch 8/10
12384/12384 [==============================] - 1s 81us/sample - loss: 0.4282 - mae: 0.4719 - val_loss: 0.4136 - val_mae: 0.4633
Epoch 9/10
12384/12384 [==============================] - 1s 73us/sample - loss: 0.4227 - mae: 0.4691 - val_loss: 0.4069 - val_mae: 0.4545
Epoch 10/10
12384/12384 [==============================] - 1s 60us/sample - loss: 0.4159 - mae: 0.4650 - val_loss: 0.4172 - val_mae: 0.4628
In [46]:
import pandas as pd
In [47]:
df = pd.DataFrame(hist.history)
In [48]:
df.head()
Out[48]:
loss mae val_loss val_mae
0 1.858223 0.996252 0.578439 0.551837
1 0.532025 0.525739 0.485485 0.505817
2 0.477273 0.497998 0.450215 0.489029
3 0.456520 0.488940 0.433416 0.480149
4 0.452106 0.485341 0.425300 0.471301
In [49]:
df.plot()
pass
[Figure: training history (loss, mae, val_loss, val_mae) by epoch]
In [50]:
model.evaluate(X_test_s, y_test)
5160/5160 [==============================] - 0s 35us/sample - loss: 0.4270 - mae: 0.4642
Out[50]:
[0.4270468956278276, 0.46417126]
In [51]:
np.c_[model.predict(X_test_s[:3, :]), y_test[:3]]
Out[51]:
array([[0.97243857, 1.04      ],
       [1.58840811, 1.65      ],
       [0.96830416, 0.83      ]])
In [80]:
model.save('housing.h5')
In [81]:
model = keras.models.load_model('housing.h5')

TensorFlow Probability

Distributions

In [52]:
[str(x).split('.')[-1][:-2] for x in tfd.distribution.Distribution.__subclasses__()]
Out[52]:
['ConditionalDistribution',
 'Autoregressive',
 'BatchReshape',
 'Bernoulli',
 'Beta',
 'Multinomial',
 'Binomial',
 'Blockwise',
 'Categorical',
 'Cauchy',
 'Gamma',
 'TransformedDistribution',
 '_BaseDeterministic',
 '_BaseDeterministic',
 'Dirichlet',
 'DirichletMultinomial',
 'Empirical',
 'GammaGamma',
 'Normal',
 'Geometric',
 'Uniform',
 'HalfCauchy',
 'HalfNormal',
 'HiddenMarkovModel',
 'Horseshoe',
 'Independent',
 'InverseGamma',
 'InverseGaussian',
 'JointDistribution',
 'Laplace',
 'LinearGaussianStateSpaceModel',
 'LKJ',
 'Logistic',
 'Mixture',
 'MixtureSameFamily',
 'MultivariateStudentTLinearOperator',
 'NegativeBinomial',
 'OneHotCategorical',
 'Pareto',
 'Poisson',
 'PoissonLogNormalQuadratureCompound',
 'QuantizedDistribution',
 'ExpRelaxedOneHotCategorical',
 'Sample',
 'StudentT',
 'Triangular',
 'TruncatedNormal',
 'VectorDiffeomixture',
 'VonMises',
 'VonMisesFisher',
 '_WishartLinearOperator',
 'Zipf',
 '_TensorCoercible']
In [53]:
dist = tfd.Normal(loc=100, scale=15)
In [54]:
x = dist.sample((3,4))
x
Out[54]:
<tf.Tensor: id=40381, shape=(3, 4), dtype=float32, numpy=
array([[ 85.994514,  97.646706,  94.947174, 117.32828 ],
       [111.22745 , 102.00683 , 107.21807 , 101.29047 ],
       [ 92.01316 , 111.45788 ,  97.36463 , 124.13283 ]], dtype=float32)>
In [55]:
n = 100
xs = dist.sample(n)
plt.hist(xs, density=True)
xp = tf.linspace(50., 150., 100)
plt.plot(xp, dist.prob(xp))
pass
[Figure: histogram of 100 samples with the Normal(100, 15) density overlaid]

Broadcasting

In [56]:
dist = tfd.Normal(loc=[3,4,5,6], scale=0.5)
In [57]:
dist.sample(5)
Out[57]:
<tf.Tensor: id=40506, shape=(5, 4), dtype=float32, numpy=
array([[2.9720945, 4.0878305, 5.4115677, 5.4803023],
       [2.3959792, 4.4667177, 5.6659327, 5.7601957],
       [4.26844  , 3.9352567, 5.9535723, 6.14893  ],
       [2.587455 , 3.4145675, 4.2401953, 6.1878347],
       [3.5405107, 3.9414463, 4.8362327, 6.335392 ]], dtype=float32)>
In [58]:
xp = tf.linspace(0., 9., 100)[:, tf.newaxis]
plt.plot(np.tile(xp, dist.batch_shape), dist.prob(xp))
pass
[Figure: densities of the four broadcast Normal distributions]

Mixtures

In [59]:
tfd.MixtureSameFamily?
In [60]:
gmm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(
        probs=[0.4, 0.1, 0.2, 0.3]
    ),
    components_distribution=tfd.Normal(
      loc=[3., 4., 5., 6.],
      scale=[0.1, 0.5, 0.5, .1])
)
In [61]:
n = 10000
xs = gmm.sample(n)
In [62]:
sns.distplot(xs)
pass
[Figure: density plot of samples from the four-component Gaussian mixture]
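As a quick sanity check (not in the original notebook), the mixture mean should be Σ πₖ μₖ = 0.4·3 + 0.1·4 + 0.2·5 + 0.3·6 = 4.4, and gmm.mean() should return a value close to this:

gmm.mean()  # expected to be approximately 4.4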

Transformations

In [63]:
[x for x in dir(tfp.bijectors) if x[0].isupper()]
Out[63]:
['AbsoluteValue',
 'Affine',
 'AffineLinearOperator',
 'AffineScalar',
 'AutoregressiveLayer',
 'BatchNormalization',
 'Bijector',
 'Blockwise',
 'Chain',
 'CholeskyOuterProduct',
 'CholeskyToInvCholesky',
 'ConditionalBijector',
 'DiscreteCosineTransform',
 'Exp',
 'Expm1',
 'FillTriangular',
 'Gumbel',
 'Identity',
 'Inline',
 'Invert',
 'IteratedSigmoidCentered',
 'Kumaraswamy',
 'MaskedAutoregressiveFlow',
 'MatrixInverseTriL',
 'MatvecLU',
 'NormalCDF',
 'Ordered',
 'Permute',
 'PowerTransform',
 'RealNVP',
 'Reciprocal',
 'Reshape',
 'ScaleTriL',
 'Sigmoid',
 'SinhArcsinh',
 'SoftmaxCentered',
 'Softplus',
 'Softsign',
 'Square',
 'Tanh',
 'TransformDiagonal',
 'Transpose',
 'Weibull']
In [64]:
lognormal = tfp.bijectors.Exp()(tfd.Normal(0, 0.5))
In [65]:
xs = lognormal.sample(1000)
sns.distplot(xs)
xp = np.linspace(tf.reduce_min(xs), tf.reduce_max(xs), 100)
plt.plot(xp, tfd.LogNormal(loc=0, scale=0.5).prob(xp))
pass
W0417 16:08:27.311493 4643866048 deprecation_wrapper.py:76] From /Users/cliburn/anaconda3/lib/python3.6/site-packages/tensorflow_probability/python/bijectors/bijector.py:992: The name shape is deprecated. Please use compat.v1.shape instead.

W0417 16:08:27.312743 4643866048 deprecation_wrapper.py:76] From /Users/cliburn/anaconda3/lib/python3.6/site-packages/tensorflow_probability/python/bijectors/bijector.py:1283: The name size is deprecated. Please use compat.v1.size instead.

[Figure: density plot of samples from the Exp-transformed Normal with the LogNormal(0, 0.5) density overlaid]
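As a quick check (a sketch, not in the original notebook), the Exp-transformed Normal(0, 0.5) should agree with tfd.LogNormal(loc=0, scale=0.5), e.g. their log probabilities at a test point should match:

lognormal.log_prob(1.5), tfd.LogNormal(loc=0, scale=0.5).log_prob(1.5)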

Regression

In [66]:
xs = tf.Variable([0., 1., 2., 5., 6., 8.])
ys = tf.sin(xs) + tfd.Normal(loc=0, scale=0.5).sample(xs.shape[0])
In [67]:
xs.shape, ys.shape
Out[67]:
(TensorShape([6]), TensorShape([6]))
In [68]:
xs.numpy()
Out[68]:
array([0., 1., 2., 5., 6., 8.], dtype=float32)
In [69]:
ys.numpy()
Out[69]:
array([ 0.31766635,  0.5954742 ,  0.7167998 , -0.06919491, -0.2230027 ,
        1.228898  ], dtype=float32)
In [70]:
xp = tf.linspace(-1., 9., 100)[:, None]
plt.scatter(xs.numpy(), ys.numpy())
plt.plot(xp, tf.sin(xp))
pass
[Figure: scatter of noisy observations with the underlying sine curve]
In [71]:
kernel = tfp.positive_semidefinite_kernels.ExponentiatedQuadratic(length_scale=1.5)
reg = tfd.GaussianProcessRegressionModel(
    kernel, xp[:, tf.newaxis], xs[:, tf.newaxis], ys
)
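Here the positional arguments are, in order, the covariance kernel, the index points at which to predict (xp), the observed index points (xs), and the observed values (ys).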
In [72]:
ub, lb = reg.mean() + [2*reg.stddev(), -2*reg.stddev()]
plt.fill_between(xp[..., 0], ub[...,0], lb[...,0], alpha=0.2)
plt.plot(xp, reg.mean(), c='red', linewidth=2)
plt.scatter(xs[:], ys[:], s=50, c='k')
pass
[Figure: GP regression mean with ±2 standard deviation band and the observations]

Modeling

Sampling from a normal distribution using HMC (prior predictive samples)

In [73]:
[x for x in dir(tfp.mcmc) if x[0].isupper()]
Out[73]:
['CheckpointableStatesAndTrace',
 'HamiltonianMonteCarlo',
 'MetropolisAdjustedLangevinAlgorithm',
 'MetropolisHastings',
 'RandomWalkMetropolis',
 'ReplicaExchangeMC',
 'SimpleStepSizeAdaptation',
 'SliceSampler',
 'StatesAndTrace',
 'TransformedTransitionKernel',
 'TransitionKernel',
 'UncalibratedHamiltonianMonteCarlo',
 'UncalibratedLangevin',
 'UncalibratedRandomWalk']
In [74]:
dir(tfp.vi)
Out[74]:
['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_allowed_symbols',
 'amari_alpha',
 'arithmetic_geometric',
 'chi_square',
 'csiszar_vimco',
 'csiszar_vimco_helper',
 'dual_csiszar_function',
 'jeffreys',
 'jensen_shannon',
 'kl_forward',
 'kl_reverse',
 'log1p_abs',
 'modified_gan',
 'monte_carlo_csiszar_f_divergence',
 'pearson',
 'squared_hellinger',
 'symmetrized_csiszar_function',
 't_power',
 'total_variation',
 'triangular']
In [75]:
from tensorflow_probability import edward2 as ed
In [76]:
# From example in help docs

def unnormalized_log_prob(x):
    return -x - x**2.

# Initialize the HMC transition kernel.
num_results = int(1e2)
num_burnin_steps = int(1e2)
adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
    tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=unnormalized_log_prob,
        num_leapfrog_steps=3,
        step_size=1.),
    num_adaptation_steps=int(num_burnin_steps * 0.8))

# Run the chain (with burn-in).
samples, is_accepted = tfp.mcmc.sample_chain(
    num_results=num_results,
    num_burnin_steps=num_burnin_steps,
    current_state=1.,
    kernel=adaptive_hmc,
    trace_fn=lambda _, pkr: pkr.inner_results.is_accepted)

sample_mean = tf.reduce_mean(samples)
sample_stddev = tf.math.reduce_std(samples)
W0417 16:08:28.351274 4643866048 deprecation_wrapper.py:76] From /Users/cliburn/anaconda3/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/hmc.py:784: The name zeros_like is deprecated. Please use compat.v1.zeros_like instead.

W0417 16:08:28.354593 4643866048 deprecation_wrapper.py:76] From /Users/cliburn/anaconda3/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/metropolis_hastings.py:272: The name ones_like is deprecated. Please use compat.v1.ones_like instead.

In [77]:
sample_mean
Out[77]:
<tf.Tensor: id=136175, shape=(), dtype=float32, numpy=-0.6822989>
In [78]:
sample_stddev
Out[78]:
<tf.Tensor: id=136182, shape=(), dtype=float32, numpy=0.7804036>
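These are consistent with the target: exp(−x − x²) ∝ exp(−(x + 1/2)²), a normal distribution with mean −1/2 and variance 1/2 (standard deviation ≈ 0.71), so with only 100 kept samples the estimates of −0.68 and 0.78 are in the right range.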
In [79]:
sns.distplot(samples)
plt.axvline(sample_mean.numpy(), c='red')
plt.plot([sample_mean - 2*sample_stddev, sample_mean + 2*sample_stddev],
         [0.01, 0.01], c='k', linewidth=3)
pass
[Figure: density of the HMC samples with the sample mean and ±2 standard deviation interval marked]
In [ ]: