TensorFlow

In [1]:
%matplotlib inline
In [2]:
import warnings
warnings.simplefilter('ignore', RuntimeWarning)
In [3]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

We work with TF1 but ask it to emulate TF2 behavior.

In [4]:
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
In [5]:
%%capture
import tensorflow_probability as tfp
tfd = tfp.distributions

Working with tensors

Tensors behave almost exactly like numpy arrays.

In [6]:
tf.constant([1., 2., 3.])
Out[6]:
<tf.Tensor: id=0, shape=(3,), dtype=float32, numpy=array([1., 2., 3.], dtype=float32)>
In [7]:
x = tf.Variable([[1.,2.,3.], [4.,5.,6.]])
In [8]:
x.shape
Out[8]:
TensorShape([2, 3])
In [9]:
x.dtype
Out[9]:
tf.float32

Conversion to numpy

In [10]:
x.numpy()
Out[10]:
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)

Indexing

In [11]:
x[:, :2]
Out[11]:
<tf.Tensor: id=16, shape=(2, 2), dtype=float32, numpy=
array([[1., 2.],
       [4., 5.]], dtype=float32)>

Assignment

In [12]:
x[0,:].assign([3.,2.,1.])
Out[12]:
<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[3., 2., 1.],
       [4., 5., 6.]], dtype=float32)>
In [13]:
x
Out[13]:
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[3., 2., 1.],
       [4., 5., 6.]], dtype=float32)>

Reductions

In [14]:
tf.reduce_mean(x, axis=0)
Out[14]:
<tf.Tensor: id=30, shape=(3,), dtype=float32, numpy=array([3.5, 3.5, 3.5], dtype=float32)>
In [15]:
tf.reduce_sum(x, axis=1)
Out[15]:
<tf.Tensor: id=34, shape=(2,), dtype=float32, numpy=array([ 6., 15.], dtype=float32)>
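As a quick check of the axis convention: axis=0 reduces down the columns and axis=1 reduces across the rows, so for x = [[3, 2, 1], [4, 5, 6]] the column means are (3.5, 3.5, 3.5) and the row sums are (6, 15), matching the outputs above.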

Broadcasting

In [16]:
x + 10
Out[16]:
<tf.Tensor: id=38, shape=(2, 3), dtype=float32, numpy=
array([[13., 12., 11.],
       [14., 15., 16.]], dtype=float32)>
In [17]:
x * 10
Out[17]:
<tf.Tensor: id=42, shape=(2, 3), dtype=float32, numpy=
array([[30., 20., 10.],
       [40., 50., 60.]], dtype=float32)>
In [18]:
x - tf.reduce_mean(x, axis=1)[:, tf.newaxis]
Out[18]:
<tf.Tensor: id=52, shape=(2, 3), dtype=float32, numpy=
array([[ 1.,  0., -1.],
       [-1.,  0.,  1.]], dtype=float32)>

Matrix operations

In [19]:
x @ tf.transpose(x)
Out[19]:
<tf.Tensor: id=58, shape=(2, 2), dtype=float32, numpy=
array([[14., 28.],
       [28., 77.]], dtype=float32)>

Ufuncs

In [20]:
tf.exp(x)
Out[20]:
<tf.Tensor: id=61, shape=(2, 3), dtype=float32, numpy=
array([[ 20.085537 ,   7.389056 ,   2.7182817],
       [ 54.59815  , 148.41316  , 403.4288   ]], dtype=float32)>
In [21]:
tf.sqrt(x)
Out[21]:
<tf.Tensor: id=64, shape=(2, 3), dtype=float32, numpy=
array([[1.7320508, 1.4142135, 1.       ],
       [2.       , 2.236068 , 2.4494898]], dtype=float32)>

Random numbers

In [22]:
X = tf.random.normal(shape=(10,4))
y = tf.random.normal(shape=(10,1))

Linear algebra

In [23]:
tf.linalg.lstsq(X, y)
Out[23]:
<tf.Tensor: id=82, shape=(4, 1), dtype=float32, numpy=
array([[-0.20034605],
       [-0.18362193],
       [-0.77986723],
       [-0.558746  ]], dtype=float32)>
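tf.linalg.lstsq returns the ordinary least squares solution, i.e. the w minimizing ||Xw − y||²; for a full-rank X this is w = (XᵀX)⁻¹Xᵀy.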

Vectorization

In [24]:
X = tf.random.normal(shape=(1000,10,4))
y = tf.random.normal(shape=(1000,10,1))
In [25]:
tf.linalg.lstsq(X, y)
Out[25]:
<tf.Tensor: id=100, shape=(1000, 4, 1), dtype=float32, numpy=
array([[[ 0.04171363],
        [ 0.8832583 ],
        [ 0.5841526 ],
        [-0.48581767]],

       [[-0.73950046],
        [-0.22255968],
        [ 0.7190923 ],
        [ 0.5090183 ]],

       [[ 0.74239963],
        [-0.5741371 ],
        [-0.19040915],
        [ 0.1790382 ]],

       ...,

       [[-0.21469031],
        [-0.5195575 ],
        [-0.08611798],
        [ 0.2624016 ]],

       [[ 0.01139675],
        [ 0.3803665 ],
        [-0.1892877 ],
        [ 0.27254012]],

       [[ 0.21505316],
        [ 0.43938717],
        [-0.4075264 ],
        [-0.12740411]]], dtype=float32)>

Automatic differentiation

In [26]:
def f(x,y):
    return x**2 + 2*y**2 + 3*x*y

Gradient

In [27]:
x, y = tf.Variable(1.0), tf.Variable(2.0)
In [28]:
with tf.GradientTape() as tape:
    z = f(x, y)
In [29]:
tape.gradient(z, [x,y])
Out[29]:
[<tf.Tensor: id=186, shape=(), dtype=float32, numpy=8.0>,
 <tf.Tensor: id=187, shape=(), dtype=float32, numpy=11.0>]
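These agree with the analytic gradient ∂z/∂x = 2x + 3y and ∂z/∂y = 4y + 3x, which at (x, y) = (1, 2) evaluate to 8 and 11.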

Hessian

In [30]:
with tf.GradientTape(persistent=True) as H_tape:
    with tf.GradientTape() as J_tape:
        z = f(x, y)
    Js = J_tape.gradient(z, [x,y])
Hs = [H_tape.gradient(J, [x,y]) for J in Js]
del H_tape
In [31]:
np.array(Hs)
Out[31]:
array([[2., 3.],
       [3., 4.]], dtype=float32)
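This is the analytic Hessian of f: ∂²f/∂x² = 2, ∂²f/∂y² = 4, and ∂²f/∂x∂y = ∂²f/∂y∂x = 3.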

Keras

In [32]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
In [33]:
data = fetch_california_housing()
In [34]:
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)
In [35]:
y_train.min(), y_train.max()
Out[35]:
(0.14999, 5.00001)
In [36]:
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)
In [37]:
import tensorflow.keras as keras
In [38]:
Dense = keras.layers.Dense

We can treat a deep learning model as a black box with a collection of unknown parameters. For example, when the output is a Dense layer with a single node, the entire network is just performing some form of regression. Hence we can replace a linear regression model with such a neural network model and run MCMC or VI as usual.
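As a minimal sketch of that idea (not part of the original notebook), a Sequential model whose only layer is a single Dense node with no activation computes ŷ = Xw + b, i.e. ordinary linear regression; the model built below simply inserts one hidden layer in front of it.

# Hypothetical sketch: a single linear Dense node is just linear regression
linreg = keras.models.Sequential([
    Dense(1, input_shape=X_train.shape[1:])
])
linreg.compile(loss="mse", optimizer="sgd")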

In [39]:
model = keras.models.Sequential([
    Dense(30,
          activation='elu',
          input_shape=X_train.shape[1:]),
    Dense(1)
])
In [40]:
model.compile(loss="mse", optimizer="nadam", metrics=["mae"])
In [41]:
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense (Dense)                (None, 30)                270
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 31
=================================================================
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________
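The parameter counts follow from the layer shapes: the California housing data has 8 features, so the hidden layer has 8 × 30 weights + 30 biases = 270 parameters and the output layer has 30 × 1 weights + 1 bias = 31, for 301 in total.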
In [42]:
model.layers
Out[42]:
[<tensorflow.python.keras.layers.core.Dense at 0x1a2fad3cc0>,
 <tensorflow.python.keras.layers.core.Dense at 0x1a2fc2b080>]
In [43]:
model.layers[0].name
Out[43]:
'dense'
In [44]:
model.layers[0].activation
Out[44]:
<function tensorflow.python.keras.activations.elu(x, alpha=1.0)>
In [45]:
hist = model.fit(X_train_s,
                 y_train,
                 epochs=10,
                 validation_split=0.2)
Train on 12384 samples, validate on 3096 samples
Epoch 1/10
12384/12384 [==============================] - 1s 118us/sample - loss: 1.8582 - mae: 0.9963 - val_loss: 0.5784 - val_mae: 0.5518
Epoch 2/10
12384/12384 [==============================] - 1s 73us/sample - loss: 0.5320 - mae: 0.5257 - val_loss: 0.4855 - val_mae: 0.5058
Epoch 3/10
12384/12384 [==============================] - 1s 73us/sample - loss: 0.4773 - mae: 0.4980 - val_loss: 0.4502 - val_mae: 0.4890
Epoch 4/10
12384/12384 [==============================] - 1s 78us/sample - loss: 0.4565 - mae: 0.4889 - val_loss: 0.4334 - val_mae: 0.4801
Epoch 5/10
12384/12384 [==============================] - 1s 79us/sample - loss: 0.4521 - mae: 0.4853 - val_loss: 0.4253 - val_mae: 0.4713
Epoch 6/10
12384/12384 [==============================] - 1s 77us/sample - loss: 0.4385 - mae: 0.4791 - val_loss: 0.4205 - val_mae: 0.4684
Epoch 7/10
12384/12384 [==============================] - 1s 79us/sample - loss: 0.4371 - mae: 0.4756 - val_loss: 0.4212 - val_mae: 0.4678
Epoch 8/10
12384/12384 [==============================] - 1s 81us/sample - loss: 0.4282 - mae: 0.4719 - val_loss: 0.4136 - val_mae: 0.4633
Epoch 9/10
12384/12384 [==============================] - 1s 73us/sample - loss: 0.4227 - mae: 0.4691 - val_loss: 0.4069 - val_mae: 0.4545
Epoch 10/10
12384/12384 [==============================] - 1s 60us/sample - loss: 0.4159 - mae: 0.4650 - val_loss: 0.4172 - val_mae: 0.4628
In [46]:
import pandas as pd
In [47]:
df = pd.DataFrame(hist.history)
In [48]:
df.head()
Out[48]:
loss mae val_loss val_mae
0 1.858223 0.996252 0.578439 0.551837
1 0.532025 0.525739 0.485485 0.505817
2 0.477273 0.497998 0.450215 0.489029
3 0.456520 0.488940 0.433416 0.480149
4 0.452106 0.485341 0.425300 0.471301
In [49]:
df.plot()
pass
[Figure: training history (loss, mae, val_loss, val_mae) by epoch]
In [50]:
model.evaluate(X_test_s, y_test)
5160/5160 [==============================] - 0s 35us/sample - loss: 0.4270 - mae: 0.4642
Out[50]:
[0.4270468956278276, 0.46417126]
In [51]:
np.c_[model.predict(X_test_s[:3, :]), y_test[:3]]
Out[51]:
array([[0.97243857, 1.04      ],
       [1.58840811, 1.65      ],
       [0.96830416, 0.83      ]])
In [80]:
model.save('housing.h5')
In [81]:
model = keras.models.load_model('housing.h5')

TensorFlow Probability

Distributions

In [52]:
[str(x).split('.')[-1][:-2] for x in tfd.distribution.Distribution.__subclasses__()]
Out[52]:
['ConditionalDistribution',
 'Autoregressive',
 'BatchReshape',
 'Bernoulli',
 'Beta',
 'Multinomial',
 'Binomial',
 'Blockwise',
 'Categorical',
 'Cauchy',
 'Gamma',
 'TransformedDistribution',
 '_BaseDeterministic',
 '_BaseDeterministic',
 'Dirichlet',
 'DirichletMultinomial',
 'Empirical',
 'GammaGamma',
 'Normal',
 'Geometric',
 'Uniform',
 'HalfCauchy',
 'HalfNormal',
 'HiddenMarkovModel',
 'Horseshoe',
 'Independent',
 'InverseGamma',
 'InverseGaussian',
 'JointDistribution',
 'Laplace',
 'LinearGaussianStateSpaceModel',
 'LKJ',
 'Logistic',
 'Mixture',
 'MixtureSameFamily',
 'MultivariateStudentTLinearOperator',
 'NegativeBinomial',
 'OneHotCategorical',
 'Pareto',
 'Poisson',
 'PoissonLogNormalQuadratureCompound',
 'QuantizedDistribution',
 'ExpRelaxedOneHotCategorical',
 'Sample',
 'StudentT',
 'Triangular',
 'TruncatedNormal',
 'VectorDiffeomixture',
 'VonMises',
 'VonMisesFisher',
 '_WishartLinearOperator',
 'Zipf',
 '_TensorCoercible']
In [53]:
dist = tfd.Normal(loc=100, scale=15)
In [54]:
x = dist.sample((3,4))
x
Out[54]:
<tf.Tensor: id=40381, shape=(3, 4), dtype=float32, numpy=
array([[ 85.994514,  97.646706,  94.947174, 117.32828 ],
       [111.22745 , 102.00683 , 107.21807 , 101.29047 ],
       [ 92.01316 , 111.45788 ,  97.36463 , 124.13283 ]], dtype=float32)>
In [55]:
n = 100
xs = dist.sample(n)
plt.hist(xs, density=True)
xp = tf.linspace(50., 150., 100)
plt.plot(xp, dist.prob(xp))
pass
[Figure: histogram of 100 samples with the Normal(100, 15) density overlaid]

Broadcasting

In [56]:
dist = tfd.Normal(loc=[3,4,5,6], scale=0.5)
In [57]:
dist.sample(5)
Out[57]:
<tf.Tensor: id=40506, shape=(5, 4), dtype=float32, numpy=
array([[2.9720945, 4.0878305, 5.4115677, 5.4803023],
       [2.3959792, 4.4667177, 5.6659327, 5.7601957],
       [4.26844  , 3.9352567, 5.9535723, 6.14893  ],
       [2.587455 , 3.4145675, 4.2401953, 6.1878347],
       [3.5405107, 3.9414463, 4.8362327, 6.335392 ]], dtype=float32)>
In [58]:
xp = tf.linspace(0., 9., 100)[:, tf.newaxis]
plt.plot(np.tile(xp, dist.batch_shape), dist.prob(xp))
pass
[Figure: densities of the four broadcast Normal distributions]

Mixtures

In [59]:
tfd.MixtureSameFamily?
In [60]:
gmm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(
        probs=[0.4, 0.1, 0.2, 0.3]
    ),
    components_distribution=tfd.Normal(
      loc=[3., 4., 5., 6.],
      scale=[0.1, 0.5, 0.5, .1])
)
In [61]:
n = 10000
xs = gmm.sample(n)
In [62]:
sns.distplot(xs)
pass
[Figure: density plot of samples from the four-component Gaussian mixture]
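As a quick sanity check (not in the original notebook), the mixture mean should be Σ πₖ μₖ = 0.4·3 + 0.1·4 + 0.2·5 + 0.3·6 = 4.4, and gmm.mean() should return a value close to this:

gmm.mean()  # expected to be approximately 4.4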

Transformations

In [63]:
[x for x in dir(tfp.bijectors) if x[0].isupper()]
Out[63]:
['AbsoluteValue',
 'Affine',
 'AffineLinearOperator',
 'AffineScalar',
 'AutoregressiveLayer',
 'BatchNormalization',
 'Bijector',
 'Blockwise',
 'Chain',
 'CholeskyOuterProduct',
 'CholeskyToInvCholesky',
 'ConditionalBijector',
 'DiscreteCosineTransform',
 'Exp',
 'Expm1',
 'FillTriangular',
 'Gumbel',
 'Identity',
 'Inline',
 'Invert',
 'IteratedSigmoidCentered',
 'Kumaraswamy',
 'MaskedAutoregressiveFlow',
 'MatrixInverseTriL',
 'MatvecLU',
 'NormalCDF',
 'Ordered',
 'Permute',
 'PowerTransform',
 'RealNVP',
 'Reciprocal',
 'Reshape',
 'ScaleTriL',
 'Sigmoid',
 'SinhArcsinh',
 'SoftmaxCentered',
 'Softplus',
 'Softsign',
 'Square',
 'Tanh',
 'TransformDiagonal',
 'Transpose',
 'Weibull']
In [64]:
lognormal = tfp.bijectors.Exp()(tfd.Normal(0, 0.5))
In [65]:
xs = lognormal.sample(1000)
sns.distplot(xs)
xp = np.linspace(tf.reduce_min(xs), tf.reduce_max(xs), 100)
plt.plot(xp, tfd.LogNormal(loc=0, scale=0.5).prob(xp))
pass
W0417 16:08:27.311493 4643866048 deprecation_wrapper.py:76] From /Users/cliburn/anaconda3/lib/python3.6/site-packages/tensorflow_probability/python/bijectors/bijector.py:992: The name shape is deprecated. Please use compat.v1.shape instead.

W0417 16:08:27.312743 4643866048 deprecation_wrapper.py:76] From /Users/cliburn/anaconda3/lib/python3.6/site-packages/tensorflow_probability/python/bijectors/bijector.py:1283: The name size is deprecated. Please use compat.v1.size instead.

[Figure: density plot of samples from the Exp-transformed Normal with the LogNormal(0, 0.5) density overlaid]
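As a quick check (a sketch, not in the original notebook), the Exp-transformed Normal(0, 0.5) should agree with tfd.LogNormal(loc=0, scale=0.5), e.g. their log probabilities at a test point should match:

lognormal.log_prob(1.5), tfd.LogNormal(loc=0, scale=0.5).log_prob(1.5)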

Regression

In [66]:
xs = tf.Variable([0., 1., 2., 5., 6., 8.])
ys = tf.sin(xs) + tfd.Normal(loc=0, scale=0.5).sample(xs.shape[0])
In [67]:
xs.shape, ys.shape
Out[67]:
(TensorShape([6]), TensorShape([6]))
In [68]:
xs.numpy()
Out[68]:
array([0., 1., 2., 5., 6., 8.], dtype=float32)
In [69]:
ys.numpy()
Out[69]:
array([ 0.31766635,  0.5954742 ,  0.7167998 , -0.06919491, -0.2230027 ,
        1.228898  ], dtype=float32)
In [70]:
xp = tf.linspace(-1., 9., 100)[:, None]
plt.scatter(xs.numpy(), ys.numpy())
plt.plot(xp, tf.sin(xp))
pass
[Figure: scatter of noisy observations with the underlying sine curve]
In [71]:
kernel = tfp.positive_semidefinite_kernels.ExponentiatedQuadratic(length_scale=1.5)
reg = tfd.GaussianProcessRegressionModel(
    kernel, xp[:, tf.newaxis], xs[:, tf.newaxis], ys
)
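Here the positional arguments are, in order, the covariance kernel, the index points at which to predict (xp), the observed index points (xs), and the observed values (ys).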
In [72]:
ub, lb = reg.mean() + [2*reg.stddev(), -2*reg.stddev()]
plt.fill_between(xp[..., 0], ub[...,0], lb[...,0], alpha=0.2)
plt.plot(xp, reg.mean(), c='red', linewidth=2)
plt.scatter(xs[:], ys[:], s=50, c='k')
pass
[Figure: GP regression mean with ±2 standard deviation band and the observations]

Modeling

Sampling from a normal distribution using HMC (prior predictive samples)

In [73]:
[x for x in dir(tfp.mcmc) if x[0].isupper()]
Out[73]:
['CheckpointableStatesAndTrace',
 'HamiltonianMonteCarlo',
 'MetropolisAdjustedLangevinAlgorithm',
 'MetropolisHastings',
 'RandomWalkMetropolis',
 'ReplicaExchangeMC',
 'SimpleStepSizeAdaptation',
 'SliceSampler',
 'StatesAndTrace',
 'TransformedTransitionKernel',
 'TransitionKernel',
 'UncalibratedHamiltonianMonteCarlo',
 'UncalibratedLangevin',
 'UncalibratedRandomWalk']
In [74]:
dir(tfp.vi)
Out[74]:
['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_allowed_symbols',
 'amari_alpha',
 'arithmetic_geometric',
 'chi_square',
 'csiszar_vimco',
 'csiszar_vimco_helper',
 'dual_csiszar_function',
 'jeffreys',
 'jensen_shannon',
 'kl_forward',
 'kl_reverse',
 'log1p_abs',
 'modified_gan',
 'monte_carlo_csiszar_f_divergence',
 'pearson',
 'squared_hellinger',
 'symmetrized_csiszar_function',
 't_power',
 'total_variation',
 'triangular']
In [75]:
from tensorflow_probability import edward2 as ed
In [76]:
# From example in help docs

def unnormalized_log_prob(x):
    return -x - x**2.

# Initialize the HMC transition kernel.
num_results = int(1e2)
num_burnin_steps = int(1e2)
adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
    tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=unnormalized_log_prob,
        num_leapfrog_steps=3,
        step_size=1.),
    num_adaptation_steps=int(num_burnin_steps * 0.8))

# Run the chain (with burn-in).
samples, is_accepted = tfp.mcmc.sample_chain(
    num_results=num_results,
    num_burnin_steps=num_burnin_steps,
    current_state=1.,
    kernel=adaptive_hmc,
    trace_fn=lambda _, pkr: pkr.inner_results.is_accepted)

sample_mean = tf.reduce_mean(samples)
sample_stddev = tf.math.reduce_std(samples)
W0417 16:08:28.351274 4643866048 deprecation_wrapper.py:76] From /Users/cliburn/anaconda3/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/hmc.py:784: The name zeros_like is deprecated. Please use compat.v1.zeros_like instead.

W0417 16:08:28.354593 4643866048 deprecation_wrapper.py:76] From /Users/cliburn/anaconda3/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/metropolis_hastings.py:272: The name ones_like is deprecated. Please use compat.v1.ones_like instead.

In [77]:
sample_mean
Out[77]:
<tf.Tensor: id=136175, shape=(), dtype=float32, numpy=-0.6822989>
In [78]:
sample_stddev
Out[78]:
<tf.Tensor: id=136182, shape=(), dtype=float32, numpy=0.7804036>
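These are consistent with the target: exp(−x − x²) ∝ exp(−(x + 1/2)²), a normal distribution with mean −1/2 and variance 1/2 (standard deviation ≈ 0.71), so with only 100 kept samples the estimates of −0.68 and 0.78 are in the right range.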
In [79]:
sns.distplot(samples)
plt.axvline(sample_mean.numpy(), c='red')
plt.plot([sample_mean - 2*sample_stddev, sample_mean + 2*sample_stddev],
         [0.01, 0.01], c='k', linewidth=3)
pass
[Figure: density of the HMC samples with the sample mean and ±2 standard deviation interval marked]
In [ ]: