Regression in TensorFlow

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import h5py
warnings.resetwarnings()
warnings.simplefilter(action='ignore', category=ImportWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=ResourceWarning)
In [3]:
import tensorflow as tf

Steps in fitting a model

  • Define model variables (parameters) and placeholders (data)
  • Define the loss function
  • Choose an optimizer to minimize the loss
  • Start a session
    • Initialize global variables
    • Run the optimizer for \(n\) steps or epochs, feeding in appropriate data
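
A minimal sketch of this workflow, using made-up toy data and a trivial linear model (the names X_data, y_data, features, targets, and params are placeholders of my own; the worked examples below follow the same pattern):

import numpy as np
import tensorflow as tf

X_data = np.arange(5, dtype=np.float32).reshape(-1, 1)   # toy data for illustration only
y_data = 3 * X_data

features = tf.placeholder(tf.float32, shape=(None, 1))   # placeholders for the data
targets = tf.placeholder(tf.float32, shape=(None, 1))
params = tf.Variable(tf.zeros((1, 1)))                    # model variables (parameters)

predictions = tf.matmul(features, params)                 # the model
loss = tf.reduce_sum(tf.square(targets - predictions))    # the loss function
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)  # the optimizer

with tf.Session() as sess:                                # start a session
    sess.run(tf.global_variables_initializer())           # initialize global variables
    for step in range(100):                               # run the optimizer for n steps
        sess.run(train_op, feed_dict={features: X_data, targets: y_data})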

Linear Regression

In [4]:
np.random.seed(123)
In [5]:
N = 10
W_true = 2
b_true = 1
X_obs = np.arange(N).reshape((-1,1))
eps = np.random.normal(0, 1, (N, 1))
y_obs = np.reshape(W_true * X_obs + b_true + eps, (-1, 1))
In [6]:
plt.scatter(X_obs, y_obs)
plt.plot(X_obs, W_true * X_obs + b_true, c='red')
pass
[Figure: scatter plot of (X_obs, y_obs) with the true line W_true * X_obs + b_true in red]
In [7]:
X = tf.placeholder(tf.float32, (N, 1))
y = tf.placeholder(tf.float32, (N, 1))
W = tf.Variable(tf.random_normal((1,1)))
b = tf.Variable(tf.random_normal((1,)))
yhat = tf.matmul(X, W) + b
loss = tf.reduce_sum(tf.square(y - yhat))

train_op = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
init_op = tf.global_variables_initializer()
In [8]:
niter = 1001

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(niter):
        _, weights, bias, l = sess.run([train_op, W, b, loss], feed_dict={X: X_obs, y: y_obs})
        if i % 100 == 0:
            print('%03d\t%6.2f\t%6.2f\t%6.2f' % (i, weights[0][0], bias[0], l))
000       0.06    1.35  984.14
100       1.92    1.25   16.04
200       1.95    1.03   15.53
300       1.97    0.90   15.36
400       1.98    0.83   15.31
500       1.99    0.79   15.29
600       1.99    0.77   15.29
700       2.00    0.75   15.29
800       2.00    0.75   15.28
900       2.00    0.74   15.28
1000      2.00    0.74   15.28
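
As a sanity check (not part of the original notebook), the same fit can be obtained in closed form with NumPy's least-squares solver; the slope and intercept should be close to the values the optimizer converges to above (about 2.00 and 0.74):

A = np.c_[X_obs, np.ones((N, 1))]                 # design matrix with an intercept column
coef, _, _, _ = np.linalg.lstsq(A, y_obs, rcond=None)
print(coef.ravel())                               # [slope, intercept]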

Logistic Regression

We will use logistic regression to predict entry to graduate school based on GRE, GPA and the rank of the undergraduate college by prestige (1 = highest, 4 = lowest).

In [9]:
import pandas as pd
In [10]:
df = pd.read_csv('https://stats.idre.ucla.edu/stat/data/binary.csv')
In [11]:
df.head()
Out[11]:
   admit  gre   gpa  rank
0      0  380  3.61     3
1      1  660  3.67     3
2      1  800  4.00     1
3      1  640  3.19     4
4      0  520  2.93     4
In [12]:
df = pd.get_dummies(df, columns=['rank'], drop_first=True)
df.head()
Out[12]:
   admit  gre   gpa  rank_2  rank_3  rank_4
0      0  380  3.61       0       1       0
1      1  660  3.67       0       1       0
2      1  800  4.00       0       0       0
3      1  640  3.19       0       0       1
4      0  520  2.93       0       0       1

Reset the data flow graph.

In [13]:
tf.reset_default_graph()  # clear the linear regression graph built above

N = df.shape[0]
X = tf.placeholder(tf.float32, (N, 5))
y = tf.placeholder(tf.float32, (N, 1))

W = tf.Variable(tf.random_normal((5,1)))
b = tf.Variable(tf.random_normal((1,)))

yhat = tf.matmul(X, W) + b
In [14]:
loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=yhat, labels=y))
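
Here yhat holds the linear predictor (the logits) \(z = Xw + b\); tf.nn.sigmoid_cross_entropy_with_logits applies the sigmoid internally, so the quantity being summed is the per-observation negative log-likelihood of a Bernoulli model, \(-\left[y \log \sigma(z) + (1 - y) \log(1 - \sigma(z))\right]\).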
In [15]:
train_op = tf.train.AdamOptimizer().minimize(loss)
init_op = tf.global_variables_initializer()
In [16]:
niter = 25001

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(niter):
        _, weights, bias, l = sess.run([train_op, W, b, loss], feed_dict={X: df.iloc[:, 1:], y: df.iloc[:, 0:1]})
        if i % 5000 == 0:
            print((i, weights.T[0], bias[0], l))
(0, array([-1.3257291 , -0.302939  ,  1.9480195 , -0.17506851,  0.37787107],
      dtype=float32), -0.5002541, 104295.26)
(5000, array([-0.00137983,  0.17184952,  0.4647249 , -0.5058274 , -0.73989314],
      dtype=float32), -0.38277972, 249.21332)
(10000, array([ 0.00200241,  0.35402513, -0.79581696, -1.4096469 , -1.6802471 ],
      dtype=float32), -2.1872034, 230.55487)
(15000, array([ 0.00223551,  0.74499196, -0.6892415 , -1.3469222 , -1.5659412 ],
      dtype=float32), -3.7580762, 229.27982)
(20000, array([ 0.00226194,  0.79895586, -0.67662704, -1.340773  , -1.5527016 ],
      dtype=float32), -3.9700239, 229.25891)
(25000, array([ 0.00225878,  0.80363715, -0.6757183 , -1.3404499 , -1.551768  ],
      dtype=float32), -3.9884007, 229.25922)
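
The estimates (intercept about -3.99, gre 0.0023, gpa 0.80, rank_2 -0.68, rank_3 -1.34, rank_4 -1.55) agree closely with the R glm fit shown below. A similar maximum-likelihood fit can be run in Python with statsmodels as a cross-check; this is a sketch assuming statsmodels is available, not part of the original notebook:

import statsmodels.api as sm

X_sm = sm.add_constant(df.iloc[:, 1:].astype(float))   # add an explicit intercept column
logit_fit = sm.Logit(df['admit'].astype(float), X_sm).fit()
print(logit_fit.params)                                 # should match the TF and R estimates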

R fit for comparison

## Call:
## glm(formula = admit ~ gre + gpa + rank, family = "binomial",
##     data = mydata)
##
## Deviance Residuals:
##    Min      1Q  Median      3Q     Max
## -1.627  -0.866  -0.639   1.149   2.079
##
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.98998    1.13995   -3.50  0.00047 ***
## gre          0.00226    0.00109    2.07  0.03847 *
## gpa          0.80404    0.33182    2.42  0.01539 *
## rank2       -0.67544    0.31649   -2.13  0.03283 *
## rank3       -1.34020    0.34531   -3.88  0.00010 ***
## rank4       -1.55146    0.41783   -3.71  0.00020 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 499.98  on 399  degrees of freedom
## Residual deviance: 458.52  on 394  degrees of freedom
## AIC: 470.5
##
## Number of Fisher Scoring iterations: 4
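
Since the outcomes are binary (so the saturated model has log-likelihood zero), the residual deviance is just twice the negative log-likelihood, i.e. twice the final TensorFlow loss: \(2 \times 229.26 \approx 458.5\), in agreement with the residual deviance of 458.52 reported by R; the coefficient estimates likewise agree to about three decimal places.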