Regression in TensorFlow

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import h5py
warnings.resetwarnings()
warnings.simplefilter(action='ignore', category=ImportWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=ResourceWarning)
In [3]:
import tensorflow as tf

Steps in fitting a model

  • Define model variables (parameters) and placeholders (data)
  • Define the loss function
  • Choose an optimizer to minimize the loss
  • Start a session
    • Initialize global variables
    • Run the optimizer for \(n\) steps or epochs, feeding in appropriate data
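
A minimal sketch of this workflow, using made-up toy data and a trivial linear model (the names X_data, y_data, features, targets, and params are placeholders of my own; the worked examples below follow the same pattern):

import numpy as np
import tensorflow as tf

X_data = np.arange(5, dtype=np.float32).reshape(-1, 1)   # toy data for illustration only
y_data = 3 * X_data

features = tf.placeholder(tf.float32, shape=(None, 1))   # placeholders for the data
targets = tf.placeholder(tf.float32, shape=(None, 1))
params = tf.Variable(tf.zeros((1, 1)))                    # model variables (parameters)

predictions = tf.matmul(features, params)                 # the model
loss = tf.reduce_sum(tf.square(targets - predictions))    # the loss function
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)  # the optimizer

with tf.Session() as sess:                                # start a session
    sess.run(tf.global_variables_initializer())           # initialize global variables
    for step in range(100):                               # run the optimizer for n steps
        sess.run(train_op, feed_dict={features: X_data, targets: y_data})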

Linear Regression

In [4]:
np.random.seed(123)
In [5]:
N = 10
W_true = 2
b_true = 1
X_obs = np.arange(N).reshape((-1,1))
eps = np.random.normal(0, 1, (N, 1))
y_obs = np.reshape(W_true * X_obs + b_true + eps, (-1, 1))
In [6]:
plt.scatter(X_obs, y_obs)
plt.plot(X_obs, W_true * X_obs + b_true, c='red')
pass
[Figure: scatter plot of (X_obs, y_obs) with the true line W_true * X_obs + b_true in red]
In [7]:
X = tf.placeholder(tf.float32, (N, 1))
y = tf.placeholder(tf.float32, (N, 1))
W = tf.Variable(tf.random_normal((1,1)))
b = tf.Variable(tf.random_normal((1,)))
yhat = tf.matmul(X, W) + b
loss = tf.reduce_sum(tf.square(y - yhat))

train_op = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
init_op = tf.global_variables_initializer()
In [8]:
niter = 1001

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(niter):
        _, weights, bias, l = sess.run([train_op, W, b, loss], feed_dict={X: X_obs, y: y_obs})
        if i % 100 == 0:
            print('%03d\t%6.2f\t%6.2f\t%6.2f' % (i, weights[0][0], bias[0], l))
000       0.06    1.35  984.14
100       1.92    1.25   16.04
200       1.95    1.03   15.53
300       1.97    0.90   15.36
400       1.98    0.83   15.31
500       1.99    0.79   15.29
600       1.99    0.77   15.29
700       2.00    0.75   15.29
800       2.00    0.75   15.28
900       2.00    0.74   15.28
1000      2.00    0.74   15.28
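
As a sanity check (not part of the original notebook), the same fit can be obtained in closed form with NumPy's least-squares solver; the slope and intercept should be close to the values the optimizer converges to above (about 2.00 and 0.74):

A = np.c_[X_obs, np.ones((N, 1))]                 # design matrix with an intercept column
coef, _, _, _ = np.linalg.lstsq(A, y_obs, rcond=None)
print(coef.ravel())                               # [slope, intercept]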

Logistic Regression

We will use logistic regression to predict entry to graduate school based on GRE, GPA and the rank of the undergraduate college by prestige (1 = highest, 4 = lowest).

In [9]:
import pandas as pd
In [10]:
df = pd.read_csv('https://stats.idre.ucla.edu/stat/data/binary.csv')
In [11]:
df.head()
Out[11]:
   admit  gre   gpa  rank
0      0  380  3.61     3
1      1  660  3.67     3
2      1  800  4.00     1
3      1  640  3.19     4
4      0  520  2.93     4
In [12]:
df = pd.get_dummies(df, columns=['rank'], drop_first=True)
df.head()
Out[12]:
   admit  gre   gpa  rank_2  rank_3  rank_4
0      0  380  3.61       0       1       0
1      1  660  3.67       0       1       0
2      1  800  4.00       0       0       0
3      1  640  3.19       0       0       1
4      0  520  2.93       0       0       1

Reset the data flow graph.

In [13]:
tf.reset_default_graph()  # clear the linear regression graph built above

N = df.shape[0]
X = tf.placeholder(tf.float32, (N, 5))
y = tf.placeholder(tf.float32, (N, 1))

W = tf.Variable(tf.random_normal((5,1)))
b = tf.Variable(tf.random_normal((1,)))

yhat = tf.matmul(X, W) + b
In [14]:
loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=yhat, labels=y))
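
Here yhat holds the linear predictor (the logits) \(z = Xw + b\); tf.nn.sigmoid_cross_entropy_with_logits applies the sigmoid internally, so the quantity being summed is the per-observation negative log-likelihood of a Bernoulli model, \(-\left[y \log \sigma(z) + (1 - y) \log(1 - \sigma(z))\right]\).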
In [15]:
train_op = tf.train.AdamOptimizer().minimize(loss)
init_op = tf.global_variables_initializer()
In [16]:
niter = 25001

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(niter):
        _, weights, bias, l = sess.run([train_op, W, b, loss], feed_dict={X: df.iloc[:, 1:], y: df.iloc[:, 0:1]})
        if i % 5000 == 0:
            print((i, weights.T[0], bias[0], l))
(0, array([-1.3257291 , -0.302939  ,  1.9480195 , -0.17506851,  0.37787107],
      dtype=float32), -0.5002541, 104295.26)
(5000, array([-0.00137983,  0.17184952,  0.4647249 , -0.5058274 , -0.73989314],
      dtype=float32), -0.38277972, 249.21332)
(10000, array([ 0.00200241,  0.35402513, -0.79581696, -1.4096469 , -1.6802471 ],
      dtype=float32), -2.1872034, 230.55487)
(15000, array([ 0.00223551,  0.74499196, -0.6892415 , -1.3469222 , -1.5659412 ],
      dtype=float32), -3.7580762, 229.27982)
(20000, array([ 0.00226194,  0.79895586, -0.67662704, -1.340773  , -1.5527016 ],
      dtype=float32), -3.9700239, 229.25891)
(25000, array([ 0.00225878,  0.80363715, -0.6757183 , -1.3404499 , -1.551768  ],
      dtype=float32), -3.9884007, 229.25922)
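
The estimates (intercept about -3.99, gre 0.0023, gpa 0.80, rank_2 -0.68, rank_3 -1.34, rank_4 -1.55) agree closely with the R glm fit shown below. A similar maximum-likelihood fit can be run in Python with statsmodels as a cross-check; this is a sketch assuming statsmodels is available, not part of the original notebook:

import statsmodels.api as sm

X_sm = sm.add_constant(df.iloc[:, 1:].astype(float))   # add an explicit intercept column
logit_fit = sm.Logit(df['admit'].astype(float), X_sm).fit()
print(logit_fit.params)                                 # should match the TF and R estimates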

R fit for comparison

## Call:
## glm(formula = admit ~ gre + gpa + rank, family = "binomial",
##     data = mydata)
##
## Deviance Residuals:
##    Min      1Q  Median      3Q     Max
## -1.627  -0.866  -0.639   1.149   2.079
##
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.98998    1.13995   -3.50  0.00047 ***
## gre          0.00226    0.00109    2.07  0.03847 *
## gpa          0.80404    0.33182    2.42  0.01539 *
## rank2       -0.67544    0.31649   -2.13  0.03283 *
## rank3       -1.34020    0.34531   -3.88  0.00010 ***
## rank4       -1.55146    0.41783   -3.71  0.00020 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 499.98  on 399  degrees of freedom
## Residual deviance: 458.52  on 394  degrees of freedom
## AIC: 470.5
##
## Number of Fisher Scoring iterations: 4
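
Since the outcomes are binary (so the saturated model has log-likelihood zero), the residual deviance is just twice the negative log-likelihood, i.e. twice the final TensorFlow loss: \(2 \times 229.26 \approx 458.5\), in agreement with the residual deviance of 458.52 reported by R; the coefficient estimates likewise agree to about three decimal places.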