Line search in gradient and Newton directions

[18]:
%matplotlib inline
[21]:
import matplotlib.pyplot as plt
from scipy.optimize import minimize_scalar
import numpy as np

Demo functions

[28]:
def f1(x):
    return x[0]**2 + x[1]**2
[3]:
def grad1(x):
    return np.array([2*x[0], 2*x[1]]).reshape([-1,1])
[4]:
def hess1(x):
    return np.array([
        [2, 0],
        [0, 2]
    ])
[31]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2
plt.contour(X, Y, Z, 10)
plt.axis('square')
pass
../_images/notebooks_S09E_Optimization_Line_Search_7_0.png
[27]:
np.c_[X.ravel(), Y.ravel()].shape
[27]:
(10000, 2)
[5]:
def f2(x):
    return x[0]**2 + 10*x[1]**2
[6]:
def grad2(x):
    return np.array([2*x[0], 20*x[1]]).reshape([-1,1])
[7]:
def hess2(x):
    return np.array([
        [2, 0],
        [0, 20]
    ])
[32]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + 10*Y**2
plt.contour(X, Y, Z, 10)
plt.axis('square')
pass
../_images/notebooks_S09E_Optimization_Line_Search_12_0.png

Gradient descent with step size found by numerical minimization

[87]:
def gd(x, f, grad, max_iter=10):
    orbit = np.zeros((max_iter+1, len(x)))
    orbit[0] = x.ravel()
    for i in range(max_iter):
        res = minimize_scalar(lambda alpha: f(x - alpha * grad(x)))
        alpha = res.x
        x = x - alpha * grad(x)
        orbit[i+1] = x.ravel()
    return orbit

Gradient descent with analytic step size for quadratic function

[95]:
def gd_alt(x, f, grad, hess, max_iter=10):
    orbit = np.zeros((max_iter+1, len(x)))
    orbit[0] = x.ravel()
    for i in range(max_iter):
        p = -grad(x)
        alpha = (p.T @ p)/(p.T @ hess(x) @ p)
        x = x - alpha * grad(x)
        orbit[i+1] = x.ravel()
    return orbit
[96]:
x0 = np.array([4,3]).reshape([-1,1])
[97]:
orbit1 = gd(x0, f1, grad1, max_iter=5)
[98]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit1[:, 0], orbit1[:, 1], '-')
plt.scatter(orbit1[0:1, 0], orbit1[0:1, 1])
plt.axis('square')
pass
../_images/notebooks_S09E_Optimization_Line_Search_19_0.png
[99]:
orbit1a = gd_alt(x0, f1, grad1, hess1, max_iter=5)
/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide

[100]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit1a[:, 0], orbit1a[:, 1], '-')
plt.scatter(orbit1a[0:1, 0], orbit1a[0:1, 1])
plt.axis('square')
pass
../_images/notebooks_S09E_Optimization_Line_Search_21_0.png
[52]:
orbit2 = gd(x0, f2, grad2, max_iter=5)
[74]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + 10*Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit2[:, 0], orbit2[:, 1], '-')
plt.scatter(orbit2[0:1, 0], orbit2[0:1, 1])
plt.axis('square')
pass
../_images/notebooks_S09E_Optimization_Line_Search_23_0.png
[101]:
orbit2a = gd_alt(x0, f2, grad2, hess2, max_iter=5)
[102]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + 10*Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit2a[:, 0], orbit2a[:, 1], '-')
plt.scatter(orbit2a[0:1, 0], orbit2a[0:1, 1])
plt.axis('square')
pass
../_images/notebooks_S09E_Optimization_Line_Search_25_0.png

Line search in Newton direction with analytic step size

[65]:
def newton(x, f, grad, hess, max_iter=5):
    orbit = np.zeros((max_iter+1, len(x)))
    orbit[0] = x.ravel()
    for i in range(max_iter):
        x = x - np.linalg.inv(hess(x)) @ grad(x)
        orbit[i+1] = x.ravel()
    return orbit
[66]:
orbit3 = newton(x0, f1, grad1, hess1, max_iter=5)
[72]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit3[:, 0], orbit3[:, 1], '-')
plt.scatter(orbit3[0:1, 0], orbit3[0:1, 1])
plt.axis('square')
pass
../_images/notebooks_S09E_Optimization_Line_Search_29_0.png
[68]:
orbit4 = newton(x0, f2, grad2, hess2, max_iter=5)
[73]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + 10*Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit4[:, 0], orbit4[:, 1], '-')
plt.scatter(orbit4[0:1, 0], orbit4[0:1, 1])
plt.axis('square')
pass
../_images/notebooks_S09E_Optimization_Line_Search_31_0.png
[ ]: