Line search in gradient and Newton directions¶
In [18]:
%matplotlib inline
In [21]:
import matplotlib.pyplot as plt
from scipy.optimize import minimize_scalar
import numpy as np
Demo functions¶
In [28]:
def f1(x):
return x[0]**2 + x[1]**2
In [3]:
def grad1(x):
return np.array([2*x[0], 2*x[1]]).reshape([-1,1])
In [4]:
def hess1(x):
return np.array([
[2, 0],
[0, 2]
])
In [31]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2
plt.contour(X, Y, Z, 10)
plt.axis('square')
pass
data:image/s3,"s3://crabby-images/517b7/517b7d6a39e1cfc225c09ce8b3a28fff47b090c4" alt="notebook/../../build/doctrees/nbsphinx/notebook_S09E_Optimization_Line_Search_7_0.png"
In [27]:
np.c_[X.ravel(), Y.ravel()].shape
Out[27]:
(10000, 2)
In [5]:
def f2(x):
return x[0]**2 + 10*x[1]**2
In [6]:
def grad2(x):
return np.array([2*x[0], 20*x[1]]).reshape([-1,1])
In [7]:
def hess2(x):
return np.array([
[2, 0],
[0, 20]
])
In [32]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + 10*Y**2
plt.contour(X, Y, Z, 10)
plt.axis('square')
pass
data:image/s3,"s3://crabby-images/bd743/bd743cf569b4e896ef7a4174c82a67037302d526" alt="notebook/../../build/doctrees/nbsphinx/notebook_S09E_Optimization_Line_Search_12_0.png"
Gradient descent with step size found by numerical minimization¶
In [87]:
def gd(x, f, grad, max_iter=10):
orbit = np.zeros((max_iter+1, len(x)))
orbit[0] = x.ravel()
for i in range(max_iter):
res = minimize_scalar(lambda alpha: f(x - alpha * grad(x)))
alpha = res.x
x = x - alpha * grad(x)
orbit[i+1] = x.ravel()
return orbit
Gradient descent with analytic step size for quadratic function¶
In [95]:
def gd_alt(x, f, grad, hess, max_iter=10):
orbit = np.zeros((max_iter+1, len(x)))
orbit[0] = x.ravel()
for i in range(max_iter):
p = -grad(x)
alpha = (p.T @ p)/(p.T @ hess(x) @ p)
x = x - alpha * grad(x)
orbit[i+1] = x.ravel()
return orbit
In [96]:
x0 = np.array([4,3]).reshape([-1,1])
In [97]:
orbit1 = gd(x0, f1, grad1, max_iter=5)
In [98]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit1[:, 0], orbit1[:, 1], '-')
plt.scatter(orbit1[0:1, 0], orbit1[0:1, 1])
plt.axis('square')
pass
data:image/s3,"s3://crabby-images/8349a/8349ad460d1d125d5536a99e77f3557b9fe1e66d" alt="notebook/../../build/doctrees/nbsphinx/notebook_S09E_Optimization_Line_Search_19_0.png"
In [99]:
orbit1a = gd_alt(x0, f1, grad1, hess1, max_iter=5)
/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide
In [100]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit1a[:, 0], orbit1a[:, 1], '-')
plt.scatter(orbit1a[0:1, 0], orbit1a[0:1, 1])
plt.axis('square')
pass
data:image/s3,"s3://crabby-images/1dd25/1dd251cc44db4c98c3a5877a2c2e37eda22a06e4" alt="notebook/../../build/doctrees/nbsphinx/notebook_S09E_Optimization_Line_Search_21_0.png"
In [52]:
orbit2 = gd(x0, f2, grad2, max_iter=5)
In [74]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + 10*Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit2[:, 0], orbit2[:, 1], '-')
plt.scatter(orbit2[0:1, 0], orbit2[0:1, 1])
plt.axis('square')
pass
data:image/s3,"s3://crabby-images/7b330/7b33093762584360b1881f2d4c6cc09fd2bf3eb3" alt="notebook/../../build/doctrees/nbsphinx/notebook_S09E_Optimization_Line_Search_23_0.png"
In [101]:
orbit2a = gd_alt(x0, f2, grad2, hess2, max_iter=5)
In [102]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + 10*Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit2a[:, 0], orbit2a[:, 1], '-')
plt.scatter(orbit2a[0:1, 0], orbit2a[0:1, 1])
plt.axis('square')
pass
data:image/s3,"s3://crabby-images/76b2b/76b2b108cc2e8e5dc51be1eaae5d8d2e59e99cd2" alt="notebook/../../build/doctrees/nbsphinx/notebook_S09E_Optimization_Line_Search_25_0.png"
Line search in Newton direction with analytic step size¶
In [65]:
def newton(x, f, grad, hess, max_iter=5):
orbit = np.zeros((max_iter+1, len(x)))
orbit[0] = x.ravel()
for i in range(max_iter):
x = x - np.linalg.inv(hess(x)) @ grad(x)
orbit[i+1] = x.ravel()
return orbit
In [66]:
orbit3 = newton(x0, f1, grad1, hess1, max_iter=5)
In [72]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit3[:, 0], orbit3[:, 1], '-')
plt.scatter(orbit3[0:1, 0], orbit3[0:1, 1])
plt.axis('square')
pass
data:image/s3,"s3://crabby-images/e92fc/e92fcba412f5784fc246bcb37c02a7be64187128" alt="notebook/../../build/doctrees/nbsphinx/notebook_S09E_Optimization_Line_Search_29_0.png"
In [68]:
orbit4 = newton(x0, f2, grad2, hess2, max_iter=5)
In [73]:
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + 10*Y**2
plt.contour(X, Y, Z, 10)
plt.plot(orbit4[:, 0], orbit4[:, 1], '-')
plt.scatter(orbit4[0:1, 0], orbit4[0:1, 1])
plt.axis('square')
pass
data:image/s3,"s3://crabby-images/f3c9f/f3c9f70661d4eb8c0c98ff9f93936ece94b54053" alt="notebook/../../build/doctrees/nbsphinx/notebook_S09E_Optimization_Line_Search_31_0.png"
In [ ]: