Step-by-step optimization: understanding gradient-based methods#
%matplotlib notebook
#%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize
import ipywidgets as ipw
from matplotlib import cm
import ipywidgets as widgets
Cost functions#
Goldstein–Price function#
def cost(p):
    """
    Goldstein–Price test function, scaled by 1/80000.

    Parameters
    ----------
    p : pair (x, y) of floats or numpy arrays.

    Returns
    -------
    Scaled cost value(s); the global minimum is 3/80000 at (0, -1).

    https://en.wikipedia.org/wiki/Test_functions_for_optimization
    """
    # Count every evaluation.  The original declared the global but never
    # incremented it; globals().get keeps the first call safe even if the
    # counter was not initialised yet.
    global counterCostCall
    counterCostCall = globals().get("counterCostCall", 0) + 1
    x, y = p
    f = (
        1
        + (
            (x + y + 1) ** 2
            * (19 - 14 * x + 3 * x**2 - 14 * y + 6 * x * y + 3 * y**2)
        )
    ) * (
        30
        + (2 * x - 3 * y) ** 2
        * (18 - 32 * x + 12 * x**2 + 48 * y - 36 * x * y + 27 * y**2)
    )
    return f / 80000.0
# Reset the cost-call counter and sample the Goldstein–Price surface on a grid.
counterCostCall = 0
Nx = Ny = 100
x = np.linspace(-1.5, 1.5, Nx)
y = np.linspace(-1.5, 1.5, Ny)
X, Y = np.meshgrid(x, y)
zf = np.array([X.ravel(), Y.ravel()])
Z = cost(zf).reshape(Nx, Ny)
# Blank out the high-cost plateau so the plot focuses on the valley.
Z[Z > 0.3] = np.nan
mask = np.ones_like(Z)
Himmelblau cost function#
def cost(p):
    """
    Himmelblau cost function.

    Parameters
    ----------
    p : pair (x, y) of floats or numpy arrays.

    Returns
    -------
    Cost value(s); the four global minima (value 0) include (3, 2).

    https://en.wikipedia.org/wiki/Himmelblau%27s_function
    """
    # Count every evaluation.  The original declared the global but never
    # incremented it; globals().get keeps the first call safe even if the
    # counter was not initialised yet.
    global counterCostCall
    counterCostCall = globals().get("counterCostCall", 0) + 1
    x, y = p
    return (x**2 + y - 11) ** 2 + (x + y**2 - 7) ** 2
# Reset the cost-call counter and sample the Himmelblau surface on a grid.
counterCostCall = 0
Nx = Ny = 100
x = np.linspace(-5.0, 5.0, Nx)
y = np.linspace(-5.0, 5.0, Ny)
X, Y = np.meshgrid(x, y)
zf = np.array([X.ravel(), Y.ravel()])
Z = cost(zf).reshape(Nx, Ny)
# Blank out the high-cost plateau so the plot focuses on the minima.
Z[Z > 200.0] = np.nan
mask = np.ones_like(Z)
# Path of visited points and descent directions, NaN = not yet computed.
P = np.full((1000, 2), np.nan)
V_down_hill_dir = np.full((1000, 2), np.nan)
P[0] = 3.0, -0.0
def maj_mask_all(mask, Pis):
    """Apply maj_mask around every already-visited (non-NaN) point of Pis."""
    for point in Pis:
        if not np.isnan(point[0]):
            mask = maj_mask(mask, point)
    return mask
def maj_mask(mask, Pi):
    """Punch a NaN disc of radius R into mask around point Pi.

    Relies on the module-level X, Y meshgrid arrays.
    """
    R = 0.5
    dist = np.sqrt((X - Pi[0]) ** 2 + (Y - Pi[1]) ** 2)
    mask[dist < R] = np.nan
    return mask
# Blank out the region around the starting point on the mask.
mask = maj_mask(mask, P[0])
def init():
    """Reset the optimisation state.

    Clears the mask, the path P and the recorded descent directions,
    and places the starting point.  Mutates module-level globals.
    """
    global mask, P, V_down_hill_dir
    mask = np.ones_like(Z)
    # np.full is the idiomatic NaN-array construction (the original
    # multiplied a zeros array by NaN — same result, less direct).
    P = np.full((1000, 2), np.nan)
    V_down_hill_dir = np.full((1000, 2), np.nan)
    P[0] = -0.0, -4.0
Gradient-based methods#
Downhill method#
def grad_P(Pi):
    """
    Forward finite-difference gradient of the global `cost` at point Pi.

    Parameters
    ----------
    Pi : numpy array of shape (2,) — the (x, y) evaluation point.

    Returns
    -------
    numpy array of shape (2,) with the approximate gradient.
    """
    eps = 0.001
    # Evaluate the base cost once; the original called cost(Pi) twice,
    # once per partial derivative.
    f0 = cost(Pi)
    Pdx = Pi.copy()
    Pdx[0] = Pdx[0] + eps
    gradx = (cost(Pdx) - f0) / eps
    Pdy = Pi.copy()
    Pdy[1] = Pdy[1] + eps
    grady = (cost(Pdy) - f0) / eps
    return np.array([gradx, grady])
def optim_grad():
    """One fixed-step steepest-descent iteration.

    Mutates the module-level globals P, counter and V_down_hill_dir.
    """
    global counter, P, V_down_hill_dir
    step = 0.1
    # Unit descent direction: minus the normalised gradient at the
    # current point.
    g = grad_P(P[counter])
    direction = -(g / np.linalg.norm(g))
    V_down_hill_dir[counter] = direction
    # Advance by a constant step along the descent direction.
    P[counter + 1] = P[counter] + step * direction
    counter += 1
Downhill method with line search#
def optim_grad_lin_search():
    """
    One steepest-descent iteration with a crude line search: keep taking
    fixed-size steps along the descent direction while the cost does not
    increase, then stop.

    Mutates the module-level globals P, counter and V_down_hill_dir.
    """
    global counter, P, V_down_hill_dir
    # compute gradient at current point
    g = grad_P(P[counter])
    g_normalized = g / np.linalg.norm(g)
    # compute downhill direction
    down_hill_dir = -g_normalized
    V_down_hill_dir[counter] = down_hill_dir
    step = 0.1
    # Carry the cost of the last accepted point forward so each trial
    # needs a single new cost evaluation (the original re-evaluated
    # cost(P[counter]) on every pass, doubling the call count).
    current_cost = cost(P[counter])
    while True:
        P_new = P[counter] + step * down_hill_dir
        new_cost = cost(P_new)
        if new_cost > current_cost:
            break
        P[counter + 1] = P_new
        counter += 1
        current_cost = new_cost
# Reset the optimisation state and mask every visited point.
init()
mask = maj_mask_all(mask, P)
# Static background: log-scaled cost map with contour lines on top.
plt.figure(figsize=(8, 6))
title = plt.title("")
cost_map = plt.contourf(X, Y, np.log(Z), 50, cmap=cm.terrain)
plt.colorbar(cost_map)
plt.contour(X, Y, np.log(Z), 50, cmap=cm.gray, zorder=2.0)
# Artists that run_step() updates in place: the path line and start marker.
(line,) = plt.plot(P[:, 0], P[:, 1], "og-", label="Path")
(line0,) = plt.plot(P[:1, 0], P[:1, 1], "r*", label="Start")
# Arrows showing the recorded descent direction at each visited point.
v = plt.quiver(
    P[:, 0],
    P[:, 1],
    V_down_hill_dir[:, 0],
    V_down_hill_dir[:, 1],
    scale=10,
    zorder=2.4,
    width=0.005,
    label="DownHill Dir",
    angles="xy",
)
# Stored in a one-element list so run_step() can rebind the contour set.
p = [plt.contourf(X, Y, mask, 2, cmap=cm.gray, zorder=2.3, alpha=0.75)]
plt.legend()
# Iteration counter used by the optimisers (index into P).
counter = 0
@ipw.interact_manual()
def run_step():
    """Run one optimiser iteration and refresh the plot (widget button callback)."""
    global P, counter, mask, v, V_down_hill_dir
    optim_grad()
    #optim_grad_lin_search()
    mask = maj_mask_all(mask, P)
    # Remove the previous mask contour and quiver before redrawing.
    # ContourSet.remove() replaces iterating over .collections, which was
    # deprecated in Matplotlib 3.8 and removed in 3.10.
    p[0].remove()
    v.remove()
    p[0] = plt.contourf(X, Y, mask, 2, cmap=cm.gray, zorder=2.3, alpha=0.75)
    v = plt.quiver(
        P[:, 0],
        P[:, 1],
        V_down_hill_dir[:, 0],
        V_down_hill_dir[:, 1],
        scale=10,
        zorder=2.4,
        width=0.005,
        label="DownHill Dir",
        angles="xy",
    )
    line.set_xdata(P[:, 0])
    line.set_ydata(P[:, 1])
    # CostOverIt.set_ydata(cost(P.T))