Step-by-step optimization: understanding gradient-based methods#

%matplotlib notebook
#%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize
import ipywidgets as ipw
from matplotlib import cm
import ipywidgets as widgets

Cost functions#

Goldstein–Price function#

def cost(p):
    """
    Goldstein–Price test function (scaled).
    https://en.wikipedia.org/wiki/Test_functions_for_optimization
    """
    global counterCostCall
    counterCostCall += 1  # count cost-function evaluations

    x, y = p
    f = (
        1
        + (
            (x + y + 1) ** 2
            * (19 - 14 * x + 3 * x**2 - 14 * y + 6 * x * y + 3 * y**2)
        )
    ) * (
        30
        + (2 * x - 3 * y) ** 2
        * (18 - 32 * x + 12 * x**2 + 48 * y - 36 * x * y + 27 * y**2)
    )
    return f / 80000.0


counterCostCall = 0
Nx, Ny = 100, 100
x = np.linspace(-1.5, 1.5, Nx)
y = np.linspace(-1.5, 1.5, Ny)
X, Y = np.meshgrid(x, y)
zf = np.array([X.flatten(), Y.flatten()])
Z = cost(zf).reshape(Ny, Nx)  # evaluate the cost on the grid (rows = y, columns = x)
Z = np.where(Z > 0.3, np.nan, Z)
mask = np.ones_like(Z)
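The grid above samples the scaled Goldstein–Price surface on [-1.5, 1.5]²; values above 0.3 are masked to NaN so the colour scale stays readable around the global minimum at (0, -1). A minimal static sketch of how this grid can be inspected (the plotting calls below are an assumed usage; the original notebook drives its figures interactively):

# Static contour view of the masked Goldstein–Price surface (assumed usage, not from the original).
fig, ax = plt.subplots()
cs = ax.contourf(X, Y, Z, levels=30, cmap=cm.viridis)
fig.colorbar(cs, ax=ax, label="scaled cost")
ax.plot(0.0, -1.0, "r*", markersize=12, label="global minimum (0, -1)")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
plt.show()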

Himmelblau cost function#

def cost(p):
    """
    Himmelblau cost function.
    https://en.wikipedia.org/wiki/Himmelblau%27s_function
    """
    global counterCostCall
    counterCostCall += 1  # count cost-function evaluations

    x, y = p
    return (x**2 + y - 11) ** 2 + (x + y**2 - 7) ** 2


counterCostCall = 0
Nx, Ny = 100, 100
x = np.linspace(-5.0, 5.0, Nx)
y = np.linspace(-5.0, 5.0, Ny)
X, Y = np.meshgrid(x, y)
zf = np.array([X.flatten(), Y.flatten()])
Z = cost(zf).reshape(Ny, Nx)  # evaluate the cost on the grid (rows = y, columns = x)
Z = np.where(Z > 200.0, np.nan, Z)

mask = np.ones_like(Z)
P = np.full((1000, 2), np.nan)  # history of visited points
V_down_hill_dir = np.full((1000, 2), np.nan)  # history of descent directions


P[0] = 3.0, -0.0  # starting point


def maj_mask_all(mask, Pis):
    # Update ("mettre à jour") the mask for every point visited so far
    for Pi in Pis:
        if not np.isnan(Pi[0]):
            mask = maj_mask(mask, Pi)
    return mask


def maj_mask(mask, Pi):
    # Set the mask to NaN inside a disc of radius R around the visited point Pi
    R = 0.5
    mask[np.sqrt((X - Pi[0]) ** 2 + (Y - Pi[1]) ** 2) < R] = np.nan
    return mask


mask = maj_mask(mask, P[0])


def init():
    # Reset the mask, the point/direction histories and the iteration counter
    global mask, P, V_down_hill_dir, counter
    mask = np.ones_like(Z)
    P = np.full((1000, 2), np.nan)
    V_down_hill_dir = np.full((1000, 2), np.nan)
    P[0] = -0.0, -4.0  # starting point
    counter = 0  # index of the current point in P, used by optim_grad
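Himmelblau's function has four global minima, all with value 0, which makes it convenient for checking where the iterations end up. A quick sanity check (the coordinates are the standard values quoted on the Wikipedia page linked above):

# The four global minima of Himmelblau's function (cost ~ 0 at each).
minima = [
    (3.0, 2.0),
    (-2.805118, 3.131312),
    (-3.779310, -3.283186),
    (3.584428, -1.848126),
]
for m in minima:
    print(m, cost(np.array(m)))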

Gradient-based methods#

Downhill (steepest descent) method#

def grad_P(Pi):
    # Gradient estimated with a forward finite-difference approximation
    eps = 0.001
    Pdx = Pi.copy()
    Pdx[0] = Pdx[0] + eps
    gradx = (cost(Pdx) - cost(Pi)) / eps

    Pdy = Pi.copy()
    Pdy[1] = Pdy[1] + eps
    grady = (cost(Pdy) - cost(Pi)) / eps
    return np.array([gradx, grady])
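For Himmelblau's function the gradient is also available in closed form, which gives a simple way to sanity-check the finite-difference estimate. The helper below is hand-derived for this check and is not part of the original notebook:

def grad_analytic(Pi):
    # Closed-form gradient of Himmelblau's function (hand-derived, for comparison with grad_P).
    x, y = Pi
    dfdx = 4 * x * (x**2 + y - 11) + 2 * (x + y**2 - 7)
    dfdy = 2 * (x**2 + y - 11) + 4 * y * (x + y**2 - 7)
    return np.array([dfdx, dfdy])


p_test = np.array([1.0, 1.0])
print(grad_P(p_test))         # forward difference, accurate to O(eps)
print(grad_analytic(p_test))  # exact value, here (-46, -38)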


def optim_grad():
    global counter, P, V_down_hill_dir
    # compute the gradient at the current point
    g = grad_P(P[counter])
    g_normalized = g / np.linalg.norm(g)
    # the downhill direction is opposite to the gradient
    down_hill_dir = -g_normalized
    V_down_hill_dir[counter] = down_hill_dir
    step = 0.1
    # take a constant step in the downhill direction to get the next point
    P[counter + 1] = P[counter] + step * down_hill_dir
    counter += 1
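In the notebook these iterations are presumably triggered from the widgets; as a plain, non-interactive sketch (the number of iterations is an arbitrary choice, and with a fixed step of 0.1 the iterates hover around a minimum rather than converge exactly):

# Run the fixed-step downhill iterations from the starting point set by init().
init()
for _ in range(50):
    optim_grad()

print("last point:", P[counter])
print("cost at last point:", cost(P[counter]))
print("cost-function evaluations so far:", counterCostCall)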