Posts about Gradient Descent

Gradient descent implementation from scratch


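This post shows how to implement gradient descent from scratch with NumPy, using a central-difference numerical gradient and the function $f(x) = \sum_i x_i^2$ as the example surface.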




In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Define the function to be explored

In [2]:
def surface_function(x):
    """Return the sum of the squared entries of ``x``.

    Every element of ``x`` is squared and the squares are added together
    with ``np.sum``, giving a single scalar.
    """
    squares = x ** 2
    total = np.sum(squares)
    return total
In [63]:
# Sample both axes on [-3, 3) with a step of `delta` and build the 2-D grid.
delta = 0.025
x = np.arange(-3.0, 3.0, delta)
y = np.arange(-3.0, 3.0, delta)
X, Y = np.meshgrid(x, y)

# Surface height at every grid point: z = x^2 + y^2.
Z = X**2 + Y**2

# Draw the contour lines and label each level in place.
fig, ax = plt.subplots()
CS = ax.contour(X, Y, Z)
ax.clabel(CS, inline=1, fontsize=10);

Compute the gradient of a function numerically (central differences)

In [36]:
def numerical_gradient(f, x0, h=1e-5):
    """Estimate the gradient of ``f`` at ``x0`` with central differences.

    Each component is approximated as
    ``(f(x + h*e_i) - f(x - h*e_i)) / (2*h)``, where ``e_i`` perturbs only
    the i-th entry of ``x``.

    Parameters
    ----------
    f : callable
        Function that takes an array shaped like ``x0`` and returns a scalar.
    x0 : array_like
        Point at which to evaluate the gradient. Any shape is accepted.
        Integer input is converted to float; ``x0`` itself is never modified.
    h : float, optional
        Step used for the finite difference. Defaults to ``1e-5``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x0`` holding the estimated partial
        derivatives.
    """
    x = np.asarray(x0)
    # An integer array would truncate both `value ± h` and the stored
    # gradient, so promote anything that is not already float/complex.
    dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64
    # np.array copies by default: perturbations never touch the caller's data.
    x = np.array(x, dtype=dtype)
    grad = np.zeros_like(x)

    # np.ndindex walks every index tuple, so arrays of any rank are supported.
    for idx in np.ndindex(x.shape):
        original = x[idx]

        # compute f(x + h) along this coordinate
        x[idx] = original + h
        f_plus = f(x)

        # compute f(x - h) along this coordinate
        x[idx] = original - h
        f_minus = f(x)

        grad[idx] = (f_plus - f_minus) / (2 * h)
        # Restore the coordinate before moving on to the next one.
        x[idx] = original
    return grad
In [37]:
# Sanity check: for f(x) = sum(x**2) the analytic gradient is 2*x, so the
# result at [0, 1] should be [0, 2].
numerical_gradient(surface_function, np.array([0., 1.]))
array([0., 2.])

Define the gradient descent class

In [84]:
class GradientDescent:
    """Plain gradient descent that records every point it visits."""

    def __init__(self):
        # Points visited by the most recent run, in order; the first entry
        # is the starting point and the last entry is the returned result.
        self.trace = []

    def gradient_descent(self, f, init_x, lr=0.01, steps=200):
        """Minimise ``f`` starting from ``init_x``.

        Parameters
        ----------
        f : callable
            Function that takes an array and returns a scalar.
        init_x : array_like
            Starting point. It is copied, so the caller's array is not
            modified.
        lr : float, optional
            Learning rate that scales each gradient step. Defaults to 0.01.
        steps : int, optional
            Number of update steps to perform. Defaults to 200.

        Returns
        -------
        numpy.ndarray
            The point reached after ``steps`` updates.
        """
        # Float copy: keeps the caller's start point intact and lets the
        # in-place update below work even when init_x holds integers.
        x = np.array(init_x, dtype=float)
        # Start a fresh trace for this run, beginning with the start point.
        self.trace = [x.copy()]
        for _ in range(steps):
            gradient = numerical_gradient(f, x)
            x -= lr * gradient
            # Store a copy; x itself keeps being updated in place.
            self.trace.append(x.copy())

        return x
In [85]:
# Start at (3, 3) and run gradient descent with the default `lr` and `steps`.
init = np.array([3., 3.])
gd = GradientDescent()
gd.gradient_descent(surface_function, init)
array([0.05276384, 0.05276384])

Plot the trace on the surface function

In [89]:
# One row per recorded point; reset_index() exposes the step number as an
# 'index' column, which the plot below uses to colour the markers.
df_trace = pd.DataFrame(gd.trace, columns=['x', 'y']).reset_index()
# Display the first rows so the cell actually shows the table.
df_trace.head()
index x y
0 0 3.000000 3.000000
1 1 2.940000 2.940000
2 2 2.881200 2.881200
3 3 2.823576 2.823576
4 4 2.767104 2.767104
In [106]:
# Redraw the labelled contour lines of the surface on a wider figure.
fig, ax = plt.subplots(figsize=(8, 5))
CS = ax.contour(X, Y, Z)
ax.clabel(CS, inline=1, fontsize=10);

# Keep every 15th recorded step so the markers do not overlap, and colour
# them by step number.
trace_sampled = df_trace.iloc[::15, :]
trace_sampled.plot.scatter(
    x='x',
    y='y',
    c='index',
    cmap='Reds',
    marker='x',
    s=200,
    ax=ax,
    title='Trace of Gradient Descent',
);