benchmarkexp/main.py

import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

from dmadam import DMAdam

plt.rcParams['figure.dpi'] = 140

def get_function_val(name, x, y):
    if name == "Sphere": return x**2 + y**2
    elif name == "Booth": return (x + 2*y - 7)**2 + (2*x + y - 5)**2
    elif name == "Matyas": return 0.26*(x**2 + y**2) - 0.48*x*y
    elif name == "Beale":
        return (1.5 - x + x*y)**2 + (2.25 - x + x*y**2)**2 + (2.625 - x + x*y**3)**2
    elif name == "Goldstein-Price":
        term1 = 1 + (x + y + 1)**2 * (19 - 14*x + 3*x**2 - 14*y + 6*x*y + 3*y**2)
        term2 = 30 + (2*x - 3*y)**2 * (18 - 32*x + 12*x**2 + 48*y - 36*x*y + 27*y**2)
        return term1 * term2
    elif name == "Bukin":
        term1 = 100 * (abs(y - 0.01 * x ** 2)) ** 0.5
        term2 = 0.01 * abs(x + 10)
        return term1 + term2
    return 0

def get_torch_func(name):
    if name == "Sphere": return lambda x: torch.sum(x ** 2)
    elif name == "Booth": return lambda x: (x[0] + 2*x[1] - 7)**2 + (2*x[0] + x[1] - 5)**2
    elif name == "Matyas": return lambda x: 0.26*(x[0]**2 + x[1]**2) - 0.48*x[0]*x[1]
    elif name == "Beale":
        return lambda x: (1.5 - x[0] + x[0]*x[1])**2 + (2.25 - x[0] + x[0]*x[1]**2)**2 + (2.625 - x[0] + x[0]*x[1]**3)**2
    elif name == "Goldstein-Price":
        def goldstein(x):
            x1, x2 = x[0], x[1]
            t1 = 1 + (x1 + x2 + 1)**2 * (19 - 14*x1 + 3*x1**2 - 14*x2 + 6*x1*x2 + 3*x2**2)
            t2 = 30 + (2*x1 - 3*x2)**2 * (18 - 32*x1 + 12*x1**2 + 48*x2 - 36*x1*x2 + 27*x2**2)
            return t1 * t2
        return goldstein
    elif name == "Bukin":
        def bukin(x):
            x1, x2 = x[0], x[1]
            t1 = 100 * torch.sqrt(torch.abs(x2 - 0.01 * x1 ** 2))
            t2 = 0.01 * torch.abs(x1 + 10)
            return t1 + t2
        return bukin
    return None

def run_trajectory(optimizer_name, func_name, start_point, iterations, lr, eta0):
    func = get_torch_func(func_name)
    params = torch.tensor(start_point, requires_grad=True, dtype=torch.float32)

    if optimizer_name == 'DMAdam':
        opt = DMAdam([params], lr=lr, eta0=eta0)
    elif optimizer_name == 'Adam':
        opt = torch.optim.Adam([params], lr=lr)
    elif optimizer_name == 'SGD':
        opt = torch.optim.SGD([params], lr=lr)
    elif optimizer_name == 'SGDM':
        opt = torch.optim.SGD([params], lr=lr, momentum=0.9)
    elif optimizer_name == 'Adagrad':
        opt = torch.optim.Adagrad([params], lr=lr)

    path = []
    with torch.no_grad():
        path.append([start_point[0], start_point[1], func(params).item()])

    for _ in range(iterations):
        opt.zero_grad()
        loss = func(params)
        loss.backward()
        opt.step()
        p_val = params.detach().cpu().numpy()
        path.append([p_val[0], p_val[1], loss.item()])

    return np.array(path)

def plot_3d_surface(ax, func_name, x_range, y_range, trajectories, global_min, view_angle):
    x = np.linspace(x_range[0], x_range[1], 150)
    y = np.linspace(y_range[0], y_range[1], 150)
    X, Y = np.meshgrid(x, y)
    Z = get_function_val(func_name, X, Y)

    z_min_val = np.min(Z)

    if func_name == "Goldstein-Price":
        clip_max = np.max(Z)
    elif func_name == "Beale":
        clip_max = 4000
    elif func_name == "Booth":
        clip_max = 800
    else:
        clip_max = np.max(Z)

    Z_plot = np.clip(Z, z_min_val, clip_max)

    surf = ax.plot_surface(X, Y, Z_plot, cmap=cm.viridis,
                           alpha=0.7, linewidth=0, antialiased=True)

    colors = {'SGD': 'red', 'Adagrad': 'orange', 'SGDM': 'green', 'Adam': 'blue', 'DMAdam': 'black'}
    styles = {'SGD': ':', 'Adagrad': ':', 'SGDM': '--', 'Adam': '--', 'DMAdam': '-'}

    for name, path in trajectories.items():
        path_z = np.clip(path[:, 2], z_min_val, clip_max)

        ax.plot(path[:, 0], path[:, 1], path_z,
                color=colors[name], label=name,
                linewidth=2.5, linestyle=styles[name], zorder=10)

        ax.scatter(path[-1, 0], path[-1, 1], path_z[-1], c=colors[name], marker='o', s=50, zorder=11)

    true_z = get_function_val(func_name, np.array(global_min[0]), np.array(global_min[1]))
    ax.scatter(global_min[0], global_min[1], max(true_z, z_min_val),
               c='red', marker='*', s=300, label='Global Min', zorder=20, edgecolors='white')

    ax.set_title(func_name, fontsize=14, pad=0)

    ax.view_init(elev=view_angle[0], azim=view_angle[1])
    ax.set_xlim(x_range)
    ax.set_ylim(y_range)
    ax.set_zlim(z_min_val, clip_max)
    ax.set_zticks([])

def main():
    iterations = 2000
    lr = 0.1
    lr_sgd = 0.01
    lr_sgdm = 0.01
    lr_gp = 0.1
    lr_bukin = 0.1

    eta0_normal = 3
    eta0_gp = 8
    eta0_bukin = 15

    benchmarks = [
        ("Sphere", [-3.0, 4.0], (0, 0), (-4, 4), (-4, 4), (60, -45), eta0_normal),
        ("Booth",  [-8.0, 8.0], (1, 3), (-10, 5), (-2, 10), (50, -140), eta0_normal),
        ("Matyas", [4.0, -4.0], (0, 0), (-6, 6), (-6, 6), (40, 45), eta0_normal),
        ("Beale",  [2.0, 2.0],  (3, 0.5), (-4, 4), (-4, 4), (50, -145), eta0_normal),
        ("Goldstein-Price", [-0.5, 1.5], (0, -1), (-2, 2), (-2, 2), (30, -60), eta0_gp),
        # ("Bukin", [-13, 2.5], (-10, 1), (-15, -3), (-3, 6), (35, -50), eta0_bukin)
    ]

    os.makedirs('results', exist_ok=True)

    for name, start_pt, glob_min, x_rng, y_rng, view, eta0 in benchmarks:
        print(f"Running {name} (Iter: {iterations}, Eta0: {eta0})...")

        cur_lr = lr_gp if name == "Goldstein-Price" else lr

        trajectories = {
            'SGD': run_trajectory('SGD', name, start_pt, iterations, lr_sgd, eta0),
            'Adagrad': run_trajectory('Adagrad', name, start_pt, iterations, cur_lr, eta0),
            'SGDM': run_trajectory('SGDM', name, start_pt, iterations, lr_sgdm, eta0),
            'Adam': run_trajectory('Adam', name, start_pt, iterations, cur_lr, eta0),
            'DMAdam': run_trajectory('DMAdam', name, start_pt, iterations, cur_lr, eta0),
        }

        fig = plt.figure(figsize=(10, 8))
        ax = fig.add_subplot(111, projection='3d')
        plot_3d_surface(ax, name, x_rng, y_rng, trajectories, glob_min, view)
        ax.legend(loc='upper right', prop={'size': 9})

        plt.tight_layout()
        plt.savefig(f'results/{name}.png', dpi=140, bbox_inches='tight')
        plt.close(fig)


if __name__ == "__main__":
    main()