Files
benchmarkexp/main.py
2026-02-04 16:49:52 +08:00

173 lines
6.3 KiB
Python

import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from dmadam import DMAdam
# Set matplotlib's default figure resolution (dots per inch) for every figure
# created after this point.
plt.rcParams['figure.dpi'] = 140
def get_function_val(name, x, y):
    """Evaluate the named 2-D benchmark function at ``(x, y)``.

    Only arithmetic operators and ``abs`` are applied to the inputs, so the
    same code works for plain numbers and for NumPy arrays (element-wise).

    Args:
        name: Benchmark identifier; one of "Sphere", "Booth", "Matyas",
            "Beale", "Goldstein-Price" or "Bukin".
        x: First coordinate (scalar or array).
        y: Second coordinate (scalar or array).

    Returns:
        The function value at ``(x, y)``. An unrecognised ``name`` yields
        the integer ``0``.
    """

    def sphere():
        return x**2 + y**2

    def booth():
        return (x + 2*y - 7)**2 + (2*x + y - 5)**2

    def matyas():
        return 0.26*(x**2 + y**2) - 0.48*x*y

    def beale():
        return (1.5 - x + x*y)**2 + (2.25 - x + x*y**2)**2 + (2.625 - x + x*y**3)**2

    def goldstein_price():
        left = 1 + (x + y + 1)**2 * (19 - 14*x + 3*x**2 - 14*y + 6*x*y + 3*y**2)
        right = 30 + (2*x - 3*y)**2 * (18 - 32*x + 12*x**2 + 48*y - 36*x*y + 27*y**2)
        return left * right

    def bukin():
        ridge = 100 * (abs(y - 0.01 * x ** 2)) ** 0.5
        slope = 0.01 * abs(x + 10)
        return ridge + slope

    evaluators = {
        "Sphere": sphere,
        "Booth": booth,
        "Matyas": matyas,
        "Beale": beale,
        "Goldstein-Price": goldstein_price,
        "Bukin": bukin,
    }

    evaluate = evaluators.get(name)
    if evaluate is None:
        # Unknown benchmark names fall back to a constant zero.
        return 0
    return evaluate()
def get_torch_func(name):
    """Look up the torch implementation of a benchmark function.

    Each returned callable takes a single tensor ``x`` and returns the
    function value as a tensor built from torch operations, so it can be
    back-propagated through. "Sphere" sums the squares of every element of
    ``x``; all other benchmarks read only ``x[0]`` and ``x[1]``.

    Args:
        name: Benchmark identifier; one of "Sphere", "Booth", "Matyas",
            "Beale", "Goldstein-Price" or "Bukin".

    Returns:
        The matching callable, or ``None`` when ``name`` is not recognised.
    """

    def sphere(x):
        return torch.sum(x ** 2)

    def booth(x):
        return (x[0] + 2*x[1] - 7)**2 + (2*x[0] + x[1] - 5)**2

    def matyas(x):
        return 0.26*(x[0]**2 + x[1]**2) - 0.48*x[0]*x[1]

    def beale(x):
        return (1.5 - x[0] + x[0]*x[1])**2 + (2.25 - x[0] + x[0]*x[1]**2)**2 + (2.625 - x[0] + x[0]*x[1]**3)**2

    def goldstein(x):
        a, b = x[0], x[1]
        left = 1 + (a + b + 1)**2 * (19 - 14*a + 3*a**2 - 14*b + 6*a*b + 3*b**2)
        right = 30 + (2*a - 3*b)**2 * (18 - 32*a + 12*a**2 + 48*b - 36*a*b + 27*b**2)
        return left * right

    def bukin(x):
        a, b = x[0], x[1]
        ridge = 100 * torch.sqrt(torch.abs(b - 0.01 * a ** 2))
        slope = 0.01 * torch.abs(a + 10)
        return ridge + slope

    registry = {
        "Sphere": sphere,
        "Booth": booth,
        "Matyas": matyas,
        "Beale": beale,
        "Goldstein-Price": goldstein,
        "Bukin": bukin,
    }
    # dict.get supplies the None fallback for unknown names.
    return registry.get(name)
def run_trajectory(optimizer_name, func_name, start_point, iterations, lr, eta0):
    """Minimise a benchmark function and record the path the optimizer takes.

    Args:
        optimizer_name: One of 'DMAdam', 'Adam', 'SGD', 'SGDM' (SGD with
            momentum 0.9) or 'Adagrad'.
        func_name: Benchmark identifier understood by ``get_torch_func``.
        start_point: Initial ``[x, y]`` coordinates.
        iterations: Number of optimizer steps to take.
        lr: Learning rate passed to the optimizer.
        eta0: Forwarded to ``DMAdam`` only; the other optimizers ignore it.

    Returns:
        A NumPy array with ``iterations + 1`` rows. Row ``k`` holds
        ``[x, y, f(x, y)]`` for the point reached after ``k`` steps, so row
        0 is the start point.

    Raises:
        ValueError: If ``func_name`` or ``optimizer_name`` is not recognised.
    """
    func = get_torch_func(func_name)
    if func is None:
        # Fail here with a clear message instead of "NoneType is not
        # callable" further down.
        raise ValueError(f"Unknown benchmark function: {func_name!r}")

    params = torch.tensor(start_point, requires_grad=True, dtype=torch.float32)

    # Factories are lazy so that only the requested optimizer is constructed.
    optimizer_factories = {
        'DMAdam': lambda: DMAdam([params], lr=lr, eta0=eta0),
        'Adam': lambda: torch.optim.Adam([params], lr=lr),
        'SGD': lambda: torch.optim.SGD([params], lr=lr),
        'SGDM': lambda: torch.optim.SGD([params], lr=lr, momentum=0.9),
        'Adagrad': lambda: torch.optim.Adagrad([params], lr=lr),
    }
    make_optimizer = optimizer_factories.get(optimizer_name)
    if make_optimizer is None:
        # Previously an unknown name left `opt` unbound and crashed later
        # with an UnboundLocalError inside the loop.
        raise ValueError(f"Unknown optimizer: {optimizer_name!r}")
    opt = make_optimizer()

    path = []
    with torch.no_grad():
        path.append([start_point[0], start_point[1], func(params).item()])

    for _ in range(iterations):
        opt.zero_grad()
        loss = func(params)
        loss.backward()
        opt.step()

        # Record the objective at the *updated* point. `loss` was computed
        # before the step, so pairing it with the new coordinates would shift
        # every z value one step behind its (x, y).
        with torch.no_grad():
            current_val = func(params).item()
        p_val = params.detach().cpu().numpy()
        path.append([p_val[0], p_val[1], current_val])

    return np.array(path)
def plot_3d_surface(ax, func_name, x_range, y_range, trajectories, global_min, view_angle):
    """Draw a benchmark surface with optimizer trajectories on a 3-D axes.

    Args:
        ax: Matplotlib 3-D axes to draw on.
        func_name: Benchmark identifier passed to ``get_function_val``.
        x_range: ``(low, high)`` extent of the x axis.
        y_range: ``(low, high)`` extent of the y axis.
        trajectories: Mapping of optimizer name to an array whose columns
            are x, y and z. Names must be one of 'SGD', 'Adagrad', 'SGDM',
            'Adam' or 'DMAdam'.
        global_min: ``(x, y)`` location of the global minimum to mark.
        view_angle: ``(elev, azim)`` pair handed to ``ax.view_init``.
    """
    # Sample the function on a 150 x 150 grid spanning the requested window.
    grid_x, grid_y = np.meshgrid(
        np.linspace(x_range[0], x_range[1], 150),
        np.linspace(y_range[0], y_range[1], 150),
    )
    heights = get_function_val(func_name, grid_x, grid_y)

    # Beale and Booth get a fixed z ceiling; every other function uses the
    # highest value found on the grid.
    z_floor = np.min(heights)
    fixed_ceilings = {"Beale": 4000, "Booth": 800}
    if func_name in fixed_ceilings:
        z_ceiling = fixed_ceilings[func_name]
    else:
        z_ceiling = np.max(heights)

    ax.plot_surface(
        grid_x, grid_y, np.clip(heights, z_floor, z_ceiling),
        cmap=cm.viridis, alpha=0.7, linewidth=0, antialiased=True,
    )

    # (colour, line style) for each optimizer.
    appearance = {
        'SGD': ('red', ':'),
        'Adagrad': ('orange', ':'),
        'SGDM': ('green', '--'),
        'Adam': ('blue', '--'),
        'DMAdam': ('black', '-'),
    }
    for name, path in trajectories.items():
        xs, ys = path[:, 0], path[:, 1]
        # Keep the trajectory inside the displayed z range.
        zs = np.clip(path[:, 2], z_floor, z_ceiling)
        colour, dash = appearance[name]
        ax.plot(xs, ys, zs,
                color=colour, label=name,
                linewidth=2.5, linestyle=dash, zorder=10)
        # Dot at the final position of the run.
        ax.scatter(xs[-1], ys[-1], zs[-1], c=colour, marker='o', s=50, zorder=11)

    # Star at the global minimum, lifted to the grid minimum if it lies below it.
    min_x, min_y = global_min[0], global_min[1]
    min_z = get_function_val(func_name, np.array(min_x), np.array(min_y))
    ax.scatter(min_x, min_y, max(min_z, z_floor),
               c='red', marker='*', s=300, label='Global Min', zorder=20, edgecolors='white')

    ax.set_title(func_name, fontsize=14, pad=0)
    ax.view_init(elev=view_angle[0], azim=view_angle[1])
    ax.set_xlim(x_range)
    ax.set_ylim(y_range)
    ax.set_zlim(z_floor, z_ceiling)
    ax.set_zticks([])
def main():
    """Run all five optimizers on each benchmark and save one plot per benchmark.

    For every benchmark the optimizers start from the same point and take
    the same number of steps. The resulting trajectories are drawn on the
    function surface and written to ``results/<benchmark name>.png``.
    """
    iterations = 2000

    # SGD and SGD-with-momentum always use their own, smaller learning rate;
    # the remaining optimizers share a per-benchmark rate.
    lr = 0.1
    lr_gp = 0.1
    own_lr = {'SGD': 0.01, 'SGDM': 0.01}
    optimizer_order = ('SGD', 'Adagrad', 'SGDM', 'Adam', 'DMAdam')

    eta0_normal = 3
    eta0_gp = 8

    # Each entry: (name, start point, global minimum, x range, y range,
    #              (elev, azim) view angle, eta0).
    benchmarks = [
        ("Sphere", [-3.0, 4.0], (0, 0), (-4, 4), (-4, 4), (60, -45), eta0_normal),
        ("Booth", [-8.0, 8.0], (1, 3), (-10, 5), (-2, 10), (50, -140), eta0_normal),
        ("Matyas", [4.0, -4.0], (0, 0), (-6, 6), (-6, 6), (40, 45), eta0_normal),
        ("Beale", [2.0, 2.0], (3, 0.5), (-4, 4), (-4, 4), (50, -145), eta0_normal),
        ("Goldstein-Price", [-0.5, 1.5], (0, -1), (-2, 2), (-2, 2), (30, -60), eta0_gp),
        # Disabled benchmark (its settings were lr = 0.1, eta0 = 15):
        # ("Bukin", [-13, 2.5], (-10, 1), (-15, -3), (-3, 6), (35, -50), 15)
    ]

    os.makedirs('results', exist_ok=True)

    for name, start_pt, glob_min, x_rng, y_rng, view, eta0 in benchmarks:
        print(f"Running {name} (Iter: {iterations}, Eta0: {eta0})...")

        shared_lr = lr_gp if name == "Goldstein-Price" else lr
        trajectories = {
            opt_name: run_trajectory(
                opt_name, name, start_pt, iterations,
                own_lr.get(opt_name, shared_lr), eta0,
            )
            for opt_name in optimizer_order
        }

        fig = plt.figure(figsize=(10, 8))
        ax = fig.add_subplot(111, projection='3d')
        plot_3d_surface(ax, name, x_rng, y_rng, trajectories, glob_min, view)
        ax.legend(loc='upper right', prop={'size': 9})
        plt.tight_layout()
        plt.savefig(f'results/{name}.png', dpi=140, bbox_inches='tight')
        # Close the figure before moving on to the next benchmark.
        plt.close(fig)
# Run the benchmark sweep only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()