Comparing optimizers

A benchmark of scipy.optimize solvers (Nelder-Mead, Powell, conjugate gradient, BFGS, Newton-CG, and L-BFGS) on well- and ill-conditioned quadratic, Gaussian, and Rosenbrock problems in 2 to 128 dimensions, scored by the number of function evaluations needed to converge.

Python source code: compare_optimizers.py

import functools
import pickle
import numpy as np
from scipy import optimize
from joblib import Memory
from cost_functions import mk_quad, mk_gauss, rosenbrock, \
    rosenbrock_prime, rosenbrock_hessian, LoggingFunction, \
    CountingFunction
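
# LoggingFunction and CountingFunction come from the companion cost_functions
# module.  As used below, LoggingFunction records the cost value at every call
# (f.all_f_i) and, when given a shared counter, the running evaluation count
# at each call (f.counts); CountingFunction wraps a gradient or Hessian and
# increments that shared counter.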

def my_partial(function, **kwargs):
    f = functools.partial(function, **kwargs)
    functools.update_wrapper(f, function)
    return f
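
# Each entry wraps a scipy.optimize solver with tight tolerances and a large
# iteration/evaluation budget, so the methods are compared on how many
# evaluations they need rather than on where they give up; update_wrapper
# above lets each partial keep the underlying solver's name.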
methods = {
    'Nelder-mead': my_partial(optimize.fmin,
                              ftol=1e-12, maxiter=5000,
                              xtol=1e-7, maxfun=int(1e6)),
    'Powell': my_partial(optimize.fmin_powell,
                         ftol=1e-9, maxiter=5000,
                         maxfun=int(1e7)),
    'BFGS': my_partial(optimize.fmin_bfgs,
                       gtol=1e-9, maxiter=5000),
    'Newton': my_partial(optimize.fmin_ncg,
                         avextol=1e-7, maxiter=5000),
    'Conjugate gradient': my_partial(optimize.fmin_cg,
                                     gtol=1e-7, maxiter=5000),
    'L-BFGS': my_partial(optimize.fmin_l_bfgs_b,
                         approx_grad=1, factr=10.0,
                         pgtol=1e-8, maxfun=int(1e7)),
    "L-BFGS w f'": my_partial(optimize.fmin_l_bfgs_b,
                              factr=10.0,
                              pgtol=1e-8, maxfun=int(1e7)),
}
###############################################################################
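# Bench without gradients: run one solver on one problem from one starting
# point and record the cost value at every function evaluation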
def bencher(cost_name, ndim, method_name, x0):
    cost_function = mk_costs(ndim)[0][cost_name][0]
    method = methods[method_name]
    f = LoggingFunction(cost_function)
    method(f, x0)
    this_costs = np.array(f.all_f_i)
    return this_costs

# Bench with gradients
def bencher_gradient(cost_name, ndim, method_name, x0):
    cost_function, cost_function_prime, hessian = mk_costs(ndim)[0][cost_name]
    method = methods[method_name]
    f_prime = CountingFunction(cost_function_prime)
    f = LoggingFunction(cost_function, counter=f_prime.counter)
    method(f, x0, f_prime)
    this_costs = np.array(f.all_f_i)
    return this_costs, np.array(f.counts)

# Bench with the hessian
def bencher_hessian(cost_name, ndim, method_name, x0):
    cost_function, cost_function_prime, hessian = mk_costs(ndim)[0][cost_name]
    method = methods[method_name]
    f_prime = CountingFunction(cost_function_prime)
    hessian = CountingFunction(hessian, counter=f_prime.counter)
    f = LoggingFunction(cost_function, counter=f_prime.counter)
    method(f, x0, f_prime, fhess=hessian)
    this_costs = np.array(f.all_f_i)
    return this_costs, np.array(f.counts)
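
# Test problems: well- and ill-conditioned quadratics and Gaussians plus
# Rosenbrock, each as a (cost, gradient, hessian) triple, together with random
# starting points drawn uniformly in [-2, 2]**ndim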
def mk_costs(ndim=2):
    costs = {
        'Well-conditioned quadratic': mk_quad(.7, ndim=ndim),
        'Ill-conditioned quadratic': mk_quad(.02, ndim=ndim),
        'Well-conditioned Gaussian': mk_gauss(.7, ndim=ndim),
        'Ill-conditioned Gaussian': mk_gauss(.02, ndim=ndim),
        'Rosenbrock ': (rosenbrock, rosenbrock_prime, rosenbrock_hessian),
    }
    rng = np.random.RandomState(0)
    starting_points = 4*rng.rand(20, ndim) - 2
    if ndim > 100:
        starting_points = starting_points[:10]
    return costs, starting_points
###############################################################################
# Compare methods without gradient
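# A run is scored by the number of evaluations needed to bring the cost below
# .25*ndim**2*1e-9; runs that never reach this threshold are charged twice
# their total evaluation count as a penalty, and scores are normalized across
# methods for each starting point.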
mem = Memory('.', verbose=3)

if 1:
    gradient_less_benchs = dict()

    for ndim in (2, 8, 32, 128):
        this_dim_benchs = dict()
        costs, starting_points = mk_costs(ndim)
        for cost_name, cost_function in costs.items():
            # We don't need the derivative or the hessian
            cost_function = cost_function[0]
            function_bench = dict()
            for x0 in starting_points:
                all_bench = list()
                # Bench gradient-less
                for method_name, method in methods.items():
                    if method_name in ('Newton', "L-BFGS w f'"):
                        continue
                    this_bench = function_bench.get(method_name, list())
                    this_costs = mem.cache(bencher)(cost_name, ndim,
                                                    method_name, x0)
                    if np.all(this_costs > .25*ndim**2*1e-9):
                        convergence = 2*len(this_costs)
                    else:
                        convergence = np.where(
                            np.diff(this_costs > .25*ndim**2*1e-9)
                        )[0].max() + 1
                    this_bench.append(convergence)
                    all_bench.append(convergence)
                    function_bench[method_name] = this_bench
                # Bench with gradients
                for method_name, method in methods.items():
                    if method_name in ('Newton', 'Powell', 'Nelder-mead',
                                       "L-BFGS"):
                        continue
                    this_method_name = method_name
                    if method_name.endswith(" w f'"):
                        this_method_name = method_name[:-4]
                    this_method_name = this_method_name + "\nw f'"
                    this_bench = function_bench.get(this_method_name, list())
                    this_costs, this_counts = mem.cache(bencher_gradient)(
                        cost_name, ndim, method_name, x0)
                    if np.all(this_costs > .25*ndim**2*1e-9):
                        convergence = 2*this_counts.max()
                    else:
                        convergence = np.where(
                            np.diff(this_costs > .25*ndim**2*1e-9)
                        )[0].max() + 1
                        convergence = this_counts[convergence]
                    this_bench.append(convergence)
                    all_bench.append(convergence)
                    function_bench[this_method_name] = this_bench
                # Bench Newton with Hessian
                method_name = 'Newton'
                this_bench = function_bench.get(method_name, list())
                this_costs, this_counts = mem.cache(bencher_hessian)(
                    cost_name, ndim, method_name, x0)
                if np.all(this_costs > .25*ndim**2*1e-9):
                    convergence = 2*len(this_costs)
                else:
                    convergence = np.where(
                        np.diff(this_costs > .25*ndim**2*1e-9)
                    )[0].max() + 1
                this_bench.append(convergence)
                all_bench.append(convergence)
                function_bench[method_name + '\nw Hessian '] = this_bench
                # Normalize across methods for this starting point
                x0_mean = np.mean(all_bench)
                for method_name in function_bench:
                    function_bench[method_name][-1] /= x0_mean
            this_dim_benchs[cost_name] = function_bench
        gradient_less_benchs[ndim] = this_dim_benchs
        print(80*'_')
        print('Done cost %s, ndim %s' % (cost_name, ndim))
        print(80*'_')

with open('compare_optimizers.pkl', 'wb') as out_file:
    pickle.dump(gradient_less_benchs, out_file)
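
The results are saved to compare_optimizers.pkl as a nested dictionary indexed by dimension, cost-function name, and method name, each entry holding one normalized convergence score per starting point. A minimal sketch of reloading and summarizing them (assuming the pickle produced above is in the working directory; lower scores mean fewer evaluations):

import pickle

import numpy as np

with open('compare_optimizers.pkl', 'rb') as results_file:
    results = pickle.load(results_file)

for ndim, by_cost in sorted(results.items()):
    for cost_name, by_method in by_cost.items():
        # Method names may contain newlines (they double as plot labels)
        best = min(by_method, key=lambda name: np.mean(by_method[name]))
        print('ndim=%d, %s: best method is %s'
              % (ndim, cost_name, best.replace('\n', ' ')))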