1 | """ |
---|
2 | Solvers |
---|
3 | ======= |
---|
4 | |
---|
5 | This module contains an optimization routine adapted |
---|
6 | from scipy.optimize. The minimal scipy interface has been preserved, |
---|
7 | and functionality from the mystic solver API has been added with |
---|
8 | reasonable defaults. |
---|
9 | |
---|
10 | Minimal function interface to optimization routines:: |
---|
11 | fmin_ncg -- Newton-CG optimization |
---|
12 | |
---|
13 | The corresponding solvers built on mystic's AbstractSolver are:: |
---|
14 | NCGSolver -- Solver that uses the Newton-CG algorithm to minimize a function. |
---|
15 | |
---|
16 | Mystic solver behavior activated in fmin_ncg:: |
---|
17 | - EvaluationMonitor = Sow() |
---|
18 | - StepMonitor = CustomSow() with 4 columns |
---|
19 | - enable_signal_handler() |
---|
20 | - termination = SolutionImprovement(tolerance) |
---|
21 | |
---|
22 | Usage |
---|
23 | ===== |
---|
24 | |
---|
25 | See `mystic.examples.test_ncg` for an example of using |
---|
26 | NCGSolver. |
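
A minimal sketch of the function interface (the quadratic cost and its
gradient below are illustrative placeholders, not part of mystic)::

    import numpy
    cost = lambda x: numpy.sum(numpy.asarray(x)**2)
    grad = lambda x: 2*numpy.asarray(x)
    xopt = fmin_ncg(cost, [1., 1.], grad, disp=0)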

All solvers included in this module provide the standard signal handling.
For more information, see `mystic.mystic.abstract_solver`.

References
==========
See Wright and Nocedal, 'Numerical Optimization', 1999, pg. 140.

Adapted to Mystic, 2009
"""
__all__ = ['NCGSolver','fmin_ncg']

import numpy
from numpy import atleast_1d, eye, mgrid, argmin, zeros, shape, empty, \
     squeeze, vectorize, asarray, absolute, sqrt, Inf, asfarray, isinf

from mystic.tools import Null, wrap_function
from mystic.tools import wrap_bounds
from abstract_solver import AbstractSolver

#############################################################################
# Helper methods and classes

def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk+alpha pk)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57.

    :Returns: (alpha, fc, gc, phi_new)
    """

    xk = atleast_1d(xk)
    fc = 0
    phi0 = old_fval # compute f(xk) -- done in past loop
    phi_a0 = f(*((xk+alpha0*pk,)+args))
    fc = fc + 1
    derphi0 = numpy.dot(gfk,pk)

    if (phi_a0 <= phi0 + c1*alpha0*derphi0):
        return alpha0, fc, 0, phi_a0

    # Otherwise compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = f(*((xk+alpha1*pk,)+args))
    fc = fc + 1

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, fc, 0, phi_a1

    # Otherwise loop with cubic interpolation until we find an alpha which
    # satisfies the first Wolfe condition (since we are backtracking, we
    # will assume that the value of alpha is not too small and satisfies
    # the second condition).

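    # The cubic interpolant matching phi(0), phi'(0), and the two most
    # recent trial points is
    #     phi(alpha) ~= a*alpha**3 + b*alpha**2 + derphi0*alpha + phi0,
    # and its minimizer is alpha2 = (-b + sqrt(b**2 - 3*a*derphi0))/(3*a),
    # which is the update computed below.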
    while 1:        # we are assuming pk is a descent direction
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + numpy.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = f(*((xk+alpha2*pk,)+args))
        fc = fc + 1

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, fc, 0, phi_a2

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2

def approx_fhess_p(x0,p,fprime,epsilon,*args):
    """Approximate the hessian-vector product H(x0)*p by forward
    differences on the gradient:
        (fprime(x0 + epsilon*p) - fprime(x0)) / epsilon
    """
    f2 = fprime(*((x0+epsilon*p,)+args))
    f1 = fprime(*((x0,)+args))
    return (f2 - f1)/epsilon
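
# For a quadratic f(x) = 0.5*dot(x,dot(A,x)) with gradient dot(A,x), the
# forward difference above recovers dot(A,p) exactly -- a quick sanity
# sketch (illustrative only, not part of the solver):
#
#     A = numpy.array([[2., 0.], [0., 4.]])
#     grad = lambda x: numpy.dot(A, x)
#     approx_fhess_p(numpy.zeros(2), numpy.array([1., 1.]), grad, 1e-6)
#     # -> approximately array([2., 4.])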

_epsilon = sqrt(numpy.finfo(float).eps)

#########################################################################

class NCGSolver(AbstractSolver):
    """
    Newton-CG Optimization.
    """

    def __init__(self, dim):
        """
        Takes one initial input:
            dim      -- dimensionality of the problem

        All important class members are inherited from AbstractSolver.
        """
        AbstractSolver.__init__(self,dim)


    def Solve(self, func, termination, sigint_callback=None,
              EvaluationMonitor=Null, StepMonitor=Null, #GradientMonitor=Null,
              ExtraArgs=(), **kwds):
        """Minimize a function using NCG.

        Description:

            Uses a Newton-CG algorithm to find the minimum of
            a function of one or more variables.

        Inputs:

            func -- the Python function or method to be minimized.
            termination -- callable object providing termination conditions.

        Additional Inputs:

            sigint_callback -- callback function for signal handler.
            EvaluationMonitor -- a callable object that will be passed x, fval
                whenever the cost function is evaluated.
            StepMonitor -- a callable object that will be passed x, fval,
                the current gradient, and the last hessian-vector product
                at the end of each iteration.
            ExtraArgs -- extra arguments for func and fprime (the same
                arguments are passed to both).

        Further Inputs:

            fprime : callable f'(x,*args)
                Gradient of f.  (Currently required.)
            fhess_p : callable fhess_p(x,p,*args)
                Function which computes the Hessian of f times an
                arbitrary vector, p.
            fhess : callable fhess(x,*args)
                Function to compute the Hessian matrix of f.
            epsilon : float or ndarray
                If fhess is approximated, use this value for the step size.
            callback : callable
                An optional user-supplied function which is called after
                each iteration.  Called as callback(xk), where xk is the
                current parameter vector.
            disp : bool
                If True, print a convergence message.

        Notes:
            Only one of `fhess_p` or `fhess` needs to be given.  If `fhess`
            is provided, then `fhess_p` will be ignored.  If neither `fhess`
            nor `fhess_p` is provided, then the hessian product will be
            approximated using finite differences on `fprime`.  `fhess_p`
            must compute the hessian times an arbitrary vector.
        """
        # allow for inputs that don't conform to AbstractSolver interface
        args = ExtraArgs
        x0 = self.population[0]
        x0 = asarray(x0).flatten()

        epsilon = _epsilon
        self.disp = 1
        self.callback = None
        fhess_p = None
        fhess = None

        if 'epsilon' in kwds: epsilon = kwds['epsilon']
        if 'callback' in kwds: self.callback = kwds['callback']
        if 'disp' in kwds: self.disp = kwds['disp']
        if 'fhess' in kwds: fhess = kwds['fhess']
        if 'fhess_p' in kwds: fhess_p = kwds['fhess_p']

        # fprime is actually required; fail early if it is missing.
        # (Temporary fix?)
        if 'fprime' not in kwds:
            raise TypeError("Solve requires a gradient function 'fprime'")
        fprime = kwds['fprime']
        #-------------------------------------------------------------

        import signal
        self._EARLYEXIT = False

        fcalls, func = wrap_function(func, args, EvaluationMonitor)
        if self._useStrictRange:
            x0 = self._clipGuessWithinRangeBoundary(x0)
            func = wrap_bounds(func, self._strictMin, self._strictMax)

        #generate signal_handler
        self._generateHandler(sigint_callback)
        if self._handle_sigint: signal.signal(signal.SIGINT, self.signal_handler)

        #--------------------------------------------------------------

        if self._maxiter is None:
            self._maxiter = len(x0)*200

        # Wrap gradient function?
        # gcalls, fprime = wrap_function(fprime, args, GradientMonitor)
        gcalls, fprime = wrap_function(fprime, args, Null)

        # Wrap hessian monitor?
        # But wrap_function assumes the function takes one parameter...
        #if fhess is not None:
        #    hcalls2, fhess = wrap_function(fhess, args, HessianMonitor)
        #else:
        #    if fhess_p is not None:
        #        hcalls2, fhess_p = wrap_function(fhess_p, args, HessianMonitor)

        #xtol = len(x0)*avextol
        #update = [2*xtol]
        xk = x0
        k = 0
        hcalls = 0
        old_fval = func(x0)
        abs = absolute
        Ap = None  # last hessian-vector product (reported to StepMonitor)
        while k < self._maxiter:
            # Compute a search direction pk by applying the CG method to
            #   del2 f(xk) p = - grad f(xk)   starting from 0.
            b = -fprime(xk)
            maggrad = numpy.add.reduce(abs(b))
            eta = min([0.5, numpy.sqrt(maggrad)])
            termcond = eta * maggrad
            xsupi = zeros(len(x0), dtype=x0.dtype)
            ri = -b
            psupi = -ri
            i = 0
            dri0 = numpy.dot(ri,ri)

            if fhess is not None: # you want to compute hessian once.
                A = fhess(*(xk,)+args)
                hcalls = hcalls + 1

            # inner CG iterations: stop when the residual is small enough
            while numpy.add.reduce(abs(ri)) > termcond:
                if fhess is None:
                    if fhess_p is None:
                        Ap = approx_fhess_p(xk,psupi,fprime,epsilon)
                    else:
                        Ap = fhess_p(xk,psupi, *args)
                        hcalls = hcalls + 1
                else:
                    Ap = numpy.dot(A,psupi)
                # check curvature
                Ap = asarray(Ap).squeeze() # get rid of matrices...
                curv = numpy.dot(psupi,Ap)
                if curv == 0.0:
                    break
                elif curv < 0:
                    if (i > 0):
                        break
                    else:
                        xsupi = xsupi + dri0/curv * psupi
                        break
                alphai = dri0 / curv
                xsupi = xsupi + alphai * psupi
                ri = ri + alphai * Ap
                dri1 = numpy.dot(ri,ri)
                betai = dri1 / dri0
                psupi = -ri + betai * psupi
                i = i + 1
                dri0 = dri1          # update numpy.dot(ri,ri) for next time.

            pk = xsupi  # search direction is solution to system.
            gfk = -b    # gradient at xk
            alphak, fc, gc, old_fval = line_search_BFGS(func,xk,pk,gfk,old_fval)

            update = alphak * pk

            # Put last solution in trialSolution for termination()
            self.trialSolution = xk

            xk = xk + update        # upcast if necessary
            if self.callback is not None:
                self.callback(xk)
            k += 1

            # Update variables/monitors
            self.bestSolution = xk
            self.bestEnergy = old_fval
            StepMonitor(self.bestSolution,self.bestEnergy, gfk, Ap)
            self.energy_history.append(self.bestEnergy)

            if self._EARLYEXIT or termination(self):
                break

        self.generations = k

        # Fix me?
        self.hcalls = hcalls
        self.gcalls = gcalls[0]

        signal.signal(signal.SIGINT,signal.default_int_handler)

        fval = old_fval
        if k >= self._maxiter:
            if self.disp:
                print "Warning: Maximum number of iterations has been exceeded"
                print "         Current function value: %f" % fval
                print "         Iterations: %d" % k
                print "         Function evaluations: %d" % fcalls[0]
                print "         Gradient evaluations: %d" % gcalls[0]
                print "         Hessian evaluations: %d" % hcalls
        else:
            if self.disp:
                print "Optimization terminated successfully."
                print "         Current function value: %f" % fval
                print "         Iterations: %d" % k
                print "         Function evaluations: %d" % fcalls[0]
                print "         Gradient evaluations: %d" % gcalls[0]
                print "         Hessian evaluations: %d" % hcalls

#############################################################################
# Interface for using NCGSolver

def fmin_ncg(func, x0, fprime, fhess_p=None, fhess=None, args=(), xtol=1e-5,
             epsilon=_epsilon, maxiter=None, full_output=0, disp=1, retall=0,
             callback=None):
    """Minimize a function using the Newton-CG method.

    Input Parameters:

        func : callable func(x,*args)
            Objective function to be minimized.
        x0 : ndarray
            Initial guess.
        fprime : callable f'(x,*args)
            Gradient of f.

    Optional Input Parameters:

        fhess_p : callable fhess_p(x,p,*args)
            Function which computes the Hessian of f times an
            arbitrary vector, p.
        fhess : callable fhess(x,*args)
            Function to compute the Hessian matrix of f.
        args : tuple
            Extra arguments passed to func, fprime, fhess_p, and fhess
            (the same set of extra arguments is supplied to all of
            these functions).
        epsilon : float or ndarray
            If fhess is approximated, use this value for the step size.
        callback : callable
            An optional user-supplied function which is called after
            each iteration.  Called as callback(xk), where xk is the
            current parameter vector.
        xtol : float
            Convergence is assumed when the relative error in
            the minimizer falls below this amount.
        maxiter : int
            Maximum number of iterations to perform.
        full_output : bool
            If True, return the optional outputs.
        disp : bool
            If True, print a convergence message.
        retall : bool
            If True, return a list of results at each iteration.

    Returns: (xopt, {fopt, fcalls, gcalls, hcalls, warnflag}, {allvecs})

        xopt : ndarray
            Parameters which minimize func, i.e. ``func(xopt) == fopt``.
        fopt : float
            Value of the function at xopt, i.e. ``fopt = func(xopt)``.
        fcalls : int
            Number of function calls made.
        gcalls : int
            Number of gradient calls made.
        hcalls : int
            Number of hessian calls made.
        warnflag : int
            Warnings generated by the algorithm.
            1 : Maximum number of iterations exceeded.
        allvecs : list
            The result at each iteration, if retall is True.

    Notes:
        Only one of `fhess_p` or `fhess` needs to be given.  If `fhess`
        is provided, then `fhess_p` will be ignored.  If neither `fhess`
        nor `fhess_p` is provided, then the hessian product will be
        approximated using finite differences on `fprime`.  `fhess_p`
        must compute the hessian times an arbitrary vector.
    """

    from mystic.tools import Sow, CustomSow
    from mystic.termination import SolutionImprovement
    #stepmon = Sow()
    # note: the 'h' column receives the solver's last hessian-vector
    # product, despite the 'InverseHessian' label
    stepmon = CustomSow('x','y','g','h', x='x', y='fval', \
                        g='Gradient', h='InverseHessian')
    evalmon = Sow()

    solver = NCGSolver(len(x0))
    solver.SetInitialPoints(x0)
    solver.enable_signal_handler()
    solver.SetEvaluationLimits(maxiter,None)
    # Does requiring fprime break the abstract_solver interface?
    solver.Solve(func, SolutionImprovement(tolerance=xtol),\
                 EvaluationMonitor=evalmon, StepMonitor=stepmon,\
                 disp=disp, ExtraArgs=args, callback=callback,\
                 epsilon=epsilon, fhess_p=fhess_p,\
                 fhess=fhess, fprime=fprime)
    solution = solver.Solution()

    # code below here pushes output to scipy.optimize interface
    #x = list(solver.bestSolution)
    x = solver.bestSolution
    fval = solver.bestEnergy
    warnflag = 0
    fcalls = len(evalmon.x)
    iterations = len(stepmon.x)

    # Fix me?
    gcalls = solver.gcalls
    hcalls = solver.hcalls

    allvecs = []
    for i in range(iterations):
        #allvecs.append(list(stepmon.x[i]))
        allvecs.append(stepmon.x[i])

    if iterations >= solver._maxiter:
        warnflag = 1

    if full_output:
        retlist = x, fval, fcalls, gcalls, hcalls, warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = x
        if retall:
            retlist = (x, allvecs)

    return retlist

if __name__=='__main__':
    help(__name__)

# End of file