source: branches/alta/mystic-0.1a2/mystic/scipy_ncg.py @ 176

Revision 176, 15.8 KB, checked in by altafang, 7 years ago

Fixing termination, gradient call counter

"""
Solvers
=======

This module contains an optimization routine adapted
from scipy.optimize.  The minimal scipy interface has been preserved,
and functionality from the mystic solver API has been added with
reasonable defaults.

Minimal function interface to optimization routines::
   fmin_ncg -- Newton-CG optimization

The corresponding solvers built on mystic's AbstractSolver are::
   NCGSolver -- Solver that uses the Newton-CG algorithm to minimize a function.

Mystic solver behavior activated in fmin_ncg::
   - EvaluationMonitor = Sow()
   - StepMonitor = CustomSow() with 4 columns
   - enable_signal_handler()
   - termination = SolutionImprovement(tolerance)

Usage
=====

See `mystic.examples.test_ncg` for an example of using
NCGSolver.

All solvers included in this module provide the standard signal handling.
For more information, see `mystic.mystic.abstract_solver`.

References
==========
See Wright and Nocedal, 'Numerical Optimization', 1999, pg. 140.

Adapted to Mystic, 2009
"""
__all__ = ['NCGSolver','fmin_ncg']

import numpy
from numpy import atleast_1d, eye, mgrid, argmin, zeros, shape, empty, \
     squeeze, vectorize, asarray, absolute, sqrt, Inf, asfarray, isinf

from mystic.tools import Null, wrap_function
from mystic.tools import wrap_bounds
from abstract_solver import AbstractSolver

#############################################################################
# Helper methods and classes

def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize the function ``f(xk + alpha*pk)`` over alpha.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57.

    :Returns: (alpha, fc, gc, phi_a)

    """

    xk = atleast_1d(xk)
    fc = 0
    phi0 = old_fval # compute f(xk) -- done in past loop
    phi_a0 = f(*((xk+alpha0*pk,)+args))
    fc = fc + 1
    derphi0 = numpy.dot(gfk,pk)

    if (phi_a0 <= phi0 + c1*alpha0*derphi0):
        return alpha0, fc, 0, phi_a0

    # Otherwise compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = f(*((xk+alpha1*pk,)+args))
    fc = fc + 1

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, fc, 0, phi_a1

    # Otherwise loop with cubic interpolation until we find an alpha which
    # satisfies the first Wolfe condition (since we are backtracking, we will
    # assume that the value of alpha is not too small and satisfies the second
    # condition).

    while 1:       # we are assuming pk is a descent direction
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + numpy.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = f(*((xk+alpha2*pk,)+args))
        fc = fc + 1

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, fc, 0, phi_a2

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2

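# A minimal worked sketch of line_search_BFGS (hypothetical 1-d quadratic,
# not part of the solver).  For f(x) = (x - 2)**2 at xk = [0.], with gradient
# gfk = [-4.] and descent direction pk = [4.], the unit step overshoots the
# minimum, so one quadratic interpolation returns the exact step alpha = 0.5:
#
#   >>> f = lambda x: (x[0] - 2.0)**2
#   >>> line_search_BFGS(f, [0.], numpy.array([4.]), numpy.array([-4.]), 4.0)
#   (0.5, 2, 0, 0.0)
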
def approx_fhess_p(x0,p,fprime,epsilon,*args):
    f2 = fprime(*((x0+epsilon*p,)+args))
    f1 = fprime(*((x0,)+args))
    return (f2 - f1)/epsilon

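# approx_fhess_p forms a forward-difference approximation of the Hessian-vector
# product:  H(x0)*p  ~  (f'(x0 + epsilon*p) - f'(x0)) / epsilon.
# A minimal sketch (hypothetical quadratic 0.5*x'Ax, not part of the solver):
#
#   >>> A = numpy.array([[2., 0.], [0., 4.]])
#   >>> fprime = lambda x: numpy.dot(A, x)   # gradient of 0.5*x'Ax
#   >>> approx_fhess_p(numpy.zeros(2), numpy.array([1., 1.]), fprime, 1e-8)
#   array([ 2.,  4.])                        # ~ numpy.dot(A, [1., 1.])
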
_epsilon = sqrt(numpy.finfo(float).eps)

#########################################################################

class NCGSolver(AbstractSolver):
    """
Newton-CG Optimization.
    """

    def __init__(self, dim):
        """
Takes one initial input:
    dim  -- dimensionality of the problem

All important class members are inherited from AbstractSolver.
        """
        AbstractSolver.__init__(self,dim)


    def Solve(self, func, termination, sigint_callback=None,
              EvaluationMonitor=Null, StepMonitor=Null, #GradientMonitor=Null,
              ExtraArgs=(), **kwds):
        """Minimize a function using NCG.

Description:

    Uses a Newton-CG algorithm to find the minimum of
    a function of one or more variables.

Inputs:

    func -- the Python function or method to be minimized.
    termination -- callable object providing termination conditions.

Additional Inputs:

    sigint_callback -- callback function for signal handler.
    EvaluationMonitor -- a callable object that will be passed x, fval
        whenever the cost function is evaluated.
    StepMonitor -- a callable object that will be passed x, fval, the gradient,
        and the last Hessian-vector product at the end of each iteration.
    ExtraArgs -- extra arguments for func and fprime (the same for both).

Further Inputs:

    fprime : callable f'(x,*args)
        Gradient of f.
    fhess_p : callable fhess_p(x,p,*args)
        Function which computes the Hessian of f times an
        arbitrary vector, p.
    fhess : callable fhess(x,*args)
        Function to compute the Hessian matrix of f.
    epsilon : float or ndarray
        If fhess is approximated, use this value for the step size.
    callback : callable
        An optional user-supplied function which is called after
        each iteration.  Called as callback(xk), where xk is the
        current parameter vector.
    disp : bool
        If True, print convergence message.

    Notes:
      Only one of `fhess_p` or `fhess` needs to be given.  If `fhess`
      is provided, then `fhess_p` will be ignored.  If neither `fhess`
      nor `fhess_p` is provided, then the Hessian product will be
      approximated using finite differences on `fprime`. `fhess_p`
      must compute the Hessian times an arbitrary vector; if it is not
      given, finite differences on `fprime` are used to compute it.

        """
        # allow for inputs that don't conform to AbstractSolver interface
        args = ExtraArgs
        x0 = self.population[0]
        x0 = asarray(x0).flatten()

        epsilon = _epsilon
        self.disp = 1
        self.callback = None
        fhess_p = None
        fhess = None

        if kwds.has_key('epsilon'): epsilon = kwds['epsilon']
        if kwds.has_key('callback'): self.callback = kwds['callback']
        if kwds.has_key('disp'): self.disp = kwds['disp']
        if kwds.has_key('fhess'): fhess = kwds['fhess']
        if kwds.has_key('fhess_p'): fhess_p = kwds['fhess_p']

        # fprime is actually required. Temporary fix?:
        if kwds.has_key('fprime'): fprime = kwds['fprime']
        #-------------------------------------------------------------

        import signal
        self._EARLYEXIT = False

        fcalls, func = wrap_function(func, args, EvaluationMonitor)
        if self._useStrictRange:
            x0 = self._clipGuessWithinRangeBoundary(x0)
            func = wrap_bounds(func, self._strictMin, self._strictMax)

        #generate signal_handler
        self._generateHandler(sigint_callback)
        if self._handle_sigint: signal.signal(signal.SIGINT, self.signal_handler)

        #--------------------------------------------------------------

        if self._maxiter is None:
            self._maxiter = len(x0)*200

        # Wrap gradient function?
        # gcalls, fprime = wrap_function(fprime, args, GradientMonitor)
        gcalls, fprime = wrap_function(fprime, args, Null)

        # Wrap hessian monitor?
        # But wrap_function assumes the function takes one parameter...
        #if fhess is not None:
        #    hcalls2, fhess = wrap_function(fhess, args, HessianMonitor)
        #else:
        #    if fhess_p is not None:
        #        hcalls2, fhess_p = wrap_function(fhess_p, args, HessianMonitor)

        #xtol = len(x0)*avextol
        #update = [2*xtol]
        xk = x0
        k = 0
        hcalls = 0
        old_fval = func(x0)
        abs = absolute
        while k < self._maxiter:
            # Compute a search direction pk by applying the CG method to
            #  del2 f(xk) p = - grad f(xk) starting from 0.
            b = -fprime(xk)
            maggrad = numpy.add.reduce(abs(b))
            eta = min([0.5,numpy.sqrt(maggrad)])
            termcond = eta * maggrad
            xsupi = zeros(len(x0), dtype=x0.dtype)
            ri = -b
            psupi = -ri
            i = 0
            dri0 = numpy.dot(ri,ri)

            if fhess is not None:             # compute the Hessian only once.
                A = fhess(*(xk,)+args)
                hcalls = hcalls + 1

            while numpy.add.reduce(abs(ri)) > termcond:
                if fhess is None:
                    if fhess_p is None:
                        Ap = approx_fhess_p(xk,psupi,fprime,epsilon)
                    else:
                        Ap = fhess_p(xk,psupi, *args)
                        hcalls = hcalls + 1
                else:
                    Ap = numpy.dot(A,psupi)
                # check curvature
                Ap = asarray(Ap).squeeze() # get rid of matrices...
                curv = numpy.dot(psupi,Ap)
                if curv == 0.0:
                    break
                elif curv < 0:
                    if (i > 0):
                        break
                    else:
                        xsupi = xsupi + dri0/curv * psupi
                        break
                alphai = dri0 / curv
                xsupi = xsupi + alphai * psupi
                ri = ri + alphai * Ap
                dri1 = numpy.dot(ri,ri)
                betai = dri1 / dri0
                psupi = -ri + betai * psupi
                i = i + 1
                dri0 = dri1          # update numpy.dot(ri,ri) for next time.

            pk = xsupi  # search direction is solution to system.
            gfk = -b    # gradient at xk
            alphak, fc, gc, old_fval = line_search_BFGS(func,xk,pk,gfk,old_fval)

            update = alphak * pk

            # Put last solution in trialSolution for termination()
            self.trialSolution = xk

            xk = xk + update        # upcast if necessary
            if self.callback is not None:
                self.callback(xk)
            k += 1

            # Update variables/monitors
            self.bestSolution = xk
            self.bestEnergy = old_fval
            StepMonitor(self.bestSolution,self.bestEnergy, gfk, Ap)
            self.energy_history.append(self.bestEnergy)

            if self._EARLYEXIT or termination(self):
                break

        self.generations = k

        # Fix me?
        self.hcalls = hcalls
        self.gcalls = gcalls[0]

        signal.signal(signal.SIGINT,signal.default_int_handler)

        if self.disp:
            fval = old_fval
        if k >= self._maxiter:
            if self.disp:
                print "Warning: Maximum number of iterations has been exceeded"
                print "         Current function value: %f" % fval
                print "         Iterations: %d" % k
                print "         Function evaluations: %d" % fcalls[0]
                print "         Gradient evaluations: %d" % gcalls[0]
                print "         Hessian evaluations: %d" % hcalls
        else:
            if self.disp:
                print "Optimization terminated successfully."
                print "         Current function value: %f" % fval
                print "         Iterations: %d" % k
                print "         Function evaluations: %d" % fcalls[0]
                print "         Gradient evaluations: %d" % gcalls[0]
                print "         Hessian evaluations: %d" % hcalls

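# A minimal usage sketch for NCGSolver (hypothetical cost and gradient; the
# fmin_ncg interface below wraps this same pattern with monitors and signal
# handling):
#
#   >>> from mystic.termination import SolutionImprovement
#   >>> cost = lambda x: (x[0] - 1.0)**2 + (x[1] + 2.0)**2
#   >>> grad = lambda x: numpy.array([2.0*(x[0] - 1.0), 2.0*(x[1] + 2.0)])
#   >>> solver = NCGSolver(2)
#   >>> solver.SetInitialPoints([0.0, 0.0])
#   >>> solver.Solve(cost, SolutionImprovement(tolerance=1e-5), \
#   ...              fprime=grad, disp=0)
#   >>> solver.Solution()   # expected to approach [1.0, -2.0]
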
#############################################################################
# Interface for using NCGSolver

def fmin_ncg(func, x0, fprime, fhess_p=None, fhess=None, args=(), xtol=1e-5,
             epsilon=_epsilon, maxiter=None, full_output=0, disp=1, retall=0,
             callback=None):
    """Minimize a function using the Newton-CG method.

    Input Parameters:

        f : callable f(x,*args)
            Objective function to be minimized.
        x0 : ndarray
            Initial guess.
        fprime : callable f'(x,*args)
            Gradient of f.

    Optional Input Parameters:

        fhess_p : callable fhess_p(x,p,*args)
            Function which computes the Hessian of f times an
            arbitrary vector, p.
        fhess : callable fhess(x,*args)
            Function to compute the Hessian matrix of f.
        args : tuple
            Extra arguments passed to f, fprime, fhess_p, and fhess
            (the same set of extra arguments is supplied to all of
            these functions).
        epsilon : float or ndarray
            If fhess is approximated, use this value for the step size.
        callback : callable
            An optional user-supplied function which is called after
            each iteration.  Called as callback(xk), where xk is the
            current parameter vector.
        xtol : float
            Convergence is assumed when the relative error in
            the minimizer falls below this amount.
        maxiter : int
            Maximum number of iterations to perform.
        full_output : bool
            If True, return the optional outputs.
        disp : bool
            If True, print convergence message.
        retall : bool
            If True, return a list of results at each iteration.

    Returns: (xopt, {fopt, fcalls, gcalls, hcalls, warnflag}, {allvecs})

        xopt : ndarray
            Parameters which minimize f, i.e. ``f(xopt) == fopt``.
        fopt : float
            Value of the function at xopt, i.e. ``fopt = f(xopt)``.
        fcalls : int
            Number of function calls made.
        gcalls : int
            Number of gradient calls made.
        hcalls : int
            Number of Hessian calls made.
        warnflag : int
            Warnings generated by the algorithm.
            1 : Maximum number of iterations exceeded.
        allvecs : list
            The result at each iteration, if retall is True (see below).

    Notes:
      Only one of `fhess_p` or `fhess` needs to be given.  If `fhess`
      is provided, then `fhess_p` will be ignored.  If neither `fhess`
      nor `fhess_p` is provided, then the Hessian product will be
      approximated using finite differences on `fprime`. `fhess_p`
      must compute the Hessian times an arbitrary vector; if it is not
      given, finite differences on `fprime` are used to compute it.
    """

    from mystic.tools import Sow, CustomSow
    from mystic.termination import SolutionImprovement
    #stepmon = Sow()
    stepmon = CustomSow('x','y','g','h', x='x', y='fval', \
                         g='Gradient', h='InverseHessian')
    evalmon = Sow()

    solver = NCGSolver(len(x0))
    solver.SetInitialPoints(x0)
    solver.enable_signal_handler()
    solver.SetEvaluationLimits(maxiter,None)
    # Does requiring fprime break abstract_solver interface?
    solver.Solve(func, SolutionImprovement(tolerance=xtol),\
                 EvaluationMonitor=evalmon,StepMonitor=stepmon,\
                 disp=disp, ExtraArgs=args, callback=callback,\
                 epsilon=epsilon, fhess_p=fhess_p,\
                 fhess=fhess, fprime=fprime)
    solution = solver.Solution()

    # code below here pushes output to scipy.optimize interface
    #x = list(solver.bestSolution)
    x = solver.bestSolution
    fval = solver.bestEnergy
    warnflag = 0
    fcalls = len(evalmon.x)
    iterations = len(stepmon.x)

    # Fix me?
    gcalls = solver.gcalls
    hcalls = solver.hcalls

    allvecs = []
    for i in range(iterations):
        #allvecs.append(list(stepmon.x[i]))
        allvecs.append(stepmon.x[i])

    if iterations >= solver._maxiter:
        warnflag = 1

    if full_output:
        retlist = x, fval, fcalls, gcalls, hcalls, warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = x
        if retall:
            retlist = (x, allvecs)

    return retlist

if __name__=='__main__':
    help(__name__)
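    # A minimal usage sketch for fmin_ncg (hypothetical quadratic cost; left
    # commented out so that running this module still only prints the help):
    #
    #   cost = lambda x: numpy.sum((x - 1.0)**2)   # minimum at x = [1., 1.]
    #   grad = lambda x: 2.0*(x - 1.0)             # analytic gradient
    #   xopt = fmin_ncg(cost, numpy.array([3.0, -2.0]), grad, disp=0)
    #   print xopt                                 # expected to be near [1., 1.]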

# End of file