/////////////////////////////////////////////////////////////////////////////////
//
//  Levenberg - Marquardt non-linear minimization algorithm
//  Copyright (C) 2004  Manolis Lourakis (lourakis@ics.forth.gr)
//  Institute of Computer Science, Foundation for Research & Technology - Hellas
//  Heraklion, Crete, Greece.
//
//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
/////////////////////////////////////////////////////////////////////////////////

// changes by Pablo d'Angelo:
//  * added visualisation callback

#ifndef LM_REAL // not included by lm.c
#error This file should not be compiled directly!
#endif


/* precision-specific definitions */
#define LEVMAR_DER LM_ADD_PREFIX(levmar_der)
#define LEVMAR_DIF LM_ADD_PREFIX(levmar_dif)
#define FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(fdif_forw_jac_approx)
#define FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(fdif_cent_jac_approx)
#define TRANS_MAT_MAT_MULT LM_ADD_PREFIX(trans_mat_mat_mult)
#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)

#ifdef HAVE_LAPACK
#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU)
#define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol)
#define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR)
#define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS)
#define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD)
#else
#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack)
#endif /* HAVE_LAPACK */
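/* A sketch of how lm.c is assumed to drive the macros above (the exact macro
 * definition in the levmar distribution differs slightly): lm.c defines
 * LM_REAL and LM_ADD_PREFIX once per precision and then includes this core
 * file, so that e.g. LEVMAR_DER expands to dlevmar_der for doubles and, with
 * an 's' prefix, slevmar_der for floats:
 *
 *   #define LM_REAL double
 *   #define LM_ADD_PREFIX(x) d ## x   // levmar_der -> dlevmar_der, etc.
 *   #include "lm_core.c"
 *   #undef LM_ADD_PREFIX
 *   #undef LM_REAL
 */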
/*
 * This function seeks the parameter vector p that best describes the measurements vector x.
 * More precisely, given a vector function  func : R^m --> R^n with n>=m,
 * it finds p s.t. func(p) ~= x, i.e. the squared second order (i.e. L2) norm of
 * e=x-func(p) is minimized.
 *
 * This function requires an analytic Jacobian. In case the latter is unavailable,
 * use LEVMAR_DIF() below.
 *
 * Returns the number of iterations (>=0) if successful, -1 if failed
 *
 * For more details, see H.B. Nielsen's (http://www.imm.dtu.dk/~hbn) IMM/DTU
 * tutorial at http://www.imm.dtu.dk/courses/02611/nllsq.pdf
 */
int LEVMAR_DER(
  void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
  void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata),  /* function to evaluate the Jacobian \part x / \part p */
  LM_REAL *p,         /* I/O: initial parameter estimates. On output has the estimated solution */
  LM_REAL *x,         /* I: measurement vector */
  int m,              /* I: parameter vector dimension (i.e. #unknowns) */
  int n,              /* I: measurement vector dimension */
  int itmax,          /* I: maximum number of iterations */
  LM_REAL opts[4],    /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu,
                       * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used
                       */
  LM_REAL info[LM_INFO_SZ],
                      /* O: information regarding the minimization. Set to NULL if don't care
                       * info[0]= ||e||_2 at initial p.
                       * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
                       * info[5]= # iterations,
                       * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
                       *                                 2 - stopped by small Dp
                       *                                 3 - stopped by itmax
                       *                                 4 - singular matrix. Restart from current p with increased mu
                       *                                 5 - no further error reduction is possible. Restart with increased mu
                       *                                 6 - stopped by small ||e||_2
                       * info[7]= # function evaluations
                       * info[8]= # Jacobian evaluations
                       */
  LM_REAL *work,      /* working memory, allocate if NULL */
  LM_REAL *covar,     /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
  void *adata)        /* pointer to possibly additional data, passed uninterpreted to func & jacf.
                       * Set to NULL if not needed
                       */
{
register int i, j, k, l;
int worksz, freework=0, issolved;
/* temp work arrays */
LM_REAL *e,          /* nx1 */
       *hx,          /* \hat{x}_i, nx1 */
       *jacTe,       /* J^T e_i mx1 */
       *jac,         /* nxm */
       *jacTjac,     /* mxm */
       *Dp,          /* mx1 */
   *diag_jacTjac,    /* diagonal of J^T J, mx1 */
       *pDp;         /* p + Dp, mx1 */

register LM_REAL mu,  /* damping constant */
                tmp;  /* mainly used in matrix & vector multiplications */
LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */
LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
LM_REAL tau, eps1, eps2, eps2_sq, eps3;
LM_REAL init_p_eL2;
int nu=2, nu2, stop, nfev, njev=0;
const int nm=n*m;

  mu=jacTe_inf=0.0; /* -Wall */

  if(n<m){
    fprintf(stderr, LCAT(LEVMAR_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
    exit(1);
  }

  if(!jacf){
    fprintf(stderr, RCAT("No function specified for computing the Jacobian in ", LEVMAR_DER)
        RCAT("().\nIf no such function is available, use ", LEVMAR_DIF) RCAT("() rather than ", LEVMAR_DER) "()\n");
    exit(1);
  }

  if(opts){
    tau=opts[0]; eps1=opts[1]; eps2=opts[2]; eps2_sq=opts[2]*opts[2]; eps3=opts[3];
  }
  else{ // use default values
    tau=CNST(LM_INIT_MU); eps1=CNST(LM_STOP_THRESH); eps2=CNST(LM_STOP_THRESH);
    eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH); eps3=CNST(LM_STOP_THRESH);
  }

  if(!work){
    worksz=LM_DER_WORKSZ(m, n); //2*n+4*m + n*m + m*m;
    work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */
    if(!work){
      fprintf(stderr, LCAT(LEVMAR_DER, "(): memory allocation request failed\n"));
      exit(1);
    }
    freework=1;
  }

  /* set up work arrays */
  e=work;
  hx=e + n;
  jacTe=hx + n;
  jac=jacTe + m;
  jacTjac=jac + nm;
  Dp=jacTjac + m*m;
  diag_jacTjac=Dp + m;
  pDp=diag_jacTjac + m;

  /* compute e=x - f(p) and its L2 norm */
  (*func)(p, hx, m, n, adata); nfev=1;
  for(i=0, p_eL2=0.0; i<n; ++i){
    e[i]=tmp=x[i]-hx[i];
    p_eL2+=tmp*tmp;
  }
  init_p_eL2=p_eL2;

  for(k=stop=0; k<itmax && !stop; ++k){
    /* Note that p and e have been updated at a previous iteration */

    if(p_eL2<=eps3){ /* error is small */
      stop=6;
      break;
    }

    /* Compute the Jacobian J at p, J^T J, J^T e, ||J^T e||_inf and ||p||^2.
     * Since J^T J is symmetric, its computation can be speeded up by computing
     * only its upper triangular part and copying it to the lower part
     */

    (*jacf)(p, jac, m, n, adata); ++njev;

    /* J^T J, J^T e */
    if(nm<=__BLOCKSZ__SQ){ // this is a small problem
      /* Straightforward computation; the cache-efficient blocking scheme of the
       * large-problem branch below (see also the comment in LEVMAR_DIF()) pays
       * off only when J is too large to fit in the L1 cache.
       */
      for(i=0; i<m; ++i){
        for(j=i; j<m; ++j){
          int lm;

          for(l=0, tmp=0.0; l<n; ++l){
            lm=l*m;
            tmp+=jac[lm+i]*jac[lm+j];
          }

          jacTjac[i*m+j]=jacTjac[j*m+i]=tmp; /* store tmp in the corresponding upper and lower part elements */
        }

        /* J^T e */
        for(l=0, tmp=0.0; l<n; ++l)
          tmp+=jac[l*m+i]*e[l];
        jacTe[i]=tmp;
      }
    }
    else{ // this is a large problem
      /* cache-efficient computation of J^T J based on blocking */
      TRANS_MAT_MAT_MULT(jac, jacTjac, n, m, __BLOCKSZ__);

      /* cache-efficient computation of J^T e */
      for(i=0; i<m; ++i)
        jacTe[i]=0.0;

      for(i=0; i<n; ++i){
        register LM_REAL *jacrow;

        for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
          jacTe[l]+=jacrow[l]*tmp;
      }
    }

    /* compute ||J^T e||_inf and ||p||^2 */
    for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){
      if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;

      diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */
      p_L2+=p[i]*p[i];
    }

    /* check for convergence */
    if(jacTe_inf <= eps1){
      Dp_L2=0.0; /* no increment for p in this case */
      stop=1;
      break;
    }

    /* compute initial damping factor */
    if(k==0){
      for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
        if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
      mu=tau*tmp;
    }

    /* determine increment using adaptive damping */
    while(1){
      /* augment normal equations */
      for(i=0; i<m; ++i)
        jacTjac[i*m+i]+=mu;

      /* solve augmented equations */
#ifdef HAVE_LAPACK
      /* 5 alternatives are available: LU, Cholesky, 2 variants of QR decomposition and SVD.
       * Cholesky is the fastest but might be inaccurate; QR is slower but more accurate;
       * SVD is the slowest but most accurate; LU offers a tradeoff between accuracy & speed
       */

      issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, n, m);
      //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m);
#else
      /* use the LU included with levmar */
      issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m);
#endif /* HAVE_LAPACK */

      if(issolved){
        /* compute p's new estimate and ||Dp||^2 */
        for(i=0, Dp_L2=0.0; i<m; ++i){
          pDp[i]=p[i] + (tmp=Dp[i]);
          Dp_L2+=tmp*tmp;
        }

        if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
          stop=2;
          break;
        }

        if(Dp_L2>=(p_L2+eps2)/(CNST(EPSILON)*CNST(EPSILON))){ /* almost singular */
        //if(Dp_L2>=(p_L2+eps2)/CNST(EPSILON)){ /* almost singular */
          stop=4;
          break;
        }

        (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
        for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */
          hx[i]=tmp=x[i]-hx[i];
          pDp_eL2+=tmp*tmp;
        }

        for(i=0, dL=0.0; i<m; ++i)
          dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);

        dF=p_eL2-pDp_eL2;

        if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
          tmp=(CNST(2.0)*dF/dL-CNST(1.0));
          tmp=CNST(1.0)-tmp*tmp*tmp;
          mu=mu*( (tmp>=CNST(ONE_THIRD))? tmp : CNST(ONE_THIRD) );
          nu=2;

          for(i=0 ; i<m; ++i) /* update p's estimate */
            p[i]=pDp[i];

          for(i=0; i<n; ++i) /* update e and ||e||_2 */
            e[i]=hx[i];
          p_eL2=pDp_eL2;
          break;
        }
      }

      /* if this point is reached, either the linear system could not be solved or
       * the error did not reduce; in any case, the increment must be rejected
       */

      mu*=nu;
      nu2=nu<<1; // 2*nu;
      if(nu2<=nu){ /* nu has wrapped around (overflown) */
        stop=5;
        break;
      }
      nu=nu2;

      for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
        jacTjac[i*m+i]=diag_jacTjac[i];
    } /* inner loop */
  }

  if(k>=itmax) stop=3;

  for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
    jacTjac[i*m+i]=diag_jacTjac[i];

  if(info){
    info[0]=init_p_eL2;
    info[1]=p_eL2;
    info[2]=jacTe_inf;
    info[3]=Dp_L2;
    for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
      if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
    info[4]=mu/tmp;
    info[5]=(LM_REAL)k;
    info[6]=(LM_REAL)stop;
    info[7]=(LM_REAL)nfev;
    info[8]=(LM_REAL)njev;
  }

  /* covariance matrix */
  if(covar){
    LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
  }

  if(freework) free(work);

  return (stop!=4)?  k : -1;
}
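/* A minimal calling sketch for the function above, assuming the double-precision
 * build in which LEVMAR_DER() is exported as dlevmar_der() (declared in levmar's
 * public header); the model and all names below are hypothetical. Note that the
 * Jacobian is stored row-wise: row i holds the gradient of \hat{x}_i w.r.t. p.
 *
 *   #include <math.h>
 *
 *   // model: \hat{x}_i = p[0]*exp(-p[1]*t_i), abscissae t passed via adata
 *   static void expfunc(double *p, double *hx, int m, int n, void *adata)
 *   {
 *   register int i;
 *   double *t=(double *)adata;
 *
 *     for(i=0; i<n; ++i)
 *       hx[i]=p[0]*exp(-p[1]*t[i]);
 *   }
 *
 *   static void expjac(double *p, double *jac, int m, int n, void *adata)
 *   {
 *   register int i;
 *   double *t=(double *)adata;
 *
 *     for(i=0; i<n; ++i){
 *       jac[i*m]  = exp(-p[1]*t[i]);            // d\hat{x}_i / dp[0]
 *       jac[i*m+1]=-p[0]*t[i]*exp(-p[1]*t[i]);  // d\hat{x}_i / dp[1]
 *     }
 *   }
 *
 *   // with x, t holding the n measurements/abscissae, p[2]={1.0, 0.0} as the
 *   // initial estimate and NULL opts selecting the defaults; returns the
 *   // number of iterations, or -1 on failure:
 *   // niter=dlevmar_der(expfunc, expjac, p, x, 2, n, 1000, NULL, info, NULL, NULL, (void *)t);
 */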
/* Secant version of the LEVMAR_DER() function above: the Jacobian is approximated with
 * the aid of finite differences (forward or central, see the comment for the opts argument)
 */
int LEVMAR_DIF(
  void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
  LM_REAL *p,         /* I/O: initial parameter estimates. On output has the estimated solution */
  LM_REAL *x,         /* I: measurement vector */
  int m,              /* I: parameter vector dimension (i.e. #unknowns) */
  int n,              /* I: measurement vector dimension */
  int itmax,          /* I: maximum number of iterations */
  LM_REAL opts[5],    /* I: opts[0-4] = minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \delta]. Respectively the
                       * scale factor for initial \mu, stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2 and
                       * the step used in the difference approximation to the Jacobian. Set to NULL for defaults to be used.
                       * If \delta<0, the Jacobian is approximated with central differences which are more accurate
                       * (but slower!) compared to the forward differences employed by default.
                       */
  LM_REAL info[LM_INFO_SZ],
                      /* O: information regarding the minimization. Set to NULL if don't care
                       * info[0]= ||e||_2 at initial p.
                       * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
                       * info[5]= # iterations,
                       * info[6]=reason for terminating (see LEVMAR_DER() above)
                       * info[7]= # function evaluations
                       * info[8]= # Jacobian approximations
                       */
  LM_REAL *work,      /* working memory, allocate if NULL */
  LM_REAL *covar,     /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
  void *adata)        /* pointer to possibly additional data, passed uninterpreted to func.
                       * Set to NULL if not needed
                       */
{
register int i, j, k, l;
int worksz, freework=0, issolved;
/* temp work arrays */
LM_REAL *e,          /* nx1 */
       *hx,          /* \hat{x}_i, nx1 */
       *jacTe,       /* J^T e_i mx1 */
       *jac,         /* nxm */
       *jacTjac,     /* mxm */
       *Dp,          /* mx1 */
   *diag_jacTjac,    /* diagonal of J^T J, mx1 */
       *pDp,         /* p + Dp, mx1 */
       *wrk;         /* nx1 */

int using_ffdif=1;   /* force computation of J using forward differences */
LM_REAL *wrk2=NULL;  /* nx1, used only for central differences */

register LM_REAL mu,  /* damping constant */
                tmp;  /* mainly used in matrix & vector multiplications */
LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */
LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
LM_REAL tau, eps1, eps2, eps2_sq, eps3, delta;
LM_REAL init_p_eL2;
int nu, nu2, stop, nfev, njap=0, K=(m>=10)? m: 10, updjac, updp=1, newjac;
const int nm=n*m;

  mu=jacTe_inf=p_L2=0.0; /* -Wall */
  stop=updjac=newjac=0; /* -Wall */

  if(n<m){
    fprintf(stderr, LCAT(LEVMAR_DIF, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
    exit(1);
  }

  if(opts){
    tau=opts[0]; eps1=opts[1]; eps2=opts[2]; eps2_sq=opts[2]*opts[2]; eps3=opts[3];
    delta=opts[4];
    if(delta<0.0){
      delta=-delta; /* make positive */
      using_ffdif=0; /* use central differencing */
      wrk2=(LM_REAL *)malloc(n*sizeof(LM_REAL));
      if(!wrk2){
        fprintf(stderr, LCAT(LEVMAR_DIF, "(): memory allocation request for 'wrk2' failed\n"));
        exit(1);
      }
    }
  }
  else{ // use default values
    tau=CNST(LM_INIT_MU); eps1=CNST(LM_STOP_THRESH); eps2=CNST(LM_STOP_THRESH);
    eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH); eps3=CNST(LM_STOP_THRESH);
    delta=CNST(LM_DIFF_DELTA);
  }

  if(!work){
    worksz=LM_DIF_WORKSZ(m, n); //4*n+4*m + n*m + m*m;
    work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */
    if(!work){
      fprintf(stderr, LCAT(LEVMAR_DIF, "(): memory allocation request failed\n"));
      exit(1);
    }
    freework=1;
  }

  /* set up work arrays */
  e=work;
  hx=e + n;
  jacTe=hx + n;
  jac=jacTe + m;
  jacTjac=jac + nm;
  Dp=jacTjac + m*m;
  diag_jacTjac=Dp + m;
  pDp=diag_jacTjac + m;
  wrk=pDp + m;

  /* compute e=x - f(p) and its L2 norm */
  (*func)(p, hx, m, n, adata); nfev=1;
  for(i=0, p_eL2=0.0; i<n; ++i){
    e[i]=tmp=x[i]-hx[i];
    p_eL2+=tmp*tmp;
  }
  init_p_eL2=p_eL2;

  nu=20; /* force computation of J */

  for(k=0; k<itmax; ++k){
    /* Note that p and e have been updated at a previous iteration */

    if(p_eL2<=eps3){ /* error is small */
      stop=6;
      break;
    }

    if((updp && nu>16) || updjac==K){ /* compute difference approximation to J */
      if(using_ffdif){ /* use forward differences */
        FDIF_FORW_JAC_APPROX(func, p, hx, wrk, delta, jac, m, n, adata); ++njap; nfev+=m;
      }
      else{ /* use central differences */
        FDIF_CENT_JAC_APPROX(func, p, wrk, wrk2, delta, jac, m, n, adata); ++njap; nfev+=2*m;
      }
      nu=2; updjac=0; updp=0; newjac=1;
    }

    if(newjac){ /* Jacobian has changed, recompute J^T J, J^T e, etc */
      newjac=0;

      /* J^T J, J^T e */
      if(nm<=__BLOCKSZ__SQ){ // this is a small problem
        /* This is the straightforward way to compute J^T J, J^T e. However, due to
         * its noncontinuous memory access pattern, it incurs many cache misses when
         * applied to large minimization problems (i.e. problems involving a large
         * number of free variables and measurements), in which J is too large to
         * fit in the L1 cache. For such problems, a cache-efficient blocking scheme
         * is preferable.
         *
         * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
         * performance problem.
         *
         * On the other hand, the straightforward algorithm is faster on small
         * problems since in this case it avoids the overheads of blocking.
         */
        for(i=0; i<m; ++i){
          for(j=i; j<m; ++j){
            int lm;

            for(l=0, tmp=0.0; l<n; ++l){
              lm=l*m;
              tmp+=jac[lm+i]*jac[lm+j];
            }

            jacTjac[i*m+j]=jacTjac[j*m+i]=tmp; /* store tmp in the corresponding upper and lower part elements */
          }

          /* J^T e */
          for(l=0, tmp=0.0; l<n; ++l)
            tmp+=jac[l*m+i]*e[l];
          jacTe[i]=tmp;
        }
      }
      else{ // this is a large problem
        /* cache-efficient computation of J^T J based on blocking */
        TRANS_MAT_MAT_MULT(jac, jacTjac, n, m, __BLOCKSZ__);

        /* cache-efficient computation of J^T e */
        for(i=0; i<m; ++i)
          jacTe[i]=0.0;

        for(i=0; i<n; ++i){
          register LM_REAL *jacrow;

          for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
            jacTe[l]+=jacrow[l]*tmp;
        }
      }

      /* compute ||J^T e||_inf and ||p||^2 */
      for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){
        if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;

        diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */
        p_L2+=p[i]*p[i];
      }
    }

    /* check for convergence */
    if(jacTe_inf <= eps1){
      Dp_L2=0.0; /* no increment for p in this case */
      stop=1;
      break;
    }

    /* compute initial damping factor */
    if(k==0){
      for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
        if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
      mu=tau*tmp;
    }

    /* determine increment using adaptive damping */

    /* augment normal equations */
    for(i=0; i<m; ++i)
      jacTjac[i*m+i]+=mu;

    /* solve augmented equations */
#ifdef HAVE_LAPACK
    issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m);
    //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m);
    //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m);
    //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, n, m);
    //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m);
#else
    /* use the LU included with levmar */
    issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m);
#endif /* HAVE_LAPACK */

    if(issolved){
      /* compute p's new estimate and ||Dp||^2 */
      for(i=0, Dp_L2=0.0; i<m; ++i){
        pDp[i]=p[i] + (tmp=Dp[i]);
        Dp_L2+=tmp*tmp;
      }

      if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
        stop=2;
        break;
      }

      if(Dp_L2>=(p_L2+eps2)/(CNST(EPSILON)*CNST(EPSILON))){ /* almost singular */
      //if(Dp_L2>=(p_L2+eps2)/CNST(EPSILON)){ /* almost singular */
        stop=4;
        break;
      }

      (*func)(pDp, wrk, m, n, adata); ++nfev; /* evaluate function at p + Dp */
      for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */
        tmp=x[i]-wrk[i];
        pDp_eL2+=tmp*tmp;
      }

      dF=p_eL2-pDp_eL2;

      if(updp && dF>0){ /* update jac */
        for(i=0; i<n; ++i){
          for(l=0, tmp=0.0; l<m; ++l)
            tmp+=jac[i*m+l]*Dp[l]; /* (J * Dp)[i] */
          tmp=(wrk[i] - hx[i] - tmp)/Dp_L2; /* (f(p+dp)[i] - f(p)[i] - (J * dp)[i])/(dp^T*dp) */
          for(j=0; j<m; ++j)
            jac[i*m+j]+=tmp*Dp[j];
        }
        ++updjac;
        newjac=1;
      }

      for(i=0, dL=0.0; i<m; ++i)
        dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);

      if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
        dF=(CNST(2.0)*dF/dL-CNST(1.0));
        tmp=dF*dF*dF;
        tmp=CNST(1.0)-tmp*tmp*dF;
        mu=mu*( (tmp>=CNST(ONE_THIRD))? tmp : CNST(ONE_THIRD) );
        nu=2;

        for(i=0 ; i<m; ++i) /* update p's estimate */
          p[i]=pDp[i];

        for(i=0; i<n; ++i){ /* update e, hx and ||e||_2 */
          e[i]=x[i]-wrk[i];
          hx[i]=wrk[i];
        }
        p_eL2=pDp_eL2;
        updp=1;
        continue;
      }
    }

    /* if this point is reached, either the linear system could not be solved or
     * the error did not reduce; in any case, the increment must be rejected
     */

    mu*=nu;
    nu2=nu<<1; // 2*nu;
    if(nu2<=nu){ /* nu has wrapped around (overflown) */
      stop=5;
      break;
    }
    nu=nu2;

    for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
      jacTjac[i*m+i]=diag_jacTjac[i];
  }

  if(k>=itmax) stop=3;

  for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
    jacTjac[i*m+i]=diag_jacTjac[i];

  if(info){
    info[0]=init_p_eL2;
    info[1]=p_eL2;
    info[2]=jacTe_inf;
    info[3]=Dp_L2;
    for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
      if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
    info[4]=mu/tmp;
    info[5]=(LM_REAL)k;
    info[6]=(LM_REAL)stop;
    info[7]=(LM_REAL)nfev;
    info[8]=(LM_REAL)njap;
  }

  /* covariance matrix */
  if(covar){
    LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
  }

  if(freework) free(work);

  if(wrk2) free(wrk2);

  return (stop!=4)?  k : -1;
}
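/* The corresponding sketch for LEVMAR_DIF() (again for the double-precision
 * build, i.e. dlevmar_dif(), reusing the hypothetical expfunc() shown after
 * LEVMAR_DER() above): no Jacobian code is needed; opts[4] sets the
 * finite-difference step \delta, and passing it negated requests the more
 * accurate (but slower) central differences:
 *
 *   double opts[5]={LM_INIT_MU, 1E-15, 1E-15, 1E-20, -LM_DIFF_DELTA};
 *   // niter=dlevmar_dif(expfunc, p, x, 2, n, 1000, opts, info, NULL, NULL, (void *)t);
 */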