///////////////////////////////////////////////////////////////////////////////// // // Levenberg - Marquardt non-linear minimization algorithm // Copyright (C) 2004-05 Manolis Lourakis (lourakis@ics.forth.gr) // Institute of Computer Science, Foundation for Research & Technology - Hellas // Heraklion, Crete, Greece. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // ///////////////////////////////////////////////////////////////////////////////// #ifndef LM_REAL // not included by lmlec.c #error This file should not be compiled directly! #endif /* precision-specific definitions */ #define LMLEC_DATA LM_ADD_PREFIX(lmlec_data) #define LMLEC_ELIM LM_ADD_PREFIX(lmlec_elim) #define LMLEC_FUNC LM_ADD_PREFIX(lmlec_func) #define LMLEC_JACF LM_ADD_PREFIX(lmlec_jacf) #define LEVMAR_LEC_DER LM_ADD_PREFIX(levmar_lec_der) #define LEVMAR_LEC_DIF LM_ADD_PREFIX(levmar_lec_dif) #define LEVMAR_DER LM_ADD_PREFIX(levmar_der) #define LEVMAR_DIF LM_ADD_PREFIX(levmar_dif) #define TRANS_MAT_MAT_MULT LM_ADD_PREFIX(trans_mat_mat_mult) #define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar) #define FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(fdif_forw_jac_approx) #define GEQP3 LM_ADD_PREFIX(geqp3_) #define ORGQR LM_ADD_PREFIX(orgqr_) #define TRTRI LM_ADD_PREFIX(trtri_) struct LMLEC_DATA{ LM_REAL *c, *Z, *p, *jac; int ncnstr; void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata); void (*jacf)(LM_REAL *p, LM_REAL *jac, int m, int n, void *adata); void *adata; }; /* prototypes for LAPACK routines */ extern int GEQP3(int *m, int *n, LM_REAL *a, int *lda, int *jpvt, LM_REAL *tau, LM_REAL *work, int *lwork, int *info); extern int ORGQR(int *m, int *n, int *k, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info); extern int TRTRI(char *uplo, char *diag, int *n, LM_REAL *a, int *lda, int *info); /* * This function implements an elimination strategy for linearly constrained * optimization problems. The strategy relies on QR decomposition to transform * an optimization problem constrained by Ax=b to an equivalent, unconstrained * one. Also referred to as "null space" or "reduced Hessian" method. * See pp. 430-433 (chap. 15) of "Numerical Optimization" by Nocedal-Wright * for details. * * A is mxn with m<=n and rank(A)=m * Two matrices Y and Z of dimensions nxm and nx(n-m) are computed from A^T so that * their columns are orthonormal and every x can be written as x=Y*b + Z*x_z= * c + Z*x_z, where c=Y*b is a fixed vector of dimension n and x_z is an * arbitrary vector of dimension n-m. Then, the problem of minimizing f(x) * subject to Ax=b is equivalent to minimizing f(c + Z*x_z) with no constraints. * The computed Y and Z are such that any solution of Ax=b can be written as * x=Y*x_y + Z*x_z for some x_y, x_z. Furthermore, A*Y is nonsingular, A*Z=0 * and Z spans the null space of A. * * The function accepts A, b and computes c, Y, Z. If b or c is NULL, c is not * computed. Also, Y can be NULL in which case it is not referenced. * The function returns 0 in case of error, A's computed rank if successfull * */ static int LMLEC_ELIM(LM_REAL *A, LM_REAL *b, LM_REAL *c, LM_REAL *Y, LM_REAL *Z, int m, int n) { static LM_REAL eps=CNST(-1.0); LM_REAL *buf=NULL; LM_REAL *a, *tau, *work, *r, aux; register LM_REAL tmp; int a_sz, jpvt_sz, tau_sz, r_sz, Y_sz, worksz; int info, rank, *jpvt, tot_sz, mintmn, tm, tn; register int i, j, k; if(m>n){ fprintf(stderr, RCAT("matrix of constraints cannot have more rows than columns in", LMLEC_ELIM) "()!\n"); exit(1); } tm=n; tn=m; // transpose dimensions mintmn=m; /* calculate required memory size */ worksz=-1; // workspace query. Optimal work size is returned in aux ORGQR((int *)&tm, (int *)&tm, (int *)&mintmn, NULL, (int *)&tm, NULL, (LM_REAL *)&aux, &worksz, &info); worksz=(int)aux; a_sz=tm*tm; // tm*tn is enough for xgeqp3() jpvt_sz=tn; tau_sz=mintmn; r_sz=mintmn*mintmn; // actually smaller if a is not of full row rank Y_sz=(Y)? 0 : tm*tn; tot_sz=jpvt_sz*sizeof(int) + (a_sz + tau_sz + r_sz + worksz + Y_sz)*sizeof(LM_REAL); buf=(LM_REAL *)malloc(tot_sz); /* allocate a "big" memory chunk at once */ if(!buf){ fprintf(stderr, RCAT("Memory allocation request failed in ", LMLEC_ELIM) "()\n"); exit(1); } a=(LM_REAL *)buf; jpvt=(int *)(a+a_sz); tau=(LM_REAL *)(jpvt + jpvt_sz); r=tau+tau_sz; work=r+r_sz; if(!Y) Y=work+worksz; /* copy input array so that LAPACK won't destroy it. Note that copying is * done in row-major order, which equals A^T in column-major */ for(i=0; i0){ fprintf(stderr, RCAT(RCAT("unknown LAPACK error (%d) for ", GEQP3) " in ", LMLEC_ELIM) "()\n", info); free(buf); return 0; } } /* the upper triangular part of a now contains the upper triangle of the unpermuted R */ if(eps<0.0){ LM_REAL aux; /* compute machine epsilon. DBL_EPSILON should do also */ for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5)) ; eps*=CNST(2.0); } tmp=tm*CNST(10.0)*eps*FABS(a[0]); // threshold. tm is max(tm, tn) tmp=(tmp>CNST(1E-12))? tmp : CNST(1E-12); // ensure that threshold is not too small /* compute A^T's numerical rank by counting the non-zeros in R's diagonal */ for(i=rank=0; itmp || a[i*(tm+1)]<-tmp) ++rank; /* loop across R's diagonal elements */ else break; /* diagonal is arranged in absolute decreasing order */ if(rank0){ fprintf(stderr, RCAT(RCAT("A(%d, %d) is exactly zero for ", TRTRI) " (singular matrix) in ", LMLEC_ELIM) "()\n", info, info); free(buf); return 0; } } /* then, transpose r in place */ for(i=0; i0){ fprintf(stderr, RCAT(RCAT("unknown LAPACK error (%d) for ", ORGQR) " in ", LMLEC_ELIM) "()\n", info); free(buf); return 0; } } /* compute Y=Q_1*R^-T*P^T. Y is tm x rank */ for(i=0; incnstr; c=data->c; Z=data->Z; p=data->p; /* p=c + Z*pp */ for(i=0; ifunc))(p, hx, m, n, data->adata); } /* constrained jacobian: given pp, compute the jacobian at c + Z*pp * Using the chain rule, the jacobian with respect to pp equals the * product of the jacobian with respect to p (at c + Z*pp) times Z */ static void LMLEC_JACF(LM_REAL *pp, LM_REAL *jacjac, int mm, int n, void *adata) { struct LMLEC_DATA *data=(struct LMLEC_DATA *)adata; int m; register int i, j, l; register LM_REAL sum, *aux1, *aux2; LM_REAL *c, *Z, *p, *jac; m=mm+data->ncnstr; c=data->c; Z=data->Z; p=data->p; jac=data->jac; /* p=c + Z*pp */ for(i=0; ijacf))(p, jac, m, n, data->adata); /* compute jac*Z in jacjac */ if(n*m<=__BLOCKSZ__SQ){ // this is a small problem /* This is the straightforward way to compute jac*Z. However, due to * its noncontinuous memory access pattern, it incures many cache misses when * applied to large minimization problems (i.e. problems involving a large * number of free variables and measurements), in which jac is too large to * fit in the L1 cache. For such problems, a cache-efficient blocking scheme * is preferable. On the other hand, the straightforward algorithm is faster * on small problems since in this case it avoids the overheads of blocking. */ for(i=0; iCNST(1E-03)) fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_LEC_DER) "()! [%.10g reset to %.10g]\n", i, p0[i], tmp); } if(!info) info=locinfo; /* make sure that LEVMAR_DER() is called with non-null info */ /* note that covariance computation is not requested from LEVMAR_DER() */ ret=LEVMAR_DER(LMLEC_FUNC, LMLEC_JACF, pp, x, mm, n, itmax, opts, info, work, NULL, (void *)&data); /* p=c + Z*pp */ for(i=0; iCNST(1E-03)) fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_LEC_DIF) "()! [%.10g reset to %.10g]\n", i, p0[i], tmp); } if(!info) info=locinfo; /* make sure that LEVMAR_DIF() is called with non-null info */ /* note that covariance computation is not requested from LEVMAR_DIF() */ ret=LEVMAR_DIF(LMLEC_FUNC, NULL, pp, x, mm, n, itmax, opts, info, work, NULL, (void *)&data); /* p=c + Z*pp */ for(i=0; i