summaryrefslogtreecommitdiff
path: root/libmoped/libs/sba-1.6/sba_levmar.c
diff options
context:
space:
mode:
Diffstat (limited to 'libmoped/libs/sba-1.6/sba_levmar.c')
-rw-r--r--libmoped/libs/sba-1.6/sba_levmar.c2528
1 file changed, 2528 insertions, 0 deletions
diff --git a/libmoped/libs/sba-1.6/sba_levmar.c b/libmoped/libs/sba-1.6/sba_levmar.c
new file mode 100644
index 0000000..aa6b2e9
--- /dev/null
+++ b/libmoped/libs/sba-1.6/sba_levmar.c
@@ -0,0 +1,2528 @@
+/////////////////////////////////////////////////////////////////////////////////
+////
+//// Expert drivers for sparse bundle adjustment based on the
+//// Levenberg - Marquardt minimization algorithm
+//// Copyright (C) 2004-2009 Manolis Lourakis (lourakis at ics forth gr)
+//// Institute of Computer Science, Foundation for Research & Technology - Hellas
+//// Heraklion, Crete, Greece.
+////
+//// This program is free software; you can redistribute it and/or modify
+//// it under the terms of the GNU General Public License as published by
+//// the Free Software Foundation; either version 2 of the License, or
+//// (at your option) any later version.
+////
+//// This program is distributed in the hope that it will be useful,
+//// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//// GNU General Public License for more details.
+////
+///////////////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+
+#include "compiler.h"
+#include "sba.h"
+#include "sba_chkjac.h"
+
+
+#define SBA_EPSILON 1E-12
+#define SBA_EPSILON_SQ ( (SBA_EPSILON)*(SBA_EPSILON) )
+
+#define SBA_ONE_THIRD 0.3333333334 /* 1.0/3.0 */
+
+
+#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz)
+
+#define FABS(x) (((x)>=0)? (x) : -(x))
+
+#define ROW_MAJOR 0
+#define COLUMN_MAJOR 1
+#define MAT_STORAGE COLUMN_MAJOR
+
+
/* contains information necessary for computing a finite difference approximation to a jacobian,
 * e.g. function to differentiate, problem dimensions and pointers to working memory buffers;
 * an instance of this struct is passed as the opaque "dat" argument of sba_fdjac_x()
 */
struct fdj_data_x_ {
  void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata); /* function to differentiate */
  int cnp, pnp, mnp; /* parameter numbers: per-camera, per-point and per-measurement counts, resp. */
  int *func_rcidxs,
      *func_rcsubs; /* working memory for func invocations.
                     * Notice that this has to be different
                     * than the working memory used for
                     * evaluating the jacobian!
                     */
  double *hx, *hxx; /* memory to save results in: hx receives func evaluated at the current
                     * estimate, hxx receives func evaluated at the perturbed parameters
                     */
  void *adata; /* additional data passed uninterpreted to func */
};
+
/* Auxiliary memory allocation routine with error checking.
 * On failure prints the requesting file/line and terminates the process;
 * therefore it never returns NULL. Invoked through the emalloc() macro,
 * which supplies __FILE__ and __LINE__ automatically.
 */
inline static void *emalloc_(char *file, int line, size_t sz)
{
void *ptr;

  ptr=(void *)malloc(sz);
  if(ptr==NULL){
    /* %zu is the correct conversion for size_t; the previous %u invokes
     * undefined behavior on platforms where size_t is wider than unsigned int
     */
    fprintf(stderr, "SBA: memory allocation request for %zu bytes failed in file %s, line %d, exiting\n", sz, file, line);
    exit(1);
  }

  return ptr;
}
+
/* auxiliary routine setting the first `count' entries of `arr' to 0.0 */
inline static void _dblzero(register double *arr, register int count)
{
register int i;

  for(i=0; i<count; ++i)
    arr[i]=0.0;
}
+
+/* auxiliary routine for computing the mean reprojection error; used for debugging */
+static double sba_mean_repr_error(int n, int mnp, double *x, double *hx, struct sba_crsm *idxij, int *rcidxs, int *rcsubs)
+{
+register int i, j;
+int nnz, nprojs;
+double *ptr1, *ptr2;
+double err;
+
+ for(i=nprojs=0, err=0.0; i<n; ++i){
+ nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ nprojs+=nnz;
+ for(j=0; j<nnz; ++j){ /* point i projecting on camera rcsubs[j] */
+ ptr1=x + idxij->val[rcidxs[j]]*mnp;
+ ptr2=hx + idxij->val[rcidxs[j]]*mnp;
+
+ err+=sqrt((ptr1[0]-ptr2[0])*(ptr1[0]-ptr2[0]) + (ptr1[1]-ptr2[1])*(ptr1[1]-ptr2[1]));
+ }
+ }
+
+ return err/((double)(nprojs));
+}
+
+/* print the solution in p using sba's text format. If cnp/pnp==0 only points/cameras are printed */
+static void sba_print_sol(int n, int m, double *p, int cnp, int pnp, double *x, int mnp, struct sba_crsm *idxij, int *rcidxs, int *rcsubs)
+{
+register int i, j, ii;
+int nnz;
+double *ptr;
+
+ if(cnp){
+ /* print camera parameters */
+ for(j=0; j<m; ++j){
+ ptr=p+cnp*j;
+ for(ii=0; ii<cnp; ++ii)
+ printf("%g ", ptr[ii]);
+ printf("\n");
+ }
+ }
+
+ if(pnp){
+ /* 3D & 2D point parameters */
+ printf("\n\n\n# X Y Z nframes frame0 x0 y0 frame1 x1 y1 ...\n");
+ for(i=0; i<n; ++i){
+ ptr=p+cnp*m+i*pnp;
+ for(ii=0; ii<pnp; ++ii) // print 3D coordinates
+ printf("%g ", ptr[ii]);
+
+ nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ printf("%d ", nnz);
+
+ for(j=0; j<nnz; ++j){ /* point i projecting on camera rcsubs[j] */
+ ptr=x + idxij->val[rcidxs[j]]*mnp;
+
+ printf("%d ", rcsubs[j]);
+ for(ii=0; ii<mnp; ++ii) // print 2D coordinates
+ printf("%g ", ptr[ii]);
+ }
+ printf("\n");
+ }
+ }
+}
+
/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e.
 * e can coincide with either x or y.
 * Processes the bulk of the data in blocks of 8 elements, rotating among four
 * partial sums to reduce bookkeeping overhead & pipeline stalls and increase
 * instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html
 */
static double nrmL2xmy(double *const e, const double *const x, const double *const y, const int n)
{
const int blocksize=8, bpwr=3; /* 8=2^3 */
register int i, k;
int blockn;
double sums[4]={0.0, 0.0, 0.0, 0.0}; /* four rotating accumulators */

  /* round n down to the nearest multiple of blocksize;
   * any remainder is tidied up afterwards
   */
  blockn=(n>>bpwr)<<bpwr; /* (n / blocksize) * blocksize; */

  /* handle `blocksize' elements per iteration, walking downwards;
   * element i-k is squared into sums[k mod 4]
   */
  for(i=blockn-1; i>0; i-=blocksize){
    for(k=0; k<blocksize; ++k){
      register int idx=i-k;

      e[idx]=x[idx]-y[idx];
      sums[k&3]+=e[idx]*e[idx];
    }
  }

  /* finish off any leftover elements, accumulating into sums[0] */
  for(i=blockn; i<n; ++i){
    e[i]=x[i]-y[i];
    sums[0]+=e[i]*e[i];
  }

  return sums[0]+sums[1]+sums[2]+sums[3];
}
+
/* Compute e=W*(x-y) for two n-vectors x and y and return the squared L2 norm of e.
 * This norm equals the squared C norm of x-y with C=W^T*W, W being a block diagonal
 * matrix with nvis mnp*mnp blocks: e^T*e=(x-y)^T*W^T*W*(x-y)=||x-y||_C.
 * Note that n=nvis*mnp; e can coincide with either x or y.
 * The w_x_ij blocks are assumed upper triangular (Cholesky factors of inverse covariances).
 */
static double nrmCxmy(double *const e, const double *const x, const double *const y,
                      const double *const W, const int mnp, const int nvis)
{
const int n=nvis*mnp;
register int i, r, c;
int mnpsq;
register double norm, sum;
register const double *wblk;
register double *eblk;

  /* e=x-y; the assignments are independent, so a plain loop suffices */
  for(i=0; i<n; ++i)
    e[i]=x[i]-y[i];

  /* overwrite each e_ij with w_x_ij*e_ij and accumulate its squared L2 norm.
   * Since w_x_ij is upper triangular, row r of the product only reads e_ij
   * elements with index >=r, hence results can be stored in place, top-down,
   * without intermediate storage
   */
  mnpsq=mnp*mnp;
  norm=0.0;
  /* wblk, eblk point to w_x_ij, e_ij below */
  for(i=0, wblk=W, eblk=e; i<nvis; ++i, wblk+=mnpsq, eblk+=mnp){
    for(r=0; r<mnp; ++r){
      sum=0.0;
      for(c=r; c<mnp; ++c) /* c>=r since w_x_ij is upper triangular */
        sum+=wblk[r*mnp+c]*eblk[c];
      eblk[r]=sum;
      norm+=sum*sum;
    }
  }

  return norm;
}
+
/* search for & print image projection components that are infinite; useful for identifying errors.
 * hx holds the predicted measurements for all nimgs cameras; idxij maps (point, camera) pairs
 * to their position in hx. rcidxs/rcsubs are caller-supplied work arrays.
 */
static void sba_print_inf(double *hx, int nimgs, int mnp, struct sba_crsm *idxij, int *rcidxs, int *rcsubs)
{
register int i, j, k;
int nnz;
double *phxij;

  /* scan the predicted measurements camera by camera */
  for(j=0; j<nimgs; ++j){
    nnz=sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero hx_ij, i=0...n-1 */
    for(i=0; i<nnz; ++i){
      phxij=hx + idxij->val[rcidxs[i]]*mnp; /* predicted projection of point rcsubs[i] on camera j */
      for(k=0; k<mnp; ++k)
        if(!SBA_FINITE(phxij[k])) /* NaN/Inf component, typically from an error in func or a diverging estimate */
          printf("SBA: component %d of the estimated projection of point %d on camera %d is invalid!\n", k, rcsubs[i], j);
    }
  }
}
+
+/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
+ * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
+ * The jacobian is approximated with the aid of finite differences and is returned in the order
+ * (A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm),
+ * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ).
+ * Notice that depending on idxij, some of the A_ij, B_ij might be missing
+ *
+ * Problem-specific information is assumed to be stored in a structure pointed to by "dat".
+ *
+ * NOTE: The jacobian (for n=4, m=3) in matrix form has the following structure:
+ * A_11 0 0 B_11 0 0 0
+ * 0 A_12 0 B_12 0 0 0
+ * 0 0 A_13 B_13 0 0 0
+ * A_21 0 0 0 B_21 0 0
+ * 0 A_22 0 0 B_22 0 0
+ * 0 0 A_23 0 B_23 0 0
+ * A_31 0 0 0 0 B_31 0
+ * 0 A_32 0 0 0 B_32 0
+ * 0 0 A_33 0 0 B_33 0
+ * A_41 0 0 0 0 0 B_41
+ * 0 A_42 0 0 0 0 B_42
+ * 0 0 A_43 0 0 0 B_43
+ * To reduce the total number of objective function evaluations, this structure can be
+ * exploited as follows: A certain d is added to the j-th parameters of all cameras and
+ * the objective function is evaluated at the resulting point. This evaluation suffices
+ * to compute the corresponding columns of *all* A_ij through finite differences. A similar
+ * strategy allows the computation of the B_ij. Overall, only cnp+pnp+1 objective function
+ * evaluations are needed to compute the jacobian, much fewer compared to the m*cnp+n*pnp+1
+ * that would be required by the naive strategy of computing one column of the jacobian
+ * per function evaluation. See Nocedal-Wright, ch. 7, pp. 169. Although this approach is
+ * much faster compared to the naive strategy, it is not preferable to analytic jacobians,
+ * since the latter are considerably faster to compute and result in fewer LM iterations.
+ */
static void sba_fdjac_x(
    double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */
    struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */
    int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */
    int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */
    double *jac, /* O: array for storing the approximated jacobian */
    void *dat) /* I: points to a "fdj_data_x_" structure */
{
  register int i, j, ii, jj;
  double *pa, *pb, *pqr, *ppt;
  register double *pAB, *phx, *phxx;
  int n, m, nm, nnz, Asz, Bsz, ABsz, idx;

  double *tmpd; /* holds the perturbation applied to each camera (resp. point) so it can be undone exactly */
  register double d;

  struct fdj_data_x_ *fdjd;
  void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata);
  double *hx, *hxx;
  int cnp, pnp, mnp;
  void *adata;


  /* retrieve problem-specific information passed in *dat */
  fdjd=(struct fdj_data_x_ *)dat;
  func=fdjd->func;
  cnp=fdjd->cnp; pnp=fdjd->pnp; mnp=fdjd->mnp;
  hx=fdjd->hx;
  hxx=fdjd->hxx;
  adata=fdjd->adata;

  n=idxij->nr; m=idxij->nc;
  pa=p; pb=p+m*cnp; /* camera parameters precede point parameters in p */
  Asz=mnp*cnp; Bsz=mnp*pnp; ABsz=Asz+Bsz;

  nm=(n>=m)? n : m; // max(n, m);
  tmpd=(double *)emalloc(nm*sizeof(double));

  (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hx, adata); // evaluate supplied function on current solution

  if(cnp){ // is motion varying?
    /* compute A_ij */
    /* one func evaluation per camera parameter index jj, perturbing parameter jj
     * of ALL cameras simultaneously (see the sparsity-exploiting scheme above)
     */
    for(jj=0; jj<cnp; ++jj){
      for(j=0; j<m; ++j){
        pqr=pa+j*cnp; // j-th camera parameters
        /* determine d=max(SBA_DELTA_SCALE*|pqr[jj]|, SBA_MIN_DELTA), see HZ */
        d=(double)(SBA_DELTA_SCALE)*pqr[jj]; // force evaluation
        d=FABS(d);
        if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA;

        tmpd[j]=d;
        pqr[jj]+=d;
      }

      (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);

      for(j=0; j<m; ++j){
        pqr=pa+j*cnp; // j-th camera parameters
        pqr[jj]-=tmpd[j]; /* restore */
        d=1.0/tmpd[j]; /* invert so that divisions can be carried out faster as multiplications */

        nnz=sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
        for(i=0; i<nnz; ++i){
          idx=idxij->val[rcidxs[i]];
          phx=hx + idx*mnp; // set phx to point to hx_ij
          phxx=hxx + idx*mnp; // set phxx to point to hxx_ij
          pAB=jac + idx*ABsz; // set pAB to point to A_ij

          /* forward-difference approximation of column jj of A_ij (row-major) */
          for(ii=0; ii<mnp; ++ii)
            pAB[ii*cnp+jj]=(phxx[ii]-phx[ii])*d;
        }
      }
    }
  }

  if(pnp){ // is structure varying?
    /* compute B_ij */
    /* one func evaluation per point parameter index jj, perturbing parameter jj
     * of ALL points simultaneously
     */
    for(jj=0; jj<pnp; ++jj){
      for(i=0; i<n; ++i){
        ppt=pb+i*pnp; // i-th point parameters
        /* determine d=max(SBA_DELTA_SCALE*|ppt[jj]|, SBA_MIN_DELTA), see HZ */
        d=(double)(SBA_DELTA_SCALE)*ppt[jj]; // force evaluation
        d=FABS(d);
        if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA;

        tmpd[i]=d;
        ppt[jj]+=d;
      }

      (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);

      for(i=0; i<n; ++i){
        ppt=pb+i*pnp; // i-th point parameters
        ppt[jj]-=tmpd[i]; /* restore */
        d=1.0/tmpd[i]; /* invert so that divisions can be carried out faster as multiplications */

        nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
        for(j=0; j<nnz; ++j){
          idx=idxij->val[rcidxs[j]];
          phx=hx + idx*mnp; // set phx to point to hx_ij
          phxx=hxx + idx*mnp; // set phxx to point to hxx_ij
          pAB=jac + idx*ABsz + Asz; // set pAB to point to B_ij

          /* forward-difference approximation of column jj of B_ij (row-major) */
          for(ii=0; ii<mnp; ++ii)
            pAB[ii*pnp+jj]=(phxx[ii]-phx[ii])*d;
        }
      }
    }
  }

  free(tmpd);
}
+
+typedef int (*PLS)(double *A, double *B, double *x, int m, int iscolmaj);
+
+/* Bundle adjustment on camera and structure parameters
+ * using the sparse Levenberg-Marquardt as described in HZ p. 568
+ *
+ * Returns the number of iterations (>=0) if successfull, SBA_ERROR if failed
+ */
+
+int sba_motstr_levmar_x(
+ const int n, /* number of points */
+ const int ncon,/* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i<ncon are assumed to be zero
+ */
+ const int m, /* number of images */
+ const int mcon,/* number of images (starting from the 1st) whose parameters should not be modified.
+ * All A_ij (see below) with j<mcon are assumed to be zero
+ */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (a1, ..., am, b1, ..., bn).
+ * aj are the image j parameters, bi are the i-th point parameters,
+ * size m*cnp + n*pnp
+ */
+ const int cnp,/* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
+ const int pnp,/* number of parameters for ONE point; e.g. 3 for Euclidean points */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp,/* number of parameters for EACH measurement; usually 2 */
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ /* functional relation describing measurements. Given a parameter vector p,
+ * computes a prediction of the measurements \hat{x}. p is (m*cnp + n*pnp)x1,
+ * \hat{x} is (n*m*mnp)x1, maximum
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ */
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ /* function to evaluate the sparse jacobian dX/dp.
+ * The Jacobian is returned in jac as
+ * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m,
+ * dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation),
+ * jac=(A_11, B_11, ..., A_1m, B_1m, ..., A_n1, B_n1, ..., A_nm, B_nm)
+ * Notice that depending on idxij, some of the A_ij and B_ij might be missing.
+ * Note also that A_ij and B_ij are mnp x cnp and mnp x pnp matrices resp. and they
+ * should be stored in jac in row-major order.
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ *
+ * If NULL, the jacobian is approximated by repetitive func calls and finite
+ * differences. This is computationally inefficient and thus NOT recommended.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+ /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial \mu,
+ * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+ * 5 - stopped by small ||e||_2
+ * 6 - too many attempts to increase damping. Restart with increased mu
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+)
+{
+register int i, j, ii, jj, k, l;
+int nvis, nnz, retval;
+
+/* The following are work arrays that are dynamically allocated by sba_motstr_levmar_x() */
+double *jac; /* work array for storing the jacobian, max. size n*m*(mnp*cnp + mnp*pnp) */
+double *U; /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */
+double *V; /* work array for storing the *strictly upper triangles* of V_i in the order V_1, ..., V_n, size n*pnp*pnp.
+ * V also stores the lower triangles of (V*_i)^-1 in the order (V*_1)^-1, ..., (V*_n)^-1.
+ * Note that diagonal elements of V_1 are saved in diagUV
+ */
+
+double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
+ max. size n*m*mnp */
+double *eab; /* work array for storing the ea_j & eb_i in the order ea_1, .. ea_m eb_1, .. eb_n size m*cnp + n*pnp */
+
+double *E; /* work array for storing the e_j in the order e_1, .. e_m, size m*cnp */
+
+/* Notice that the blocks W_ij, Y_ij are zero iff A_ij (equivalently B_ij) is zero. This means
+ * that the matrix consisting of blocks W_ij is itself sparse, similarly to the
+ * block matrix made up of the A_ij and B_ij (i.e. jac)
+ */
+double *W; /* work array for storing the W_ij in the order W_11, ..., W_1m, ..., W_n1, ..., W_nm,
+ max. size n*m*cnp*pnp */
+double *Yj; /* work array for storing the Y_ij for a *fixed* j in the order Y_1j, Y_nj,
+ max. size n*cnp*pnp */
+double *YWt; /* work array for storing \sum_i Y_ij W_ik^T, size cnp*cnp */
+double *S; /* work array for storing the block array S_jk, size m*m*cnp*cnp */
+double *dp; /* work array for storing the parameter vector updates da_1, ..., da_m, db_1, ..., db_n, size m*cnp + n*pnp */
+double *Wtda; /* work array for storing \sum_j W_ij^T da_j, size pnp */
+double *wght= /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */
+ NULL;
+
+/* Of the above arrays, jac, e, W, Yj, wght are sparse and
+ * U, V, eab, E, S, dp are dense. Sparse arrays (except Yj) are indexed
+ * through idxij (see below), that is with the same mechanism as the input
+ * measurements vector x
+ */
+
+double *pa, *pb, *ea, *eb, *dpa, *dpb; /* pointers into p, jac, eab and dp respectively */
+
+/* submatrices sizes */
+int Asz, Bsz, ABsz, Usz, Vsz,
+ Wsz, Ysz, esz, easz, ebsz,
+ YWtsz, Wtdasz, Sblsz, covsz;
+
+int Sdim; /* S matrix actual dimension */
+
+register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
+struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also
+ * the location of A_ij, B_ij in jac, etc.
+ * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
+ * index k and it is used to efficiently lookup the memory locations where the non-zero
+ * blocks of a sparse matrix/vector are stored
+ */
+int maxCvis, /* max. of projections of a single point across cameras, <=m */
+ maxPvis, /* max. of projections in a single camera across points, <=n */
+ maxCPvis, /* max. of the above */
+ *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or
+ column in a sparse matrix, size max(n, m) */
+ *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a
+ sparse matrix, size max(n, m) */
+
+/* The following variables are needed by the LM algorithm */
+register int itno; /* iteration counter */
+int issolved;
+/* temporary work arrays that are dynamically allocated */
+double *hx, /* \hat{x}_i, max. size m*n*mnp */
+ *diagUV, /* diagonals of U_j, V_i, size m*cnp + n*pnp */
+ *pdp; /* p + dp, size m*cnp + n*pnp */
+
+double *diagU, *diagV; /* pointers into diagUV */
+
+register double mu, /* damping constant */
+ tmp; /* mainly used in matrix & vector multiplications */
+double p_eL2, eab_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
+double p_L2, dp_L2=DBL_MAX, dF, dL;
+double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2],
+ eps3_sq=opts[3]*opts[3], eps4_sq=opts[4]*opts[4];
+double init_p_eL2;
+int nu=2, nu2, stop=0, nfev, njev=0, nlss=0;
+int nobs, nvars;
+const int mmcon=m-mcon;
+PLS linsolver=NULL;
+int (*matinv)(double *A, int m)=NULL;
+
+struct fdj_data_x_ fdj_data;
+void *jac_adata;
+
+/* Initialization */
+ mu=eab_inf=0.0; /* -Wall */
+
+ /* block sizes */
+ Asz=mnp * cnp; Bsz=mnp * pnp; ABsz=Asz + Bsz;
+ Usz=cnp * cnp; Vsz=pnp * pnp;
+ Wsz=cnp * pnp; Ysz=cnp * pnp;
+ esz=mnp;
+ easz=cnp; ebsz=pnp;
+ YWtsz=cnp * cnp;
+ Wtdasz=pnp;
+ Sblsz=cnp * cnp;
+ Sdim=mmcon * cnp;
+ covsz=mnp * mnp;
+
+ /* count total number of visible image points */
+ for(i=nvis=0, jj=n*m; i<jj; ++i)
+ nvis+=(vmask[i]!=0);
+
+ nobs=nvis*mnp;
+ nvars=m*cnp + n*pnp;
+ if(nobs<nvars){
+ fprintf(stderr, "SBA: sba_motstr_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
+ return SBA_ERROR;
+ }
+
+ /* allocate & fill up the idxij structure */
+ sba_crsm_alloc(&idxij, n, m, nvis);
+ for(i=k=0; i<n; ++i){
+ idxij.rowptr[i]=k;
+ ii=i*m;
+ for(j=0; j<m; ++j)
+ if(vmask[ii+j]){
+ idxij.val[k]=k;
+ idxij.colidx[k++]=j;
+ }
+ }
+ idxij.rowptr[n]=nvis;
+
+ /* find the maximum number (for all cameras) of visible image projections coming from a single 3D point */
+ for(i=maxCvis=0; i<n; ++i)
+ if((k=idxij.rowptr[i+1]-idxij.rowptr[i])>maxCvis) maxCvis=k;
+
+ /* find the maximum number (for all points) of visible image projections in any single camera */
+ for(j=maxPvis=0; j<m; ++j){
+ for(i=ii=0; i<n; ++i)
+ if(vmask[i*m+j]) ++ii;
+ if(ii>maxPvis) maxPvis=ii;
+ }
+ maxCPvis=(maxCvis>=maxPvis)? maxCvis : maxPvis;
+
+#if 0
+ /* determine the density of blocks in matrix S */
+ for(j=mcon, ii=0; j<m; ++j){
+ ++ii; /* block Sjj is surely nonzero */
+ for(k=j+1; k<m; ++k)
+ if(sba_crsm_common_row(&idxij, j, k)) ii+=2; /* blocks Sjk & Skj are nonzero */
+ }
+ printf("\nS block density: %.5g\n", ((double)ii)/(mmcon*mmcon)); fflush(stdout);
+#endif
+
+ /* allocate work arrays */
+ /* W is big enough to hold both jac & W. Note also the extra Wsz, see the initialization of jac below for explanation */
+ W=(double *)emalloc((nvis*((Wsz>=ABsz)? Wsz : ABsz) + Wsz)*sizeof(double));
+ U=(double *)emalloc(m*Usz*sizeof(double));
+ V=(double *)emalloc(n*Vsz*sizeof(double));
+ e=(double *)emalloc(nobs*sizeof(double));
+ eab=(double *)emalloc(nvars*sizeof(double));
+ E=(double *)emalloc(m*cnp*sizeof(double));
+ Yj=(double *)emalloc(maxPvis*Ysz*sizeof(double));
+ YWt=(double *)emalloc(YWtsz*sizeof(double));
+ S=(double *)emalloc(m*m*Sblsz*sizeof(double));
+ dp=(double *)emalloc(nvars*sizeof(double));
+ Wtda=(double *)emalloc(pnp*sizeof(double));
+ rcidxs=(int *)emalloc(maxCPvis*sizeof(int));
+ rcsubs=(int *)emalloc(maxCPvis*sizeof(int));
+#ifndef SBA_DESTROY_COVS
+ if(covx!=NULL) wght=(double *)emalloc(nvis*covsz*sizeof(double));
+#else
+ if(covx!=NULL) wght=covx;
+#endif /* SBA_DESTROY_COVS */
+
+
+ hx=(double *)emalloc(nobs*sizeof(double));
+ diagUV=(double *)emalloc(nvars*sizeof(double));
+ pdp=(double *)emalloc(nvars*sizeof(double));
+
+ /* to save resources, W and jac share the same memory: First, the jacobian
+ * is computed in some working memory that is then overwritten during the
+ * computation of W. To account for the case of W being larger than jac,
+ * extra memory is reserved "before" jac.
+ * Care must be taken, however, to ensure that storing a certain W_ij
+ * does not overwrite the A_ij, B_ij used to compute it. To achieve
+ * this is, note that if p1 and p2 respectively point to the first elements
+ * of a certain W_ij and A_ij, B_ij pair, we should have p2-p1>=Wsz.
+ * There are two cases:
+ * a) Wsz>=ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+nvis*(Wsz-ABsz)+k*ABsz
+ * for some k (0<=k<nvis), thus p2-p1=(nvis-k)*(Wsz-ABsz)+Wsz.
+ * The right side of the last equation is obviously > Wsz for all 0<=k<nvis
+ *
+ * b) Wsz<ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+k*ABsz and
+ * p2-p1=Wsz+k*(ABsz-Wsz), which is again > Wsz for all 0<=k<nvis
+ *
+ * In conclusion, if jac is initialized as below, the memory allocated to all
+ * W_ij is guaranteed not to overlap with that allocated to their corresponding
+ * A_ij, B_ij pairs
+ */
+ jac=W + Wsz + ((Wsz>ABsz)? nvis*(Wsz-ABsz) : 0);
+
+ /* set up auxiliary pointers */
+ pa=p; pb=p+m*cnp;
+ ea=eab; eb=eab+m*cnp;
+ dpa=dp; dpb=dp+m*cnp;
+
+ diagU=diagUV; diagV=diagUV + m*cnp;
+
+ /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
+ if(!fjac){
+ fdj_data.func=func;
+ fdj_data.cnp=cnp;
+ fdj_data.pnp=pnp;
+ fdj_data.mnp=mnp;
+ fdj_data.hx=hx;
+ fdj_data.hxx=(double *)emalloc(nobs*sizeof(double));
+ fdj_data.func_rcidxs=(int *)emalloc(2*maxCPvis*sizeof(int));
+ fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxCPvis;
+ fdj_data.adata=adata;
+
+ fjac=sba_fdjac_x;
+ jac_adata=(void *)&fdj_data;
+ }
+ else{
+ fdj_data.hxx=NULL;
+ jac_adata=adata;
+ }
+
+ if(itmax==0){ /* verify jacobian */
+ sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, mcon, cnp, pnp, mnp, adata, jac_adata);
+ retval=0;
+ goto freemem_and_return;
+ }
+
+ /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their
+ * inverses and using the resulting matrices w_x_ij to weigh A_ij, B_ij, and e_ij as w_x_ij A_ij,
+ * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij
+ * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij =
+ * A_ij^T Sigma_x_ij^-1 A_ij
+ *
+ * ea_j, V_i, eb_i, W_ij are weighted in a similar manner
+ */
+ if(covx!=NULL){
+ for(i=0; i<n; ++i){
+ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ for(j=0; j<nnz; ++j){
+ /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */
+ ptr1=covx + (k=idxij.val[rcidxs[j]]*covsz);
+ ptr2=wght + k;
+ if(!sba_mat_cholinv(ptr1, ptr2, mnp)){ /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */
+ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", i, rcsubs[j]);
+ retval=SBA_ERROR;
+ goto freemem_and_return;
+ }
+ }
+ }
+ sba_mat_cholinv(NULL, NULL, 0); /* cleanup */
+ }
+
+ /* compute the error vectors e_ij in hx */
+ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
+ /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */
+ if(covx==NULL)
+ p_eL2=nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */
+ else
+ p_eL2=nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */
+ if(verbose) printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
+ init_p_eL2=p_eL2;
+ if(!SBA_FINITE(p_eL2)) stop=7;
+
+ for(itno=0; itno<itmax && !stop; ++itno){
+ /* Note that p, e and ||e||_2 have been updated at the previous iteration */
+
+ /* compute derivative submatrices A_ij, B_ij */
+ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
+
+ if(covx!=NULL){
+ /* compute w_x_ij A_ij and w_x_ij B_ij.
+ * Since w_x_ij is upper triangular, the products can be safely saved
+ * directly in A_ij, B_ij, without the need for intermediate storage
+ */
+ for(i=0; i<nvis; ++i){
+ /* set ptr1, ptr2, ptr3 to point to w_x_ij, A_ij, B_ij, resp. */
+ ptr1=wght + i*covsz;
+ ptr2=jac + i*ABsz;
+ ptr3=ptr2 + Asz; // ptr3=jac + i*ABsz + Asz;
+
+ /* w_x_ij is mnp x mnp, A_ij is mnp x cnp, B_ij is mnp x pnp
+ * NOTE: Jamming the outter (i.e., ii) loops did not run faster!
+ */
+ /* A_ij */
+ for(ii=0; ii<mnp; ++ii)
+ for(jj=0; jj<cnp; ++jj){
+ for(k=ii, sum=0.0; k<mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum+=ptr1[ii*mnp+k]*ptr2[k*cnp+jj];
+ ptr2[ii*cnp+jj]=sum;
+ }
+
+ /* B_ij */
+ for(ii=0; ii<mnp; ++ii)
+ for(jj=0; jj<pnp; ++jj){
+ for(k=ii, sum=0.0; k<mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum+=ptr1[ii*mnp+k]*ptr3[k*pnp+jj];
+ ptr3[ii*pnp+jj]=sum;
+ }
+ }
+ }
+
+ /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here!
+ /* U_j is symmetric, therefore its computation can be sped up by
+ * computing only the upper part and then reusing it for the lower one.
+ * Recall that A_ij is mnp x cnp
+ */
+ /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here!
+ /* Recall that e_ij is mnp x 1
+ */
+ _dblzero(U, m*Usz); /* clear all U_j */
+ _dblzero(ea, m*easz); /* clear all ea_j */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=ea + j*easz; // set ptr2 to point to ea_j
+
+ nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
+ for(i=0; i<nnz; ++i){
+ /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */
+ ptr3=jac + idxij.val[rcidxs[i]]*ABsz;
+
+ /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */
+ for(ii=0; ii<cnp; ++ii){
+ for(jj=ii; jj<cnp; ++jj){
+ for(k=0, sum=0.0; k<mnp; ++k)
+ sum+=ptr3[k*cnp+ii]*ptr3[k*cnp+jj];
+ ptr1[ii*cnp+jj]+=sum;
+ }
+
+ /* copy the LOWER TRIANGULAR PART of U_j from the upper one */
+ for(jj=0; jj<ii; ++jj)
+ ptr1[ii*cnp+jj]=ptr1[jj*cnp+ii];
+ }
+
+ ptr4=e + idxij.val[rcidxs[i]]*esz; /* set ptr4 to point to e_ij */
+ /* compute A_ij^T e_ij and add it to ea_j */
+ for(ii=0; ii<cnp; ++ii){
+ for(jj=0, sum=0.0; jj<mnp; ++jj)
+ sum+=ptr3[jj*cnp+ii]*ptr4[jj];
+ ptr2[ii]+=sum;
+ }
+ }
+ }
+
+ /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here!
+ /* V_i is symmetric, therefore its computation can be sped up by
+ * computing only the upper part and then reusing it for the lower one.
+ * Recall that B_ij is mnp x pnp
+ */
+ /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here!
+ /* Recall that e_ij is mnp x 1
+ */
+ _dblzero(V, n*Vsz); /* clear all V_i */
+ _dblzero(eb, n*ebsz); /* clear all eb_i */
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
+
+ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
+ for(j=0; j<nnz; ++j){
+ /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */
+ ptr3=jac + idxij.val[rcidxs[j]]*ABsz + Asz;
+
+ /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */
+ for(ii=0; ii<pnp; ++ii){
+ for(jj=ii; jj<pnp; ++jj){
+ for(k=0, sum=0.0; k<mnp; ++k)
+ sum+=ptr3[k*pnp+ii]*ptr3[k*pnp+jj];
+ ptr1[ii*pnp+jj]+=sum;
+ }
+ }
+
+ ptr4=e + idxij.val[rcidxs[j]]*esz; /* set ptr4 to point to e_ij */
+ /* compute B_ij^T e_ij and add it to eb_i */
+ for(ii=0; ii<pnp; ++ii){
+ for(jj=0, sum=0.0; jj<mnp; ++jj)
+ sum+=ptr3[jj*pnp+ii]*ptr4[jj];
+ ptr2[ii]+=sum;
+ }
+ }
+ }
+
+ /* compute W_ij = A_ij^T B_ij */ // \Sigma here!
+ /* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp
+ */
+ for(i=ncon; i<n; ++i){
+ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
+ for(j=0; j<nnz; ++j){
+ /* set ptr1 to point to W_ij, actual column number in rcsubs[j] */
+ ptr1=W + idxij.val[rcidxs[j]]*Wsz;
+
+ if(rcsubs[j]<mcon){ /* A_ij is zero */
+ //_dblzero(ptr1, Wsz); /* clear W_ij */
+ continue;
+ }
+
+ /* set ptr2 & ptr3 to point to A_ij & B_ij resp. */
+ ptr2=jac + idxij.val[rcidxs[j]]*ABsz;
+ ptr3=ptr2 + Asz;
+ /* compute A_ij^T B_ij and store it in W_ij
+ * Recall that storage for A_ij, B_ij does not overlap with that for W_ij,
+ * see the comments related to the initialization of jac above
+ */
+ /* assert(ptr2-ptr1>=Wsz); */
+ for(ii=0; ii<cnp; ++ii)
+ for(jj=0; jj<pnp; ++jj){
+ for(k=0, sum=0.0; k<mnp; ++k)
+ sum+=ptr2[k*cnp+ii]*ptr3[k*pnp+jj];
+ ptr1[ii*pnp+jj]=sum;
+ }
+ }
+ }
+
+ /* Compute ||J^T e||_inf and ||p||^2 */
+ for(i=0, p_L2=eab_inf=0.0; i<nvars; ++i){
+ if(eab_inf < (tmp=FABS(eab[i]))) eab_inf=tmp;
+ p_L2+=p[i]*p[i];
+ }
+ //p_L2=sqrt(p_L2);
+
+ /* save diagonal entries so that augmentation can be later canceled.
+ * Diagonal entries are in U_j and V_i
+ */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
+ for(i=0; i<cnp; ++i)
+ ptr2[i]=ptr1[i*cnp+i];
+ }
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
+ for(j=0; j<pnp; ++j)
+ ptr2[j]=ptr1[j*pnp+j];
+ }
+
+/*
+if(!(itno%100)){
+ printf("Current estimate: ");
+ for(i=0; i<nvars; ++i)
+ printf("%.9g ", p[i]);
+ printf("-- errors %.9g %0.9g\n", eab_inf, p_eL2);
+}
+*/
+
+ /* check for convergence */
+ if((eab_inf <= eps1)){
+ dp_L2=0.0; /* no increment for p in this case */
+ stop=1;
+ break;
+ }
+
+ /* compute initial damping factor */
+ if(itno==0){
+ /* find max diagonal element */
+ for(i=mcon*cnp, tmp=DBL_MIN; i<m*cnp; ++i)
+ if(diagUV[i]>tmp) tmp=diagUV[i];
+ for(i=m*cnp + ncon*pnp; i<nvars; ++i) /* tmp is not re-initialized! */
+ if(diagUV[i]>tmp) tmp=diagUV[i];
+ mu=tau*tmp;
+ }
+
+ /* determine increment using adaptive damping */
+ while(1){
+ /* augment U, V */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ for(i=0; i<cnp; ++i)
+ ptr1[i*cnp+i]+=mu;
+ }
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ for(j=0; j<pnp; ++j)
+ ptr1[j*pnp+j]+=mu;
+
+ /* compute V*_i^-1.
+ * Recall that only the upper triangle of the symmetric pnp x pnp matrix V*_i
+ * is stored in ptr1; its (also symmetric) inverse is saved in the lower triangle of ptr1
+ */
+ /* inverting V*_i with LDLT seems to result in faster overall execution compared to when using LU or Cholesky */
+ //j=sba_symat_invert_LU(ptr1, pnp); matinv=sba_symat_invert_LU;
+ //j=sba_symat_invert_Chol(ptr1, pnp); matinv=sba_symat_invert_Chol;
+ j=sba_symat_invert_BK(ptr1, pnp); matinv=sba_symat_invert_BK;
+ if(!j){
+ fprintf(stderr, "SBA: singular matrix V*_i (i=%d) in sba_motstr_levmar_x(), increasing damping\n", i);
+ goto moredamping; // increasing damping will eventually make V*_i diagonally dominant, thus nonsingular
+ //retval=SBA_ERROR;
+ //goto freemem_and_return;
+ }
+ }
+
+ _dblzero(E, m*easz); /* clear all e_j */
+ /* compute the mmcon x mmcon block matrix S and e_j */
+
+ /* Note that S is symmetric, therefore its computation can be
+ * sped up by computing only the upper part and then reusing
+ * it for the lower one.
+ */
+ for(j=mcon; j<m; ++j){
+ int mmconxUsz=mmcon*Usz;
+
+ nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero Y_ij, i=0...n-1 */
+
+ /* get rid of all Y_ij with i<ncon that are treated as zeros.
+ * In this way, all rcsubs[i] below are guaranteed to be >= ncon
+ */
+ if(ncon){
+ for(i=ii=0; i<nnz; ++i){
+ if(rcsubs[i]>=ncon){
+ rcidxs[ii]=rcidxs[i];
+ rcsubs[ii++]=rcsubs[i];
+ }
+ }
+ nnz=ii;
+ }
+
+ /* compute all Y_ij = W_ij (V*_i)^-1 for a *fixed* j.
+ * To save memory, the block matrix consisting of the Y_ij
+ * is not stored. Instead, only a block column of this matrix
+ * is computed & used at each time: For each j, all nonzero
+ * Y_ij are computed in Yj and then used in the calculations
+ * involving S_jk and e_j.
+ * Recall that W_ij is cnp x pnp and (V*_i) is pnp x pnp
+ */
+ for(i=0; i<nnz; ++i){
+ /* set ptr3 to point to (V*_i)^-1, actual row number in rcsubs[i] */
+ ptr3=V + rcsubs[i]*Vsz;
+
+ /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */
+ ptr1=Yj + i*Ysz;
+ /* set ptr2 to point to W_ij resp. */
+ ptr2=W + idxij.val[rcidxs[i]]*Wsz;
+ /* compute W_ij (V*_i)^-1 and store it in Y_ij.
+ * Recall that only the lower triangle of (V*_i)^-1 is stored
+ */
+ for(ii=0; ii<cnp; ++ii){
+ ptr4=ptr2+ii*pnp;
+ for(jj=0; jj<pnp; ++jj){
+ for(k=0, sum=0.0; k<=jj; ++k)
+ sum+=ptr4[k]*ptr3[jj*pnp+k]; //ptr2[ii*pnp+k]*ptr3[jj*pnp+k];
+ for( ; k<pnp; ++k)
+ sum+=ptr4[k]*ptr3[k*pnp+jj]; //ptr2[ii*pnp+k]*ptr3[k*pnp+jj];
+ ptr1[ii*pnp+jj]=sum;
+ }
+ }
+ }
+
+ /* compute the UPPER TRIANGULAR PART of S */
+ for(k=j; k<m; ++k){ // j>=mcon
+ /* compute \sum_i Y_ij W_ik^T in YWt. Note that for an off-diagonal block defined by j, k
+ * YWt (and thus S_jk) is nonzero only if there exists a point that is visible in both the
+ * j-th and k-th images
+ */
+
+ /* Recall that Y_ij is cnp x pnp and W_ik is cnp x pnp */
+ _dblzero(YWt, YWtsz); /* clear YWt */
+
+ for(i=0; i<nnz; ++i){
+ register double *pYWt;
+
+ /* find the min and max column indices of the elements in row i (actually rcsubs[i])
+ * and make sure that k falls within them. This test handles W_ik's which are
+ * certain to be zero without bothering to call sba_crsm_elmidx()
+ */
+ ii=idxij.colidx[idxij.rowptr[rcsubs[i]]];
+ jj=idxij.colidx[idxij.rowptr[rcsubs[i]+1]-1];
+ if(k<ii || k>jj) continue; /* W_ik == 0 */
+
+ /* set ptr2 to point to W_ik */
+ l=sba_crsm_elmidxp(&idxij, rcsubs[i], k, j, rcidxs[i]);
+ //l=sba_crsm_elmidx(&idxij, rcsubs[i], k);
+ if(l==-1) continue; /* W_ik == 0 */
+
+ ptr2=W + idxij.val[l]*Wsz;
+ /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */
+ ptr1=Yj + i*Ysz;
+ for(ii=0; ii<cnp; ++ii){
+ ptr3=ptr1+ii*pnp;
+ pYWt=YWt+ii*cnp;
+
+ for(jj=0; jj<cnp; ++jj){
+ ptr4=ptr2+jj*pnp;
+ for(l=0, sum=0.0; l<pnp; ++l)
+ sum+=ptr3[l]*ptr4[l]; //ptr1[ii*pnp+l]*ptr2[jj*pnp+l];
+ pYWt[jj]+=sum; //YWt[ii*cnp+jj]+=sum;
+ }
+ }
+ }
+
+ /* since the linear system involving S is solved with lapack,
+ * it is preferable to store S in column major (i.e. fortran)
+         * order, so as to avoid unnecessary transposing/copying.
+ */
+#if MAT_STORAGE==COLUMN_MAJOR
+ ptr2=S + (k-mcon)*mmconxUsz + (j-mcon)*cnp; // set ptr2 to point to the beginning of block j,k in S
+#else
+ ptr2=S + (j-mcon)*mmconxUsz + (k-mcon)*cnp; // set ptr2 to point to the beginning of block j,k in S
+#endif
+
+ if(j!=k){ /* Kronecker */
+ for(ii=0; ii<cnp; ++ii, ptr2+=Sdim)
+ for(jj=0; jj<cnp; ++jj)
+ ptr2[jj]=
+#if MAT_STORAGE==COLUMN_MAJOR
+ -YWt[jj*cnp+ii];
+#else
+ -YWt[ii*cnp+jj];
+#endif
+ }
+ else{
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+
+ for(ii=0; ii<cnp; ++ii, ptr2+=Sdim)
+ for(jj=0; jj<cnp; ++jj)
+ ptr2[jj]=
+#if MAT_STORAGE==COLUMN_MAJOR
+ ptr1[jj*cnp+ii] - YWt[jj*cnp+ii];
+#else
+ ptr1[ii*cnp+jj] - YWt[ii*cnp+jj];
+#endif
+ }
+ }
+
+ /* copy the LOWER TRIANGULAR PART of S from the upper one */
+ for(k=mcon; k<j; ++k){
+#if MAT_STORAGE==COLUMN_MAJOR
+ ptr1=S + (k-mcon)*mmconxUsz + (j-mcon)*cnp; // set ptr1 to point to the beginning of block j,k in S
+ ptr2=S + (j-mcon)*mmconxUsz + (k-mcon)*cnp; // set ptr2 to point to the beginning of block k,j in S
+#else
+ ptr1=S + (j-mcon)*mmconxUsz + (k-mcon)*cnp; // set ptr1 to point to the beginning of block j,k in S
+ ptr2=S + (k-mcon)*mmconxUsz + (j-mcon)*cnp; // set ptr2 to point to the beginning of block k,j in S
+#endif
+ for(ii=0; ii<cnp; ++ii, ptr1+=Sdim)
+ for(jj=0, ptr3=ptr2+ii; jj<cnp; ++jj, ptr3+=Sdim)
+ ptr1[jj]=*ptr3;
+ }
+
+ /* compute e_j=ea_j - \sum_i Y_ij eb_i */
+ /* Recall that Y_ij is cnp x pnp and eb_i is pnp x 1 */
+ ptr1=E + j*easz; // set ptr1 to point to e_j
+
+ for(i=0; i<nnz; ++i){
+ /* set ptr2 to point to Y_ij, actual row number in rcsubs[i] */
+ ptr2=Yj + i*Ysz;
+
+ /* set ptr3 to point to eb_i */
+ ptr3=eb + rcsubs[i]*ebsz;
+ for(ii=0; ii<cnp; ++ii){
+ ptr4=ptr2+ii*pnp;
+ for(jj=0, sum=0.0; jj<pnp; ++jj)
+ sum+=ptr4[jj]*ptr3[jj]; //ptr2[ii*pnp+jj]*ptr3[jj];
+ ptr1[ii]+=sum;
+ }
+ }
+
+ ptr2=ea + j*easz; // set ptr2 to point to ea_j
+ for(i=0; i<easz; ++i)
+ ptr1[i]=ptr2[i] - ptr1[i];
+ }
+
+#if 0
+ if(verbose>1){ /* count the nonzeros in S */
+ for(i=ii=0; i<Sdim*Sdim; ++i)
+ if(S[i]!=0.0) ++ii;
+ printf("\nS density: %.5g\n", ((double)ii)/(Sdim*Sdim)); fflush(stdout);
+ }
+#endif
+
+ /* solve the linear system S dpa = E to compute the da_j.
+ *
+ * Note that if MAT_STORAGE==1 S is modified in the following call;
+ * this is OK since S is recomputed for each iteration
+ */
+ //issolved=sba_Axb_LU(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_LU;
+ issolved=sba_Axb_Chol(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_Chol;
+ //issolved=sba_Axb_BK(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_BK;
+ //issolved=sba_Axb_QRnoQ(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_QRnoQ;
+ //issolved=sba_Axb_QR(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_QR;
+ //issolved=sba_Axb_SVD(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); linsolver=sba_Axb_SVD;
+ //issolved=sba_Axb_CG(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, (3*Sdim)/2, 1E-10, SBA_CG_JACOBI, MAT_STORAGE); linsolver=(PLS)sba_Axb_CG;
+
+ ++nlss;
+
+ _dblzero(dpa, mcon*cnp); /* no change for the first mcon camera params */
+
+ if(issolved){
+
+ /* compute the db_i */
+ for(i=ncon; i<n; ++i){
+ ptr1=dpb + i*ebsz; // set ptr1 to point to db_i
+
+ /* compute \sum_j W_ij^T da_j */
+ /* Recall that W_ij is cnp x pnp and da_j is cnp x 1 */
+ _dblzero(Wtda, Wtdasz); /* clear Wtda */
+ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
+ for(j=0; j<nnz; ++j){
+ /* set ptr2 to point to W_ij, actual column number in rcsubs[j] */
+ if(rcsubs[j]<mcon) continue; /* W_ij is zero */
+
+ ptr2=W + idxij.val[rcidxs[j]]*Wsz;
+
+ /* set ptr3 to point to da_j */
+ ptr3=dpa + rcsubs[j]*cnp;
+
+ for(ii=0; ii<pnp; ++ii){
+ ptr4=ptr2+ii;
+ for(jj=0, sum=0.0; jj<cnp; ++jj)
+ sum+=ptr4[jj*pnp]*ptr3[jj]; //ptr2[jj*pnp+ii]*ptr3[jj];
+ Wtda[ii]+=sum;
+ }
+ }
+
+ /* compute eb_i - \sum_j W_ij^T da_j = eb_i - Wtda in Wtda */
+ ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
+ for(ii=0; ii<pnp; ++ii)
+ Wtda[ii]=ptr2[ii] - Wtda[ii];
+
+ /* compute the product (V*_i)^-1 Wtda = (V*_i)^-1 (eb_i - \sum_j W_ij^T da_j).
+ * Recall that only the lower triangle of (V*_i)^-1 is stored
+ */
+ ptr2=V + i*Vsz; // set ptr2 to point to (V*_i)^-1
+ for(ii=0; ii<pnp; ++ii){
+ for(jj=0, sum=0.0; jj<=ii; ++jj)
+ sum+=ptr2[ii*pnp+jj]*Wtda[jj];
+ for( ; jj<pnp; ++jj)
+ sum+=ptr2[jj*pnp+ii]*Wtda[jj];
+ ptr1[ii]=sum;
+ }
+ }
+ _dblzero(dpb, ncon*pnp); /* no change for the first ncon point params */
+
+ /* parameter vector updates are now in dpa, dpb */
+
+ /* compute p's new estimate and ||dp||^2 */
+ for(i=0, dp_L2=0.0; i<nvars; ++i){
+ pdp[i]=p[i] + (tmp=dp[i]);
+ dp_L2+=tmp*tmp;
+ }
+ //dp_L2=sqrt(dp_L2);
+
+ if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
+ //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
+ stop=2;
+ break;
+ }
+
+ if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
+ //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
+ fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in sba_motstr_levmar_x(),\n"
+ " minimization should be restarted from the current solution with an increased damping term\n");
+ retval=SBA_ERROR;
+ goto freemem_and_return;
+ }
+
+ (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */
+ if(verbose>1)
+ printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
+ /* ### compute ||e(pdp)||_2 */
+ if(covx==NULL)
+ pdp_eL2=nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
+ else
+ pdp_eL2=nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
+ if(!SBA_FINITE(pdp_eL2)){
+ if(verbose) /* identify the offending point projection */
+ sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);
+
+ stop=7;
+ break;
+ }
+
+ for(i=0, dL=0.0; i<nvars; ++i)
+ dL+=dp[i]*(mu*dp[i]+eab[i]);
+
+ dF=p_eL2-pdp_eL2;
+
+ if(verbose>1)
+ printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
+
+ if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
+ tmp=(2.0*dF/dL-1.0);
+ tmp=1.0-tmp*tmp*tmp;
+ mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
+ nu=2;
+
+ /* the test below is equivalent to the relative reduction of the RMS reprojection error: sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
+ if(pdp_eL2-2.0*sqrt(p_eL2*pdp_eL2)<(eps4_sq-1.0)*p_eL2) stop=4;
+
+ for(i=0; i<nvars; ++i) /* update p's estimate */
+ p[i]=pdp[i];
+
+ for(i=0; i<nobs; ++i) /* update e and ||e||_2 */
+ e[i]=hx[i];
+ p_eL2=pdp_eL2;
+ break;
+ }
+ } /* issolved */
+
+moredamping:
+ /* if this point is reached (also via an explicit goto!), either the linear system could
+ * not be solved or the error did not reduce; in any case, the increment must be rejected
+ */
+
+ mu*=nu;
+ nu2=nu<<1; // 2*nu;
+ if(nu2<=nu){ /* nu has wrapped around (overflown) */
+ fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in sba_motstr_levmar_x()! Singular Hessian matrix?\n");
+ //exit(1);
+ stop=6;
+ break;
+ }
+ nu=nu2;
+
+#if 0
+ /* restore U, V diagonal entries */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
+ for(i=0; i<cnp; ++i)
+ ptr1[i*cnp+i]=ptr2[i];
+ }
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
+ for(j=0; j<pnp; ++j)
+ ptr1[j*pnp+j]=ptr2[j];
+ }
+#endif
+ } /* inner while loop */
+
+ if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
+ }
+
+ if(itno>=itmax) stop=3;
+
+ /* restore U, V diagonal entries */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
+ for(i=0; i<cnp; ++i)
+ ptr1[i*cnp+i]=ptr2[i];
+ }
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
+ for(j=0; j<pnp; ++j)
+ ptr1[j*pnp+j]=ptr2[j];
+ }
+
+ if(info){
+ info[0]=init_p_eL2;
+ info[1]=p_eL2;
+ info[2]=eab_inf;
+ info[3]=dp_L2;
+ for(j=mcon, tmp=DBL_MIN; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ for(i=0; i<cnp; ++i)
+ if(tmp<ptr1[i*cnp+i]) tmp=ptr1[i*cnp+i];
+ }
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ for(j=0; j<pnp; ++j)
+ if(tmp<ptr1[j*pnp+j]) tmp=ptr1[j*pnp+j];
+ }
+ info[4]=mu/tmp;
+ info[5]=itno;
+ info[6]=stop;
+ info[7]=nfev;
+ info[8]=njev;
+ info[9]=nlss;
+ }
+
+ //sba_print_sol(n, m, p, cnp, pnp, x, mnp, &idxij, rcidxs, rcsubs);
+ retval=(stop!=7)? itno : SBA_ERROR;
+
+freemem_and_return: /* NOTE: this point is also reached via a goto! */
+
+ /* free whatever was allocated */
+ free(W); free(U); free(V);
+ free(e); free(eab);
+ free(E); free(Yj); free(YWt);
+ free(S); free(dp); free(Wtda);
+ free(rcidxs); free(rcsubs);
+#ifndef SBA_DESTROY_COVS
+ if(wght) free(wght);
+#else
+ /* nothing to do */
+#endif /* SBA_DESTROY_COVS */
+
+ free(hx); free(diagUV); free(pdp);
+ if(fdj_data.hxx){ // cleanup
+ free(fdj_data.hxx);
+ free(fdj_data.func_rcidxs);
+ }
+
+ sba_crsm_free(&idxij);
+
+ /* free the memory allocated by the matrix inversion & linear solver routines */
+ if(matinv) (*matinv)(NULL, 0);
+ if(linsolver) (*linsolver)(NULL, NULL, NULL, 0, 0);
+
+ return retval;
+}
+
+
+/* Bundle adjustment on camera parameters only
+ * using the sparse Levenberg-Marquardt as described in HZ p. 568
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_mot_levmar_x(
+ const int n, /* number of points */
+ const int m, /* number of images */
+ const int mcon,/* number of images (starting from the 1st) whose parameters should not be modified.
+ * All A_ij (see below) with j<mcon are assumed to be zero
+ */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (a1, ..., am).
+ * aj are the image j parameters, size m*cnp */
+ const int cnp,/* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp,/* number of parameters for EACH measurement; usually 2 */
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ /* functional relation describing measurements. Given a parameter vector p,
+ * computes a prediction of the measurements \hat{x}. p is (m*cnp)x1,
+ * \hat{x} is (n*m*mnp)x1, maximum
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ */
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ /* function to evaluate the sparse jacobian dX/dp.
+ * The Jacobian is returned in jac as
+ * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m), or (using HZ's notation),
+ * jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm)
+ * Notice that depending on idxij, some of the A_ij might be missing.
+ * Note also that the A_ij are mnp x cnp matrices and they
+ * should be stored in jac in row-major order.
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ *
+ * If NULL, the jacobian is approximated by repetitive func calls and finite
+ * differences. This is computationally inefficient and thus NOT recommended.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+ /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial \mu,
+ * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+ * 5 - stopped by small ||e||_2
+ * 6 - too many attempts to increase damping. Restart with increased mu
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+)
+{
+register int i, j, ii, jj, k;
+int nvis, nnz, retval;
+
+/* The following are work arrays that are dynamically allocated by sba_mot_levmar_x() */
+double *jac; /* work array for storing the jacobian, max. size n*m*mnp*cnp */
+double *U; /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */
+
+double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
+ max. size n*m*mnp */
+double *ea; /* work array for storing the ea_j in the order ea_1, .. ea_m, size m*cnp */
+
+double *dp; /* work array for storing the parameter vector updates da_1, ..., da_m, size m*cnp */
+
+double *wght= /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */
+ NULL;
+
+/* Of the above arrays, jac, e, wght are sparse and
+ * U, ea, dp are dense. Sparse arrays are indexed through
+ * idxij (see below), that is with the same mechanism as the input
+ * measurements vector x
+ */
+
+/* submatrices sizes */
+int Asz, Usz,
+ esz, easz, covsz;
+
+register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
+struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij
+ * in jac, e_ij in e, etc.
+ * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
+ * index k and it is used to efficiently lookup the memory locations where the non-zero
+ * blocks of a sparse matrix/vector are stored
+ */
+int maxCPvis, /* max. of projections across cameras & projections across points */
+ *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or
+ column in a sparse matrix, size max(n, m) */
+ *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a
+ sparse matrix, size max(n, m) */
+
+/* The following variables are needed by the LM algorithm */
+register int itno; /* iteration counter */
+int nsolved;
+/* temporary work arrays that are dynamically allocated */
+double *hx, /* \hat{x}_i, max. size m*n*mnp */
+ *diagU, /* diagonals of U_j, size m*cnp */
+ *pdp; /* p + dp, size m*cnp */
+
+register double mu, /* damping constant */
+ tmp; /* mainly used in matrix & vector multiplications */
+double p_eL2, ea_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
+double p_L2, dp_L2=DBL_MAX, dF, dL;
+double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2],
+ eps3_sq=opts[3]*opts[3], eps4_sq=opts[4]*opts[4];
+double init_p_eL2;
+int nu=2, nu2, stop=0, nfev, njev=0, nlss=0;
+int nobs, nvars;
+PLS linsolver=NULL;
+
+struct fdj_data_x_ fdj_data;
+void *jac_adata;
+
+/* Initialization */
+ mu=ea_inf=0.0; /* -Wall */
+
+ /* block sizes */
+ Asz=mnp * cnp; Usz=cnp * cnp;
+ esz=mnp; easz=cnp;
+ covsz=mnp * mnp;
+
+ /* count total number of visible image points */
+ for(i=nvis=0, jj=n*m; i<jj; ++i)
+ nvis+=(vmask[i]!=0);
+
+ nobs=nvis*mnp;
+ nvars=m*cnp;
+ if(nobs<nvars){
+ fprintf(stderr, "SBA: sba_mot_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
+ return SBA_ERROR;
+ }
+
+ /* allocate & fill up the idxij structure */
+ sba_crsm_alloc(&idxij, n, m, nvis);
+ for(i=k=0; i<n; ++i){
+ idxij.rowptr[i]=k;
+ ii=i*m;
+ for(j=0; j<m; ++j)
+ if(vmask[ii+j]){
+ idxij.val[k]=k;
+ idxij.colidx[k++]=j;
+ }
+ }
+ idxij.rowptr[n]=nvis;
+
+ /* find the maximum number of visible image points in any single camera or coming from a single 3D point */
+ /* cameras */
+ for(i=maxCPvis=0; i<n; ++i)
+ if((k=idxij.rowptr[i+1]-idxij.rowptr[i])>maxCPvis) maxCPvis=k;
+
+ /* points, note that maxCPvis is not reinitialized! */
+ for(j=0; j<m; ++j){
+ for(i=ii=0; i<n; ++i)
+ if(vmask[i*m+j]) ++ii;
+ if(ii>maxCPvis) maxCPvis=ii;
+ }
+
+ /* allocate work arrays */
+ jac=(double *)emalloc(nvis*Asz*sizeof(double));
+ U=(double *)emalloc(m*Usz*sizeof(double));
+ e=(double *)emalloc(nobs*sizeof(double));
+ ea=(double *)emalloc(nvars*sizeof(double));
+ dp=(double *)emalloc(nvars*sizeof(double));
+ rcidxs=(int *)emalloc(maxCPvis*sizeof(int));
+ rcsubs=(int *)emalloc(maxCPvis*sizeof(int));
+#ifndef SBA_DESTROY_COVS
+ if(covx!=NULL) wght=(double *)emalloc(nvis*covsz*sizeof(double));
+#else
+ if(covx!=NULL) wght=covx;
+#endif /* SBA_DESTROY_COVS */
+
+
+ hx=(double *)emalloc(nobs*sizeof(double));
+ diagU=(double *)emalloc(nvars*sizeof(double));
+ pdp=(double *)emalloc(nvars*sizeof(double));
+
+ /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
+ if(!fjac){
+ fdj_data.func=func;
+ fdj_data.cnp=cnp;
+ fdj_data.pnp=0;
+ fdj_data.mnp=mnp;
+ fdj_data.hx=hx;
+ fdj_data.hxx=(double *)emalloc(nobs*sizeof(double));
+ fdj_data.func_rcidxs=(int *)emalloc(2*maxCPvis*sizeof(int));
+ fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxCPvis;
+ fdj_data.adata=adata;
+
+ fjac=sba_fdjac_x;
+ jac_adata=(void *)&fdj_data;
+ }
+ else{
+ fdj_data.hxx=NULL;
+ jac_adata=adata;
+ }
+
+ if(itmax==0){ /* verify jacobian */
+ sba_mot_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, mnp, adata, jac_adata);
+ retval=0;
+ goto freemem_and_return;
+ }
+
+ /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their
+ * inverses and using the resulting matrices w_x_ij to weigh A_ij and e_ij as w_x_ij A_ij
+ * and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij
+ * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij =
+ * A_ij^T Sigma_x_ij^-1 A_ij
+ *
+ * ea_j are weighted in a similar manner
+ */
+ if(covx!=NULL){
+ for(i=0; i<n; ++i){
+ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ for(j=0; j<nnz; ++j){
+ /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */
+ ptr1=covx + (k=idxij.val[rcidxs[j]]*covsz);
+ ptr2=wght + k;
+ if(!sba_mat_cholinv(ptr1, ptr2, mnp)){ /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */
+ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", i, rcsubs[j]);
+ retval=SBA_ERROR;
+ goto freemem_and_return;
+ }
+ }
+ }
+ sba_mat_cholinv(NULL, NULL, 0); /* cleanup */
+ }
+
+ /* compute the error vectors e_ij in hx */
+ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
+  /* ### compute e=x - f(p) [e=w*(x - f(p))] and its L2 norm */
+ if(covx==NULL)
+ p_eL2=nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */
+ else
+ p_eL2=nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */
+ if(verbose) printf("initial mot-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
+ init_p_eL2=p_eL2;
+ if(!SBA_FINITE(p_eL2)) stop=7;
+
+ for(itno=0; itno<itmax && !stop; ++itno){
+ /* Note that p, e and ||e||_2 have been updated at the previous iteration */
+
+ /* compute derivative submatrices A_ij */
+ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
+
+ if(covx!=NULL){
+ /* compute w_x_ij A_ij
+ * Since w_x_ij is upper triangular, the products can be safely saved
+ * directly in A_ij, without the need for intermediate storage
+ */
+ for(i=0; i<nvis; ++i){
+ /* set ptr1, ptr2 to point to w_x_ij, A_ij, resp. */
+ ptr1=wght + i*covsz;
+ ptr2=jac + i*Asz;
+
+ /* w_x_ij is mnp x mnp, A_ij is mnp x cnp */
+ for(ii=0; ii<mnp; ++ii)
+ for(jj=0; jj<cnp; ++jj){
+ for(k=ii, sum=0.0; k<mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum+=ptr1[ii*mnp+k]*ptr2[k*cnp+jj];
+ ptr2[ii*cnp+jj]=sum;
+ }
+ }
+ }
+
+ /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here!
+ /* U_j is symmetric, therefore its computation can be sped up by
+ * computing only the upper part and then reusing it for the lower one.
+ * Recall that A_ij is mnp x cnp
+ */
+ /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here!
+ /* Recall that e_ij is mnp x 1
+ */
+ _dblzero(U, m*Usz); /* clear all U_j */
+ _dblzero(ea, m*easz); /* clear all ea_j */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=ea + j*easz; // set ptr2 to point to ea_j
+
+ nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
+ for(i=0; i<nnz; ++i){
+ /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */
+ ptr3=jac + idxij.val[rcidxs[i]]*Asz;
+
+ /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */
+ for(ii=0; ii<cnp; ++ii){
+ for(jj=ii; jj<cnp; ++jj){
+ for(k=0, sum=0.0; k<mnp; ++k)
+ sum+=ptr3[k*cnp+ii]*ptr3[k*cnp+jj];
+ ptr1[ii*cnp+jj]+=sum;
+ }
+
+ /* copy the LOWER TRIANGULAR PART of U_j from the upper one */
+ for(jj=0; jj<ii; ++jj)
+ ptr1[ii*cnp+jj]=ptr1[jj*cnp+ii];
+ }
+
+ ptr4=e + idxij.val[rcidxs[i]]*esz; /* set ptr4 to point to e_ij */
+ /* compute A_ij^T e_ij and add it to ea_j */
+ for(ii=0; ii<cnp; ++ii){
+ for(jj=0, sum=0.0; jj<mnp; ++jj)
+ sum+=ptr3[jj*cnp+ii]*ptr4[jj];
+ ptr2[ii]+=sum;
+ }
+ }
+ }
+
+ /* Compute ||J^T e||_inf and ||p||^2 */
+ for(i=0, p_L2=ea_inf=0.0; i<nvars; ++i){
+ if(ea_inf < (tmp=FABS(ea[i]))) ea_inf=tmp;
+ p_L2+=p[i]*p[i];
+ }
+ //p_L2=sqrt(p_L2);
+
+ /* save diagonal entries so that augmentation can be later canceled.
+ * Diagonal entries are in U_j
+ */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
+ for(i=0; i<cnp; ++i)
+ ptr2[i]=ptr1[i*cnp+i];
+ }
+
+/*
+if(!(itno%100)){
+ printf("Current estimate: ");
+ for(i=0; i<nvars; ++i)
+ printf("%.9g ", p[i]);
+ printf("-- errors %.9g %0.9g\n", ea_inf, p_eL2);
+}
+*/
+
+ /* check for convergence */
+ if((ea_inf <= eps1)){
+ dp_L2=0.0; /* no increment for p in this case */
+ stop=1;
+ break;
+ }
+
+ /* compute initial damping factor */
+ if(itno==0){
+ for(i=mcon*cnp, tmp=DBL_MIN; i<nvars; ++i)
+ if(diagU[i]>tmp) tmp=diagU[i]; /* find max diagonal element */
+ mu=tau*tmp;
+ }
+
+ /* determine increment using adaptive damping */
+ while(1){
+ /* augment U */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ for(i=0; i<cnp; ++i)
+ ptr1[i*cnp+i]+=mu;
+ }
+
+ /* solve the linear systems U_j da_j = ea_j to compute the da_j */
+ _dblzero(dp, mcon*cnp); /* no change for the first mcon camera params */
+ for(j=nsolved=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=ea + j*easz; // set ptr2 to point to ea_j
+ ptr3=dp + j*cnp; // set ptr3 to point to da_j
+
+ //nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_LU;
+ nsolved+=sba_Axb_Chol(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_Chol;
+ //nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_BK;
+ //nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QRnoQ;
+ //nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_QR;
+ //nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, cnp, 0); linsolver=sba_Axb_SVD;
+ //nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, cnp, (3*cnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); linsolver=(PLS)sba_Axb_CG;
+
+ ++nlss;
+ }
+
+ if(nsolved==m){
+
+ /* parameter vector updates are now in dp */
+
+ /* compute p's new estimate and ||dp||^2 */
+ for(i=0, dp_L2=0.0; i<nvars; ++i){
+ pdp[i]=p[i] + (tmp=dp[i]);
+ dp_L2+=tmp*tmp;
+ }
+ //dp_L2=sqrt(dp_L2);
+
+ if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
+ //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
+ stop=2;
+ break;
+ }
+
+ if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
+ //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
+ fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in sba_mot_levmar_x(),\n"
+ " minimization should be restarted from the current solution with an increased damping term\n");
+ retval=SBA_ERROR;
+ goto freemem_and_return;
+ }
+
+ (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */
+ if(verbose>1)
+ printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
+ /* ### compute ||e(pdp)||_2 */
+ if(covx==NULL)
+ pdp_eL2=nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
+ else
+ pdp_eL2=nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
+ if(!SBA_FINITE(pdp_eL2)){
+ if(verbose) /* identify the offending point projection */
+ sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);
+
+ stop=7;
+ break;
+ }
+
+ for(i=0, dL=0.0; i<nvars; ++i)
+ dL+=dp[i]*(mu*dp[i]+ea[i]);
+
+ dF=p_eL2-pdp_eL2;
+
+ if(verbose>1)
+ printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
+
+ if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
+ tmp=(2.0*dF/dL-1.0);
+ tmp=1.0-tmp*tmp*tmp;
+ mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
+ nu=2;
+
+ /* the test below is equivalent to the relative reduction of the RMS reprojection error: sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
+ if(pdp_eL2-2.0*sqrt(p_eL2*pdp_eL2)<(eps4_sq-1.0)*p_eL2) stop=4;
+
+ for(i=0; i<nvars; ++i) /* update p's estimate */
+ p[i]=pdp[i];
+
+ for(i=0; i<nobs; ++i) /* update e and ||e||_2 */
+ e[i]=hx[i];
+ p_eL2=pdp_eL2;
+ break;
+ }
+ } /* nsolved==m */
+
+ /* if this point is reached, either at least one linear system could not be solved or
+ * the error did not reduce; in any case, the increment must be rejected
+ */
+
+ mu*=nu;
+ nu2=nu<<1; // 2*nu;
+ if(nu2<=nu){ /* nu has wrapped around (overflown) */
+ fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in sba_mot_levmar_x()! Singular Hessian matrix?\n");
+ //exit(1);
+ stop=6;
+ break;
+ }
+ nu=nu2;
+
+#if 0
+ /* restore U diagonal entries */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
+ for(i=0; i<cnp; ++i)
+ ptr1[i*cnp+i]=ptr2[i];
+ }
+#endif
+ } /* inner while loop */
+
+ if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
+ }
+
+ if(itno>=itmax) stop=3;
+
+ /* restore U diagonal entries */
+ for(j=mcon; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
+ for(i=0; i<cnp; ++i)
+ ptr1[i*cnp+i]=ptr2[i];
+ }
+
+ if(info){
+ info[0]=init_p_eL2;
+ info[1]=p_eL2;
+ info[2]=ea_inf;
+ info[3]=dp_L2;
+ for(j=mcon, tmp=DBL_MIN; j<m; ++j){
+ ptr1=U + j*Usz; // set ptr1 to point to U_j
+ for(i=0; i<cnp; ++i)
+ if(tmp<ptr1[i*cnp+i]) tmp=ptr1[i*cnp+i];
+ }
+ info[4]=mu/tmp;
+ info[5]=itno;
+ info[6]=stop;
+ info[7]=nfev;
+ info[8]=njev;
+ info[9]=nlss;
+ }
+ //sba_print_sol(n, m, p, cnp, 0, x, mnp, &idxij, rcidxs, rcsubs);
+ retval=(stop!=7)? itno : SBA_ERROR;
+
+freemem_and_return: /* NOTE: this point is also reached via a goto! */
+
+ /* free whatever was allocated */
+ free(jac); free(U);
+ free(e); free(ea);
+ free(dp);
+ free(rcidxs); free(rcsubs);
+#ifndef SBA_DESTROY_COVS
+ if(wght) free(wght);
+#else
+ /* nothing to do */
+#endif /* SBA_DESTROY_COVS */
+
+ free(hx); free(diagU); free(pdp);
+ if(fdj_data.hxx){ // cleanup
+ free(fdj_data.hxx);
+ free(fdj_data.func_rcidxs);
+ }
+
+ sba_crsm_free(&idxij);
+
+ /* free the memory allocated by the linear solver routine */
+ if(linsolver) (*linsolver)(NULL, NULL, NULL, 0, 0);
+
+ return retval;
+}
+
+
+/* Bundle adjustment on structure parameters only
+ * using the sparse Levenberg-Marquardt as described in HZ p. 568
+ *
+ * Returns the number of iterations (>=0) if successful, SBA_ERROR if failed
+ */
+
+int sba_str_levmar_x(
+ const int n, /* number of points */
+ const int ncon,/* number of points (starting from the 1st) whose parameters should not be modified.
+ * All B_ij (see below) with i<ncon are assumed to be zero
+ */
+ const int m, /* number of images */
+ char *vmask, /* visibility mask: vmask[i, j]=1 if point i visible in image j, 0 otherwise. nxm */
+ double *p, /* initial parameter vector p0: (b1, ..., bn).
+ * bi are the i-th point parameters, * size n*pnp */
+ const int pnp,/* number of parameters for ONE point; e.g. 3 for Euclidean points */
+ double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
+ * x_ij is the projection of the i-th point on the j-th image.
+ * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
+ * see vmask[i, j], max. size n*m*mnp
+ */
+ double *covx, /* measurements covariance matrices: (Sigma_x_11, .. Sigma_x_1m, ..., Sigma_x_n1, .. Sigma_x_nm),
+ * where Sigma_x_ij is the mnp x mnp covariance of x_ij stored row-by-row. Set to NULL if no
+ * covariance estimates are available (identity matrices are implicitly used in this case).
+ * NOTE: a certain Sigma_x_ij is missing if the corresponding x_ij is also missing;
+ * see vmask[i, j], max. size n*m*mnp*mnp
+ */
+ const int mnp,/* number of parameters for EACH measurement; usually 2 */
+ void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
+ /* functional relation describing measurements. Given a parameter vector p,
+ * computes a prediction of the measurements \hat{x}. p is (n*pnp)x1,
+ * \hat{x} is (n*m*mnp)x1, maximum
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ */
+ void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
+ /* function to evaluate the sparse jacobian dX/dp.
+ * The Jacobian is returned in jac as
+ * (dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation),
+ * jac=(B_11, ..., B_1m, ..., B_n1, ..., B_nm)
+ * Notice that depending on idxij, some of the B_ij might be missing.
+ * Note also that B_ij are mnp x pnp matrices and they
+ * should be stored in jac in row-major order.
+ * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
+ * as working memory
+ *
+ * If NULL, the jacobian is approximated by repetitive func calls and finite
+ * differences. This is computationally inefficient and thus NOT recommended.
+ */
+ void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */
+
+ const int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
+ const int verbose, /* I: verbosity */
+ const double opts[SBA_OPTSSZ],
+ /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \epsilon4]. Respectively the scale factor for initial \mu,
+ * stopping thresholds for ||J^T e||_inf, ||dp||_2, ||e||_2 and (||e||_2-||e_new||_2)/||e||_2
+ */
+ double info[SBA_INFOSZ]
+ /* O: information regarding the minimization. Set to NULL if don't care
+ * info[0]=||e||_2 at initial p.
+ * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+ * info[5]= # iterations,
+ * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+ * 2 - stopped by small dp
+ * 3 - stopped by itmax
+ * 4 - stopped by small relative reduction in ||e||_2
+ * 5 - stopped by small ||e||_2
+ * 6 - too many attempts to increase damping. Restart with increased mu
+ * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+ * info[7]= # function evaluations
+ * info[8]= # jacobian evaluations
+ * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
+ */
+)
+{
+register int i, j, ii, jj, k;
+int nvis, nnz, retval;
+
+/* The following are work arrays that are dynamically allocated by sba_str_levmar_x() */
+double *jac; /* work array for storing the jacobian, max. size n*m*mnp*pnp */
+double *V; /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */
+
+double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
+ max. size n*m*mnp */
+double *eb; /* work array for storing the eb_i in the order eb_1, .. eb_n size n*pnp */
+
+double *dp; /* work array for storing the parameter vector updates db_1, ..., db_n, size n*pnp */
+
+double *wght= /* work array for storing the weights computed from the covariance inverses, max. size n*m*mnp*mnp */
+ NULL;
+
+/* Of the above arrays, jac, e, wght are sparse and
+ * V, eb, dp are dense. Sparse arrays are indexed through
+ * idxij (see below), that is with the same mechanism as the input
+ * measurements vector x
+ */
+
+/* submatrices sizes */
+int Bsz, Vsz,
+ esz, ebsz, covsz;
+
+register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
+struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location
+ * of B_ij in jac, etc.
+ * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
+ * index k and it is used to efficiently lookup the memory locations where the non-zero
+ * blocks of a sparse matrix/vector are stored
+ */
+int maxCPvis, /* max. of projections across cameras & projections across points */
+ *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or
+ column in a sparse matrix, size max(n, m) */
+ *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a
+ sparse matrix, size max(n, m) */
+
+/* The following variables are needed by the LM algorithm */
+register int itno; /* iteration counter */
+int nsolved;
+/* temporary work arrays that are dynamically allocated */
+double *hx, /* \hat{x}_i, max. size m*n*mnp */
+ *diagV, /* diagonals of V_i, size n*pnp */
+ *pdp; /* p + dp, size n*pnp */
+
+register double mu, /* damping constant */
+ tmp; /* mainly used in matrix & vector multiplications */
+double p_eL2, eb_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
+double p_L2, dp_L2=DBL_MAX, dF, dL;
+double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2],
+ eps3_sq=opts[3]*opts[3], eps4_sq=opts[4]*opts[4];
+double init_p_eL2;
+int nu=2, nu2, stop=0, nfev, njev=0, nlss=0;
+int nobs, nvars;
+PLS linsolver=NULL;
+
+struct fdj_data_x_ fdj_data;
+void *jac_adata;
+
+/* Initialization */
+ mu=eb_inf=tmp=0.0; /* -Wall */
+
+ /* block sizes */
+ Bsz=mnp * pnp; Vsz=pnp * pnp;
+ esz=mnp; ebsz=pnp;
+ covsz=mnp * mnp;
+
+ /* count total number of visible image points */
+ for(i=nvis=0, jj=n*m; i<jj; ++i)
+ nvis+=(vmask[i]!=0);
+
+ nobs=nvis*mnp;
+ nvars=n*pnp;
+ if(nobs<nvars){
+ fprintf(stderr, "SBA: sba_str_levmar_x() cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
+ return SBA_ERROR;
+ }
+
+ /* allocate & fill up the idxij structure */
+ sba_crsm_alloc(&idxij, n, m, nvis);
+ for(i=k=0; i<n; ++i){
+ idxij.rowptr[i]=k;
+ ii=i*m;
+ for(j=0; j<m; ++j)
+ if(vmask[ii+j]){
+ idxij.val[k]=k;
+ idxij.colidx[k++]=j;
+ }
+ }
+ idxij.rowptr[n]=nvis;
+
+ /* find the maximum number of visible image points in any single camera or coming from a single 3D point */
+ /* cameras */
+ for(i=maxCPvis=0; i<n; ++i)
+ if((k=idxij.rowptr[i+1]-idxij.rowptr[i])>maxCPvis) maxCPvis=k;
+
+ /* points, note that maxCPvis is not reinitialized! */
+ for(j=0; j<m; ++j){
+ for(i=ii=0; i<n; ++i)
+ if(vmask[i*m+j]) ++ii;
+ if(ii>maxCPvis) maxCPvis=ii;
+ }
+
+ /* allocate work arrays */
+ jac=(double *)emalloc(nvis*Bsz*sizeof(double));
+ V=(double *)emalloc(n*Vsz*sizeof(double));
+ e=(double *)emalloc(nobs*sizeof(double));
+ eb=(double *)emalloc(nvars*sizeof(double));
+ dp=(double *)emalloc(nvars*sizeof(double));
+ rcidxs=(int *)emalloc(maxCPvis*sizeof(int));
+ rcsubs=(int *)emalloc(maxCPvis*sizeof(int));
+#ifndef SBA_DESTROY_COVS
+ if(covx!=NULL) wght=(double *)emalloc(nvis*covsz*sizeof(double));
+#else
+ if(covx!=NULL) wght=covx;
+#endif /* SBA_DESTROY_COVS */
+
+
+ hx=(double *)emalloc(nobs*sizeof(double));
+ diagV=(double *)emalloc(nvars*sizeof(double));
+ pdp=(double *)emalloc(nvars*sizeof(double));
+
+ /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
+ if(!fjac){
+ fdj_data.func=func;
+ fdj_data.cnp=0;
+ fdj_data.pnp=pnp;
+ fdj_data.mnp=mnp;
+ fdj_data.hx=hx;
+ fdj_data.hxx=(double *)emalloc(nobs*sizeof(double));
+ fdj_data.func_rcidxs=(int *)emalloc(2*maxCPvis*sizeof(int));
+ fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxCPvis;
+ fdj_data.adata=adata;
+
+ fjac=sba_fdjac_x;
+ jac_adata=(void *)&fdj_data;
+ }
+ else{
+ fdj_data.hxx=NULL;
+ jac_adata=adata;
+ }
+
+ if(itmax==0){ /* verify jacobian */
+ sba_str_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, ncon, pnp, mnp, adata, jac_adata);
+ retval=0;
+ goto freemem_and_return;
+ }
+
+ /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their
+ * inverses and using the resulting matrices w_x_ij to weigh B_ij and e_ij as
+ * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as V_i=\sum_j B_ij^T B_ij
+ * actually become \sum_j (w_x_ij B_ij)^T w_x_ij B_ij= \sum_j B_ij^T w_x_ij^T w_x_ij B_ij =
+ * B_ij^T Sigma_x_ij^-1 B_ij
+ *
+ * eb_i are weighted in a similar manner
+ */
+ if(covx!=NULL){
+ for(i=0; i<n; ++i){
+ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
+ for(j=0; j<nnz; ++j){
+ /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */
+ ptr1=covx + (k=idxij.val[rcidxs[j]]*covsz);
+ ptr2=wght + k;
+ if(!sba_mat_cholinv(ptr1, ptr2, mnp)){ /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */
+ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", i, rcsubs[j]);
+ retval=SBA_ERROR;
+ goto freemem_and_return;
+ }
+ }
+ }
+ sba_mat_cholinv(NULL, NULL, 0); /* cleanup */
+ }
+
+ /* compute the error vectors e_ij in hx */
+ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
+ /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */
+ if(covx==NULL)
+ p_eL2=nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */
+ else
+ p_eL2=nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */
+ if(verbose) printf("initial str-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
+ init_p_eL2=p_eL2;
+ if(!SBA_FINITE(p_eL2)) stop=7;
+
+ for(itno=0; itno<itmax && !stop; ++itno){
+ /* Note that p, e and ||e||_2 have been updated at the previous iteration */
+
+ /* compute derivative submatrices B_ij */
+ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
+
+ if(covx!=NULL){
+ /* compute w_x_ij B_ij.
+ * Since w_x_ij is upper triangular, the products can be safely saved
+ * directly in B_ij, without the need for intermediate storage
+ */
+ for(i=0; i<nvis; ++i){
+ /* set ptr1, ptr2 to point to w_x_ij, B_ij, resp. */
+ ptr1=wght + i*covsz;
+ ptr2=jac + i*Bsz;
+
+ /* w_x_ij is mnp x mnp, B_ij is mnp x pnp */
+ for(ii=0; ii<mnp; ++ii)
+ for(jj=0; jj<pnp; ++jj){
+ for(k=ii, sum=0.0; k<mnp; ++k) // k>=ii since w_x_ij is upper triangular
+ sum+=ptr1[ii*mnp+k]*ptr2[k*pnp+jj];
+ ptr2[ii*pnp+jj]=sum;
+ }
+ }
+ }
+
+ /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here!
+ /* V_i is symmetric, therefore its computation can be sped up by
+ * computing only the upper part and then reusing it for the lower one.
+ * Recall that B_ij is mnp x pnp
+ */
+ /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here!
+ /* Recall that e_ij is mnp x 1
+ */
+ _dblzero(V, n*Vsz); /* clear all V_i */
+ _dblzero(eb, n*ebsz); /* clear all eb_i */
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
+
+ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
+ for(j=0; j<nnz; ++j){
+ /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */
+ ptr3=jac + idxij.val[rcidxs[j]]*Bsz;
+
+ /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */
+ for(ii=0; ii<pnp; ++ii){
+ for(jj=ii; jj<pnp; ++jj){
+ for(k=0, sum=0.0; k<mnp; ++k)
+ sum+=ptr3[k*pnp+ii]*ptr3[k*pnp+jj];
+ ptr1[ii*pnp+jj]+=sum;
+ }
+
+ /* copy the LOWER TRIANGULAR PART of V_i from the upper one */
+ for(jj=0; jj<ii; ++jj)
+ ptr1[ii*pnp+jj]=ptr1[jj*pnp+ii];
+ }
+
+ ptr4=e + idxij.val[rcidxs[j]]*esz; /* set ptr4 to point to e_ij */
+ /* compute B_ij^T e_ij and add it to eb_i */
+ for(ii=0; ii<pnp; ++ii){
+ for(jj=0, sum=0.0; jj<mnp; ++jj)
+ sum+=ptr3[jj*pnp+ii]*ptr4[jj];
+ ptr2[ii]+=sum;
+ }
+ }
+ }
+
+ /* Compute ||J^T e||_inf and ||p||^2 */
+ for(i=0, p_L2=eb_inf=0.0; i<nvars; ++i){
+ if(eb_inf < (tmp=FABS(eb[i]))) eb_inf=tmp;
+ p_L2+=p[i]*p[i];
+ }
+ //p_L2=sqrt(p_L2);
+
+ /* save diagonal entries so that augmentation can be later canceled.
+ * Diagonal entries are in V_i
+ */
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
+ for(j=0; j<pnp; ++j)
+ ptr2[j]=ptr1[j*pnp+j];
+ }
+
+/*
+if(!(itno%100)){
+ printf("Current estimate: ");
+ for(i=0; i<nvars; ++i)
+ printf("%.9g ", p[i]);
+ printf("-- errors %.9g %0.9g\n", eb_inf, p_eL2);
+}
+*/
+
+ /* check for convergence */
+ if((eb_inf <= eps1)){
+ dp_L2=0.0; /* no increment for p in this case */
+ stop=1;
+ break;
+ }
+
+ /* compute initial damping factor */
+ if(itno==0){
+ for(i=ncon*pnp, tmp=DBL_MIN; i<nvars; ++i)
+ if(diagV[i]>tmp) tmp=diagV[i]; /* find max diagonal element */
+ mu=tau*tmp;
+ }
+
+ /* determine increment using adaptive damping */
+ while(1){
+ /* augment V */
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ for(j=0; j<pnp; ++j)
+ ptr1[j*pnp+j]+=mu;
+ }
+
+ /* solve the linear systems V*_i db_i = eb_i to compute the db_i */
+ _dblzero(dp, ncon*pnp); /* no change for the first ncon point params */
+ for(i=nsolved=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
+ ptr3=dp + i*pnp; // set ptr3 to point to db_i
+
+ //nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_LU;
+ nsolved+=sba_Axb_Chol(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_Chol;
+ //nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_BK;
+ //nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QRnoQ;
+ //nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_QR;
+ //nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, pnp, 0); linsolver=sba_Axb_SVD;
+ //nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, pnp, (3*pnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); linsolver=(PLS)sba_Axb_CG;
+
+ ++nlss;
+ }
+
+ if(nsolved==n){
+
+ /* parameter vector updates are now in dp */
+
+ /* compute p's new estimate and ||dp||^2 */
+ for(i=0, dp_L2=0.0; i<nvars; ++i){
+ pdp[i]=p[i] + (tmp=dp[i]);
+ dp_L2+=tmp*tmp;
+ }
+ //dp_L2=sqrt(dp_L2);
+
+ if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
+ //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
+ stop=2;
+ break;
+ }
+
+ if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
+ //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
+ fprintf(stderr, "SBA: the matrix of the augmented normal equations is almost singular in sba_motstr_levmar_x(),\n"
+ " minimization should be restarted from the current solution with an increased damping term\n");
+ retval=SBA_ERROR;
+ goto freemem_and_return;
+ }
+
+ (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */
+ if(verbose>1)
+ printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
+ /* ### compute ||e(pdp)||_2 */
+ if(covx==NULL)
+ pdp_eL2=nrmL2xmy(hx, x, hx, nobs); /* hx=x-hx, pdp_eL2=||hx|| */
+ else
+ pdp_eL2=nrmCxmy(hx, x, hx, wght, mnp, nvis); /* hx=wght*(x-hx), pdp_eL2=||hx|| */
+ if(!SBA_FINITE(pdp_eL2)){
+ if(verbose) /* identify the offending point projection */
+ sba_print_inf(hx, m, mnp, &idxij, rcidxs, rcsubs);
+
+ stop=7;
+ break;
+ }
+
+ for(i=0, dL=0.0; i<nvars; ++i)
+ dL+=dp[i]*(mu*dp[i]+eb[i]);
+
+ dF=p_eL2-pdp_eL2;
+
+ if(verbose>1)
+ printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
+
+ if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
+ tmp=(2.0*dF/dL-1.0);
+ tmp=1.0-tmp*tmp*tmp;
+ mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
+ nu=2;
+
+ /* the test below is equivalent to the relative reduction of the RMS reprojection error: sqrt(p_eL2)-sqrt(pdp_eL2)<eps4*sqrt(p_eL2) */
+ if(pdp_eL2-2.0*sqrt(p_eL2*pdp_eL2)<(eps4_sq-1.0)*p_eL2) stop=4;
+
+ for(i=0; i<nvars; ++i) /* update p's estimate */
+ p[i]=pdp[i];
+
+ for(i=0; i<nobs; ++i) /* update e and ||e||_2 */
+ e[i]=hx[i];
+ p_eL2=pdp_eL2;
+ break;
+ }
+ } /* nsolved==n */
+
+ /* if this point is reached, either at least one linear system could not be solved or
+ * the error did not reduce; in any case, the increment must be rejected
+ */
+
+ mu*=nu;
+ nu2=nu<<1; // 2*nu;
+ if(nu2<=nu){ /* nu has wrapped around (overflown) */
+ fprintf(stderr, "SBA: too many failed attempts to increase the damping factor in sba_str_levmar_x()! Singular Hessian matrix?\n");
+ //exit(1);
+ stop=6;
+ break;
+ }
+ nu=nu2;
+
+#if 0
+ /* restore V diagonal entries */
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
+ for(j=0; j<pnp; ++j)
+ ptr1[j*pnp+j]=ptr2[j];
+ }
+#endif
+ } /* inner while loop */
+
+ if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
+ }
+
+ if(itno>=itmax) stop=3;
+
+ /* restore V diagonal entries */
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
+ for(j=0; j<pnp; ++j)
+ ptr1[j*pnp+j]=ptr2[j];
+ }
+
+ if(info){
+ info[0]=init_p_eL2;
+ info[1]=p_eL2;
+ info[2]=eb_inf;
+ info[3]=dp_L2;
+ for(i=ncon; i<n; ++i){
+ ptr1=V + i*Vsz; // set ptr1 to point to V_i
+ for(j=0; j<pnp; ++j)
+ if(tmp<ptr1[j*pnp+j]) tmp=ptr1[j*pnp+j];
+ }
+ info[4]=mu/tmp;
+ info[5]=itno;
+ info[6]=stop;
+ info[7]=nfev;
+ info[8]=njev;
+ info[9]=nlss;
+ }
+ //sba_print_sol(n, m, p, 0, pnp, x, mnp, &idxij, rcidxs, rcsubs);
+ retval=(stop!=7)? itno : SBA_ERROR;
+
+freemem_and_return: /* NOTE: this point is also reached via a goto! */
+
+ /* free whatever was allocated */
+ free(jac); free(V);
+ free(e); free(eb);
+ free(dp);
+ free(rcidxs); free(rcsubs);
+#ifndef SBA_DESTROY_COVS
+ if(wght) free(wght);
+#else
+ /* nothing to do */
+#endif /* SBA_DESTROY_COVS */
+
+ free(hx); free(diagV); free(pdp);
+ if(fdj_data.hxx){ // cleanup
+ free(fdj_data.hxx);
+ free(fdj_data.func_rcidxs);
+ }
+
+ sba_crsm_free(&idxij);
+
+ /* free the memory allocated by the linear solver routine */
+ if(linsolver) (*linsolver)(NULL, NULL, NULL, 0, 0);
+
+ return retval;
+}