// file: $isip/class/numeric/NonlinearOptimization/nonlin_05.cc // version: $Id: nonlin_05.cc 7473 2001-11-13 15:30:23Z gao $ // // isip include files // #include "NonlinearOptimization.h" // method: levMarqTemplate // // arguments: // TVector& params: (input/output) parameter vector to optimize // TIntegral& chi_square: (output) converged chi-square estimate // const TVector& x: (input) measured x data // const TVector& y: (input) measured y data // const TVector& stddev: (input) the standard deviation of the error in // measuring y(i) // bool8 (*opt_func)(TIntegral&, // TVector&, // const TIntegral, // const TVector&): (input) function whose parameters we // are optimizing // TIntegral convergence: (input) iterations end when the chi-square // estimate decreases by less than convergence // percent // // return: bool8 value indicating status // // this method implements the Levenberg-Marquardt method of // least-squares optimization of nonlinear functions. we follow closely // the derivation provided in: // // W. Press, S. Teukolsky, W. Vetterling, B. Flannery, // Numerical Recipes in C, Second Edition, Cambridge University Press, // New York, New York, USA, pp. 681-685, 1995. // template bool8 NonlinearOptimization::levMarqTemplate(TVector& params_a, TIntegral& chi_square_a, const TVector& x_a, const TVector& y_a, const TVector& stddev_a, bool8 (*opt_func_a)(TIntegral&, TVector&, const TIntegral, const TVector&), TIntegral convergence_a) { // verify the internal parameters // int32 params_length = params_a.length(); if (params_length <= 0) { return Error::handle(name(), L"levMarqTemplate", ERR_LEV_MARQ, __FILE__, __LINE__); } // verify the function pointer // if (opt_func_a == NULL) { return Error::handle(name(), L"levMarqTemplate", ERR_LEV_MARQ, __FILE__, __LINE__); } // check vector lengths // int32 length = x_a.length(); if ((y_a.length() != length) || (stddev_a.length() != length)) { return Error::handle(name(), L"levMarqTemplate", ERR_LEV_MARQ, __FILE__, __LINE__); } // if no data to be optimized, just return // if (length == 0) { return true; } // setup the internal data sizes // TMatrix alpha(params_length, params_length, Integral::SYMMETRIC); TVector beta(params_length); // set the initial lambda parameter: // lambda is the deviation parameter that tells the algorithm how // much to adjust the parameters on each iteration. As the Chi // square errors decrease, the lambda value is decreased so the // gradient descent does not jump around the minimum. If the Chi // square error is very large, the lambda value is increased so // that the estimates take a large jump on the next iteration. // TIntegral lambda = LEVMARQ_INIT_LAMBDA; // precompute the inverse variance // TVector inv_variance(length); inv_variance.square(stddev_a); inv_variance.inverse(); // store the current parameter set // TVector tmp_params(params_a); // run the initial iteration to get initial chi-square measurements // step 1 on pp. 684 // if (!levMarqChiSquare(chi_square_a, alpha, beta, x_a, y_a, inv_variance, tmp_params, opt_func_a)) { return Error::handle(name(), L"levMarqTemplate", ERR_LEV_MARQ, __FILE__, __LINE__); } // copies of alpha and beta // TMatrix tmp_alpha(alpha); TVector tmp_beta(beta); // loop until convergence // TIntegral new_chi_square = chi_square_a; bool8 converged = false; bool8 is_plateau = false; while (!converged) { // modify the alpha matrix (eq. 15.5.13) // scaleDiagonal(alpha, (1 + lambda)); // solve the set of linear equations (eq. 15.5.14) for a candidate // shift in the parameters. after this operation, tmp_params // contains the candidate shift. step 3 on pp. 684 // LinearAlgebra::linearSolve(tmp_params, alpha, beta); // test the candidate shift // tmp_params.add(params_a); if (!levMarqChiSquare(new_chi_square, alpha, beta, x_a, y_a, inv_variance, tmp_params, opt_func_a)) { return Error::handle(name(), L"levMarqTemplate", ERR_LEV_MARQ, __FILE__, __LINE__); } // see if the chi-square measurement decreased. steps 4 and 5 pp. 684 // if (new_chi_square > chi_square_a) { // reset the parameters // alpha.assign(tmp_alpha); beta.assign(tmp_beta); tmp_params.assign(params_a); // increase lambda to cause a steeper descent and reset the // test parameters // lambda *= 10.0; } else { // accept the current solution and update the convergence values // lambda *= 0.1; params_a.assign(tmp_params); // set the new alpha and beta // tmp_alpha.assign(alpha); tmp_beta.assign(beta); // check for equality. if we are exactly equal then we'll give another // chance to converge. // if (chi_square_a == new_chi_square) { if (is_plateau) { converged = true; } is_plateau = true; } // check the percent difference for convergence // else { is_plateau = false; if (Integral::almostEqual(chi_square_a, new_chi_square, convergence_a)) { converged = true; } } // update the chi-square estimate // chi_square_a = new_chi_square; } } return true; } // explicit instantiations // template bool8 NonlinearOptimization::levMarqTemplate(VectorFloat&, float32& chi_square_a, const VectorFloat& x_a, const VectorFloat& y_a, const VectorFloat& stddev_a, bool8 (*opt_func_a)(float32&, VectorFloat&, const float32, const VectorFloat&), float32 convergence_a); template bool8 NonlinearOptimization::levMarqTemplate(VectorDouble&, float64& chi_square_a, const VectorDouble& x_a, const VectorDouble& y_a, const VectorDouble& stddev_a, bool8 (*opt_func_a)(float64&, VectorDouble&, const float64, const VectorDouble&), float64 convergence_a); // method: levMarqChiSquare // // arguments: // TIntegral& chi_square: (output) the error estimate of the input data // given the model parameters defined by: // chi_square = { y(i) - y_est(x(i); params) } ^ 2 // sum ------------------------------------ // i stddev(i)^2 // TMatrix& alpha: (output) is a modification of the Hessian (2nd // derivative) matrix with respect to the parameters // being optimized as shown on page 684 of the reference. // alpha(k,l) is the 2nd derivative of the Chi square // merit function w.r.t (params(k) * params(l)) // TVector& beta: (output) gradient of the Chi square merit function w.r.t // params(k). As the estimation converges, // beta(k) should approach 0.0 // const TVector& x: (input) measured x data // const TVector& y: (input) measured y data // const TVector& inv_variance: (input) 1 / (stddev^2) // const TVector& params: (input) test parameters // bool8 (*opt_func)(TIntegral&, // TVector&, // const TIntegral, // const TVector&): (input) function whose parameters we // are optimizing // // return: bool8 value indicating status // // this method measures the chi-square merit of a set of parameters as a // parametric fit of the input (x,y) data to a functional form (opt_func) // according to the derivation provided in: // // W. Press, S. Teukolsky, W. Vetterling, B. Flannery, // Numerical Recipes in C, Second Edition, Cambridge University Press, // New York, New York, USA, pp. 681-685, 1995. // template bool8 NonlinearOptimization::levMarqChiSquare(TIntegral& chi_square_a, TMatrix& alpha_a, TVector& beta_a, const TVector& x_a, const TVector& y_a, const TVector& inv_variance_a, const TVector& params_a, bool8 (*opt_func_a)(TIntegral&, TVector&, const TIntegral, const TVector&) ) { // check the internal data // if (opt_func_a == NULL) { return Error::handle(name(), L"levMarqChiSquare", ERR_LEV_MARQ, __FILE__, __LINE__); } // declare local variables // int32 num_params = params_a.length(); int32 num_points = x_a.length(); TIntegral y_tmp = 0; TVector y_diff(num_points); TVector derivatives(num_params); // initialize outputs // chi_square_a = 0; alpha_a.changeType(Integral::SYMMETRIC); alpha_a.setDimensions(num_params, num_params); alpha_a.assign(0.0); beta_a.setLength(num_params); beta_a.assign(0.0); // loop over all data points and accumulate the chi-squared statistic // a side effect of this is updating the alpha and beta parameters // for (int32 i = 0; i < num_points; i++) { // call the user-supplied function and compute the sample error // (*opt_func_a)(y_tmp, derivatives, (TIntegral)x_a(i), params_a); y_diff(i) = y_a(i) - y_tmp; // update the alpha values (eq. 15.5.11) and beta values (eq. 15.5.6 and // eq. 15.5.8). note that alpha_d is symmetric so only alpha_d(k,l), // k <= l, need be computed // for (int32 k = 0; k < num_params; k++) { beta_a(k) += y_diff(i) * derivatives(k) * inv_variance_a(i); for (int32 l = 0; l <= k; l++) { alpha_a.setValue(k, l, alpha_a.getValue(k, l) + (derivatives(k) * derivatives(l) * inv_variance_a(i))); } } } // update the chi square estimate // y_diff.square(); y_diff.mult(inv_variance_a); chi_square_a = y_diff.sum(); // exit gracefully // return true; } // explicit instantiations // template bool8 NonlinearOptimization::levMarqChiSquare(float32& chi_square_a, MatrixFloat& alpha_a, VectorFloat& beta_a, const VectorFloat& x_a, const VectorFloat& y_a, const VectorFloat& inv_variance_a, const VectorFloat& params_a, bool8 (*)(float32&, VectorFloat&, const float32, const VectorFloat&)); template bool8 NonlinearOptimization::levMarqChiSquare(float64& chi_square_a, MatrixDouble& alpha_a, VectorDouble& beta_a, const VectorDouble& x_a, const VectorDouble& y_a, const VectorDouble& inv_variance_a, const VectorDouble& params_a, bool8 (*opt_func_a)(float64&, VectorDouble&, const float64, const VectorDouble&)); // method: scaleDiagonal // // arguments: // TMatrix& mat: (input/output) matrix whose diagonal we want to scale // const TIntegral& scale: (input) value to scale the diagonal by // // return: bool8 value indicating status // // this method scales each element of the matrix diagonal by the // scale value // template bool8 NonlinearOptimization::scaleDiagonal(TMatrix& mat_a, const TIntegral& scale_a) { // multiply each element of the diagonal of the matrix by // the scale // int32 dim = mat_a.getNumRows(); for (int32 i = 0; i < dim; i++) { mat_a.setValue(i, i, (mat_a(i, i) * scale_a)); } // exit gracefully // return true; } // explicit instantiations // template bool8 NonlinearOptimization::scaleDiagonal(MatrixFloat&, const float32&); template bool8 NonlinearOptimization::scaleDiagonal(MatrixDouble&, const float64&);