Added Gauss-Newton least squares solver.

This commit is contained in:
Jérôme 2023-04-01 12:07:41 +02:00
parent d8ffac626f
commit 528f7900bf
4 changed files with 165 additions and 6 deletions

View file

@ -45,7 +45,7 @@ Here is a list of the univariate gradient-based solvers implemented in the libra
##### Derivative-free solvers
Derivative-free solvers, as they name suggest, do not require the derivatives of the function in order to solve it. They are usually more robust, but suffer from a lower convergence rate. They are howeve very useful when the function to solve does not have a closed-form derivative, or when it is too noisy.
Derivative-free solvers, as their name suggests, do not require the derivatives of the function in order to solve it. They are usually more robust, but suffer from a lower convergence rate. They are however very useful when the function to solve does not have a closed-form derivative, or when it is too noisy.
Here is a list of the univariate derivative-free solvers implemented in the library :
- Bisection method (`bisection_solve`)
@ -69,11 +69,11 @@ Gradient-based optimizers require the gradient of the function in order to speed
##### Derivative-free optimizers
Derivative-free optimizers do not require the gradient of the function and are generally more robust to noisy objective functions. Some optimizers are local and other are global. Local optimizers tend to converge to the minimum that is closest to the given starting point. Global minimizers are capable of exploring the solution space more extensively and can sometimes return the global minimum of the objective function. Some algorithms are also multimodal : they can return several distinct minima after a single run (particle swarm, differential evolution).
Derivative-free optimizers do not require the gradient of the function and are generally more robust to noisy objective functions. Some optimizers are local and others are global. Local optimizers tend to converge to the minimum that is closest to the given starting point. Global minimizers are capable of exploring the solution space more extensively and can sometimes return the global minimum of the objective function.
Here is a list of univariate derivative-free optimizers :
Here is a list of univariate derivative-free optimizers implemented in the library :
- Golden section search (`golden_section_minimize`)
- Cubic Lagrange polynomial optimization (`cubic_lagrange_minimize`)
- ~~Cubic Lagrange polynomial optimization (`cubic_lagrange_minimize`)~~
#### Multivariate optimizers
@ -87,5 +87,14 @@ Gradient-based optimizers require the gradient of the function in order to speed
Derivative-free optimizers do not require the gradient of the function and are generally more robust to noisy objective functions.
Here is a list of multivariate derivative-free optimizers :
- Nelder-Mead (`nelder_mead`)
Here is a list of multivariate derivative-free optimizers implemented in the library :
- Nelder-Mead (`nelder_mead`)
- ~~Particle Swarm Optimization (`particle_swarm_minimize`)~~
- ~~Differential evolution (`differential_evolution_minimize`)~~
#### Least-squares solvers
The following non-linear least-squares problem can be solved more efficiently with specialised techniques than with generic optimizers : $$ \min_{\beta} \sum_{i=0}^{N} (f(x_i, \beta) - y_i)^2 $$
Here is a list of multivariate non-linear least-squares solvers implemented in the library :
- Gauss-Newton (`gauss_newton_lsqr`)

View file

@ -4,6 +4,8 @@ extern crate nalgebra as na;
mod univariate_solvers;
mod univariate_minimizers;
mod nelder_mead;
mod particle_swarm_optimization;
mod non_linear_least_squares;
use colored::Colorize;
@ -81,6 +83,26 @@ fn check_result_optim(x: &na::DVector<f64>, f_x: f64, x_true: &na::DVector<f64>,
}
}
/// Compares a computed solution vector `x` against the expected vector `x_true`
/// and prints a pass/fail line for the test named `test_name`.
///
/// The test passes when the Euclidean norm of (x - x_true) is below `tol`.
/// Returns 1 on pass and 0 on fail, so callers can accumulate a pass counter.
/// `verbose` additionally prints the computed vector (and the delta on failure).
fn check_result_vector(x : &na::DVector<f64>, x_true : &na::DVector<f64>, tol : f64, test_name: &str, verbose: bool) -> u32 {
    let diff : f64 = (x - x_true).norm();
    // Pad the test name so the pass/fail column lines up across tests.
    let test_name_padded: String = format!("{:<30}", test_name);
    if diff < tol {
        if verbose {
            println!("{}\t: x = {}\t{}", test_name_padded, format!("{:<20}", x), "passed".green());
        } else {
            println!("{} {}", test_name_padded, "passed".green());
        }
        // Idiomatic expression return instead of a trailing `return` statement.
        1
    } else {
        if verbose {
            println!("{}\t: x = {}\t{} (expected {}, delta = {})", test_name_padded, format!("{:<20}", x), "failed".red(), x_true, (x - x_true));
        } else {
            println!("{} {} : expected {}, got {}", test_name_padded, "failed".red(), x_true, x);
        }
        0
    }
}
fn print_test_results(num_tests_passed: u32, num_tests_total: u32) {
let ratio_str:String = format!("{}/{} ({} %)", num_tests_passed, num_tests_total, ((num_tests_passed as f64)/(num_tests_total as f64)*100.0).round());
if num_tests_passed == num_tests_total {
@ -152,10 +174,51 @@ fn test_multivariate_optimizers(verbose: bool) {
print_test_results(num_tests_passed, num_tests_total);
}
/// Ad-hoc driver used while debugging the (work-in-progress) particle swarm
/// optimizer on a 2-D problem; prints the returned objective value.
fn test_particle_swarm_debug() {
    // Swarm configuration: size, box bounds, stopping criteria, RNG seed.
    let swarm_size: u32 = 10;
    let lower_bounds: na::DVector<f64> = na::DVector::from_vec(vec![-5.0, -5.0]);
    let upper_bounds: na::DVector<f64> = na::DVector::from_vec(vec![5.0, 5.0]);
    let tolerance: f64 = 1e-6;
    let max_iterations: u32 = 1000;
    let seed: u32 = 69;
    let result: f64 = particle_swarm_optimization::particle_swarm_minimize(&rosenbrock, swarm_size, &lower_bounds, &upper_bounds, tolerance, max_iterations, seed);
    println!("x = {}", result);
}
/// Runs the non-linear least-squares test suite (currently a single case:
/// Gauss-Newton fitting of a rational rate model) and prints a summary.
fn test_non_linear_lsqr_solvers(verbose: bool) {
    // Model fitted to the data: y = beta[0] * x / (beta[1] + x), element-wise.
    fn fct_lsqr(x: &na::DVector<f64>, beta: &na::DVector<f64>) -> na::DVector<f64> {
        let mut y: na::DVector<f64> = na::DVector::from_element(x.nrows(), 0.0);
        for i in 0..x.nrows() {
            y[i] = (beta[0] * x[i]) / (beta[1] + x[i]);
        }
        y
    }

    let num_tests_total : u32 = 1;
    let mut num_tests_passed : u32 = 0;

    // Solver settings: convergence tolerance, finite-difference step, iteration cap.
    let tol: f64 = 1e-6;
    let dx_num: f64 = 1e-7;
    let n_iter_max: u32 = 1000;

    // Observed data points (x, y).
    let xp: na::DVector<f64> = na::DVector::from_vec(vec![0.038, 0.194, 0.425, 0.626, 1.253, 2.500, 3.740]);
    let yp: na::DVector<f64> = na::DVector::from_vec(vec![0.050, 0.127, 0.094, 0.2122, 0.2729, 0.2665, 0.3317]);

    // Reference solution computed with NumPy, used as ground truth.
    let beta_numpy: na::DVector<f64> = na::DVector::from_vec(vec![0.3618368601272124, 0.5562663893098662]);

    // Initial guess, replaced by the fitted parameters.
    let mut beta_gauss_newton: na::DVector<f64> = na::DVector::from_vec(vec![0.9, 0.2]);
    beta_gauss_newton = non_linear_least_squares::gauss_newton_lsqr(&xp, &yp, &fct_lsqr, &beta_gauss_newton, tol, n_iter_max, dx_num, false);
    num_tests_passed += check_result_vector(&beta_gauss_newton, &beta_numpy, tol, "Gauss-Newton least squares", verbose);

    print_test_results(num_tests_passed, num_tests_total);
}
/// Entry point: runs every test suite with verbose reporting enabled.
fn main() {
    println!("Testing Rust numerical solvers.");
    let verbose = true;
    // Suites are run in dependency-free order; each prints its own summary.
    test_univariate_solvers(verbose);
    test_univariate_optimizers(verbose);
    test_multivariate_optimizers(verbose);
    // test_particle_swarm_debug();// debug
    test_non_linear_lsqr_solvers(verbose);
}

View file

@ -0,0 +1,47 @@
extern crate nalgebra as na;
/// Gauss-Newton algorithm for a non-linear least-squares problem: finds the
/// parameter vector `beta` minimizing sum_i (fct_lsqr(xp, beta)[i] - yp[i])^2.
///
/// # Arguments
/// * `xp` - x values of the data points
/// * `yp` - y values of the data points
/// * `fct_lsqr` - model function: given the x values and a parameter vector,
///   returns the model prediction for every data point
/// * `beta0` - initial guess for the parameters
/// * `tol` - convergence tolerance on the norm of the Gauss-Newton step
/// * `n_iter_max` - maximum number of iterations
/// * `dx_num` - step size for the forward-difference Jacobian approximation
/// * `verbose` - if true, prints the state at each iteration
///
/// # Panics
/// Panics if the normal equations (J^T*J) * delta = -J^T*r cannot be solved,
/// e.g. when the Jacobian is rank-deficient at the current iterate.
pub fn gauss_newton_lsqr<F: Fn(&na::DVector<f64>, &na::DVector<f64>) -> na::DVector<f64>>(xp: &na::DVector<f64>, yp: &na::DVector<f64>, fct_lsqr: &F, beta0: &na::DVector<f64>, tol: f64, n_iter_max: u32, dx_num: f64, verbose: bool) -> na::DVector<f64> {
    let mut beta: na::DVector<f64> = beta0.clone();
    let n_pts: usize = xp.len();
    let n_dims: usize = beta.len();
    for iter in 0..n_iter_max {
        // Evaluate the model once per iteration; reused for both the residual
        // vector and the finite-difference Jacobian (the original evaluated it twice).
        let f_beta: na::DVector<f64> = fct_lsqr(xp, &beta);
        let residuals = yp - &f_beta;// r = y - f(x, beta)
        // Forward-difference Jacobian: jac[(i, j)] ~ d f_i / d beta_j.
        let mut jac: na::DMatrix<f64> = na::DMatrix::zeros(n_pts, n_dims);
        for j in 0..n_dims {
            let mut beta_dx: na::DVector<f64> = beta.clone();
            beta_dx[j] += dx_num;
            let jac_col = (fct_lsqr(xp, &beta_dx) - &f_beta) / dx_num;
            for i in 0..n_pts {
                jac[(i, j)] = jac_col[i];
            }
        }
        // Gauss-Newton step: solve the normal equations (J^T*J) delta = -J^T*r via QR.
        let jac_t = jac.transpose();// J^T
        let jac_t_jac = &jac_t * jac;// J^T*J
        let jac_t_res = -&jac_t * &residuals;// -J^T*r
        let delta_beta = jac_t_jac
            .qr()
            .solve(&jac_t_res)
            .expect("gauss_newton_lsqr: singular normal equations (rank-deficient Jacobian)");
        if verbose {
            println!("iter = {}\tbeta = {}\tresiduals = {}\tdelta_beta = {}", iter, &beta, &residuals, &delta_beta);
        }
        beta = &beta - &delta_beta;
        // Converged once the parameter update is negligible.
        if delta_beta.norm() < tol {
            break;
        }
    }
    beta
}

View file

@ -0,0 +1,40 @@
extern crate nalgebra as na;
#[path = "./xorwow.rs"]
mod xorwow;
use xorwow::Xorwow;
/// A particle in the particle swarm optimization algorithm
struct Particle {
x: na::DVector<f64>,// position
v: na::DVector<f64>,// velocity
fx: f64, // function value at x
}
impl Particle {
    /// Builds a particle from its initial position, velocity, and the
    /// objective value already evaluated at that position.
    fn new(x: na::DVector<f64>, v: na::DVector<f64>, fx: f64) -> Particle {
        // Field-init shorthand: the local names match the struct fields,
        // so `x: x, v: v, fx: fx` is redundant (clippy: redundant_field_names).
        Particle { x, v, fx }
    }
}
/// Particle swarm minimization of `f` over the box [lb, ub].
///
/// NOTE(review): work in progress — only the swarm initialization is implemented.
/// There is no iteration loop yet: the function initializes `n_particles`
/// particles, then returns `f((lb + ub) / 2)`, i.e. the objective evaluated at
/// the center of the bounds, NOT a minimized value. `tol` and `n_iter_max` are
/// currently unused, and `particles` is built but never read after the loop.
pub fn particle_swarm_minimize<F: Fn(&na::DVector<f64>) -> f64>(f: F, n_particles: u32, lb: &na::DVector<f64>, ub: &na::DVector<f64>, tol: f64, n_iter_max: u32, rng_seed: u32) -> f64 {
    let mut particles: Vec<Particle> = Vec::new();
    // Seeded xorwow PRNG so runs are reproducible for a given rng_seed.
    let mut rng = Xorwow::new(rng_seed);
    for _ in 0..n_particles {
        // Random initial position inside [lb, ub] and random initial velocity.
        // NOTE(review): `(ub - lb) * rng.rand_vec(..)` assumes this product is a
        // component-wise scaling — confirm against rand_vec's return type.
        let x = lb + (ub - lb) * rng.rand_vec(lb.len());
        let v = (lb - ub) * rng.rand_vec(lb.len());
        let fx = f(&x);
        println!("x = {}\tv = {}\tf(x) = {}", &x, &v, fx);// DEBUG
        particles.push(Particle::new(x, v, fx));
    }
    // Placeholder result until the optimization loop is written.
    let x = (lb + ub) / 2.0;
    return f(&x);
}