Commented code

2594c6f4 · Andrey Filippov · 09a0cb0e · 2594c6f4
Commit 2594c6f4 authored Mar 23, 2025 by Andrey Filippov
Show whitespace changes
Inline Side-by-side

Showing with 176 additions and 37 deletions

CholeskyBlock.java src/main/java/com/elphel/imagej/common/CholeskyBlock.java +176 -37

No files found.
--- a/src/main/java/com/elphel/imagej/common/CholeskyBlock.java
+++ b/src/main/java/com/elphel/imagej/common/CholeskyBlock.java
@@ -4,6 +4,22 @@ package com.elphel.imagej.common;
 **
 ** Copyright (C) 2025 Elphel, Inc.
 **
+** Using publication with Block Cholesky description:
+** Chen, Jianping, et al. "Block algorithm and its implementation for Cholesky factorization."
+** ICCGI 2013 (2013): 245.
+**
+** Data (A and L) are stored as 1D arrays, in columns of specified width (m), in line-scan
+** order in each column (right, then down), columns themselves - to the right.
+** This makes m x m "tiles" compact in terms of cache, for my computer with the 8-core processor
+** and N=2258 matrices the optimal m=70. Multithreaded methods operate on tiles.
+** When matrix size is not multiple of m, the last (bottom-right) block has a remainder size,
+** other bottom and rightmost are rectangular, all other are m x m.
+**
+** For the tested processor and matrix size the multithreaded improvement is ~9x, for the
+** solve() method ~5x compared to a standard Jama single-threaded   CholeskyDecomposition class.
+** First run in multithreaded mode results is longer execution time than the subsequent runs. 
+** End of this file contains measured timing.  
+**
 ** -----------------------------------------------------------------------------**
 **
 **  CholeskyBlock.java is free software: you can redistribute it and/or modify
@@ -23,13 +39,14 @@ package com.elphel.imagej.common;
 import java.util.Arrays;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
-import com.elphel.imagej.tileprocessor.ImageDtt;
 import Jama.Matrix;
 public class CholeskyBlock {
+	// These 3 parameters are public and may be set before using the constructor
 	public static int dflt_m =            70; // tile size if not specified during initialization
 	public static int dflt_threads =     100; // maximal number of threads to use for decomposition
 	public static int dflt_solve_threads =16; //  4; // maximal number of threads to use for solve()
 	public static boolean debug = false; // true;
 	private final int m;  // tile size
 	private final int np; // number of elements in row/col
@@ -39,7 +56,10 @@ public class CholeskyBlock {
 	private final double [] A;
 	private final double [] L;
 	private final double [] B;
-//wjtjlambda_copy1.getArray()
+	/**
+	 * Create Cholesky factorization from SPD matrix mA
+	 * @param mA - SPD Matrix instance
+	 */
 	public CholeskyBlock (Matrix mA) {
 		m = dflt_m;
 		decomp_threads = dflt_threads;
@@ -53,6 +73,11 @@ public class CholeskyBlock {
 		choleskyBlockMulti(); 
 	}
+	/**
+	 * Create Cholesky factorization from 2D square array representing SPD matrix
+	 * @param A_in - 2D array corresponding to SPD
+	 */
 	public CholeskyBlock (
 			double [][] A_in) {
 		m = dflt_m;
@@ -67,6 +92,11 @@ public class CholeskyBlock {
 		choleskyBlockMulti(); 
 	}
+	/**
+	 * Create Cholesky factorization from 2D square array representing SPD matrix
+	 * @param A_in - 2D array corresponding to SPD
+	 * @param size block size (tested optimal 70 easier debugging with 100)
+	 */
 	public CholeskyBlock (
 			double [][] A_in,
 			int size) {
@@ -82,6 +112,12 @@ public class CholeskyBlock {
 		choleskyBlockMulti(); 
 	}
+	/**
+	 * Create Cholesky factorization from 2D square array representing SPD matrix
+	 * @param A_in - 2D array corresponding to SPD
+	 * @param size block size (tested optimal 70 easier debugging with 100)
+	 * @param decomp_threads specify number of decomposition threads
+	 */
 	public CholeskyBlock (
 			double [][] A_in,
 			int size,
@@ -98,6 +134,11 @@ public class CholeskyBlock {
 		choleskyBlockMulti(); 
 	}
+	/**
+	 * Set the internal data Array A stored as 1D, linescan in columns from a square SPD array 
+	 * @param A_in square SPD array (only lower triangle with the diagonal are used)
+	 */
 	private void setup_ATriangle(double [][] A_in) {
 		for (int tile_row = 0; tile_row < nf; tile_row++) {
 			for (int tile_col = 0; tile_col < tile_row; tile_col++) {
@@ -150,21 +191,19 @@ public class CholeskyBlock {
 		}
 	}
+	/**
+	 * Get L lower triangular matrix of Cholesky facterization from the internal column-line-scan 
+	 * representation. 
+	 * @return lower triangular Cholesky Matrix
+	 */
 	public Matrix getL() {
 		return new Matrix(get_LTriangle(),np,np);
 	}
-	public void setB(Matrix mb) {
+	/**
-		double [][] a = mb.getArray();
+	 * Get internal Cholesky lower-left matrix as a 2d square array
-		for (int i = 0; i < np; i++) {
+	 * @return 2d square array containing Cholesky low-left matrix data
-			B[i] = a[i][0];
+	 */
-		}
-	}
-	public Matrix getX() {
-		return new Matrix (B, np);
-	}
 	private double [][] get_LTriangle() {
 		double [][] L_out = new double[np][np];
 		for (int tile_row = 0; tile_row < nf; tile_row++) {
@@ -217,7 +256,27 @@ public class CholeskyBlock {
 			}
 		}
 		return L_out;
+	}
+	/**
+	 * Set internal B/Y/X single-column matrix. Internal representation changes from B to Y to X (solution)
+	 * @param mb right-side single-column Matrix (not modified)
+	 */
+	public void setB(Matrix mb) {
+		double [][] a = mb.getArray();
+		for (int i = 0; i < np; i++) {
+			B[i] = a[i][0];
+		}
+	}
+	/**
+	 * Get internal B/Y/X single-column matrix. Internal representation changes from B to Y to X (solution)
+	 * @return a single-column Matrix containing the solution
+	 */
+	public Matrix getX() {
+		return new Matrix (B, np);
 	}
 	/**
@@ -230,6 +289,11 @@ public class CholeskyBlock {
 		return j * (m * np) + ((j >= nf) ? (np-nf*m): m) * m * i;
 	}
+	/**
+	 * Calculate non-diagonal tiles of the block-Cholesky factorization 
+	 * @param i - tile row
+	 * @param j - tile column < i (tile row)
+	 */
 	private void setL21(
 			int       i,   // i > j,
 			int       j) { // j <nf
@@ -251,6 +315,13 @@ public class CholeskyBlock {
 		return;
 	}
+	/**
+	 * Convert remaining tiles of A-matrix (only in lower-left triangle of tiles, including diagonal ones)
+	 * @param diag row/column of the diagonal tile that defines the remainder of the A matrix (to the right
+	 *        and below the diagonal tile
+	 * @param row row of the transformed A tile (> diag)
+	 * @param col column of the transformed A tile (> diag, <= row)
+	 */
 	private void setA22(
 			int diag,// < col,  < row
 			int row, // >= col
@@ -279,8 +350,11 @@ public class CholeskyBlock {
 		return;
 	}
+	/**
+	 * Run multithreaded Cholosky factorization, convert internal representation of the A-data to L-data
+	 */
 	private void choleskyBlockMulti() {
-		final Thread[] threads =       ImageDtt.newThreadArray(decomp_threads); // 1);
+		final Thread[] threads =       newThreadArray(decomp_threads); // 1);
 		final AtomicInteger ai =       new AtomicInteger(0);
 		cholesky_single(0);
 		// Calculate first column under diagonal (L21) - maybe use m
@@ -294,8 +368,7 @@ public class CholeskyBlock {
 				}
 			};
 		}		      
-		ImageDtt.startAndJoin(threads);
+		startAndJoin(threads);
 		for (int tile_diag = 1; tile_diag < n; tile_diag++) {
 			final int ftile_diag = tile_diag;
 			// Calculate A in one tile column of the remaining A2'
@@ -320,7 +393,7 @@ public class CholeskyBlock {
 					}
 				};
 			}		      
-			ImageDtt.startAndJoin(threads);
+			startAndJoin(threads);
 			if (ftile_diag < (n-1)) {
 				// Now in parallel calculate L in column ftile_diag under diagonal and
 				// finish A2' to the right of ftile_diag column
@@ -354,15 +427,18 @@ public class CholeskyBlock {
 						}
 					};
 				}		      
-				ImageDtt.startAndJoin(threads);
+				startAndJoin(threads);
 			}
 		}
 		return;
 	}
 	// Single-threaded, used for a single-tile Cholesky deconstruction
+	/**
+	 * A single-threaded, single-tile Cholesky factorization converting diagonal A-tile to
+	 * the corresponding diagonal L-tile 
+	 * @param diag row/column of the tile to convert.
+	 */
 	private void cholesky_single(int diag) {
 		int h = (diag < nf) ? m : (np-nf*m);			
 		int indx = indx_IJ(diag, diag);
@@ -388,14 +464,14 @@ public class CholeskyBlock {
 	/**
-	 * B has data, will be modified
+	 * Solve L * Y = B for a single-tile data. B has data, will be modified
+	 * to contain Y
 	 * @param diag number of the diagonal tile (last may be smaller)
 	 */
 	private void solveY(int diag) {
 		int h = (diag < nf) ? m : (np-nf*m);			
 		int lindx = indx_IJ(diag, diag);
 		int xindx = diag * m; // start of B/x
-		// Solve L*Y = B;
 		for (int k = 0; k < h; k++) {
 			double x = B[xindx+k];
 			for (int i = 0; i < k ; i++) {
@@ -407,9 +483,9 @@ public class CholeskyBlock {
 	}
 	/**
-	 * Solve single-tile Lt*X=Y, tiles are counted from the right-bottom
+	 * Solve single-tile Lt*X=Y
-	 * B has data, will be modified
+	 * B has data (Y), will be modified to contain X
-	 * @param diag number of the diagonal tile from the bottom-right, 
+	 * @param diag number of the diagonal tile 
 	 *        last may be smaller.
 	 */
 	private void solveX(int diag) {
@@ -429,7 +505,7 @@ public class CholeskyBlock {
 	/**
 	 * Subtract Y-column from running B using Y-data in tile j
-	 * from the tile i > j  
+	 * from the tile i > j.  
 	 * @param i tile row to subtract from in B
 	 * @param j tile row corresponding to diagonal L tiles
 	 */
@@ -452,15 +528,15 @@ public class CholeskyBlock {
 	/**
 	 * Subtract X-column from running B using X-data in tile j
-	 * from the tile i > j, counting from the right-bottom  
+	 * from the tile i > j.  
-	 * @param i tile row (from the bottom) to subtract from in B
+	 * @param i tile row to subtract from in B
-	 * @param j tile row corresponding to diagonal L tiles, from the bottom
+	 * @param j tile row corresponding to diagonal L tiles
 	 */
 	private void subXCol(
 			int       i,   // i < j,
 			int       j) { // j > i
-		int lindx = indx_IJ(j,i); // transposed
+		int lindx = indx_IJ(j,i);
 		int xsrc = j * m; // start of B/x to use
 		int xdst = i * m; // start of B/x to modify
 		int h = (j < nf) ? m : (np-nf*m);
@@ -473,20 +549,32 @@ public class CholeskyBlock {
 		}
 		return;
 	}
+	/**
+	 * Solve with existing Cholesky decomposition for a single-column Matrix b
+	 * @param b a single-column matrix B that L * Lt * X = B
+	 * @return A single-column solution X
+	 */
 	public Matrix solve (Matrix b) {
 		setB(b);
 		solve();
 		return getX();
 	}
+	/**
+	 * Solve with existing Cholesky decomposition for an internal representation of matrix B,
+	 * replace B data with X data
+	 */
 	public void solve() {
 		solve(dflt_solve_threads); // this.solve_threads);
 	}
+	/**
+	 * Solve with existing Cholesky decomposition for an internal representation of matrix B,
+	 * replace B data with X data, limit number of threads to use.
+	 * @param solve_threads maximal number of threads to use
+	 */
 	public void solve(int solve_threads) {
-		final Thread[] threads =             ImageDtt.newThreadArray(solve_threads);
+		final Thread[] threads =             newThreadArray(solve_threads);
 		final AtomicInteger ai =             new AtomicInteger(0);
 		AtomicBoolean [][] tile_started =    new AtomicBoolean[n][n]; 
 		AtomicBoolean [][] tile_done =       new AtomicBoolean[n][n];
@@ -554,7 +642,7 @@ public class CholeskyBlock {
 				}
 			};
 		}		      
-		ImageDtt.startAndJoin(threads);
+		startAndJoin(threads);
 		// Solve L'X = Y
 		rows_done.set(0);// this number of tile rows solved
 		for (int row = 0; row < n; row++) {
@@ -611,7 +699,7 @@ public class CholeskyBlock {
 				}
 			};
 		}		      
-		ImageDtt.startAndJoin(threads);
+		startAndJoin(threads);
 		starts[4] = (((double) System.nanoTime()) * 1E-9) - start_time;
 		if (debug) {
 			System.out.println("\ncholeskyBlock.solve(): ==== number of threads = "+threads.length+ "====");
@@ -624,6 +712,12 @@ public class CholeskyBlock {
 	}
 	// for comparison/verification
+	/**
+	 * Standard single-threaded solution with Cholesky decomposition from Jama used for comparison.
+	 * @param B A single-column Matrix for the right side of the equation A * X = L * Lt * X = B
+	 * @param L Cholesky decomposition lower-left Matrix
+	 * @return a single-column solution X.
+	 */
 	public static Matrix solve_single (Matrix B, double [][] L) {
 		int n = L.length;
 		if (B.getRowDimension() != n) {
@@ -647,6 +741,51 @@ public class CholeskyBlock {
 		}
 		return new Matrix(x,n);
 	}
+	/*
+	 * Multithreading methods from Stephan Preibisch's Multithreading.java class. See:
+	 * http://repo.or.cz/w/trakem2.git?a=blob;f=mpi/fruitfly/general/MultiThreading.java;hb=HEAD
+	 */
+	/**
+	 * Create a Thread[] array as large as the number of processors available.
+	 * @return an array of threads 
+	 */
+	public static Thread[] newThreadArray() {
+		return newThreadArray(100);
+	}
+	/**
+	 * Create a Thread[] array as large as the number of processors available.
+	 * @param maxCPUs limit number of threads
+	 * @return an array of threads
+	 */
+	public static Thread[] newThreadArray(int maxCPUs) { // USED in lwir
+		int n_cpus = Runtime.getRuntime().availableProcessors();
+		if (n_cpus>maxCPUs)n_cpus=maxCPUs;
+		return new Thread[n_cpus];
+	}
+	/**
+	 * Start all given threads and wait on each of them until all are done.
+	 * @param threads to start
+	 */
+	public static void startAndJoin(Thread[] threads) // USED in lwir
+	{
+		for (int ithread = 0; ithread < threads.length; ++ithread)
+		{
+			threads[ithread].setPriority(Thread.NORM_PRIORITY);
+			threads[ithread].start();
+		}
+		try
+		{
+			for (int ithread = 0; ithread < threads.length; ++ithread)
+				threads[ithread].join();
+		} catch (InterruptedException ie)
+		{
+			throw new RuntimeException(ie);
+		}
+	}
 	/* Results
    First run, next MULTITHREADED ones are faster. Ran on an 8-core x86, 16 threads
 	choleskyBlockMulti() last pass n= 33, tile_diag=32