Commit 2594c6f4 authored by Andrey Filippov's avatar Andrey Filippov

Commented code

parent 09a0cb0e
...@@ -4,6 +4,22 @@ package com.elphel.imagej.common; ...@@ -4,6 +4,22 @@ package com.elphel.imagej.common;
** **
** Copyright (C) 2025 Elphel, Inc. ** Copyright (C) 2025 Elphel, Inc.
** **
** Using publication with Block Cholesky description:
** Chen, Jianping, et al. "Block algorithm and its implementation for Cholesky factorization."
** ICCGI 2013 (2013): 245.
**
** Data (A and L) are stored as 1D arrays, in columns of specified width (m), in line-scan
** order in each column (right, then down), columns themselves - to the right.
** This makes m x m "tiles" compact in terms of cache, for my computer with the 8-core processor
** and N=2258 matrices the optimal m=70. Multithreaded methods operate on tiles.
** When matrix size is not multiple of m, the last (bottom-right) block has a remainder size,
** other bottom and rightmost are rectangular, all other are m x m.
**
** For the tested processor and matrix size the multithreaded improvement is ~9x, for the
** solve() method ~5x compared to a standard Jama single-threaded CholeskyDecomposition class.
** First run in multithreaded mode results is longer execution time than the subsequent runs.
** End of this file contains measured timing.
**
** -----------------------------------------------------------------------------** ** -----------------------------------------------------------------------------**
** **
** CholeskyBlock.java is free software: you can redistribute it and/or modify ** CholeskyBlock.java is free software: you can redistribute it and/or modify
...@@ -23,13 +39,14 @@ package com.elphel.imagej.common; ...@@ -23,13 +39,14 @@ package com.elphel.imagej.common;
import java.util.Arrays; import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import com.elphel.imagej.tileprocessor.ImageDtt;
import Jama.Matrix; import Jama.Matrix;
public class CholeskyBlock { public class CholeskyBlock {
// These 3 parameters are public and may be set before using the constructor
public static int dflt_m = 70; // tile size if not specified during initialization public static int dflt_m = 70; // tile size if not specified during initialization
public static int dflt_threads = 100; // maximal number of threads to use for decomposition public static int dflt_threads = 100; // maximal number of threads to use for decomposition
public static int dflt_solve_threads =16; // 4; // maximal number of threads to use for solve() public static int dflt_solve_threads =16; // 4; // maximal number of threads to use for solve()
public static boolean debug = false; // true; public static boolean debug = false; // true;
private final int m; // tile size private final int m; // tile size
private final int np; // number of elements in row/col private final int np; // number of elements in row/col
...@@ -39,7 +56,10 @@ public class CholeskyBlock { ...@@ -39,7 +56,10 @@ public class CholeskyBlock {
private final double [] A; private final double [] A;
private final double [] L; private final double [] L;
private final double [] B; private final double [] B;
//wjtjlambda_copy1.getArray() /**
* Create Cholesky factorization from SPD matrix mA
* @param mA - SPD Matrix instance
*/
public CholeskyBlock (Matrix mA) { public CholeskyBlock (Matrix mA) {
m = dflt_m; m = dflt_m;
decomp_threads = dflt_threads; decomp_threads = dflt_threads;
...@@ -53,6 +73,11 @@ public class CholeskyBlock { ...@@ -53,6 +73,11 @@ public class CholeskyBlock {
choleskyBlockMulti(); choleskyBlockMulti();
} }
/**
* Create Cholesky factorization from 2D square array representing SPD matrix
* @param A_in - 2D array corresponding to SPD
*/
public CholeskyBlock ( public CholeskyBlock (
double [][] A_in) { double [][] A_in) {
m = dflt_m; m = dflt_m;
...@@ -67,6 +92,11 @@ public class CholeskyBlock { ...@@ -67,6 +92,11 @@ public class CholeskyBlock {
choleskyBlockMulti(); choleskyBlockMulti();
} }
/**
* Create Cholesky factorization from 2D square array representing SPD matrix
* @param A_in - 2D array corresponding to SPD
* @param size block size (tested optimal 70 easier debugging with 100)
*/
public CholeskyBlock ( public CholeskyBlock (
double [][] A_in, double [][] A_in,
int size) { int size) {
...@@ -82,6 +112,12 @@ public class CholeskyBlock { ...@@ -82,6 +112,12 @@ public class CholeskyBlock {
choleskyBlockMulti(); choleskyBlockMulti();
} }
/**
* Create Cholesky factorization from 2D square array representing SPD matrix
* @param A_in - 2D array corresponding to SPD
* @param size block size (tested optimal 70 easier debugging with 100)
* @param decomp_threads specify number of decomposition threads
*/
public CholeskyBlock ( public CholeskyBlock (
double [][] A_in, double [][] A_in,
int size, int size,
...@@ -98,6 +134,11 @@ public class CholeskyBlock { ...@@ -98,6 +134,11 @@ public class CholeskyBlock {
choleskyBlockMulti(); choleskyBlockMulti();
} }
/**
* Set the internal data Array A stored as 1D, linescan in columns from a square SPD array
* @param A_in square SPD array (only lower triangle with the diagonal are used)
*/
private void setup_ATriangle(double [][] A_in) { private void setup_ATriangle(double [][] A_in) {
for (int tile_row = 0; tile_row < nf; tile_row++) { for (int tile_row = 0; tile_row < nf; tile_row++) {
for (int tile_col = 0; tile_col < tile_row; tile_col++) { for (int tile_col = 0; tile_col < tile_row; tile_col++) {
...@@ -150,21 +191,19 @@ public class CholeskyBlock { ...@@ -150,21 +191,19 @@ public class CholeskyBlock {
} }
} }
/**
* Get L lower triangular matrix of Cholesky facterization from the internal column-line-scan
* representation.
* @return lower triangular Cholesky Matrix
*/
public Matrix getL() { public Matrix getL() {
return new Matrix(get_LTriangle(),np,np); return new Matrix(get_LTriangle(),np,np);
} }
public void setB(Matrix mb) { /**
double [][] a = mb.getArray(); * Get internal Cholesky lower-left matrix as a 2d square array
for (int i = 0; i < np; i++) { * @return 2d square array containing Cholesky low-left matrix data
B[i] = a[i][0]; */
}
}
public Matrix getX() {
return new Matrix (B, np);
}
private double [][] get_LTriangle() { private double [][] get_LTriangle() {
double [][] L_out = new double[np][np]; double [][] L_out = new double[np][np];
for (int tile_row = 0; tile_row < nf; tile_row++) { for (int tile_row = 0; tile_row < nf; tile_row++) {
...@@ -217,7 +256,27 @@ public class CholeskyBlock { ...@@ -217,7 +256,27 @@ public class CholeskyBlock {
} }
} }
return L_out; return L_out;
}
/**
* Set internal B/Y/X single-column matrix. Internal representation changes from B to Y to X (solution)
* @param mb right-side single-column Matrix (not modified)
*/
public void setB(Matrix mb) {
double [][] a = mb.getArray();
for (int i = 0; i < np; i++) {
B[i] = a[i][0];
}
}
/**
* Get internal B/Y/X single-column matrix. Internal representation changes from B to Y to X (solution)
* @return a single-column Matrix containing the solution
*/
public Matrix getX() {
return new Matrix (B, np);
} }
/** /**
...@@ -230,6 +289,11 @@ public class CholeskyBlock { ...@@ -230,6 +289,11 @@ public class CholeskyBlock {
return j * (m * np) + ((j >= nf) ? (np-nf*m): m) * m * i; return j * (m * np) + ((j >= nf) ? (np-nf*m): m) * m * i;
} }
/**
* Calculate non-diagonal tiles of the block-Cholesky factorization
* @param i - tile row
* @param j - tile column < i (tile row)
*/
private void setL21( private void setL21(
int i, // i > j, int i, // i > j,
int j) { // j <nf int j) { // j <nf
...@@ -251,6 +315,13 @@ public class CholeskyBlock { ...@@ -251,6 +315,13 @@ public class CholeskyBlock {
return; return;
} }
/**
* Convert remaining tiles of A-matrix (only in lower-left triangle of tiles, including diagonal ones)
* @param diag row/column of the diagonal tile that defines the remainder of the A matrix (to the right
* and below the diagonal tile
* @param row row of the transformed A tile (> diag)
* @param col column of the transformed A tile (> diag, <= row)
*/
private void setA22( private void setA22(
int diag,// < col, < row int diag,// < col, < row
int row, // >= col int row, // >= col
...@@ -279,8 +350,11 @@ public class CholeskyBlock { ...@@ -279,8 +350,11 @@ public class CholeskyBlock {
return; return;
} }
/**
* Run multithreaded Cholosky factorization, convert internal representation of the A-data to L-data
*/
private void choleskyBlockMulti() { private void choleskyBlockMulti() {
final Thread[] threads = ImageDtt.newThreadArray(decomp_threads); // 1); final Thread[] threads = newThreadArray(decomp_threads); // 1);
final AtomicInteger ai = new AtomicInteger(0); final AtomicInteger ai = new AtomicInteger(0);
cholesky_single(0); cholesky_single(0);
// Calculate first column under diagonal (L21) - maybe use m // Calculate first column under diagonal (L21) - maybe use m
...@@ -294,8 +368,7 @@ public class CholeskyBlock { ...@@ -294,8 +368,7 @@ public class CholeskyBlock {
} }
}; };
} }
ImageDtt.startAndJoin(threads); startAndJoin(threads);
for (int tile_diag = 1; tile_diag < n; tile_diag++) { for (int tile_diag = 1; tile_diag < n; tile_diag++) {
final int ftile_diag = tile_diag; final int ftile_diag = tile_diag;
// Calculate A in one tile column of the remaining A2' // Calculate A in one tile column of the remaining A2'
...@@ -320,7 +393,7 @@ public class CholeskyBlock { ...@@ -320,7 +393,7 @@ public class CholeskyBlock {
} }
}; };
} }
ImageDtt.startAndJoin(threads); startAndJoin(threads);
if (ftile_diag < (n-1)) { if (ftile_diag < (n-1)) {
// Now in parallel calculate L in column ftile_diag under diagonal and // Now in parallel calculate L in column ftile_diag under diagonal and
// finish A2' to the right of ftile_diag column // finish A2' to the right of ftile_diag column
...@@ -354,15 +427,18 @@ public class CholeskyBlock { ...@@ -354,15 +427,18 @@ public class CholeskyBlock {
} }
}; };
} }
ImageDtt.startAndJoin(threads); startAndJoin(threads);
} }
} }
return; return;
} }
// Single-threaded, used for a single-tile Cholesky deconstruction // Single-threaded, used for a single-tile Cholesky deconstruction
/**
* A single-threaded, single-tile Cholesky factorization converting diagonal A-tile to
* the corresponding diagonal L-tile
* @param diag row/column of the tile to convert.
*/
private void cholesky_single(int diag) { private void cholesky_single(int diag) {
int h = (diag < nf) ? m : (np-nf*m); int h = (diag < nf) ? m : (np-nf*m);
int indx = indx_IJ(diag, diag); int indx = indx_IJ(diag, diag);
...@@ -388,14 +464,14 @@ public class CholeskyBlock { ...@@ -388,14 +464,14 @@ public class CholeskyBlock {
/** /**
* B has data, will be modified * Solve L * Y = B for a single-tile data. B has data, will be modified
* to contain Y
* @param diag number of the diagonal tile (last may be smaller) * @param diag number of the diagonal tile (last may be smaller)
*/ */
private void solveY(int diag) { private void solveY(int diag) {
int h = (diag < nf) ? m : (np-nf*m); int h = (diag < nf) ? m : (np-nf*m);
int lindx = indx_IJ(diag, diag); int lindx = indx_IJ(diag, diag);
int xindx = diag * m; // start of B/x int xindx = diag * m; // start of B/x
// Solve L*Y = B;
for (int k = 0; k < h; k++) { for (int k = 0; k < h; k++) {
double x = B[xindx+k]; double x = B[xindx+k];
for (int i = 0; i < k ; i++) { for (int i = 0; i < k ; i++) {
...@@ -407,9 +483,9 @@ public class CholeskyBlock { ...@@ -407,9 +483,9 @@ public class CholeskyBlock {
} }
/** /**
* Solve single-tile Lt*X=Y, tiles are counted from the right-bottom * Solve single-tile Lt*X=Y
* B has data, will be modified * B has data (Y), will be modified to contain X
* @param diag number of the diagonal tile from the bottom-right, * @param diag number of the diagonal tile
* last may be smaller. * last may be smaller.
*/ */
private void solveX(int diag) { private void solveX(int diag) {
...@@ -429,7 +505,7 @@ public class CholeskyBlock { ...@@ -429,7 +505,7 @@ public class CholeskyBlock {
/** /**
* Subtract Y-column from running B using Y-data in tile j * Subtract Y-column from running B using Y-data in tile j
* from the tile i > j * from the tile i > j.
* @param i tile row to subtract from in B * @param i tile row to subtract from in B
* @param j tile row corresponding to diagonal L tiles * @param j tile row corresponding to diagonal L tiles
*/ */
...@@ -452,15 +528,15 @@ public class CholeskyBlock { ...@@ -452,15 +528,15 @@ public class CholeskyBlock {
/** /**
* Subtract X-column from running B using X-data in tile j * Subtract X-column from running B using X-data in tile j
* from the tile i > j, counting from the right-bottom * from the tile i > j.
* @param i tile row (from the bottom) to subtract from in B * @param i tile row to subtract from in B
* @param j tile row corresponding to diagonal L tiles, from the bottom * @param j tile row corresponding to diagonal L tiles
*/ */
private void subXCol( private void subXCol(
int i, // i < j, int i, // i < j,
int j) { // j > i int j) { // j > i
int lindx = indx_IJ(j,i); // transposed int lindx = indx_IJ(j,i);
int xsrc = j * m; // start of B/x to use int xsrc = j * m; // start of B/x to use
int xdst = i * m; // start of B/x to modify int xdst = i * m; // start of B/x to modify
int h = (j < nf) ? m : (np-nf*m); int h = (j < nf) ? m : (np-nf*m);
...@@ -473,20 +549,32 @@ public class CholeskyBlock { ...@@ -473,20 +549,32 @@ public class CholeskyBlock {
} }
return; return;
} }
/**
* Solve with existing Cholesky decomposition for a single-column Matrix b
* @param b a single-column matrix B that L * Lt * X = B
* @return A single-column solution X
*/
public Matrix solve (Matrix b) { public Matrix solve (Matrix b) {
setB(b); setB(b);
solve(); solve();
return getX(); return getX();
} }
/**
* Solve with existing Cholesky decomposition for an internal representation of matrix B,
* replace B data with X data
*/
public void solve() { public void solve() {
solve(dflt_solve_threads); // this.solve_threads); solve(dflt_solve_threads); // this.solve_threads);
} }
/**
* Solve with existing Cholesky decomposition for an internal representation of matrix B,
* replace B data with X data, limit number of threads to use.
* @param solve_threads maximal number of threads to use
*/
public void solve(int solve_threads) { public void solve(int solve_threads) {
final Thread[] threads = ImageDtt.newThreadArray(solve_threads); final Thread[] threads = newThreadArray(solve_threads);
final AtomicInteger ai = new AtomicInteger(0); final AtomicInteger ai = new AtomicInteger(0);
AtomicBoolean [][] tile_started = new AtomicBoolean[n][n]; AtomicBoolean [][] tile_started = new AtomicBoolean[n][n];
AtomicBoolean [][] tile_done = new AtomicBoolean[n][n]; AtomicBoolean [][] tile_done = new AtomicBoolean[n][n];
...@@ -554,7 +642,7 @@ public class CholeskyBlock { ...@@ -554,7 +642,7 @@ public class CholeskyBlock {
} }
}; };
} }
ImageDtt.startAndJoin(threads); startAndJoin(threads);
// Solve L'X = Y // Solve L'X = Y
rows_done.set(0);// this number of tile rows solved rows_done.set(0);// this number of tile rows solved
for (int row = 0; row < n; row++) { for (int row = 0; row < n; row++) {
...@@ -611,7 +699,7 @@ public class CholeskyBlock { ...@@ -611,7 +699,7 @@ public class CholeskyBlock {
} }
}; };
} }
ImageDtt.startAndJoin(threads); startAndJoin(threads);
starts[4] = (((double) System.nanoTime()) * 1E-9) - start_time; starts[4] = (((double) System.nanoTime()) * 1E-9) - start_time;
if (debug) { if (debug) {
System.out.println("\ncholeskyBlock.solve(): ==== number of threads = "+threads.length+ "===="); System.out.println("\ncholeskyBlock.solve(): ==== number of threads = "+threads.length+ "====");
...@@ -624,6 +712,12 @@ public class CholeskyBlock { ...@@ -624,6 +712,12 @@ public class CholeskyBlock {
} }
// for comparison/verification // for comparison/verification
/**
* Standard single-threaded solution with Cholesky decomposition from Jama used for comparison.
* @param B A single-column Matrix for the right side of the equation A * X = L * Lt * X = B
* @param L Cholesky decomposition lower-left Matrix
* @return a single-column solution X.
*/
public static Matrix solve_single (Matrix B, double [][] L) { public static Matrix solve_single (Matrix B, double [][] L) {
int n = L.length; int n = L.length;
if (B.getRowDimension() != n) { if (B.getRowDimension() != n) {
...@@ -647,6 +741,51 @@ public class CholeskyBlock { ...@@ -647,6 +741,51 @@ public class CholeskyBlock {
} }
return new Matrix(x,n); return new Matrix(x,n);
} }
/*
* Multithreading methods from Stephan Preibisch's Multithreading.java class. See:
* http://repo.or.cz/w/trakem2.git?a=blob;f=mpi/fruitfly/general/MultiThreading.java;hb=HEAD
*/
/**
* Create a Thread[] array as large as the number of processors available.
* @return an array of threads
*/
public static Thread[] newThreadArray() {
return newThreadArray(100);
}
/**
* Create a Thread[] array as large as the number of processors available.
* @param maxCPUs limit number of threads
* @return an array of threads
*/
public static Thread[] newThreadArray(int maxCPUs) { // USED in lwir
int n_cpus = Runtime.getRuntime().availableProcessors();
if (n_cpus>maxCPUs)n_cpus=maxCPUs;
return new Thread[n_cpus];
}
/**
* Start all given threads and wait on each of them until all are done.
* @param threads to start
*/
public static void startAndJoin(Thread[] threads) // USED in lwir
{
for (int ithread = 0; ithread < threads.length; ++ithread)
{
threads[ithread].setPriority(Thread.NORM_PRIORITY);
threads[ithread].start();
}
try
{
for (int ithread = 0; ithread < threads.length; ++ithread)
threads[ithread].join();
} catch (InterruptedException ie)
{
throw new RuntimeException(ie);
}
}
/* Results /* Results
First run, next MULTITHREADED ones are faster. Ran on an 8-core x86, 16 threads First run, next MULTITHREADED ones are faster. Ran on an 8-core x86, 16 threads
choleskyBlockMulti() last pass n= 33, tile_diag=32 choleskyBlockMulti() last pass n= 33, tile_diag=32
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment