From 4a7ba41bd7135216752c4dc7488ee165ce475816 Mon Sep 17 00:00:00 2001
From: Andrey Filippov <andrey@elphel.com>
Date: Wed, 19 Mar 2025 00:33:53 -0600
Subject: [PATCH] Started solution

---
 .../elphel/imagej/common/CholeskyBlock.java   | 262 +++++++++++++++---
 1 file changed, 222 insertions(+), 40 deletions(-)

diff --git a/src/main/java/com/elphel/imagej/common/CholeskyBlock.java b/src/main/java/com/elphel/imagej/common/CholeskyBlock.java
index 4f672df..eb2ea95 100644
--- a/src/main/java/com/elphel/imagej/common/CholeskyBlock.java
+++ b/src/main/java/com/elphel/imagej/common/CholeskyBlock.java
@@ -35,6 +35,7 @@ public class CholeskyBlock {
 	public final int nf; // number of full rows/cols
 	public final double [] A;
 	public final double [] L;
+	public final double [] B;
 	
 	public CholeskyBlock (
 			double [][] A_in,
@@ -45,7 +46,10 @@ public class CholeskyBlock {
 		n = ((nf * m) < np)? (nf + 1) : nf; 
 		A = new double [np*np]; //  [n*n*m*m];
 		L = new double [A.length];
+		B = new double [np];
 		setup_ATriangle(A_in);
+		// Add convertion here later after done with time measurements
+		// choleskyBlockMulti() 
 	}
 	
 	private void setup_ATriangle(double [][] A_in) {
@@ -104,6 +108,16 @@ public class CholeskyBlock {
 		return new Matrix(get_LTriangle(),np,np);
 	}
 	
+	public void setB(Matrix mb) {
+		double [][] a = mb.getArray();
+		for (int i = 0; i < np; i++) {
+			B[i] = a[i][0];
+		}
+	}
+	
+	public Matrix getX() {
+		return new Matrix (B, np);
+	}
 	
 	public double [][] get_LTriangle() {
 		double [][] L_out = new double[np][np];
@@ -170,6 +184,19 @@ public class CholeskyBlock {
 		return j * (m * np) + ((j >= nf) ? (np-nf*m): m) * m * i;
 	}
 
+	/**
+	 * Tile bottom-right corner, counting i - up, j - to the left.
+	 * Full tile are on the bottom and right (partial - on the top
+	 * and left
+	 * @param i tile row from bottom
+	 * @param j tile column from right
+	 * @return index of the bottom-right corner of the tile (the largest!)
+	 */
+	public int rindx_IJ(int i, int j) { // upside down
+//		return (j * (m * np)) + (np-1 -i)  * ((j >= nf) ?(np-nf*m): m);
+//		return (np*np-1) - indx_IJ(i, j);
+		return np*np-1 - (j * (m * np) + ((j >= nf) ? (np-nf*m): m) * m * i);
+	}
 
 
 	
@@ -226,28 +253,6 @@ public class CholeskyBlock {
 		final Thread[] threads =       ImageDtt.newThreadArray();
 		final AtomicInteger ai =       new AtomicInteger(0);
 		cholesky_single(0);
-		/*
-		final double [][] Am = new double[m][m];
-		final double [][] Ah = (n > nf) ? new double[(np-nf*m)][(np-nf*m)] : null;
-		final double [][] A1 = (np < m) ? Ah : Am; // smaller than a tile
-		final double [][] Lm = new double[m][m];
-		final double [][] Lh = (n > nf) ? new double[(np-nf*m)][(np-nf*m)] : null;
-		final double [][] L1 = (np < m) ? Lh : Lm;  // smaller than a tile
-		// Extract top-left tile (only lower triangle)
-		getDiagLTriangle(
-				A,   // double [] arr,
-				A1,  // double [][] a_diag,
-				0);  // int i)
-		// Decompose, get L (lower triangle)
-		cholesky_single (
-				A1,  // double [][] a,
-				L1); // double [][] l)
-		// Save it to L-array:
-		putDiagLTriangle(
-				L,   // double []   arr,
-				L1,  // double [][] l_diag,
-				0);  // int         i)
-		*/
 		// Calculate first column under diagonal (L21) - maybe use m
 		ai.set(1); // start from the second tile row
 		for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs
@@ -280,24 +285,6 @@ public class CholeskyBlock {
 									ftile_diag);  // int col) 							
 							if (nRow == ftile_diag) {
 								cholesky_single(ftile_diag);
-								/*
-								double [][] At = (ftile_diag < nf) ? Am: Ah; // full or reduced array
-								double [][] Lt = (ftile_diag < nf) ? Lm: Lh; // full or reduced array
-								// Extract top-left tile (only lower triangle)
-								getDiagLTriangle(
-										A,            // double [] arr,
-										At,           // double [][] a_diag,
-										ftile_diag);  // int i)
-								// Decompose, get L (lower triangle)
-								cholesky_single (
-										At,           // double [][] a,
-										Lt);          // double [][] l)
-								// Save it to L-array:
-								putDiagLTriangle(
-										L,            // double []   arr,
-										Lt,           // double [][] l_diag,
-										ftile_diag);  // int         i)
-								*/
 							}
 						}
 					}
@@ -340,6 +327,7 @@ public class CholeskyBlock {
 				ImageDtt.startAndJoin(threads);
 			}
 		}
+		return;
 	}
 	
 	
@@ -369,6 +357,200 @@ public class CholeskyBlock {
 		}
 	}
 
+	
+	/**
+	 * B has data, will be modified
+	 * @param diag number of the diagonal tile (last may be smaller)
+	 */
+	public void solveY(int diag) {
+		int h = (diag < nf) ? m : (np-nf*m);			
+		int lindx = indx_IJ(diag, diag);
+		int xindx = diag * m; // start of B/x
+		// Solve L*Y = B;
+		for (int k = 0; k < h; k++) {
+			double x = B[xindx+k];
+			for (int i = 0; i < k ; i++) {
+				x -= B[xindx + i] * L[lindx + h*k + i];
+//				x[k] -= x[i]*L[k][i];
+			}
+			B[xindx+k] = x/L[lindx + (h+1)*k];
+//			x[k] /= L[k][k];
+		}
+	}
+
+	/**
+	 * Solve single-tile Lt*X=Y, tiles are counted from the right-bottom
+	 * B has data, will be modified
+	 * @param diag number of the diagonal tile from the bottom-right, 
+	 *        last may be smaller.
+	 */
+	public void solveX(int diag) {
+		int h = (diag < nf) ? m : (np-nf*m);			
+		int lindx = indx_IJ(diag, diag);
+		int xindx = (np - 1) - diag * m; // start of B/x
+		// Solve L'*X = Y;
+		for (int k = 0; k < h; k++) {
+			double x = B[xindx - k];
+			for (int i = 0; i < k ; i++) {
+				x -= B[xindx - i] * L[lindx - k - h * i];
+			}
+			B[xindx-k] = x/L[lindx - (h+1)*k];
+			
+		}
+	}
+	
+	/**
+	 * Subtract Y-column from running B using Y-data in tile j
+	 * from the tile i > j  
+	 * @param i tile row corresponding to diagonal L tiles
+	 * @param j tile row to subtract from in B
+	 */
+	public void subYCol(
+			int       i,   // i > j,
+			int       j) { // j < i
+		int lindx =   indx_IJ(i,j);
+		int xindx = j * m; // start of B/x
+		int h = (i < nf) ? m : (np-nf*m);
+		for (int k = 0; k < h; k++) {
+			double x = B[xindx+k];
+			for (int l = 0; l < m; l++) {
+				x -= L[lindx + k * m + l] * B[xindx+l];
+			}
+			B[xindx+k] = x;
+		}
+	}
+
+	/**
+	 * Subtract X-column from running B using X-data in tile j
+	 * from the tile i > j, counting from the right-bottom  
+	 * @param i tile row corresponding to diagonal L tiles, from the bottom
+	 * @param j tile row (from the bottom) to subtract from in B
+	 */
+	public void subXCol(
+			int       i,   // i > j,
+			int       j) { // j < i
+		int lindx =   rindx_IJ(i,j);
+		int xindx = np - 1 - j * m; // start of B/x
+		int h = (i < nf) ? m : (np-nf*m);
+		for (int k = 0; k < h; k++) {
+			double x = B[xindx-k];
+			for (int l = 0; l < m; l++) {
+				x -= L[lindx - l * m - k] * B[xindx-l];
+			}
+			B[xindx-k] = x;
+		}
+	}
+	
+	public Matrix solve (Matrix b) {
+		setB(b);
+		solve();
+		return getX();
+	}
+	
+	public void solve() {
+		final Thread[] threads =       ImageDtt.newThreadArray();
+		final AtomicInteger ai =       new AtomicInteger(0);
+		// L, B are set up
+		// convert top fragment of B->Y using top-left tile of L
+		solveY(0); // int diag)	
+		for (int tile_diag = 1; tile_diag < n; tile_diag++) {
+			final int ftile_diag = tile_diag;
+			// subtract vertical column of tiles for top (previous) segment of B->Y from the remaining
+			// segments. After subtracting from the top segment, immediately calculate Y for it, all
+			// other segments subtract in parallel
+			ai.set(tile_diag); // start from the second tile segment
+			for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs
+				threads[ithread] = new Thread() {
+					public void run() {
+						for (int tile_seg = ai.getAndIncrement(); tile_seg < n; tile_seg = ai.getAndIncrement()) {
+							 subYCol(
+									 tile_seg, // int       i,   // i > j,
+									 ftile_diag); // 	int       j)
+							 if (tile_seg == ftile_diag) {
+								 solveY(ftile_diag); // int diag)	
+							 }
+						}
+					}
+				};
+			}		      
+			ImageDtt.startAndJoin(threads);
+		}
+		// Solve L'X = Y
+		solveX(0); // int diag)	
+		for (int tile_diag = 1; tile_diag < n; tile_diag++) {
+			final int ftile_diag = tile_diag;
+			// subtract vertical column of tiles for top (previous) segment of X->Y from the remaining
+			// segments. After subtracting from the top segment, immediately calculate Y for it, all
+			// other segments subtract in parallel
+			ai.set(tile_diag); // start from the second tile segment
+			for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs
+				threads[ithread] = new Thread() {
+					public void run() {
+						for (int tile_seg = ai.getAndIncrement(); tile_seg < n; tile_seg = ai.getAndIncrement()) {
+							 subXCol(
+									 tile_seg, // int       i,   // i > j,
+									 ftile_diag); // 	int       j)
+							 if (tile_seg == ftile_diag) {
+								 solveX(ftile_diag); // int diag)	
+							 }
+						}
+					}
+				};
+			}		      
+			ImageDtt.startAndJoin(threads);
+		}
+		return;
+	}
+	
+	
+	public static Matrix solve (Matrix B, double [][] L) {
+		int n = L.length;
+		if (B.getRowDimension() != n) {
+			throw new IllegalArgumentException("Matrix row dimensions must agree.");
+		}
+		// Copy right hand side.
+		double[] x = B.getColumnPackedCopy (); //  (for single-column)
+		// Solve L*Y = B;
+		for (int k = 0; k < n; k++) {
+			for (int i = 0; i < k ; i++) {
+				x[k] -= x[i]*L[k][i];
+			}
+			x[k] /= L[k][k];
+		}
+		// Solve L'*X = Y;
+		for (int k = n-1; k >= 0; k--) {
+			for (int i = k+1; i < n ; i++) {
+				x[k] -= x[i]*L[i][k];
+			}
+			x[k] /= L[k][k];
+		}
+		return new Matrix(x,n);
+	}
+	
+/*
+	public void setL21(
+			int       i,   // i > j,
+			int       j) { // j <nf
+		int indx_diag = indx_IJ(j,j);
+		int indx_ij =   indx_IJ(i,j);
+		int h = (i < nf) ? m : (np-nf*m);
+		// prepare solving Lx = b, copy tile A -> L
+		System.arraycopy(A, indx_ij, L, indx_ij, m * h);
+		for (int l_row = 0; l_row < m; l_row++) { // was <m <h!
+			for (int x_col= 0; x_col < h; x_col++) { // b-vector
+				int lindx = indx_ij + m * x_col + l_row;
+				double ls = L[lindx];
+				for (int l_col = 0; l_col < l_row; l_col++) {
+					ls -= L[indx_ij + m * x_col + l_col] * L[indx_diag + m* l_row+l_col];
+				}
+				L[lindx] = ls/L[indx_diag + (m + 1)* l_row];
+			}
+		}
+		return;
+	}
+
+ */
+	
 	// ======================== unused =====================
 	
 	
-- 
2.18.1