Commit f616c5d7 authored by Andrey Filippov's avatar Andrey Filippov

Fixed non-multiple block size

parent 3cedfc7a
package com.elphel.imagej.common;
/**
** CholeskyBlock - multithreaded Chelesky decomposition
**
** Copyright (C) 2025 Elphel, Inc.
**
** -----------------------------------------------------------------------------**
**
** CholeskyBlock.java is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
*/
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
......@@ -22,7 +43,7 @@ public class CholeskyBlock {
np = A_in.length;
nf = np / m;
n = ((nf * m) < np)? (nf + 1) : nf;
A = new double [n*n*m*m];
A = new double [np*np]; // [n*n*m*m];
L = new double [A.length];
setup_ATriangle(A_in);
}
......@@ -53,10 +74,10 @@ public class CholeskyBlock {
}
if (n > nf) { // if there are small tiles below and to the right
int tile_row = nf;
int m1 = np - m * nf;
int h = np - m * nf;
for (int tile_col = 0; tile_col < nf; tile_col++) {
int indx = indx_IJ(tile_row, tile_col);
for (int k = 0; k < m1; k++) {
for (int k = 0; k < h; k++) {
System.arraycopy(
A_in[tile_row*m +k],
tile_col * m,
......@@ -68,16 +89,15 @@ public class CholeskyBlock {
}
// copy diagonal
int indx = indx_IJ(tile_row, tile_row);
for (int k = 0; k < m1; k++) {
for (int k = 0; k < h; k++) {
System.arraycopy(
A_in[tile_row*m +k],
tile_row * m,
A,
indx + m1 * k,
indx + h * k,
k+1);
}
}
}
public Matrix getL() {
......@@ -112,10 +132,10 @@ public class CholeskyBlock {
}
if (n > nf) { // if there are small tiles below and to the right
int tile_row = nf;
int m1 = np - m * nf;
int h = np - m * nf;
for (int tile_col = 0; tile_col < nf; tile_col++) {
int indx = indx_IJ(tile_row, tile_col);
for (int k = 0; k < m1; k++) {
for (int k = 0; k < h; k++) {
System.arraycopy(
L,
indx + m * k,
......@@ -127,10 +147,10 @@ public class CholeskyBlock {
}
// copy diagonal
int indx = indx_IJ(tile_row, tile_row);
for (int k = 0; k < m1; k++) {
for (int k = 0; k < h; k++) {
System.arraycopy(
L,
indx + m1 * k,
indx + h * k,
L_out[tile_row*m +k],
tile_row * m,
k+1);
......@@ -140,9 +160,6 @@ public class CholeskyBlock {
}
/**
* Get index of the top-left tile corner
* @param i tile row
......@@ -249,7 +266,7 @@ public class CholeskyBlock {
int h = (i < nf) ? m : (np-nf*m);
// prepare solving Lx = b, copy tile A -> L
System.arraycopy(A, indx_ij, L, indx_ij, m * h);
for (int l_row = 0; l_row < m; l_row++) {
for (int l_row = 0; l_row < m; l_row++) { // was <m <h!
for (int x_col= 0; x_col < h; x_col++) { // b-vector
int lindx = indx_ij + m * x_col + l_row;
double ls = L[lindx];
......@@ -272,8 +289,8 @@ public class CholeskyBlock {
if (row == col) {
for (int i = 0; i < h; i++) {
for (int j = 0; j <= i; j++) {
for (int k = 0; k < h; k++) {
A[indx_a + i * m + j] -= L[indx_lrow + i * h + k] * L[indx_lrow + j * h + k];
for (int k = 0; k < m; k++) { // was h
A[indx_a + i * h + j] -= L[indx_lrow + i * m + k] * L[indx_lrow + j * m + k];
}
}
}
......@@ -299,7 +316,6 @@ public class CholeskyBlock {
final double [][] Lm = new double[m][m];
final double [][] Lh = (n > nf) ? new double[(np-nf*m)][(np-nf*m)] : null;
final double [][] L1 = (np < m) ? Lh : Lm; // smaller than a tile
// Extract top-left tile (only lower triangle)
getDiagLTriangle(
A, // double [] arr,
......@@ -332,11 +348,18 @@ public class CholeskyBlock {
// Calculate A in one tile column of the remaining A2'
// start with diagonal (top) tile, and calculate its Cholesky
// In parallel, calculate A for all tiles in that column below diagonal
if (tile_diag == (n-1)) {
System.out.println("choleskyBlockMulti() last pass n= "+n+", tile_diag="+tile_diag);
}
ai.set(ftile_diag);
for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs
threads[ithread] = new Thread() {
public void run() {
for (int nRow= ai.getAndIncrement(); nRow < n; nRow = ai.getAndIncrement()) {
// if ((nRow == (n-1)) && (ftile_diag == (n-1))) {
// System.out.println("calling setA22() first: diag="+(ftile_diag-1)+", nRow="+nRow);
// }
setA22(
ftile_diag-1, // int diag,// < col, < row
nRow, // int row, // >= col
......@@ -394,6 +417,9 @@ public class CholeskyBlock {
int ncol = (ntile-1) - (nrow * (nrow + 1) /2);
int row = ftile_diag + nrow + 1;
int col = ftile_diag + ncol + 1;
// if ((row == (n-1)) && (col == (n-1))) {
// System.out.println("calling setA22(): diag="+(ftile_diag-1)+", row="+row);
// }
setA22(
ftile_diag-1, // int diag,// < col, < row
row, // int row, // >= col
......
......@@ -265,11 +265,30 @@ public class CholeskyLLTMulti {
}
public static Matrix[] testCholesky(
Matrix wjtjlambda,
Matrix jty,
Matrix wjtjlambda_in,
Matrix jty_in,
String title) {
int block_size = 70; // 64; // 60; // 70; // 80; // 120; // 150; // 200; // 100; // 4; // 10; // 100; // 28;
String dbg_title=title+"ch_diff_choleskyBlock-choleskyDecomposition-"+block_size;
int block_size = 70; // 100; // 70;//70~best // 64; // 60; // 70; // 80; // 120; // 150; // 200; // 100; // 4; // 10; // 100; // 28;
boolean truncate = false;
int trunc_size = 199; // 0 to use full size
Matrix wjtjlambda,jty;
if (truncate) {
int tr_size = (trunc_size==0)?(((wjtjlambda_in.getRowDimension())/block_size) * block_size):trunc_size;
wjtjlambda=wjtjlambda_in.getMatrix (
0, // int i0,
tr_size-1, // int i1,
0, // int j0,
tr_size-1); //int j1)
jty=jty_in.getMatrix (
0, // int i0,
tr_size-1, // int i1,
0, // int j0,
0); //int j1)
} else {
wjtjlambda=wjtjlambda_in;
jty = jty_in;
}
String dbg_title=title+"ch_diff_choleskyBlock-choleskyDecomposition-"+block_size+(truncate?("-truncate"+trunc_size):"");
Matrix wjtjlambda_copy0 = new Matrix(wjtjlambda.getArrayCopy());
Matrix wjtjlambda_copy1 = new Matrix(wjtjlambda.getArrayCopy());
Matrix wjtjlambda_copy2 = new Matrix(wjtjlambda.getArrayCopy());
......@@ -301,8 +320,8 @@ public class CholeskyLLTMulti {
System.out.println("testCholesky(): get_LTriangle(): "+(starts[4]-starts[3])+" sec");
System.out.println("testCholesky(): solve_single(,single): "+(starts[5]-starts[4])+" sec");
System.out.println("testCholesky(): solve_single(,block): "+(starts[6]-starts[5])+" sec");
System.out.println("testCholesky(): title= "+block_size);
System.out.println("testCholesky(): dbg_title= "+block_size);
System.out.println("testCholesky(): title= "+title);
System.out.println("testCholesky(): dbg_title= "+dbg_title);
Matrix ch_diff = choleskyLLTMulti_single.getL().minus(choleskyDecomposition.getL());
Matrix ch_diff_fast = choleskyBlock.getL().minus(choleskyDecomposition.getL());
......@@ -328,6 +347,8 @@ public class CholeskyLLTMulti {
Matrix wjtjlambda,
Matrix jty) {
int block_size = 128;
boolean truncate = true;
Matrix wjtjlambda_copy0 = new Matrix(wjtjlambda.getArrayCopy());
Matrix wjtjlambda_copy1 = new Matrix(wjtjlambda.getArrayCopy());
Matrix wjtjlambda_copy2 = new Matrix(wjtjlambda.getArrayCopy());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment