Commit e27934db authored by Andrey Filippov's avatar Andrey Filippov

Finished solve, but it is somewhat slower than single

parent 4a7ba41b
......@@ -48,8 +48,7 @@ public class CholeskyBlock {
L = new double [A.length];
B = new double [np];
setup_ATriangle(A_in);
// Add convertion here later after done with time measurements
// choleskyBlockMulti()
choleskyBlockMulti();
}
private void setup_ATriangle(double [][] A_in) {
......@@ -376,6 +375,7 @@ public class CholeskyBlock {
B[xindx+k] = x/L[lindx + (h+1)*k];
// x[k] /= L[k][k];
}
return;
}
/**
......@@ -387,58 +387,82 @@ public class CholeskyBlock {
public void solveX(int diag) {
int h = (diag < nf) ? m : (np-nf*m);
int lindx = indx_IJ(diag, diag);
int xindx = (np - 1) - diag * m; // start of B/x
int xindx = diag * m; // start of B/x
// Solve L'*X = Y;
for (int k = 0; k < h; k++) {
double x = B[xindx - k];
for (int i = 0; i < k ; i++) {
x -= B[xindx - i] * L[lindx - k - h * i];
for (int k = h-1; k >= 0; k--) {
double x = B[xindx + k];
for (int i = k+1; i < h ; i++) {
x -= B[xindx + i] * L[lindx + h*i + k];
}
B[xindx-k] = x/L[lindx - (h+1)*k];
B[xindx+k] = x/L[lindx + (h+1)*k];
}
return;
}
/**
* Subtract Y-column from running B using Y-data in tile j
* from the tile i > j
* @param i tile row corresponding to diagonal L tiles
* @param j tile row to subtract from in B
* @param i tile row to subtract from in B
* @param j tile row corresponding to diagonal L tiles
*/
public void subYCol(
int i, // i > j,
int j) { // j < i
int lindx = indx_IJ(i,j);
int xindx = j * m; // start of B/x
int lindx = indx_IJ(i,j);
int xsrc = j * m; // start of B/x to use
int xdst = i * m; // start of B/x to modify
int h = (i < nf) ? m : (np-nf*m);
for (int k = 0; k < h; k++) {
double x = B[xindx+k];
double x = B[xdst + k];
for (int l = 0; l < m; l++) {
x -= L[lindx + k * m + l] * B[xindx+l];
x -= L[lindx + k * m + l] * B[xsrc+l];
}
B[xindx+k] = x;
B[xdst + k] = x;
}
return;
}
/**
* Subtract X-column from running B using X-data in tile j
* from the tile i > j, counting from the right-bottom
* @param i tile row corresponding to diagonal L tiles, from the bottom
* @param j tile row (from the bottom) to subtract from in B
* @param i tile row (from the bottom) to subtract from in B
* @param j tile row corresponding to diagonal L tiles, from the bottom
*/
public void subXCol(
int i, // i < j,
int j) { // j > i
int lindx = indx_IJ(j,i); // transposed
int xsrc = j * m; // start of B/x to use
int xdst = i * m; // start of B/x to modify
int h = (j < nf) ? m : (np-nf*m);
for (int k = 0; k < m; k++) {
double x = B[xdst + k];
for (int l = 0; l < h; l++) {
x -= L[lindx + l * m + k] * B[xsrc + l];
}
B[xdst + k] = x;
}
return;
}
public void subXCol0(
int i, // i > j,
int j) { // j < i
int lindx = rindx_IJ(i,j);
int xindx = np - 1 - j * m; // start of B/x
int lindx = rindx_IJ(i,j);
int xsrc = np - 1 - j * m; // start of B/x to use
int xdst = np - 1 - i * m; // start of B/x to modify
int h = (i < nf) ? m : (np-nf*m);
for (int k = 0; k < h; k++) {
double x = B[xindx-k];
double x = B[xdst - k];
for (int l = 0; l < m; l++) {
x -= L[lindx - l * m - k] * B[xindx-l];
x -= L[lindx - l * m - k] * B[xsrc - l];
}
B[xindx-k] = x;
B[xdst - k] = x;
}
return;
}
public Matrix solve (Matrix b) {
......@@ -453,12 +477,12 @@ public class CholeskyBlock {
// L, B are set up
// convert top fragment of B->Y using top-left tile of L
solveY(0); // int diag)
for (int tile_diag = 1; tile_diag < n; tile_diag++) {
for (int tile_diag = 0; tile_diag < (n -1); tile_diag++) {
final int ftile_diag = tile_diag;
// subtract vertical column of tiles for top (previous) segment of B->Y from the remaining
// segments. After subtracting from the top segment, immediately calculate Y for it, all
// other segments subtract in parallel
ai.set(tile_diag); // start from the second tile segment
ai.set(tile_diag+1); // start from the second tile segment
for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs
threads[ithread] = new Thread() {
public void run() {
......@@ -466,8 +490,8 @@ public class CholeskyBlock {
subYCol(
tile_seg, // int i, // i > j,
ftile_diag); // int j)
if (tile_seg == ftile_diag) {
solveY(ftile_diag); // int diag)
if (tile_seg == (ftile_diag+1)) {
solveY(tile_seg); // int diag)
}
}
}
......@@ -476,22 +500,22 @@ public class CholeskyBlock {
ImageDtt.startAndJoin(threads);
}
// Solve L'X = Y
solveX(0); // int diag)
for (int tile_diag = 1; tile_diag < n; tile_diag++) {
solveX(n-1); // int diag)
for (int tile_diag = n-1; tile_diag > 0; tile_diag--) {
final int ftile_diag = tile_diag;
// subtract vertical column of tiles for top (previous) segment of X->Y from the remaining
// segments. After subtracting from the top segment, immediately calculate Y for it, all
// other segments subtract in parallel
ai.set(tile_diag); // start from the second tile segment
ai.set(tile_diag - 1); // start from the second tile segment
for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs
threads[ithread] = new Thread() {
public void run() {
for (int tile_seg = ai.getAndIncrement(); tile_seg < n; tile_seg = ai.getAndIncrement()) {
for (int tile_seg = ai.getAndDecrement(); tile_seg >= 0; tile_seg = ai.getAndDecrement()) {
subXCol(
tile_seg, // int i, // i > j,
ftile_diag); // int j)
if (tile_seg == ftile_diag) {
solveX(ftile_diag); // int diag)
if (tile_seg == (ftile_diag - 1)) {
solveX(tile_seg); // int diag)
}
}
}
......
......@@ -293,7 +293,7 @@ public class CholeskyLLTMulti {
Matrix wjtjlambda_copy1 = new Matrix(wjtjlambda.getArrayCopy());
Matrix wjtjlambda_copy2 = new Matrix(wjtjlambda.getArrayCopy());
Matrix wjtjlambda_copy = new Matrix(wjtjlambda.getArrayCopy());
double [] starts = new double[7];
double [] starts = new double[8];
double start_time = (((double) System.nanoTime()) * 1E-9);
CholeskyDecomposition choleskyDecomposition = new CholeskyDecomposition(wjtjlambda_copy);
......@@ -301,8 +301,8 @@ public class CholeskyLLTMulti {
CholeskyLLTMulti choleskyLLTMulti_single = new CholeskyLLTMulti(wjtjlambda_copy0,0);
starts[1] = (((double) System.nanoTime()) * 1E-9) - start_time;
CholeskyBlock choleskyBlock = new CholeskyBlock(wjtjlambda_copy1.getArray(),block_size);
starts[2] = (((double) System.nanoTime()) * 1E-9) - start_time;
choleskyBlock.choleskyBlockMulti();
// starts[2] = (((double) System.nanoTime()) * 1E-9) - start_time;
// choleskyBlock.choleskyBlockMulti();
starts[3] = (((double) System.nanoTime()) * 1E-9) - start_time;
double [][] LTriangle = choleskyBlock.get_LTriangle();
Matrix mdelta_cholesky = choleskyDecomposition.solve(jty);
......@@ -311,15 +311,20 @@ public class CholeskyLLTMulti {
starts[5] = (((double) System.nanoTime()) * 1E-9) - start_time;
Matrix mdelta_cholesky_multi = CholeskyLLTMulti.solve_single(jty, LTriangle);
starts[6] = (((double) System.nanoTime()) * 1E-9) - start_time;
Matrix mdelta_cholesky_block = choleskyBlock.solve(jty);
starts[7] = (((double) System.nanoTime()) * 1E-9) - start_time;
System.out.println("testCholesky(): block_size= "+block_size);
System.out.println("testCholesky(): choleskyDecomposition: "+(starts[0])+" sec");
System.out.println("testCholesky(): choleskyLLTMulti_single:"+(starts[1]-starts[0])+" sec");
System.out.println("testCholesky(): CholeskyBlock(): "+ (starts[2]-starts[1])+" sec");
System.out.println("testCholesky(): choleskyBlockMulti(): "+(starts[3]-starts[2])+" sec");
System.out.println("testCholesky(): CholeskyBlock(): "+(starts[3]-starts[1])+" sec");
// System.out.println("testCholesky(): choleskyBlockMulti(): "+(starts[3]-starts[2])+" sec");
System.out.println("testCholesky(): get_LTriangle(): "+(starts[4]-starts[3])+" sec");
System.out.println("testCholesky(): solve_single(,single): "+(starts[5]-starts[4])+" sec");
System.out.println("testCholesky(): solve_single(,block): "+(starts[6]-starts[5])+" sec");
System.out.println("testCholesky(): block.solve(): "+(starts[7]-starts[6])+" sec");
System.out.println("testCholesky(): title= "+title);
System.out.println("testCholesky(): dbg_title= "+dbg_title);
......@@ -373,6 +378,8 @@ public class CholeskyLLTMulti {
starts[6] = (((double) System.nanoTime()) * 1E-9) - start_time;
Matrix mdelta_cholesky_fast = choleskyLLTMulti.solve(jty, 2);
starts[7] = (((double) System.nanoTime()) * 1E-9) - start_time;
System.out.println("testCholesky(): choleskyDecomposition: "+(starts[0])+" sec");
System.out.println("testCholesky(): choleskyLLTMulti_single:"+(starts[1]-starts[0])+" sec");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment