Commit e27934db authored by Andrey Filippov's avatar Andrey Filippov

Finished solve, but it is somewhat slower than single

parent 4a7ba41b
...@@ -48,8 +48,7 @@ public class CholeskyBlock { ...@@ -48,8 +48,7 @@ public class CholeskyBlock {
L = new double [A.length]; L = new double [A.length];
B = new double [np]; B = new double [np];
setup_ATriangle(A_in); setup_ATriangle(A_in);
// Add convertion here later after done with time measurements choleskyBlockMulti();
// choleskyBlockMulti()
} }
private void setup_ATriangle(double [][] A_in) { private void setup_ATriangle(double [][] A_in) {
...@@ -376,6 +375,7 @@ public class CholeskyBlock { ...@@ -376,6 +375,7 @@ public class CholeskyBlock {
B[xindx+k] = x/L[lindx + (h+1)*k]; B[xindx+k] = x/L[lindx + (h+1)*k];
// x[k] /= L[k][k]; // x[k] /= L[k][k];
} }
return;
} }
/** /**
...@@ -387,58 +387,82 @@ public class CholeskyBlock { ...@@ -387,58 +387,82 @@ public class CholeskyBlock {
public void solveX(int diag) { public void solveX(int diag) {
int h = (diag < nf) ? m : (np-nf*m); int h = (diag < nf) ? m : (np-nf*m);
int lindx = indx_IJ(diag, diag); int lindx = indx_IJ(diag, diag);
int xindx = (np - 1) - diag * m; // start of B/x int xindx = diag * m; // start of B/x
// Solve L'*X = Y; // Solve L'*X = Y;
for (int k = 0; k < h; k++) { for (int k = h-1; k >= 0; k--) {
double x = B[xindx - k]; double x = B[xindx + k];
for (int i = 0; i < k ; i++) { for (int i = k+1; i < h ; i++) {
x -= B[xindx - i] * L[lindx - k - h * i]; x -= B[xindx + i] * L[lindx + h*i + k];
} }
B[xindx-k] = x/L[lindx - (h+1)*k]; B[xindx+k] = x/L[lindx + (h+1)*k];
} }
return;
} }
/** /**
* Subtract Y-column from running B using Y-data in tile j * Subtract Y-column from running B using Y-data in tile j
* from the tile i > j * from the tile i > j
* @param i tile row corresponding to diagonal L tiles * @param i tile row to subtract from in B
* @param j tile row to subtract from in B * @param j tile row corresponding to diagonal L tiles
*/ */
public void subYCol( public void subYCol(
int i, // i > j, int i, // i > j,
int j) { // j < i int j) { // j < i
int lindx = indx_IJ(i,j); int lindx = indx_IJ(i,j);
int xindx = j * m; // start of B/x int xsrc = j * m; // start of B/x to use
int xdst = i * m; // start of B/x to modify
int h = (i < nf) ? m : (np-nf*m); int h = (i < nf) ? m : (np-nf*m);
for (int k = 0; k < h; k++) { for (int k = 0; k < h; k++) {
double x = B[xindx+k]; double x = B[xdst + k];
for (int l = 0; l < m; l++) { for (int l = 0; l < m; l++) {
x -= L[lindx + k * m + l] * B[xindx+l]; x -= L[lindx + k * m + l] * B[xsrc+l];
} }
B[xindx+k] = x; B[xdst + k] = x;
} }
return;
} }
/** /**
* Subtract X-column from running B using X-data in tile j * Subtract X-column from running B using X-data in tile j
* from the tile i > j, counting from the right-bottom * from the tile i > j, counting from the right-bottom
* @param i tile row corresponding to diagonal L tiles, from the bottom * @param i tile row (from the bottom) to subtract from in B
* @param j tile row (from the bottom) to subtract from in B * @param j tile row corresponding to diagonal L tiles, from the bottom
*/ */
public void subXCol( public void subXCol(
int i, // i < j,
int j) { // j > i
int lindx = indx_IJ(j,i); // transposed
int xsrc = j * m; // start of B/x to use
int xdst = i * m; // start of B/x to modify
int h = (j < nf) ? m : (np-nf*m);
for (int k = 0; k < m; k++) {
double x = B[xdst + k];
for (int l = 0; l < h; l++) {
x -= L[lindx + l * m + k] * B[xsrc + l];
}
B[xdst + k] = x;
}
return;
}
public void subXCol0(
int i, // i > j, int i, // i > j,
int j) { // j < i int j) { // j < i
int lindx = rindx_IJ(i,j); int lindx = rindx_IJ(i,j);
int xindx = np - 1 - j * m; // start of B/x int xsrc = np - 1 - j * m; // start of B/x to use
int xdst = np - 1 - i * m; // start of B/x to modify
int h = (i < nf) ? m : (np-nf*m); int h = (i < nf) ? m : (np-nf*m);
for (int k = 0; k < h; k++) { for (int k = 0; k < h; k++) {
double x = B[xindx-k]; double x = B[xdst - k];
for (int l = 0; l < m; l++) { for (int l = 0; l < m; l++) {
x -= L[lindx - l * m - k] * B[xindx-l]; x -= L[lindx - l * m - k] * B[xsrc - l];
} }
B[xindx-k] = x; B[xdst - k] = x;
} }
return;
} }
public Matrix solve (Matrix b) { public Matrix solve (Matrix b) {
...@@ -453,12 +477,12 @@ public class CholeskyBlock { ...@@ -453,12 +477,12 @@ public class CholeskyBlock {
// L, B are set up // L, B are set up
// convert top fragment of B->Y using top-left tile of L // convert top fragment of B->Y using top-left tile of L
solveY(0); // int diag) solveY(0); // int diag)
for (int tile_diag = 1; tile_diag < n; tile_diag++) { for (int tile_diag = 0; tile_diag < (n -1); tile_diag++) {
final int ftile_diag = tile_diag; final int ftile_diag = tile_diag;
// subtract vertical column of tiles for top (previous) segment of B->Y from the remaining // subtract vertical column of tiles for top (previous) segment of B->Y from the remaining
// segments. After subtracting from the top segment, immediately calculate Y for it, all // segments. After subtracting from the top segment, immediately calculate Y for it, all
// other segments subtract in parallel // other segments subtract in parallel
ai.set(tile_diag); // start from the second tile segment ai.set(tile_diag+1); // start from the second tile segment
for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs
threads[ithread] = new Thread() { threads[ithread] = new Thread() {
public void run() { public void run() {
...@@ -466,8 +490,8 @@ public class CholeskyBlock { ...@@ -466,8 +490,8 @@ public class CholeskyBlock {
subYCol( subYCol(
tile_seg, // int i, // i > j, tile_seg, // int i, // i > j,
ftile_diag); // int j) ftile_diag); // int j)
if (tile_seg == ftile_diag) { if (tile_seg == (ftile_diag+1)) {
solveY(ftile_diag); // int diag) solveY(tile_seg); // int diag)
} }
} }
} }
...@@ -476,22 +500,22 @@ public class CholeskyBlock { ...@@ -476,22 +500,22 @@ public class CholeskyBlock {
ImageDtt.startAndJoin(threads); ImageDtt.startAndJoin(threads);
} }
// Solve L'X = Y // Solve L'X = Y
solveX(0); // int diag) solveX(n-1); // int diag)
for (int tile_diag = 1; tile_diag < n; tile_diag++) { for (int tile_diag = n-1; tile_diag > 0; tile_diag--) {
final int ftile_diag = tile_diag; final int ftile_diag = tile_diag;
// subtract vertical column of tiles for top (previous) segment of X->Y from the remaining // subtract vertical column of tiles for top (previous) segment of X->Y from the remaining
// segments. After subtracting from the top segment, immediately calculate Y for it, all // segments. After subtracting from the top segment, immediately calculate Y for it, all
// other segments subtract in parallel // other segments subtract in parallel
ai.set(tile_diag); // start from the second tile segment ai.set(tile_diag - 1); // start from the second tile segment
for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs for (int ithread = 0; ithread < threads.length; ithread++) { // first sum for pairs
threads[ithread] = new Thread() { threads[ithread] = new Thread() {
public void run() { public void run() {
for (int tile_seg = ai.getAndIncrement(); tile_seg < n; tile_seg = ai.getAndIncrement()) { for (int tile_seg = ai.getAndDecrement(); tile_seg >= 0; tile_seg = ai.getAndDecrement()) {
subXCol( subXCol(
tile_seg, // int i, // i > j, tile_seg, // int i, // i > j,
ftile_diag); // int j) ftile_diag); // int j)
if (tile_seg == ftile_diag) { if (tile_seg == (ftile_diag - 1)) {
solveX(ftile_diag); // int diag) solveX(tile_seg); // int diag)
} }
} }
} }
......
...@@ -293,7 +293,7 @@ public class CholeskyLLTMulti { ...@@ -293,7 +293,7 @@ public class CholeskyLLTMulti {
Matrix wjtjlambda_copy1 = new Matrix(wjtjlambda.getArrayCopy()); Matrix wjtjlambda_copy1 = new Matrix(wjtjlambda.getArrayCopy());
Matrix wjtjlambda_copy2 = new Matrix(wjtjlambda.getArrayCopy()); Matrix wjtjlambda_copy2 = new Matrix(wjtjlambda.getArrayCopy());
Matrix wjtjlambda_copy = new Matrix(wjtjlambda.getArrayCopy()); Matrix wjtjlambda_copy = new Matrix(wjtjlambda.getArrayCopy());
double [] starts = new double[7]; double [] starts = new double[8];
double start_time = (((double) System.nanoTime()) * 1E-9); double start_time = (((double) System.nanoTime()) * 1E-9);
CholeskyDecomposition choleskyDecomposition = new CholeskyDecomposition(wjtjlambda_copy); CholeskyDecomposition choleskyDecomposition = new CholeskyDecomposition(wjtjlambda_copy);
...@@ -301,8 +301,8 @@ public class CholeskyLLTMulti { ...@@ -301,8 +301,8 @@ public class CholeskyLLTMulti {
CholeskyLLTMulti choleskyLLTMulti_single = new CholeskyLLTMulti(wjtjlambda_copy0,0); CholeskyLLTMulti choleskyLLTMulti_single = new CholeskyLLTMulti(wjtjlambda_copy0,0);
starts[1] = (((double) System.nanoTime()) * 1E-9) - start_time; starts[1] = (((double) System.nanoTime()) * 1E-9) - start_time;
CholeskyBlock choleskyBlock = new CholeskyBlock(wjtjlambda_copy1.getArray(),block_size); CholeskyBlock choleskyBlock = new CholeskyBlock(wjtjlambda_copy1.getArray(),block_size);
starts[2] = (((double) System.nanoTime()) * 1E-9) - start_time; // starts[2] = (((double) System.nanoTime()) * 1E-9) - start_time;
choleskyBlock.choleskyBlockMulti(); // choleskyBlock.choleskyBlockMulti();
starts[3] = (((double) System.nanoTime()) * 1E-9) - start_time; starts[3] = (((double) System.nanoTime()) * 1E-9) - start_time;
double [][] LTriangle = choleskyBlock.get_LTriangle(); double [][] LTriangle = choleskyBlock.get_LTriangle();
Matrix mdelta_cholesky = choleskyDecomposition.solve(jty); Matrix mdelta_cholesky = choleskyDecomposition.solve(jty);
...@@ -311,15 +311,20 @@ public class CholeskyLLTMulti { ...@@ -311,15 +311,20 @@ public class CholeskyLLTMulti {
starts[5] = (((double) System.nanoTime()) * 1E-9) - start_time; starts[5] = (((double) System.nanoTime()) * 1E-9) - start_time;
Matrix mdelta_cholesky_multi = CholeskyLLTMulti.solve_single(jty, LTriangle); Matrix mdelta_cholesky_multi = CholeskyLLTMulti.solve_single(jty, LTriangle);
starts[6] = (((double) System.nanoTime()) * 1E-9) - start_time; starts[6] = (((double) System.nanoTime()) * 1E-9) - start_time;
Matrix mdelta_cholesky_block = choleskyBlock.solve(jty);
starts[7] = (((double) System.nanoTime()) * 1E-9) - start_time;
System.out.println("testCholesky(): block_size= "+block_size); System.out.println("testCholesky(): block_size= "+block_size);
System.out.println("testCholesky(): choleskyDecomposition: "+(starts[0])+" sec"); System.out.println("testCholesky(): choleskyDecomposition: "+(starts[0])+" sec");
System.out.println("testCholesky(): choleskyLLTMulti_single:"+(starts[1]-starts[0])+" sec"); System.out.println("testCholesky(): choleskyLLTMulti_single:"+(starts[1]-starts[0])+" sec");
System.out.println("testCholesky(): CholeskyBlock(): "+ (starts[2]-starts[1])+" sec"); System.out.println("testCholesky(): CholeskyBlock(): "+(starts[3]-starts[1])+" sec");
System.out.println("testCholesky(): choleskyBlockMulti(): "+(starts[3]-starts[2])+" sec"); // System.out.println("testCholesky(): choleskyBlockMulti(): "+(starts[3]-starts[2])+" sec");
System.out.println("testCholesky(): get_LTriangle(): "+(starts[4]-starts[3])+" sec"); System.out.println("testCholesky(): get_LTriangle(): "+(starts[4]-starts[3])+" sec");
System.out.println("testCholesky(): solve_single(,single): "+(starts[5]-starts[4])+" sec"); System.out.println("testCholesky(): solve_single(,single): "+(starts[5]-starts[4])+" sec");
System.out.println("testCholesky(): solve_single(,block): "+(starts[6]-starts[5])+" sec"); System.out.println("testCholesky(): solve_single(,block): "+(starts[6]-starts[5])+" sec");
System.out.println("testCholesky(): block.solve(): "+(starts[7]-starts[6])+" sec");
System.out.println("testCholesky(): title= "+title); System.out.println("testCholesky(): title= "+title);
System.out.println("testCholesky(): dbg_title= "+dbg_title); System.out.println("testCholesky(): dbg_title= "+dbg_title);
...@@ -373,6 +378,8 @@ public class CholeskyLLTMulti { ...@@ -373,6 +378,8 @@ public class CholeskyLLTMulti {
starts[6] = (((double) System.nanoTime()) * 1E-9) - start_time; starts[6] = (((double) System.nanoTime()) * 1E-9) - start_time;
Matrix mdelta_cholesky_fast = choleskyLLTMulti.solve(jty, 2); Matrix mdelta_cholesky_fast = choleskyLLTMulti.solve(jty, 2);
starts[7] = (((double) System.nanoTime()) * 1E-9) - start_time; starts[7] = (((double) System.nanoTime()) * 1E-9) - start_time;
System.out.println("testCholesky(): choleskyDecomposition: "+(starts[0])+" sec"); System.out.println("testCholesky(): choleskyDecomposition: "+(starts[0])+" sec");
System.out.println("testCholesky(): choleskyLLTMulti_single:"+(starts[1]-starts[0])+" sec"); System.out.println("testCholesky(): choleskyLLTMulti_single:"+(starts[1]-starts[0])+" sec");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment