Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
2594c6f4
Commit
2594c6f4
authored
Mar 23, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Commented code
parent
09a0cb0e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
176 additions
and
37 deletions
+176
-37
CholeskyBlock.java
src/main/java/com/elphel/imagej/common/CholeskyBlock.java
+176
-37
No files found.
src/main/java/com/elphel/imagej/common/CholeskyBlock.java
View file @
2594c6f4
...
@@ -4,6 +4,22 @@ package com.elphel.imagej.common;
...
@@ -4,6 +4,22 @@ package com.elphel.imagej.common;
**
**
** Copyright (C) 2025 Elphel, Inc.
** Copyright (C) 2025 Elphel, Inc.
**
**
** Using publication with Block Cholesky description:
** Chen, Jianping, et al. "Block algorithm and its implementation for Cholesky factorization."
** ICCGI 2013 (2013): 245.
**
** Data (A and L) are stored as 1D arrays, in columns of specified width (m), in line-scan
** order in each column (right, then down), columns themselves - to the right.
** This makes m x m "tiles" compact in terms of cache, for my computer with the 8-core processor
** and N=2258 matrices the optimal m=70. Multithreaded methods operate on tiles.
** When matrix size is not multiple of m, the last (bottom-right) block has a remainder size,
** other bottom and rightmost are rectangular, all other are m x m.
**
** For the tested processor and matrix size the multithreaded improvement is ~9x, for the
** solve() method ~5x compared to a standard Jama single-threaded CholeskyDecomposition class.
** First run in multithreaded mode results is longer execution time than the subsequent runs.
** End of this file contains measured timing.
**
** -----------------------------------------------------------------------------**
** -----------------------------------------------------------------------------**
**
**
** CholeskyBlock.java is free software: you can redistribute it and/or modify
** CholeskyBlock.java is free software: you can redistribute it and/or modify
...
@@ -23,13 +39,14 @@ package com.elphel.imagej.common;
...
@@ -23,13 +39,14 @@ package com.elphel.imagej.common;
import
java.util.Arrays
;
import
java.util.Arrays
;
import
java.util.concurrent.atomic.AtomicBoolean
;
import
java.util.concurrent.atomic.AtomicBoolean
;
import
java.util.concurrent.atomic.AtomicInteger
;
import
java.util.concurrent.atomic.AtomicInteger
;
import
com.elphel.imagej.tileprocessor.ImageDtt
;
import
Jama.Matrix
;
import
Jama.Matrix
;
public
class
CholeskyBlock
{
public
class
CholeskyBlock
{
// These 3 parameters are public and may be set before using the constructor
public
static
int
dflt_m
=
70
;
// tile size if not specified during initialization
public
static
int
dflt_m
=
70
;
// tile size if not specified during initialization
public
static
int
dflt_threads
=
100
;
// maximal number of threads to use for decomposition
public
static
int
dflt_threads
=
100
;
// maximal number of threads to use for decomposition
public
static
int
dflt_solve_threads
=
16
;
// 4; // maximal number of threads to use for solve()
public
static
int
dflt_solve_threads
=
16
;
// 4; // maximal number of threads to use for solve()
public
static
boolean
debug
=
false
;
// true;
public
static
boolean
debug
=
false
;
// true;
private
final
int
m
;
// tile size
private
final
int
m
;
// tile size
private
final
int
np
;
// number of elements in row/col
private
final
int
np
;
// number of elements in row/col
...
@@ -39,7 +56,10 @@ public class CholeskyBlock {
...
@@ -39,7 +56,10 @@ public class CholeskyBlock {
private
final
double
[]
A
;
private
final
double
[]
A
;
private
final
double
[]
L
;
private
final
double
[]
L
;
private
final
double
[]
B
;
private
final
double
[]
B
;
//wjtjlambda_copy1.getArray()
/**
* Create Cholesky factorization from SPD matrix mA
* @param mA - SPD Matrix instance
*/
public
CholeskyBlock
(
Matrix
mA
)
{
public
CholeskyBlock
(
Matrix
mA
)
{
m
=
dflt_m
;
m
=
dflt_m
;
decomp_threads
=
dflt_threads
;
decomp_threads
=
dflt_threads
;
...
@@ -53,6 +73,11 @@ public class CholeskyBlock {
...
@@ -53,6 +73,11 @@ public class CholeskyBlock {
choleskyBlockMulti
();
choleskyBlockMulti
();
}
}
/**
* Create Cholesky factorization from 2D square array representing SPD matrix
* @param A_in - 2D array corresponding to SPD
*/
public
CholeskyBlock
(
public
CholeskyBlock
(
double
[][]
A_in
)
{
double
[][]
A_in
)
{
m
=
dflt_m
;
m
=
dflt_m
;
...
@@ -67,6 +92,11 @@ public class CholeskyBlock {
...
@@ -67,6 +92,11 @@ public class CholeskyBlock {
choleskyBlockMulti
();
choleskyBlockMulti
();
}
}
/**
* Create Cholesky factorization from 2D square array representing SPD matrix
* @param A_in - 2D array corresponding to SPD
* @param size block size (tested optimal 70 easier debugging with 100)
*/
public
CholeskyBlock
(
public
CholeskyBlock
(
double
[][]
A_in
,
double
[][]
A_in
,
int
size
)
{
int
size
)
{
...
@@ -82,6 +112,12 @@ public class CholeskyBlock {
...
@@ -82,6 +112,12 @@ public class CholeskyBlock {
choleskyBlockMulti
();
choleskyBlockMulti
();
}
}
/**
* Create Cholesky factorization from 2D square array representing SPD matrix
* @param A_in - 2D array corresponding to SPD
* @param size block size (tested optimal 70 easier debugging with 100)
* @param decomp_threads specify number of decomposition threads
*/
public
CholeskyBlock
(
public
CholeskyBlock
(
double
[][]
A_in
,
double
[][]
A_in
,
int
size
,
int
size
,
...
@@ -98,6 +134,11 @@ public class CholeskyBlock {
...
@@ -98,6 +134,11 @@ public class CholeskyBlock {
choleskyBlockMulti
();
choleskyBlockMulti
();
}
}
/**
* Set the internal data Array A stored as 1D, linescan in columns from a square SPD array
* @param A_in square SPD array (only lower triangle with the diagonal are used)
*/
private
void
setup_ATriangle
(
double
[][]
A_in
)
{
private
void
setup_ATriangle
(
double
[][]
A_in
)
{
for
(
int
tile_row
=
0
;
tile_row
<
nf
;
tile_row
++)
{
for
(
int
tile_row
=
0
;
tile_row
<
nf
;
tile_row
++)
{
for
(
int
tile_col
=
0
;
tile_col
<
tile_row
;
tile_col
++)
{
for
(
int
tile_col
=
0
;
tile_col
<
tile_row
;
tile_col
++)
{
...
@@ -150,21 +191,19 @@ public class CholeskyBlock {
...
@@ -150,21 +191,19 @@ public class CholeskyBlock {
}
}
}
}
/**
* Get L lower triangular matrix of Cholesky facterization from the internal column-line-scan
* representation.
* @return lower triangular Cholesky Matrix
*/
public
Matrix
getL
()
{
public
Matrix
getL
()
{
return
new
Matrix
(
get_LTriangle
(),
np
,
np
);
return
new
Matrix
(
get_LTriangle
(),
np
,
np
);
}
}
public
void
setB
(
Matrix
mb
)
{
/**
double
[][]
a
=
mb
.
getArray
();
* Get internal Cholesky lower-left matrix as a 2d square array
for
(
int
i
=
0
;
i
<
np
;
i
++)
{
* @return 2d square array containing Cholesky low-left matrix data
B
[
i
]
=
a
[
i
][
0
];
*/
}
}
public
Matrix
getX
()
{
return
new
Matrix
(
B
,
np
);
}
private
double
[][]
get_LTriangle
()
{
private
double
[][]
get_LTriangle
()
{
double
[][]
L_out
=
new
double
[
np
][
np
];
double
[][]
L_out
=
new
double
[
np
][
np
];
for
(
int
tile_row
=
0
;
tile_row
<
nf
;
tile_row
++)
{
for
(
int
tile_row
=
0
;
tile_row
<
nf
;
tile_row
++)
{
...
@@ -217,7 +256,27 @@ public class CholeskyBlock {
...
@@ -217,7 +256,27 @@ public class CholeskyBlock {
}
}
}
}
return
L_out
;
return
L_out
;
}
/**
* Set internal B/Y/X single-column matrix. Internal representation changes from B to Y to X (solution)
* @param mb right-side single-column Matrix (not modified)
*/
public
void
setB
(
Matrix
mb
)
{
double
[][]
a
=
mb
.
getArray
();
for
(
int
i
=
0
;
i
<
np
;
i
++)
{
B
[
i
]
=
a
[
i
][
0
];
}
}
/**
* Get internal B/Y/X single-column matrix. Internal representation changes from B to Y to X (solution)
* @return a single-column Matrix containing the solution
*/
public
Matrix
getX
()
{
return
new
Matrix
(
B
,
np
);
}
}
/**
/**
...
@@ -230,6 +289,11 @@ public class CholeskyBlock {
...
@@ -230,6 +289,11 @@ public class CholeskyBlock {
return
j
*
(
m
*
np
)
+
((
j
>=
nf
)
?
(
np
-
nf
*
m
):
m
)
*
m
*
i
;
return
j
*
(
m
*
np
)
+
((
j
>=
nf
)
?
(
np
-
nf
*
m
):
m
)
*
m
*
i
;
}
}
/**
* Calculate non-diagonal tiles of the block-Cholesky factorization
* @param i - tile row
* @param j - tile column < i (tile row)
*/
private
void
setL21
(
private
void
setL21
(
int
i
,
// i > j,
int
i
,
// i > j,
int
j
)
{
// j <nf
int
j
)
{
// j <nf
...
@@ -251,6 +315,13 @@ public class CholeskyBlock {
...
@@ -251,6 +315,13 @@ public class CholeskyBlock {
return
;
return
;
}
}
/**
* Convert remaining tiles of A-matrix (only in lower-left triangle of tiles, including diagonal ones)
* @param diag row/column of the diagonal tile that defines the remainder of the A matrix (to the right
* and below the diagonal tile
* @param row row of the transformed A tile (> diag)
* @param col column of the transformed A tile (> diag, <= row)
*/
private
void
setA22
(
private
void
setA22
(
int
diag
,
// < col, < row
int
diag
,
// < col, < row
int
row
,
// >= col
int
row
,
// >= col
...
@@ -279,8 +350,11 @@ public class CholeskyBlock {
...
@@ -279,8 +350,11 @@ public class CholeskyBlock {
return
;
return
;
}
}
/**
* Run multithreaded Cholosky factorization, convert internal representation of the A-data to L-data
*/
private
void
choleskyBlockMulti
()
{
private
void
choleskyBlockMulti
()
{
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
decomp_threads
);
// 1);
final
Thread
[]
threads
=
newThreadArray
(
decomp_threads
);
// 1);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
cholesky_single
(
0
);
cholesky_single
(
0
);
// Calculate first column under diagonal (L21) - maybe use m
// Calculate first column under diagonal (L21) - maybe use m
...
@@ -294,8 +368,7 @@ public class CholeskyBlock {
...
@@ -294,8 +368,7 @@ public class CholeskyBlock {
}
}
};
};
}
}
ImageDtt
.
startAndJoin
(
threads
);
startAndJoin
(
threads
);
for
(
int
tile_diag
=
1
;
tile_diag
<
n
;
tile_diag
++)
{
for
(
int
tile_diag
=
1
;
tile_diag
<
n
;
tile_diag
++)
{
final
int
ftile_diag
=
tile_diag
;
final
int
ftile_diag
=
tile_diag
;
// Calculate A in one tile column of the remaining A2'
// Calculate A in one tile column of the remaining A2'
...
@@ -320,7 +393,7 @@ public class CholeskyBlock {
...
@@ -320,7 +393,7 @@ public class CholeskyBlock {
}
}
};
};
}
}
ImageDtt
.
startAndJoin
(
threads
);
startAndJoin
(
threads
);
if
(
ftile_diag
<
(
n
-
1
))
{
if
(
ftile_diag
<
(
n
-
1
))
{
// Now in parallel calculate L in column ftile_diag under diagonal and
// Now in parallel calculate L in column ftile_diag under diagonal and
// finish A2' to the right of ftile_diag column
// finish A2' to the right of ftile_diag column
...
@@ -354,15 +427,18 @@ public class CholeskyBlock {
...
@@ -354,15 +427,18 @@ public class CholeskyBlock {
}
}
};
};
}
}
ImageDtt
.
startAndJoin
(
threads
);
startAndJoin
(
threads
);
}
}
}
}
return
;
return
;
}
}
// Single-threaded, used for a single-tile Cholesky deconstruction
// Single-threaded, used for a single-tile Cholesky deconstruction
/**
* A single-threaded, single-tile Cholesky factorization converting diagonal A-tile to
* the corresponding diagonal L-tile
* @param diag row/column of the tile to convert.
*/
private
void
cholesky_single
(
int
diag
)
{
private
void
cholesky_single
(
int
diag
)
{
int
h
=
(
diag
<
nf
)
?
m
:
(
np
-
nf
*
m
);
int
h
=
(
diag
<
nf
)
?
m
:
(
np
-
nf
*
m
);
int
indx
=
indx_IJ
(
diag
,
diag
);
int
indx
=
indx_IJ
(
diag
,
diag
);
...
@@ -388,14 +464,14 @@ public class CholeskyBlock {
...
@@ -388,14 +464,14 @@ public class CholeskyBlock {
/**
/**
* B has data, will be modified
* Solve L * Y = B for a single-tile data. B has data, will be modified
* to contain Y
* @param diag number of the diagonal tile (last may be smaller)
* @param diag number of the diagonal tile (last may be smaller)
*/
*/
private
void
solveY
(
int
diag
)
{
private
void
solveY
(
int
diag
)
{
int
h
=
(
diag
<
nf
)
?
m
:
(
np
-
nf
*
m
);
int
h
=
(
diag
<
nf
)
?
m
:
(
np
-
nf
*
m
);
int
lindx
=
indx_IJ
(
diag
,
diag
);
int
lindx
=
indx_IJ
(
diag
,
diag
);
int
xindx
=
diag
*
m
;
// start of B/x
int
xindx
=
diag
*
m
;
// start of B/x
// Solve L*Y = B;
for
(
int
k
=
0
;
k
<
h
;
k
++)
{
for
(
int
k
=
0
;
k
<
h
;
k
++)
{
double
x
=
B
[
xindx
+
k
];
double
x
=
B
[
xindx
+
k
];
for
(
int
i
=
0
;
i
<
k
;
i
++)
{
for
(
int
i
=
0
;
i
<
k
;
i
++)
{
...
@@ -407,9 +483,9 @@ public class CholeskyBlock {
...
@@ -407,9 +483,9 @@ public class CholeskyBlock {
}
}
/**
/**
* Solve single-tile Lt*X=Y
, tiles are counted from the right-bottom
* Solve single-tile Lt*X=Y
* B has data
, will be modified
* B has data
(Y), will be modified to contain X
* @param diag number of the diagonal tile
from the bottom-right,
* @param diag number of the diagonal tile
* last may be smaller.
* last may be smaller.
*/
*/
private
void
solveX
(
int
diag
)
{
private
void
solveX
(
int
diag
)
{
...
@@ -429,7 +505,7 @@ public class CholeskyBlock {
...
@@ -429,7 +505,7 @@ public class CholeskyBlock {
/**
/**
* Subtract Y-column from running B using Y-data in tile j
* Subtract Y-column from running B using Y-data in tile j
* from the tile i > j
* from the tile i > j
.
* @param i tile row to subtract from in B
* @param i tile row to subtract from in B
* @param j tile row corresponding to diagonal L tiles
* @param j tile row corresponding to diagonal L tiles
*/
*/
...
@@ -452,15 +528,15 @@ public class CholeskyBlock {
...
@@ -452,15 +528,15 @@ public class CholeskyBlock {
/**
/**
* Subtract X-column from running B using X-data in tile j
* Subtract X-column from running B using X-data in tile j
* from the tile i > j
, counting from the right-bottom
* from the tile i > j
.
* @param i tile row
(from the bottom)
to subtract from in B
* @param i tile row to subtract from in B
* @param j tile row corresponding to diagonal L tiles
, from the bottom
* @param j tile row corresponding to diagonal L tiles
*/
*/
private
void
subXCol
(
private
void
subXCol
(
int
i
,
// i < j,
int
i
,
// i < j,
int
j
)
{
// j > i
int
j
)
{
// j > i
int
lindx
=
indx_IJ
(
j
,
i
);
// transposed
int
lindx
=
indx_IJ
(
j
,
i
);
int
xsrc
=
j
*
m
;
// start of B/x to use
int
xsrc
=
j
*
m
;
// start of B/x to use
int
xdst
=
i
*
m
;
// start of B/x to modify
int
xdst
=
i
*
m
;
// start of B/x to modify
int
h
=
(
j
<
nf
)
?
m
:
(
np
-
nf
*
m
);
int
h
=
(
j
<
nf
)
?
m
:
(
np
-
nf
*
m
);
...
@@ -473,20 +549,32 @@ public class CholeskyBlock {
...
@@ -473,20 +549,32 @@ public class CholeskyBlock {
}
}
return
;
return
;
}
}
/**
* Solve with existing Cholesky decomposition for a single-column Matrix b
* @param b a single-column matrix B that L * Lt * X = B
* @return A single-column solution X
*/
public
Matrix
solve
(
Matrix
b
)
{
public
Matrix
solve
(
Matrix
b
)
{
setB
(
b
);
setB
(
b
);
solve
();
solve
();
return
getX
();
return
getX
();
}
}
/**
* Solve with existing Cholesky decomposition for an internal representation of matrix B,
* replace B data with X data
*/
public
void
solve
()
{
public
void
solve
()
{
solve
(
dflt_solve_threads
);
// this.solve_threads);
solve
(
dflt_solve_threads
);
// this.solve_threads);
}
}
/**
* Solve with existing Cholesky decomposition for an internal representation of matrix B,
* replace B data with X data, limit number of threads to use.
* @param solve_threads maximal number of threads to use
*/
public
void
solve
(
int
solve_threads
)
{
public
void
solve
(
int
solve_threads
)
{
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
solve_threads
);
final
Thread
[]
threads
=
newThreadArray
(
solve_threads
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
AtomicBoolean
[][]
tile_started
=
new
AtomicBoolean
[
n
][
n
];
AtomicBoolean
[][]
tile_started
=
new
AtomicBoolean
[
n
][
n
];
AtomicBoolean
[][]
tile_done
=
new
AtomicBoolean
[
n
][
n
];
AtomicBoolean
[][]
tile_done
=
new
AtomicBoolean
[
n
][
n
];
...
@@ -554,7 +642,7 @@ public class CholeskyBlock {
...
@@ -554,7 +642,7 @@ public class CholeskyBlock {
}
}
};
};
}
}
ImageDtt
.
startAndJoin
(
threads
);
startAndJoin
(
threads
);
// Solve L'X = Y
// Solve L'X = Y
rows_done
.
set
(
0
);
// this number of tile rows solved
rows_done
.
set
(
0
);
// this number of tile rows solved
for
(
int
row
=
0
;
row
<
n
;
row
++)
{
for
(
int
row
=
0
;
row
<
n
;
row
++)
{
...
@@ -611,7 +699,7 @@ public class CholeskyBlock {
...
@@ -611,7 +699,7 @@ public class CholeskyBlock {
}
}
};
};
}
}
ImageDtt
.
startAndJoin
(
threads
);
startAndJoin
(
threads
);
starts
[
4
]
=
(((
double
)
System
.
nanoTime
())
*
1
E
-
9
)
-
start_time
;
starts
[
4
]
=
(((
double
)
System
.
nanoTime
())
*
1
E
-
9
)
-
start_time
;
if
(
debug
)
{
if
(
debug
)
{
System
.
out
.
println
(
"\ncholeskyBlock.solve(): ==== number of threads = "
+
threads
.
length
+
"===="
);
System
.
out
.
println
(
"\ncholeskyBlock.solve(): ==== number of threads = "
+
threads
.
length
+
"===="
);
...
@@ -624,6 +712,12 @@ public class CholeskyBlock {
...
@@ -624,6 +712,12 @@ public class CholeskyBlock {
}
}
// for comparison/verification
// for comparison/verification
/**
* Standard single-threaded solution with Cholesky decomposition from Jama used for comparison.
* @param B A single-column Matrix for the right side of the equation A * X = L * Lt * X = B
* @param L Cholesky decomposition lower-left Matrix
* @return a single-column solution X.
*/
public
static
Matrix
solve_single
(
Matrix
B
,
double
[][]
L
)
{
public
static
Matrix
solve_single
(
Matrix
B
,
double
[][]
L
)
{
int
n
=
L
.
length
;
int
n
=
L
.
length
;
if
(
B
.
getRowDimension
()
!=
n
)
{
if
(
B
.
getRowDimension
()
!=
n
)
{
...
@@ -647,6 +741,51 @@ public class CholeskyBlock {
...
@@ -647,6 +741,51 @@ public class CholeskyBlock {
}
}
return
new
Matrix
(
x
,
n
);
return
new
Matrix
(
x
,
n
);
}
}
/*
* Multithreading methods from Stephan Preibisch's Multithreading.java class. See:
* http://repo.or.cz/w/trakem2.git?a=blob;f=mpi/fruitfly/general/MultiThreading.java;hb=HEAD
*/
/**
* Create a Thread[] array as large as the number of processors available.
* @return an array of threads
*/
public
static
Thread
[]
newThreadArray
()
{
return
newThreadArray
(
100
);
}
/**
* Create a Thread[] array as large as the number of processors available.
* @param maxCPUs limit number of threads
* @return an array of threads
*/
public
static
Thread
[]
newThreadArray
(
int
maxCPUs
)
{
// USED in lwir
int
n_cpus
=
Runtime
.
getRuntime
().
availableProcessors
();
if
(
n_cpus
>
maxCPUs
)
n_cpus
=
maxCPUs
;
return
new
Thread
[
n_cpus
];
}
/**
* Start all given threads and wait on each of them until all are done.
* @param threads to start
*/
public
static
void
startAndJoin
(
Thread
[]
threads
)
// USED in lwir
{
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
++
ithread
)
{
threads
[
ithread
].
setPriority
(
Thread
.
NORM_PRIORITY
);
threads
[
ithread
].
start
();
}
try
{
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
++
ithread
)
threads
[
ithread
].
join
();
}
catch
(
InterruptedException
ie
)
{
throw
new
RuntimeException
(
ie
);
}
}
/* Results
/* Results
First run, next MULTITHREADED ones are faster. Ran on an 8-core x86, 16 threads
First run, next MULTITHREADED ones are faster. Ran on an 8-core x86, 16 threads
choleskyBlockMulti() last pass n= 33, tile_diag=32
choleskyBlockMulti() last pass n= 33, tile_diag=32
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment