Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
b09025fb
Commit
b09025fb
authored
Mar 22, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed multi-threaded solution
parent
e27934db
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
172 additions
and
231 deletions
+172
-231
CholeskyBlock.java
src/main/java/com/elphel/imagej/common/CholeskyBlock.java
+172
-231
No files found.
src/main/java/com/elphel/imagej/common/CholeskyBlock.java
View file @
b09025fb
...
...
@@ -20,27 +20,61 @@ package com.elphel.imagej.common;
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
*/
import
java.util.Arrays
;
import
java.util.concurrent.atomic.AtomicBoolean
;
import
java.util.concurrent.atomic.AtomicInteger
;
import
com.elphel.imagej.tileprocessor.ImageDtt
;
import
Jama.Matrix
;
public
class
CholeskyBlock
{
public
static
int
dflt_m
=
70
;
// tile size if not specified during initialization
public
static
int
dflt_threads
=
100
;
// maximal number of threads to use for decomposition
public
static
int
dflt_solve_threads
=
16
;
// 4; // maximal number of threads to use for solve()
public
static
boolean
debug
=
false
;
// true; // false;
public
final
int
m
;
// tile size
public
final
int
np
;
// number of elements in row/col
public
final
int
n
;
// number of tiles in row/col
public
final
int
nf
;
// number of full rows/cols
public
int
decomp_threads
;
public
final
double
[]
A
;
public
final
double
[]
L
;
public
final
double
[]
B
;
public
CholeskyBlock
(
double
[][]
A_in
)
{
m
=
dflt_m
;
decomp_threads
=
dflt_threads
;
np
=
A_in
.
length
;
nf
=
np
/
m
;
n
=
((
nf
*
m
)
<
np
)?
(
nf
+
1
)
:
nf
;
A
=
new
double
[
np
*
np
];
// [n*n*m*m];
L
=
new
double
[
A
.
length
];
B
=
new
double
[
np
];
setup_ATriangle
(
A_in
);
choleskyBlockMulti
();
}
public
CholeskyBlock
(
double
[][]
A_in
,
int
size
)
{
m
=
size
;
decomp_threads
=
dflt_threads
;
np
=
A_in
.
length
;
nf
=
np
/
m
;
n
=
((
nf
*
m
)
<
np
)?
(
nf
+
1
)
:
nf
;
A
=
new
double
[
np
*
np
];
// [n*n*m*m];
L
=
new
double
[
A
.
length
];
B
=
new
double
[
np
];
setup_ATriangle
(
A_in
);
choleskyBlockMulti
();
}
public
CholeskyBlock
(
double
[][]
A_in
,
int
size
,
int
decomp_threads
)
{
m
=
size
;
this
.
decomp_threads
=
decomp_threads
;
np
=
A_in
.
length
;
nf
=
np
/
m
;
n
=
((
nf
*
m
)
<
np
)?
(
nf
+
1
)
:
nf
;
...
...
@@ -183,22 +217,6 @@ public class CholeskyBlock {
return
j
*
(
m
*
np
)
+
((
j
>=
nf
)
?
(
np
-
nf
*
m
):
m
)
*
m
*
i
;
}
/**
* Tile bottom-right corner, counting i - up, j - to the left.
* Full tile are on the bottom and right (partial - on the top
* and left
* @param i tile row from bottom
* @param j tile column from right
* @return index of the bottom-right corner of the tile (the largest!)
*/
public
int
rindx_IJ
(
int
i
,
int
j
)
{
// upside down
// return (j * (m * np)) + (np-1 -i) * ((j >= nf) ?(np-nf*m): m);
// return (np*np-1) - indx_IJ(i, j);
return
np
*
np
-
1
-
(
j
*
(
m
*
np
)
+
((
j
>=
nf
)
?
(
np
-
nf
*
m
):
m
)
*
m
*
i
);
}
public
void
setL21
(
int
i
,
// i > j,
int
j
)
{
// j <nf
...
...
@@ -249,7 +267,7 @@ public class CholeskyBlock {
}
public
void
choleskyBlockMulti
()
{
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
();
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
decomp_threads
);
// 1
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
cholesky_single
(
0
);
// Calculate first column under diagonal (L21) - maybe use m
...
...
@@ -330,8 +348,7 @@ public class CholeskyBlock {
}
// Cholesky-Banachiewicz Algorithm ?
// Single-threaded
// Single-threaded, used for a single-tile Cholesky deconstruction
public
void
cholesky_single
(
int
diag
)
{
int
h
=
(
diag
<
nf
)
?
m
:
(
np
-
nf
*
m
);
...
...
@@ -370,10 +387,8 @@ public class CholeskyBlock {
double
x
=
B
[
xindx
+
k
];
for
(
int
i
=
0
;
i
<
k
;
i
++)
{
x
-=
B
[
xindx
+
i
]
*
L
[
lindx
+
h
*
k
+
i
];
// x[k] -= x[i]*L[k][i];
}
B
[
xindx
+
k
]
=
x
/
L
[
lindx
+
(
h
+
1
)*
k
];
// x[k] /= L[k][k];
}
return
;
}
...
...
@@ -447,24 +462,6 @@ public class CholeskyBlock {
}
public
void
subXCol0
(
int
i
,
// i > j,
int
j
)
{
// j < i
int
lindx
=
rindx_IJ
(
i
,
j
);
int
xsrc
=
np
-
1
-
j
*
m
;
// start of B/x to use
int
xdst
=
np
-
1
-
i
*
m
;
// start of B/x to modify
int
h
=
(
i
<
nf
)
?
m
:
(
np
-
nf
*
m
);
for
(
int
k
=
0
;
k
<
h
;
k
++)
{
double
x
=
B
[
xdst
-
k
];
for
(
int
l
=
0
;
l
<
m
;
l
++)
{
x
-=
L
[
lindx
-
l
*
m
-
k
]
*
B
[
xsrc
-
l
];
}
B
[
xdst
-
k
]
=
x
;
}
return
;
}
public
Matrix
solve
(
Matrix
b
)
{
setB
(
b
);
solve
();
...
...
@@ -472,61 +469,147 @@ public class CholeskyBlock {
}
public
void
solve
()
{
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
();
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
// L, B are set up
// convert top fragment of B->Y using top-left tile of L
solveY
(
0
);
// int diag)
for
(
int
tile_diag
=
0
;
tile_diag
<
(
n
-
1
);
tile_diag
++)
{
final
int
ftile_diag
=
tile_diag
;
// subtract vertical column of tiles for top (previous) segment of B->Y from the remaining
// segments. After subtracting from the top segment, immediately calculate Y for it, all
// other segments subtract in parallel
ai
.
set
(
tile_diag
+
1
);
// start from the second tile segment
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
// first sum for pairs
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
for
(
int
tile_seg
=
ai
.
getAndIncrement
();
tile_seg
<
n
;
tile_seg
=
ai
.
getAndIncrement
())
{
subYCol
(
tile_seg
,
// int i, // i > j,
ftile_diag
);
// int j)
if
(
tile_seg
==
(
ftile_diag
+
1
))
{
solveY
(
tile_seg
);
// int diag)
}
solve
(
dflt_solve_threads
);
// this.solve_threads);
}
public
void
solve
(
int
solve_threads
)
{
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
solve_threads
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
AtomicBoolean
[][]
tile_started
=
new
AtomicBoolean
[
n
][
n
];
AtomicBoolean
[][]
tile_done
=
new
AtomicBoolean
[
n
][
n
];
AtomicInteger
[]
row_len_started
=
new
AtomicInteger
[
n
];
//
AtomicInteger
[]
row_len_done
=
new
AtomicInteger
[
n
];
//
AtomicBoolean
[]
row_busy
=
new
AtomicBoolean
[
n
];
AtomicInteger
rows_done
=
new
AtomicInteger
(
0
);
// this number of tile rows solved
for
(
int
row
=
0
;
row
<
n
;
row
++)
{
for
(
int
col
=
0
;
col
<=
row
;
col
++)
{
tile_started
[
row
][
col
]
=
new
AtomicBoolean
(
false
);
tile_done
[
row
][
col
]
=
new
AtomicBoolean
(
false
);
}
row_len_started
[
row
]
=
new
AtomicInteger
(
0
);
row_len_done
[
row
]
=
new
AtomicInteger
(
0
);
row_busy
[
row
]
=
new
AtomicBoolean
(
false
);
}
double
[]
starts
=
new
double
[
5
];
double
start_time
=
(((
double
)
System
.
nanoTime
())
*
1
E
-
9
);
solveY
(
0
);
// int diag)
tile_started
[
0
][
0
].
set
(
true
);
tile_done
[
0
][
0
].
set
(
true
);
rows_done
.
set
(
1
);
row_len_started
[
0
].
set
(
1
);
row_len_done
[
0
].
set
(
1
);
starts
[
0
]
=
(((
double
)
System
.
nanoTime
())
*
1
E
-
9
)
-
start_time
;
int
[][]
debug_order
=
debug
?
new
int
[
n
][
n
]:
null
;
if
(
debug
)
debug_order
[
0
][
0
]
=
ai
.
getAndIncrement
()+
1
;
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
// first sum for pairs
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
while
(
rows_done
.
get
()
<
n
)
{
got_tile:
{
for
(
int
row1
=
rows_done
.
get
();
row1
<
n
;
row1
++)
{
// is it ready for diagonal solve?
if
((
row_len_done
[
row1
].
get
()
==
row1
)
&&
!
tile_started
[
row1
][
row1
].
getAndSet
(
true
))
{
solveY
(
row1
);
rows_done
.
set
(
row1
+
1
);
// no race is possible?
if
(
debug
)
debug_order
[
row1
][
row1
]
=
ai
.
getAndIncrement
()+
1
;
break
got_tile
;
}
if
(!
row_busy
[
row1
].
getAndSet
(
true
))
{
// do not write to the same row, avoid races
for
(
int
col1
=
row_len_started
[
row1
].
get
();
(
col1
<
row1
)
&&
(
col1
<
rows_done
.
get
())
;
col1
++)
{
if
(!
tile_started
[
row1
][
col1
].
getAndSet
(
true
))
{
row_len_started
[
row1
].
getAndAccumulate
(
col1
+
1
,
Math:
:
max
);
subYCol
(
row1
,
// int i, // i > j,
col1
);
// int j)
row_busy
[
row1
].
set
(
false
);
// release row
if
(
debug
)
debug_order
[
row1
][
col1
]
=
ai
.
getAndIncrement
()+
1
;
tile_done
[
row1
][
col1
].
set
(
true
);
// verify if all below down to row_len_done are finished, set row_len_done to max
for
(
int
col2
=
row_len_done
[
row1
].
get
();
col2
<
row1
;
col2
++)
{
if
(
tile_done
[
row1
][
col2
].
get
())
{
row_len_done
[
row1
].
getAndAccumulate
(
col2
+
1
,
Math:
:
max
);
}
}
break
got_tile
;
}
}
row_busy
[
row1
].
set
(
false
);
// release row
}
}
break
;
// could not find a tile to process (number of parallels go down) - reduce number of remaining threads
}
}
}
;
}
ImageDtt
.
startAndJoin
(
threads
);
}
}
}
;
}
ImageDtt
.
startAndJoin
(
threads
);
// Solve L'X = Y
solveX
(
n
-
1
);
// int diag)
for
(
int
tile_diag
=
n
-
1
;
tile_diag
>
0
;
tile_diag
--)
{
final
int
ftile_diag
=
tile_diag
;
// subtract vertical column of tiles for top (previous) segment of X->Y from the remaining
// segments. After subtracting from the top segment, immediately calculate Y for it, all
// other segments subtract in parallel
ai
.
set
(
tile_diag
-
1
);
// start from the second tile segment
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
// first sum for pairs
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
for
(
int
tile_seg
=
ai
.
getAndDecrement
();
tile_seg
>=
0
;
tile_seg
=
ai
.
getAndDecrement
())
{
subXCol
(
tile_seg
,
// int i, // i > j,
ftile_diag
);
// int j)
if
(
tile_seg
==
(
ftile_diag
-
1
))
{
solveX
(
tile_seg
);
// int diag)
}
rows_done
.
set
(
0
);
// this number of tile rows solved
for
(
int
row
=
0
;
row
<
n
;
row
++)
{
for
(
int
col
=
0
;
col
<=
row
;
col
++)
{
tile_started
[
row
][
col
].
set
(
false
);
tile_done
[
row
][
col
].
set
(
false
);
}
row_len_started
[
row
].
set
(
0
);
row_len_done
[
row
].
set
(
0
);
}
starts
[
2
]
=
(((
double
)
System
.
nanoTime
())
*
1
E
-
9
)
-
start_time
;
solveX
(
n
-
1
);
// int diag)
starts
[
3
]
=
(((
double
)
System
.
nanoTime
())
*
1
E
-
9
)
-
start_time
;
tile_started
[
0
][
0
].
set
(
true
);
tile_done
[
0
][
0
].
set
(
true
);
rows_done
.
set
(
1
);
row_len_started
[
0
].
set
(
1
);
row_len_done
[
0
].
set
(
1
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
// first sum for pairs
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
while
(
rows_done
.
get
()
<
n
)
{
got_tile:
{
for
(
int
row1
=
rows_done
.
get
();
row1
<
n
;
row1
++)
{
// is it ready for diagonal solve?
if
((
row_len_done
[
row1
].
get
()
==
row1
)
&&
!
tile_started
[
row1
][
row1
].
getAndSet
(
true
))
{
solveX
(
n
-
1
-
row1
);
rows_done
.
set
(
row1
+
1
);
// no race is possible?
break
got_tile
;
}
if
(!
row_busy
[
row1
].
getAndSet
(
true
))
{
// do not write to the same row, avoid races
for
(
int
col1
=
row_len_started
[
row1
].
get
();
(
col1
<
row1
)
&&
(
col1
<
rows_done
.
get
())
;
col1
++)
{
if
(!
tile_started
[
row1
][
col1
].
getAndSet
(
true
))
{
row_len_started
[
row1
].
getAndAccumulate
(
col1
+
1
,
Math:
:
max
);
subXCol
(
n
-
1
-
row1
,
// int i, // i > j,
n
-
1
-
col1
);
// int j)
row_busy
[
row1
].
set
(
false
);
// release row
tile_done
[
row1
][
col1
].
set
(
true
);
// verify if all below down to row_len_done are finished, set row_len_done to max
for
(
int
col2
=
row_len_done
[
row1
].
get
();
col2
<
row1
;
col2
++)
{
if
(
tile_done
[
row1
][
col2
].
get
())
{
row_len_done
[
row1
].
getAndAccumulate
(
col2
+
1
,
Math:
:
max
);
}
}
break
got_tile
;
}
}
row_busy
[
row1
].
set
(
false
);
// release row
}
}
break
;
// could not find a tile to process (number of parallels go down) - reduce number of remaining threads
}
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
starts
[
4
]
=
(((
double
)
System
.
nanoTime
())
*
1
E
-
9
)
-
start_time
;
if
(
debug
)
{
System
.
out
.
println
(
"\ncholeskyBlock.solve1(): ==== number of threads = "
+
threads
.
length
+
"===="
);
System
.
out
.
println
(
String
.
format
(
"choleskyBlock.solve1(): solveY(0) %12.9f sec"
,(
starts
[
0
])));
System
.
out
.
println
(
String
.
format
(
"choleskyBlock.solve1(): solveY_other %12.9f sec- including 1-st column"
,(
starts
[
2
]
-
starts
[
0
])));
System
.
out
.
println
(
String
.
format
(
"choleskyBlock.solve1(): solveX(n-1) %12.9f sec"
,(
starts
[
3
]
-
starts
[
2
])));
System
.
out
.
println
(
String
.
format
(
"choleskyBlock.solve1(): solveX_other %12.9f sec"
,(
starts
[
4
]
-
starts
[
3
])));
}
return
;
}
public
static
Matrix
solve
(
Matrix
B
,
double
[][]
L
)
{
int
n
=
L
.
length
;
if
(
B
.
getRowDimension
()
!=
n
)
{
...
...
@@ -551,146 +634,4 @@ public class CholeskyBlock {
return
new
Matrix
(
x
,
n
);
}
/*
public void setL21(
int i, // i > j,
int j) { // j <nf
int indx_diag = indx_IJ(j,j);
int indx_ij = indx_IJ(i,j);
int h = (i < nf) ? m : (np-nf*m);
// prepare solving Lx = b, copy tile A -> L
System.arraycopy(A, indx_ij, L, indx_ij, m * h);
for (int l_row = 0; l_row < m; l_row++) { // was <m <h!
for (int x_col= 0; x_col < h; x_col++) { // b-vector
int lindx = indx_ij + m * x_col + l_row;
double ls = L[lindx];
for (int l_col = 0; l_col < l_row; l_col++) {
ls -= L[indx_ij + m * x_col + l_col] * L[indx_diag + m* l_row+l_col];
}
L[lindx] = ls/L[indx_diag + (m + 1)* l_row];
}
}
return;
}
*/
// ======================== unused =====================
public
static
double
[][]
cholesky_single
(
double
[][]
a
,
double
[][]
l
)
{
int
n
=
a
.
length
;
for
(
int
j
=
0
;
j
<
n
;
j
++)
{
Arrays
.
fill
(
l
[
j
],
0.0
);
}
// Main loop.
for
(
int
j
=
0
;
j
<
n
;
j
++)
{
double
[]
Lrowj
=
l
[
j
];
double
d
=
0.0
;
for
(
int
k
=
0
;
k
<
j
;
k
++)
{
double
[]
Lrowk
=
l
[
k
];
double
s
=
0.0
;
for
(
int
i
=
0
;
i
<
k
;
i
++)
{
s
+=
Lrowk
[
i
]*
Lrowj
[
i
];
}
Lrowj
[
k
]
=
s
=
(
a
[
j
][
k
]
-
s
)/
l
[
k
][
k
];
d
=
d
+
s
*
s
;
}
d
=
a
[
j
][
j
]
-
d
;
l
[
j
][
j
]
=
Math
.
sqrt
(
Math
.
max
(
d
,
0.0
));
}
return
l
;
}
/**
* Get a new diagonal square submatrix, copy only lower triangle including diagonal
* @param arr A or L packed array (line-scan order for each tile column)
* @param i tile index on the diagonal
* @return new array with tile data (m x m, or smaller for the bottom right)
*/
public
double
[][]
getDiagTriangle
(
double
[]
arr
,
int
i
){
int
l
=
(
i
>=
nf
)
?
(
np
-
nf
*
m
):
m
;
double
[][]
a_diag
=
new
double
[
l
][
l
];
return
getDiagLTriangle
(
arr
,
a_diag
,
i
);
}
/**
* Get a new diagonal square submatrix, copy only lower triangle including diagonal
* @param arr A or L packed array (line-scan order for each tile column)
* @param a_diag - preallocated array (should be smaller if needed)
* @param i tile index on the diagonal
* @return new array with tile data (m x m, or smaller for the bottom right)
*/
public
double
[][]
getDiagLTriangle
(
double
[]
arr
,
double
[][]
a_diag
,
int
i
){
int
indx
=
indx_IJ
(
i
,
i
);
int
l
=
a_diag
.
length
;
for
(
int
k
=
0
;
k
<
l
;
k
++)
{
int
kl
=
k
*
l
;
System
.
arraycopy
(
arr
,
indx
+
kl
,
a_diag
[
k
],
0
,
k
+
1
);
}
return
a_diag
;
}
/**
* Save calculated tile L lower diagonal matrix to a packed array
* @param arr A or L packed array (line-scan order for each tile column)
* @param l_diag lower triangular array with Cholesky decomposition
* @param i tile index on a diagonal
*/
public
void
putDiagLTriangle
(
double
[]
arr
,
double
[][]
l_diag
,
int
i
)
{
int
indx
=
indx_IJ
(
i
,
i
);
int
l
=
l_diag
.
length
;
for
(
int
k
=
0
;
k
<
l
;
k
++)
{
int
kl
=
k
*
l
;
System
.
arraycopy
(
l_diag
[
k
],
0
,
arr
,
indx
+
kl
,
k
+
1
);
}
}
/**
* Get a new diagonal square submatrix
* @param arr A or L packed array (line-scan order for each tile column)
* @param i tile index on the diagonal
* @return new array with tile data (m x m, or smaller for the bottom right)
*/
public
double
[][]
getDiagSquare
(
double
[]
arr
,
int
i
){
int
l
=
(
i
>=
nf
)
?
(
np
-
nf
*
m
):
m
;
double
[][]
a_diag
=
new
double
[
l
][
l
];
return
getDiagSquare
(
arr
,
a_diag
,
i
);
}
/**
* Get a new diagonal square submatrix
* @param arr A or L packed array (line-scan order for each tile column)
* @param a_diag - preallocated array (should be smaller if needed)
* @param i tile index on the diagonal
* @return new array with tile data (m x m, or smaller for the bottom right)
*/
public
double
[][]
getDiagSquare
(
double
[]
arr
,
double
[][]
a_diag
,
int
i
){
int
indx
=
indx_IJ
(
i
,
i
);
int
l
=
a_diag
.
length
;
for
(
int
k
=
0
;
k
<
l
;
k
++)
{
int
kl
=
k
*
l
;
System
.
arraycopy
(
arr
,
indx
+
kl
,
a_diag
[
k
],
0
,
l
);
}
return
a_diag
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment