Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
663a964a
Commit
663a964a
authored
Mar 12, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Testing parallel Cholesky
parent
54367395
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
481 additions
and
21 deletions
+481
-21
CholeskyLDLTMulti.java
...main/java/com/elphel/imagej/common/CholeskyLDLTMulti.java
+427
-0
VegetationLMA.java
...main/java/com/elphel/imagej/vegetation/VegetationLMA.java
+54
-21
No files found.
src/main/java/com/elphel/imagej/common/CholeskyLDLTMulti.java
0 → 100644
View file @
663a964a
package
com
.
elphel
.
imagej
.
common
;
import
java.util.Arrays
;
import
java.util.concurrent.atomic.AtomicInteger
;
import
com.elphel.imagej.tileprocessor.ImageDtt
;
import
Jama.CholeskyDecomposition
;
import
Jama.Matrix
;
public
class
CholeskyLDLTMulti
{
public
final
int
n
;
public
final
double
[][]
L
;
public
int
solve_step
=
10
;
public
double
thread_scale
=
2.5
;
// split jobs in thread_scale * num_treads chunks
// public double [][] L;
public
CholeskyLDLTMulti
(
Matrix
matA
,
boolean
b
)
{
boolean
debug
=
true
;
int
dbg_j
=
100
;
// 1520;
n
=
matA
.
getRowDimension
();
L
=
matA
.
getArray
();
// will be modified, copy externally if needed
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
();
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
for
(
int
j
=
0
;
j
<
n
;
j
++)
{
if
(
j
==
dbg_j
)
{
System
.
out
.
println
(
"CholeskyLDLTMulti(): j=dbg_j="
+
j
);
}
final
int
fj
=
j
;
for
(
int
k
=
0
;
k
<
j
;
k
++)
{
final
int
fk
=
k
;
L
[
j
][
j
]
-=
L
[
j
][
k
]*
L
[
j
][
k
];
// diagonal
final
double
Ljk
=
L
[
j
][
k
];
// for (int i = j+1; i < n; i++) {
// l[i][j] -= l[i][k]*l[j][k]; // not including diagonal
// }
// parallel
ai
.
set
(
j
+
1
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
for
(
int
i
=
ai
.
getAndIncrement
();
i
<
n
;
i
=
ai
.
getAndIncrement
())
{
L
[
i
][
fj
]
-=
L
[
i
][
fk
]*
Ljk
;
}
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
}
// parallel
// for (int i = j+1; i < n; i++) {
// l[i][j] /= l[j][j];
// }
final
double
Ljj
=
L
[
j
][
j
];
ai
.
set
(
j
+
1
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
for
(
int
i
=
ai
.
getAndIncrement
();
i
<
n
;
i
=
ai
.
getAndIncrement
())
{
L
[
i
][
fj
]
/=
Ljj
;
}
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
if
(
debug
)
{
System
.
out
.
println
(
"CholeskyLDLTMulti(): j="
+
j
);
}
}
return
;
}
// L overwites strict lower A, d overwrites diagonal A
public
CholeskyLDLTMulti
(
Matrix
matA
,
int
[]
ii
)
{
// boolean debug = true;
int
dbg_j
=
100
;
// 1520;
n
=
matA
.
getRowDimension
();
// not needed
L
=
matA
.
getArray
();
// will be modified, copy externally if needed
final
double
[][]
a
=
matA
.
getArray
();
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
();
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
for
(
int
j
=
0
;
j
<
n
;
j
++)
{
final
int
fj
=
j
;
if
(
j
==
dbg_j
)
{
System
.
out
.
println
(
"CholeskyLDLTMulti(): j=dbg_j="
+
j
);
}
/*
if (j > 0) {
ai.set(0);
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
public void run() {
for (int i = ai.getAndIncrement(); i < fj; i = ai.getAndIncrement()) {
a[fj][fj] -= a[i][i]*a[fj][i]*a[fj][i]; // squared
}
}
};
}
ImageDtt.startAndJoin(threads);
}
*/
for
(
int
i
=
0
;
i
<
j
;
i
++)
{
a
[
j
][
j
]
-=
a
[
i
][
i
]*
a
[
j
][
i
]*
a
[
j
][
i
];
// squared
}
/* for (int i = 0; i < j; i++) {
a[j][j] -= a[i][i]*a[j][i]*a[j][i]; // squared
} */
for
(
int
k
=
j
+
1
;
k
<
n
;
k
++)
{
final
int
fk
=
k
;
/*
if (j > 0) {
ai.set(0);
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
public void run() {
for (int i = ai.getAndIncrement(); i < fj; i = ai.getAndIncrement()) {
a[fk][fj] -= a[i][i]* a[fk][i] * a[fj][i];
}
}
};
}
ImageDtt.startAndJoin(threads);
}
*/
for
(
int
i
=
0
;
i
<
j
;
i
++)
{
a
[
k
][
j
]
-=
a
[
i
][
i
]*
a
[
k
][
i
]
*
a
[
j
][
i
];
}
a
[
k
][
j
]
/=
a
[
j
][
j
];
}
/* for (int k = j+1; k < n; k++) {
for (int i = 0; i < j; i++) {
a[k][j] -= a[i][i]* a[k][i] * a[j][i];
}
a[k][j] /= a[j][j];
} */
}
return
;
}
public
CholeskyLDLTMulti
(
Matrix
matA
,
int
ii
)
{
// single-threaded
n
=
matA
.
getRowDimension
();
// not needed
// L = matA.getArray(); // will be modified, copy externally if needed
final
double
[][]
a
=
matA
.
getArray
();
for
(
int
j
=
0
;
j
<
n
;
j
++)
{
for
(
int
i
=
0
;
i
<
j
;
i
++)
{
a
[
j
][
j
]
-=
a
[
i
][
i
]*
a
[
j
][
i
]*
a
[
j
][
i
];
// squared
}
for
(
int
k
=
j
+
1
;
k
<
n
;
k
++)
{
for
(
int
i
=
0
;
i
<
j
;
i
++)
{
a
[
k
][
j
]
-=
a
[
i
][
i
]*
a
[
k
][
i
]
*
a
[
j
][
i
];
}
a
[
k
][
j
]
/=
a
[
j
][
j
];
}
}
L
=
a
;
return
;
}
public
CholeskyLDLTMulti
(
Matrix
matA
)
{
n
=
matA
.
getRowDimension
();
final
double
[][]
a
=
matA
.
getArray
();
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
();
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
ati
=
new
AtomicInteger
(
0
);
final
double
[]
ajj_threaded
=
new
double
[
threads
.
length
];
final
int
threads_chunk
=
(
int
)
(
n
/
(
thread_scale
*
threads
.
length
));
for
(
int
j
=
0
;
j
<
n
;
j
++)
{
final
int
fj
=
j
;
final
double
[]
aj
=
a
[
j
];
double
ajj
=
aj
[
j
];
if
(
j
>
threads_chunk
)
{
// use multithreaded
Arrays
.
fill
(
ajj_threaded
,
0.0
);
ai
.
set
(
0
);
ati
.
set
(
0
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
// first sum for pairs
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
int
nthread
=
ati
.
getAndIncrement
();
for
(
int
i0
=
ai
.
getAndAdd
(
threads_chunk
);
i0
<
fj
;
i0
=
ai
.
getAndAdd
(
threads_chunk
))
{
int
i1
=
Math
.
min
(
fj
,
i0
+
threads_chunk
);
for
(
int
i
=
i0
;
i
<
i1
;
i
++)
{
double
aji
=
aj
[
i
];
ajj_threaded
[
nthread
]
+=
a
[
i
][
i
]*
aji
*
aji
;
// squared
}
}
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
for
(
int
nthread
=
0
;
nthread
<
ajj_threaded
.
length
;
nthread
++)
{
ajj
-=
ajj_threaded
[
nthread
];
}
}
else
{
for
(
int
i
=
0
;
i
<
j
;
i
++)
{
// use aj, a, i
double
aji
=
aj
[
i
];
ajj
-=
a
[
i
][
i
]*
aji
*
aji
;
// squared
}
}
aj
[
j
]
=
ajj
;
// TODO: above run single-threaded for small, then multithreaded with post-accumulation of ajj
if
((
n
-
j
+
1
)
>
threads_chunk
)
{
// use multithreaded
ai
.
set
(
j
+
1
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
for
(
int
k0
=
ai
.
getAndAdd
(
threads_chunk
);
k0
<
n
;
k0
=
ai
.
getAndAdd
(
threads_chunk
))
{
int
k1
=
Math
.
min
(
n
,
k0
+
threads_chunk
);
for
(
int
k
=
k0
;
k
<
k1
;
k
++)
{
double
[]
ak
=
a
[
k
];
for
(
int
i
=
0
;
i
<
fj
;
i
++)
{
// ak, aj, i,
ak
[
fj
]
-=
a
[
i
][
i
]
*
ak
[
i
]
*
aj
[
i
];
}
ak
[
fj
]
/=
aj
[
fj
];
}
}
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
}
else
{
// use single-threaded
for
(
int
k
=
j
+
1
;
k
<
n
;
k
++)
{
final
double
[]
ak
=
a
[
k
];
for
(
int
i
=
0
;
i
<
j
;
i
++)
{
// ak, aj, i,
ak
[
j
]
-=
a
[
i
][
i
]*
ak
[
i
]
*
aj
[
i
];
}
ak
[
j
]
/=
ajj
;
}
}
}
L
=
a
;
return
;
}
public
CholeskyLDLTMulti
(
Matrix
matA
,
long
ll
)
{
// single-threaded
n
=
matA
.
getRowDimension
();
// not needed
final
double
[][]
a
=
matA
.
getArray
();
for
(
int
j
=
0
;
j
<
n
;
j
++)
{
final
double
[]
aj
=
a
[
j
];
double
ajj
=
aj
[
j
];
for
(
int
i
=
0
;
i
<
j
;
i
++)
{
// use aj, a, i
double
aji
=
aj
[
i
];
// a[j][j] -= a[i][i]*a[j][i]*a[j][i]; // squared
ajj
-=
a
[
i
][
i
]*
aji
*
aji
;
// squared
aj
[
j
]
-=
a
[
i
][
i
]*
aji
*
aji
;
// squared
}
aj
[
j
]
=
ajj
;
for
(
int
k
=
j
+
1
;
k
<
n
;
k
++)
{
final
double
[]
ak
=
a
[
k
];
for
(
int
i
=
0
;
i
<
j
;
i
++)
{
// ak, aj, i,
// a[k][j] -= a[i][i]* a[k][i] * a[j][i]; // use digonal
ak
[
j
]
-=
a
[
i
][
i
]*
ak
[
i
]
*
aj
[
i
];
}
// a[k][j] /= a[j][j];
// ak[j] /= aj[j];
ak
[
j
]
/=
ajj
;
}
}
L
=
a
;
return
;
}
public
Matrix
solve0
(
Matrix
B
)
{
// multithreased
if
(
B
.
getRowDimension
()
!=
n
)
{
throw
new
IllegalArgumentException
(
"Matrix row dimensions must agree."
);
}
// Copy right hand side.
double
[]
x
=
B
.
getColumnPackedCopy
();
// (for single-column)
// Solve L*D*Y = B;
for
(
int
k
=
0
;
k
<
n
;
k
++)
{
for
(
int
i
=
0
;
i
<
k
;
i
++)
{
x
[
k
]
-=
x
[
i
]*
L
[
k
][
i
];
}
}
for
(
int
k
=
0
;
k
<
n
;
k
++)
{
x
[
k
]
/=
L
[
k
][
k
];
}
// Solve L'*X = Y;
for
(
int
k
=
n
-
1
;
k
>=
0
;
k
--)
{
for
(
int
i
=
k
+
1
;
i
<
n
;
i
++)
{
x
[
k
]
-=
x
[
i
]*
L
[
i
][
k
];
}
}
return
new
Matrix
(
x
,
n
);
}
public
Matrix
solve
(
Matrix
B
)
{
if
(
B
.
getRowDimension
()
!=
n
)
{
throw
new
IllegalArgumentException
(
"Matrix row dimensions must agree."
);
}
// Copy right hand side.
// double[][] X = B.getArrayCopy();
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
();
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
int
threads_chunk
=
(
int
)
(
n
/
(
thread_scale
*
threads
.
length
));
double
[]
x
=
B
.
getColumnPackedCopy
();
// (for single-column)
// Solve L*D*Y = B;
for
(
int
row0
=
0
;
row0
<
n
;
row0
+=
solve_step
)
{
final
int
frow0
=
row0
;
final
int
frow1
=
Math
.
min
(
row0
+
solve_step
,
n
);
// filling triangle single-threaded
for
(
int
row
=
frow0
;
row
<
frow1
;
row
++)
{
double
[]
l_row
=
L
[
row
];
for
(
int
i
=
frow0
;
i
<
row
;
i
++)
{
x
[
row
]
-=
x
[
i
]*
l_row
[
i
];
}
}
// Filling rectangle parallel
if
(
frow1
<
n
)
{
ai
.
set
(
frow1
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
double
[]
l_row
;
for
(
int
row
=
ai
.
getAndAdd
(
threads_chunk
);
row
<
n
;
row
=
ai
.
getAndAdd
(
threads_chunk
))
{
int
row_lim
=
Math
.
min
(
n
,
row
+
threads_chunk
);
for
(;
row
<
row_lim
;
row
++)
{
l_row
=
L
[
row
];
for
(
int
col
=
frow0
;
col
<
frow1
;
col
++)
{
x
[
row
]
-=
x
[
col
]*
l_row
[
col
];
}
}
}
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
}
}
// for (int k = 0; k < n; k++) {
// for (int i = 0; i < k ; i++) {
// x[k] -= x[i]*L[k][i];
// }
// }
// make parallel
ai
.
set
(
0
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
for
(
int
row
=
ai
.
getAndAdd
(
threads_chunk
);
row
<
n
;
row
=
ai
.
getAndAdd
(
threads_chunk
))
{
int
row_lim
=
Math
.
min
(
n
,
row
+
threads_chunk
);
for
(;
row
<
row_lim
;
row
++)
{
x
[
row
]
/=
L
[
row
][
row
];
}
}
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
/*
for (int row = 0; row < n; row++) {
x[row] /= L[row][row];
}
*/
// Solve L'*X = Y;
for
(
int
row1
=
n
-
1
;
row1
>
0
;
row1
-=
solve_step
)
{
final
int
frow1
=
row1
;
final
int
frow0
=
Math
.
max
(
row1
-
solve_step
,
0
);
// filling triangle single-threaded
for
(
int
row
=
row1
-
1
;
row
>=
frow0
;
row
--)
{
for
(
int
i
=
row
+
1
;
i
<=
frow1
;
i
++)
{
x
[
row
]
-=
x
[
i
]*
L
[
i
][
row
];
}
}
// Filling rectangle parallel
if
(
frow1
>
0
)
{
ai
.
set
(
0
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
public
void
run
()
{
for
(
int
row
=
ai
.
getAndAdd
(
threads_chunk
);
row
<
frow0
;
row
=
ai
.
getAndAdd
(
threads_chunk
))
{
int
row_lim
=
Math
.
min
(
frow0
,
row
+
threads_chunk
);
for
(;
row
<
row_lim
;
row
++)
{
for
(
int
col
=
frow0
+
1
;
col
<=
frow1
;
col
++)
{
x
[
row
]
-=
x
[
col
]*
L
[
col
][
row
];
}
}
}
}
};
}
ImageDtt
.
startAndJoin
(
threads
);
}
}
/**
* What was that?
for (int k = n-1; k >= 0; k--) {
for (int i = k+1; i < n ; i++) {
x[k] -= x[i]*L[i][k];
}
// X[k] /= L[k][k];
}
*/
return
new
Matrix
(
x
,
n
);
}
public
static
Matrix
[]
testCholesky
(
Matrix
wjtjlambda
,
Matrix
jty
)
{
Matrix
wjtjlambda_copy0
=
new
Matrix
(
wjtjlambda
.
getArrayCopy
());
CholeskyDecomposition
choleskyDecomposition
=
new
CholeskyDecomposition
(
wjtjlambda_copy0
);
Matrix
wjtjlambda_copy
=
new
Matrix
(
wjtjlambda
.
getArrayCopy
());
CholeskyLDLTMulti
choleskyLDLTMulti
=
new
CholeskyLDLTMulti
(
wjtjlambda_copy
);
Matrix
mdelta_cholesky
=
choleskyDecomposition
.
solve
(
jty
);
Matrix
mdelta_cholesky_multi0
=
choleskyLDLTMulti
.
solve0
(
jty
);
Matrix
mdelta_cholesky_multi
=
choleskyLDLTMulti
.
solve
(
jty
);
return
new
Matrix
[]
{
mdelta_cholesky
,
mdelta_cholesky_multi0
,
mdelta_cholesky_multi
};
}
}
src/main/java/com/elphel/imagej/vegetation/VegetationLMA.java
View file @
663a964a
...
...
@@ -12,6 +12,7 @@ import java.util.HashSet;
import
java.util.concurrent.atomic.AtomicBoolean
;
import
java.util.concurrent.atomic.AtomicInteger
;
import
com.elphel.imagej.common.CholeskyLDLTMulti
;
import
com.elphel.imagej.common.DoubleGaussianBlur
;
import
com.elphel.imagej.common.ShowDoubleFloatArrays
;
import
com.elphel.imagej.jp4.JP46_Reader_camera
;
...
...
@@ -21,6 +22,7 @@ import com.elphel.imagej.tileprocessor.QuadCLT;
import
com.elphel.imagej.tileprocessor.TileNeibs
;
import
com.elphel.imagej.tileprocessor.TileProcessor
;
import
Jama.CholeskyDecomposition
;
import
Jama.Matrix
;
import
ij.IJ
;
import
ij.ImagePlus
;
...
...
@@ -1852,31 +1854,62 @@ public class VegetationLMA {
System
.
out
.
println
(
"JtJ + lambda*diag(JtJ"
);
wjtjlambda
.
print
(
18
,
6
);
}
Matrix
jtjl_inv
=
null
;
try
{
jtjl_inv
=
wjtjlambda
.
inverse
();
// check for errors
}
catch
(
RuntimeException
e
)
{
rslt
[
1
]
=
true
;
if
(
debug_level
>
-
2
)
{
System
.
out
.
println
(
"Singular Matrix!"
);
}
return
rslt
;
}
if
(
debug_level
>
4
)
{
System
.
out
.
println
(
"(JtJ + lambda*diag(JtJ).inv()"
);
jtjl_inv
.
print
(
18
,
6
);
}
//last_jt has NaNs
// Matrix jty = (new Matrix(this.last_jt)).times(y_minus_fx_weighted);
Matrix
jty
=
(
new
Matrix
(
last_jt_decimated
)).
times
(
y_minus_fx_weighted
);
if
(
debug_level
>
2
)
{
System
.
out
.
println
(
"Jt * (y-fx)"
);
jty
.
print
(
18
,
6
);
Matrix
mdelta
=
null
;
// jtjl_inv.times(jty);
boolean
use_cholesky
=
true
;
// false;
double
matrix_start_time
=
((
double
)
System
.
nanoTime
())
*
1
E
-
9
;
if
(
use_cholesky
)
{
try
{
mdelta
=
(
new
CholeskyDecomposition
(
wjtjlambda
)).
solve
(
jty
);
}
catch
(
RuntimeException
e
)
{
rslt
[
1
]
=
true
;
if
(
debug_level
>
-
2
)
{
System
.
out
.
println
(
"Singular Matrix!"
);
}
return
rslt
;
}
}
else
{
// old way - inverse() using LU
Matrix
jtjl_inv
=
null
;
try
{
jtjl_inv
=
wjtjlambda
.
inverse
();
// check for errors
}
catch
(
RuntimeException
e
)
{
rslt
[
1
]
=
true
;
if
(
debug_level
>
-
2
)
{
System
.
out
.
println
(
"Singular Matrix!"
);
}
return
rslt
;
}
if
(
debug_level
>
4
)
{
System
.
out
.
println
(
"(JtJ + lambda*diag(JtJ).inv()"
);
jtjl_inv
.
print
(
18
,
6
);
}
//last_jt has NaNs
// Matrix jty = (new Matrix(last_jt_decimated)).times(y_minus_fx_weighted);
if
(
debug_level
>
2
)
{
System
.
out
.
println
(
"Jt * (y-fx)"
);
jty
.
print
(
18
,
6
);
}
mdelta
=
jtjl_inv
.
times
(
jty
);
}
Matrix
mdelta
=
jtjl_inv
.
times
(
jty
);
if
(
debug_level
>-
2
)
{
System
.
out
.
println
(
"lmaStep(): Matrix inverted in "
+((((
double
)
System
.
nanoTime
())
*
1
E
-
9
)-
matrix_start_time
)+
", used "
+(
use_cholesky
?
"Cholesky"
:
"LU"
));
}
//
/*
CholeskyDecomposition choleskyDecomposition = new CholeskyDecomposition(wjtjlambda);
CholeskyLDLTMulti choleskyLDLTMulti = new CholeskyLDLTMulti(wjtjlambda);
Matrix mdelta_cholesky = choleskyDecomposition.solve(jty);
Matrix mdelta_cholesky_multi = choleskyLDLTMulti.solve(jty);
*/
CholeskyLDLTMulti
.
testCholesky
(
wjtjlambda
,
// Matrix wjtjlambda,
jty
);
if
(
debug_level
>
2
)
{
System
.
out
.
println
(
"mdelta"
);
mdelta
.
print
(
18
,
6
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment