Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
c1955bd8
Commit
c1955bd8
authored
Jun 19, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
More debugging
parent
7d01b009
Changes
5
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
731 additions
and
134 deletions
+731
-134
GpuQuad.java
src/main/java/com/elphel/imagej/gpu/GpuQuad.java
+43
-39
IntersceneMatchParameters.java
...lphel/imagej/tileprocessor/IntersceneMatchParameters.java
+264
-4
OpticalFlow.java
...ain/java/com/elphel/imagej/tileprocessor/OpticalFlow.java
+337
-75
QuadCLT.java
src/main/java/com/elphel/imagej/tileprocessor/QuadCLT.java
+23
-16
TileNeibs.java
src/main/java/com/elphel/imagej/tileprocessor/TileNeibs.java
+64
-0
No files found.
src/main/java/com/elphel/imagej/gpu/GpuQuad.java
View file @
c1955bd8
...
...
@@ -65,8 +65,11 @@ public class GpuQuad{ // quad camera description
private
CUdeviceptr
gpu_kernels
;
private
CUdeviceptr
gpu_kernel_offsets
;
private
CUdeviceptr
gpu_bayer
;
// private CUdeviceptr gpu_tasks;
private
CUdeviceptr
gpu_ftasks
;
private
CUdeviceptr
gpu_ftasks
=
null
;
private
int
gpu_ftasks_len
=
0
;
private
CUdeviceptr
gpu_active_tiles
=
null
;
// will be re-allocated with gpu_ftasks
private
CUdeviceptr
gpu_corrs
;
private
CUdeviceptr
gpu_corr_weights
;
private
CUdeviceptr
gpu_corrs_td
;
...
...
@@ -98,7 +101,7 @@ public class GpuQuad{ // quad camera description
private
CUdeviceptr
gpu_rot_deriv
;
private
CUdeviceptr
gpu_geometry_correction
;
private
CUdeviceptr
gpu_rByRDist
;
private
CUdeviceptr
gpu_active_tiles
;
//
private CUdeviceptr gpu_active_tiles;
private
CUdeviceptr
gpu_num_active_tiles
;
private
int
mclt_stride
;
private
int
corr_stride
;
...
...
@@ -274,8 +277,7 @@ public class GpuQuad{ // quad camera description
gpu_kernels
=
new
CUdeviceptr
();
gpu_kernel_offsets
=
new
CUdeviceptr
();
gpu_bayer
=
new
CUdeviceptr
();
// gpu_tasks = new CUdeviceptr(); // allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT
gpu_ftasks
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * getTaskSize() * Sizeof.FLOAT
/// gpu_ftasks = new CUdeviceptr(); // allocate tilesX * tilesY * getTaskSize() * Sizeof.FLOAT
gpu_corrs
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.FLOAT
gpu_corr_weights
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * NUM_PAIRS * Sizeof.FLOAT
...
...
@@ -284,8 +286,6 @@ public class GpuQuad{ // quad camera description
gpu_corrs_combo_td
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 4 * DTT_SIZE * DTT_SIZE * Sizeof.FLOAT
gpu_textures
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
/// gpu_clt = new CUdeviceptr();
/// gpu_4_images = new CUdeviceptr();
gpu_corr_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.FLOAT
// May add separate gpu_corr_indices_td here
gpu_corr_combo_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 1 * Sizeof.FLOAT
...
...
@@ -309,7 +309,7 @@ public class GpuQuad{ // quad camera description
gpu_geometry_correction
=
new
CUdeviceptr
();
gpu_rByRDist
=
new
CUdeviceptr
();
// calculated once for the camera distortion model in CPU (move to GPU?)
gpu_active_tiles
=
new
CUdeviceptr
();
// TILESX*TILESY*sizeof(int)
///
gpu_active_tiles = new CUdeviceptr(); // TILESX*TILESY*sizeof(int)
gpu_num_active_tiles
=
new
CUdeviceptr
();
// 1 int
// Init data arrays for all kernels
...
...
@@ -330,19 +330,6 @@ public class GpuQuad{ // quad camera description
Sizeof
.
FLOAT
);
// int ElementSizeBytes)
mclt_stride
=
(
int
)(
device_stride
[
0
]
/
Sizeof
.
FLOAT
);
// Maybe move _bayer to use variable width/height as gpu_clt, gpu_corr_images_h
/*
gpu_corr_images_h[ncam] = new CUdeviceptr();
cuMemAllocPitch (
gpu_corr_images_h[ncam], // CUdeviceptr dptr,
device_stride, // long[] pPitch,
(img_width + GPUTileProcessor.DTT_SIZE) * Sizeof.FLOAT, // long WidthInBytes,
3*(img_height + GPUTileProcessor.DTT_SIZE),// long Height,
Sizeof.FLOAT); // int ElementSizeBytes)
imclt_stride = (int)(device_stride[0] / Sizeof.FLOAT);
*/
/// gpu_clt_h[ncam] = new CUdeviceptr();
/// cuMemAlloc(gpu_clt_h[ncam],tilesY * tilesX * num_colors * 4 * GPUTileProcessor.DTT_SIZE * GPUTileProcessor.DTT_SIZE * Sizeof.FLOAT ); // public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
}
// now create device arrays pointers
if
(
Sizeof
.
POINTER
!=
Sizeof
.
LONG
)
{
...
...
@@ -353,14 +340,10 @@ public class GpuQuad{ // quad camera description
cuMemAlloc
(
gpu_kernels
,
num_cams
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_kernel_offsets
,
num_cams
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_bayer
,
num_cams
*
Sizeof
.
POINTER
);
/// cuMemAlloc(gpu_clt, num_cams * Sizeof.POINTER);
/// cuMemAlloc(gpu_4_images, num_cams * Sizeof.POINTER);
long
[]
gpu_kernels_l
=
new
long
[
num_cams
];
long
[]
gpu_kernel_offsets_l
=
new
long
[
num_cams
];
long
[]
gpu_bayer_l
=
new
long
[
num_cams
];
/// long [] gpu_clt_l = new long [num_cams];
/// long [] gpu_4_images_l = new long [num_cams];
for
(
int
ncam
=
0
;
ncam
<
num_cams
;
ncam
++)
gpu_kernels_l
[
ncam
]
=
GPUTileProcessor
.
getPointerAddress
(
gpu_kernels_h
[
ncam
]);
cuMemcpyHtoD
(
gpu_kernels
,
Pointer
.
to
(
gpu_kernels_l
),
num_cams
*
Sizeof
.
POINTER
);
...
...
@@ -371,23 +354,16 @@ public class GpuQuad{ // quad camera description
for
(
int
ncam
=
0
;
ncam
<
num_cams
;
ncam
++)
gpu_bayer_l
[
ncam
]
=
GPUTileProcessor
.
getPointerAddress
(
gpu_bayer_h
[
ncam
]);
cuMemcpyHtoD
(
gpu_bayer
,
Pointer
.
to
(
gpu_bayer_l
),
num_cams
*
Sizeof
.
POINTER
);
/// for (int ncam = 0; ncam < num_cams; ncam++) gpu_clt_l[ncam] = GPUTileProcessor.getPointerAddress(gpu_clt_h[ncam]);
/// cuMemcpyHtoD(gpu_clt, Pointer.to(gpu_clt_l), num_cams * Sizeof.POINTER);
/// for (int ncam = 0; ncam < num_cams; ncam++) gpu_4_images_l[ncam] = GPUTileProcessor.getPointerAddress(gpu_corr_images_h[ncam]);
/// cuMemcpyHtoD(gpu_4_images, Pointer.to(gpu_4_images_l), num_cams * Sizeof.POINTER);
// Set GeometryCorrection data
cuMemAlloc
(
gpu_geometry_correction
,
GeometryCorrection
.
arrayLength
(
GPUTileProcessor
.
MAX_NUM_CAMS
)
*
Sizeof
.
FLOAT
);
// always maximal number of cameras (sparse)
cuMemAlloc
(
gpu_rByRDist
,
GPUTileProcessor
.
RBYRDIST_LEN
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_rot_deriv
,
5
*
GPUTileProcessor
.
MAX_NUM_CAMS
*
3
*
3
*
Sizeof
.
FLOAT
);
// always maximal number of cameras (sparse)
// cuMemAlloc(gpu_correction_vector, CorrVector.LENGTH * Sizeof.FLOAT);
cuMemAlloc
(
gpu_correction_vector
,
GPUTileProcessor
.
CORR_VECTOR_MAX_LENGTH
*
Sizeof
.
FLOAT
);
// update CORR_VECTOR_LENGTH to fit
// Set task array
// cuMemAlloc(gpu_tasks, tilesX * tilesY * GPUTileProcessor.TPTASK_SIZE * Sizeof.FLOAT);
cuMemAlloc
(
gpu_ftasks
,
tilesX
*
tilesY
*
getTaskSize
()
*
Sizeof
.
FLOAT
);
// cuMemAlloc(gpu_ftasks, tilesX * tilesY * getTaskSize() * Sizeof.FLOAT);
// checkAllocateGpuFtasks((tilesX+4) * (tilesY+4)); // +4 - "a little more" (will reallocate if needed)
checkAllocateGpuFtasks
(
tilesX
*
tilesY
);
// +4 - "a little more" (will reallocate if needed)
//=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============
// Set corrs array
int
num_pairs
=
Correlation2d
.
getNumPairs
(
quadCLT
.
getNumSensors
());
...
...
@@ -412,8 +388,8 @@ public class GpuQuad{ // quad camera description
cuMemAlloc
(
gpu_num_texture_ovlp
,
8
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_texture_indices_len
,
1
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_active_tiles
,
tilesX
*
tilesY
*
Sizeof
.
FLOAT
);
// will be dynamically allocated with gpu_ftasks
//
cuMemAlloc(gpu_active_tiles, tilesX * tilesY * Sizeof.FLOAT);
cuMemAlloc
(
gpu_num_active_tiles
,
1
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_corr_weights
,
num_pairs
*
tilesX
*
tilesY
*
Sizeof
.
FLOAT
);
...
...
@@ -473,6 +449,33 @@ public class GpuQuad{ // quad camera description
Sizeof
.
FLOAT
);
// int ElementSizeBytes)
texture_stride_rgba
=
(
int
)(
device_stride
[
0
]
/
Sizeof
.
FLOAT
);
}
/**
 * Make sure that the GPU task buffer (gpu_ftasks) and the active tiles buffer
 * (gpu_active_tiles) can hold at least max_tasks entries. Both buffers are
 * re-allocated together when the current capacity (gpu_ftasks_len) is too small;
 * they are never shrunk here.
 *
 * @param max_tasks requested number of tasks, rounded up to a multiple of 0x80
 */
private void checkAllocateGpuFtasks(int max_tasks) {
	/*
	 * Got jcuda.CudaException: CUDA_ERROR_MISALIGNED_ADDRESS
	 *   at jcuda.driver.JCudaDriver.checkResult(JCudaDriver.java:396)
	 *   at jcuda.driver.JCudaDriver.cuCtxSynchronize(JCudaDriver.java:2426)
	 *   at com.elphel.imagej.gpu.GpuQuad.execConvertDirect(GpuQuad.java:1596)
	 * Will try rounding to multiple of smth (was 0x80)
	 */
	max_tasks = (max_tasks + 0x7f) & ~0x7f;
	if (max_tasks <= gpu_ftasks_len) {
		return; // existing buffers are already large enough
	}
	// Forget each handle as soon as it is freed and reset the recorded capacity, so that
	// a failure in the allocations below can not leave freed pointers in the fields
	// (they would be freed a second time by the next call).
	if (gpu_ftasks != null) {
		cuMemFree(gpu_ftasks);
		gpu_ftasks = null;
	}
	if (gpu_active_tiles != null) {
		cuMemFree(gpu_active_tiles);
		gpu_active_tiles = null;
	}
	gpu_ftasks_len = 0;

	// Sizes are calculated as long (cuMemAlloc takes a long byte size) to avoid int overflow.
	final CUdeviceptr new_ftasks = new CUdeviceptr();
	cuMemAlloc(new_ftasks, (long) max_tasks * getTaskSize() * Sizeof.FLOAT);
	gpu_ftasks = new_ftasks; // published only after successful allocation

	final CUdeviceptr new_active_tiles = new CUdeviceptr();
	cuMemAlloc(new_active_tiles, (long) max_tasks * Sizeof.FLOAT);
	gpu_active_tiles = new_active_tiles;

	gpu_ftasks_len = max_tasks; // capacity is valid only when both buffers exist
}
/**
 * Horizontal number of tiles: image width (from getImageWidth()) divided by
 * GPUTileProcessor.DTT_SIZE using integer division.
 *
 * @return number of tiles in a row
 */
public int getTilesX() {
	final int image_width = getImageWidth();
	return image_width / GPUTileProcessor.DTT_SIZE;
}
...
...
@@ -619,7 +622,8 @@ public class GpuQuad{ // quad camera description
)
{
if
(
verify
)
checkTasks
(
tile_tasks
);
num_task_tiles
=
tile_tasks
.
length
;
num_task_tiles
=
tile_tasks
.
length
;
// only place that modifies num_task_tiles and may require allocation
checkAllocateGpuFtasks
(
num_task_tiles
);
int
task_size
=
getTaskSize
();
float
[]
ftasks
=
new
float
[
task_size
*
num_task_tiles
];
for
(
int
i
=
0
;
i
<
num_task_tiles
;
i
++)
{
...
...
@@ -645,7 +649,7 @@ public class GpuQuad{ // quad camera description
boolean
use_aux
// while is it in class member? - just to be able to free
)
{
num_task_tiles
=
tile_tasks
.
length
;
num_task_tiles
=
tile_tasks
.
length
;
// does not require re-allocation, as tile_tasks should be already set to GPU
int
task_size
=
getTaskSize
();
float
[]
ftasks
=
new
float
[
task_size
*
num_task_tiles
];
cuMemcpyDtoH
(
Pointer
.
to
(
ftasks
),
gpu_ftasks
,
task_size
*
num_task_tiles
*
Sizeof
.
FLOAT
);
...
...
src/main/java/com/elphel/imagej/tileprocessor/IntersceneMatchParameters.java
View file @
c1955bd8
This diff is collapsed.
Click to expand it.
src/main/java/com/elphel/imagej/tileprocessor/OpticalFlow.java
View file @
c1955bd8
This diff is collapsed.
Click to expand it.
src/main/java/com/elphel/imagej/tileprocessor/QuadCLT.java
View file @
c1955bd8
...
...
@@ -224,6 +224,7 @@ public class QuadCLT extends QuadCLTCPU {
final
double
max_strength
,
// do not touch stronger
final
double
diff_from_lma_pos
,
// Difference from farthest FG objects (OK to have large, e.g. 100)
final
double
diff_from_lma_neg
,
// Difference from nearest BG objects (small, as FG are usually more visible)
final
int
search_radius
,
// Search farther if no LMA neighbor is found closer. Original value - 1 (8 neighbors)
final
boolean
remove_no_lma_neib
,
// remove without LMA neighbors
final
int
width
,
//tilesX
final
int
threadsMax
,
...
...
@@ -238,7 +239,7 @@ public class QuadCLT extends QuadCLTCPU {
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
threadsMax
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
anum_updated
=
new
AtomicInteger
(
0
);
final
int
dbg_tile
=
1
235
;
final
int
dbg_tile
=
1
944
;
anum_updated
.
set
(
0
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
...
...
@@ -250,8 +251,10 @@ public class QuadCLT extends QuadCLTCPU {
if
(
Double
.
isNaN
(
disparity_lma
[
nTile
])
&&
!
Double
.
isNaN
(
disparity
[
nTile
])
&&
(
strength
[
nTile
]
<
max_strength
))
{
double
best_fit_pos
=
Double
.
NaN
;
// Closest higher disparity than this
double
best_fit_neg
=
Double
.
NaN
;
// Closest lower disparity than this
for
(
int
dir
=
0
;
dir
<
8
;
dir
++)
{
int
ineib
=
tn
.
getNeibIndex
(
nTile
,
dir
);
for
(
int
rad
=
1
;
rad
<=
search_radius
;
rad
++)
{
int
numdir
=
TileNeibs
.
getNumDirs
(
rad
);
for
(
int
dir
=
0
;
dir
<
numdir
;
dir
++)
{
int
ineib
=
tn
.
getNeibIndexRadius
(
nTile
,
dir
,
rad
);
if
(
(
ineib
>=
0
)
&&
!
Double
.
isNaN
(
disparity_lma
[
ineib
])
&&
!
Double
.
isNaN
(
disparity
[
ineib
]))
{
...
...
@@ -267,6 +270,10 @@ public class QuadCLT extends QuadCLTCPU {
}
}
}
if
(!(
Double
.
isNaN
(
best_fit_pos
)
&&
Double
.
isNaN
(
best_fit_neg
)))
{
break
;
}
}
if
(
(
best_fit_neg
>
diff_from_lma_neg
)
||
(
best_fit_pos
>
diff_from_lma_pos
)
||
(
Double
.
isNaN
(
best_fit_pos
)
&&
Double
.
isNaN
(
best_fit_neg
)
&&
remove_no_lma_neib
))
{
...
...
@@ -2064,7 +2071,7 @@ public class QuadCLT extends QuadCLTCPU {
null
,
// final boolean [] selection, // may be null, if not null do not process unselected tiles
scene
.
getErsCorrection
(),
// final GeometryCorrection geometryCorrection,
0.0
,
// final double disparity_corr,
0
,
// margin, // final int margin, // do not use tiles if their centers are closer to the edges
-
1
,
//
0, // margin, // final int margin, // do not use tiles if their centers are closer to the edges
null
,
// final boolean [] valid_tiles,
threadsMax
);
// final int threadsMax) // maximal number of threads to launch
scene
.
saveQuadClt
();
// to re-load new set of Bayer images to the GPU (do nothing for CPU) and Geometry
...
...
src/main/java/com/elphel/imagej/tileprocessor/TileNeibs.java
View file @
c1955bd8
...
...
@@ -140,6 +140,70 @@ public class TileNeibs{
default
:
return
indx
;
}
}
/**
 * Get number of directions for the specified radius.
 *
 * @param radius "distance" from the start point
 * @return 0 for negative radius, 1 for radius 0, otherwise 8 * radius
 */
public static int getNumDirs(int radius) {
	if (radius > 0) {
		return 8 * radius;
	}
	return (radius == 0) ? 1 : 0;
}
/**
 * Get 2d element index after step of variable radius:
 * radius==1 - same as getNeibIndex(int indx, int dir), 8 directions
 * radius==2 - 16 directions (5x5 square), 0 - still up, north
 * radius==3 - 24 directions (7x7 square)
 * ...
 * A direction outside of 0..(8*radius-1) is reported to System.out and then
 * wrapped around (modulo 8*radius), so it always maps to a tile of the ring.
 * @param indx start index
 * @param dir step direction (CW from up)
 * @param radius - "distance" from the start point. Values below 2 are passed
 *        to getNeibIndex(indx, dir)
 * @return new index or -1 if leaving array in any direction
 */
public int getNeibIndexRadius(int indx, int dir, int radius) {
	if (radius < 2) {
		return getNeibIndex(indx, dir);
	}
	final int num_dirs = 8 * radius;
	if ((dir < 0) || (dir >= num_dirs)) {
		System.out.println("getNeibIndexRadius(): indx="+indx+", dir="+dir+", radius="+radius);
	}
	int y = indx / sizeX;
	int x = indx % sizeX;
	// Shift direction so 0 corresponds to the top-left corner of the ring. Java's %
	// keeps the sign of the dividend, so the second modulo is needed to keep the
	// result in 0..num_dirs-1 for negative dir too.
	final int dr = (((dir + radius) % num_dirs) + num_dirs) % num_dirs;
	final int quad = dr / (2 * radius); // which side of the square, 0..3
	final int side = dr % (2 * radius); // position along that side
	switch (quad) {
	case 0: // top side, left to right
		x = x - radius + side;
		y = y - radius;
		break;
	case 1: // right side, top to bottom
		x = x + radius;
		y = y - radius + side;
		break;
	case 2: // bottom side, right to left
		x = x + radius - side;
		y = y + radius;
		break;
	case 3: // left side, bottom to top
		x = x - radius;
		y = y + radius - side;
		break;
	}
	if ((x < 0) || (y < 0) || (x >= sizeX) || (y >= sizeY)) {
		return -1; // stepped outside of the array
	}
	return x + sizeX * y;
}
/**
* Get 2d element index after step N, NE, ... NW. Returns -1 if leaving array
* And 2 steps for dir = 8(N), 9(NNE),..23(NNW)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment