Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
514057c6
Commit
514057c6
authored
Apr 13, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented/tested per-tile geometric distortions setup
parent
50630abc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
434 additions
and
34 deletions
+434
-34
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+132
-21
GeometryCorrection.java
...a/com/elphel/imagej/tileprocessor/GeometryCorrection.java
+250
-1
ImageDtt.java
src/main/java/com/elphel/imagej/tileprocessor/ImageDtt.java
+17
-0
TwoQuadCLT.java
...main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+35
-12
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
514057c6
...
@@ -96,7 +96,8 @@ public class GPUTileProcessor {
...
@@ -96,7 +96,8 @@ public class GPUTileProcessor {
{"*","dtt8x8.h","dtt8x8.cu"},
{"*","dtt8x8.h","dtt8x8.cu"},
{"*","dtt8x8.h","geometry_correction.h","TileProcessor.h","TileProcessor.cuh"}};
{"*","dtt8x8.h","geometry_correction.h","TileProcessor.h","TileProcessor.cuh"}};
*/
*/
static
String
[][]
GPU_SRC_FILES
=
{{
"*"
,
"dtt8x8.h"
,
"dtt8x8.cu"
,
"geometry_correction.h"
,
"TileProcessor.h"
,
"TileProcessor.cuh"
}};
static
String
[][]
GPU_SRC_FILES
=
{{
"*"
,
"dtt8x8.h"
,
"dtt8x8.cu"
,
"geometry_correction.h"
,
"geometry_correction.cu"
,
"TileProcessor.h"
,
"TileProcessor.cuh"
}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","TileProcessor.h","TileProcessor.cuh"}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.cuh","TileProcessor.cuh"}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.cuh","TileProcessor.cuh"}};
static
String
GPU_CONVERT_CORRECT_TILES_NAME
=
"convert_correct_tiles"
;
// name in C code
static
String
GPU_CONVERT_CORRECT_TILES_NAME
=
"convert_correct_tiles"
;
// name in C code
static
String
GPU_IMCLT_RBG_NAME
=
"imclt_rbg"
;
// name in C code
static
String
GPU_IMCLT_RBG_NAME
=
"imclt_rbg"
;
// name in C code
...
@@ -104,6 +105,8 @@ public class GPUTileProcessor {
...
@@ -104,6 +105,8 @@ public class GPUTileProcessor {
// static String GPU_TEXTURES_NAME = "textures_gen"; // name in C code
// static String GPU_TEXTURES_NAME = "textures_gen"; // name in C code
static
String
GPU_TEXTURES_NAME
=
"textures_accumulate"
;
// name in C code
static
String
GPU_TEXTURES_NAME
=
"textures_accumulate"
;
// name in C code
static
String
GPU_RBGA_NAME
=
"generate_RBGA"
;
// name in C code
static
String
GPU_RBGA_NAME
=
"generate_RBGA"
;
// name in C code
static
String
GPU_ROT_DERIV
=
"calc_rot_deriv"
;
// calculate rotation matrices and derivatives
static
String
SET_TILES_OFFSETS
=
"get_tiles_offsets"
;
// calculate pixel offsets and disparity distortions
// pass some defines to gpu source code with #ifdef JCUDA
// pass some defines to gpu source code with #ifdef JCUDA
...
@@ -148,7 +151,7 @@ public class GPUTileProcessor {
...
@@ -148,7 +151,7 @@ public class GPUTileProcessor {
public
static
int
RBYRDIST_LEN
=
5001
;
//for double, 10001 - float; // length of rByRDist to allocate shared memory
public
static
int
RBYRDIST_LEN
=
5001
;
//for double, 10001 - float; // length of rByRDist to allocate shared memory
public
static
double
RBYRDIST_STEP
=
0.0004
;
// for double, 0.0002 - for float; // to fit into GPU shared memory (was 0.001);
public
static
double
RBYRDIST_STEP
=
0.0004
;
// for double, 0.0002 - for float; // to fit into GPU shared memory (was 0.001);
public
static
int
TILES_PER_BLOCK_GEOM
=
32
;
// blockDim.x = NUM_CAMS; blockDim.x = TILES_PER_BLOCK_GEOM
public
static
int
TILES_PER_BLOCK_GEOM
=
32
/
NUM_CAMS
;
// blockDim.x = NUM_CAMS; blockDim.y = TILES_PER_BLOCK_GEOM
public
static
int
TASK_TEXTURE_BITS
=
((
1
<<
TASK_TEXTURE_N_BIT
)
|
(
1
<<
TASK_TEXTURE_E_BIT
)
|
(
1
<<
TASK_TEXTURE_S_BIT
)
|
(
1
<<
TASK_TEXTURE_W_BIT
));
public
static
int
TASK_TEXTURE_BITS
=
((
1
<<
TASK_TEXTURE_N_BIT
)
|
(
1
<<
TASK_TEXTURE_E_BIT
)
|
(
1
<<
TASK_TEXTURE_S_BIT
)
|
(
1
<<
TASK_TEXTURE_W_BIT
));
...
@@ -163,15 +166,18 @@ public class GPUTileProcessor {
...
@@ -163,15 +166,18 @@ public class GPUTileProcessor {
private
CUfunction
GPU_CORRELATE2D_kernel
=
null
;
private
CUfunction
GPU_CORRELATE2D_kernel
=
null
;
private
CUfunction
GPU_TEXTURES_kernel
=
null
;
private
CUfunction
GPU_TEXTURES_kernel
=
null
;
private
CUfunction
GPU_RBGA_kernel
=
null
;
private
CUfunction
GPU_RBGA_kernel
=
null
;
private
CUfunction
GPU_ROT_DERIV_kernel
=
null
;
private
CUfunction
SET_TILES_OFFSETS_kernel
=
null
;
// CPU arrays of pointers to GPU memory
// CPU arrays of pointers to GPU memory
// These arrays may go to method, they are here just to be able to free GPU memory if needed
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
private
CUdeviceptr
[]
gpu_kernels_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_kernels_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_kernel_offsets_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_kernel_offsets_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_bayer_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_bayer_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_clt_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_clt_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_corr_images_h
=
new
CUdeviceptr
[
NUM_CAMS
];
private
CUdeviceptr
[]
gpu_corr_images_h
=
new
CUdeviceptr
[
NUM_CAMS
];
// GPU pointers to array of GPU pointers
// GPU pointers to array of GPU pointers
private
CUdeviceptr
gpu_kernels
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_kernels
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_kernel_offsets
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_kernel_offsets
=
new
CUdeviceptr
();
...
@@ -187,6 +193,11 @@ public class GPUTileProcessor {
...
@@ -187,6 +193,11 @@ public class GPUTileProcessor {
private
CUdeviceptr
gpu_num_texture_tiles
=
new
CUdeviceptr
();
// 8 ints
private
CUdeviceptr
gpu_num_texture_tiles
=
new
CUdeviceptr
();
// 8 ints
private
CUdeviceptr
gpu_textures_rgba
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER
private
CUdeviceptr
gpu_textures_rgba
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER
private
CUdeviceptr
gpu_correction_vector
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_rot_deriv
=
new
CUdeviceptr
();
// used internally by device, may be read to CPU for testing
private
CUdeviceptr
gpu_geometry_correction
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_rByRDist
=
new
CUdeviceptr
();
// calculated once for the camera distortion model in CPU (move to GPU?)
CUmodule
module
;
// to access constants memory
CUmodule
module
;
// to access constants memory
private
int
mclt_stride
;
private
int
mclt_stride
;
private
int
corr_stride
;
private
int
corr_stride
;
...
@@ -227,15 +238,22 @@ public class GPUTileProcessor {
...
@@ -227,15 +238,22 @@ public class GPUTileProcessor {
flt
[
indx
++]
=
Float
.
intBitsToFloat
(
tx
+
(
ty
<<
16
));
flt
[
indx
++]
=
Float
.
intBitsToFloat
(
tx
+
(
ty
<<
16
));
float
[][]
offsets
=
use_aux
?
this
.
xy_aux
:
this
.
xy
;
float
[][]
offsets
=
use_aux
?
this
.
xy_aux
:
this
.
xy
;
for
(
int
i
=
0
;
i
<
NUM_CAMS
;
i
++)
{
for
(
int
i
=
0
;
i
<
NUM_CAMS
;
i
++)
{
flt
[
indx
++]
=
offsets
[
i
][
0
];
if
(
offsets
!=
null
)
{
flt
[
indx
++]
=
offsets
[
i
][
1
];
flt
[
indx
++]
=
offsets
[
i
][
0
];
flt
[
indx
++]
=
offsets
[
i
][
1
];
}
else
{
indx
+=
2
;
}
}
}
flt
[
indx
++]
=
this
.
target_disparity
;
flt
[
indx
++]
=
this
.
target_disparity
;
for
(
int
i
=
0
;
i
<
NUM_CAMS
;
i
++)
{
// actually disp_dist will be initialized by the GPU
for
(
int
i
=
0
;
i
<
NUM_CAMS
;
i
++)
{
// actually disp_dist will be initialized by the GPU
indx
+=
4
;
/*
flt[indx++] = disp_dist[i][0];
flt[indx++] = disp_dist[i][0];
flt[indx++] = disp_dist[i][1];
flt[indx++] = disp_dist[i][1];
flt[indx++] = disp_dist[i][2];
flt[indx++] = disp_dist[i][2];
flt[indx++] = disp_dist[i][3];
flt[indx++] = disp_dist[i][3];
*/
}
}
return
flt
;
return
flt
;
}
}
...
@@ -446,16 +464,22 @@ public class GPUTileProcessor {
...
@@ -446,16 +464,22 @@ public class GPUTileProcessor {
GPU_IMCLT_RBG_NAME
,
GPU_IMCLT_RBG_NAME
,
GPU_CORRELATE2D_NAME
,
GPU_CORRELATE2D_NAME
,
GPU_TEXTURES_NAME
,
GPU_TEXTURES_NAME
,
GPU_RBGA_NAME
};
GPU_RBGA_NAME
,
GPU_ROT_DERIV
,
SET_TILES_OFFSETS
};
CUfunction
[]
functions
=
createFunctions
(
kernelSources
,
CUfunction
[]
functions
=
createFunctions
(
kernelSources
,
func_names
,
func_names
,
capability
);
// on my - 75
capability
);
// on my - 75
this
.
GPU_CONVERT_CORRECT_TILES_kernel
=
functions
[
0
];
GPU_CONVERT_CORRECT_TILES_kernel
=
functions
[
0
];
this
.
GPU_IMCLT_RBG_kernel
=
functions
[
1
];
GPU_IMCLT_RBG_kernel
=
functions
[
1
];
this
.
GPU_CORRELATE2D_kernel
=
functions
[
2
];
GPU_CORRELATE2D_kernel
=
functions
[
2
];
this
.
GPU_TEXTURES_kernel
=
functions
[
3
];
GPU_TEXTURES_kernel
=
functions
[
3
];
this
.
GPU_RBGA_kernel
=
functions
[
4
];
GPU_RBGA_kernel
=
functions
[
4
];
GPU_ROT_DERIV_kernel
=
functions
[
5
];
SET_TILES_OFFSETS_kernel
=
functions
[
6
];
System
.
out
.
println
(
"GPU kernel functions initialized"
);
System
.
out
.
println
(
"GPU kernel functions initialized"
);
System
.
out
.
println
(
GPU_CONVERT_CORRECT_TILES_kernel
.
toString
());
System
.
out
.
println
(
GPU_CONVERT_CORRECT_TILES_kernel
.
toString
());
...
@@ -463,6 +487,8 @@ public class GPUTileProcessor {
...
@@ -463,6 +487,8 @@ public class GPUTileProcessor {
System
.
out
.
println
(
GPU_CORRELATE2D_kernel
.
toString
());
System
.
out
.
println
(
GPU_CORRELATE2D_kernel
.
toString
());
System
.
out
.
println
(
GPU_TEXTURES_kernel
.
toString
());
System
.
out
.
println
(
GPU_TEXTURES_kernel
.
toString
());
System
.
out
.
println
(
GPU_RBGA_kernel
.
toString
());
System
.
out
.
println
(
GPU_RBGA_kernel
.
toString
());
System
.
out
.
println
(
GPU_ROT_DERIV_kernel
.
toString
());
System
.
out
.
println
(
SET_TILES_OFFSETS_kernel
.
toString
());
// Init data arrays for all kernels
// Init data arrays for all kernels
int
tilesX
=
IMG_WIDTH
/
DTT_SIZE
;
int
tilesX
=
IMG_WIDTH
/
DTT_SIZE
;
...
@@ -522,9 +548,15 @@ public class GPUTileProcessor {
...
@@ -522,9 +548,15 @@ public class GPUTileProcessor {
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++)
gpu_clt_l
[
ncam
]
=
getPointerAddress
(
gpu_clt_h
[
ncam
]);
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++)
gpu_clt_l
[
ncam
]
=
getPointerAddress
(
gpu_clt_h
[
ncam
]);
cuMemcpyHtoD
(
gpu_clt
,
Pointer
.
to
(
gpu_clt_l
),
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemcpyHtoD
(
gpu_clt
,
Pointer
.
to
(
gpu_clt_l
),
NUM_CAMS
*
Sizeof
.
POINTER
);
// Set task array
// Set GeometryCorrection data
cuMemAlloc
(
gpu_tasks
,
tilesX
*
tilesY
*
TPTASK_SIZE
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_geometry_correction
,
GeometryCorrection
.
arrayLength
(
NUM_CAMS
)
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_rByRDist
,
RBYRDIST_LEN
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_rot_deriv
,
5
*
NUM_CAMS
*
3
*
3
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_correction_vector
,
GeometryCorrection
.
CorrVector
.
LENGTH
*
Sizeof
.
FLOAT
);
// Set task array
cuMemAlloc
(
gpu_tasks
,
tilesX
*
tilesY
*
TPTASK_SIZE
*
Sizeof
.
FLOAT
);
//=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============
// Set corrs array
// Set corrs array
/// cuMemAlloc(gpu_corrs, tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER);
/// cuMemAlloc(gpu_corrs, tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER);
cuMemAlloc
(
gpu_corr_indices
,
tilesX
*
tilesY
*
NUM_PAIRS
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_corr_indices
,
tilesX
*
tilesY
*
NUM_PAIRS
*
Sizeof
.
POINTER
);
...
@@ -568,6 +600,27 @@ public class GPUTileProcessor {
...
@@ -568,6 +600,27 @@ public class GPUTileProcessor {
}
}
/**
 * Uploads the geometry-correction data of the given camera model to GPU memory.
 *
 * Two host-to-device copies are performed:
 * the packed float array returned by gc.toFloatArray() goes to gpu_geometry_correction,
 * and the table returned by gc.getRByRDist() (converted from double to float) goes to gpu_rByRDist.
 *
 * The device buffers are expected to be allocated already (they are allocated with the other
 * kernel data arrays during initialization). NOTE(review): no check is made here that the
 * array lengths fit the allocated buffers (arrayLength(NUM_CAMS) and RBYRDIST_LEN floats) - confirm.
 *
 * @param gc geometry correction instance to take the data from
 */
public void setGeometryCorrection(GeometryCorrection gc) {
    float[] fgc = gc.toFloatArray();
    double[] rByRDist = gc.getRByRDist();
    // GPU code works with floats, so the double table is narrowed element by element
    float[] fFByRDist = new float[rByRDist.length];
    for (int i = 0; i < rByRDist.length; i++) {
        fFByRDist[i] = (float) rByRDist[i];
    }
    cuMemcpyHtoD(gpu_geometry_correction, Pointer.to(fgc), fgc.length * Sizeof.FLOAT);
    cuMemcpyHtoD(gpu_rByRDist, Pointer.to(fFByRDist), fFByRDist.length * Sizeof.FLOAT);
    // gpu_rot_deriv (NCAM of 3x3 rotation matrices, plus 4 derivative matrices for each camera)
    // is already allocated during initialization (5 * NUM_CAMS * 3 * 3 floats). It must not be
    // allocated again here: a second cuMemAlloc would overwrite the device pointer and leak
    // the previous buffer on every call.
}
/**
 * Copies the extrinsic correction vector to the GPU buffer gpu_correction_vector.
 *
 * The vector is obtained with cv.toFullRollArray(), narrowed from double to float and
 * transferred with a single host-to-device copy of (array length * Sizeof.FLOAT) bytes.
 *
 * @param cv correction vector to upload
 */
public void setExtrinsicsVector(GeometryCorrection.CorrVector cv) {
    final double[] fullRoll = cv.toFullRollArray();
    final int count = fullRoll.length;
    final float[] asFloats = new float[count];
    int n = 0;
    for (double value : fullRoll) {
        asFloats[n++] = (float) value;
    }
    cuMemcpyHtoD(
            gpu_correction_vector,
            Pointer.to(asFloats),
            asFloats.length * Sizeof.FLOAT);
}
public
void
setTasks
(
TpTask
[]
tile_tasks
,
boolean
use_aux
)
// while is it in class member? - just to be able to free
public
void
setTasks
(
TpTask
[]
tile_tasks
,
boolean
use_aux
)
// while is it in class member? - just to be able to free
{
{
...
@@ -576,7 +629,7 @@ public class GPUTileProcessor {
...
@@ -576,7 +629,7 @@ public class GPUTileProcessor {
for
(
int
i
=
0
;
i
<
num_task_tiles
;
i
++)
{
for
(
int
i
=
0
;
i
<
num_task_tiles
;
i
++)
{
tile_tasks
[
i
].
asFloatArray
(
ftasks
,
i
*
TPTASK_SIZE
,
use_aux
);
tile_tasks
[
i
].
asFloatArray
(
ftasks
,
i
*
TPTASK_SIZE
,
use_aux
);
}
}
cuMemcpyHtoD
(
gpu_tasks
,
Pointer
.
to
(
ftasks
),
TPTASK_SIZE
*
num_task_tiles
*
Sizeof
.
FLOAT
);
cuMemcpyHtoD
(
gpu_tasks
,
Pointer
.
to
(
ftasks
),
TPTASK_SIZE
*
num_task_tiles
*
Sizeof
.
FLOAT
);
}
}
public
void
setCorrIndices
(
int
[]
corr_indices
)
public
void
setCorrIndices
(
int
[]
corr_indices
)
...
@@ -700,6 +753,7 @@ public class GPUTileProcessor {
...
@@ -700,6 +753,7 @@ public class GPUTileProcessor {
// prepare tasks for full frame, same disparity.
// prepare tasks for full frame, same disparity.
// need to run setTasks(TpTask [] tile_tasks, boolean use_aux) to format/transfer to GPU memory
// need to run setTasks(TpTask [] tile_tasks, boolean use_aux) to format/transfer to GPU memory
public
TpTask
[]
setFullFrameImages
(
public
TpTask
[]
setFullFrameImages
(
boolean
calc_offsets
,
// old way, now not needed with GPU calculation
Rectangle
woi
,
Rectangle
woi
,
boolean
round_woi
,
boolean
round_woi
,
float
target_disparity
,
// apply same disparity to all tiles
float
target_disparity
,
// apply same disparity to all tiles
...
@@ -725,6 +779,7 @@ public class GPUTileProcessor {
...
@@ -725,6 +779,7 @@ public class GPUTileProcessor {
corr_masks
[
i
]
=
corr_mask
;
// 0x3f; // all 6 correlations
corr_masks
[
i
]
=
corr_mask
;
// 0x3f; // all 6 correlations
}
}
return
setFullFrameImages
(
return
setFullFrameImages
(
calc_offsets
,
// boolean calc_offsets, // old way, now not needed with GPU calculation
woi
,
// Rectangle woi,
woi
,
// Rectangle woi,
round_woi
,
// boolean round_woi,
round_woi
,
// boolean round_woi,
target_disparities
,
// should be tilesX*tilesY long
target_disparities
,
// should be tilesX*tilesY long
...
@@ -740,6 +795,7 @@ public class GPUTileProcessor {
...
@@ -740,6 +795,7 @@ public class GPUTileProcessor {
}
}
public
TpTask
[]
setFullFrameImages
(
public
TpTask
[]
setFullFrameImages
(
boolean
calc_offsets
,
// old way, now not needed with GPU calculation
Rectangle
woi
,
// or null
Rectangle
woi
,
// or null
boolean
round_woi
,
boolean
round_woi
,
float
[]
target_disparities
,
// should be tilesX*tilesY long
float
[]
target_disparities
,
// should be tilesX*tilesY long
...
@@ -838,13 +894,15 @@ public class GPUTileProcessor {
...
@@ -838,13 +894,15 @@ public class GPUTileProcessor {
indx
++;
indx
++;
}
}
}
}
getTileSubcamOffsets
(
if
(
calc_offsets
)
{
tp_tasks
,
// final TpTask[] tp_tasks, // will use // modify to have offsets for 8 cameras
getTileSubcamOffsets
(
(
use_master
?
geometryCorrection_main:
null
),
// final GeometryCorrection geometryCorrection_main,
tp_tasks
,
// final TpTask[] tp_tasks, // will use // modify to have offsets for 8 cameras
(
use_aux
?
geometryCorrection_aux:
null
),
// final GeometryCorrection geometryCorrection_aux, // if null, will only calculate offsets for the main camera
(
use_master
?
geometryCorrection_main:
null
),
// final GeometryCorrection geometryCorrection_main,
ers_delay
,
// final double [][][] ers_delay, // if not null - fill with tile center acquisition delay
(
use_aux
?
geometryCorrection_aux:
null
),
// final GeometryCorrection geometryCorrection_aux, // if null, will only calculate offsets for the main camera
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
ers_delay
,
// final double [][][] ers_delay, // if not null - fill with tile center acquisition delay
debugLevel
);
// final int debugLevel)
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
debugLevel
);
// final int debugLevel)
}
return
tp_tasks
;
return
tp_tasks
;
}
}
...
@@ -966,6 +1024,58 @@ public class GPUTileProcessor {
...
@@ -966,6 +1024,58 @@ public class GPUTileProcessor {
// All data is already copied to GPU memory
// All data is already copied to GPU memory
/**
 * Launches the GPU kernel held in GPU_ROT_DERIV_kernel (looked up by the name in GPU_ROT_DERIV).
 *
 * The kernel receives two device pointers: gpu_correction_vector and gpu_rot_deriv.
 * It is launched with a grid of NUM_CAMS x 1 x 1 blocks of 3 x 3 x 3 threads, no dynamic
 * shared memory and the default stream. The context is synchronized before and after the launch.
 *
 * If the kernel was not created, an error dialog is shown and nothing is launched.
 */
public void execRotDerivs() {
    if (GPU_ROT_DERIV_kernel == null) {
        IJ.showMessage("Error", "No GPU kernel: GPU_ROT_DERIV_kernel");
        return;
    }
    // launch geometry: one block per camera, 3x3x3 threads in each block
    final int gridX = NUM_CAMS;
    final int gridY = 1;
    final int gridZ = 1;
    final int blockX = 3;
    final int blockY = 3;
    final int blockZ = 3;

    // kernel parameters: pointer to pointers
    Pointer correctionVectorArg = Pointer.to(gpu_correction_vector);
    Pointer rotDerivArg = Pointer.to(gpu_rot_deriv);
    Pointer kernelParameters = Pointer.to(correctionVectorArg, rotDerivArg);

    cuCtxSynchronize();
    // Call the kernel function
    cuLaunchKernel(
            GPU_ROT_DERIV_kernel,
            gridX, gridY, gridZ,     // Grid dimension
            blockX, blockY, blockZ,  // Block dimension
            0, null,                 // Shared memory size and stream (shared - only dynamic, static is in code)
            kernelParameters, null); // Kernel- and extra parameters
    cuCtxSynchronize(); // remove later
}
/**
 * Launches the GPU kernel held in SET_TILES_OFFSETS_kernel (looked up by the name in SET_TILES_OFFSETS).
 *
 * Kernel arguments, in order: gpu_tasks, the number of task tiles (num_task_tiles),
 * gpu_geometry_correction, gpu_correction_vector, gpu_rByRDist and gpu_rot_deriv.
 * The grid has ceil(num_task_tiles / TILES_PER_BLOCK_GEOM) blocks along X; each block has
 * NUM_CAMS x TILES_PER_BLOCK_GEOM x 1 threads. No dynamic shared memory, default stream.
 * The context is synchronized before and after the launch.
 *
 * If the kernel was not created, an error dialog is shown and nothing is launched.
 */
public void execSetTilesOffsets() {
    if (SET_TILES_OFFSETS_kernel == null) {
        IJ.showMessage("Error", "No GPU kernel: SET_TILES_OFFSETS_kernel");
        return;
    }
    // number of blocks along X, rounded up so that every task tile is covered
    final int numBlocks = (num_task_tiles + TILES_PER_BLOCK_GEOM - 1) / TILES_PER_BLOCK_GEOM;
    final int threadsX = NUM_CAMS;             // 4,8,1
    final int threadsY = TILES_PER_BLOCK_GEOM;
    final int threadsZ = 1;

    // kernel parameters: pointer to pointers
    Pointer tasksArg = Pointer.to(gpu_tasks);                             // struct tp_task * gpu_tasks,
    Pointer numTilesArg = Pointer.to(new int[] { num_task_tiles });       // int num_tiles, // number of tiles in task list
    Pointer geometryArg = Pointer.to(gpu_geometry_correction);            // struct gc * gpu_geometry_correction,
    Pointer corrVectorArg = Pointer.to(gpu_correction_vector);            // struct corr_vector * gpu_correction_vector,
    Pointer rByRDistArg = Pointer.to(gpu_rByRDist);                       // float * gpu_rByRDist) // length should match RBYRDIST_LEN
    Pointer rotDerivArg = Pointer.to(gpu_rot_deriv);                      // trot_deriv * gpu_rot_deriv);
    Pointer kernelParameters = Pointer.to(
            tasksArg,
            numTilesArg,
            geometryArg,
            corrVectorArg,
            rByRDistArg,
            rotDerivArg);

    cuCtxSynchronize();
    cuLaunchKernel(
            SET_TILES_OFFSETS_kernel,
            numBlocks, 1, 1,               // Grid dimension
            threadsX, threadsY, threadsZ,  // Block dimension
            0, null,                       // Shared memory size and stream (shared - only dynamic, static is in code)
            kernelParameters, null);       // Kernel- and extra parameters
    cuCtxSynchronize(); // remove later
}
public
void
execConverCorrectTiles
()
{
public
void
execConverCorrectTiles
()
{
if
(
GPU_CONVERT_CORRECT_TILES_kernel
==
null
)
if
(
GPU_CONVERT_CORRECT_TILES_kernel
==
null
)
{
{
...
@@ -1437,6 +1547,7 @@ public class GPUTileProcessor {
...
@@ -1437,6 +1547,7 @@ public class GPUTileProcessor {
// for (String sourceCode: sourceCodeUnits) {
// for (String sourceCode: sourceCodeUnits) {
for
(
int
cunit
=
0
;
cunit
<
ptxDataUnits
.
length
;
cunit
++)
{
for
(
int
cunit
=
0
;
cunit
<
ptxDataUnits
.
length
;
cunit
++)
{
String
sourceCode
=
sourceCodeUnits
[
cunit
];
String
sourceCode
=
sourceCodeUnits
[
cunit
];
//System.out.print(sourceCode);
// Use the NVRTC to create a program by compiling the source code
// Use the NVRTC to create a program by compiling the source code
nvrtcProgram
program
=
new
nvrtcProgram
();
nvrtcProgram
program
=
new
nvrtcProgram
();
nvrtcCreateProgram
(
program
,
sourceCode
,
null
,
0
,
null
,
null
);
nvrtcCreateProgram
(
program
,
sourceCode
,
null
,
0
,
null
,
null
);
...
...
src/main/java/com/elphel/imagej/tileprocessor/GeometryCorrection.java
View file @
514057c6
...
@@ -156,6 +156,11 @@ public class GeometryCorrection {
...
@@ -156,6 +156,11 @@ public class GeometryCorrection {
(
float
)
disparityRadius
//=150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
(
float
)
disparityRadius
//=150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
};
};
}
}
/**
 * Returns the number of array elements needed for a given number of cameras:
 * a fixed part of 21 elements plus 8 elements per camera.
 *
 * NOTE(review): presumably this is the length of the array produced by toFloatArray() - confirm.
 *
 * @param ncam number of cameras
 * @return 21 + 8 * ncam
 */
public static int arrayLength(int ncam) {
    final int fixedPart = 21;
    final int perCamera = 8;
    return fixedPart + perCamera * ncam;
}
public
double
[]
toDoubleArray
()
{
// for GPU comparison
public
double
[]
toDoubleArray
()
{
// for GPU comparison
return
new
double
[]
{
return
new
double
[]
{
pixelCorrectionWidth
,
// =2592; // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2)
pixelCorrectionWidth
,
// =2592; // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2)
...
@@ -322,6 +327,11 @@ cameraRadius, // average distance from the "mass center" of the sensors to t
...
@@ -322,6 +327,11 @@ cameraRadius, // average distance from the "mass center" of the sensors to t
return
wh
;
return
wh
;
}
}
/**
 * Creates an instance whose only initialization is a call to resetCorrVector().
 * Intended just to get the length of toFloatArray().
 */
public GeometryCorrection() {
    this.resetCorrVector();
}
public
GeometryCorrection
(
double
[]
extrinsic_corr
)
public
GeometryCorrection
(
double
[]
extrinsic_corr
)
{
{
this
.
extrinsic_corr
=
new
CorrVector
(
extrinsic_corr
);
this
.
extrinsic_corr
=
new
CorrVector
(
extrinsic_corr
);
...
@@ -1328,7 +1338,7 @@ cameraRadius, // average distance from the "mass center" of the sensors to t
...
@@ -1328,7 +1338,7 @@ cameraRadius, // average distance from the "mass center" of the sensors to t
public
class
CorrVector
{
public
class
CorrVector
{
static
final
int
LENGTH
=
19
;
// 10;
public
static
final
int
LENGTH
=
19
;
// 10;
static
final
int
LENGTH_ANGLES
=
10
;
static
final
int
LENGTH_ANGLES
=
10
;
static
final
int
TILT_INDEX
=
0
;
static
final
int
TILT_INDEX
=
0
;
static
final
int
AZIMUTH_INDEX
=
3
;
static
final
int
AZIMUTH_INDEX
=
3
;
...
@@ -3307,6 +3317,245 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
...
@@ -3307,6 +3317,245 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
return
pXY
;
return
pXY
;
}
}
public
double
[][]
getPortsCoordinatesAndDerivativesDbg
(
// To print intermediate results for comparison with the GPU
GeometryCorrection
gc_main
,
boolean
use_rig_offsets
,
Matrix
[]
rots
,
Matrix
[][]
deriv_rots
,
double
[][]
pXYderiv
,
// if not null, should be double[8][] - not used here
double
[][]
disp_dist
,
//
double
px
,
double
py
,
double
disparity
)
{
// String dbg_s = corr_vector.toString();
/* Starting with required tile center X, Y and nominal distortion, for each sensor port:
* 1) unapply common distortion (maybe for different - master camera)
* 2) apply disparity
* 3) apply rotations and zoom
* 4) re-apply distortion
* 5) return port center X and Y
* line_time
*/
// moved here so disp_dist and imu will always be created
if
(
disp_dist
==
null
)
{
disp_dist
=
new
double
[
numSensors
][
4
];
}
double
[]
imu
=
null
;
if
(
disp_dist
!=
null
)
{
imu
=
extrinsic_corr
.
getIMU
();
// currently it is common for all channels
if
((
deriv_rots
==
null
)
&&
((
imu
[
0
]
!=
0.0
)
||
(
imu
[
1
]
!=
0.0
)
||(
imu
[
2
]
!=
0.0
))){
deriv_rots
=
extrinsic_corr
.
getRotDeriveMatrices
();
}
}
/// if ((disp_dist == null) && (pXYderiv != null)) {
/// disp_dist = new double [numSensors][4];
/// }
double
[][]
rXY
=
getRXY
(
use_rig_offsets
);
// may include rig offsets
double
[][]
pXY
=
new
double
[
numSensors
][
2
];
double
pXcd
=
px
-
0.5
*
gc_main
.
pixelCorrectionWidth
;
double
pYcd
=
py
-
0.5
*
gc_main
.
pixelCorrectionHeight
;
double
rD
=
Math
.
sqrt
(
pXcd
*
pXcd
+
pYcd
*
pYcd
)*
0.001
*
gc_main
.
pixelSize
;
// distorted radius in a virtual center camera
double
rND2R
=
gc_main
.
getRByRDist
(
rD
/
gc_main
.
distortionRadius
,
(
debugLevel
>
-
1
));
double
pXc
=
pXcd
*
rND2R
;
// non-distorted coordinates relative to the (0.5 * this.pixelCorrectionWidth, 0.5 * this.pixelCorrectionHeight)
double
pYc
=
pYcd
*
rND2R
;
// in pixels
System
.
out
.
println
(
"px="
+
px
+
", py="
+
py
);
System
.
out
.
println
(
"pXcd="
+
pXcd
+
", pYcd="
+
pYcd
);
System
.
out
.
println
(
"rD="
+
rD
+
", rND2R="
+
rND2R
);
System
.
out
.
println
(
"pXc="
+
pXc
+
", pYc="
+
pYc
);
// next radial distortion coefficients are for this, not master camera (may be the same)
double
[]
rad_coeff
={
this
.
distortionC
,
this
.
distortionB
,
this
.
distortionA
,
this
.
distortionA5
,
this
.
distortionA6
,
this
.
distortionA7
,
this
.
distortionA8
};
double
fl_pix
=
focalLength
/(
0.001
*
pixelSize
);
// focal length in pixels - this camera
double
ri_scale
=
0.001
*
this
.
pixelSize
/
this
.
distortionRadius
;
System
.
out
.
println
(
"fl_pix="
+
fl_pix
+
", ri_scale="
+
ri_scale
);
double
[]
xyz
=
(
disparity
>
0
)
?
getWorldCoordinates
(
// USED in lwir
px
,
// double px,
py
,
// double py,
disparity
,
// double disparity,
true
)
:
null
;
// boolean correctDistortions)
System
.
out
.
println
(
"xyz[0]="
+
xyz
[
0
]+
", xyz[1]="
+
xyz
[
1
]+
", xyz[2]="
+
xyz
[
2
]);
for
(
int
i
=
0
;
i
<
numSensors
;
i
++){
// non-distorted XY of the shifted location of the individual sensor
double
pXci0
=
pXc
-
disparity
*
rXY
[
i
][
0
];
// in pixels
double
pYci0
=
pYc
-
disparity
*
rXY
[
i
][
1
];
// rectilinear, end of dealing with possibly other (master) camera, below all is for this camera distortions
System
.
out
.
println
(
"ncam="
+
i
+
": pXci0="
+
pXci0
+
", pYci0="
+
pYci0
);
// Convert a 2-d non-distorted vector to 3d at fl_pix distance in z direction
double
[][]
avi
=
{{
pXci0
},
{
pYci0
},{
fl_pix
}};
Matrix
vi
=
new
Matrix
(
avi
);
// non-distorted sensor channel view vector in pixels (z -along the common axis)
System
.
out
.
println
(
"ncam="
+
i
+
": vi="
);
vi
.
print
(
10
,
5
);
// Apply port-individual combined rotation/zoom matrix
Matrix
rvi
=
rots
[
i
].
times
(
vi
);
System
.
out
.
println
(
"ncam="
+
i
+
": rvi="
);
rvi
.
print
(
10
,
5
);
// get back to the projection plane by normalizing vector
double
norm_z
=
fl_pix
/
rvi
.
get
(
2
,
0
);
double
pXci
=
rvi
.
get
(
0
,
0
)
*
norm_z
;
double
pYci
=
rvi
.
get
(
1
,
0
)
*
norm_z
;
System
.
out
.
println
(
"ncam="
+
i
+
": norm_z="
+
norm_z
+
", pXci="
+
pXci
+
", pYci="
+
pYci
);
// Re-apply distortion
double
rNDi
=
Math
.
sqrt
(
pXci
*
pXci
+
pYci
*
pYci
);
// in pixels
// Rdist/R=A8*R^7+A7*R^6+A6*R^5+A5*R^4+A*R^3+B*R^2+C*R+(1-A8-A7-A6-A5-A-B-C)
double
ri
=
rNDi
*
ri_scale
;
// relative to distortion radius
// double rD2rND = (1.0 - distortionA8 - distortionA7 - distortionA6 - distortionA5 - distortionA - distortionB - distortionC);
System
.
out
.
println
(
"ncam="
+
i
+
": rNDi="
+
rNDi
+
", ri="
+
ri
);
double
rD2rND
=
1.0
;
double
rri
=
1.0
;
for
(
int
j
=
0
;
j
<
rad_coeff
.
length
;
j
++){
rri
*=
ri
;
rD2rND
+=
rad_coeff
[
j
]*(
rri
-
1.0
);
// Fixed
}
System
.
out
.
println
(
"ncam="
+
i
+
": rri="
+
rri
+
", rD2rND="
+
rD2rND
);
// Get port pixel coordinates by scaling the 2d vector with Rdistorted/Dnondistorted coefficient)
double
pXid
=
pXci
*
rD2rND
;
double
pYid
=
pYci
*
rD2rND
;
System
.
out
.
println
(
"ncam="
+
i
+
": pXid="
+
pXid
+
", pYid="
+
pYid
);
pXY
[
i
][
0
]
=
pXid
+
this
.
pXY0
[
i
][
0
];
pXY
[
i
][
1
]
=
pYid
+
this
.
pXY0
[
i
][
1
];
System
.
out
.
println
(
"pXY["
+
i
+
"][0]="
+
pXY
[
i
][
0
]+
", pXY["
+
i
+
"][1]="
+
pXY
[
i
][
1
]);
// used when calculating derivatives, TODO: combine calculations !
double
drD2rND_dri
=
0.0
;
Matrix
drvi_daz
=
null
;
Matrix
drvi_dtl
=
null
;
Matrix
drvi_drl
=
null
;
double
dpXci_dazimuth
=
0.0
;
double
dpYci_dazimuth
=
0.0
;
double
dpXci_dtilt
=
0.0
;
double
dpYci_dtilt
=
0.0
;
double
dpXci_droll
=
0.0
;
double
dpYci_droll
=
0.0
;
if
((
disp_dist
!=
null
)
||
(
pXYderiv
!=
null
))
{
rri
=
1.0
;
for
(
int
j
=
0
;
j
<
rad_coeff
.
length
;
j
++){
drD2rND_dri
+=
rad_coeff
[
j
]
*
(
j
+
1
)
*
rri
;
rri
*=
ri
;
}
if
(
deriv_rots
!=
null
)
{
// needed for derivatives and IMU
drvi_daz
=
deriv_rots
[
i
][
0
].
times
(
vi
);
drvi_dtl
=
deriv_rots
[
i
][
1
].
times
(
vi
);
drvi_drl
=
deriv_rots
[
i
][
2
].
times
(
vi
);
System
.
out
.
println
(
"ncam="
+
i
+
": drvi_daz="
);
drvi_daz
.
print
(
10
,
5
);
System
.
out
.
println
(
"ncam="
+
i
+
": drvi_dtl="
);
drvi_dtl
.
print
(
10
,
5
);
System
.
out
.
println
(
"ncam="
+
i
+
": drvi_drl="
);
drvi_drl
.
print
(
10
,
5
);
dpXci_dazimuth
=
drvi_daz
.
get
(
0
,
0
)
*
norm_z
-
pXci
*
drvi_daz
.
get
(
2
,
0
)
/
rvi
.
get
(
2
,
0
);
dpYci_dazimuth
=
drvi_daz
.
get
(
1
,
0
)
*
norm_z
-
pYci
*
drvi_daz
.
get
(
2
,
0
)
/
rvi
.
get
(
2
,
0
);
dpXci_dtilt
=
drvi_dtl
.
get
(
0
,
0
)
*
norm_z
-
pXci
*
drvi_dtl
.
get
(
2
,
0
)
/
rvi
.
get
(
2
,
0
);
dpYci_dtilt
=
drvi_dtl
.
get
(
1
,
0
)
*
norm_z
-
pYci
*
drvi_dtl
.
get
(
2
,
0
)
/
rvi
.
get
(
2
,
0
);
dpXci_droll
=
drvi_drl
.
get
(
0
,
0
)
*
norm_z
-
pXci
*
drvi_drl
.
get
(
2
,
0
)
/
rvi
.
get
(
2
,
0
);
dpYci_droll
=
drvi_drl
.
get
(
1
,
0
)
*
norm_z
-
pYci
*
drvi_drl
.
get
(
2
,
0
)
/
rvi
.
get
(
2
,
0
);
System
.
out
.
println
(
"ncam="
+
i
+
": dpXci_dazimuth="
+
dpXci_dazimuth
+
", dpYci_dazimuth="
+
dpYci_dazimuth
);
System
.
out
.
println
(
"ncam="
+
i
+
": dpXci_dtilt="
+
dpXci_dtilt
+
", dpYci_dtilt="
+
dpYci_dtilt
);
System
.
out
.
println
(
"ncam="
+
i
+
": dpXci_droll="
+
dpXci_droll
+
", dpYci_droll="
+
dpYci_droll
);
}
}
double
delta_t
=
0.0
;
// double [] imu = null;
double
[][]
dpXci_pYci_imu_lin
=
new
double
[
2
][
3
];
// null
if
(
disp_dist
!=
null
)
{
disp_dist
[
i
]
=
new
double
[
4
];
// dx/d_disp, dx_d_ccw_disp
// Not clear - what should be in Z direction before rotation here?
double
[][]
add0
=
{
{-
rXY
[
i
][
0
],
rXY
[
i
][
1
],
0.0
},
{-
rXY
[
i
][
1
],
-
rXY
[
i
][
0
],
0.0
},
{
0.0
,
0.0
,
0.0
}};
// what is last element???
Matrix
dd0
=
new
Matrix
(
add0
);
Matrix
dd1
=
rots
[
i
].
times
(
dd0
).
getMatrix
(
0
,
1
,
0
,
1
).
times
(
norm_z
);
// get top left 2x2 sub-matrix
//// Matrix dd1 = dd0.getMatrix(0, 1,0,1); // get top left 2x2 sub-matrix
// now first column of 2x2 dd1 - x, y components of derivatives by disparity, second column - derivatives by ortho to disparity (~Y in 2d correlation)
// unity vector in the direction of radius
System
.
out
.
println
(
"ncam="
+
i
+
": dd1="
);
dd1
.
print
(
10
,
5
);
double
c_dist
=
pXci
/
rNDi
;
double
s_dist
=
pYci
/
rNDi
;
double
[][]
arot2
=
{
{
c_dist
,
s_dist
},
{-
s_dist
,
c_dist
}};
Matrix
rot2
=
new
Matrix
(
arot2
);
// convert from non-distorted X,Y to parallel and perpendicular (CCW) to the radius
System
.
out
.
println
(
"ncam="
+
i
+
": rot2="
);
rot2
.
print
(
10
,
5
);
double
[][]
ascale_distort
=
{
{
rD2rND
+
ri
*
drD2rND_dri
,
0
},
{
0
,
rD2rND
}};
Matrix
scale_distort
=
new
Matrix
(
ascale_distort
);
// scale component parallel to radius as distortion derivative, perpendicular - as distortion
Matrix
dd2
=
rot2
.
transpose
().
times
(
scale_distort
).
times
(
rot2
).
times
(
dd1
);
System
.
out
.
println
(
"ncam="
+
i
+
": scale_distortXrot2Xdd1="
);
scale_distort
.
times
(
rot2
).
times
(
dd1
).
print
(
10
,
5
);
System
.
out
.
println
(
"ncam="
+
i
+
": dd2="
);
dd2
.
print
(
10
,
5
);
disp_dist
[
i
][
0
]
=
dd2
.
get
(
0
,
0
);
disp_dist
[
i
][
1
]
=
dd2
.
get
(
0
,
1
);
disp_dist
[
i
][
2
]
=
dd2
.
get
(
1
,
0
);
// d_py/d_disp
disp_dist
[
i
][
3
]
=
dd2
.
get
(
1
,
1
);
System
.
out
.
println
(
"disp_dist["
+
i
+
"][0]="
+
disp_dist
[
i
][
0
]);
System
.
out
.
println
(
"disp_dist["
+
i
+
"][1]="
+
disp_dist
[
i
][
1
]);
System
.
out
.
println
(
"disp_dist["
+
i
+
"][2]="
+
disp_dist
[
i
][
2
]);
System
.
out
.
println
(
"disp_dist["
+
i
+
"][3]="
+
disp_dist
[
i
][
3
]);
// imu = extrinsic_corr.getIMU(i); // currently it is common for all channels
// ERS linear does not yet use per-port rotations, probably not needed
// double [][] dpXci_pYci_imu_lin = new double[2][3]; // null
if
((
imu
!=
null
)
&&((
imu
[
0
]
!=
0.0
)
||
(
imu
[
1
]
!=
0.0
)
||(
imu
[
2
]
!=
0.0
)
||(
imu
[
3
]
!=
0.0
)
||(
imu
[
4
]
!=
0.0
)
||(
imu
[
5
]
!=
0.0
)))
{
delta_t
=
dd2
.
get
(
1
,
0
)
*
disparity
*
line_time
;
// positive for top cameras, negative - for bottom
double
ers_Xci
=
delta_t
*
(
dpXci_dtilt
*
imu
[
0
]
+
dpXci_dazimuth
*
imu
[
1
]
+
dpXci_droll
*
imu
[
2
]);
double
ers_Yci
=
delta_t
*
(
dpYci_dtilt
*
imu
[
0
]
+
dpYci_dazimuth
*
imu
[
1
]
+
dpYci_droll
*
imu
[
2
]);
if
(
xyz
!=
null
)
{
double
k
=
SCENE_UNITS_SCALE
*
this
.
disparityRadius
;
double
wdisparity
=
disparity
;
double
dwdisp_dz
=
(
k
*
this
.
focalLength
/
(
0.001
*
this
.
pixelSize
))
/
(
xyz
[
2
]
*
xyz
[
2
]);
System
.
out
.
println
(
"ncam="
+
i
+
": k="
+
k
+
", wdisparity="
+
wdisparity
+
", dwdisp_dz="
+
dwdisp_dz
);
dpXci_pYci_imu_lin
[
0
][
0
]
=
-
wdisparity
/
k
;
// dpx/ dworld_X
dpXci_pYci_imu_lin
[
1
][
1
]
=
wdisparity
/
k
;
// dpy/ dworld_Y
dpXci_pYci_imu_lin
[
0
][
2
]
=
(
xyz
[
0
]
/
k
)
*
dwdisp_dz
;
// dpx/ dworld_Z
dpXci_pYci_imu_lin
[
1
][
2
]
=
(
xyz
[
1
]
/
k
)
*
dwdisp_dz
;
// dpy/ dworld_Z
System
.
out
.
println
(
"ncam="
+
i
+
": dpXci_pYci_imu_lin[0][0]="
+
dpXci_pYci_imu_lin
[
0
][
0
]+
", dpXci_pYci_imu_lin[0][2]="
+
dpXci_pYci_imu_lin
[
0
][
2
]);
System
.
out
.
println
(
"ncam="
+
i
+
": dpXci_pYci_imu_lin[1][1]="
+
dpXci_pYci_imu_lin
[
1
][
1
]+
", dpXci_pYci_imu_lin[1][2]="
+
dpXci_pYci_imu_lin
[
1
][
2
]);
ers_Xci
+=
delta_t
*
(
dpXci_pYci_imu_lin
[
0
][
0
]
*
imu
[
3
]
+
dpXci_pYci_imu_lin
[
0
][
2
]
*
imu
[
5
]);
ers_Yci
+=
delta_t
*
(
dpXci_pYci_imu_lin
[
1
][
1
]
*
imu
[
4
]
+
dpXci_pYci_imu_lin
[
1
][
2
]
*
imu
[
5
]);
System
.
out
.
println
(
"ncam="
+
i
+
": ers_Xci="
+
ers_Xci
+
", ers_Yci="
+
ers_Yci
);
}
pXY
[
i
][
0
]
+=
ers_Xci
*
rD2rND
;
// added correction to pixel X
pXY
[
i
][
1
]
+=
ers_Yci
*
rD2rND
;
// added correction to pixel Y
System
.
out
.
println
(
"pXY["
+
i
+
"][0]="
+
pXY
[
i
][
0
]+
", pXY["
+
i
+
"][1]="
+
pXY
[
i
][
1
]);
}
else
{
imu
=
null
;
}
// TODO: calculate derivatives of pX, pY by 3 imu omegas
}
}
return
pXY
;
}
// private Matrix m_balance_xy = null; // [2*numSensors][2*numSensors] 8x8 matrix to make XY ports correction to have average == 0
// private Matrix m_balance_xy = null; // [2*numSensors][2*numSensors] 8x8 matrix to make XY ports correction to have average == 0
// private Matrix m_balance_dd = null; // [2*numSensors+1)][2*numSensors] 9x8 matrix to extract disparity from dd
// private Matrix m_balance_dd = null; // [2*numSensors+1)][2*numSensors] 9x8 matrix to extract disparity from dd
...
...
src/main/java/com/elphel/imagej/tileprocessor/ImageDtt.java
View file @
514057c6
...
@@ -9705,6 +9705,23 @@ public class ImageDtt {
...
@@ -9705,6 +9705,23 @@ public class ImageDtt {
centerX
,
centerX
,
centerY
,
centerY
,
disparity_aux
);
// + disparity_corr);
disparity_aux
);
// + disparity_corr);
if
((
tileX
==
debug_tileX
)
&&
(
tileY
==
debug_tileY
))
{
// will just print debug data
geometryCorrection_main
.
getPortsCoordinatesAndDerivativesDbg
(
geometryCorrection_main
,
// GeometryCorrection gc_main,
false
,
// boolean use_rig_offsets,
corr_rots_main
,
// Matrix [] rots,
null
,
// Matrix [][] deriv_rots,
null
,
// double [][] pXYderiv, // if not null, should be double[8][]
disp_dist_main
,
// used to correct 3D correlations
centerX
,
centerY
,
disparity_main
);
// + disparity_corr);
}
// acquisition time of the tiles centers in scanline times
// acquisition time of the tiles centers in scanline times
if
(
ers_delay
!=
null
)
{
if
(
ers_delay
!=
null
)
{
for
(
int
i
=
0
;
i
<
quad_main
;
i
++)
ers_delay
[
0
][
i
][
nTile
]
=
centersXY_main
[
i
][
1
]-
geometryCorrection_main
.
woi_tops
[
i
];
for
(
int
i
=
0
;
i
<
quad_main
;
i
++)
ers_delay
[
0
][
i
][
nTile
]
=
centersXY_main
[
i
][
1
]-
geometryCorrection_main
.
woi_tops
[
i
];
...
...
src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
View file @
514057c6
...
@@ -2018,7 +2018,8 @@ public class TwoQuadCLT {
...
@@ -2018,7 +2018,8 @@ public class TwoQuadCLT {
clt_parameters
.
gpu_woi_twidth
,
clt_parameters
.
gpu_woi_twidth
,
clt_parameters
.
gpu_woi_theight
);
clt_parameters
.
gpu_woi_theight
);
GPUTileProcessor
.
TpTask
[]
tp_tasks
=
gPUTileProcessor
.
setFullFrameImages
(
GPUTileProcessor
.
TpTask
[]
tp_tasks
=
gPUTileProcessor
.
setFullFrameImages
(
twoi
,
// Rectangle woi,
false
,
// boolean calc_offsets, // old way, now not needed with GPU calculation
twoi
,
// Rectangle woi,
clt_parameters
.
gpu_woi_round
,
// boolean round_woi,
clt_parameters
.
gpu_woi_round
,
// boolean round_woi,
(
float
)
clt_parameters
.
disparity
,
// float target_disparity, // apply same disparity to all tiles
(
float
)
clt_parameters
.
disparity
,
// float target_disparity, // apply same disparity to all tiles
0xf
,
// int out_image, // from which tiles to generate image (currently 0/1)
0xf
,
// int out_image, // from which tiles to generate image (currently 0/1)
...
@@ -2031,7 +2032,6 @@ public class TwoQuadCLT {
...
@@ -2031,7 +2032,6 @@ public class TwoQuadCLT {
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
debugLevel
);
// final int debugLevel)
debugLevel
);
// final int debugLevel)
// Optionally save offsets here?
// Optionally save offsets here?
// EyesisCorrectionParameters.CorrectionParameters ecp,
// EyesisCorrectionParameters.CorrectionParameters ecp,
boolean
save_ports_xy
=
false
;
// true; Same files as saved with the kernels
boolean
save_ports_xy
=
false
;
// true; Same files as saved with the kernels
...
@@ -2087,6 +2087,8 @@ public class TwoQuadCLT {
...
@@ -2087,6 +2087,8 @@ public class TwoQuadCLT {
tp_tasks
);
tp_tasks
);
gPUTileProcessor
.
setTextureIndices
(
gPUTileProcessor
.
setTextureIndices
(
texture_indices
);
texture_indices
);
gPUTileProcessor
.
setGeometryCorrection
(
quadCLT_main
.
getGeometryCorrection
());
// once
gPUTileProcessor
.
setExtrinsicsVector
(
quadCLT_main
.
getGeometryCorrection
().
getCorrVector
());
// for each new image
// TODO: calculate from the camera geometry?
// TODO: calculate from the camera geometry?
double
[][]
port_offsets
=
{
// used only in textures to scale differences
double
[][]
port_offsets
=
{
// used only in textures to scale differences
...
@@ -2099,7 +2101,20 @@ public class TwoQuadCLT {
...
@@ -2099,7 +2101,20 @@ public class TwoQuadCLT {
int
NREPEAT
=
1
;
// 00;
int
NREPEAT
=
1
;
// 00;
System
.
out
.
println
(
"\n------------ Running GPU "
+
NREPEAT
+
" times ----------------"
);
System
.
out
.
println
(
"\n------------ Running GPU "
+
NREPEAT
+
" times ----------------"
);
long
startGPU
=
System
.
nanoTime
();
long
startGPU
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
gPUTileProcessor
.
execConverCorrectTiles
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execRotDerivs
();
}
long
startTasksSetup
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execSetTilesOffsets
();
}
long
startDirectConvert
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execConverCorrectTiles
();
}
// run imclt;
// run imclt;
long
startIMCLT
=
System
.
nanoTime
();
long
startIMCLT
=
System
.
nanoTime
();
...
@@ -2145,18 +2160,26 @@ public class TwoQuadCLT {
...
@@ -2145,18 +2160,26 @@ public class TwoQuadCLT {
long
endTexturesRBGA
=
System
.
nanoTime
();
long
endTexturesRBGA
=
System
.
nanoTime
();
long
endGPUTime
=
System
.
nanoTime
();
long
endGPUTime
=
System
.
nanoTime
();
long
firstGPUTime
=
(
startIMCLT
-
startGPU
)
/
NREPEAT
;
long
runImcltTime
=
(
endImcltTime
-
startIMCLT
)
/
NREPEAT
;
long
rotDerivsTime
=
(
startTasksSetup
-
startGPU
)
/
NREPEAT
;
long
runCorr2DTime
=
(
endCorr2d
-
startCorr2d
)
/
NREPEAT
;
long
tasksSetupTime
=
(
startDirectConvert
-
startTasksSetup
)
/
NREPEAT
;
long
runTexturesTime
=
(
endTextures
-
startTextures
)
/
NREPEAT
;
long
firstGPUTime
=
(
startIMCLT
-
startDirectConvert
)
/
NREPEAT
;
long
runTexturesRBGATime
=
(
endTexturesRBGA
-
startTexturesRBGA
)/
NREPEAT
;
long
runImcltTime
=
(
endImcltTime
-
startIMCLT
)
/
NREPEAT
;
long
runGPUTime
=
(
endGPUTime
-
startGPU
)
/
NREPEAT
;
long
runCorr2DTime
=
(
endCorr2d
-
startCorr2d
)
/
NREPEAT
;
long
runTexturesTime
=
(
endTextures
-
startTextures
)
/
NREPEAT
;
long
runTexturesRBGATime
=
(
endTexturesRBGA
-
startTexturesRBGA
)
/
NREPEAT
;
long
runGPUTime
=
(
endGPUTime
-
startGPU
)
/
NREPEAT
;
// run corr2d
// run corr2d
System
.
out
.
println
(
"\n------------ End of running GPU "
+
NREPEAT
+
" times ----------------"
);
System
.
out
.
println
(
"\n------------ End of running GPU "
+
NREPEAT
+
" times ----------------"
);
System
.
out
.
println
(
"GPU run time ="
+(
runGPUTime
*
1.0
e
-
6
)+
"ms, (direct conversion: "
+(
firstGPUTime
*
1.0
e
-
6
)+
"ms, imclt: "
+
System
.
out
.
println
(
"GPU run time ="
+
(
runGPUTime
*
1.0
e
-
6
)+
"ms"
);
(
runImcltTime
*
1.0
e
-
6
)+
"ms), corr2D: "
+(
runCorr2DTime
*
1.0
e
-
6
)+
"ms), textures: "
+(
runTexturesTime
*
1.0
e
-
6
)+
"ms, RGBA: "
+
System
.
out
.
println
(
" - rot/derivs: "
+(
rotDerivsTime
*
1.0
e
-
6
)+
"ms"
);
(
runTexturesRBGATime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - tasks setup: "
+(
tasksSetupTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - direct conversion: "
+(
firstGPUTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - imclt: "
+(
runImcltTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - corr2D: "
+(
runCorr2DTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - textures: "
+(
runTexturesTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - RGBA: "
+(
runTexturesRBGATime
*
1.0
e
-
6
)+
"ms"
);
// get data back from GPU
// get data back from GPU
float
[][][]
iclt_fimg
=
new
float
[
GPUTileProcessor
.
NUM_CAMS
][][];
float
[][][]
iclt_fimg
=
new
float
[
GPUTileProcessor
.
NUM_CAMS
][][];
for
(
int
ncam
=
0
;
ncam
<
iclt_fimg
.
length
;
ncam
++)
{
for
(
int
ncam
=
0
;
ncam
<
iclt_fimg
.
length
;
ncam
++)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment