Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
20df596a
Commit
20df596a
authored
Apr 16, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
changing direct conversion to CDP, handling sparse tasks
parent
0bb31239
Changes
7
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
436 additions
and
344 deletions
+436
-344
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+95
-105
GeometryCorrection.java
...a/com/elphel/imagej/tileprocessor/GeometryCorrection.java
+0
-4
TwoQuadCLT.java
...main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+19
-13
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+215
-7
TileProcessor.h
src/main/resources/kernels/TileProcessor.h
+36
-13
geometry_correction.cu
src/main/resources/kernels/geometry_correction.cu
+65
-197
geometry_correction.h
src/main/resources/kernels/geometry_correction.h
+6
-5
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
20df596a
This diff is collapsed.
Click to expand it.
src/main/java/com/elphel/imagej/tileprocessor/GeometryCorrection.java
View file @
20df596a
...
@@ -4210,10 +4210,6 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
...
@@ -4210,10 +4210,6 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
double
minDerivative
=
0.01
;
double
minDerivative
=
0.01
;
int
numIterations
=
1000
;
int
numIterations
=
1000
;
double
drDistDr
=
1.0
;
double
drDistDr
=
1.0
;
// public double distortionA5=0.0; //r^5 (normalized to focal length or to sensor half width?)
// public double distortionA=0.0; // r^4 (normalized to focal length or to sensor half width?)
// public double distortionB=0.0; // r^3
// public double distortionC=0.0; // r^2
boolean
use8
=(
this
.
distortionA8
!=
0.0
)
||
(
this
.
distortionA7
!=
0.0
)
||
(
this
.
distortionA6
!=
0.0
);
boolean
use8
=(
this
.
distortionA8
!=
0.0
)
||
(
this
.
distortionA7
!=
0.0
)
||
(
this
.
distortionA6
!=
0.0
);
double
d
=
1.0
-
this
.
distortionA8
-
this
.
distortionA7
-
this
.
distortionA6
-
this
.
distortionA5
-
this
.
distortionA
-
this
.
distortionB
-
this
.
distortionC
;
double
d
=
1.0
-
this
.
distortionA8
-
this
.
distortionA7
-
this
.
distortionA6
-
this
.
distortionA5
-
this
.
distortionA
-
this
.
distortionB
-
this
.
distortionC
;
double
rPrev
=
0.0
;
double
rPrev
=
0.0
;
...
...
src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
View file @
20df596a
...
@@ -2087,7 +2087,9 @@ public class TwoQuadCLT {
...
@@ -2087,7 +2087,9 @@ public class TwoQuadCLT {
tp_tasks
);
tp_tasks
);
gPUTileProcessor
.
setTextureIndices
(
gPUTileProcessor
.
setTextureIndices
(
texture_indices
);
texture_indices
);
gPUTileProcessor
.
setGeometryCorrection
(
quadCLT_main
.
getGeometryCorrection
());
// once
gPUTileProcessor
.
setGeometryCorrection
(
quadCLT_main
.
getGeometryCorrection
(),
false
);
// boolean use_java_rByRDist) { // false - use newer GPU execCalcReverseDistortions); // once
gPUTileProcessor
.
setExtrinsicsVector
(
quadCLT_main
.
getGeometryCorrection
().
getCorrVector
());
// for each new image
gPUTileProcessor
.
setExtrinsicsVector
(
quadCLT_main
.
getGeometryCorrection
().
getCorrVector
());
// for each new image
// TODO: calculate from the camera geometry?
// TODO: calculate from the camera geometry?
...
@@ -2101,6 +2103,10 @@ public class TwoQuadCLT {
...
@@ -2101,6 +2103,10 @@ public class TwoQuadCLT {
int
NREPEAT
=
1
;
// 00;
int
NREPEAT
=
1
;
// 00;
System
.
out
.
println
(
"\n------------ Running GPU "
+
NREPEAT
+
" times ----------------"
);
System
.
out
.
println
(
"\n------------ Running GPU "
+
NREPEAT
+
" times ----------------"
);
long
startGPU
=
System
.
nanoTime
();
long
startGPU
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execCalcReverseDistortions
();
}
long
startRotDerivs
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execRotDerivs
();
gPUTileProcessor
.
execRotDerivs
();
}
}
...
@@ -2113,13 +2119,12 @@ public class TwoQuadCLT {
...
@@ -2113,13 +2119,12 @@ public class TwoQuadCLT {
long
startDirectConvert
=
System
.
nanoTime
();
long
startDirectConvert
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execConver
CorrectTiles
();
gPUTileProcessor
.
execConver
Direct
();
}
}
// run imclt;
// run imclt;
long
startIMCLT
=
System
.
nanoTime
();
long
startIMCLT
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
// gPUTileProcessor.execImcltRbg(quadCLT_main.isMonochrome());
gPUTileProcessor
.
execImcltRbgAll
(
quadCLT_main
.
isMonochrome
());
gPUTileProcessor
.
execImcltRbgAll
(
quadCLT_main
.
isMonochrome
());
}
}
long
endImcltTime
=
System
.
nanoTime
();
long
endImcltTime
=
System
.
nanoTime
();
...
@@ -2159,10 +2164,10 @@ public class TwoQuadCLT {
...
@@ -2159,10 +2164,10 @@ public class TwoQuadCLT {
clt_parameters
.
min_agree
,
// double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
clt_parameters
.
min_agree
,
// double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
clt_parameters
.
dust_remove
);
// boolean dust_remove,
clt_parameters
.
dust_remove
);
// boolean dust_remove,
long
endTexturesRBGA
=
System
.
nanoTime
();
long
endTexturesRBGA
=
System
.
nanoTime
();
long
endGPUTime
=
System
.
nanoTime
();
long
endGPUTime
=
System
.
nanoTime
();
long
rotDerivsTime
=
(
startTasksSetup
-
startGPU
)
/
NREPEAT
;
long
calcReverseTime
=
(
startRotDerivs
-
startGPU
)
/
NREPEAT
;
long
rotDerivsTime
=
(
startTasksSetup
-
startRotDerivs
)
/
NREPEAT
;
long
tasksSetupTime
=
(
startDirectConvert
-
startTasksSetup
)
/
NREPEAT
;
long
tasksSetupTime
=
(
startDirectConvert
-
startTasksSetup
)
/
NREPEAT
;
long
firstGPUTime
=
(
startIMCLT
-
startDirectConvert
)
/
NREPEAT
;
long
firstGPUTime
=
(
startIMCLT
-
startDirectConvert
)
/
NREPEAT
;
long
runImcltTime
=
(
endImcltTime
-
startIMCLT
)
/
NREPEAT
;
long
runImcltTime
=
(
endImcltTime
-
startIMCLT
)
/
NREPEAT
;
...
@@ -2171,9 +2176,10 @@ public class TwoQuadCLT {
...
@@ -2171,9 +2176,10 @@ public class TwoQuadCLT {
long
runTexturesRBGATime
=
(
endTexturesRBGA
-
startTexturesRBGA
)
/
NREPEAT
;
long
runTexturesRBGATime
=
(
endTexturesRBGA
-
startTexturesRBGA
)
/
NREPEAT
;
long
runGPUTime
=
(
endGPUTime
-
startGPU
)
/
NREPEAT
;
long
runGPUTime
=
(
endGPUTime
-
startGPU
)
/
NREPEAT
;
// run corr2d
// run corr2d
//RotDerivs
System
.
out
.
println
(
"\n------------ End of running GPU "
+
NREPEAT
+
" times ----------------"
);
System
.
out
.
println
(
"\n------------ End of running GPU "
+
NREPEAT
+
" times ----------------"
);
System
.
out
.
println
(
"GPU run time ="
+
(
runGPUTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
"GPU run time ="
+
(
runGPUTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - calc reverse dist.: "
+(
calcReverseTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - rot/derivs: "
+(
rotDerivsTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - rot/derivs: "
+(
rotDerivsTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - tasks setup: "
+(
tasksSetupTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - tasks setup: "
+(
tasksSetupTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - direct conversion: "
+(
firstGPUTime
*
1.0
e
-
6
)+
"ms"
);
System
.
out
.
println
(
" - direct conversion: "
+(
firstGPUTime
*
1.0
e
-
6
)+
"ms"
);
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
20df596a
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/TileProcessor.h
View file @
20df596a
...
@@ -41,9 +41,14 @@
...
@@ -41,9 +41,14 @@
#include "tp_defines.h"
#include "tp_defines.h"
#endif
#endif
// Declaration of the index_direct kernel.
// NOTE(review): in the original the trailing comments on the last two parameters look shifted by one
// line (the "number of non-zero tiles" text sat on active_tiles and "indices to gpu_tasks" on
// num_active_tiles). The roles below are inferred from the parameter names and from the
// gpu_active_tiles / num_active_tiles parameters of convert_correct_tiles - confirm against the kernel body.
extern "C" __global__ void index_direct(
		struct tp_task * gpu_tasks,
		int              num_tiles,         // number of tiles in task
		int *            active_tiles,      // presumably: indices to gpu_tasks (of non-zero tiles) - TODO confirm
		int *            num_active_tiles); // presumably: pointer to the calculated number of non-zero tiles, should be initialized to zero - TODO confirm
extern
"C"
extern
"C"
__global__
void
convert_direct
(
// called with a single block, CONVERT_DIRECT_INDEXING_THREADS threads
__global__
void
convert_correct_tiles
(
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
float
**
gpu_kernel_offsets
,
// [NUM_CAMS],
float
**
gpu_kernel_offsets
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
...
@@ -51,6 +56,24 @@ __global__ void convert_correct_tiles(
...
@@ -51,6 +56,24 @@ __global__ void convert_correct_tiles(
float
**
gpu_clt
,
// [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
dstride
,
// in floats (pixels)
size_t
dstride
,
// in floats (pixels)
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
int
lpf_mask
,
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
int
woi_width
,
int
woi_height
,
int
kernels_hor
,
int
kernels_vert
,
int
*
gpu_active_tiles
,
// pointer to the calculated number of non-zero tiles
int
*
pnum_active_tiles
);
// indices to gpu_tasks
extern
"C"
__global__
void
convert_correct_tiles
(
float
**
gpu_kernel_offsets
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
struct
tp_task
*
gpu_tasks
,
int
*
gpu_active_tiles
,
// indices in gpu_tasks to non-zero tiles
int
num_active_tiles
,
// number of tiles in task
float
**
gpu_clt
,
// [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
dstride
,
// in floats (pixels)
// int num_tiles, // number of tiles in task
int
lpf_mask
,
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
int
lpf_mask
,
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
int
woi_width
,
int
woi_width
,
int
woi_height
,
int
woi_height
,
...
...
src/main/resources/kernels/geometry_correction.cu
View file @
20df596a
...
@@ -62,6 +62,8 @@ __device__ void printExtrinsicCorrection(corr_vector * cv);
...
@@ -62,6 +62,8 @@ __device__ void printExtrinsicCorrection(corr_vector * cv);
inline
__device__
float
getRByRDist
(
float
rDist
,
inline
__device__
float
getRByRDist
(
float
rDist
,
float
rByRDist
[
RBYRDIST_LEN
]);
//shared memory
float
rByRDist
[
RBYRDIST_LEN
]);
//shared memory
__constant__
float
ROTS_TEMPLATE
[
7
][
3
][
3
][
3
]
=
{
// ...{cos,sin,const}...
__constant__
float
ROTS_TEMPLATE
[
7
][
3
][
3
][
3
]
=
{
// ...{cos,sin,const}...
{
// azimuth
{
// azimuth
{{
1
,
0
,
0
},{
0
,
0
,
0
},{
0
,
-
1
,
0
}},
{{
1
,
0
,
0
},{
0
,
0
,
0
},{
0
,
-
1
,
0
}},
...
@@ -116,201 +118,6 @@ __constant__ int mm_seq [3][3][3]={
...
@@ -116,201 +118,6 @@ __constant__ int mm_seq [3][3][3]={
{
-
1
,
-
1
,
-
1
}
// do nothing
{
-
1
,
-
1
,
-
1
}
// do nothing
}};
}};
#if 0
// NOTE: everything up to the matching #endif is excluded from compilation by "#if 0".
// Per-camera 3x3 matrix written at the end of calc_rot_matrices() (roll * (tilt * azimuth)).
__device__ float rot_matrices [NUM_CAMS][3][3];
//__device__ float rot_deriv_matrices [NUM_CAMS][4][3][3]; // /d_azimuth, /d_tilt, /d_roll, /d_zoom)
// threads (3,3,4)
/**
 * Build one 3x3 matrix per camera from the correction vector and store it in rot_matrices[].
 *
 * Thread mapping (see the "threads (3,3,4)" note above): threadIdx.z selects the camera,
 * (threadIdx.y, threadIdx.x) selects the matrix element [row][column]. In the first stage the
 * same x/y pair is flattened to nangle1 and reused to pick an angle (nangle1 >> 1) and
 * whether this thread stores its sine or cosine (nangle1 & 1).
 *
 * Stages, separated by __syncthreads():
 *  1. read azimuth/tilt/roll/zoom for the camera and store cos/sin (or zoom) in shared memory;
 *  2. scale the roll cos/sin by (1 + zoom);
 *  3. build the azimuth, tilt and roll matrices from ROTS_TEMPLATE;
 *  4. matrices[ncam][3] = tilt * azimuth, then rot_matrices[ncam] = roll * matrices[ncam][3].
 *
 * @param gpu_correction_vector pointer to the corr_vector whose azimuth, tilt, roll and zoom
 *        arrays are read
 */
extern "C" __global__ void calc_rot_matrices(
struct corr_vector * gpu_correction_vector)
{
__shared__ float zoom [NUM_CAMS];
__shared__ float sincos [NUM_CAMS][3][2]; // {az,tilt,roll, d_az, d_tilt, d_roll, d_az}{cos,sin}
__shared__ float matrices[NUM_CAMS][4][3][3]; // [7] - extra
float angle;
int ncam = threadIdx.z;
// flatten the (x,y) position; pairs of consecutive values share one angle
int nangle1 = threadIdx.x + threadIdx.y * blockDim.x; // * >> 1;
int nangle = nangle1 >> 1;
// odd nangle1 -> this thread produces the sine, even -> the cosine
int is_sin = nangle1 & 1;
#ifdef DEBUG20a
if ((threadIdx.x == 0) && ( threadIdx.y == 0) && ( threadIdx.z == 0)){
printf("\nget_tiles_offsets() threadIdx.x = %d, blockIdx.x= %d\n", (int)threadIdx.x, (int) blockIdx.x);
printExtrinsicCorrection(gpu_correction_vector);
}
__syncthreads();// __syncwarp();
#endif // DEBUG20
if (nangle < 4){ // this part only for 1-st 3
// nangle: 0 - azimuth, 1 - tilt, 2 - roll, 3 - zoom
float* gangles =
(nangle ==0)?gpu_correction_vector->azimuth:(
(nangle ==1)?gpu_correction_vector->tilt:(
(nangle ==2)?gpu_correction_vector->roll:
gpu_correction_vector->zoom));
if ((ncam < (NUM_CAMS -1)) || (nangle == 2)){ // for rolls - all 4
angle = *(gangles + ncam);
} else {
// last camera (except for roll): value is minus the sum of the other cameras' values
angle = 0.0f;
#pragma unroll
for (int n = 0; n < (NUM_CAMS-1); n++){
angle -= *(gangles + n);
}
}
if (!is_sin){
// shift by pi/2 so that the sinf() below yields the cosine
angle += M_PI/2;
}
if (nangle < 3) {
sincos[ncam][nangle][is_sin]=sinf(angle);
} else if (is_sin){
// nangle == 3: keep the (unshifted) zoom value itself
zoom[ncam] = angle;
}
}
__syncthreads();
#ifdef DEBUG20a
if ((threadIdx.x == 0) && (threadIdx.y == 0) && (threadIdx.z == 0)){
for (int n = 0; n < NUM_CAMS; n++){
printf("\n Azimuth matrix for camera %d, sincos[0] = %f, sincos[1] = %f, zoom = %f\n", n, sincos[n][0][0], sincos[n][0][1], zoom[n]);
printf(" Tilt matrix for camera %d, sincos[0] = %f, sincos[0] = %f\n", n, sincos[n][1][0], sincos[n][1][1]);
printf(" Roll matrix for camera %d, sincos[0] = %f, sincos[2] = %f\n", n, sincos[n][2][0], sincos[n][2][1]);
}
}
__syncthreads();// __syncwarp();
#endif // DEBUG20
// the two threads that handled zoom now scale the roll cos and sin by (1 + zoom)
if (nangle == 3) {
sincos[ncam][2][is_sin] *= (1.0 + zoom[ncam]); // modify roll
}
__syncthreads();
#ifdef DEBUG20a
if ((threadIdx.x == 0) && (threadIdx.y == 0) && (threadIdx.z == 0)){
for (int n = 0; n < NUM_CAMS; n++){
printf("\na Azimuth matrix for camera %d, sincos[0] = %f, sincos[1] = %f, zoom = %f\n", n, sincos[n][0][0], sincos[n][0][1], zoom[n]);
printf("a Tilt matrix for camera %d, sincos[0] = %f, sincos[0] = %f\n", n, sincos[n][1][0], sincos[n][1][1]);
printf("a Roll matrix for camera %d, sincos[0] = %f, sincos[2] = %f\n", n, sincos[n][2][0], sincos[n][2][1]);
}
}
__syncthreads();// __syncwarp();
#endif // DEBUG20
// now 3x3
// each thread fills element [threadIdx.y][threadIdx.x] of the three per-axis matrices:
// template cos weight * cos + template sin weight * sin + template constant
for (int axis = 0; axis < 3; axis++) {
matrices[ncam][axis][threadIdx.y][threadIdx.x] =
ROTS_TEMPLATE[axis][threadIdx.y][threadIdx.x][0] * sincos[ncam][axis][0]+ // cos
ROTS_TEMPLATE[axis][threadIdx.y][threadIdx.x][1] * sincos[ncam][axis][1]+ // sin
ROTS_TEMPLATE[axis][threadIdx.y][threadIdx.x][2]; // const
}
// the products below read elements written by other threads
__syncthreads();
#ifdef DEBUG20a
if ((threadIdx.x == 0) && (threadIdx.y == 0) && (threadIdx.z == 0)){
for (int n = 0; n < NUM_CAMS; n++){
printf("\n1-Azimuth matrix for camera %d, sincos[0] = %f, sincos[1] = %f\n", n, sincos[n][0][0], sincos[n][0][1]);
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
printf("%9.6f, ", matrices[n][0][i][j]);
}
printf("\n");
}
printf("1-Tilt matrix for camera %d, sincos[0] = %f, sincos[1] = %f\n", n, sincos[n][1][0], sincos[n][1][1]);
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
printf("%9.6f, ", matrices[n][1][i][j]);
}
printf("\n");
}
printf("1-Roll/Zoom matrix for camera %d, sincos[0] = %f, sincos[1] = %f\n", n, sincos[n][2][0], sincos[n][2][1]);
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
printf("%9.6f, ", matrices[n][2][i][j]);
}
printf("\n");
}
}
}
__syncthreads();// __syncwarp();
#endif // DEBUG20
// tilt * az ->
// multiply matrices[ncam][1] * matrices[ncam][0] -> matrices[ncam][3]
matrices[ncam][3][threadIdx.y][threadIdx.x] =
matrices[ncam][1][threadIdx.y][0] * matrices[ncam][0][0][threadIdx.x]+
matrices[ncam][1][threadIdx.y][1] * matrices[ncam][0][1][threadIdx.x]+
matrices[ncam][1][threadIdx.y][2] * matrices[ncam][0][2][threadIdx.x];
// multiply matrices[ncam][2] * matrices[ncam][3] -> rot_matrices[ncam]
__syncthreads();
rot_matrices[ncam][threadIdx.y][threadIdx.x] =
matrices[ncam][2][threadIdx.y][0] * matrices[ncam][3][0][threadIdx.x]+
matrices[ncam][2][threadIdx.y][1] * matrices[ncam][3][1][threadIdx.x]+
matrices[ncam][2][threadIdx.y][2] * matrices[ncam][3][2][threadIdx.x];
__syncthreads();
#ifdef DEBUG20
if ((threadIdx.x == 0) && (threadIdx.y == 0) && (threadIdx.z == 0)){
for (int n = 0; n < NUM_CAMS; n++){
printf("\n2 - Azimuth matrix for camera %d, sincos[0] = %f, sincos[1] = %f\n", n, sincos[n][0][0], sincos[n][0][1]);
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
printf("%9.6f, ", matrices[n][0][i][j]);
}
printf("\n");
}
printf("2 - Tilt matrix for camera %d, sincos[0] = %f, sincos[1] = %f\n", n, sincos[n][1][0], sincos[n][1][1]);
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
printf("%9.6f, ", matrices[n][1][i][j]);
}
printf("\n");
}
printf("2 - Roll/Zoom matrix for camera %d, sincos[0] = %f, sincos[1] = %f\n", n, sincos[n][2][0], sincos[n][2][1]);
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
printf("%9.6f, ", matrices[n][2][i][j]);
}
printf("\n");
}
printf("2 - Rotation matrix for camera %d\n", n);
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
printf("%9.6f, ", rot_matrices[n][i][j]);
}
printf("\n");
}
}
}
__syncthreads();// __syncwarp();
#endif // DEBUG20
}
#endif
__constant__
int
offset_rots
=
0
;
//0
__constant__
int
offset_rots
=
0
;
//0
__constant__
int
offset_derivs
=
1
;
// 1..4 // should be next
__constant__
int
offset_derivs
=
1
;
// 1..4 // should be next
__constant__
int
offset_matrices
=
5
;
// 5..11
__constant__
int
offset_matrices
=
5
;
// 5..11
...
@@ -890,8 +697,69 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -890,8 +697,69 @@ extern "C" __global__ void get_tiles_offsets(
}
}
/**
 * Fill the reverse radial distortion table.
 *
 * For every table index i the distorted radius is rDist = RBYRDIST_STEP * i, and the kernel
 * solves r * k(r) = rDist for r with Newton iterations, where
 *   k(r) = A8*r^7 + A7*r^6 + A6*r^5 + A5*r^4 + A*r^3 + B*r^2 + C*r + d,
 *   d    = 1 - A8 - A7 - A6 - A5 - A - B - C.
 * The stored value is rByRDist[i] = r / rDist; entry 0 is set to 1/d (the value at r = 0).
 *
 * Work split: the flat thread index over the whole launch selects a slice of
 * num_points = ceil(RBYRDIST_LEN / CALC_REVERSE_TABLE_BLOCK_THREADS) consecutive entries.
 * Within a slice the previous solution seeds the next Newton search.
 * NOTE(review): this assumes the launch provides at least CALC_REVERSE_TABLE_BLOCK_THREADS
 * threads in total - confirm against the host-side launch configuration.
 *
 * If the derivative d(rDist)/dr drops below minDerivative the thread returns immediately and
 * the remaining entries of its slice are left unwritten (behaviour kept from the original).
 *
 * @param geometry_correction source of the distortionA8 ... distortionC coefficients
 * @param rByRDist            output table, indices 0 .. RBYRDIST_LEN-1 are written
 */
extern "C" __global__ void calcReverseDistortionTable(
		struct gc * geometry_correction,
		float *     rByRDist)
{
	//int num_threads = NUM_CAMS * blockDim.z * blockDim.y * blockDim.x; // 36
	// flat thread index: blocks along x, threads ordered z, y, x inside a block
	int indx = ((blockIdx.x * blockDim.z + threadIdx.z) * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
	//	double delta=1E-20; // 12; // 10; // -8; 215.983994 ms
	//	double delta=1E-4; //rByRDist error = 0.000072
	const double delta = 1E-10; // 12; // 10; // -8; 0.730000 ms
	const double minDerivative = 0.01;
	const int    numIterations = 1000;
	double drDistDr = 1.0;
	double d = 1.0
			- geometry_correction->distortionA8
			- geometry_correction->distortionA7
			- geometry_correction->distortionA6
			- geometry_correction->distortionA5
			- geometry_correction->distortionA
			- geometry_correction->distortionB
			- geometry_correction->distortionC;
	double rPrev = 0.0;
	// number of consecutive table entries handled by each thread (rounded up)
	int num_points = (RBYRDIST_LEN + CALC_REVERSE_TABLE_BLOCK_THREADS - 1) / CALC_REVERSE_TABLE_BLOCK_THREADS;
	for (int p = 0; p < num_points; p++) {
		int i = indx * num_points + p;
		if (i >= RBYRDIST_LEN) {
			return; // past the end of the table - nothing left for this thread
		}
		if (i == 0) {
			rByRDist[0] = (float) (1.0 / d);
			// Was "break": that made thread 0 leave the loop and never fill entries
			// 1 .. num_points-1 of its slice. rPrev stays 0.0, the exact solution for
			// rDist == 0, and seeds the next entry.
			continue;
		}
		double rDist = RBYRDIST_STEP * i;
		// first entry of a slice starts from the undistorted guess, later ones from the previous root
		double r = (p == 0) ? rDist : rPrev;
		for (int iteration = 0; iteration < numIterations; iteration++) {
			// k(r), Horner form
			double k = (((((((
					geometry_correction->distortionA8) * r +
					geometry_correction->distortionA7) * r +
					geometry_correction->distortionA6) * r +
					geometry_correction->distortionA5) * r +
					geometry_correction->distortionA)  * r +
					geometry_correction->distortionB)  * r +
					geometry_correction->distortionC)  * r + d;
			// d(r * k(r)) / dr, Horner form
			drDistDr = (((((((
					8 * geometry_correction->distortionA8) * r +
					7 * geometry_correction->distortionA7) * r +
					6 * geometry_correction->distortionA6) * r +
					5 * geometry_correction->distortionA5) * r +
					4 * geometry_correction->distortionA)  * r +
					3 * geometry_correction->distortionB)  * r +
					2 * geometry_correction->distortionC)  * r + d;
			if (drDistDr < minDerivative) { // folds backwards !
				return; // too high distortion
			}
			double rD = r * k;
			if (fabs(rD - rDist) < delta) {
				break; // converged
			}
			r += (rDist - rD) / drDistDr; // Newton step
		}
		rPrev = r;
		// divide in double and round once (the original rounded r to float before dividing)
		rByRDist[i] = (float) (r / rDist);
	}
}
/**
/**
* Calculate non-distorted radius from distorted using table approximation
* Calculate non-distorted radius from distorted using table approximation
...
...
src/main/resources/kernels/geometry_correction.h
View file @
20df596a
...
@@ -148,14 +148,15 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -148,14 +148,15 @@ extern "C" __global__ void get_tiles_offsets(
float
*
gpu_rByRDist
,
// length should match RBYRDIST_LEN
float
*
gpu_rByRDist
,
// length should match RBYRDIST_LEN
trot_deriv
*
gpu_rot_deriv
);
trot_deriv
*
gpu_rot_deriv
);
#if 0
// uses 3 threadIdx.x, 3 - threadIdx.y, 4 - threadIdx.z
extern "C" __global__ void calc_rot_matrices(
struct corr_vector * gpu_correction_vector);
#endif
// uses NUM_CAMS blocks, (3,3,3) threads
// uses NUM_CAMS blocks, (3,3,3) threads
extern
"C"
__global__
void
calc_rot_deriv
(
extern
"C"
__global__
void
calc_rot_deriv
(
struct
corr_vector
*
gpu_correction_vector
,
struct
corr_vector
*
gpu_correction_vector
,
trot_deriv
*
gpu_rot_deriv
);
trot_deriv
*
gpu_rot_deriv
);
// Total number of threads (over all blocks of the launch, not per block) between which
// calcReverseDistortionTable() divides the RBYRDIST_LEN table entries.
#define CALC_REVERSE_TABLE_BLOCK_THREADS (NUM_CAMS * 3 * 3 * 3) // NUM_CAMS blocks x (3,3,3) threads
// Use same blocks/threads as with calc_rot_deriv() - NUM_CAMS blocks, (3,3,3) threads
// geometry_correction - struct the distortion coefficients are read from
// rByRDist            - output table written by the kernel (indices below RBYRDIST_LEN)
extern "C" __global__ void calcReverseDistortionTable(
		struct gc * geometry_correction,
		float *     rByRDist);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment