Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
93480b46
Commit
93480b46
authored
Aug 10, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Motion blur correction for rendering only
parent
7d6fb681
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
356 additions
and
100 deletions
+356
-100
GpuQuad.java
src/main/java/com/elphel/imagej/gpu/GpuQuad.java
+7
-6
IntersceneLma.java
...n/java/com/elphel/imagej/tileprocessor/IntersceneLma.java
+8
-8
OpticalFlow.java
...ain/java/com/elphel/imagej/tileprocessor/OpticalFlow.java
+285
-56
QuadCLT.java
src/main/java/com/elphel/imagej/tileprocessor/QuadCLT.java
+10
-10
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+32
-12
geometry_correction.cu
src/main/resources/kernels/geometry_correction.cu
+5
-5
geometry_correction.h
src/main/resources/kernels/geometry_correction.h
+9
-3
No files found.
src/main/java/com/elphel/imagej/gpu/GpuQuad.java
View file @
93480b46
...
@@ -3842,19 +3842,21 @@ public class GpuQuad{ // quad camera description
...
@@ -3842,19 +3842,21 @@ public class GpuQuad{ // quad camera description
final
int
tilesX
=
img_width
/
GPUTileProcessor
.
DTT_SIZE
;
final
int
tilesX
=
img_width
/
GPUTileProcessor
.
DTT_SIZE
;
final
int
tiles
=
pXpYD
.
length
;
final
int
tiles
=
pXpYD
.
length
;
final
Matrix
[]
corr_rots
=
geometryCorrection
.
getCorrVector
().
getRotMatrices
();
// get array of per-sensor rotation matrices
final
Matrix
[]
corr_rots
=
geometryCorrection
.
getCorrVector
().
getRotMatrices
();
// get array of per-sensor rotation matrices
final
int
quad_main
=
(
geometryCorrection
!=
null
)?
num_cams:
0
;
final
int
quad_main
=
num_cams
;
//
(geometryCorrection != null)? num_cams:0;
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
threadsMax
);
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
threadsMax
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
00
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
00
);
final
AtomicInteger
aTiles
=
new
AtomicInteger
(
0
);
final
AtomicInteger
aTiles
=
new
AtomicInteger
(
0
);
final
TpTask
[][]
tp_tasks
=
new
TpTask
[
2
][
tiles
];
// aTiles.get()]; // [0] - main, [1] - shifted
final
TpTask
[][]
tp_tasks
=
new
TpTask
[
2
][
tiles
];
// aTiles.get()]; // [0] - main, [1] - shifted
final
double
mb_len_scale
=
-
Math
.
log
(
1.0
-
1.0
/
mb_max_gain
);
final
double
mb_len_scale
=
-
Math
.
log
(
1.0
-
1.0
/
mb_max_gain
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
threads
[
ithread
]
=
new
Thread
()
{
@Override
@Override
public
void
run
()
{
public
void
run
()
{
for
(
int
nTile
=
ai
.
getAndIncrement
();
nTile
<
tiles
;
nTile
=
ai
.
getAndIncrement
())
for
(
int
nTile
=
ai
.
getAndIncrement
();
nTile
<
tiles
;
nTile
=
ai
.
getAndIncrement
())
if
((
pXpYD
[
nTile
]
!=
null
)
&&
(
mb_vectors
[
nTile
]
!=
null
)
&&
((
selection
==
null
)
||
selection
[
nTile
]))
{
if
((
pXpYD
[
nTile
]
!=
null
)
&&
!
Double
.
isNaN
(
mb_vectors
[
0
][
nTile
])
&&
!
Double
.
isNaN
(
mb_vectors
[
1
][
nTile
])
&&
((
selection
==
null
)
||
selection
[
nTile
]))
{
int
tileY
=
nTile
/
tilesX
;
int
tileY
=
nTile
/
tilesX
;
int
tileX
=
nTile
%
tilesX
;
int
tileX
=
nTile
%
tilesX
;
TpTask
tp_task
=
new
TpTask
(
num_cams
,
tileX
,
tileY
);
TpTask
tp_task
=
new
TpTask
(
num_cams
,
tileX
,
tileY
);
...
@@ -3867,8 +3869,8 @@ public class GpuQuad{ // quad camera description
...
@@ -3867,8 +3869,8 @@ public class GpuQuad{ // quad camera description
double
[]
centerXY
=
pXpYD
[
nTile
];
double
[]
centerXY
=
pXpYD
[
nTile
];
tp_task
.
setCenterXY
(
centerXY
);
// this pair of coordinates will be used by GPU to set tp_task.xy and task.disp_dist!
tp_task
.
setCenterXY
(
centerXY
);
// this pair of coordinates will be used by GPU to set tp_task.xy and task.disp_dist!
// calculate offset for the secondary tile and weigh
// calculate offset for the secondary tile and weigh
double
dx
=
mb_vectors
[
nTile
][
0
];
double
dx
=
mb_vectors
[
0
][
nTile
];
double
dy
=
mb_vectors
[
nTile
][
1
];
double
dy
=
mb_vectors
[
1
][
nTile
];
double
mb_len
=
Math
.
sqrt
(
dx
*
dx
+
dy
*
dy
);
// in pixels/s
double
mb_len
=
Math
.
sqrt
(
dx
*
dx
+
dy
*
dy
);
// in pixels/s
dx
/=
mb_len
;
// unit vector
dx
/=
mb_len
;
// unit vector
dy
/=
mb_len
;
dy
/=
mb_len
;
...
@@ -3887,7 +3889,6 @@ public class GpuQuad{ // quad camera description
...
@@ -3887,7 +3889,6 @@ public class GpuQuad{ // quad camera description
double
gain_sub
=
-
gain
*
exp_offs
;
double
gain_sub
=
-
gain
*
exp_offs
;
tp_task
.
setScale
(
gain
);
tp_task
.
setScale
(
gain
);
tp_task_sub
.
setScale
(
gain_sub
);
tp_task_sub
.
setScale
(
gain_sub
);
boolean
bad_margins
=
false
;
boolean
bad_margins
=
false
;
if
(
calcPortsCoordinatesAndDerivatives
)
{
// for non-GPU?
if
(
calcPortsCoordinatesAndDerivatives
)
{
// for non-GPU?
double
[][]
disp_dist
=
new
double
[
quad_main
][];
// used to correct 3D correlations (not yet used here)
double
[][]
disp_dist
=
new
double
[
quad_main
][];
// used to correct 3D correlations (not yet used here)
...
...
src/main/java/com/elphel/imagej/tileprocessor/IntersceneLma.java
View file @
93480b46
...
@@ -15,7 +15,7 @@ import javax.xml.bind.DatatypeConverter;
...
@@ -15,7 +15,7 @@ import javax.xml.bind.DatatypeConverter;
import
Jama.Matrix
;
import
Jama.Matrix
;
public
class
IntersceneLma
{
public
class
IntersceneLma
{
OpticalFlow
opticalFlow
=
null
;
//
OpticalFlow opticalFlow = null;
QuadCLT
[]
scenesCLT
=
null
;
// now will use just 2 - 0 -reference scene, 1 - scene.
QuadCLT
[]
scenesCLT
=
null
;
// now will use just 2 - 0 -reference scene, 1 - scene.
private
double
[]
last_rms
=
null
;
// {rms, rms_pure}, matching this.vector
private
double
[]
last_rms
=
null
;
// {rms, rms_pure}, matching this.vector
private
double
[]
good_or_bad_rms
=
null
;
// just for diagnostics, to read last (failed) rms
private
double
[]
good_or_bad_rms
=
null
;
// just for diagnostics, to read last (failed) rms
...
@@ -37,11 +37,11 @@ public class IntersceneLma {
...
@@ -37,11 +37,11 @@ public class IntersceneLma {
private
int
num_samples
=
0
;
private
int
num_samples
=
0
;
private
boolean
thread_invariant
=
true
;
// Do not use DoubleAdder, provide results not dependent on threads
private
boolean
thread_invariant
=
true
;
// Do not use DoubleAdder, provide results not dependent on threads
public
IntersceneLma
(
public
IntersceneLma
(
OpticalFlow
opticalFlow
,
//
OpticalFlow opticalFlow,
boolean
thread_invariant
boolean
thread_invariant
)
{
)
{
this
.
thread_invariant
=
thread_invariant
;
this
.
thread_invariant
=
thread_invariant
;
this
.
opticalFlow
=
opticalFlow
;
//
this.opticalFlow = opticalFlow;
}
}
public
double
[][]
getLastJT
(){
public
double
[][]
getLastJT
(){
...
@@ -549,7 +549,7 @@ public class IntersceneLma {
...
@@ -549,7 +549,7 @@ public class IntersceneLma {
{
{
this
.
weights
=
new
double
[
num_samples
+
parameters_vector
.
length
];
this
.
weights
=
new
double
[
num_samples
+
parameters_vector
.
length
];
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
opticalFlow
.
threadsMax
);
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
QuadCLT
.
THREADS_MAX
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
double
sum_weights
;
double
sum_weights
;
if
(
thread_invariant
)
{
if
(
thread_invariant
)
{
...
@@ -652,7 +652,7 @@ public class IntersceneLma {
...
@@ -652,7 +652,7 @@ public class IntersceneLma {
private
void
normalizeWeights
()
private
void
normalizeWeights
()
{
{
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
opticalFlow
.
threadsMax
);
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
QuadCLT
.
THREADS_MAX
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
double
full_weight
,
sum_weight_pure
;
double
full_weight
,
sum_weight_pure
;
if
(
thread_invariant
)
{
if
(
thread_invariant
)
{
...
@@ -763,7 +763,7 @@ public class IntersceneLma {
...
@@ -763,7 +763,7 @@ public class IntersceneLma {
scene_atr
,
// double [] atr);
scene_atr
,
// double [] atr);
false
)[
0
];
// boolean invert));
false
)[
0
];
// boolean invert));
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
opticalFlow
.
threadsMax
);
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
QuadCLT
.
THREADS_MAX
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
threads
[
ithread
]
=
new
Thread
()
{
...
@@ -840,7 +840,7 @@ public class IntersceneLma {
...
@@ -840,7 +840,7 @@ public class IntersceneLma {
final
int
num_pars2
=
num_pars
*
num_pars
;
final
int
num_pars2
=
num_pars
*
num_pars
;
final
int
nup_points
=
jt
[
0
].
length
;
final
int
nup_points
=
jt
[
0
].
length
;
final
double
[][]
wjtjl
=
new
double
[
num_pars
][
num_pars
];
final
double
[][]
wjtjl
=
new
double
[
num_pars
][
num_pars
];
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
opticalFlow
.
threadsMax
);
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
QuadCLT
.
THREADS_MAX
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
for
(
int
ithread
=
0
;
ithread
<
threads
.
length
;
ithread
++)
{
threads
[
ithread
]
=
new
Thread
()
{
threads
[
ithread
]
=
new
Thread
()
{
...
@@ -876,7 +876,7 @@ public class IntersceneLma {
...
@@ -876,7 +876,7 @@ public class IntersceneLma {
final
double
[]
fx
,
final
double
[]
fx
,
final
double
[]
rms_fp
// null or [2]
final
double
[]
rms_fp
// null or [2]
)
{
)
{
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
opticalFlow
.
threadsMax
);
final
Thread
[]
threads
=
ImageDtt
.
newThreadArray
(
QuadCLT
.
THREADS_MAX
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
AtomicInteger
ai
=
new
AtomicInteger
(
0
);
final
double
[]
wymfw
=
new
double
[
fx
.
length
];
final
double
[]
wymfw
=
new
double
[
fx
.
length
];
double
s_rms
;
double
s_rms
;
...
...
src/main/java/com/elphel/imagej/tileprocessor/OpticalFlow.java
View file @
93480b46
This diff is collapsed.
Click to expand it.
src/main/java/com/elphel/imagej/tileprocessor/QuadCLT.java
View file @
93480b46
...
@@ -2706,7 +2706,7 @@ public class QuadCLT extends QuadCLTCPU {
...
@@ -2706,7 +2706,7 @@ public class QuadCLT extends QuadCLTCPU {
// motion blur compensation
// motion blur compensation
double
mb_tau
,
// 0.008; // time constant, sec
double
mb_tau
,
// 0.008; // time constant, sec
double
mb_max_gain
,
// 5.0; // motion blur maximal gain (if more - move second point more than a pixel
double
mb_max_gain
,
// 5.0; // motion blur maximal gain (if more - move second point more than a pixel
double
[][]
mb_vectors
,
//
double
[][]
mb_vectors
,
//
now [2][ntiles];
final
double
[]
scene_xyz
,
// camera center in world coordinates
final
double
[]
scene_xyz
,
// camera center in world coordinates
final
double
[]
scene_atr
,
// camera orientation relative to world frame
final
double
[]
scene_atr
,
// camera orientation relative to world frame
...
@@ -2740,15 +2740,15 @@ public class QuadCLT extends QuadCLTCPU {
...
@@ -2740,15 +2740,15 @@ public class QuadCLT extends QuadCLTCPU {
for
(
int
i
=
0
;
i
<
dbg_img
.
length
;
i
++)
{
for
(
int
i
=
0
;
i
<
dbg_img
.
length
;
i
++)
{
Arrays
.
fill
(
dbg_img
[
i
],
Double
.
NaN
);
Arrays
.
fill
(
dbg_img
[
i
],
Double
.
NaN
);
}
}
for
(
int
nTile
=
0
;
nTile
<
pXpYD
.
length
;
nTile
++)
if
(
pXpYD
[
nTile
]
!=
null
){
for
(
int
nTile
=
0
;
nTile
<
pXpYD
.
length
;
nTile
++){
for
(
int
i
=
0
;
i
<
pXpYD
[
nTile
].
length
;
i
++)
{
if
(
pXpYD
[
nTile
]
!=
null
)
{
dbg_img
[
i
][
nTile
]
=
pXpYD
[
nTile
][
i
];
for
(
int
i
=
0
;
i
<
pXpYD
[
nTile
].
length
;
i
++)
{
}
dbg_img
[
i
][
nTile
]
=
pXpYD
[
nTile
][
i
];
if
(
mb_vectors
[
nTile
]!=
null
)
{
for
(
int
i
=
0
;
i
<
2
;
i
++)
{
dbg_img
[
3
+
i
][
nTile
]
=
mb_tau
*
mb_vectors
[
nTile
][
i
];
}
}
}
}
for
(
int
i
=
0
;
i
<
2
;
i
++)
{
dbg_img
[
3
+
i
][
nTile
]
=
mb_tau
*
mb_vectors
[
i
][
nTile
];
}
}
}
(
new
ShowDoubleFloatArrays
()).
showArrays
(
// out of boundary 15
(
new
ShowDoubleFloatArrays
()).
showArrays
(
// out of boundary 15
dbg_img
,
dbg_img
,
...
@@ -2804,8 +2804,8 @@ public class QuadCLT extends QuadCLTCPU {
...
@@ -2804,8 +2804,8 @@ public class QuadCLT extends QuadCLTCPU {
full_woi_in
.
width
*
GPUTileProcessor
.
DTT_SIZE
,
full_woi_in
.
width
*
GPUTileProcessor
.
DTT_SIZE
,
full_woi_in
.
height
*
GPUTileProcessor
.
DTT_SIZE
};
full_woi_in
.
height
*
GPUTileProcessor
.
DTT_SIZE
};
int
erase_clt
=
show_nan
?
1
:
0
;
int
erase_clt
=
show_nan
?
1
:
0
;
boolean
test1
=
true
;
//
boolean test1 = true;
if
(
(
mb_vectors
!=
null
)
&&
test1
)
{
if
(
mb_vectors
!=
null
)
{
//
&& test1) {
image_dtt
.
setReferenceTDMotionBlur
(
// change to main?
image_dtt
.
setReferenceTDMotionBlur
(
// change to main?
erase_clt
,
//final int erase_clt,
erase_clt
,
//final int erase_clt,
wh
,
// null, // final int [] wh, // null (use sensor dimensions) or pair {width, height} in pixels
wh
,
// null, // final int [] wh, // null (use sensor dimensions) or pair {width, height} in pixels
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
93480b46
...
@@ -862,6 +862,7 @@ __device__ void convertCorrectTile(
...
@@ -862,6 +862,7 @@ __device__ void convertCorrectTile(
const
float
centerX
,
const
float
centerX
,
const
float
centerY
,
const
float
centerY
,
const
int
txy
,
const
int
txy
,
const
float
tscale
,
const
size_t
dstride
,
// in floats (pixels)
const
size_t
dstride
,
// in floats (pixels)
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float
*
clt_kernels
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float
*
clt_kernels
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
...
@@ -3118,7 +3119,7 @@ __global__ void convert_correct_tiles(
...
@@ -3118,7 +3119,7 @@ __global__ void convert_correct_tiles(
int
thread0
=
threadIdx
.
x
&
1
;
// 0,1
int
thread0
=
threadIdx
.
x
&
1
;
// 0,1
int
thread12
=
threadIdx
.
x
>>
1
;
// now 0..3 (total number == (DTT_SIZE), will not change
int
thread12
=
threadIdx
.
x
>>
1
;
// now 0..3 (total number == (DTT_SIZE), will not change
float
*
tp
=
tp0
+
tp_task_xy_offset
+
threadIdx
.
x
;
float
*
tp
=
tp0
+
TP_TASK_XY_OFFSET
+
threadIdx
.
x
;
if
(
thread12
<
num_cams
)
{
if
(
thread12
<
num_cams
)
{
tt
[
tile_in_block
].
xy
[
thread12
][
thread0
]
=
*
(
tp
);
// gpu_task -> xy[thread12][thread0];
tt
[
tile_in_block
].
xy
[
thread12
][
thread0
]
=
*
(
tp
);
// gpu_task -> xy[thread12][thread0];
}
}
...
@@ -3135,7 +3136,9 @@ __global__ void convert_correct_tiles(
...
@@ -3135,7 +3136,9 @@ __global__ void convert_correct_tiles(
if
(
threadIdx
.
x
==
0
){
// only one thread calculates, others - wait
if
(
threadIdx
.
x
==
0
){
// only one thread calculates, others - wait
tt
[
tile_in_block
].
task
=
*
(
int
*
)
(
tp0
++
);
// get first integer value
tt
[
tile_in_block
].
task
=
*
(
int
*
)
(
tp0
++
);
// get first integer value
tt
[
tile_in_block
].
txy
=
*
(
int
*
)
(
tp0
++
);
// get second integer value
tt
[
tile_in_block
].
txy
=
*
(
int
*
)
(
tp0
++
);
// get second integer value
tt
[
tile_in_block
].
target_disparity
=
*
(
tp0
++
);
//
tt
[
tile_in_block
].
target_disparity
=
*
(
tp0
);
//
tp0
+=
3
;
// skip centerXY and previous increment (was tt[tile_in_block].target_disparity = *(tp0++);
tt
[
tile_in_block
].
scale
=
*
(
tp0
++
);
// get scale to multiply before accumulating/saving
}
}
// float centerXY[2] is not used/copied here
// float centerXY[2] is not used/copied here
...
@@ -3167,7 +3170,8 @@ __global__ void convert_correct_tiles(
...
@@ -3167,7 +3170,8 @@ __global__ void convert_correct_tiles(
lpf_mask
,
// const int lpf_mask,
lpf_mask
,
// const int lpf_mask,
tt
[
tile_in_block
].
xy
[
ncam
][
0
],
// const float centerX,
tt
[
tile_in_block
].
xy
[
ncam
][
0
],
// const float centerX,
tt
[
tile_in_block
].
xy
[
ncam
][
1
],
// const float centerY,
tt
[
tile_in_block
].
xy
[
ncam
][
1
],
// const float centerY,
tt
[
tile_in_block
].
txy
,
// const int txy,
tt
[
tile_in_block
].
txy
,
// const int txy,
tt
[
tile_in_block
].
scale
,
// const float tscale,
dstride
,
// size_t dstride, // in floats (pixels)
dstride
,
// size_t dstride, // in floats (pixels)
(
float
*
)(
clt_tile
[
tile_in_block
]),
// float clt_tile [TILES_PER_BLOCK][NUM_CAMS][num_colors][4][DTT_SIZE][DTT_SIZE])
(
float
*
)(
clt_tile
[
tile_in_block
]),
// float clt_tile [TILES_PER_BLOCK][NUM_CAMS][num_colors][4][DTT_SIZE][DTT_SIZE])
(
float
*
)(
clt_kernels
[
tile_in_block
]),
// float clt_tile [num_colors][4][DTT_SIZE][DTT_SIZE],
(
float
*
)(
clt_kernels
[
tile_in_block
]),
// float clt_tile [num_colors][4][DTT_SIZE][DTT_SIZE],
...
@@ -4457,6 +4461,7 @@ __device__ void normalizeTileAmplitude(
...
@@ -4457,6 +4461,7 @@ __device__ void normalizeTileAmplitude(
* @param centerX full X-offset of the tile center, calculated from the geometry, distortions and disparity
* @param centerX full X-offset of the tile center, calculated from the geometry, distortions and disparity
* @param centerY full Y-offset of the tile center
* @param centerY full Y-offset of the tile center
* @param txy integer value combining tile X (low 16 bits) and tile Y (high 16 bits)
* @param txy integer value combining tile X (low 16 bits) and tile Y (high 16 bits)
* @param tscale float value to scale result. 0 - set. >0 scale and set, <0 subtract
* @param dstride stride (in floats) for the input Bayer images
* @param dstride stride (in floats) for the input Bayer images
* @param clt_tile image tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
* @param clt_tile image tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
* @param clt_kernels kernel tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
* @param clt_kernels kernel tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
...
@@ -4482,6 +4487,7 @@ __device__ void convertCorrectTile(
...
@@ -4482,6 +4487,7 @@ __device__ void convertCorrectTile(
const
float
centerX
,
const
float
centerX
,
const
float
centerY
,
const
float
centerY
,
const
int
txy
,
const
int
txy
,
const
float
tscale
,
const
size_t
dstride
,
// in floats (pixels)
const
size_t
dstride
,
// in floats (pixels)
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float
*
clt_kernels
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float
*
clt_kernels
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
...
@@ -5078,18 +5084,32 @@ __device__ void convertCorrectTile(
...
@@ -5078,18 +5084,32 @@ __device__ void convertCorrectTile(
#endif
#endif
if
(
tscale
==
0
)
{
// just set w/o scaling
#pragma unroll
#pragma unroll
for
(
int
j
=
0
;
j
<
DTT_SIZE
*
4
;
j
++
){
// all 4 components, 8 rows
for
(
int
j
=
0
;
j
<
DTT_SIZE
*
4
;
j
++
){
// all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
// shared memory tiles use DTT_SIZE1
*
clt_dst
=
*
clt_src
;
*
clt_dst
=
*
clt_src
;
clt_src
+=
DTT_SIZE1
;
clt_src
+=
DTT_SIZE1
;
clt_dst
+=
DTT_SIZE
;
clt_dst
+=
DTT_SIZE
;
}
}
else
if
(
tscale
>
0
)
{
// positive - scale and set. For motion blur positive should be first
#pragma unroll
for
(
int
j
=
0
;
j
<
DTT_SIZE
*
4
;
j
++
){
// all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
*
clt_dst
=
*
clt_src
*
tscale
;
clt_src
+=
DTT_SIZE1
;
clt_dst
+=
DTT_SIZE
;
}
}
else
{
// negative - scale and subtract from existing. For motion blur positive should be first
#pragma unroll
for
(
int
j
=
0
;
j
<
DTT_SIZE
*
4
;
j
++
){
// all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
*
clt_dst
+=
*
clt_src
*
tscale
;
clt_src
+=
DTT_SIZE1
;
clt_dst
+=
DTT_SIZE
;
}
}
}
__syncthreads
();
// __syncwarp();
__syncthreads
();
// __syncwarp();
// just for testing perform imclt, save result to clt_kernels
//#endif
}
}
...
...
src/main/resources/kernels/geometry_correction.cu
View file @
93480b46
...
@@ -460,11 +460,11 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -460,11 +460,11 @@ extern "C" __global__ void get_tiles_offsets(
// common code, calculated in parallel
// common code, calculated in parallel
/// int cxy = gpu_tasks[task_num].txy;
/// int cxy = gpu_tasks[task_num].txy;
/// float disparity = gpu_tasks[task_num].target_disparity;
/// float disparity = gpu_tasks[task_num].target_disparity;
float
disparity
=
*
(
gpu_ftasks
+
task_size
*
task_num
+
2
);
float
disparity
=
*
(
gpu_ftasks
+
task_size
*
task_num
+
TP_TASK_DISPARITY_OFFSET
);
float
*
centerXY
=
gpu_ftasks
+
task_size
*
task_num
+
tp_task_centerXY_offset
;
float
*
centerXY
=
gpu_ftasks
+
task_size
*
task_num
+
TP_TASK_CENTERXY_OFFSET
;
float
px
=
*
(
centerXY
);
float
px
=
*
(
centerXY
);
float
py
=
*
(
centerXY
+
1
);
float
py
=
*
(
centerXY
+
1
);
int
cxy
=
*
(
int
*
)
(
gpu_ftasks
+
task_size
*
task_num
+
1
);
int
cxy
=
*
(
int
*
)
(
gpu_ftasks
+
task_size
*
task_num
+
TP_TASK_TXY_OFFSET
);
int
tileX
=
(
cxy
&
0xffff
);
int
tileX
=
(
cxy
&
0xffff
);
int
tileY
=
(
cxy
>>
16
);
int
tileY
=
(
cxy
>>
16
);
...
@@ -705,7 +705,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -705,7 +705,7 @@ extern "C" __global__ void get_tiles_offsets(
/// gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
/// gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
/// gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
/// gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
/// gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
/// gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
float
*
disp_dist_p
=
gpu_ftasks
+
task_size
*
task_num
+
tp_task_xy_offset
+
num_cams
*
2
+
ncam
*
4
;
// ncam = threadIdx.x, so each thread will have different offset
float
*
disp_dist_p
=
gpu_ftasks
+
task_size
*
task_num
+
TP_TASK_XY_OFFSET
+
num_cams
*
2
+
ncam
*
4
;
// ncam = threadIdx.x, so each thread will have different offset
*
(
disp_dist_p
++
)
=
disp_dist
[
0
];
// global memory
*
(
disp_dist_p
++
)
=
disp_dist
[
0
];
// global memory
*
(
disp_dist_p
++
)
=
disp_dist
[
1
];
*
(
disp_dist_p
++
)
=
disp_dist
[
1
];
*
(
disp_dist_p
++
)
=
disp_dist
[
2
];
*
(
disp_dist_p
++
)
=
disp_dist
[
2
];
...
@@ -768,7 +768,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -768,7 +768,7 @@ extern "C" __global__ void get_tiles_offsets(
// gpu_tasks[task_num].xy[ncam][1] = pXY[1];
// gpu_tasks[task_num].xy[ncam][1] = pXY[1];
// float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// .xy goes right after 3 commonn (tak, txy and target_disparity
// .xy goes right after 3 commonn (tak, txy and target_disparity
float
*
tile_xy_p
=
gpu_ftasks
+
task_size
*
task_num
+
tp_task_xy_offset
+
ncam
*
2
;
// ncam = threadIdx.x, so each thread will have different offset
float
*
tile_xy_p
=
gpu_ftasks
+
task_size
*
task_num
+
TP_TASK_XY_OFFSET
+
ncam
*
2
;
// ncam = threadIdx.x, so each thread will have different offset
*
(
tile_xy_p
++
)
=
pXY
[
0
];
// global memory
*
(
tile_xy_p
++
)
=
pXY
[
0
];
// global memory
*
(
tile_xy_p
++
)
=
pXY
[
1
];
// global memory
*
(
tile_xy_p
++
)
=
pXY
[
1
];
// global memory
}
}
...
...
src/main/resources/kernels/geometry_correction.h
View file @
93480b46
...
@@ -64,13 +64,19 @@ struct tp_task {
...
@@ -64,13 +64,19 @@ struct tp_task {
float
target_disparity
;
float
target_disparity
;
float
centerXY
[
2
];
// "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
float
centerXY
[
2
];
// "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
// if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
// if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
float
xy
[
NUM_CAMS
][
2
];
// scale == 0 - old way, just set. Scale !=0 - accumulate. Or make > 0 - set too? only negative - subtract?
float
scale
;
// multiply during direct conversion before accumulating in TD - used for motion blur correction
float
xy
[
NUM_CAMS
][
2
];
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
};
};
#define get_task_size(x) (sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - x))
#define get_task_size(x) (sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - x))
#define tp_task_xy_offset 5
#define TP_TASK_TASK_OFFSET 0
#define tp_task_centerXY_offset 3
#define TP_TASK_TXY_OFFSET 1
#define TP_TASK_DISPARITY_OFFSET 2
#define TP_TASK_CENTERXY_OFFSET 3
#define TP_TASK_SCALE_OFFSET 5
#define TP_TASK_XY_OFFSET 6
struct
corr_vector
{
struct
corr_vector
{
float
tilt
[
NUM_CAMS
-
1
];
// 0..2
float
tilt
[
NUM_CAMS
-
1
];
// 0..2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment