Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
1b16c1e5
Commit
1b16c1e5
authored
Aug 27, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
gpu intra to batch
parent
fa5947b6
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
90 additions
and
58 deletions
+90
-58
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+23
-13
ImageDtt.java
src/main/java/com/elphel/imagej/tileprocessor/ImageDtt.java
+65
-45
QuadCLT.java
src/main/java/com/elphel/imagej/tileprocessor/QuadCLT.java
+2
-0
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
1b16c1e5
...
@@ -896,7 +896,7 @@ public class GPUTileProcessor {
...
@@ -896,7 +896,7 @@ public class GPUTileProcessor {
}
}
cuMemcpyHtoD
(
gpu_tasks
,
Pointer
.
to
(
ftasks
),
TPTASK_SIZE
*
num_task_tiles
*
Sizeof
.
FLOAT
);
cuMemcpyHtoD
(
gpu_tasks
,
Pointer
.
to
(
ftasks
),
TPTASK_SIZE
*
num_task_tiles
*
Sizeof
.
FLOAT
);
}
}
/*
public void setCorrIndices(int [] corr_indices)
public void setCorrIndices(int [] corr_indices)
{
{
num_corr_tiles = corr_indices.length;
num_corr_tiles = corr_indices.length;
...
@@ -906,7 +906,6 @@ public class GPUTileProcessor {
...
@@ -906,7 +906,6 @@ public class GPUTileProcessor {
}
}
cuMemcpyHtoD(gpu_corr_indices, Pointer.to(fcorr_indices), num_corr_tiles * Sizeof.FLOAT);
cuMemcpyHtoD(gpu_corr_indices, Pointer.to(fcorr_indices), num_corr_tiles * Sizeof.FLOAT);
}
}
public void setTextureIndices(int [] texture_indices) // never used
public void setTextureIndices(int [] texture_indices) // never used
{
{
num_texture_tiles = texture_indices.length;
num_texture_tiles = texture_indices.length;
...
@@ -916,6 +915,7 @@ public class GPUTileProcessor {
...
@@ -916,6 +915,7 @@ public class GPUTileProcessor {
}
}
cuMemcpyHtoD(gpu_texture_indices, Pointer.to(ftexture_indices), num_texture_tiles * Sizeof.FLOAT);
cuMemcpyHtoD(gpu_texture_indices, Pointer.to(ftexture_indices), num_texture_tiles * Sizeof.FLOAT);
}
}
*/
public
int
[]
getTextureIndices
()
public
int
[]
getTextureIndices
()
{
{
...
@@ -1733,6 +1733,7 @@ public class GPUTileProcessor {
...
@@ -1733,6 +1733,7 @@ public class GPUTileProcessor {
*/
*/
public
void
execCorr2D_normalize
(
public
void
execCorr2D_normalize
(
boolean
combo
,
// normalize combo correlations (false - per-pair ones)
double
fat_zero
,
double
fat_zero
,
int
corr_radius
)
{
int
corr_radius
)
{
if
(
GPU_CORR2D_NORMALIZE_kernel
==
null
)
if
(
GPU_CORR2D_NORMALIZE_kernel
==
null
)
...
@@ -1740,13 +1741,12 @@ public class GPUTileProcessor {
...
@@ -1740,13 +1741,12 @@ public class GPUTileProcessor {
IJ
.
showMessage
(
"Error"
,
"No GPU kernel: GPU_CORR2D_NORMALIZE_kernel"
);
IJ
.
showMessage
(
"Error"
,
"No GPU kernel: GPU_CORR2D_NORMALIZE_kernel"
);
return
;
return
;
}
}
// float [] fnum_corrs = new float[1];
// cuMemcpyDtoH(Pointer.to(fnum_corrs), gpu_num_corr_tiles, 1 * Sizeof.FLOAT);
// int num_tiles = Float.floatToIntBits(fnum_corrs[0])/num_pairs; // number of correlation tiles calculated
int
[]
GridFullWarps
=
{
1
,
1
,
1
};
int
[]
GridFullWarps
=
{
1
,
1
,
1
};
int
[]
ThreadsFullWarps
=
{
1
,
1
,
1
};
int
[]
ThreadsFullWarps
=
{
1
,
1
,
1
};
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
kernelParameters
;
if
(
combo
)
{
kernelParameters
=
Pointer
.
to
(
Pointer
.
to
(
new
int
[]
{
num_corr_combo_tiles
}),
// num_task_tiles }), // int num_corr_tiles, // number of correlation tiles to process
Pointer
.
to
(
new
int
[]
{
num_corr_combo_tiles
}),
// num_task_tiles }), // int num_corr_tiles, // number of correlation tiles to process
Pointer
.
to
(
new
int
[]
{
corr_stride_combo_td
}),
// const size_t corr_stride_td, // in floats
Pointer
.
to
(
new
int
[]
{
corr_stride_combo_td
}),
// const size_t corr_stride_td, // in floats
Pointer
.
to
(
gpu_corrs_combo_td
),
// float * gpu_corrs_combo); // combined correlation output (one per tile)
Pointer
.
to
(
gpu_corrs_combo_td
),
// float * gpu_corrs_combo); // combined correlation output (one per tile)
...
@@ -1754,6 +1754,16 @@ public class GPUTileProcessor {
...
@@ -1754,6 +1754,16 @@ public class GPUTileProcessor {
Pointer
.
to
(
gpu_corrs_combo
),
// float * gpu_corrs, // correlation output data (pixel domain)
Pointer
.
to
(
gpu_corrs_combo
),
// float * gpu_corrs, // correlation output data (pixel domain)
Pointer
.
to
(
new
float
[]
{(
float
)
fat_zero
}),
// float fat_zero, // here - absolute
Pointer
.
to
(
new
float
[]
{(
float
)
fat_zero
}),
// float fat_zero, // here - absolute
Pointer
.
to
(
new
int
[]
{
corr_radius
}));
// int corr_radius, // radius of the output correlation (7 for 15x15)
Pointer
.
to
(
new
int
[]
{
corr_radius
}));
// int corr_radius, // radius of the output correlation (7 for 15x15)
}
else
{
kernelParameters
=
Pointer
.
to
(
Pointer
.
to
(
new
int
[]
{
num_corr_tiles
}),
// num_task_tiles }), // int num_corr_tiles, // number of correlation tiles to process
Pointer
.
to
(
new
int
[]
{
corr_stride_td
}),
// const size_t corr_stride_td, // in floats
Pointer
.
to
(
gpu_corrs_td
),
// float * gpu_corrs_combo); // combined correlation output (one per tile)
Pointer
.
to
(
new
int
[]
{
corr_stride
}),
// const size_t corr_stride, // in floats
Pointer
.
to
(
gpu_corrs
),
// float * gpu_corrs, // correlation output data (pixel domain)
Pointer
.
to
(
new
float
[]
{(
float
)
fat_zero
}),
// float fat_zero, // here - absolute
Pointer
.
to
(
new
int
[]
{
corr_radius
}));
// int corr_radius, // radius of the output correlation (7 for 15x15)
}
cuCtxSynchronize
();
cuCtxSynchronize
();
// Call the kernel function
// Call the kernel function
...
...
src/main/java/com/elphel/imagej/tileprocessor/ImageDtt.java
View file @
1b16c1e5
...
@@ -258,39 +258,6 @@ public class ImageDtt extends ImageDttCPU {
...
@@ -258,39 +258,6 @@ public class ImageDtt extends ImageDttCPU {
" debug_tileX="
+
debug_tileX
+
" debug_tileY="
+
debug_tileY
+
" globalDebugLevel="
+
globalDebugLevel
);
" debug_tileX="
+
debug_tileX
+
" debug_tileY="
+
debug_tileY
+
" globalDebugLevel="
+
globalDebugLevel
);
}
}
// TODO: Remove unused
/**
final int [][] zi =
{{ 0, 1, 2, 3},
{-1, 0, -3, 2},
{-2, -3, 0, 1},
{ 3, -2, -1, 0}};
final int [][] corr_pairs ={ // {first, second, rot} rot: 0 - as is, 1 - swap y,x // not used in lwir
{0,1,0},
{2,3,0},
{0,2,1},
{1,3,1}};
final double[][] port_offsets = { // lwir: used only in textures to scale differences
{-0.5, -0.5},
{ 0.5, -0.5},
{-0.5, 0.5},
{ 0.5, 0.5}};
final int transform_len = transform_size * transform_size;
final double [] filter = doubleGetCltLpfFd(corr_sigma);
*/
// prepare disparity maps and weights
//// final int max_search_radius = (int) Math.abs(max_corr_radius); // use negative max_corr_radius for squares instead of circles?
//// final int max_search_radius_poly = 1;
/**
if (globalDebugLevel > 0){
System.out.println("max_corr_radius= "+max_corr_radius);
System.out.println("max_search_radius= "+max_search_radius);
System.out.println("max_search_radius_poly="+max_search_radius_poly);
System.out.println("gpu_fat_zero= "+gpu_fat_zero);
System.out.println("disparity_array[0][0]= "+disparity_array[0][0]);
}
*/
// add optional initialization of debug layers here
// add optional initialization of debug layers here
boolean
need_macro
=
false
;
boolean
need_macro
=
false
;
...
@@ -445,17 +412,52 @@ public class ImageDtt extends ImageDttCPU {
...
@@ -445,17 +412,52 @@ public class ImageDtt extends ImageDttCPU {
// does it need correlations?
// does it need correlations?
if
(
fneed_corr
)
{
if
(
fneed_corr
)
{
//Generate 2D phase correlations from the CLT representation
//Generate 2D phase correlations from the CLT representation
/*
gpuQuad.execCorr2D(
gpuQuad.execCorr2D(
col_weights, // scales,// double [] scales,
col_weights, // scales,// double [] scales,
gpu_fat_zero, // double fat_zero);
gpu_fat_zero, // double fat_zero);
gpu_corr_rad); // int corr_radius
gpu_corr_rad); // int corr_radius
//Show 2D correlations
// int [] wh = new int[2];
final int [] corr_indices = gpuQuad.getCorrIndices();
final int [] corr_indices = gpuQuad.getCorrIndices();
final
float
[][]
fcorr2D
=
gpuQuad
.
getCorr2D
(
final float [][] fcorr2D = gpuQuad.getCorr2D(gpu_corr_rad); // int corr_rad);
gpu_corr_rad
);
// int corr_rad);
*/
gpuQuad
.
execCorr2D_TD
(
col_weights
);
// Get TD version of correlations (may be read out and saved)
final
int
[]
corr_indices
=
gpuQuad
.
getCorrIndices
();
gpuQuad
.
execCorr2D_normalize
(
false
,
// boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero
,
// double fat_zero);
gpu_corr_rad
);
// int corr_radius
final
float
[][]
fcorr2D
=
gpuQuad
.
getCorr2D
(
gpu_corr_rad
);
// int corr_rad);
// calculate combine quad correlation
gpuQuad
.
execCorr2D_combine
(
// calculate cross pairs
true
,
// boolean init_corr, // initialize output tiles (false - add to current)
GPUTileProcessor
.
NUM_PAIRS
,
// int num_pairs_in, // typically 6 - number of pairs per tile (tile task should have same number per each tile
0x0f
);
// int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
// normalize and convert to pixel domain
gpuQuad
.
execCorr2D_normalize
(
true
,
// boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero
,
// double fat_zero);
gpu_corr_rad
);
// int corr_radius
final
int
[]
corr_quad_indices
=
gpuQuad
.
getCorrComboIndices
();
// get quad
final
float
[][]
fcorr2D_quad
=
gpuQuad
.
getCorr2DCombo
(
gpu_corr_rad
);
// calculate and get cross here
gpuQuad
.
execCorr2D_combine
(
// calculate cross pairs
true
,
// boolean init_corr, // initialize output tiles (false - add to current)
GPUTileProcessor
.
NUM_PAIRS
,
// int num_pairs_in, // typically 6 - number of pairs per tile (tile task should have same number per each tile
0x30
);
// int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
gpuQuad
.
execCorr2D_normalize
(
true
,
// boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero
,
// double fat_zero);
gpu_corr_rad
);
// int corr_radius
// final int [] corr_cross_indices = gpuQuad.getCorrComboIndices(); // cross indices are the quad
final
float
[][]
fcorr2D_cross
=
gpuQuad
.
getCorr2DCombo
(
gpu_corr_rad
);
if
(
corr_indices
.
length
>
0
)
{
if
(
corr_indices
.
length
>
0
)
{
if
(
true
)
{
/*
if (true) { // debugging only
int [] wh = new int[2];
int [] wh = new int[2];
double [][] dbg_corr = GPUTileProcessor.getCorr2DView(
double [][] dbg_corr = GPUTileProcessor.getCorr2DView(
tilesX,
tilesX,
...
@@ -471,7 +473,7 @@ public class ImageDtt extends ImageDttCPU {
...
@@ -471,7 +473,7 @@ public class ImageDtt extends ImageDttCPU {
"dbg-corr2D", // name+"-CORR2D-D"+clt_parameters.disparity,
"dbg-corr2D", // name+"-CORR2D-D"+clt_parameters.disparity,
GPUTileProcessor.getCorrTitles());
GPUTileProcessor.getCorrTitles());
}
}
*/
final
int
corr_length
=
fcorr2D
[
0
].
length
;
// all correlation tiles have the same size
final
int
corr_length
=
fcorr2D
[
0
].
length
;
// all correlation tiles have the same size
// assuming that the correlation pairs sets are the same for each tile that has correlations
// assuming that the correlation pairs sets are the same for each tile that has correlations
...
@@ -481,7 +483,7 @@ public class ImageDtt extends ImageDttCPU {
...
@@ -481,7 +483,7 @@ public class ImageDtt extends ImageDttCPU {
for
(
int
i
=
1
;
(
i
<
corr_indices
.
length
)
&&
((
corr_indices
[
i
]
>>
GPUTileProcessor
.
CORR_NTILE_SHIFT
)
==
nt0
)
;
i
++)
{
for
(
int
i
=
1
;
(
i
<
corr_indices
.
length
)
&&
((
corr_indices
[
i
]
>>
GPUTileProcessor
.
CORR_NTILE_SHIFT
)
==
nt0
)
;
i
++)
{
nc0
++;
nc0
++;
}
}
final
int
num_tile_corr
=
nc0
;
final
int
num_tile_corr
=
nc0
;
// normally 6
final
int
num_tiles
=
corr_indices
.
length
/
num_tile_corr
;
final
int
num_tiles
=
corr_indices
.
length
/
num_tile_corr
;
...
@@ -504,7 +506,9 @@ public class ImageDtt extends ImageDttCPU {
...
@@ -504,7 +506,9 @@ public class ImageDtt extends ImageDttCPU {
(
imgdtt_params
.
lma_debug_level
>
1
));
// boolean debug);
(
imgdtt_params
.
lma_debug_level
>
1
));
// boolean debug);
for
(
int
indx_tile
=
ai
.
getAndIncrement
();
indx_tile
<
num_tiles
;
indx_tile
=
ai
.
getAndIncrement
())
{
for
(
int
indx_tile
=
ai
.
getAndIncrement
();
indx_tile
<
num_tiles
;
indx_tile
=
ai
.
getAndIncrement
())
{
double
[][]
corrs
=
new
double
[
GPUTileProcessor
.
NUM_PAIRS
][
corr_length
];
// 225-long (15x15)
// double [][] corrs = new double [GPUTileProcessor.NUM_PAIRS][corr_length]; // 225-long (15x15)
// added quad and cross combos
double
[][]
corrs
=
new
double
[
GPUTileProcessor
.
NUM_PAIRS
+
2
][
corr_length
];
// 225-long (15x15)
int
indx_corr
=
indx_tile
*
num_tile_corr
;
int
indx_corr
=
indx_tile
*
num_tile_corr
;
int
nt
=
(
corr_indices
[
indx_corr
]
>>
GPUTileProcessor
.
CORR_NTILE_SHIFT
);
int
nt
=
(
corr_indices
[
indx_corr
]
>>
GPUTileProcessor
.
CORR_NTILE_SHIFT
);
int
tileX
=
nt
%
tilesX
;
int
tileX
=
nt
%
tilesX
;
...
@@ -520,7 +524,21 @@ public class ImageDtt extends ImageDttCPU {
...
@@ -520,7 +524,21 @@ public class ImageDtt extends ImageDttCPU {
}
}
indx_corr
++;
indx_corr
++;
}
}
// add 2 combo layers
int
pair
=
GPUTileProcessor
.
NUM_PAIRS
;
// 6
nt
=
(
corr_quad_indices
[
indx_tile
]
>>
GPUTileProcessor
.
CORR_NTILE_SHIFT
);
// corr_quad_indices - different sequence
for
(
int
i
=
0
;
i
<
corr_length
;
i
++)
{
corrs
[
pair
][
i
]
=
gpu_corr_scale
*
fcorr2D_quad
[
indx_tile
][
i
];
// from float to double
}
// indices for cross are the same as for quad
pair
++;
for
(
int
i
=
0
;
i
<
corr_length
;
i
++)
{
corrs
[
pair
][
i
]
=
gpu_corr_scale
*
fcorr2D_cross
[
indx_tile
][
i
];
// from float to double
}
// does not include combo
int
used_pairs
=
pair_mask
;
// imgdtt_params.dbg_pair_mask; //TODO: use tile tasks
int
used_pairs
=
pair_mask
;
// imgdtt_params.dbg_pair_mask; //TODO: use tile tasks
int
tile_lma_debug_level
=
((
tileX
==
debug_tileX
)
&&
(
tileY
==
debug_tileY
))?
(
imgdtt_params
.
lma_debug_level
-
1
)
:
-
2
;
int
tile_lma_debug_level
=
((
tileX
==
debug_tileX
)
&&
(
tileY
==
debug_tileY
))?
(
imgdtt_params
.
lma_debug_level
-
1
)
:
-
2
;
boolean
debugTile
=(
tileX
==
debug_tileX
)
&&
(
tileY
==
debug_tileY
)
&&
(
globalDebugLevel
>
-
1
);
boolean
debugTile
=(
tileX
==
debug_tileX
)
&&
(
tileY
==
debug_tileY
)
&&
(
globalDebugLevel
>
-
1
);
...
@@ -611,8 +629,10 @@ public class ImageDtt extends ImageDttCPU {
...
@@ -611,8 +629,10 @@ public class ImageDtt extends ImageDttCPU {
clt_corr_partial
[
tileY
][
tileX
][
0
][
3
]
=
corrs
[
3
];
// 4
clt_corr_partial
[
tileY
][
tileX
][
0
][
3
]
=
corrs
[
3
];
// 4
clt_corr_partial
[
tileY
][
tileX
][
1
][
0
]
=
corrs
[
4
];
// 5
clt_corr_partial
[
tileY
][
tileX
][
1
][
0
]
=
corrs
[
4
];
// 5
clt_corr_partial
[
tileY
][
tileX
][
1
][
1
]
=
corrs
[
5
];
// 6
clt_corr_partial
[
tileY
][
tileX
][
1
][
1
]
=
corrs
[
5
];
// 6
clt_corr_partial
[
tileY
][
tileX
][
1
][
2
]
=
corr2d
.
debugStrip
(
strip_hor
);
// 7
clt_corr_partial
[
tileY
][
tileX
][
1
][
2
]
=
corrs
[
6
];
// 5
clt_corr_partial
[
tileY
][
tileX
][
1
][
3
]
=
corr2d
.
debugStrip
(
strip_vert
);
// 8
clt_corr_partial
[
tileY
][
tileX
][
1
][
3
]
=
corrs
[
7
];
// 6
// clt_corr_partial[tileY][tileX][1][2] = corr2d.debugStrip(strip_hor); // 7
// clt_corr_partial[tileY][tileX][1][3] = corr2d.debugStrip(strip_vert); // 8
clt_corr_partial
[
tileY
][
tileX
][
2
][
0
]
=
corr2d
.
debugStrip
(
strips
[
4
]);
// 9
clt_corr_partial
[
tileY
][
tileX
][
2
][
0
]
=
corr2d
.
debugStrip
(
strips
[
4
]);
// 9
clt_corr_partial
[
tileY
][
tileX
][
2
][
1
]
=
corr2d
.
debugStrip
(
strips
[
5
]);
// 10
clt_corr_partial
[
tileY
][
tileX
][
2
][
1
]
=
corr2d
.
debugStrip
(
strips
[
5
]);
// 10
clt_corr_partial
[
tileY
][
tileX
][
2
][
2
]
=
corr2d
.
debugStrip2
(
strip_hor
);
// 11
clt_corr_partial
[
tileY
][
tileX
][
2
][
2
]
=
corr2d
.
debugStrip2
(
strip_hor
);
// 11
...
...
src/main/java/com/elphel/imagej/tileprocessor/QuadCLT.java
View file @
1b16c1e5
...
@@ -677,6 +677,7 @@ public class QuadCLT extends QuadCLTCPU {
...
@@ -677,6 +677,7 @@ public class QuadCLT extends QuadCLTCPU {
0x0f
);
// int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
0x0f
);
// int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
quadCLT_main
.
getGPU
().
execCorr2D_normalize
(
quadCLT_main
.
getGPU
().
execCorr2D_normalize
(
true
,
// boolean combo, // normalize combo correlations (false - per-pair ones)
fat_zero
,
// double fat_zero);
fat_zero
,
// double fat_zero);
clt_parameters
.
gpu_corr_rad
);
// int corr_radius
clt_parameters
.
gpu_corr_rad
);
// int corr_radius
...
@@ -803,6 +804,7 @@ public class QuadCLT extends QuadCLTCPU {
...
@@ -803,6 +804,7 @@ public class QuadCLT extends QuadCLTCPU {
0x30
);
// int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
0x30
);
// int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
quadCLT_main
.
getGPU
().
execCorr2D_normalize
(
quadCLT_main
.
getGPU
().
execCorr2D_normalize
(
true
,
// boolean combo, // normalize combo correlations (false - per-pair ones)
fat_zero
,
// double fat_zero);
fat_zero
,
// double fat_zero);
clt_parameters
.
gpu_corr_rad
);
// int corr_radius
clt_parameters
.
gpu_corr_rad
);
// int corr_radius
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment