Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
4fb94627
Commit
4fb94627
authored
Apr 16, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
changing corr2D to CDP
parent
20df596a
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
233 additions
and
322 deletions
+233
-322
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+35
-16
TwoQuadCLT.java
...main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+5
-4
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+131
-207
TileProcessor.h
src/main/resources/kernels/TileProcessor.h
+62
-95
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
4fb94627
...
@@ -190,6 +190,7 @@ public class GPUTileProcessor {
...
@@ -190,6 +190,7 @@ public class GPUTileProcessor {
private
CUdeviceptr
gpu_clt
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_clt
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_4_images
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_4_images
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_corr_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.POINTER
private
CUdeviceptr
gpu_corr_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.POINTER
private
CUdeviceptr
gpu_num_corr_tiles
=
new
CUdeviceptr
();
// allocate 1 * Sizeof.FLOAT (single 32-bit word: number of correlation tiles to process)
private
CUdeviceptr
gpu_texture_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.POINTER
private
CUdeviceptr
gpu_texture_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.POINTER
private
CUdeviceptr
gpu_port_offsets
=
new
CUdeviceptr
();
// allocate Quad * 2 * Sizeof.POINTER
private
CUdeviceptr
gpu_port_offsets
=
new
CUdeviceptr
();
// allocate Quad * 2 * Sizeof.POINTER
private
CUdeviceptr
gpu_woi
=
new
CUdeviceptr
();
// 4 integers (x, y, width, height) Rectangle - in tiles
private
CUdeviceptr
gpu_woi
=
new
CUdeviceptr
();
// 4 integers (x, y, width, height) Rectangle - in tiles
...
@@ -575,8 +576,8 @@ public class GPUTileProcessor {
...
@@ -575,8 +576,8 @@ public class GPUTileProcessor {
cuMemAlloc
(
gpu_tasks
,
tilesX
*
tilesY
*
TPTASK_SIZE
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_tasks
,
tilesX
*
tilesY
*
TPTASK_SIZE
*
Sizeof
.
FLOAT
);
//=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============
//=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============
// Set corrs array
// Set corrs array
/// cuMemAlloc(gpu_corrs, tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER
);
cuMemAlloc
(
gpu_corr_indices
,
tilesX
*
tilesY
*
NUM_PAIRS
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_
corr_indices
,
tilesX
*
tilesY
*
NUM_PAIRS
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_
num_corr_tiles
,
1
*
Sizeof
.
FLOAT
);
//#define TILESYA ((TILESY +3) & (~3))
//#define TILESYA ((TILESY +3) & (~3))
int
tilesYa
=
(
tilesY
+
3
)
&
~
3
;
int
tilesYa
=
(
tilesY
+
3
)
&
~
3
;
...
@@ -1119,7 +1120,7 @@ public class GPUTileProcessor {
...
@@ -1119,7 +1120,7 @@ public class GPUTileProcessor {
cuCtxSynchronize
();
// remove later
cuCtxSynchronize
();
// remove later
}
}
public
void
execConverDirect
()
{
public
void
execConver
t
Direct
()
{
if
(
GPU_CONVERT_DIRECT_kernel
==
null
)
if
(
GPU_CONVERT_DIRECT_kernel
==
null
)
{
{
IJ
.
showMessage
(
"Error"
,
"No GPU kernel: GPU_CONVERT_DIRECT_kernel"
);
IJ
.
showMessage
(
"Error"
,
"No GPU kernel: GPU_CONVERT_DIRECT_kernel"
);
...
@@ -1206,20 +1207,24 @@ public class GPUTileProcessor {
...
@@ -1206,20 +1207,24 @@ public class GPUTileProcessor {
float
fscale0
=
(
float
)
scales
[
0
];
float
fscale0
=
(
float
)
scales
[
0
];
float
fscale1
=
(
num_colors
>
1
)?((
float
)
scales
[
1
]):
0.0f
;
float
fscale1
=
(
num_colors
>
1
)?((
float
)
scales
[
1
]):
0.0f
;
float
fscale2
=
(
num_colors
>
2
)?((
float
)
scales
[
2
]):
0.0f
;
float
fscale2
=
(
num_colors
>
2
)?((
float
)
scales
[
2
]):
0.0f
;
int
[]
GridFullWarps
=
{(
num_corr_tiles
+
CORR_TILES_PER_BLOCK
-
1
)
/
CORR_TILES_PER_BLOCK
,
1
,
1
};
// int [] GridFullWarps = {(num_corr_tiles + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1};
int
[]
ThreadsFullWarps
=
{
CORR_THREADS_PER_TILE
,
CORR_TILES_PER_BLOCK
,
1
};
// int [] ThreadsFullWarps = {CORR_THREADS_PER_TILE, CORR_TILES_PER_BLOCK, 1};
int
[]
GridFullWarps
=
{
1
,
1
,
1
};
int
[]
ThreadsFullWarps
=
{
1
,
1
,
1
};
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
.
to
(
gpu_clt
),
Pointer
.
to
(
gpu_clt
),
// float ** gpu_clt,
Pointer
.
to
(
new
int
[]
{
num_colors
}),
Pointer
.
to
(
new
int
[]
{
num_colors
}),
// int colors, // number of colors (3/1)
Pointer
.
to
(
new
float
[]
{
fscale0
}),
Pointer
.
to
(
new
float
[]
{
fscale0
}),
// float scale0, // scale for R
Pointer
.
to
(
new
float
[]
{
fscale1
}),
Pointer
.
to
(
new
float
[]
{
fscale1
}),
// float scale1, // scale for B
Pointer
.
to
(
new
float
[]
{
fscale2
}),
Pointer
.
to
(
new
float
[]
{
fscale2
}),
// float scale2, // scale for G
Pointer
.
to
(
new
float
[]
{(
float
)
fat_zero
}),
Pointer
.
to
(
new
float
[]
{(
float
)
fat_zero
}),
// float fat_zero, // here - absolute
Pointer
.
to
(
new
int
[]
{
num_corr_tiles
}),
// lpf_mask
Pointer
.
to
(
gpu_tasks
),
// struct tp_task * gpu_tasks,
Pointer
.
to
(
gpu_corr_indices
),
Pointer
.
to
(
new
int
[]
{
num_task_tiles
}),
// int num_tiles // number of tiles in task
Pointer
.
to
(
new
int
[]
{
corr_stride
}),
Pointer
.
to
(
gpu_corr_indices
),
// int * gpu_corr_indices, // packed tile+pair
Pointer
.
to
(
new
int
[]
{
corr_radius
}),
Pointer
.
to
(
gpu_num_corr_tiles
),
// int * pnum_corr_tiles, // pointer to a number of tiles to process
Pointer
.
to
(
gpu_corrs
)
// lpf_mask
Pointer
.
to
(
new
int
[]
{
corr_stride
}),
// const size_t corr_stride, // in floats
Pointer
.
to
(
new
int
[]
{
corr_radius
}),
// int corr_radius, // radius of the output correlation (7 for 15x15)
Pointer
.
to
(
gpu_corrs
)
// float * gpu_corrs); // correlation output data
);
);
cuCtxSynchronize
();
cuCtxSynchronize
();
// Call the kernel function
// Call the kernel function
...
@@ -1395,6 +1400,20 @@ public class GPUTileProcessor {
...
@@ -1395,6 +1400,20 @@ public class GPUTileProcessor {
}
}
return
corrs
;
return
corrs
;
}
}
/**
 * Read back from the GPU the list of correlation indices and their count.
 *
 * The count is a single 32-bit word read from gpu_num_corr_tiles; that many
 * 32-bit words are then read from gpu_corr_indices. As a side effect the
 * num_corr_tiles field is updated with the count that was read.
 *
 * The device words are integers, so they are copied directly into int
 * buffers. Going through float[] and Float.floatToIntBits() (as an earlier
 * version did) is lossy: floatToIntBits() collapses every NaN bit pattern
 * to 0x7fc00000, which would silently corrupt any integer whose bits happen
 * to fall into the NaN range.
 *
 * @return array of correlation indices (presumably packed tile+pair values,
 *         per the kernel parameter comment - confirm against the kernel);
 *         empty if the GPU reported zero tiles
 */
public int[] getCorrIndices() {
    // Number of correlation tiles, stored on the device as one 32-bit word
    final int[] countWord = new int[1];
    cuMemcpyDtoH(Pointer.to(countWord), gpu_num_corr_tiles, 1 * Sizeof.INT);
    final int num_corrs = countWord[0];

    // A negative count still fails here (NegativeArraySizeException), as before
    final int[] corr_indices = new int[num_corrs];
    if (num_corrs > 0) { // nothing to transfer for an empty list
        cuMemcpyDtoH(Pointer.to(corr_indices), gpu_corr_indices, (long) num_corrs * Sizeof.INT);
    }
    num_corr_tiles = num_corrs;
    return corr_indices;
}
/**
/**
* Get woi and RBGA image from the GPU after execRBGA call as 2/4 slices.
* Get woi and RBGA image from the GPU after execRBGA call as 2/4 slices.
...
...
src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
View file @
4fb94627
...
@@ -2078,10 +2078,10 @@ public class TwoQuadCLT {
...
@@ -2078,10 +2078,10 @@ public class TwoQuadCLT {
use_aux
);
// boolean use_aux)
use_aux
);
// boolean use_aux)
int
[]
corr_indices
=
gPUTileProcessor
.
getCorrTasks
(
//
int [] corr_indices = gPUTileProcessor.getCorrTasks(
tp_tasks
);
//
tp_tasks);
// corr_indices array of integers to be passed to GPU
// corr_indices array of integers to be passed to GPU
gPUTileProcessor
.
setCorrIndices
(
corr_indices
);
//
gPUTileProcessor.setCorrIndices(corr_indices);
int
[]
texture_indices
=
gPUTileProcessor
.
getTextureTasks
(
int
[]
texture_indices
=
gPUTileProcessor
.
getTextureTasks
(
tp_tasks
);
tp_tasks
);
...
@@ -2119,7 +2119,7 @@ public class TwoQuadCLT {
...
@@ -2119,7 +2119,7 @@ public class TwoQuadCLT {
long
startDirectConvert
=
System
.
nanoTime
();
long
startDirectConvert
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execConverDirect
();
gPUTileProcessor
.
execConver
t
Direct
();
}
}
// run imclt;
// run imclt;
...
@@ -2221,6 +2221,7 @@ public class TwoQuadCLT {
...
@@ -2221,6 +2221,7 @@ public class TwoQuadCLT {
int
tilesY
=
GPUTileProcessor
.
IMG_HEIGHT
/
GPUTileProcessor
.
DTT_SIZE
;
int
tilesY
=
GPUTileProcessor
.
IMG_HEIGHT
/
GPUTileProcessor
.
DTT_SIZE
;
int
[]
wh
=
new
int
[
2
];
int
[]
wh
=
new
int
[
2
];
if
(
clt_parameters
.
show_corr
)
{
if
(
clt_parameters
.
show_corr
)
{
int
[]
corr_indices
=
gPUTileProcessor
.
getCorrIndices
();
float
[][]
corr2D
=
gPUTileProcessor
.
getCorr2D
(
float
[][]
corr2D
=
gPUTileProcessor
.
getCorr2D
(
clt_parameters
.
gpu_corr_rad
);
// int corr_rad);
clt_parameters
.
gpu_corr_rad
);
// int corr_rad);
// convert to 6-layer image using tasks
// convert to 6-layer image using tasks
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
4fb94627
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/TileProcessor.h
View file @
4fb94627
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment