Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
39e75987
Commit
39e75987
authored
Apr 06, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
simplified by using larger output array
parent
3f0b0bc0
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
27 deletions
+32
-27
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+7
-4
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+25
-23
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
39e75987
...
@@ -495,8 +495,8 @@ public class GPUTileProcessor {
...
@@ -495,8 +495,8 @@ public class GPUTileProcessor {
tilesX
*
tilesY
,
// long Height,
tilesX
*
tilesY
,
// long Height,
Sizeof
.
FLOAT
);
// int ElementSizeBytes)
Sizeof
.
FLOAT
);
// int ElementSizeBytes)
texture_stride
=
(
int
)(
device_stride
[
0
]
/
Sizeof
.
FLOAT
);
texture_stride
=
(
int
)(
device_stride
[
0
]
/
Sizeof
.
FLOAT
);
int
max_rgba_width
=
tilesX
*
DTT_SIZE
;
int
max_rgba_width
=
(
tilesX
+
1
)
*
DTT_SIZE
;
int
max_rgba_height
=
tilesY
*
DTT_SIZE
;
int
max_rgba_height
=
(
tilesY
+
1
)
*
DTT_SIZE
;
int
max_rbga_slices
=
NUM_COLORS
+
1
;
int
max_rbga_slices
=
NUM_COLORS
+
1
;
cuMemAllocPitch
(
cuMemAllocPitch
(
...
@@ -1095,7 +1095,7 @@ public class GPUTileProcessor {
...
@@ -1095,7 +1095,7 @@ public class GPUTileProcessor {
int
[]
ThreadsFullWarps
=
{
TEXTURE_THREADS_PER_TILE
,
NUM_CAMS
,
1
};
int
[]
ThreadsFullWarps
=
{
TEXTURE_THREADS_PER_TILE
,
NUM_CAMS
,
1
};
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
.
to
(
new
int
[]
{
0
}),
// 0, // int border_tile, // if 1 - watch for border
//
Pointer.to(new int[] {0}), // 0, // int border_tile, // if 1 - watch for border
Pointer
.
to
(
gpu_texture_indices
),
// int * woi, - not used
Pointer
.
to
(
gpu_texture_indices
),
// int * woi, - not used
Pointer
.
to
(
gpu_clt
),
Pointer
.
to
(
gpu_clt
),
Pointer
.
to
(
new
int
[]
{
num_texture_tiles
}),
Pointer
.
to
(
new
int
[]
{
num_texture_tiles
}),
...
@@ -1155,6 +1155,7 @@ public class GPUTileProcessor {
...
@@ -1155,6 +1155,7 @@ public class GPUTileProcessor {
/**
/**
* Get woi and RBGA image from the GPU after execRBGA call as 2/4 slices.
* Get woi and RBGA image from the GPU after execRBGA call as 2/4 slices.
* device array has 4 pixels margins on each side, skip them here
* @param num_colors number of colors (1 or 3)
* @param num_colors number of colors (1 or 3)
* @param woi should be initialized as Rectangle(). x,y,width, height will be populated (in pixels,)
* @param woi should be initialized as Rectangle(). x,y,width, height will be populated (in pixels,)
* @return RBGA slices, last (alpha) in 0.0... 1.0 range, colors match input range
* @return RBGA slices, last (alpha) in 0.0... 1.0 range, colors match input range
...
@@ -1180,9 +1181,11 @@ public class GPUTileProcessor {
...
@@ -1180,9 +1181,11 @@ public class GPUTileProcessor {
copy_rbga
.
WidthInBytes
=
woi
.
width
*
Sizeof
.
FLOAT
;
copy_rbga
.
WidthInBytes
=
woi
.
width
*
Sizeof
.
FLOAT
;
copy_rbga
.
Height
=
woi
.
height
;
copy_rbga
.
Height
=
woi
.
height
;
copy_rbga
.
srcXInBytes
=
4
*
Sizeof
.
FLOAT
;
for
(
int
ncol
=
0
;
ncol
<=
num_colors
;
ncol
++
)
{
for
(
int
ncol
=
0
;
ncol
<=
num_colors
;
ncol
++
)
{
copy_rbga
.
dstHost
=
Pointer
.
to
(
rslt
[
ncol
]);
copy_rbga
.
dstHost
=
Pointer
.
to
(
rslt
[
ncol
]);
copy_rbga
.
srcY
=
woi
.
height
*
ncol
;
copy_rbga
.
srcY
=
4
+
(
woi
.
height
+
DTT_SIZE
)
*
ncol
;
cuMemcpy2D
(
copy_rbga
);
// run copy
cuMemcpy2D
(
copy_rbga
);
// run copy
}
}
return
rslt
;
return
rslt
;
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
39e75987
...
@@ -1149,7 +1149,7 @@ extern "C" __global__ void clear_texture_rbga(
...
@@ -1149,7 +1149,7 @@ extern "C" __global__ void clear_texture_rbga(
const
size_t
texture_rbga_stride
,
// in floats 8*stride
const
size_t
texture_rbga_stride
,
// in floats 8*stride
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
extern
"C"
__global__
void
textures_accumulate
(
extern
"C"
__global__
void
textures_accumulate
(
int
border_tile
,
// if 1 - watch for border
//
int border_tile, // if 1 - watch for border
int
*
woi
,
// x, y, width,height
int
*
woi
,
// x, y, width,height
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
num_texture_tiles
,
// number of texture tiles to process
size_t
num_texture_tiles
,
// number of texture tiles to process
...
@@ -1537,9 +1537,9 @@ __global__ void generate_RBGA(
...
@@ -1537,9 +1537,9 @@ __global__ void generate_RBGA(
__syncthreads
();
__syncthreads
();
// Zero output textures. Trim
// Zero output textures. Trim
// texture_rbga_stride
// texture_rbga_stride
int
texture_width
=
*
(
woi
+
2
)
*
DTT_SIZE
;
int
texture_width
=
(
*
(
woi
+
2
)
+
1
)
*
DTT_SIZE
;
int
texture_tiles_height
=
*
(
woi
+
3
)
*
DTT_SIZE
;
int
texture_tiles_height
=
(
*
(
woi
+
3
)
+
1
)
*
DTT_SIZE
;
int
texture_height
=
texture_tiles_height
*
DTT_SIZE
;
///
int texture_height = texture_tiles_height * DTT_SIZE;
int
texture_slices
=
colors
+
1
;
int
texture_slices
=
colors
+
1
;
if
(
threadIdx
.
x
==
0
)
{
if
(
threadIdx
.
x
==
0
)
{
...
@@ -1581,7 +1581,8 @@ __global__ void generate_RBGA(
...
@@ -1581,7 +1581,8 @@ __global__ void generate_RBGA(
#endif
#endif
/* */
/* */
textures_accumulate
<<<
grid_texture
,
threads_texture
>>>
(
textures_accumulate
<<<
grid_texture
,
threads_texture
>>>
(
border_tile
,
// int border_tile, // if 1 - watch for border
// get rid of border tile
// border_tile, // int border_tile, // if 1 - watch for border
woi
,
// int * woi, // x, y, width,height
woi
,
// int * woi, // x, y, width,height
gpu_clt
,
// float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
gpu_clt
,
// float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
ntt
,
// size_t num_texture_tiles, // number of texture tiles to process
ntt
,
// size_t num_texture_tiles, // number of texture tiles to process
...
@@ -2205,7 +2206,7 @@ __global__ void textures_gen(
...
@@ -2205,7 +2206,7 @@ __global__ void textures_gen(
#endif // ifdef USE_textures_gen
#endif // ifdef USE_textures_gen
extern
"C"
extern
"C"
__global__
void
textures_accumulate
(
__global__
void
textures_accumulate
(
int
border_tile
,
// if 1 - watch for border
//
int border_tile, // if 1 - watch for border
int
*
woi
,
// x, y, width,height
int
*
woi
,
// x, y, width,height
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
num_texture_tiles
,
// number of texture tiles to process
size_t
num_texture_tiles
,
// number of texture tiles to process
...
@@ -2484,8 +2485,9 @@ __global__ void textures_accumulate(
...
@@ -2484,8 +2485,9 @@ __global__ void textures_accumulate(
if
(
gpu_texture_rbg
&&
(
texture_rbg_stride
!=
0
))
{
// generate RGBA
if
(
gpu_texture_rbg
&&
(
texture_rbg_stride
!=
0
))
{
// generate RGBA
#ifdef DEBUG12
#ifdef DEBUG12
if
((
tile_num
==
DBG_TILE
)
&&
(
threadIdx
.
x
==
0
)
&&
(
threadIdx
.
y
==
0
)){
if
((
tile_num
==
DBG_TILE
)
&&
(
threadIdx
.
x
==
0
)
&&
(
threadIdx
.
y
==
0
)){
printf
(
"
\n
textures_accumulate accumulating tile = %d, tile_code= %d, border_tile=%d
\n
"
,
// printf("\ntextures_accumulate accumulating tile = %d, tile_code= %d, border_tile=%d\n",
tile_num
,
(
int
)
tile_code
,
border_tile
);
// tile_num, (int) tile_code, border_tile);
printf
(
"
\n
textures_accumulate accumulating tile = %d, tile_code= %d
\n
"
,
tile_num
,
(
int
)
tile_code
);
for
(
int
ncol
=
0
;
ncol
<=
colors
;
ncol
++
)
{
for
(
int
ncol
=
0
;
ncol
<=
colors
;
ncol
++
)
{
printf
(
"
\n
tile[%d]
\n
"
,
ncol
);
printf
(
"
\n
tile[%d]
\n
"
,
ncol
);
...
@@ -2517,11 +2519,11 @@ __global__ void textures_accumulate(
...
@@ -2517,11 +2519,11 @@ __global__ void textures_accumulate(
}
}
}
}
}
}
int
slice_stride
=
texture_rbg_stride
*
*
(
woi
+
3
)
*
DTT_SIZE
;
// offset to the next color
int
slice_stride
=
texture_rbg_stride
*
(
*
(
woi
+
3
)
+
1
)
*
DTT_SIZE
;
// offset to the next color
int
tileY
=
tile_num
/
TILESX
;
// slow, but 1 per tile
int
tileY
=
tile_num
/
TILESX
;
// slow, but 1 per tile
int
tileX
=
tile_num
-
tileY
*
TILESX
;
int
tileX
=
tile_num
-
tileY
*
TILESX
;
int
tile_x0
=
(
tileX
-
*
(
woi
+
0
))
*
DTT_SIZE
-
(
DTT_SIZE
/
2
);
// may be negative == -4
int
tile_x0
=
(
tileX
-
*
(
woi
+
0
))
*
DTT_SIZE
;
//
- (DTT_SIZE/2); // may be negative == -4
int
tile_y0
=
(
tileY
-
*
(
woi
+
1
))
*
DTT_SIZE
-
(
DTT_SIZE
/
2
);
// may be negative == -4
int
tile_y0
=
(
tileY
-
*
(
woi
+
1
))
*
DTT_SIZE
;
//
- (DTT_SIZE/2); // may be negative == -4
int
height
=
*
(
woi
+
3
)
<<
DTT_SIZE_LOG2
;
int
height
=
*
(
woi
+
3
)
<<
DTT_SIZE_LOG2
;
#ifdef DEBUG12
#ifdef DEBUG12
...
@@ -2556,9 +2558,9 @@ __global__ void textures_accumulate(
...
@@ -2556,9 +2558,9 @@ __global__ void textures_accumulate(
__syncthreads
();
// __syncwarp();
__syncthreads
();
// __syncwarp();
#endif // DEBUG12
#endif // DEBUG12
if
(
!
border_tile
||
///
if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILESY)) && (g_col < (DTT_SIZE * TILESX)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILESY)) && (g_col < (DTT_SIZE * TILESX)))){
((
g_row
>=
0
)
&&
(
g_col
>=
0
)
&&
(
g_row
<
height
)
&&
(
g_col
<
(
DTT_SIZE
*
TILESX
)))){
///
((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILESX)))){
// always copy 3 (1) colors + alpha
// always copy 3 (1) colors + alpha
if
(
colors
==
3
){
if
(
colors
==
3
){
#pragma unroll
#pragma unroll
...
@@ -2571,7 +2573,7 @@ __global__ void textures_accumulate(
...
@@ -2571,7 +2573,7 @@ __global__ void textures_accumulate(
*
(
gpu_texture_rbg_gi
+
ncol
*
slice_stride
)
+=
*
(
rgba_i
+
ncol
*
(
DTT_SIZE2
*
DTT_SIZE21
));
*
(
gpu_texture_rbg_gi
+
ncol
*
slice_stride
)
+=
*
(
rgba_i
+
ncol
*
(
DTT_SIZE2
*
DTT_SIZE21
));
}
}
}
}
}
///
}
}
}
}
// if (gpu_texture_rbg) { // generate RGBA
}
// if (gpu_texture_rbg) { // generate RGBA
}
// textures_accumulate()
}
// textures_accumulate()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment