Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
0253bab4
Commit
0253bab4
authored
Apr 06, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
simplified by using larger output array
parent
03329430
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
28 deletions
+32
-28
TileProcessor.cuh
src/TileProcessor.cuh
+25
-23
dtt8x8.cu
src/dtt8x8.cu
+7
-5
No files found.
src/TileProcessor.cuh
View file @
0253bab4
...
@@ -1149,7 +1149,7 @@ extern "C" __global__ void clear_texture_rbga(
...
@@ -1149,7 +1149,7 @@ extern "C" __global__ void clear_texture_rbga(
const size_t texture_rbga_stride, // in floats 8*stride
const size_t texture_rbga_stride, // in floats 8*stride
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
extern "C" __global__ void textures_accumulate(
extern "C" __global__ void textures_accumulate(
int border_tile, // if 1 - watch for border
//
int border_tile, // if 1 - watch for border
int * woi, // x, y, width,height
int * woi, // x, y, width,height
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process
size_t num_texture_tiles, // number of texture tiles to process
...
@@ -1537,9 +1537,9 @@ __global__ void generate_RBGA(
...
@@ -1537,9 +1537,9 @@ __global__ void generate_RBGA(
__syncthreads();
__syncthreads();
// Zero output textures. Trim
// Zero output textures. Trim
// texture_rbga_stride
// texture_rbga_stride
int texture_width =
*(woi + 2)
* DTT_SIZE;
int texture_width =
(*(woi + 2) + 1)
* DTT_SIZE;
int texture_tiles_height =
*(woi + 3
) * DTT_SIZE;
int texture_tiles_height =
(*(woi + 3) + 1
) * DTT_SIZE;
int texture_height = texture_tiles_height * DTT_SIZE;
///
int texture_height = texture_tiles_height * DTT_SIZE;
int texture_slices = colors + 1;
int texture_slices = colors + 1;
if (threadIdx.x == 0) {
if (threadIdx.x == 0) {
...
@@ -1581,7 +1581,8 @@ __global__ void generate_RBGA(
...
@@ -1581,7 +1581,8 @@ __global__ void generate_RBGA(
#endif
#endif
/* */
/* */
textures_accumulate<<<grid_texture,threads_texture>>>(
textures_accumulate<<<grid_texture,threads_texture>>>(
border_tile, // int border_tile, // if 1 - watch for border
// get rid of border tile
// border_tile, // int border_tile, // if 1 - watch for border
woi, // int * woi, // x, y, width,height
woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
gpu_clt, // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
ntt, // size_t num_texture_tiles, // number of texture tiles to process
ntt, // size_t num_texture_tiles, // number of texture tiles to process
...
@@ -2205,7 +2206,7 @@ __global__ void textures_gen(
...
@@ -2205,7 +2206,7 @@ __global__ void textures_gen(
#endif // ifdef USE_textures_gen
#endif // ifdef USE_textures_gen
extern "C"
extern "C"
__global__ void textures_accumulate(
__global__ void textures_accumulate(
int border_tile, // if 1 - watch for border
//
int border_tile, // if 1 - watch for border
int * woi, // x, y, width,height
int * woi, // x, y, width,height
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process
size_t num_texture_tiles, // number of texture tiles to process
...
@@ -2484,8 +2485,9 @@ __global__ void textures_accumulate(
...
@@ -2484,8 +2485,9 @@ __global__ void textures_accumulate(
if (gpu_texture_rbg && (texture_rbg_stride != 0)) { // generate RGBA
if (gpu_texture_rbg && (texture_rbg_stride != 0)) { // generate RGBA
#ifdef DEBUG12
#ifdef DEBUG12
if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){
if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){
printf("\ntextures_accumulate accumulating tile = %d, tile_code= %d, border_tile=%d\n",
// printf("\ntextures_accumulate accumulating tile = %d, tile_code= %d, border_tile=%d\n",
tile_num, (int) tile_code, border_tile);
// tile_num, (int) tile_code, border_tile);
printf("\ntextures_accumulate accumulating tile = %d, tile_code= %d\n", tile_num, (int) tile_code);
for (int ncol = 0; ncol <= colors; ncol++) {
for (int ncol = 0; ncol <= colors; ncol++) {
printf("\ntile[%d]\n",ncol);
printf("\ntile[%d]\n",ncol);
...
@@ -2517,11 +2519,11 @@ __global__ void textures_accumulate(
...
@@ -2517,11 +2519,11 @@ __global__ void textures_accumulate(
}
}
}
}
}
}
int slice_stride = texture_rbg_stride *
*(woi + 3
) * DTT_SIZE; // offset to the next color
int slice_stride = texture_rbg_stride *
(*(woi + 3) + 1
) * DTT_SIZE; // offset to the next color
int tileY = tile_num / TILESX; // slow, but 1 per tile
int tileY = tile_num / TILESX; // slow, but 1 per tile
int tileX = tile_num - tileY * TILESX;
int tileX = tile_num - tileY * TILESX;
int tile_x0 = (tileX - *(woi + 0)) * DTT_SIZE - (DTT_SIZE/2); // may be negative == -4
int tile_x0 = (tileX - *(woi + 0)) * DTT_SIZE
; //
- (DTT_SIZE/2); // may be negative == -4
int tile_y0 = (tileY - *(woi + 1)) * DTT_SIZE - (DTT_SIZE/2); // may be negative == -4
int tile_y0 = (tileY - *(woi + 1)) * DTT_SIZE
; //
- (DTT_SIZE/2); // may be negative == -4
int height = *(woi + 3) << DTT_SIZE_LOG2;
int height = *(woi + 3) << DTT_SIZE_LOG2;
#ifdef DEBUG12
#ifdef DEBUG12
...
@@ -2556,9 +2558,9 @@ __global__ void textures_accumulate(
...
@@ -2556,9 +2558,9 @@ __global__ void textures_accumulate(
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
#endif // DEBUG12
#endif // DEBUG12
if (!border_tile ||
///
if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILESY)) && (g_col < (DTT_SIZE * TILESX)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILESY)) && (g_col < (DTT_SIZE * TILESX)))){
((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILESX)))){
///
((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILESX)))){
// always copy 3 (1) colors + alpha
// always copy 3 (1) colors + alpha
if (colors == 3){
if (colors == 3){
#pragma unroll
#pragma unroll
...
@@ -2571,7 +2573,7 @@ __global__ void textures_accumulate(
...
@@ -2571,7 +2573,7 @@ __global__ void textures_accumulate(
*(gpu_texture_rbg_gi + ncol * slice_stride) += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
*(gpu_texture_rbg_gi + ncol * slice_stride) += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
}
}
}
}
}
///
}
}
}
} // if (gpu_texture_rbg) { // generate RGBA
} // if (gpu_texture_rbg) { // generate RGBA
} // textures_accumulate()
} // textures_accumulate()
...
...
src/dtt8x8.cu
View file @
0253bab4
...
@@ -512,8 +512,8 @@ int main(int argc, char **argv)
...
@@ -512,8 +512,8 @@ int main(int argc, char **argv)
tile_texture_size, // int width (floats),
tile_texture_size, // int width (floats),
TILESX * TILESY); // int height);
TILESX * TILESY); // int height);
int rgba_width =
TILESX
* DTT_SIZE;
int rgba_width =
(TILESX+1)
* DTT_SIZE;
int rgba_height =
TILESY
* DTT_SIZE;
int rgba_height =
(TILESY+1)
* DTT_SIZE;
int rbga_slices = texture_colors + 1; // 4/1
int rbga_slices = texture_colors + 1; // 4/1
gpu_textures_rbga = alloc_image_gpu(
gpu_textures_rbga = alloc_image_gpu(
...
@@ -789,7 +789,7 @@ int main(int argc, char **argv)
...
@@ -789,7 +789,7 @@ int main(int argc, char **argv)
// Channel1 weight = 0.117647
// Channel1 weight = 0.117647
// Channel2 weight = 0.588235
// Channel2 weight = 0.588235
textures_accumulate<<<grid_texture,threads_texture>>> (
textures_accumulate<<<grid_texture,threads_texture>>> (
0, // int border_tile, // if 1 - watch for border
//
0, // int border_tile, // if 1 - watch for border
(int *) 0, // int * woi, // x, y, width,height
(int *) 0, // int * woi, // x, y, width,height
gpu_clt , // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
gpu_clt , // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
num_textures, // size_t num_texture_tiles, // number of texture tiles to process
num_textures, // size_t num_texture_tiles, // number of texture tiles to process
...
@@ -1008,8 +1008,10 @@ int main(int argc, char **argv)
...
@@ -1008,8 +1008,10 @@ int main(int argc, char **argv)
cudaMemcpyDeviceToHost));
cudaMemcpyDeviceToHost));
printf("WOI x=%d, y=%d, width=%d, height=%d\n", cpu_woi[0], cpu_woi[1], cpu_woi[2], cpu_woi[3]);
printf("WOI x=%d, y=%d, width=%d, height=%d\n", cpu_woi[0], cpu_woi[1], cpu_woi[2], cpu_woi[3]);
int rgba_woi_width = cpu_woi[2] * DTT_SIZE;
int rgba_woi_height = cpu_woi[3] * DTT_SIZE;
// temporarily use larger array (4 pixels each size, switch to cudaMemcpy2DFromArray()
int rgba_woi_width = (cpu_woi[2] + 1) * DTT_SIZE;
int rgba_woi_height = (cpu_woi[3] + 1)* DTT_SIZE;
int rslt_rgba_size = rgba_woi_width * rgba_woi_height * rbga_slices;
int rslt_rgba_size = rgba_woi_width * rgba_woi_height * rbga_slices;
float * cpu_textures_rgba = (float *)malloc(rslt_rgba_size * sizeof(float));
float * cpu_textures_rgba = (float *)malloc(rslt_rgba_size * sizeof(float));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment