Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
0253bab4
Commit
0253bab4
authored
5 years ago
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
simplified by using larger output array
parent
03329430
master
c11-0
lwir16
lwir16-palani
lwir16_2
separate-compilation
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
28 deletions
+32
-28
TileProcessor.cuh
src/TileProcessor.cuh
+25
-23
dtt8x8.cu
src/dtt8x8.cu
+7
-5
No files found.
src/TileProcessor.cuh
View file @
0253bab4
...
...
@@ -1149,7 +1149,7 @@ extern "C" __global__ void clear_texture_rbga(
const size_t texture_rbga_stride, // in floats 8*stride
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
extern "C" __global__ void textures_accumulate(
int border_tile, // if 1 - watch for border
//
int border_tile, // if 1 - watch for border
int * woi, // x, y, width,height
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process
...
...
@@ -1537,9 +1537,9 @@ __global__ void generate_RBGA(
__syncthreads();
// Zero output textures. Trim
// texture_rbga_stride
int texture_width =
*(woi + 2)
* DTT_SIZE;
int texture_tiles_height =
*(woi + 3
) * DTT_SIZE;
int texture_height = texture_tiles_height * DTT_SIZE;
int texture_width =
(*(woi + 2) + 1)
* DTT_SIZE;
int texture_tiles_height =
(*(woi + 3) + 1
) * DTT_SIZE;
///
int texture_height = texture_tiles_height * DTT_SIZE;
int texture_slices = colors + 1;
if (threadIdx.x == 0) {
...
...
@@ -1581,7 +1581,8 @@ __global__ void generate_RBGA(
#endif
/* */
textures_accumulate<<<grid_texture,threads_texture>>>(
border_tile, // int border_tile, // if 1 - watch for border
// get rid of border tile
// border_tile, // int border_tile, // if 1 - watch for border
woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
ntt, // size_t num_texture_tiles, // number of texture tiles to process
...
...
@@ -2205,7 +2206,7 @@ __global__ void textures_gen(
#endif // ifdef USE_textures_gen
extern "C"
__global__ void textures_accumulate(
int border_tile, // if 1 - watch for border
//
int border_tile, // if 1 - watch for border
int * woi, // x, y, width,height
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process
...
...
@@ -2484,8 +2485,9 @@ __global__ void textures_accumulate(
if (gpu_texture_rbg && (texture_rbg_stride != 0)) { // generate RGBA
#ifdef DEBUG12
if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){
printf("\ntextures_accumulate accumulating tile = %d, tile_code= %d, border_tile=%d\n",
tile_num, (int) tile_code, border_tile);
// printf("\ntextures_accumulate accumulating tile = %d, tile_code= %d, border_tile=%d\n",
// tile_num, (int) tile_code, border_tile);
printf("\ntextures_accumulate accumulating tile = %d, tile_code= %d\n", tile_num, (int) tile_code);
for (int ncol = 0; ncol <= colors; ncol++) {
printf("\ntile[%d]\n",ncol);
...
...
@@ -2517,11 +2519,11 @@ __global__ void textures_accumulate(
}
}
}
int slice_stride = texture_rbg_stride *
*(woi + 3
) * DTT_SIZE; // offset to the next color
int slice_stride = texture_rbg_stride *
(*(woi + 3) + 1
) * DTT_SIZE; // offset to the next color
int tileY = tile_num / TILESX; // slow, but 1 per tile
int tileX = tile_num - tileY * TILESX;
int tile_x0 = (tileX - *(woi + 0)) * DTT_SIZE - (DTT_SIZE/2); // may be negative == -4
int tile_y0 = (tileY - *(woi + 1)) * DTT_SIZE - (DTT_SIZE/2); // may be negative == -4
int tile_x0 = (tileX - *(woi + 0)) * DTT_SIZE
; //
- (DTT_SIZE/2); // may be negative == -4
int tile_y0 = (tileY - *(woi + 1)) * DTT_SIZE
; //
- (DTT_SIZE/2); // may be negative == -4
int height = *(woi + 3) << DTT_SIZE_LOG2;
#ifdef DEBUG12
...
...
@@ -2556,9 +2558,9 @@ __global__ void textures_accumulate(
__syncthreads();// __syncwarp();
#endif // DEBUG12
if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILESY)) && (g_col < (DTT_SIZE * TILESX)))){
((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILESX)))){
///
if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILESY)) && (g_col < (DTT_SIZE * TILESX)))){
///
((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILESX)))){
// always copy 3 (1) colors + alpha
if (colors == 3){
#pragma unroll
...
...
@@ -2571,7 +2573,7 @@ __global__ void textures_accumulate(
*(gpu_texture_rbg_gi + ncol * slice_stride) += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
}
}
}
///
}
}
} // if (gpu_texture_rbg) { // generate RGBA
} // textures_accumulate()
...
...
This diff is collapsed.
Click to expand it.
src/dtt8x8.cu
View file @
0253bab4
...
...
@@ -512,8 +512,8 @@ int main(int argc, char **argv)
tile_texture_size, // int width (floats),
TILESX * TILESY); // int height);
int rgba_width =
TILESX
* DTT_SIZE;
int rgba_height =
TILESY
* DTT_SIZE;
int rgba_width =
(TILESX+1)
* DTT_SIZE;
int rgba_height =
(TILESY+1)
* DTT_SIZE;
int rbga_slices = texture_colors + 1; // 4/1
gpu_textures_rbga = alloc_image_gpu(
...
...
@@ -789,7 +789,7 @@ int main(int argc, char **argv)
// Channel1 weight = 0.117647
// Channel2 weight = 0.588235
textures_accumulate<<<grid_texture,threads_texture>>> (
0, // int border_tile, // if 1 - watch for border
//
0, // int border_tile, // if 1 - watch for border
(int *) 0, // int * woi, // x, y, width,height
gpu_clt , // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
num_textures, // size_t num_texture_tiles, // number of texture tiles to process
...
...
@@ -1008,8 +1008,10 @@ int main(int argc, char **argv)
cudaMemcpyDeviceToHost));
printf("WOI x=%d, y=%d, width=%d, height=%d\n", cpu_woi[0], cpu_woi[1], cpu_woi[2], cpu_woi[3]);
int rgba_woi_width = cpu_woi[2] * DTT_SIZE;
int rgba_woi_height = cpu_woi[3] * DTT_SIZE;
// temporarily use larger array (4 pixels each size, switch to cudaMemcpy2DFromArray()
int rgba_woi_width = (cpu_woi[2] + 1) * DTT_SIZE;
int rgba_woi_height = (cpu_woi[3] + 1)* DTT_SIZE;
int rslt_rgba_size = rgba_woi_width * rgba_woi_height * rbga_slices;
float * cpu_textures_rgba = (float *)malloc(rslt_rgba_size * sizeof(float));
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment