Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
bbcae7a3
Commit
bbcae7a3
authored
Aug 07, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
finished removing former constants
parent
44e87f14
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
24 additions
and
18 deletions
+24
-18
TileProcessor.cuh
src/TileProcessor.cuh
+23
-17
tp_defines.h
src/tp_defines.h
+1
-1
No files found.
src/TileProcessor.cuh
View file @
bbcae7a3
...
@@ -101,13 +101,13 @@ GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.
...
@@ -101,13 +101,13 @@ GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.
#define KERNELS_STEP (1 << KERNELS_LSTEP)
#define KERNELS_STEP (1 << KERNELS_LSTEP)
//#define TILES-X (IMG_WIDTH / DTT_SIZE)
//#define TILES-X (IMG-WIDTH / DTT_SIZE)
#define TILESY (IMG_HEIGHT / DTT_SIZE)
//#define TILES-Y (IMG-HEIGHT / DTT_SIZE)
#define CONVERT_DIRECT_INDEXING_THREADS_LOG2 5
#define CONVERT_DIRECT_INDEXING_THREADS_LOG2 5
#define CONVERT_DIRECT_INDEXING_THREADS (1 << CONVERT_DIRECT_INDEXING_THREADS_LOG2) // 32
#define CONVERT_DIRECT_INDEXING_THREADS (1 << CONVERT_DIRECT_INDEXING_THREADS_LOG2) // 32
// Make TILES-YA >= TILES-X and a multiple of 4
// Make TILES-YA >= TILES-X and a multiple of 4
//#define TILES-YA ((TILESY +3) & (~3))
//#define TILES-YA ((TILES-Y +3) & (~3))
// increase row length by 1 so vertical passes will use different ports
// increase row length by 1 so vertical passes will use different ports
#define MCLT_UNION_LEN (DTT_SIZE2 * (DTT_SIZE2 + 2))
#define MCLT_UNION_LEN (DTT_SIZE2 * (DTT_SIZE2 + 2))
...
@@ -840,6 +840,7 @@ __global__ void mark_texture_neighbor_tiles(
...
@@ -840,6 +840,7 @@ __global__ void mark_texture_neighbor_tiles(
struct tp_task * gpu_tasks,
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
int num_tiles, // number of tiles in task list
int width, // number of tiles in a row
int width, // number of tiles in a row
int height, // number of tiles rows
int * gpu_texture_indices, // packed tile + bits (now only (1 << 7)
int * gpu_texture_indices, // packed tile + bits (now only (1 << 7)
int * woi); // x,y,width,height of the woi
int * woi); // x,y,width,height of the woi
...
@@ -1301,7 +1302,7 @@ extern "C" __global__ void generate_RBGA(
...
@@ -1301,7 +1302,7 @@ extern "C" __global__ void generate_RBGA(
float diff_sigma = params[2]; // pixel value/pixel change
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
int tilesya = ((height +3) & (~3)); //#define TILES-YA ((TILESY +3) & (~3))
int tilesya = ((height +3) & (~3)); //#define TILES-YA ((TILES-Y +3) & (~3))
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
dim3 blocks0 (blocks_x, height, 1);
dim3 blocks0 (blocks_x, height, 1);
...
@@ -1331,6 +1332,7 @@ extern "C" __global__ void generate_RBGA(
...
@@ -1331,6 +1332,7 @@ extern "C" __global__ void generate_RBGA(
gpu_tasks,
gpu_tasks,
num_tiles, // number of tiles in task list
num_tiles, // number of tiles in task list
width, // number of tiles in a row
width, // number of tiles in a row
height, // number of tiles rows
gpu_texture_indices, // packed tile + bits (now only (1 << 7)
gpu_texture_indices, // packed tile + bits (now only (1 << 7)
woi); // min_x, min_y, max_x, max_y
woi); // min_x, min_y, max_x, max_y
...
@@ -1512,8 +1514,9 @@ __global__ void prepare_texture_list(
...
@@ -1512,8 +1514,9 @@ __global__ void prepare_texture_list(
gpu_tasks,
gpu_tasks,
num_tiles, // number of tiles in task list
num_tiles, // number of tiles in task list
width, // number of tiles in a row
width, // number of tiles in a row
height, // number of tiles rows
gpu_texture_indices, // packed tile + bits (now only (1 << 7)
gpu_texture_indices, // packed tile + bits (now only (1 << 7)
woi); // min_x, min_y, max_x, max_y
woi); // min_x, min_y, max_x, max_y
cudaDeviceSynchronize();
cudaDeviceSynchronize();
// Generate tile indices list, upper 24 bits - tile index, lower 4 bits: n/e/s/w neighbors, bit 7 - set to 1
// Generate tile indices list, upper 24 bits - tile index, lower 4 bits: n/e/s/w neighbors, bit 7 - set to 1
*(num_texture_tiles+0) = 0;
*(num_texture_tiles+0) = 0;
...
@@ -1598,6 +1601,8 @@ __global__ void mark_texture_tiles(
...
@@ -1598,6 +1601,8 @@ __global__ void mark_texture_tiles(
*
*
* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param width number of tiles in a row
* @param height number of tile rows
* @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param woi 4-element int array ( x,y,width,height of the woi, in tiles)
* @param woi 4-element int array ( x,y,width,height of the woi, in tiles)
*/
*/
...
@@ -1606,6 +1611,7 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
...
@@ -1606,6 +1611,7 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
struct tp_task * gpu_tasks,
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
int num_tiles, // number of tiles in task list
int width, // number of tiles in a row
int width, // number of tiles in a row
int height, // number of tiles rows
int * gpu_texture_indices, // packed tile + bits (now only (1 << 7)
int * gpu_texture_indices, // packed tile + bits (now only (1 << 7)
int * woi) // x,y,width,height of the woi
int * woi) // x,y,width,height of the woi
...
@@ -1628,12 +1634,12 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
...
@@ -1628,12 +1634,12 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
int d = 0;
int d = 0;
// if ((y > 0) && *(gpu_texture_indices + x + (y - 1) * TILES-X)) d |= (1 << TASK_TEXTURE_N_BIT);
// if ((y > 0) && *(gpu_texture_indices + x + (y - 1) * TILES-X)) d |= (1 << TASK_TEXTURE_N_BIT);
// if ((x < (TILES-X - 1)) && *(gpu_texture_indices + (x + 1) + y * TILES-X)) d |= (1 << TASK_TEXTURE_E_BIT);
// if ((x < (TILES-X - 1)) && *(gpu_texture_indices + (x + 1) + y * TILES-X)) d |= (1 << TASK_TEXTURE_E_BIT);
// if ((y < (TILESY - 1)) && *(gpu_texture_indices + x + (y + 1) * TILES-X)) d |= (1 << TASK_TEXTURE_S_BIT);
// if ((y < (TILES-Y - 1)) && *(gpu_texture_indices + x + (y + 1) * TILES-X)) d |= (1 << TASK_TEXTURE_S_BIT);
// if ((x > 0) && *(gpu_texture_indices + (x - 1) + y * TILES-X)) d |= (1 << TASK_TEXTURE_W_BIT);
// if ((x > 0) && *(gpu_texture_indices + (x - 1) + y * TILES-X)) d |= (1 << TASK_TEXTURE_W_BIT);
if ((y > 0) && *(gpu_texture_indices + x + (y - 1) * width)) d |= (1 << TASK_TEXTURE_N_BIT);
if ((y > 0) && *(gpu_texture_indices + x + (y - 1) * width)) d |= (1 << TASK_TEXTURE_N_BIT);
if ((x < (width - 1)) && *(gpu_texture_indices + (x + 1) + y * width)) d |= (1 << TASK_TEXTURE_E_BIT);
if ((x < (width - 1)) && *(gpu_texture_indices + (x + 1) + y * width)) d |= (1 << TASK_TEXTURE_E_BIT);
if ((y < (TILESY - 1)) && *(gpu_texture_indices + x + (y + 1) * width)) d |= (1 << TASK_TEXTURE_S_BIT);
if ((y < (height - 1)) && *(gpu_texture_indices + x + (y + 1) * width)) d |= (1 << TASK_TEXTURE_S_BIT);
if ((x > 0) && *(gpu_texture_indices + (x - 1) + y * width)) d |= (1 << TASK_TEXTURE_W_BIT);
if ((x > 0) && *(gpu_texture_indices + (x - 1) + y * width)) d |= (1 << TASK_TEXTURE_W_BIT);
gpu_tasks[task_num].task = ((task ^ d) & TASK_TEXTURE_BITS) ^ task;
gpu_tasks[task_num].task = ((task ^ d) & TASK_TEXTURE_BITS) ^ task;
}
}
...
@@ -1661,7 +1667,7 @@ __global__ void gen_texture_list(
...
@@ -1661,7 +1667,7 @@ __global__ void gen_texture_list(
int * woi) // min_x, min_y, max_x, max_y input
int * woi) // min_x, min_y, max_x, max_y input
{
{
int tilesya = ((height +3) & (~3)); //#define TILES-YA ((TILESY +3) & (~3))
int tilesya = ((height +3) & (~3)); //#define TILES-YA ((TILES-Y +3) & (~3))
int task_num = blockDim.x * blockIdx.x + threadIdx.x;
int task_num = blockDim.x * blockIdx.x + threadIdx.x;
if (task_num >= num_tiles) {
if (task_num >= num_tiles) {
return; // nothing to do
return; // nothing to do
...
@@ -1832,8 +1838,8 @@ __global__ void index_correlate(
...
@@ -1832,8 +1838,8 @@ __global__ void index_correlate(
* @param dstride stride (in floats) for the input Bayer images
* @param dstride stride (in floats) for the input Bayer images
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param lpf_mask apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
* @param lpf_mask apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
* @param woi_width image width (was constant IMG_WIDTH, now variable to use with EO+LWIR)
* @param woi_width image width (was constant IMG-WIDTH, now variable to use with EO+LWIR)
* @param woi_height image height (was constant IMG_HEIGHT, now variable to use with EO+LWIR)
* @param woi_height image height (was constant IMG-HEIGHT, now variable to use with EO+LWIR)
* @param kernels_hor number of deconvolution kernels per image width
* @param kernels_hor number of deconvolution kernels per image width
* @param kernels_vert number of deconvolution kernels per image height
* @param kernels_vert number of deconvolution kernels per image height
* @param gpu_active_tiles pointer to the calculated list of tiles
* @param gpu_active_tiles pointer to the calculated list of tiles
...
@@ -1900,8 +1906,8 @@ extern "C" __global__ void convert_direct( // called with a single block, singl
...
@@ -1900,8 +1906,8 @@ extern "C" __global__ void convert_direct( // called with a single block, singl
* @param gpu_clt output array of per-camera aberration-corrected transform-domain image representations
* @param gpu_clt output array of per-camera aberration-corrected transform-domain image representations
* @param dstride stride (in floats) for the input Bayer images
* @param dstride stride (in floats) for the input Bayer images
* @param lpf_mask apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
* @param lpf_mask apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
* @param woi_width image width (was constant IMG_WIDTH, now variable to use with EO+LWIR)
* @param woi_width image width (was constant IMG-WIDTH, now variable to use with EO+LWIR)
* @param woi_height image height (was constant IMG_HEIGHT, now variable to use with EO+LWIR)
* @param woi_height image height (was constant IMG-HEIGHT, now variable to use with EO+LWIR)
* @param kernels_hor number of deconvolution kernels per image width
* @param kernels_hor number of deconvolution kernels per image width
* @param kernels_vert number of deconvolution kernels per image height
* @param kernels_vert number of deconvolution kernels per image height
*/
*/
...
@@ -2531,7 +2537,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -2531,7 +2537,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
#endif // DEBUG12
#endif // DEBUG12
/// if (!border_tile ||
/// if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILESY)) && (g_col < (DTT_SIZE * TILES-X)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILES-Y)) && (g_col < (DTT_SIZE * TILES-X)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILES-X)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILES-X)))){
// always copy 3 (1) colors + alpha
// always copy 3 (1) colors + alpha
if (colors == 3){
if (colors == 3){
...
@@ -3101,8 +3107,8 @@ __device__ void normalizeTileAmplitude(
...
@@ -3101,8 +3107,8 @@ __device__ void normalizeTileAmplitude(
* @param window_hor_cos array in shared memory for window horizontal cosine [2*DTT_SIZE]
* @param window_hor_cos array in shared memory for window horizontal cosine [2*DTT_SIZE]
* @param window_hor_sin array in shared memory for window horizontal sine [2*DTT_SIZE]
* @param window_hor_sin array in shared memory for window horizontal sine [2*DTT_SIZE]
* @param window_vert_cos array in shared memory for window vertical cosine [2*DTT_SIZE]
* @param window_vert_cos array in shared memory for window vertical cosine [2*DTT_SIZE]
* @param woi_width image width (was constant IMG_WIDTH, now variable to use with EO+LWIR)
* @param woi_width image width (was constant IMG-WIDTH, now variable to use with EO+LWIR)
* @param woi_height image height (was constant IMG_HEIGHT, now variable to use with EO+LWIR)
* @param woi_height image height (was constant IMG-HEIGHT, now variable to use with EO+LWIR)
* @param kernels_hor number of deconvolution kernels per image width
* @param kernels_hor number of deconvolution kernels per image width
* @param kernels_vert number of deconvolution kernels per image height
* @param kernels_vert number of deconvolution kernels per image height
*/
*/
...
@@ -3132,8 +3138,8 @@ __device__ void convertCorrectTile(
...
@@ -3132,8 +3138,8 @@ __device__ void convertCorrectTile(
{
{
// int tilesx = TILES-X;
// int tilesx = TILES-X;
// TODO: pass these values instead of constants to handle EO/LWIR
// TODO: pass these values instead of constants to handle EO/LWIR
int max_px = woi_width - 1; // IMG_WIDTH - 1; // odd
int max_px = woi_width - 1; // IMG-WIDTH - 1; // odd
int max_py = woi_height - 1; // IMG_HEIGHT - 1; // odd
int max_py = woi_height - 1; // IMG-HEIGHT - 1; // odd
int max_pxm1 = max_px - 1; // even
int max_pxm1 = max_px - 1; // even
int max_pym1 = max_py - 1; // even
int max_pym1 = max_py - 1; // even
int max_kernel_hor = kernels_hor - 1; // KERNELS_HOR -1;
int max_kernel_hor = kernels_hor - 1; // KERNELS_HOR -1;
...
...
src/tp_defines.h
View file @
bbcae7a3
...
@@ -79,7 +79,7 @@
...
@@ -79,7 +79,7 @@
// only used in C++ test
// only used in C++ test
#define TILESX (IMG_WIDTH / DTT_SIZE)
#define TILESX (IMG_WIDTH / DTT_SIZE)
//
#define TILESY (IMG_HEIGHT / DTT_SIZE)
#define TILESY (IMG_HEIGHT / DTT_SIZE)
#define TILESYA ((TILESY +3) & (~3))
#define TILESYA ((TILESY +3) & (~3))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment