Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
70ff9492
Commit
70ff9492
authored
Apr 20, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
consolidated 5 float inputs to an array of 5 floats to solve CUDA_ERROR_INVALID_PTX
parent
fcd21d6e
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
94 additions
and
50 deletions
+94
-50
TileProcessor.cuh
src/TileProcessor.cuh
+34
-15
TileProcessor.h
src/TileProcessor.h
+37
-28
test_tp.cu
src/test_tp.cu
+23
-7
No files found.
src/TileProcessor.cuh
View file @
70ff9492
...
@@ -1217,8 +1217,7 @@ extern "C" __global__ void correlate2D_inner(
...
@@ -1217,8 +1217,7 @@ extern "C" __global__ void correlate2D_inner(
}
}
#define USE_CDP
#define USE_CDP
#ifdef USE_CDP
#ifdef USE_CDP
extern "C"
extern "C" __global__ void generate_RBGA(
__global__ void generate_RBGA(
// Parameters to generate texture tasks
// Parameters to generate texture tasks
struct tp_task * gpu_tasks,
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
int num_tiles, // number of tiles in task list
...
@@ -1232,26 +1231,40 @@ __global__ void generate_RBGA(
...
@@ -1232,26 +1231,40 @@ __global__ void generate_RBGA(
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
// TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction,
struct gc * gpu_geometry_correction,
// float * gpu_geometry_correction,
// float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
int colors, // number of colors (3/1)
int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction
int is_lwir, // do not perform shot correction
float params[5],
/*
float min_shot, // 10.0
float min_shot, // 10.0
float scale_shot, // 3.0
float scale_shot, // 3.0
float diff_sigma, // pixel value/pixel change
float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
*/
float weights[3], // scale for R,B,G
float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed)
int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats
const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
// float aaaa)
// float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
/*
const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
float aaaa)
*/
{
{
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
// TODO use atomic_add to increment num_texture_tiles
// TODO use atomic_add to increment num_texture_tiles
// TODO calculate woi
// TODO calculate woi
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
dim3 blocks0 (blocks_x, height, 1);
dim3 blocks0 (blocks_x, height, 1);
...
@@ -1380,8 +1393,7 @@ __global__ void generate_RBGA(
...
@@ -1380,8 +1393,7 @@ __global__ void generate_RBGA(
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024)
0, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
(float *)0);//gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
// gpu_diff_rgb_combo + ti_offset * NUM_CAMS*(colors+1)); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cudaDeviceSynchronize(); // not needed yet, just for testing
cudaDeviceSynchronize(); // not needed yet, just for testing
/* */
/* */
...
@@ -1848,11 +1860,12 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -1848,11 +1860,12 @@ extern "C" __global__ void textures_nonoverlap(
struct gc * gpu_geometry_correction,
struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1)
int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction
int is_lwir, // do not perform shot correction
float min_shot, // 10.0
float params[5],
float scale_shot, // 3.0
// float min_shot, // 10.0
float diff_sigma, // pixel value/pixel change
// float scale_shot, // 3.0
float diff_threshold, // pixel value/pixel change
// float diff_sigma, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
// float diff_threshold, // pixel value/pixel change
// float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G
float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int dust_remove, // Do not reduce average weight when only one image differs much from the average
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
...
@@ -1861,6 +1874,12 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -1861,6 +1874,12 @@ extern "C" __global__ void textures_nonoverlap(
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{
{
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1);
dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1);
dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
...
...
src/TileProcessor.h
View file @
70ff9492
...
@@ -86,11 +86,12 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -86,11 +86,12 @@ extern "C" __global__ void textures_nonoverlap(
struct
gc
*
gpu_geometry_correction
,
struct
gc
*
gpu_geometry_correction
,
int
colors
,
// number of colors (3/1)
int
colors
,
// number of colors (3/1)
int
is_lwir
,
// do not perform shot correction
int
is_lwir
,
// do not perform shot correction
float
min_shot
,
// 10.0
float
params
[
5
],
float
scale_shot
,
// 3.0
// float min_shot, // 10.0
float
diff_sigma
,
// pixel value/pixel change
// float scale_shot, // 3.0
float
diff_threshold
,
// pixel value/pixel change
// float diff_sigma, // pixel value/pixel change
float
min_agree
,
// minimal number of channels to agree on a point (real number to work with fuzzy averages)
// float diff_threshold, // pixel value/pixel change
// float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float
weights
[
3
],
// scale for R,B,G
float
weights
[
3
],
// scale for R,B,G
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
...
@@ -120,27 +121,35 @@ extern "C" __global__ void imclt_rbg(
...
@@ -120,27 +121,35 @@ extern "C" __global__ void imclt_rbg(
int
woi_twidth
,
int
woi_twidth
,
int
woi_theight
,
int
woi_theight
,
const
size_t
dstride
);
// in floats (pixels)
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
__global__
void
textures_accumulate
(
// (8,4,1) (N,1,1)
int
*
woi
,
// x, y, width,height
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
num_texture_tiles
,
// number of texture tiles to process
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
// TODO: use geometry_correction rXY !
struct
gc
*
gpu_geometry_correction
,
int
colors
,
// number of colors (3/1)
int
is_lwir
,
// do not perform shot correction
float
min_shot
,
// 10.0
float
scale_shot
,
// 3.0
float
diff_sigma
,
// pixel value/pixel change
float
diff_threshold
,
// pixel value/pixel change
float
min_agree
,
// minimal number of channels to agree on a point (real number to work with fuzzy averages)
float
weights
[
3
],
// scale for R,B,G
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
int
keep_weights
,
// return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
size_t
texture_rbg_stride
,
// in floats
float
*
gpu_texture_rbg
,
// (number of colors +1 + ?)*16*16 rgba texture tiles
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
float
*
gpu_texture_tiles
,
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_diff_rgb_combo
);
// diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
/*
extern "C" __global__ void generate_RBGA(
// Parameters to generate texture tasks
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
// declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation)
int * woi, // x,y,width,height of the woi
int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1)
int height, // <= TILESY, use for faster processing of LWIR images
// Parameters for the texture generation
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction,
// float * gpu_geometry_correction,
// float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction
float min_shot, // 10.0
float scale_shot, // 3.0
float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
*/
src/test_tp.cu
View file @
70ff9492
...
@@ -348,6 +348,7 @@ int main(int argc, char **argv)
...
@@ -348,6 +348,7 @@ int main(int argc, char **argv)
int * gpu_num_texture_tiles;
int * gpu_num_texture_tiles;
float * gpu_port_offsets;
float * gpu_port_offsets;
float * gpu_color_weights;
float * gpu_color_weights;
float * gpu_generate_RBGA_params;
int num_corrs;
int num_corrs;
int num_textures;
int num_textures;
int num_ports = NUM_CAMS;
int num_ports = NUM_CAMS;
...
@@ -562,9 +563,17 @@ int main(int argc, char **argv)
...
@@ -562,9 +563,17 @@ int main(int argc, char **argv)
0.294118, // float weight0, // scale for R
0.294118, // float weight0, // scale for R
0.117647, // float weight1, // scale for B
0.117647, // float weight1, // scale for B
0.588235}; // float weight2, // scale for G
0.588235}; // float weight2, // scale for G
float generate_RBGA_params[]={
10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
3.0 // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
};
gpu_port_offsets = (float *) copyalloc_kernel_gpu((float * ) port_offsets, num_ports * 2);
gpu_port_offsets = (float *) copyalloc_kernel_gpu((float * ) port_offsets, num_ports * 2);
gpu_color_weights = (float *) copyalloc_kernel_gpu((float * ) color_weights, sizeof(color_weights));
gpu_color_weights = (float *) copyalloc_kernel_gpu((float * ) color_weights, sizeof(color_weights));
gpu_generate_RBGA_params = (float *) copyalloc_kernel_gpu((float * ) generate_RBGA_params, sizeof(generate_RBGA_params));
...
@@ -1088,11 +1097,14 @@ int main(int argc, char **argv)
...
@@ -1088,11 +1097,14 @@ int main(int argc, char **argv)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
texture_colors, // int colors, // number of colors (3/1)
texture_colors, // int colors, // number of colors (3/1)
(texture_colors == 1), // int is_lwir, // do not perform shot correction
(texture_colors == 1), // int is_lwir, // do not perform shot correction
10.0, // float min_shot, // 10.0
gpu_generate_RBGA_params,
3.0, // float scale_shot, // 3.0
/*
1.5f, // float diff_sigma, // pixel value/pixel change
10.0, // float min_shot, // 10.0
10.0f, // float diff_threshold, // pixel value/pixel change
3.0, // float scale_shot, // 3.0
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
*/
gpu_color_weights, // float weights[3], // scale for R
gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null )
// combining both non-overlap and overlap (each calculated if pointer is not null )
...
@@ -1267,11 +1279,14 @@ int main(int argc, char **argv)
...
@@ -1267,11 +1279,14 @@ int main(int argc, char **argv)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
texture_colors, // int colors, // number of colors (3/1)
texture_colors, // int colors, // number of colors (3/1)
(texture_colors == 1), // int is_lwir, // do not perform shot correction
(texture_colors == 1), // int is_lwir, // do not perform shot correction
gpu_generate_RBGA_params,
/*
10.0, // float min_shot, // 10.0
10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0
3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change
1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
*/
gpu_color_weights, // float weights[3], // scale for R
gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
0, // int keep_weights, // return channel weights after A in RGBA
0, // int keep_weights, // return channel weights after A in RGBA
...
@@ -1366,6 +1381,7 @@ int main(int argc, char **argv)
...
@@ -1366,6 +1381,7 @@ int main(int argc, char **argv)
checkCudaErrors(cudaFree(gpu_texture_indices));
checkCudaErrors(cudaFree(gpu_texture_indices));
checkCudaErrors(cudaFree(gpu_port_offsets));
checkCudaErrors(cudaFree(gpu_port_offsets));
checkCudaErrors(cudaFree(gpu_color_weights));
checkCudaErrors(cudaFree(gpu_color_weights));
checkCudaErrors(cudaFree(gpu_generate_RBGA_params));
checkCudaErrors(cudaFree(gpu_textures));
checkCudaErrors(cudaFree(gpu_textures));
checkCudaErrors(cudaFree(gpu_textures_rbga));
checkCudaErrors(cudaFree(gpu_textures_rbga));
checkCudaErrors(cudaFree(gpu_diff_rgb_combo));
checkCudaErrors(cudaFree(gpu_diff_rgb_combo));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment