Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
f9641f6c
Commit
f9641f6c
authored
Dec 08, 2021
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Tested nonoverlap textures with 16xmono, without Dynamic Parallelism
parent
29147908
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
146 additions
and
136 deletions
+146
-136
TileProcessor.cuh
src/TileProcessor.cuh
+92
-123
test_tp.cu
src/test_tp.cu
+50
-9
tp_defines.h
src/tp_defines.h
+4
-4
No files found.
src/TileProcessor.cuh
View file @
f9641f6c
This diff is collapsed.
Click to expand it.
src/test_tp.cu
View file @
f9641f6c
...
@@ -861,7 +861,8 @@ int main(int argc, char **argv)
...
@@ -861,7 +861,8 @@ int main(int argc, char **argv)
gpu_generate_RBGA_params = (float *) copyalloc_kernel_gpu((float * ) generate_RBGA_params, sizeof(generate_RBGA_params));
gpu_generate_RBGA_params = (float *) copyalloc_kernel_gpu((float * ) generate_RBGA_params, sizeof(generate_RBGA_params));
/// int tile_texture_size = (texture_colors + 1 + (keep_texture_weights? (NUM_CAMS + texture_colors + 1): 0)) *256;
/// int tile_texture_size = (texture_colors + 1 + (keep_texture_weights? (NUM_CAMS + texture_colors + 1): 0)) *256;
int tile_texture_size = (texture_colors + 1 + (keep_texture_weights? (num_cams + texture_colors + 1): 0)) *256;
int tile_texture_layers = (texture_colors + 1 + (keep_texture_weights? (num_cams + texture_colors + 1): 0));
int tile_texture_size = tile_texture_layers *256;
gpu_textures = alloc_image_gpu(
gpu_textures = alloc_image_gpu(
&dstride_textures, // in bytes ! for one rgba/ya 16x16 tile
&dstride_textures, // in bytes ! for one rgba/ya 16x16 tile
...
@@ -1475,7 +1476,7 @@ int main(int argc, char **argv)
...
@@ -1475,7 +1476,7 @@ int main(int argc, char **argv)
dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1);
dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1);
dim3 blocks0 ((tp_task_size + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
dim3 blocks0 ((tp_task_size + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
int linescan_order = 1; // output low-res in linescan order, 0 - in gpu_texture_indices order
printf("threads0=(%d, %d, %d)\n",threads0.x,threads0.y,threads0.z);
printf("threads0=(%d, %d, %d)\n",threads0.x,threads0.y,threads0.z);
printf("blocks0=(%d, %d, %d)\n",blocks0.x,blocks0.y,blocks0.z);
printf("blocks0=(%d, %d, %d)\n",blocks0.x,blocks0.y,blocks0.z);
int cpu_pnum_texture_tiles = 0;
int cpu_pnum_texture_tiles = 0;
...
@@ -1549,12 +1550,13 @@ int main(int argc, char **argv)
...
@@ -1549,12 +1550,13 @@ int main(int argc, char **argv)
generate_RBGA_params[4], // min_agree, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
generate_RBGA_params[4], // min_agree, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
gpu_color_weights, // float weights[3], // scale for R,B,G
gpu_color_weights, // float weights[3], // scale for R,B,G
1, // dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
1, // dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
1, // 0,
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
keep_texture_weights, // 0, // 1
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
// combining both non-overlap and overlap (each calculated if pointer is not null )
0, // size_t texture_rbg_stride, // in floats
0, // size_t texture_rbg_stride, // in floats
(float *) 0, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
(float *) 0, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // texture_stride, // size_t texture_stride, // in floats (now 256*4 = 1024)
dstride_textures /sizeof(float), // texture_stride, // size_t texture_stride, // in floats (now 256*4 = 1024)
(float *) 0, // gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_textures, // (float *) 0, // gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
linescan_order, // int linescan_order, // if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
TILESX);
TILESX);
getLastCudaError("Kernel failure");
getLastCudaError("Kernel failure");
...
@@ -1568,9 +1570,14 @@ int main(int argc, char **argv)
...
@@ -1568,9 +1570,14 @@ int main(int argc, char **argv)
printf("Average Texture run time =%f ms\n", avgTimeTEXTURES);
printf("Average Texture run time =%f ms\n", avgTimeTEXTURES);
int rslt_texture_size = num_textures * tile_texture_size;
int rslt_texture_size = num_textures * tile_texture_size;
float * cpu_textures = (float *)malloc(rslt_texture_size * sizeof(float));
checkCudaErrors(cudaMemcpy(
(float * ) texture_indices,
gpu_texture_indices,
cpu_pnum_texture_tiles * sizeof(float),
cudaMemcpyDeviceToHost));
checkCudaErrors(cudaMemcpy2D( // something wrong with size
float * cpu_textures = (float *)malloc(rslt_texture_size * sizeof(float));
checkCudaErrors(cudaMemcpy2D(
cpu_textures,
cpu_textures,
tile_texture_size * sizeof(float),
tile_texture_size * sizeof(float),
gpu_textures,
gpu_textures,
...
@@ -1578,6 +1585,33 @@ int main(int argc, char **argv)
...
@@ -1578,6 +1585,33 @@ int main(int argc, char **argv)
tile_texture_size * sizeof(float),
tile_texture_size * sizeof(float),
num_textures,
num_textures,
cudaMemcpyDeviceToHost));
cudaMemcpyDeviceToHost));
// float non_overlap_layers [tile_texture_layers][TILESY*16][TILESX*16];
int num_nonoverlap_pixels = tile_texture_layers * TILESY*16 * TILESX*16;
float * non_overlap_layers = (float *)malloc(num_nonoverlap_pixels* sizeof(float));
for (int i = 0; i < num_nonoverlap_pixels; i++){
non_overlap_layers[i] = NAN;
}
for (int itile = 0; itile < cpu_pnum_texture_tiles; itile++) { // if (texture_indices[itile] & ((1 << LIST_TEXTURE_BIT))){
int ntile = texture_indices[itile] >> CORR_NTILE_SHIFT;
int tileX = ntile % TILESX;
int tileY = ntile / TILESX;
for (int ilayer = 0; ilayer < tile_texture_layers; ilayer++){
int src_index0 = itile * tile_texture_size + 256 * ilayer;
int dst_index0 = ilayer * (TILESX * TILESYA * 256) + (tileY * 16) * (16 * TILESX) + (tileX * 16);
for (int iy = 0; iy < 16; iy++){
int src_index1 = src_index0 + 16 * iy;
int dst_index1 = dst_index0 + iy * (16 * TILESX);
for (int ix = 0; ix < 16; ix++){
// int src_index = src_index1 + ix;
// int dst_index = dst_index1 + ix;
int src_index= itile * tile_texture_size + 256 * ilayer + 16 * iy + ix;
int dst_index = ilayer * (TILESX * TILESYA * 256) + (tileY * 16 + iy) * (16 * TILESX) + (tileX * 16) + ix;
non_overlap_layers[dst_index] = cpu_textures[src_index];
}
}
}
}
int ntiles = TILESX * TILESY;
int ntiles = TILESX * TILESY;
int nlayers = num_cams * (num_colors + 1);
int nlayers = num_cams * (num_colors + 1);
...
@@ -1605,11 +1639,18 @@ int main(int argc, char **argv)
...
@@ -1605,11 +1639,18 @@ int main(int argc, char **argv)
rslt_texture_size, // int size, // length in elements
rslt_texture_size, // int size, // length in elements
result_textures_file); // const char * path) // file path
result_textures_file); // const char * path) // file path
*/
*/
writeFloatsToFile(
non_overlap_layers, // float * data, // allocated array
rslt_texture_size, // int size, // length in elements
result_textures_file); // const char * path) // file path
/*
* non_overlap_layers
writeFloatsToFile(
writeFloatsToFile(
cpu_diff_rgb_combo, // cpu_diff_rgb_combo, // float * data, // allocated array
cpu_diff_rgb_combo, // cpu_diff_rgb_combo, // float * data, // allocated array
diff_rgb_combo_size, // int size, // length in elements
diff_rgb_combo_size, // int size, // length in elements
result_textures_file); // const char * path) // file path
result_textures_file); // const char * path) // file path
*/
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file);
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file);
writeFloatsToFile(
writeFloatsToFile(
cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array
cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array
...
...
src/tp_defines.h
View file @
f9641f6c
...
@@ -106,8 +106,8 @@
...
@@ -106,8 +106,8 @@
//#define DBG_TILE_X 40
//#define DBG_TILE_X 40
//#define DBG_TILE_Y 80
//#define DBG_TILE_Y 80
#if TEST_LWIR
#if TEST_LWIR
#define DBG_TILE_X 52 // 32 // 162 // 151 // 161 // 49
#define DBG_TILE_X 50 // 52 // 32 // 162 // 151 // 161 // 49
#define DBG_TILE_Y 5 // 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE_Y 19 // 5 // 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE (DBG_TILE_Y * 80 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 80 + DBG_TILE_X)
#else
#else
#define DBG_TILE_X 114 // 32 // 162 // 151 // 161 // 49
#define DBG_TILE_X 114 // 32 // 162 // 151 // 161 // 49
...
@@ -128,7 +128,7 @@
...
@@ -128,7 +128,7 @@
//#define DEBUG6 1
//#define DEBUG6 1
// #define DEBUG7 1
// #define DEBUG7 1
#define DEBUG7A 1
////#define DEBUG7A 1
/*
/*
#define DEBUG7 1
#define DEBUG7 1
#define DEBUG8 1
#define DEBUG8 1
...
@@ -148,7 +148,7 @@
...
@@ -148,7 +148,7 @@
#define DEBUG20 1 // Geometry Correction
#define DEBUG20 1 // Geometry Correction
#define DEBUG21 1 // Geometry Correction
#define DEBUG21 1 // Geometry Correction
//#define DEBUG210 1
//#define DEBUG210 1
#define DEBUG30 1
////#define DEBUG30 1
//#define DEBUG22 1
//#define DEBUG22 1
//#define DEBUG23 1
//#define DEBUG23 1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment