Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
86054c53
Commit
86054c53
authored
Aug 06, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
removing tilesX
parent
9182fde0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
12 deletions
+20
-12
TileProcessor.cuh
src/TileProcessor.cuh
+14
-3
TileProcessor.h
src/TileProcessor.h
+2
-0
test_tp.cu
src/test_tp.cu
+4
-9
No files found.
src/TileProcessor.cuh
View file @
86054c53
...
...
@@ -866,12 +866,14 @@ __global__ void index_direct(
// Forward declaration of the kernel that fills gpu_corr_indices with packed
// correlation entries: (tile index << CORR_NTILE_SHIFT) | pair number, where
// tile index = ty * width + tx is taken from each task's txy field.
__global__ void index_correlate(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * gpu_corr_indices, // array of correlation tasks
int * pnum_corr_tiles); // pointer to the length of correlation tasks array
// Forward declaration of the kernel that appends one packed texture task code
// to nonoverlap_list for every tile whose task field is non-zero. The slot is
// reserved with atomicAdd on *pnonoverlap_length.
__global__ void create_nonoverlap_list(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * nonoverlap_list, // output list of packed texture task codes (tile index = tx + ty * width, shifted by CORR_NTILE_SHIFT)
int * pnonoverlap_length); // pointer to the number of entries written to nonoverlap_list // should be initialized to zero
...
...
@@ -957,6 +959,7 @@ extern "C" __global__ void correlate2D(
float fat_zero, // here - absolute
struct tp_task * gpu_tasks, // array of per-tile tasks (now bits 4..9 - correlation pairs)
int num_tiles, // number of tiles in task
int tilesx, // number of tiles in a row
int * gpu_corr_indices, // packed tile+pair
int * pnum_corr_tiles, // pointer to a number of correlation tiles to process
const size_t corr_stride, // in floats
...
...
@@ -970,6 +973,7 @@ extern "C" __global__ void correlate2D(
index_correlate<<<blocks0,threads0>>>(
gpu_tasks, // struct tp_task * gpu_tasks,
num_tiles, // int num_tiles, // number of tiles in task
tilesx, // int width, // number of tiles in a row
gpu_corr_indices, // int * gpu_corr_indices, // array of correlation tasks
pnum_corr_tiles); // int * pnum_corr_tiles); // pointer to the length of correlation tasks array
cudaDeviceSynchronize();
...
...
@@ -1752,6 +1756,7 @@ __global__ void index_direct(
__global__ void create_nonoverlap_list(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * nonoverlap_list, // output list of packed texture task codes (indices to gpu_tasks tiles)
int * pnonoverlap_length) // pointer to the calculated number of non-zero tiles // should be initialized to zero
{
...
...
@@ -1763,7 +1768,8 @@ __global__ void create_nonoverlap_list(
return; // nothing to do
}
int cxy = gpu_tasks[num_tile].txy;
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * TILESX) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
// int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * TILESX) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
if (gpu_tasks[num_tile].task != 0) {
nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code;
}
...
...
@@ -1781,6 +1787,7 @@ __global__ void create_nonoverlap_list(
__global__ void index_correlate(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * gpu_corr_indices, // array of correlation tasks
int * pnum_corr_tiles) // pointer to the length of correlation tasks array
{
...
...
@@ -1795,7 +1802,8 @@ __global__ void index_correlate(
int txy = gpu_tasks[num_tile].txy;
int tx = txy & 0xffff;
int ty = txy >> 16;
int nt = ty * TILESX + tx;
// int nt = ty * TILESX + tx;
int nt = ty * width + tx;
for (int b = 0; b < NUM_PAIRS; b++) if ((cm & (1 << b)) != 0) {
gpu_corr_indices[indx++] = (nt << CORR_NTILE_SHIFT) | b;
}
...
...
@@ -2011,6 +2019,7 @@ __global__ void convert_correct_tiles(
extern "C" __global__ void textures_nonoverlap(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
// int num_tilesx, // number of tiles in a row
// declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
...
...
@@ -2033,6 +2042,7 @@ extern "C" __global__ void textures_nonoverlap(
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{
int num_tilesx = TILESX;
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
...
...
@@ -2047,8 +2057,9 @@ extern "C" __global__ void textures_nonoverlap(
create_nonoverlap_list<<<blocks0,threads0>>>(
gpu_tasks, // struct tp_task * gpu_tasks,
num_tiles, // int num_tiles, // number of tiles in task
num_tilesx, // int width, // number of tiles in a row
gpu_texture_indices, // int * nonoverlap_list, // pointer to the calculated number of non-zero tiles
pnum_texture_tiles);
// int * pnonoverlap_length) // indices to gpu_tasks // should be initialized to zero
pnum_texture_tiles); // int * pnonoverlap_length) // indices to gpu_tasks // should be initialized to zero
cudaDeviceSynchronize();
dim3 threads_texture(TEXTURE_THREADS_PER_TILE, NUM_CAMS, 1); // TEXTURE_TILES_PER_BLOCK, 1);
dim3 grid_texture((*pnum_texture_tiles + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1);
...
...
src/TileProcessor.h
View file @
86054c53
...
...
@@ -68,6 +68,7 @@ extern "C" __global__ void correlate2D(
float
fat_zero
,
// here - absolute
struct
tp_task
*
gpu_tasks
,
// array of per-tile tasks (now bits 4..9 - correlation pairs)
int
num_tiles
,
// number of tiles in task
int
tilesx
,
// number of tiles in a row
int
*
gpu_corr_indices
,
// packed tile+pair
int
*
pnum_corr_tiles
,
// pointer to a number of correlation tiles to process
const
size_t
corr_stride
,
// in floats
...
...
@@ -78,6 +79,7 @@ extern "C" __global__ void correlate2D(
extern
"C"
__global__
void
textures_nonoverlap
(
struct
tp_task
*
gpu_tasks
,
int
num_tiles
,
// number of tiles in task list
// int num_tilesx, // number of tiles in a row
// declare arrays in device code?
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
pnum_texture_tiles
,
// returns total number of elements in gpu_texture_indices array
...
...
src/test_tp.cu
View file @
86054c53
...
...
@@ -1015,7 +1015,8 @@ int main(int argc, char **argv)
30.0, // float fat_zero, // here - absolute
gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
TILESX, // int tilesx, // number of tiles in a row
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
gpu_num_corr_tiles, // int * pnum_corr_tiles, // pointer to a number of correlation tiles to process
dstride_corr/sizeof(float), // const size_t corr_stride, // in floats
CORR_OUT_RAD, // int corr_radius, // radius of the output correlation (7 for 15x15)
...
...
@@ -1089,6 +1090,8 @@ int main(int argc, char **argv)
textures_nonoverlap<<<1,1>>> (
gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list
// TILESX, // int num_tilesx, // number of tiles in a row
// declare arrays in device code?
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_num_texture_tiles, // int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
...
...
@@ -1098,18 +1101,10 @@ int main(int argc, char **argv)
texture_colors, // int colors, // number of colors (3/1)
(texture_colors == 1), // int is_lwir, // do not perform shot correction
gpu_generate_RBGA_params,
/*
10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
*/
gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null )
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
// gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
getLastCudaError("Kernel failure");
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment