Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
8d03d500
Commit
8d03d500
authored
Nov 21, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixing per-sensor textures
parent
f3cba575
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
16 deletions
+35
-16
TileProcessor.cuh
src/TileProcessor.cuh
+35
-16
No files found.
src/TileProcessor.cuh
View file @
8d03d500
...
@@ -3445,7 +3445,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3445,7 +3445,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
rgbaw, // (float *) shr1.rgbaw, // float * rgba,
rgbaw, // (float *) shr1.rgbaw, // float * rgba,
// if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed
// if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed
// to calculate max_diff_shared
// to calculate max_diff_shared
calc_extra, // int calc_extra, // 1 - calcualate ports_rgb, max_diff
calc_extra,
// | (keep_weights & 2),
// int calc_extra, // 1 - calcualate ports_rgb, max_diff
ports_rgb_shared,// float ports_rgb_shared [colors][num_cams], // return to system memory (optionally pass null to skip calculation)
ports_rgb_shared,// float ports_rgb_shared [colors][num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_shared, // float max_diff_shared [num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_shared, // float max_diff_shared [num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_tmp, // float max_diff_tmp [num_cams][TEXTURE_THREADS_PER_TILE],
max_diff_tmp, // float max_diff_tmp [num_cams][TEXTURE_THREADS_PER_TILE],
...
@@ -3456,7 +3456,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3456,7 +3456,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
min_agree, // float min_agree, NOT USED? // minimal number of channels to agree on a point (real number to work with fuzzy averages)
min_agree, // float min_agree, NOT USED? // minimal number of channels to agree on a point (real number to work with fuzzy averages)
weights, // float * chn_weights, // color channel weights, sum == 1.0
weights, // float * chn_weights, // color channel weights, sum == 1.0
dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
(keep_weights & 1), // int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated)
(keep_weights & 1),
// | (keep_weights & 2),
// int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated)
debug ); // int debug );
debug ); // int debug );
__syncthreads(); // _syncthreads();1
__syncthreads(); // _syncthreads();1
...
@@ -3598,7 +3598,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3598,7 +3598,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
int i = row * DTT_SIZE21 + col;
int i = row * DTT_SIZE21 + col;
float * rgba_i = rgbaw + i;
float * rgba_i = rgbaw + i;
// always copy 3 (1) colors + alpha
// always copy 3 (1) colors + alpha
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) += textureBlend[idir][(threadIdx.y <<3) + threadIdx.x] * avg_val;
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) += textureBlend[idir][(threadIdx.y <<3) + threadIdx.x] * avg_val;
/*
/*
for (int pass = 0; pass < 8; pass ++) {
for (int pass = 0; pass < 8; pass ++) {
int row1 = pass * 2 + (threadIdx.y >> 1);
int row1 = pass * 2 + (threadIdx.y >> 1);
...
@@ -3674,8 +3674,37 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3674,8 +3674,37 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
//if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
//if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
if (alpha_mode != 0xff){
if (alpha_mode != 0xff){
for (int ncol = 0; ncol < colors; ncol++) {
for (int ncol = 0; ncol < colors; ncol++) {
// will re-use ports_rgb_shared[], if needed - average here for combined texture (see texture_averaging[sum_index] above)
// calculate average value for blending
float avg_val = ports_rgb_shared[ncol * num_cams + ncam]; // texture_averaging[0];
int sum_index = threadIdx.x + threadIdx.y * TEXTURE_THREADS_PER_TILE; // 0.. 31
texture_averaging[sum_index] = 0;
for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1);
int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row * DTT_SIZE21 + col;
float * rgba_i = rgbaw + i;
texture_averaging[sum_index] += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
}
__syncthreads();
if (threadIdx.y == 0){ // combine sums
#pragma unroll
for (int i = 1; i < 4; i++) { // reduce sums to 8
texture_averaging[threadIdx.x] += texture_averaging[threadIdx.x + TEXTURE_THREADS_PER_TILE * i];
}
}
__syncthreads();
if ((threadIdx.y == 0) && (threadIdx.x == 0)){ // combine sums
#pragma unroll
for (int i = 1; i < TEXTURE_THREADS_PER_TILE; i++) { // reduce sums to 8
texture_averaging[0] += texture_averaging[i];
}
texture_averaging[0] /= 64; // average value for uniform field
}
__syncthreads();
float avg_val = texture_averaging[0];
// Possible to re-use ports_rgb_shared[], if needed (change to (calc_extra | (keep_weights & 2) in tile_combine_rgba()).
// Now using averaging here (less noise if averaging sensor outside).
// float avg_val = ports_rgb_shared[ncol * num_cams + ncam]; // texture_averaging[0];
for (int idir = 0; idir < 8; idir ++) if ((alpha_mode & (1 << idir)) == 0) { // no tile in this direction
for (int idir = 0; idir < 8; idir ++) if ((alpha_mode & (1 << idir)) == 0) { // no tile in this direction
/* */
/* */
int row, col;
int row, col;
...
@@ -3700,17 +3729,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3700,17 +3729,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
int i = row * DTT_SIZE21 + col;
int i = row * DTT_SIZE21 + col;
float * mclt_dst_i = mclt_dst_ncam + i;
float * mclt_dst_i = mclt_dst_ncam + i;
int gi = (threadIdx.y <<3) + threadIdx.x;
int gi = (threadIdx.y <<3) + threadIdx.x;
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) += textureBlend[alpha_mode][gi] * avg_val;
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) += textureBlend[idir][gi] * avg_val;
/*
for (int pass = 0; pass < 8; pass ++) {
int row1 = pass * 2 + (threadIdx.y >> 1);
int col1 = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row1 * DTT_SIZE21 + col1;
int gi = row1 * DTT_SIZE2 + col1;
float * mclt_dst_i = mclt_dst_ncam + i;
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) += textureBlend[alpha_mode][gi] * avg_val;
}
*/
}
}
__syncthreads(); // needed?
__syncthreads(); // needed?
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment