Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
adac766c
Commit
adac766c
authored
Mar 27, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed NaN by adding fat zero to channel weights after exp()
parent
edce489f
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
58 additions
and
28 deletions
+58
-28
TileProcessor.cuh
src/TileProcessor.cuh
+58
-28
No files found.
src/TileProcessor.cuh
View file @
adac766c
...
@@ -41,22 +41,22 @@
...
@@ -41,22 +41,22 @@
#pragma once
#pragma once
#include "dtt8x8.cuh"
#include "dtt8x8.cuh"
#define THREADSX (DTT_SIZE)
#define THREADSX (DTT_SIZE)
#define NUM_CAMS 4
#define NUM_PAIRS 6
#define NUM_COLORS 3
#define IMG_WIDTH 2592
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define IMG_HEIGHT 1936
#define KERNELS_HOR 164
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define KERNELS_VERT 123
#define NUM_CAMS 4
#define NUM_PAIRS 6
#define NUM_COLORS 3
#define KERNELS_LSTEP 4
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK 4
#define TILES_PER_BLOCK 4
#define CORR_THREADS_PER_TILE 8
#define CORR_THREADS_PER_TILE 8
#define CORR_TILES_PER_BLOCK 4
#define CORR_TILES_PER_BLOCK 4
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_TILES_PER_BLOCK 1
#define TEXTURE_TILES_PER_BLOCK 1
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
...
@@ -64,6 +64,7 @@
...
@@ -64,6 +64,7 @@
#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
#define LIST_TEXTURE_BIT 7 // bit to request texture calculation
#define LIST_TEXTURE_BIT 7 // bit to request texture calculation
#define CORR_OUT_RAD 4
#define CORR_OUT_RAD 4
#define FAT_ZERO_WEIGHT 0.0001 // add to port weights to avoid nan
//7
//7
//#define DEBUG1 1
//#define DEBUG1 1
...
@@ -74,7 +75,7 @@
...
@@ -74,7 +75,7 @@
//#define DEBUG6 1
//#define DEBUG6 1
#define DEBUG7 1
#define DEBUG7 1
#define DEBUG8 1
#define DEBUG8 1
//
#define DEBUG9 1
#define DEBUG9 1
#endif
#endif
...
@@ -146,10 +147,10 @@
...
@@ -146,10 +147,10 @@
#define BAYER_RED_COL 1
#define BAYER_RED_COL 1
//#define BAYER_BLUE_ROW (1 - BAYER_RED_ROW)
//#define BAYER_BLUE_ROW (1 - BAYER_RED_ROW)
//#define BAYER_BLUE_COL (1 - BAYER_RED_COL)
//#define BAYER_BLUE_COL (1 - BAYER_RED_COL)
//#define DBG_TILE_X 40
//#define DBG_TILE_Y 80
#define DBG_TILE_X 4
0
#define DBG_TILE_X 4
9
#define DBG_TILE_Y
80
#define DBG_TILE_Y
66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
//56494
//56494
...
@@ -503,6 +504,7 @@ __device__ void debayer_shot(
...
@@ -503,6 +504,7 @@ __device__ void debayer_shot(
__device__ void tile_combine_rgba(
__device__ void tile_combine_rgba(
int colors, // number of colors
int colors, // number of colors
float * mclt_tile, // debayer
float * mclt_tile, // debayer
float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
float * rgba, // result
float * rgba, // result
float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
float * max_diff, // maximal (weighted) deviation of each channel from the average /null
float * max_diff, // maximal (weighted) deviation of each channel from the average /null
...
@@ -803,7 +805,7 @@ __global__ void convert_correct_tiles(
...
@@ -803,7 +805,7 @@ __global__ void convert_correct_tiles(
float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t dstride, // in floats (pixels)
size_t dstride, // in floats (pixels)
int num_tiles, // number of tiles in task
int num_tiles, // number of tiles in task
int lpf_mask) // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
int lpf_mask) // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
. Now - always 0 !
{
{
dim3 t = threadIdx;
dim3 t = threadIdx;
int tile_in_block = threadIdx.y;
int tile_in_block = threadIdx.y;
...
@@ -902,7 +904,11 @@ __global__ void textures_gen(
...
@@ -902,7 +904,11 @@ __global__ void textures_gen(
return; // nothing to do
return; // nothing to do
}
}
// get number of tile
// get number of tile
int tile_num = (gpu_texture_indices[tile_indx]) >> CORR_NTILE_SHIFT;
int tile_code = gpu_texture_indices[tile_indx];
if ((tile_code & (1 << CORR_TEXTURE_BIT)) == 0){
return; // nothing to do
}
int tile_num = tile_code >> CORR_NTILE_SHIFT;
__shared__ float mclt_tiles [NUM_CAMS][NUM_COLORS][2*DTT_SIZE][DTT_SIZE21];
__shared__ float mclt_tiles [NUM_CAMS][NUM_COLORS][2*DTT_SIZE][DTT_SIZE21];
__shared__ union {
__shared__ union {
float clt_tiles [NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE1]; // NUM_CAMS == 4
float clt_tiles [NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE1]; // NUM_CAMS == 4
...
@@ -1000,6 +1006,20 @@ __global__ void textures_gen(
...
@@ -1000,6 +1006,20 @@ __global__ void textures_gen(
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
} else {
} else {
// copy? - no, just remember to use mclt_tile, not mclt_dst
// copy? - no, just remember to use mclt_tile, not mclt_dst
// will have to copy mclt_tiles -> mclt_dst as they have different gaps
// untested copy for mono mode
#pragma unroll
for (int n = 0; n <= DTT_SIZE; n += DTT_SIZE){
float * msp = mclt_tile + threadIdx.x + n;
float * dst = mclt_dst + threadIdx.x + n;
#pragma unroll
for (int row = 0; row < DTT_SIZE2; row++){
*dst = *msp;
msp += DTT_SIZE21;
dst += DTT_SIZE21;
}
}
__syncthreads();
}
}
#ifdef DEBUG77
#ifdef DEBUG77
// float * mclt_dst = (float *) shr.mclt_debayer[camera_num][color];
// float * mclt_dst = (float *) shr.mclt_debayer[camera_num][color];
...
@@ -1055,10 +1075,11 @@ __global__ void textures_gen(
...
@@ -1055,10 +1075,11 @@ __global__ void textures_gen(
}
}
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
#endif
#endif
// __shared__ float mclt_tiles [NUM_CAMS][NUM_COLORS][2*DTT_SIZE][DTT_SIZE21];
tile_combine_rgba(
tile_combine_rgba(
colors, // int colors, // number of colors
colors, // int colors, // number of colors
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
(float *) shr1.rgbaw, // float * rgba, // result
(float *) shr1.rgbaw, // float * rgba, // result
(float * ) 0, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
(float * ) 0, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
(float * ) 0, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null
(float * ) 0, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null
...
@@ -1638,7 +1659,7 @@ __device__ void convertCorrectTile(
...
@@ -1638,7 +1659,7 @@ __device__ void convertCorrectTile(
float * gpu_images,
float * gpu_images,
float * gpu_clt,
float * gpu_clt,
const int color,
const int color,
const int lpf_mask,
const int lpf_mask,
// now 0
const float centerX,
const float centerX,
const float centerY,
const float centerY,
const int txy,
const int txy,
...
@@ -2711,6 +2732,7 @@ __device__ void debayer(
...
@@ -2711,6 +2732,7 @@ __device__ void debayer(
__device__ void tile_combine_rgba(
__device__ void tile_combine_rgba(
int colors, // number of colors
int colors, // number of colors
float * mclt_tile, // debayer // has gaps to align with union !
float * mclt_tile, // debayer // has gaps to align with union !
float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
float * rgba, // result
float * rgba, // result
float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
float * max_diff, // maximal (weighted) deviation of each channel from the average /null
float * max_diff, // maximal (weighted) deviation of each channel from the average /null
...
@@ -2858,7 +2880,7 @@ __device__ void tile_combine_rgba(
...
@@ -2858,7 +2880,7 @@ __device__ void tile_combine_rgba(
dc *= wnd2_inv; // to compensate fading near the edges
dc *= wnd2_inv; // to compensate fading near the edges
d+= *(chn_weights + ncol) * dc * dc;
d+= *(chn_weights + ncol) * dc * dc;
}
}
d = expf(-pair_dist2r[ipair] * d); // 0.5 for exact match, lower for mismatch. Add this weight to both ports involved
d = expf(-pair_dist2r[ipair] * d)
+ (FAT_ZERO_WEIGHT)
; // 0.5 for exact match, lower for mismatch. Add this weight to both ports involved
// Add weight to both channels in a pair
// Add weight to both channels in a pair
*(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * pair_ports[ipair][0]) +=d;
*(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * pair_ports[ipair][0]) +=d;
*(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * pair_ports[ipair][1]) +=d;
*(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * pair_ports[ipair][1]) +=d;
...
@@ -2964,7 +2986,7 @@ __device__ void tile_combine_rgba(
...
@@ -2964,7 +2986,7 @@ __device__ void tile_combine_rgba(
}
}
// TODO: Should it use pair_dist2r ? no as it is relative?
// TODO: Should it use pair_dist2r ? no as it is relative?
// port_weights[ip][i] = Math.exp(-ksigma * d2[ip]);
// port_weights[ip][i] = Math.exp(-ksigma * d2[ip]);
*(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * cam) = expf(-ksigma * d2_ip);
*(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * cam) = expf(-ksigma * d2_ip)
+ (FAT_ZERO_WEIGHT)
;
}
}
// and now make a new average with those weights
// and now make a new average with those weights
// Inserting dust remove here
// Inserting dust remove here
...
@@ -3033,22 +3055,30 @@ __device__ void tile_combine_rgba(
...
@@ -3033,22 +3055,30 @@ __device__ void tile_combine_rgba(
///
///
if (rbg_tile) {
float k = 0.0;
float k = 0.0;
int rbga_offset = colors * (DTT_SIZE2*DTT_SIZE21); // padded in union !
#pragma unroll
#pragma unroll
for (int cam = 0; cam < NUM_CAMS; cam++){
for (int cam = 0; cam < NUM_CAMS; cam++){
k += *(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * cam); // port_weights[ip][i];
k += *(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * cam); // port_weights[ip][i];
}
}
k = 1.0/k;
k = 1.0/k;
float * rbg_tile_i = rbg_tile + i;
#pragma unroll // non-constant
#pragma unroll // non-constant
for (int ncol = 0; ncol < colors; ncol++) { // if (iclt_tile[0][ncol] != null) {
for (int ncol = 0; ncol < colors; ncol++) { // if (iclt_tile[0][ncol] != null) {
float * rgba_col_i = rgba_i + ncol * (DTT_SIZE2*DTT_SIZE21);
float * rgba_col_i = rgba_i + ncol * (DTT_SIZE2*DTT_SIZE21);
float * mclt_col_i = mclt_tile_i + MCLT_UNION_LEN * ncol;
// float * mclt_col_i = mclt_tile_i + MCLT_UNION_LEN * ncol;
float * rbg_col_i = rbg_tile_i + ncol * (DTT_SIZE2*DTT_SIZE21); // different gap between tiles than MCLT_UNION_LEN
*rgba_col_i = 0.0; // color_avg[ncol][i] = 0;
*rgba_col_i = 0.0; // color_avg[ncol][i] = 0;
#pragma unroll
#pragma unroll
for (int cam = 0; cam < NUM_CAMS; cam++) {
for (int cam = 0; cam < NUM_CAMS; cam++) {
*rgba_col_i += k * *(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * cam) * *(mclt_col_i + cam * colors_offset);
// *rgba_col_i += k * *(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * cam) * *(mclt_col_i + cam * colors_offset);
*rgba_col_i += k * *(port_weights_i + (DTT_SIZE2*DTT_SIZE21) * cam) * *(rbg_col_i + cam * rbga_offset);
}
}
}
}
}
// int colors_offset = colors * MCLT_UNION_LEN; // padded in union !
// calculate alpha from channel weights. Start with just a sum of weights?
// calculate alpha from channel weights. Start with just a sum of weights?
// int used_ports = NUM_CAMS;
// int used_ports = NUM_CAMS;
// if (dust_remove){
// if (dust_remove){
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment