TileProcessor.h 9.04 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/**
 **
 ** TileProcessor.h
 **
 ** Copyright (C) 2020 Elphel, Inc.
 **
 ** -----------------------------------------------------------------------------**
 **
 **  TileProcessor.h is free software: you can redistribute it and/or modify
 **  it under the terms of the GNU General Public License as published by
 **  the Free Software Foundation, either version 3 of the License, or
 **  (at your option) any later version.
 **
 **  This program is distributed in the hope that it will be useful,
 **  but WITHOUT ANY WARRANTY; without even the implied warranty of
 **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 **  GNU General Public License for more details.
 **
 **  You should have received a copy of the GNU General Public License
 **  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **
 **  Additional permission under GNU GPL version 3 section 7
 **
 **  If you modify this Program, or any covered work, by linking or
 **  combining it with NVIDIA Corporation's CUDA libraries from the
 **  NVIDIA CUDA Toolkit (or a modified version of those libraries),
 **  containing parts covered by the terms of NVIDIA CUDA Toolkit
 **  EULA, the licensors of this Program grant you additional
 **  permission to convey the resulting work.
 ** -----------------------------------------------------------------------------**
 */

/**
 **************************************************************************
 * \file TileProcessor.h
 * \brief Header file for the Tile Processor for frequency domain
 *
 */
#pragma once
#ifndef NUM_CAMS
#include "tp_defines.h"
#endif

44 45 46 47 48 49

/**
 * \brief Declaration of the convert_direct CUDA kernel (definition is not in this header).
 *
 * Per the original author's note, it is called with a single block, single thread.
 * gpu_kernel_offsets used to be declared as `struct CltExtra **`; it was changed to
 * `float **` for jcuda to avoid struct parameters.
 *
 * NOTE(review): woi_width/woi_height and kernels_hor/kernels_vert carry no
 * description in the original source - confirm meaning and units against the definition.
 */
extern "C" __global__ void convert_direct(
		float           ** gpu_kernel_offsets, // [NUM_CAMS]
		float           ** gpu_kernels,        // [NUM_CAMS]
		float           ** gpu_images,         // [NUM_CAMS]
		struct tp_task   * gpu_tasks,
		float           ** gpu_clt,            // [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
		size_t             dstride,            // in floats (pixels)
		int                num_tiles,          // number of tiles in task
		int                lpf_mask,           // apply lpf to colors: bit 0 - red, bit 1 - blue, bit 2 - green. Now - always 0 !
		int                woi_width,
		int                woi_height,
		int                kernels_hor,
		int                kernels_vert,
		// NOTE(review): the two comments below look swapped relative to the
		// parameter names (an index array vs. a pointer to a count) - confirm
		// against the kernel definition before relying on them.
		int *              gpu_active_tiles,   // (original comment) pointer to the calculated number of non-zero tiles
		int *              pnum_active_tiles,  // (original comment) indices to gpu_tasks
		int                tilesx);

63

64
/**
 * \brief Declaration of the correlate2D CUDA kernel (definition is not in this header).
 *
 * Parameter notes are carried over from the original inline comments.
 */
extern "C" __global__ void correlate2D(
		float          ** gpu_clt,            // [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
		int               colors,             // number of colors (3/1)
		float             scale0,             // scale for R
		float             scale1,             // scale for B
		float             scale2,             // scale for G
		float             fat_zero,           // here - absolute
		struct tp_task  * gpu_tasks,          // array of per-tile tasks (now bits 4..9 - correlation pairs)
		int               num_tiles,          // number of tiles in task
		int               tilesx,             // (original comment) number of tile rows
		                                      // NOTE(review): the name suggests tiles per row - confirm
		int             * gpu_corr_indices,   // packed tile+pair
		int             * pnum_corr_tiles,    // pointer to a number of correlation tiles to process
		const size_t      corr_stride,        // in floats
		int               corr_radius,        // radius of the output correlation (7 for 15x15)
		float           * gpu_corrs);         // correlation output data
79

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
/**
 * \brief Declaration of the corr2D_normalize CUDA kernel (definition is not in this header).
 *
 * Takes correlation tiles in the transform domain (gpu_corrs_td) and an output
 * buffer (gpu_corrs), each with its own stride given in floats.
 * NOTE(review): the exact normalization performed is not visible here - see the definition.
 */
extern "C" __global__ void corr2D_normalize(
		int               num_corr_tiles,     // number of correlation tiles to process
		const size_t      corr_stride_td,     // in floats
		float           * gpu_corrs_td,       // correlation tiles in transform domain
		const size_t      corr_stride,        // in floats
		float           * gpu_corrs,          // correlation output data (either pixel domain or transform domain)
		float             fat_zero,           // here - absolute
		int               corr_radius);        // radius of the output correlation (7 for 15x15)

/**
 * \brief Declaration of the corr2D_combine CUDA kernel (definition is not in this header).
 *
 * Inputs are per-pair correlation tiles (gpu_corrs); the output holds one
 * combined tile per input tile (gpu_corrs_combo). Strides are given in floats.
 */
extern "C" __global__ void corr2D_combine(
		int               num_tiles,          // number of tiles to process (each with num_pairs)
		int               num_pairs,          // number of pairs per tile (should be the same)
		int               init_output,        // !=0 - reset output tiles to zero before accumulating
		int               pairs_mask,         // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
		int             * gpu_corr_indices,   // packed tile+pair
		int             * gpu_combo_indices,  // output if not null: packed tile+pairs_mask (will point to the first used pair)
		const size_t      corr_stride,        // (in floats) stride for the input TD correlations
		float           * gpu_corrs,          // input correlation tiles
		const size_t      corr_stride_combo,  // (in floats) stride for the output TD correlations (same as input)
		float           * gpu_corrs_combo);   // combined correlation output (one per tile)
100

101 102 103
/**
 * \brief Declaration of the textures_nonoverlap CUDA kernel (definition is not in this header).
 *
 * Per the original author's note, this entry combines both non-overlap and
 * overlap outputs; each is calculated only if its pointer is not null.
 *
 * params[5] and weights[3] use array syntax, which in a parameter list is
 * equivalent to `float *`; the sizes are documentation only.
 * NOTE(review): the meaning of the five `params` entries is not documented here.
 *
 * Open questions left by the original author:
 *  - declare the index arrays in device code?
 *  - TODO: use geometry_correction rXY
 */
extern "C" __global__ void textures_nonoverlap(
		struct tp_task  * gpu_tasks,
		int               num_tiles,           // number of tiles in task list
		int             * gpu_texture_indices, // packed tile + bits (now only (1 << 7))
		int             * pnum_texture_tiles,  // returns total number of elements in gpu_texture_indices array
		float          ** gpu_clt,             // [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
		struct gc       * gpu_geometry_correction,
		int               colors,              // number of colors (3/1)
		int               is_lwir,             // do not perform shot correction
		float             params[5],
		float             weights[3],          // scale for R,B,G
		int               dust_remove,         // Do not reduce average weight when only one image differs much from the average
		size_t            texture_stride,      // in floats (now 256*4 = 1024); may be 0 if not needed
		float           * gpu_texture_tiles,   // (number of colors +1 + ?)*16*16 rgba texture tiles; may be 0 if not needed
		float           * gpu_diff_rgb_combo,  // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS], G[NUM_CAMS]; may be 0 if not needed
		int               num_tilesx);
121

Andrey Filippov's avatar
Andrey Filippov committed
122 123
/**
 * \brief Declaration of the imclt_rbg_all CUDA kernel (definition is not in this header).
 *
 * NOTE(review): apply_lpf, colors, woi_twidth and woi_theight carry no
 * description in the original source - confirm against the definition.
 */
extern "C" __global__ void imclt_rbg_all(
		float           ** gpu_clt,            // [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
		float           ** gpu_corr_images,    // [NUM_CAMS][WIDTH, 3 * HEIGHT]
		int                apply_lpf,
		int                colors,
		int                woi_twidth,
		int                woi_theight,
		const size_t       dstride);           // in floats (pixels)

132
/**
 * \brief Declaration of the imclt_rbg CUDA kernel (definition is not in this header).
 *
 * NOTE(review): v_offset, h_offset, woi_twidth and woi_theight carry no
 * description in the original source - confirm against the definition.
 */
extern "C" __global__ void imclt_rbg(
		float           * gpu_clt,            // [TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
		float           * gpu_rbg,            // WIDTH, 3 * HEIGHT
		int               apply_lpf,
		int               mono,               // defines lpf filter
		int               color,              // defines location of clt data
		int               v_offset,
		int               h_offset,
		int               woi_twidth,
		int               woi_theight,
		const size_t      dstride);           // in floats (pixels)
143

144 145 146 147 148 149 150 151
/**
 * \brief Declaration of the generate_RBGA CUDA kernel (definition is not in this header).
 *
 * The parameter list has two groups, as marked by the original author:
 * parameters to generate texture tasks, then parameters for the texture
 * generation itself.
 *
 * params[5] and weights[3] use array syntax, which in a parameter list is
 * equivalent to `float *`; the sizes are documentation only.
 * NOTE(review): the meaning of the five `params` entries is not documented here.
 *
 * Open questions left by the original author:
 *  - declare the index arrays in device code?
 *  - TODO: use geometry_correction rXY
 */
extern "C" __global__ void generate_RBGA(
		// Parameters to generate texture tasks
		struct tp_task   * gpu_tasks,
		int                num_tiles,           // number of tiles in task list
		int              * gpu_texture_indices, // packed tile + bits (now only (1 << 7))
		int              * num_texture_tiles,   // number of texture tiles to process (8 separate elements for accumulation)
		int              * woi,                 // x,y,width,height of the woi
		int                width,               // <= TILES-X, use for faster processing of LWIR images (should be actual + 1)
		int                height,              // <= TILES-Y, use for faster processing of LWIR images
		// Parameters for the texture generation
		float           ** gpu_clt,             // [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
		struct gc        * gpu_geometry_correction,
		int                colors,              // number of colors (3/1)
		int                is_lwir,             // do not perform shot correction
		float              params[5],           // mitigating CUDA_ERROR_INVALID_PTX
		float              weights[3],          // scale for R,B,G
		int                dust_remove,         // Do not reduce average weight when only one image differs much from the average
		int                keep_weights,        // return channel weights after A in RGBA (was removed)
		const size_t       texture_rbga_stride, // in floats
		float            * gpu_texture_tiles);  // (number of colors +1 + ?)*16*16 rgba texture tiles
166