Commit 44e87f14 authored by Andrey Filippov's avatar Andrey Filippov

more TILESX

parent bb7792f8
This diff is collapsed.
...@@ -48,7 +48,7 @@ extern "C" __global__ void convert_direct( // called with a single block, single ...@@ -48,7 +48,7 @@ extern "C" __global__ void convert_direct( // called with a single block, single
float ** gpu_kernels, // [NUM_CAMS], float ** gpu_kernels, // [NUM_CAMS],
float ** gpu_images, // [NUM_CAMS], float ** gpu_images, // [NUM_CAMS],
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t dstride, // in floats (pixels) size_t dstride, // in floats (pixels)
int num_tiles, // number of tiles in task int num_tiles, // number of tiles in task
int lpf_mask, // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 ! int lpf_mask, // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
...@@ -57,10 +57,12 @@ extern "C" __global__ void convert_direct( // called with a single block, single ...@@ -57,10 +57,12 @@ extern "C" __global__ void convert_direct( // called with a single block, single
int kernels_hor, int kernels_hor,
int kernels_vert, int kernels_vert,
int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
int * pnum_active_tiles); // indices to gpu_tasks int * pnum_active_tiles, // indices to gpu_tasks
int tilesx);
extern "C" __global__ void correlate2D( extern "C" __global__ void correlate2D(
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
float scale0, // scale for R float scale0, // scale for R
float scale1, // scale for B float scale1, // scale for B
...@@ -83,7 +85,7 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -83,7 +85,7 @@ extern "C" __global__ void textures_nonoverlap(
// declare arrays in device code? // declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY ! // TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
...@@ -99,7 +101,7 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -99,7 +101,7 @@ extern "C" __global__ void textures_nonoverlap(
extern "C" extern "C"
__global__ void imclt_rbg_all( __global__ void imclt_rbg_all(
float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_corr_images, // [NUM_CAMS][WIDTH, 3 * HEIGHT] float ** gpu_corr_images, // [NUM_CAMS][WIDTH, 3 * HEIGHT]
int apply_lpf, int apply_lpf,
int colors, int colors,
...@@ -108,7 +110,7 @@ __global__ void imclt_rbg_all( ...@@ -108,7 +110,7 @@ __global__ void imclt_rbg_all(
const size_t dstride); // in floats (pixels) const size_t dstride); // in floats (pixels)
extern "C" __global__ void imclt_rbg( extern "C" __global__ void imclt_rbg(
float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float * gpu_clt, // [TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float * gpu_rbg, // WIDTH, 3 * HEIGHT float * gpu_rbg, // WIDTH, 3 * HEIGHT
int apply_lpf, int apply_lpf,
int mono, // defines lpf filter int mono, // defines lpf filter
...@@ -127,10 +129,10 @@ extern "C" __global__ void generate_RBGA( ...@@ -127,10 +129,10 @@ extern "C" __global__ void generate_RBGA(
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation) int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation)
int * woi, // x,y,width,height of the woi int * woi, // x,y,width,height of the woi
int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1) int width, // <= TILES-X, use for faster processing of LWIR images (should be actual + 1)
int height, // <= TILESY, use for faster processing of LWIR images int height, // <= TILES-Y, use for faster processing of LWIR images
// Parameters for the texture generation // Parameters for the texture generation
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY ! // TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
......
...@@ -870,7 +870,9 @@ int main(int argc, char **argv) ...@@ -870,7 +870,9 @@ int main(int argc, char **argv)
KERNELS_HOR, // int kernels_hor, KERNELS_HOR, // int kernels_hor,
KERNELS_VERT, // int kernels_vert); KERNELS_VERT, // int kernels_vert);
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
gpu_num_active); // int * pnum_active_tiles); // indices to gpu_tasks gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx)
getLastCudaError("Kernel execution failed"); getLastCudaError("Kernel execution failed");
checkCudaErrors(cudaDeviceSynchronize()); checkCudaErrors(cudaDeviceSynchronize());
......
...@@ -77,6 +77,14 @@ ...@@ -77,6 +77,14 @@
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001); #define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads #define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
// only used in C++ test
#define TILESX (IMG_WIDTH / DTT_SIZE)
//#define TILESY (IMG_HEIGHT / DTT_SIZE)
#define TILESYA ((TILESY +3) & (~3))
#define DEBUG_OOB1 1 #define DEBUG_OOB1 1
// Use CORR_OUT_RAD for the correlation output // Use CORR_OUT_RAD for the correlation output
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment