implemented (not tested) corr2D_normalize()

47b1fb86 · Andrey Filippov · 75fa734d · 47b1fb86 · 47b1fb86 · 47b1fb86
Commit 47b1fb86 authored Aug 26, 2020 by Andrey Filippov
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 248 additions and 45 deletions

TileProcessor.cuh src/TileProcessor.cuh +203 -9

TileProcessor.h src/TileProcessor.h +8 -0

tp_defines.h src/tp_defines.h +37 -36

No files found.
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
--- a/src/TileProcessor.h
+++ b/src/TileProcessor.h
@@ -77,6 +77,14 @@ extern "C" __global__ void correlate2D(
 		int               corr_radius,        // radius of the output correlation (7 for 15x15)
 		float           * gpu_corrs);          // correlation output data
+extern "C" __global__ void corr2D_normalize(
+		int               num_corr_tiles,     // number of correlation tiles to process
+		const size_t      corr_stride_td,     // in floats
+		float           * gpu_corrs_td,       // correlation tiles in transform domain
+		const size_t      corr_stride,        // in floats
+		float           * gpu_corrs,          // correlation output data (either pixel domain or transform domain)
+		float             fat_zero,           // here - absolute
+		int               corr_radius);        // radius of the output correlation (7 for 15x15)
 extern "C" __global__ void textures_nonoverlap(
 		struct tp_task  * gpu_tasks,

--- a/src/tp_defines.h
+++ b/src/tp_defines.h
@@ -40,42 +40,43 @@
 #pragma once
 #ifndef JCUDA
 #include <stdio.h>
-#define THREADSX         (DTT_SIZE)
+#define THREADSX              (DTT_SIZE)
-#define NUM_CAMS                  4
+#define NUM_CAMS                       4
-#define NUM_PAIRS                 6
+#define NUM_PAIRS                      6
-#define NUM_COLORS                3
+#define NUM_COLORS                     3
-#define IMG_WIDTH              2592
+#define IMG_WIDTH                   2592
-#define IMG_HEIGHT             1936
+#define IMG_HEIGHT                  1936
-#define KERNELS_HOR             164
+#define KERNELS_HOR                  164
-#define KERNELS_VERT            123
+#define KERNELS_VERT                 123
-#define KERNELS_LSTEP             4
+#define KERNELS_LSTEP                  4
-#define THREADS_PER_TILE          8
+#define THREADS_PER_TILE               8
-#define TILES_PER_BLOCK           4
+#define TILES_PER_BLOCK                4
-#define CORR_THREADS_PER_TILE     8
+#define CORR_THREADS_PER_TILE          8
-#define CORR_TILES_PER_BLOCK      4
+#define CORR_TILES_PER_BLOCK           4
-#define TEXTURE_THREADS_PER_TILE  8
+#define CORR_TILES_PER_BLOCK_NORMALIZE 4
-#define TEXTURE_TILES_PER_BLOCK   1
+#define TEXTURE_THREADS_PER_TILE       8
-#define IMCLT_THREADS_PER_TILE   16
+#define TEXTURE_TILES_PER_BLOCK        1
-#define IMCLT_TILES_PER_BLOCK     4
+#define IMCLT_THREADS_PER_TILE        16
-#define CORR_NTILE_SHIFT          8 // higher bits - number of a pair, other bits tile number
+#define IMCLT_TILES_PER_BLOCK          4
-#define CORR_PAIRS_MASK        0x3f// lower bits used to address correlation pair for the selected tile
+#define CORR_NTILE_SHIFT               8 // higher bits - number of a pair, other bits tile number
-#define CORR_TEXTURE_BIT          7 // bit 7 used to request texture for the tile
+#define CORR_PAIRS_MASK             0x3f// lower bits used to address correlation pair for the selected tile
-#define TASK_CORR_BITS            4
+#define CORR_TEXTURE_BIT               7 // bit 7 used to request texture for the tile
-#define TASK_TEXTURE_N_BIT        0 // Texture with North neighbor
+#define TASK_CORR_BITS                 4
-#define TASK_TEXTURE_E_BIT        1 // Texture with East  neighbor
+#define TASK_TEXTURE_N_BIT             0 // Texture with North neighbor
-#define TASK_TEXTURE_S_BIT        2 // Texture with South neighbor
+#define TASK_TEXTURE_E_BIT             1 // Texture with East  neighbor
-#define TASK_TEXTURE_W_BIT        3 // Texture with West  neighbor
+#define TASK_TEXTURE_S_BIT             2 // Texture with South neighbor
-#define TASK_TEXTURE_BIT          3 // bit to request texture calculation int task field of struct tp_task
+#define TASK_TEXTURE_W_BIT             3 // Texture with West  neighbor
-#define LIST_TEXTURE_BIT          7 // bit to request texture calculation
+#define TASK_TEXTURE_BIT               3 // bit to request texture calculation int task field of struct tp_task
-#define CORR_OUT_RAD              4
+#define LIST_TEXTURE_BIT               7 // bit to request texture calculation
-#define FAT_ZERO_WEIGHT           0.0001 // add to port weights to avoid nan
+#define CORR_OUT_RAD                   4
+#define FAT_ZERO_WEIGHT                0.0001 // add to port weights to avoid nan
-#define THREADS_DYNAMIC_BITS      5 // treads in block for CDP creation of the texture list
+#define THREADS_DYNAMIC_BITS           5 // threads in block for CDP creation of the texture list
-#define DBG_DISPARITY            0.0 // 56.0 // disparity for which to calculate offsets (not needed in Java)
-#define RBYRDIST_LEN           5001   // for doubles 10001 - floats   // length of rByRDist to allocate shared memory
+#define DBG_DISPARITY                  0.0 // 56.0 // disparity for which to calculate offsets (not needed in Java)
-#define RBYRDIST_STEP             0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
+#define RBYRDIST_LEN                5001   // for doubles 10001 - floats   // length of rByRDist to allocate shared memory
-#define TILES_PER_BLOCK_GEOM     (32/NUM_CAMS)   // each tile has NUM_CAMS threads
+#define RBYRDIST_STEP                  0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
+#define TILES_PER_BLOCK_GEOM          (32/NUM_CAMS)   // each tile has NUM_CAMS threads
 // only used in C++ test
 #define TILESX        (IMG_WIDTH / DTT_SIZE)