updated kernels

3c033192 · Andrey Filippov · 514057c6 · 3c033192 · 3c033192 · 3c033192
Commit 3c033192 authored Apr 13, 2020 by Andrey Filippov
5 changed files
--- a/src/main/resources/kernels/TileProcessor.cuh
+++ b/src/main/resources/kernels/TileProcessor.cuh
--- a/src/main/resources/kernels/TileProcessor.h
+++ b/src/main/resources/kernels/TileProcessor.h
@@ -41,6 +41,18 @@
 #include "tp_defines.h"
 #endif

+
+extern "C"
+__global__ void convert_correct_tiles(
+		float           ** gpu_kernel_offsets, // [NUM_CAMS] array of pointers
+		float           ** gpu_kernels,        // [NUM_CAMS] array of pointers
+		float           ** gpu_images,         // [NUM_CAMS] array of pointers
+		struct tp_task   * gpu_tasks,          // presumably num_tiles entries - TODO confirm against the kernel body
+		float           ** gpu_clt,            // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
+		size_t             dstride,            // in floats (pixels)
+		int                num_tiles,          // number of tiles in task
+		int                lpf_mask);          // apply lpf to colors: bit 0 - red, bit 1 - blue, bit 2 - green. Currently always 0!
+
 extern "C" __global__ void clear_texture_list(
 		int              * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 		int                width,  // <= TILESX, use for faster processing of LWIR images
@@ -68,12 +80,12 @@ extern "C" __global__ void clear_texture_rbga(
 		const size_t      texture_rbga_stride,     // in floats 8*stride
 		float           * gpu_texture_tiles);  // (number of colors +1 + ?)*16*16 rgba texture tiles
 extern "C" __global__ void textures_accumulate(
-//		int               border_tile,        // if 1 - watch for border
 		int             * woi,                // x, y, width,height
 		float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 		size_t            num_texture_tiles,  // number of texture tiles to process
 		int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
-		float           * gpu_port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
+		// TODO: use geometry_correction rXY !
+		float           * gpu_port_offsets,   // relative ports x,y offsets - just to scale differences, may be approximate
 		int               colors,             // number of colors (3/1)
 		int               is_lwir,            // do not perform shot correction
 		float             min_shot,           // 10.0
@@ -102,5 +114,35 @@ extern "C" __global__ void imclt_rbg(
 		int               h_offset,
 		const size_t      dstride);            // in floats (pixels)

+extern "C"
+__global__ void generate_RBGA(
+// Parameters used to generate the texture tasks
+			struct tp_task   * gpu_tasks,
+			int                num_tiles,          // number of tiles in task list
+// Open question: declare the following arrays in device code?
+			int              * gpu_texture_indices,// packed tile + bits (now only (1 << 7))
+			int              * num_texture_tiles,  // number of texture tiles to process (8 separate elements for accumulation)
+			int              * woi,                // x, y, width, height of the woi
+			int                width,  // <= TILESX, use for faster processing of LWIR images (should be actual + 1)
+			int                height, // <= TILESY, use for faster processing of LWIR images
+// Parameters for the texture generation
+			float          ** gpu_clt,            // [NUM_CAMS] -> [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
+			// TODO: use geometry_correction rXY!
+			float           * gpu_port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
+			int               colors,             // number of colors (3/1)
+			int               is_lwir,            // do not perform shot correction
+			float             min_shot,           // 10.0
+			float             scale_shot,         // 3.0
+			float             diff_sigma,         // pixel value/pixel change
+			float             diff_threshold,     // pixel value/pixel change
+			float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
+			float             weight0,            // scale for R
+			float             weight1,            // scale for B
+			float             weight2,            // scale for G
+			int               dust_remove,        // do not reduce average weight when only one image differs much from the average
+			int               keep_weights,       // return channel weights after A in RGBA (was removed)
+			const size_t      texture_rbga_stride,     // in floats
+			float           * gpu_texture_tiles);  // (number of colors + 1 + ?)*16*16 rgba texture tiles
+


--- a/src/main/resources/kernels/dtt8x8.h
+++ b/src/main/resources/kernels/dtt8x8.h
@@ -72,9 +72,9 @@


 // kernels (not used so far)
-#ifdef BBBB
+#if 0
 extern "C" __global__ void GPU_DTT24_DRV(float *dst, float *src, int src_stride, int dtt_mode);
-#endif// #ifdef BBBB
+#endif// #if 0

 //=========================== 2D functions ===============
 extern __device__ void corrUnfoldTile(

--- a/src/main/resources/kernels/geometry_correction.cu
+++ b/src/main/resources/kernels/geometry_correction.cu
--- a/src/main/resources/kernels/geometry_correction.h
+++ b/src/main/resources/kernels/geometry_correction.h
@@ -41,6 +41,19 @@
 #include "tp_defines.h"
 #endif

+#define NVRTC_BUG 1 // when defined: trot_deriv is a plain struct and struct gc does not wrap the distortion coefficients in an anonymous union
+#ifndef M_PI
+#define M_PI  3.14159265358979323846 /* pi; NOTE: unsuffixed literal, i.e. double precision */
+#endif
+#ifndef offsetof // fallback definition, used only when offsetof is not already defined
+#define offsetof(st, m) \
+    ((size_t)&(((st *)0)->m))
+//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) // alternative via the compiler builtin (disabled)
+#endif
+
+
+#define SCENE_UNITS_SCALE  0.001 // meters from mm (unsuffixed literal, i.e. double precision)
+#define MIN_DISPARITY      0.01  // minimal disparity to try to convert to world coordinates
 struct tp_task {
 	int   task;
 	union {
@@ -61,19 +74,50 @@ struct corr_vector{
 	float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
 	float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19
 };
+#ifdef NVRTC_BUG // plain-struct variant: five separate arrays, no union
+struct trot_deriv{
+	float rots    [NUM_CAMS][3][3]; // one 3x3 matrix per camera
+	float d_daz   [NUM_CAMS][3][3]; // presumably derivative of rots by azimuth - TODO confirm in calc_rot_deriv
+	float d_tilt  [NUM_CAMS][3][3]; // presumably derivative of rots by tilt - TODO confirm
+	float d_roll  [NUM_CAMS][3][3]; // presumably derivative of rots by roll - TODO confirm
+	float d_zoom  [NUM_CAMS][3][3]; // presumably derivative of rots by zoom - TODO confirm
+};
+#else // union variant: same five arrays, additionally addressable as matrices[5][NUM_CAMS][3][3]
+union trot_deriv{
+	struct {
+		float rots    [NUM_CAMS][3][3]; // one 3x3 matrix per camera
+		float d_daz   [NUM_CAMS][3][3]; // presumably derivative of rots by azimuth - TODO confirm in calc_rot_deriv
+		float d_tilt  [NUM_CAMS][3][3]; // presumably derivative of rots by tilt - TODO confirm
+		float d_roll  [NUM_CAMS][3][3]; // presumably derivative of rots by roll - TODO confirm
+		float d_zoom  [NUM_CAMS][3][3]; // presumably derivative of rots by zoom - TODO confirm
+	};
+	float matrices [5][NUM_CAMS][3][3]; // overlays the five arrays of the anonymous struct above
+};
+#endif

 struct gc {
+	float pixelCorrectionWidth; //  =2592;   // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2)
+	float pixelCorrectionHeight; // =1936;
+	float line_time;        // duration of one scan line readout (for ERS)
 	float focalLength;      // =FOCAL_LENGTH;
 	float pixelSize;        // =  PIXEL_SIZE; //um
 	float distortionRadius; // =  DISTORTION_RADIUS; // mm - half width of the sensor
-
-	float distortionA8;     //r^8 (normalized to focal length or to sensor half width?)
-	float distortionA7;     //r^7 (normalized to focal length or to sensor half width?)
-	float distortionA6;     //r^6 (normalized to focal length or to sensor half width?)
-	float distortionA5;     //r^5 (normalized to focal length or to sensor half width?)
-	float distortionA;      // r^4 (normalized to focal length or to sensor half width?)
-	float distortionB;      // r^3
-	float distortionC;      // r^2
+#ifndef	NVRTC_BUG
+	union {
+		struct {
+#endif
+			float distortionC;      // r^2
+			float distortionB;      // r^3
+			float distortionA;      // r^4 (normalized to focal length or to sensor half width?)
+			float distortionA5;     //r^5 (normalized to focal length or to sensor half width?)
+			float distortionA6;     //r^6 (normalized to focal length or to sensor half width?)
+			float distortionA7;     //r^7 (normalized to focal length or to sensor half width?)
+			float distortionA8;     //r^8 (normalized to focal length or to sensor half width?)
+#ifndef	NVRTC_BUG
+//		};
+//		float rad_coeff [7];
+//	};
+#endif
 	// parameters, common for all sensors
 	float    elevation;     // degrees, up - positive;
 	float    heading;       // degrees, CW (from top) - positive
@@ -81,19 +125,37 @@ struct gc {
 	float forward    [NUM_CAMS];
 	float right      [NUM_CAMS];
 	float height     [NUM_CAMS];
-	float roll       [NUM_CAMS];  // degrees, CW (to target) - positive
-
+	float roll       [NUM_CAMS];    // degrees, CW (to target) - positive
+	float pXY0       [NUM_CAMS][2];
 	float common_right;    // mm right, camera center
 	float common_forward;  // mm forward (to target), camera center
 	float common_height;   // mm up, camera center
 	float common_roll;     // degrees CW (to target) camera as a whole
 //	float [][] XYZ_he;     // all cameras coordinates transformed to eliminate heading and elevation (rolls preserved)
 //	float [][] XYZ_her = null; // XYZ of the lenses in a corrected CCS (adjusted for to elevation, heading,  common_roll)
-	float rXY        [NUM_CAMS][3]; // XY pairs of the in a normal plane, relative to disparityRadius
+	float rXY        [NUM_CAMS][2]; // XY pairs of the in a normal plane, relative to disparityRadius
 //	float [][] rXY_ideal = {{-0.5, -0.5}, {0.5,-0.5}, {-0.5, 0.5}, {0.5,0.5}};
 // only used for the multi-quad systems
 	float cameraRadius; // =0; // average distance from the "mass center" of the sensors to the sensors
 	float disparityRadius; // =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
 };
+#define RAD_COEFF_LEN 7
+extern "C" __global__ void get_tiles_offsets(
+		struct tp_task     * gpu_tasks,          // presumably num_tiles entries - TODO confirm against the kernel body
+		int                  num_tiles,          // number of tiles in task
+		struct gc          * gpu_geometry_correction, // geometry correction parameters (struct gc)
+		struct corr_vector * gpu_correction_vector,   // correction vector (struct corr_vector)
+		float *              gpu_rByRDist, // length should match RBYRDIST_LEN
+		trot_deriv   * gpu_rot_deriv);     // trot_deriv (struct with NVRTC_BUG defined, union otherwise): rots and d_* matrices per camera
+
+#if 0
+// uses 3 threadIdx.x, 3 - threadIdx.y, 4 - threadIdx.z
+extern "C" __global__ void calc_rot_matrices(
+		struct corr_vector * gpu_correction_vector);
+#endif
+// launch configuration: NUM_CAMS blocks, (3,3,3) threads per block
+extern "C" __global__ void calc_rot_deriv(
+		struct corr_vector * gpu_correction_vector, // correction vector (struct corr_vector)
+		trot_deriv   * gpu_rot_deriv);              // trot_deriv matrices; presumably the output of this kernel - TODO confirm