modified to be compatible with nvrtc

abc2d76d · Andrey Filippov · a6844c60 · abc2d76d · abc2d76d · abc2d76d
Commit abc2d76d authored Apr 13, 2020 by Andrey Filippov
Hide whitespace changes
Inline Side-by-side

Showing with 70 additions and 11 deletions

geometry_correction.cu src/geometry_correction.cu +31 -3

geometry_correction.h src/geometry_correction.h +31 -6

test_tp.cu src/test_tp.cu +8 -2

No files found.
--- a/src/geometry_correction.cu
+++ b/src/geometry_correction.cu
@@ -322,7 +322,7 @@ __constant__ int offset_tmp =      12; // 12..15
 */
 extern "C" __global__ void calc_rot_deriv(
 		struct corr_vector * gpu_correction_vector,
-		union trot_deriv   * gpu_rot_deriv)
+		trot_deriv   * gpu_rot_deriv)
 {
 //	__shared__ float zoom;
 	__shared__ float sincos  [4][2];    // {az,tilt,roll, d_az, d_tilt, d_roll, d_az}{cos,sin}
@@ -446,11 +446,21 @@ extern "C" __global__ void calc_rot_deriv(
 // copy results to global memory
 	int gindx = threadIdx.z;
 	int lindx = offset_rots + threadIdx.z;
+#ifdef NVRTC_BUG
+	// going beyond first dimension
+	gpu_rot_deriv->rots[ncam + gindx * NUM_CAMS][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
+#else
 	gpu_rot_deriv->matrices[gindx][ncam][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
+#endif
 	gindx +=3;
 	lindx+=3;
 	if (lindx < 5) {
+#ifdef NVRTC_BUG
+	// going beyond first dimension
+		gpu_rot_deriv->rots[ncam + gindx * NUM_CAMS][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
+#else
 		gpu_rot_deriv->matrices[gindx][ncam][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
+#endif
 	}
 	__syncthreads();
 #ifdef DEBUG21
@@ -476,10 +486,13 @@ extern "C" __global__ void get_tiles_offsets(
 		struct gc          * gpu_geometry_correction,
 		struct corr_vector * gpu_correction_vector,
 		float *              gpu_rByRDist,      // length should match RBYRDIST_LEN
-		union trot_deriv   * gpu_rot_deriv)
+		trot_deriv   * gpu_rot_deriv)
 {
 //	int task_num = blockIdx.x * blockDim.x + threadIdx.x; //  blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.x
 	int task_num = blockIdx.x * blockDim.y + threadIdx.y; //  blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y
+	if (task_num >= num_tiles){
+		return;
+	}
 	int thread_xy = blockDim.x * threadIdx.y + threadIdx.x;
 	int ncam = threadIdx.x;
 	// threadIdx.x - numcam, used for per-camera
@@ -645,11 +658,18 @@ extern "C" __global__ void get_tiles_offsets(
 	float rD2rND = 1.0;
 	{
 		float rri = 1.0;
+#ifdef NVRTC_BUG
 #pragma unroll
+		for (int j = 0; j < RAD_COEFF_LEN; j++){
+			rri *= ri;
+			rD2rND +=  ((float *) &geometry_correction.distortionC)[j]*(rri - 1.0);
+		}
+#else
 		for (int j = 0; j < sizeof(geometry_correction.rad_coeff)/sizeof(float); j++){
 			rri *= ri;
 			rD2rND += geometry_correction.rad_coeff[j]*(rri - 1.0);
 		}
+#endif
 	}
 	// Get port pixel coordinates by scaling the 2d vector with Rdistorted/Dnondistorted coefficient)
 	float pXid = pXci * rD2rND;
@@ -759,15 +779,23 @@ extern "C" __global__ void get_tiles_offsets(
 				disp_dist[i][3] =   dd2.get(1, 1);
 */
+//#undef NVRTC_BUG
 	float drD2rND_dri = 0.0;
 	{
 		float rri = 1.0;
+#ifdef NVRTC_BUG
+#pragma unroll
+		for (int j = 0; j < RAD_COEFF_LEN; j++){
+			drD2rND_dri += ((float *) &geometry_correction.distortionC)[j] * (j+1) * rri;
+			rri *= ri;
+		}
+#else
 #pragma unroll
 		for (int j = 0; j < sizeof(geometry_correction.rad_coeff)/sizeof(float); j++){
 			drD2rND_dri += geometry_correction.rad_coeff[j] * (j+1) * rri;
 			rri *= ri;
 		}
+#endif
 	}
 	float scale_distort00 = rD2rND + ri* drD2rND_dri;
 	float scale_distort11 = rD2rND;

--- a/src/geometry_correction.h
+++ b/src/geometry_correction.h
@@ -41,6 +41,17 @@
 #include "tp_defines.h"
 #endif
+#define NVRTC_BUG 1
+#ifndef M_PI
+#define M_PI  3.14159265358979323846 /* pi */
+#endif
+#ifndef offsetof
+#define offsetof(st, m) \
+    ((size_t)&(((st *)0)->m))
+//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+#endif
 #define SCENE_UNITS_SCALE  0.001 // meters from mm
 #define MIN_DISPARITY      0.01  // minimal disparity to try to convert to world coordinates
 struct tp_task {
@@ -63,7 +74,15 @@ struct corr_vector{
 	float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
 	float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19
 };
+#ifdef NVRTC_BUG
+struct trot_deriv{
+	float rots    [NUM_CAMS][3][3];
+	float d_daz   [NUM_CAMS][3][3];
+	float d_tilt  [NUM_CAMS][3][3];
+	float d_roll  [NUM_CAMS][3][3];
+	float d_zoom  [NUM_CAMS][3][3];
+};
+#else
 union trot_deriv{
 	struct {
 		float rots    [NUM_CAMS][3][3];
@@ -74,6 +93,7 @@ union trot_deriv{
 	};
 	float matrices [5][NUM_CAMS][3][3];
 };
+#endif
 struct gc {
 	float pixelCorrectionWidth; //  =2592;   // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2)
@@ -82,8 +102,10 @@ struct gc {
 	float focalLength;      // =FOCAL_LENGTH;
 	float pixelSize;        // =  PIXEL_SIZE; //um
 	float distortionRadius; // =  DISTORTION_RADIUS; // mm - half width of the sensor
+#ifndef	NVRTC_BUG
 	union {
 		struct {
+#endif
 			float distortionC;      // r^2
 			float distortionB;      // r^3
 			float distortionA;      // r^4 (normalized to focal length or to sensor half width?)
@@ -91,9 +113,11 @@ struct gc {
 			float distortionA6;     //r^6 (normalized to focal length or to sensor half width?)
 			float distortionA7;     //r^7 (normalized to focal length or to sensor half width?)
 			float distortionA8;     //r^8 (normalized to focal length or to sensor half width?)
-		};
+#ifndef	NVRTC_BUG
-		float rad_coeff [7];
+//		};
-	};
+//		float rad_coeff [7];
+//	};
+#endif
 	// parameters, common for all sensors
 	float    elevation;     // degrees, up - positive;
 	float    heading;       // degrees, CW (from top) - positive
@@ -115,13 +139,14 @@ struct gc {
 	float cameraRadius; // =0; // average distance from the "mass center" of the sensors to the sensors
 	float disparityRadius; // =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
 };
+#define RAD_COEFF_LEN 7
 extern "C" __global__ void get_tiles_offsets(
 		struct tp_task     * gpu_tasks,
 		int                  num_tiles,          // number of tiles in task
 		struct gc          * gpu_geometry_correction,
 		struct corr_vector * gpu_correction_vector,
 		float *              gpu_rByRDist, // length should match RBYRDIST_LEN
-		union trot_deriv   * gpu_rot_deriv);
+		trot_deriv   * gpu_rot_deriv);
 #if 0
 // uses 3 threadIdx.x, 3 - threadIdx.y, 4 - threadIdx.z
@@ -131,6 +156,6 @@ extern "C" __global__ void calc_rot_matrices(
 // uses NUM_CAMS blocks, (3,3,3) threads
 extern "C" __global__ void calc_rot_deriv(
 		struct corr_vector * gpu_correction_vector,
-		union trot_deriv   * gpu_rot_deriv);
+		trot_deriv   * gpu_rot_deriv);
--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -341,7 +341,7 @@ struct tp_task {
 // static - see https://stackoverflow.com/questions/20253267/segmentation-fault-before-main
    static struct tp_task     task_data  [TILESX*TILESY]; // maximal length - each tile
    static struct tp_task     task_data1 [TILESX*TILESY]; // maximal length - each tile
-    union  trot_deriv  rot_deriv;
+    trot_deriv  rot_deriv;
    int                corr_indices         [NUM_PAIRS*TILESX*TILESY];
 //    int                texture_indices      [TILESX*TILESY];
    int                texture_indices      [TILESX*TILESYA];
@@ -395,7 +395,7 @@ struct tp_task {
    struct gc          * gpu_geometry_correction;
    struct corr_vector * gpu_correction_vector;
    float              * gpu_rByRDist;
-    union trot_deriv   * gpu_rot_deriv;
+    trot_deriv   * gpu_rot_deriv;
    readFloatsFromFile(
    		(float *) &fgeometry_correction, // float * data, // allocated array
@@ -660,7 +660,13 @@ struct tp_task {
 		for (int row = 0; row<3; row++){
 			for (int ncam = 0; ncam<NUM_CAMS;ncam++){
 				for (int col = 0; col <3; col++){
+#ifdef NVRTC_BUG
+					//abuse - exceeding first dimension
+					printf("%9.6f,",rot_deriv.rots[i*NUM_CAMS+ncam][row][col]);
+#else
 					printf("%9.6f,",rot_deriv.matrices[i][ncam][row][col]);
+#endif
 					if (col == 2){
 						if (ncam == (NUM_CAMS-1)){
 							printf("\n");