Commit abc2d76d authored by Andrey Filippov's avatar Andrey Filippov

modified to be compatible with nvrtc

parent a6844c60
...@@ -322,7 +322,7 @@ __constant__ int offset_tmp = 12; // 12..15 ...@@ -322,7 +322,7 @@ __constant__ int offset_tmp = 12; // 12..15
*/ */
extern "C" __global__ void calc_rot_deriv( extern "C" __global__ void calc_rot_deriv(
struct corr_vector * gpu_correction_vector, struct corr_vector * gpu_correction_vector,
union trot_deriv * gpu_rot_deriv) trot_deriv * gpu_rot_deriv)
{ {
// __shared__ float zoom; // __shared__ float zoom;
__shared__ float sincos [4][2]; // {az,tilt,roll, d_az, d_tilt, d_roll, d_az}{cos,sin} __shared__ float sincos [4][2]; // {az,tilt,roll, d_az, d_tilt, d_roll, d_az}{cos,sin}
...@@ -446,11 +446,21 @@ extern "C" __global__ void calc_rot_deriv( ...@@ -446,11 +446,21 @@ extern "C" __global__ void calc_rot_deriv(
// copy results to global memory // copy results to global memory
int gindx = threadIdx.z; int gindx = threadIdx.z;
int lindx = offset_rots + threadIdx.z; int lindx = offset_rots + threadIdx.z;
#ifdef NVRTC_BUG
// going beyond first dimension
gpu_rot_deriv->rots[ncam + gindx * NUM_CAMS][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
#else
gpu_rot_deriv->matrices[gindx][ncam][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x]; gpu_rot_deriv->matrices[gindx][ncam][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
#endif
gindx +=3; gindx +=3;
lindx+=3; lindx+=3;
if (lindx < 5) { if (lindx < 5) {
#ifdef NVRTC_BUG
// going beyond first dimension
gpu_rot_deriv->rots[ncam + gindx * NUM_CAMS][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
#else
gpu_rot_deriv->matrices[gindx][ncam][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x]; gpu_rot_deriv->matrices[gindx][ncam][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
#endif
} }
__syncthreads(); __syncthreads();
#ifdef DEBUG21 #ifdef DEBUG21
...@@ -476,10 +486,13 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -476,10 +486,13 @@ extern "C" __global__ void get_tiles_offsets(
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
struct corr_vector * gpu_correction_vector, struct corr_vector * gpu_correction_vector,
float * gpu_rByRDist, // length should match RBYRDIST_LEN float * gpu_rByRDist, // length should match RBYRDIST_LEN
union trot_deriv * gpu_rot_deriv) trot_deriv * gpu_rot_deriv)
{ {
// int task_num = blockIdx.x * blockDim.x + threadIdx.x; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.x // int task_num = blockIdx.x * blockDim.x + threadIdx.x; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.x
int task_num = blockIdx.x * blockDim.y + threadIdx.y; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y int task_num = blockIdx.x * blockDim.y + threadIdx.y; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y
if (task_num >= num_tiles){
return;
}
int thread_xy = blockDim.x * threadIdx.y + threadIdx.x; int thread_xy = blockDim.x * threadIdx.y + threadIdx.x;
int ncam = threadIdx.x; int ncam = threadIdx.x;
// threadIdx.x - numcam, used for per-camera // threadIdx.x - numcam, used for per-camera
...@@ -645,11 +658,18 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -645,11 +658,18 @@ extern "C" __global__ void get_tiles_offsets(
float rD2rND = 1.0; float rD2rND = 1.0;
{ {
float rri = 1.0; float rri = 1.0;
#ifdef NVRTC_BUG
#pragma unroll #pragma unroll
for (int j = 0; j < RAD_COEFF_LEN; j++){
rri *= ri;
rD2rND += ((float *) &geometry_correction.distortionC)[j]*(rri - 1.0);
}
#else
for (int j = 0; j < sizeof(geometry_correction.rad_coeff)/sizeof(float); j++){ for (int j = 0; j < sizeof(geometry_correction.rad_coeff)/sizeof(float); j++){
rri *= ri; rri *= ri;
rD2rND += geometry_correction.rad_coeff[j]*(rri - 1.0); rD2rND += geometry_correction.rad_coeff[j]*(rri - 1.0);
} }
#endif
} }
// Get port pixel coordinates by scaling the 2d vector with Rdistorted/Dnondistorted coefficient) // Get port pixel coordinates by scaling the 2d vector with Rdistorted/Dnondistorted coefficient)
float pXid = pXci * rD2rND; float pXid = pXci * rD2rND;
...@@ -759,15 +779,23 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -759,15 +779,23 @@ extern "C" __global__ void get_tiles_offsets(
disp_dist[i][3] = dd2.get(1, 1); disp_dist[i][3] = dd2.get(1, 1);
*/ */
//#undef NVRTC_BUG
float drD2rND_dri = 0.0; float drD2rND_dri = 0.0;
{ {
float rri = 1.0; float rri = 1.0;
#ifdef NVRTC_BUG
#pragma unroll
for (int j = 0; j < RAD_COEFF_LEN; j++){
drD2rND_dri += ((float *) &geometry_correction.distortionC)[j] * (j+1) * rri;
rri *= ri;
}
#else
#pragma unroll #pragma unroll
for (int j = 0; j < sizeof(geometry_correction.rad_coeff)/sizeof(float); j++){ for (int j = 0; j < sizeof(geometry_correction.rad_coeff)/sizeof(float); j++){
drD2rND_dri += geometry_correction.rad_coeff[j] * (j+1) * rri; drD2rND_dri += geometry_correction.rad_coeff[j] * (j+1) * rri;
rri *= ri; rri *= ri;
} }
#endif
} }
float scale_distort00 = rD2rND + ri* drD2rND_dri; float scale_distort00 = rD2rND + ri* drD2rND_dri;
float scale_distort11 = rD2rND; float scale_distort11 = rD2rND;
......
...@@ -41,6 +41,17 @@ ...@@ -41,6 +41,17 @@
#include "tp_defines.h" #include "tp_defines.h"
#endif #endif
#define NVRTC_BUG 1
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#ifndef offsetof
#define offsetof(st, m) \
((size_t)&(((st *)0)->m))
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#define SCENE_UNITS_SCALE 0.001 // meters from mm #define SCENE_UNITS_SCALE 0.001 // meters from mm
#define MIN_DISPARITY 0.01 // minimal disparity to try to convert to world coordinates #define MIN_DISPARITY 0.01 // minimal disparity to try to convert to world coordinates
struct tp_task { struct tp_task {
...@@ -63,7 +74,15 @@ struct corr_vector{ ...@@ -63,7 +74,15 @@ struct corr_vector{
float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15 float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19 float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19
}; };
#ifdef NVRTC_BUG
struct trot_deriv{
float rots [NUM_CAMS][3][3];
float d_daz [NUM_CAMS][3][3];
float d_tilt [NUM_CAMS][3][3];
float d_roll [NUM_CAMS][3][3];
float d_zoom [NUM_CAMS][3][3];
};
#else
union trot_deriv{ union trot_deriv{
struct { struct {
float rots [NUM_CAMS][3][3]; float rots [NUM_CAMS][3][3];
...@@ -74,6 +93,7 @@ union trot_deriv{ ...@@ -74,6 +93,7 @@ union trot_deriv{
}; };
float matrices [5][NUM_CAMS][3][3]; float matrices [5][NUM_CAMS][3][3];
}; };
#endif
struct gc { struct gc {
float pixelCorrectionWidth; // =2592; // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2) float pixelCorrectionWidth; // =2592; // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2)
...@@ -82,8 +102,10 @@ struct gc { ...@@ -82,8 +102,10 @@ struct gc {
float focalLength; // =FOCAL_LENGTH; float focalLength; // =FOCAL_LENGTH;
float pixelSize; // = PIXEL_SIZE; //um float pixelSize; // = PIXEL_SIZE; //um
float distortionRadius; // = DISTORTION_RADIUS; // mm - half width of the sensor float distortionRadius; // = DISTORTION_RADIUS; // mm - half width of the sensor
#ifndef NVRTC_BUG
union { union {
struct { struct {
#endif
float distortionC; // r^2 float distortionC; // r^2
float distortionB; // r^3 float distortionB; // r^3
float distortionA; // r^4 (normalized to focal length or to sensor half width?) float distortionA; // r^4 (normalized to focal length or to sensor half width?)
...@@ -91,9 +113,11 @@ struct gc { ...@@ -91,9 +113,11 @@ struct gc {
float distortionA6; //r^6 (normalized to focal length or to sensor half width?) float distortionA6; //r^6 (normalized to focal length or to sensor half width?)
float distortionA7; //r^7 (normalized to focal length or to sensor half width?) float distortionA7; //r^7 (normalized to focal length or to sensor half width?)
float distortionA8; //r^8 (normalized to focal length or to sensor half width?) float distortionA8; //r^8 (normalized to focal length or to sensor half width?)
}; #ifndef NVRTC_BUG
float rad_coeff [7]; // };
}; // float rad_coeff [7];
// };
#endif
// parameters, common for all sensors // parameters, common for all sensors
float elevation; // degrees, up - positive; float elevation; // degrees, up - positive;
float heading; // degrees, CW (from top) - positive float heading; // degrees, CW (from top) - positive
...@@ -115,13 +139,14 @@ struct gc { ...@@ -115,13 +139,14 @@ struct gc {
float cameraRadius; // =0; // average distance from the "mass center" of the sensors to the sensors float cameraRadius; // =0; // average distance from the "mass center" of the sensors to the sensors
float disparityRadius; // =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad float disparityRadius; // =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
}; };
#define RAD_COEFF_LEN 7
extern "C" __global__ void get_tiles_offsets( extern "C" __global__ void get_tiles_offsets(
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task int num_tiles, // number of tiles in task
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
struct corr_vector * gpu_correction_vector, struct corr_vector * gpu_correction_vector,
float * gpu_rByRDist, // length should match RBYRDIST_LEN float * gpu_rByRDist, // length should match RBYRDIST_LEN
union trot_deriv * gpu_rot_deriv); trot_deriv * gpu_rot_deriv);
#if 0 #if 0
// uses 3 threadIdx.x, 3 - threadIdx.y, 4 - threadIdx.z // uses 3 threadIdx.x, 3 - threadIdx.y, 4 - threadIdx.z
...@@ -131,6 +156,6 @@ extern "C" __global__ void calc_rot_matrices( ...@@ -131,6 +156,6 @@ extern "C" __global__ void calc_rot_matrices(
// uses NUM_CAMS blocks, (3,3,3) threads // uses NUM_CAMS blocks, (3,3,3) threads
extern "C" __global__ void calc_rot_deriv( extern "C" __global__ void calc_rot_deriv(
struct corr_vector * gpu_correction_vector, struct corr_vector * gpu_correction_vector,
union trot_deriv * gpu_rot_deriv); trot_deriv * gpu_rot_deriv);
...@@ -341,7 +341,7 @@ struct tp_task { ...@@ -341,7 +341,7 @@ struct tp_task {
// static - see https://stackoverflow.com/questions/20253267/segmentation-fault-before-main // static - see https://stackoverflow.com/questions/20253267/segmentation-fault-before-main
static struct tp_task task_data [TILESX*TILESY]; // maximal length - each tile static struct tp_task task_data [TILESX*TILESY]; // maximal length - each tile
static struct tp_task task_data1 [TILESX*TILESY]; // maximal length - each tile static struct tp_task task_data1 [TILESX*TILESY]; // maximal length - each tile
union trot_deriv rot_deriv; trot_deriv rot_deriv;
int corr_indices [NUM_PAIRS*TILESX*TILESY]; int corr_indices [NUM_PAIRS*TILESX*TILESY];
// int texture_indices [TILESX*TILESY]; // int texture_indices [TILESX*TILESY];
int texture_indices [TILESX*TILESYA]; int texture_indices [TILESX*TILESYA];
...@@ -395,7 +395,7 @@ struct tp_task { ...@@ -395,7 +395,7 @@ struct tp_task {
struct gc * gpu_geometry_correction; struct gc * gpu_geometry_correction;
struct corr_vector * gpu_correction_vector; struct corr_vector * gpu_correction_vector;
float * gpu_rByRDist; float * gpu_rByRDist;
union trot_deriv * gpu_rot_deriv; trot_deriv * gpu_rot_deriv;
readFloatsFromFile( readFloatsFromFile(
(float *) &fgeometry_correction, // float * data, // allocated array (float *) &fgeometry_correction, // float * data, // allocated array
...@@ -660,7 +660,13 @@ struct tp_task { ...@@ -660,7 +660,13 @@ struct tp_task {
for (int row = 0; row<3; row++){ for (int row = 0; row<3; row++){
for (int ncam = 0; ncam<NUM_CAMS;ncam++){ for (int ncam = 0; ncam<NUM_CAMS;ncam++){
for (int col = 0; col <3; col++){ for (int col = 0; col <3; col++){
#ifdef NVRTC_BUG
//abuse - exceeding first dimension
printf("%9.6f,",rot_deriv.rots[i*NUM_CAMS+ncam][row][col]);
#else
printf("%9.6f,",rot_deriv.matrices[i][ncam][row][col]); printf("%9.6f,",rot_deriv.matrices[i][ncam][row][col]);
#endif
if (col == 2){ if (col == 2){
if (ncam == (NUM_CAMS-1)){ if (ncam == (NUM_CAMS-1)){
printf("\n"); printf("\n");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment