Commit abc2d76d authored by Andrey Filippov's avatar Andrey Filippov

modified to be compatible with nvrtc

parent a6844c60
......@@ -322,7 +322,7 @@ __constant__ int offset_tmp = 12; // 12..15
*/
extern "C" __global__ void calc_rot_deriv(
struct corr_vector * gpu_correction_vector,
union trot_deriv * gpu_rot_deriv)
trot_deriv * gpu_rot_deriv)
{
// __shared__ float zoom;
__shared__ float sincos [4][2]; // {az,tilt,roll, d_az, d_tilt, d_roll, d_az}{cos,sin}
......@@ -446,11 +446,21 @@ extern "C" __global__ void calc_rot_deriv(
// copy results to global memory
int gindx = threadIdx.z;
int lindx = offset_rots + threadIdx.z;
#ifdef NVRTC_BUG
// going beyond first dimension
gpu_rot_deriv->rots[ncam + gindx * NUM_CAMS][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
#else
gpu_rot_deriv->matrices[gindx][ncam][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
#endif
gindx +=3;
lindx+=3;
if (lindx < 5) {
#ifdef NVRTC_BUG
// going beyond first dimension
gpu_rot_deriv->rots[ncam + gindx * NUM_CAMS][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
#else
gpu_rot_deriv->matrices[gindx][ncam][threadIdx.y][threadIdx.x] = matrices[lindx][threadIdx.y][threadIdx.x];
#endif
}
__syncthreads();
#ifdef DEBUG21
......@@ -476,10 +486,13 @@ extern "C" __global__ void get_tiles_offsets(
struct gc * gpu_geometry_correction,
struct corr_vector * gpu_correction_vector,
float * gpu_rByRDist, // length should match RBYRDIST_LEN
union trot_deriv * gpu_rot_deriv)
trot_deriv * gpu_rot_deriv)
{
// int task_num = blockIdx.x * blockDim.x + threadIdx.x; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.x
int task_num = blockIdx.x * blockDim.y + threadIdx.y; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y
if (task_num >= num_tiles){
return;
}
int thread_xy = blockDim.x * threadIdx.y + threadIdx.x;
int ncam = threadIdx.x;
// threadIdx.x - numcam, used for per-camera
......@@ -645,11 +658,18 @@ extern "C" __global__ void get_tiles_offsets(
float rD2rND = 1.0;
{
float rri = 1.0;
#ifdef NVRTC_BUG
#pragma unroll
for (int j = 0; j < RAD_COEFF_LEN; j++){
rri *= ri;
rD2rND += ((float *) &geometry_correction.distortionC)[j]*(rri - 1.0);
}
#else
for (int j = 0; j < sizeof(geometry_correction.rad_coeff)/sizeof(float); j++){
rri *= ri;
rD2rND += geometry_correction.rad_coeff[j]*(rri - 1.0);
}
#endif
}
// Get port pixel coordinates by scaling the 2d vector with Rdistorted/Dnondistorted coefficient)
float pXid = pXci * rD2rND;
......@@ -759,15 +779,23 @@ extern "C" __global__ void get_tiles_offsets(
disp_dist[i][3] = dd2.get(1, 1);
*/
//#undef NVRTC_BUG
float drD2rND_dri = 0.0;
{
float rri = 1.0;
#ifdef NVRTC_BUG
#pragma unroll
for (int j = 0; j < RAD_COEFF_LEN; j++){
drD2rND_dri += ((float *) &geometry_correction.distortionC)[j] * (j+1) * rri;
rri *= ri;
}
#else
#pragma unroll
for (int j = 0; j < sizeof(geometry_correction.rad_coeff)/sizeof(float); j++){
drD2rND_dri += geometry_correction.rad_coeff[j] * (j+1) * rri;
rri *= ri;
}
#endif
}
float scale_distort00 = rD2rND + ri* drD2rND_dri;
float scale_distort11 = rD2rND;
......
......@@ -41,6 +41,17 @@
#include "tp_defines.h"
#endif
#define NVRTC_BUG 1
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#ifndef offsetof
#define offsetof(st, m) \
((size_t)&(((st *)0)->m))
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#define SCENE_UNITS_SCALE 0.001 // meters from mm
#define MIN_DISPARITY 0.01 // minimal disparity to try to convert to world coordinates
struct tp_task {
......@@ -63,7 +74,15 @@ struct corr_vector{
float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19
};
#ifdef NVRTC_BUG
struct trot_deriv{
float rots [NUM_CAMS][3][3];
float d_daz [NUM_CAMS][3][3];
float d_tilt [NUM_CAMS][3][3];
float d_roll [NUM_CAMS][3][3];
float d_zoom [NUM_CAMS][3][3];
};
#else
union trot_deriv{
struct {
float rots [NUM_CAMS][3][3];
......@@ -74,6 +93,7 @@ union trot_deriv{
};
float matrices [5][NUM_CAMS][3][3];
};
#endif
struct gc {
float pixelCorrectionWidth; // =2592; // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2)
......@@ -82,8 +102,10 @@ struct gc {
float focalLength; // =FOCAL_LENGTH;
float pixelSize; // = PIXEL_SIZE; //um
float distortionRadius; // = DISTORTION_RADIUS; // mm - half width of the sensor
#ifndef NVRTC_BUG
union {
struct {
#endif
float distortionC; // r^2
float distortionB; // r^3
float distortionA; // r^4 (normalized to focal length or to sensor half width?)
......@@ -91,9 +113,11 @@ struct gc {
float distortionA6; //r^6 (normalized to focal length or to sensor half width?)
float distortionA7; //r^7 (normalized to focal length or to sensor half width?)
float distortionA8; //r^8 (normalized to focal length or to sensor half width?)
};
float rad_coeff [7];
};
#ifndef NVRTC_BUG
// };
// float rad_coeff [7];
// };
#endif
// parameters, common for all sensors
float elevation; // degrees, up - positive;
float heading; // degrees, CW (from top) - positive
......@@ -115,13 +139,14 @@ struct gc {
float cameraRadius; // =0; // average distance from the "mass center" of the sensors to the sensors
float disparityRadius; // =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
};
#define RAD_COEFF_LEN 7
extern "C" __global__ void get_tiles_offsets(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task
struct gc * gpu_geometry_correction,
struct corr_vector * gpu_correction_vector,
float * gpu_rByRDist, // length should match RBYRDIST_LEN
union trot_deriv * gpu_rot_deriv);
trot_deriv * gpu_rot_deriv);
#if 0
// uses 3 threadIdx.x, 3 - threadIdx.y, 4 - threadIdx.z
......@@ -131,6 +156,6 @@ extern "C" __global__ void calc_rot_matrices(
// uses NUM_CAMS blocks, (3,3,3) threads
extern "C" __global__ void calc_rot_deriv(
struct corr_vector * gpu_correction_vector,
union trot_deriv * gpu_rot_deriv);
trot_deriv * gpu_rot_deriv);
......@@ -341,7 +341,7 @@ struct tp_task {
// static - see https://stackoverflow.com/questions/20253267/segmentation-fault-before-main
static struct tp_task task_data [TILESX*TILESY]; // maximal length - each tile
static struct tp_task task_data1 [TILESX*TILESY]; // maximal length - each tile
union trot_deriv rot_deriv;
trot_deriv rot_deriv;
int corr_indices [NUM_PAIRS*TILESX*TILESY];
// int texture_indices [TILESX*TILESY];
int texture_indices [TILESX*TILESYA];
......@@ -395,7 +395,7 @@ struct tp_task {
struct gc * gpu_geometry_correction;
struct corr_vector * gpu_correction_vector;
float * gpu_rByRDist;
union trot_deriv * gpu_rot_deriv;
trot_deriv * gpu_rot_deriv;
readFloatsFromFile(
(float *) &fgeometry_correction, // float * data, // allocated array
......@@ -660,7 +660,13 @@ struct tp_task {
for (int row = 0; row<3; row++){
for (int ncam = 0; ncam<NUM_CAMS;ncam++){
for (int col = 0; col <3; col++){
#ifdef NVRTC_BUG
//abuse - exceeding first dimension
printf("%9.6f,",rot_deriv.rots[i*NUM_CAMS+ncam][row][col]);
#else
printf("%9.6f,",rot_deriv.matrices[i][ncam][row][col]);
#endif
if (col == 2){
if (ncam == (NUM_CAMS-1)){
printf("\n");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment