Commit 3c033192 authored by Andrey Filippov

updated kernels

parent 514057c6
...@@ -41,6 +41,18 @@ ...@@ -41,6 +41,18 @@
#include "tp_defines.h" #include "tp_defines.h"
#endif #endif
// Declaration of the CUDA kernel convert_correct_tiles; the definition is not in this header.
// extern "C" keeps the kernel name unmangled.
// NOTE(review): judging by the name and parameters, this converts image tiles to CLT data
// (gpu_clt) while applying per-camera kernels - confirm against the kernel definition.
extern "C"
__global__ void convert_correct_tiles(
float ** gpu_kernel_offsets, // [NUM_CAMS] - one pointer per camera
float ** gpu_kernels, // [NUM_CAMS] - one pointer per camera
float ** gpu_images, // [NUM_CAMS] - one pointer per camera
struct tp_task * gpu_tasks, // array of tile tasks (struct tp_task is declared in this header)
float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t dstride, // in floats (pixels)
int num_tiles, // number of tiles in task
int lpf_mask); // apply lpf to colors: bit 0 - red, bit 1 - blue, bit 2 - green. Currently always 0!
extern "C" __global__ void clear_texture_list( extern "C" __global__ void clear_texture_list(
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int width, // <= TILESX, use for faster processing of LWIR images int width, // <= TILESX, use for faster processing of LWIR images
...@@ -68,11 +80,11 @@ extern "C" __global__ void clear_texture_rbga( ...@@ -68,11 +80,11 @@ extern "C" __global__ void clear_texture_rbga(
const size_t texture_rbga_stride, // in floats 8*stride const size_t texture_rbga_stride, // in floats 8*stride
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
extern "C" __global__ void textures_accumulate( extern "C" __global__ void textures_accumulate(
// int border_tile, // if 1 - watch for border
int * woi, // x, y, width,height int * woi, // x, y, width,height
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process size_t num_texture_tiles, // number of texture tiles to process
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// TODO: use geometry_correction rXY !
float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
...@@ -102,5 +114,35 @@ extern "C" __global__ void imclt_rbg( ...@@ -102,5 +114,35 @@ extern "C" __global__ void imclt_rbg(
int h_offset, int h_offset,
const size_t dstride); // in floats (pixels) const size_t dstride); // in floats (pixels)
// Declaration of the CUDA kernel generate_RBGA; the definition is not in this header.
// extern "C" keeps the kernel name unmangled.
// The parameter list has two groups: the first describes the texture task list,
// the second mirrors the texture-generation parameters of textures_accumulate().
// NOTE(review): "woi" presumably stands for "window of interest" - confirm.
extern "C"
__global__ void generate_RBGA(
// Parameters to generate texture tasks
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
// declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7))
int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation)
int * woi, // x,y,width,height of the woi
int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1)
int height, // <= TILESY, use for faster processing of LWIR images
// Parameters for the texture generation
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction
float min_shot, // 10.0
float scale_shot, // 3.0
float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weight0, // scale for R
float weight1, // scale for B
float weight2, // scale for G
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
...@@ -72,9 +72,9 @@ ...@@ -72,9 +72,9 @@
// kernels (not used so far) // kernels (not used so far)
#ifdef BBBB #if 0
extern "C" __global__ void GPU_DTT24_DRV(float *dst, float *src, int src_stride, int dtt_mode); extern "C" __global__ void GPU_DTT24_DRV(float *dst, float *src, int src_stride, int dtt_mode);
#endif// #ifdef BBBB #endif// #if 0
//=========================== 2D functions =============== //=========================== 2D functions ===============
extern __device__ void corrUnfoldTile( extern __device__ void corrUnfoldTile(
......
This diff is collapsed.
...@@ -41,6 +41,19 @@ ...@@ -41,6 +41,19 @@
#include "tp_defines.h" #include "tp_defines.h"
#endif #endif
// Workaround switch: it is tested only with #ifdef / #ifndef in this header, so its value is irrelevant.
// When defined, trot_deriv is a plain struct and the union wrapper in struct gc is left out.
// NOTE(review): the name suggests a problem with anonymous unions/structs under NVRTC - confirm.
#define NVRTC_BUG 1
// Fallback definition of pi, used only when M_PI has not been defined already.
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
// Fallback offsetof, used only when offsetof has not been defined already.
// It takes the address of member m through a null pointer of type st.
#ifndef offsetof
#define offsetof(st, m) \
((size_t)&(((st *)0)->m))
// Alternative kept for reference (not active):
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#define SCENE_UNITS_SCALE 0.001 // meters from mm
#define MIN_DISPARITY 0.01 // minimal disparity to try to convert to world coordinates
struct tp_task { struct tp_task {
int task; int task;
union { union {
...@@ -61,19 +74,50 @@ struct corr_vector{ ...@@ -61,19 +74,50 @@ struct corr_vector{
float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15 float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19 float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19
}; };
// trot_deriv holds five sets of per-camera 3x3 float matrices.
// With NVRTC_BUG defined it is a plain struct with five named arrays.
// Otherwise it is a union that overlays the same five named arrays (in an anonymous struct)
// with matrices[5][NUM_CAMS][3][3], so they can also be addressed by index.
// NOTE(review): rots are presumably rotation matrices and d_* their derivatives by
// azimuth, tilt, roll and zoom - confirm against calc_rot_deriv().
#ifdef NVRTC_BUG
struct trot_deriv{
float rots [NUM_CAMS][3][3];
float d_daz [NUM_CAMS][3][3];
float d_tilt [NUM_CAMS][3][3];
float d_roll [NUM_CAMS][3][3];
float d_zoom [NUM_CAMS][3][3];
};
#else
union trot_deriv{
struct {
float rots [NUM_CAMS][3][3];
float d_daz [NUM_CAMS][3][3];
float d_tilt [NUM_CAMS][3][3];
float d_roll [NUM_CAMS][3][3];
float d_zoom [NUM_CAMS][3][3];
};
float matrices [5][NUM_CAMS][3][3]; // same storage as the five arrays above, in declaration order
};
#endif
struct gc { struct gc {
float pixelCorrectionWidth; // =2592; // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2)
float pixelCorrectionHeight; // =1936;
float line_time; // duration of one scan line readout (for ERS)
float focalLength; // =FOCAL_LENGTH; float focalLength; // =FOCAL_LENGTH;
float pixelSize; // = PIXEL_SIZE; //um float pixelSize; // = PIXEL_SIZE; //um
float distortionRadius; // = DISTORTION_RADIUS; // mm - half width of the sensor float distortionRadius; // = DISTORTION_RADIUS; // mm - half width of the sensor
#ifndef NVRTC_BUG
float distortionA8; //r^8 (normalized to focal length or to sensor half width?) union {
float distortionA7; //r^7 (normalized to focal length or to sensor half width?) struct {
float distortionA6; //r^6 (normalized to focal length or to sensor half width?) #endif
float distortionA5; //r^5 (normalized to focal length or to sensor half width?)
float distortionA; // r^4 (normalized to focal length or to sensor half width?)
float distortionB; // r^3
float distortionC; // r^2 float distortionC; // r^2
float distortionB; // r^3
float distortionA; // r^4 (normalized to focal length or to sensor half width?)
float distortionA5; //r^5 (normalized to focal length or to sensor half width?)
float distortionA6; //r^6 (normalized to focal length or to sensor half width?)
float distortionA7; //r^7 (normalized to focal length or to sensor half width?)
float distortionA8; //r^8 (normalized to focal length or to sensor half width?)
#ifndef NVRTC_BUG
// };
// float rad_coeff [7];
// };
#endif
// parameters, common for all sensors // parameters, common for all sensors
float elevation; // degrees, up - positive; float elevation; // degrees, up - positive;
float heading; // degrees, CW (from top) - positive float heading; // degrees, CW (from top) - positive
...@@ -82,18 +126,36 @@ struct gc { ...@@ -82,18 +126,36 @@ struct gc {
float right [NUM_CAMS]; float right [NUM_CAMS];
float height [NUM_CAMS]; float height [NUM_CAMS];
float roll [NUM_CAMS]; // degrees, CW (to target) - positive float roll [NUM_CAMS]; // degrees, CW (to target) - positive
float pXY0 [NUM_CAMS][2];
float common_right; // mm right, camera center float common_right; // mm right, camera center
float common_forward; // mm forward (to target), camera center float common_forward; // mm forward (to target), camera center
float common_height; // mm up, camera center float common_height; // mm up, camera center
float common_roll; // degrees CW (to target) camera as a whole float common_roll; // degrees CW (to target) camera as a whole
// float [][] XYZ_he; // all cameras coordinates transformed to eliminate heading and elevation (rolls preserved) // float [][] XYZ_he; // all cameras coordinates transformed to eliminate heading and elevation (rolls preserved)
// float [][] XYZ_her = null; // XYZ of the lenses in a corrected CCS (adjusted for to elevation, heading, common_roll) // float [][] XYZ_her = null; // XYZ of the lenses in a corrected CCS (adjusted for to elevation, heading, common_roll)
float rXY [NUM_CAMS][3]; // XY pairs of the in a normal plane, relative to disparityRadius float rXY [NUM_CAMS][2]; // XY pairs of the in a normal plane, relative to disparityRadius
// float [][] rXY_ideal = {{-0.5, -0.5}, {0.5,-0.5}, {-0.5, 0.5}, {0.5,0.5}}; // float [][] rXY_ideal = {{-0.5, -0.5}, {0.5,-0.5}, {-0.5, 0.5}, {0.5,0.5}};
// only used for the multi-quad systems // only used for the multi-quad systems
float cameraRadius; // =0; // average distance from the "mass center" of the sensors to the sensors float cameraRadius; // =0; // average distance from the "mass center" of the sensors to the sensors
float disparityRadius; // =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad float disparityRadius; // =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
}; };
// NOTE(review): presumably the number of radial distortion coefficients in struct gc
// (distortionC ... distortionA8 are 7 consecutive floats) - confirm against the users of this macro.
#define RAD_COEFF_LEN 7
// Declaration of the CUDA kernel get_tiles_offsets; the definition is not in this header.
// extern "C" keeps the kernel name unmangled.
// trot_deriv is named without a struct/union keyword because it is a struct or a union
// depending on NVRTC_BUG.
// NOTE(review): which of the pointer arguments are inputs and which are outputs is not visible here - confirm.
extern "C" __global__ void get_tiles_offsets(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task
struct gc * gpu_geometry_correction,
struct corr_vector * gpu_correction_vector,
float * gpu_rByRDist, // length should match RBYRDIST_LEN
trot_deriv * gpu_rot_deriv);
// The calc_rot_matrices declaration below is compiled out by #if 0.
#if 0
// uses 3 threadIdx.x, 3 - threadIdx.y, 4 - threadIdx.z
extern "C" __global__ void calc_rot_matrices(
struct corr_vector * gpu_correction_vector);
#endif
// Declaration of the CUDA kernel calc_rot_deriv; the definition is not in this header.
// Launch configuration: uses NUM_CAMS blocks, (3,3,3) threads
// NOTE(review): presumably reads gpu_correction_vector and fills gpu_rot_deriv - confirm against the definition.
extern "C" __global__ void calc_rot_deriv(
struct corr_vector * gpu_correction_vector,
trot_deriv * gpu_rot_deriv);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment