Commit fa4f3beb authored by Andrey Filippov

updated GPU code

parent 15cff9c7
...@@ -7053,7 +7053,7 @@ private Panel panel1, ...@@ -7053,7 +7053,7 @@ private Panel panel1,
} }
dpixels[i] = d; dpixels[i] = d;
} }
if (disparity_max > 0) { if (log_mode && (disparity_max > 0)) {
mn = 0.0; mn = 0.0;
double d = disparity_max; double d = disparity_max;
if (d < 0.0) { // if (d < 0.0) { //
......
...@@ -1019,24 +1019,9 @@ public class CLTPass3d{ ...@@ -1019,24 +1019,9 @@ public class CLTPass3d{
double step_threshold, double step_threshold,
double min_disparity, double min_disparity,
double max_disparity, double max_disparity,
// double strength_floor,
// double strength_pow,
double stBlurSigma, double stBlurSigma,
boolean smplMode, // = true; // Use sample mode (false - regular tile mode) boolean smplMode, // = true; // Use sample mode (false - regular tile mode)
MeasuredLayersFilterParameters mlfp, MeasuredLayersFilterParameters mlfp,
// int smplSide, // = 2; // Sample size (side of a square)
// int smplNum, // = 3; // Number after removing worst
// double smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample
// boolean smplWnd, // use window functions for the samples
// double max_abs_tilt, // 2.0; // pix per tile
// double max_rel_tilt, // 0.2; // (pix / disparity) per tile
// double damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// double min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// double transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// int far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// double far_power, // 3.0; // Raise disparity to this power before averaging for far objects
int measSel) int measSel)
{ {
this.superTiles = new SuperTiles( this.superTiles = new SuperTiles(
...@@ -1046,23 +1031,9 @@ public class CLTPass3d{ ...@@ -1046,23 +1031,9 @@ public class CLTPass3d{
step_threshold, step_threshold,
min_disparity, min_disparity,
max_disparity, max_disparity,
// strength_floor,
// strength_pow,
stBlurSigma, stBlurSigma,
smplMode, // = true; // Use sample mode (false - regular tile mode) smplMode, // = true; // Use sample mode (false - regular tile mode)
mlfp, mlfp,
// smplSide, // = 2; // Sample size (side of a square)
// smplNum, // = 3; // Number after removing worst
// smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample
// smplWnd, // final boolean smplWnd, // use window functions for the samples
// max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile
// max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity
// damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// far_power, // 1.0; // Raise disparity to this power before averaging for far objects
// true, // boolean null_if_none,
measSel); measSel);
return this.superTiles; return this.superTiles;
} }
...@@ -1072,19 +1043,6 @@ public class CLTPass3d{ ...@@ -1072,19 +1043,6 @@ public class CLTPass3d{
boolean smplMode, // = true; // Use sample mode (false - regular tile mode) boolean smplMode, // = true; // Use sample mode (false - regular tile mode)
MeasuredLayersFilterParameters mlfp, MeasuredLayersFilterParameters mlfp,
// int smplSide, // = 2; // Sample size (side of a square)
// int smplNum, // = 3; // Number after removing worst
// double smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample
// boolean smplWnd, // use window functions for the samples
// double max_abs_tilt, // 2.0; // pix per tile
// double max_rel_tilt, // 0.2; // (pix / disparity) per tile
// double damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// double min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// double transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// int far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// double far_power, // 3.0; // Raise disparity to this power before averaging for far objects
int measSel) int measSel)
{ {
if (this.superTiles == null){ if (this.superTiles == null){
...@@ -1096,19 +1054,6 @@ public class CLTPass3d{ ...@@ -1096,19 +1054,6 @@ public class CLTPass3d{
smplMode, // = true; // Use sample mode (false - regular tile mode) smplMode, // = true; // Use sample mode (false - regular tile mode)
mlfp, mlfp,
// smplSide, // = 2; // Sample size (side of a square)
// smplNum, // = 3; // Number after removing worst
// smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample
// smplWnd, // use window functions for the samples
// max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile
// max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity
// damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// far_power, // 1.0; // Raise disparity to this power before averaging for far objects
measSel); measSel);
} }
......
...@@ -104,24 +104,6 @@ public class SuperTiles{ ...@@ -104,24 +104,6 @@ public class SuperTiles{
double stBlurSigma, double stBlurSigma,
boolean smplMode, // = true; // Use sample mode (false - regular tile mode) boolean smplMode, // = true; // Use sample mode (false - regular tile mode)
MeasuredLayersFilterParameters mlfp, MeasuredLayersFilterParameters mlfp,
// double strength_floor,
// double strength_pow,
// boolean smplMode, // = true; // Use sample mode (false - regular tile mode)
// int smplSide, // = 2; // Sample size (side of a square)
// int smplNum, // = 3; // Number after removing worst
// double smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample
// boolean smplWnd, // use window functions for the samples
// double max_abs_tilt, // 2.0; // pix per tile
// double max_rel_tilt, // 0.2; // (pix / disparity) per tile
// double damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// double min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// double transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// int far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// double far_power, // 3.0; // Raise disparity to this power before averaging for far objects
// boolean null_if_none,
int measSel) int measSel)
{ {
this.cltPass3d = cltPass3d; this.cltPass3d = cltPass3d;
...@@ -135,21 +117,6 @@ public class SuperTiles{ ...@@ -135,21 +117,6 @@ public class SuperTiles{
this.smplMode = smplMode; // Use sample mode (false - regular tile mode) this.smplMode = smplMode; // Use sample mode (false - regular tile mode)
this.mlfp = mlfp.clone(); this.mlfp = mlfp.clone();
// this.strength_floor = strength_floor;
// this.strength_pow = strength_pow;
// this.smplSide = smplSide; // Sample size (side of a square)
// this.smplNum = smplNum; // Number after removing worst
// this.smplRms = smplRms; // Maximal RMS of the remaining tiles in a sample
// this.max_abs_tilt = max_abs_tilt;
// this.max_rel_tilt = max_rel_tilt;
// this.damp_tilt = damp_tilt;
// this.min_tilt_disp = min_tilt_disp;
// this.transition = transition;
// this.far_mode = far_mode;
// this.far_power = far_power;
// this.smplWnd = smplWnd; // Use window functions for the samples
this.measSel = measSel; this.measSel = measSel;
this.step_threshold_near = this.step_threshold_far * step_near / this.step_far ; this.step_threshold_near = this.step_threshold_far * step_near / this.step_far ;
this.bin_far = this.step_threshold_far / this.step_far; this.bin_far = this.step_threshold_far / this.step_far;
...@@ -197,18 +164,6 @@ public class SuperTiles{ ...@@ -197,18 +164,6 @@ public class SuperTiles{
null, // boolean [][] tile_sel, // null or per-measurement layer, per-tile selection. For each layer null - do not use, {} - use all null, // boolean [][] tile_sel, // null or per-measurement layer, per-tile selection. For each layer null - do not use, {} - use all
smplMode, // final boolean smplMode, // = true; // Use sample mode (false - regular tile mode) smplMode, // final boolean smplMode, // = true; // Use sample mode (false - regular tile mode)
mlfp, mlfp,
// smplSide, // final int smplSide, // = 2; // Sample size (side of a square)
// smplNum, // final int smplNum, // = 3; // Number after removing worst
// smplRms, // final double smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample
// smplWnd, // final boolean smplWnd, // use window functions for the samples
// max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile
// max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity
// damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// far_power, // 1.0; // Raise disparity to this power before averaging for far objects
measSel); // calculate and blur supertiles (for all, not just selected?) measSel); // calculate and blur supertiles (for all, not just selected?)
if (tileProcessor.globalDebugLevel > 0){ if (tileProcessor.globalDebugLevel > 0){
......
...@@ -6796,22 +6796,9 @@ ImageDtt.startAndJoin(threads); ...@@ -6796,22 +6796,9 @@ ImageDtt.startAndJoin(threads);
clt_parameters.stStepThreshold, // double step_threshold, clt_parameters.stStepThreshold, // double step_threshold,
clt_parameters.stMinDisparity, // double min_disparity, clt_parameters.stMinDisparity, // double min_disparity,
clt_parameters.grow_disp_max, // double max_disparity, clt_parameters.grow_disp_max, // double max_disparity,
// clt_parameters.stFloor, // double strength_floor,
// clt_parameters.stPow, // double strength_pow,
0.0, // NO BLUR double stBlurSigma) 0.0, // NO BLUR double stBlurSigma)
false, //clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode) false, //clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode)
clt_parameters.mlfp, // Filter parameters clt_parameters.mlfp, // Filter parameters
// clt_parameters.stSmplSide, // Sample size (side of a square)
// clt_parameters.stSmplNum, // Number after removing worst
// clt_parameters.stSmplRms, // Maximal RMS of the remaining tiles in a sample
// clt_parameters.stSmplWnd, // boolean smplWnd, // use window functions for the samples
// clt_parameters.fs_max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile
// clt_parameters.fs_max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity
// clt_parameters.fs_damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// clt_parameters.fs_min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// clt_parameters.fs_transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// clt_parameters.fs_far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// clt_parameters.fs_far_power, // 1.0; // Raise disparity to this power before averaging for far objects
clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert
dbg_hist[0] = scan_prev.getSuperTiles().showDisparityHistogram(); dbg_hist[0] = scan_prev.getSuperTiles().showDisparityHistogram();
scan_prev.setSuperTiles( scan_prev.setSuperTiles(
...@@ -6820,22 +6807,9 @@ ImageDtt.startAndJoin(threads); ...@@ -6820,22 +6807,9 @@ ImageDtt.startAndJoin(threads);
clt_parameters.stStepThreshold, // double step_threshold, clt_parameters.stStepThreshold, // double step_threshold,
clt_parameters.stMinDisparity, // double min_disparity, clt_parameters.stMinDisparity, // double min_disparity,
clt_parameters.grow_disp_max, // double max_disparity, clt_parameters.grow_disp_max, // double max_disparity,
// clt_parameters.stFloor, // double strength_floor,
// clt_parameters.stPow, // double strength_pow,
0.0, // NO BLUR double stBlurSigma) 0.0, // NO BLUR double stBlurSigma)
clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode) clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode)
clt_parameters.mlfp, // Filter parameters clt_parameters.mlfp, // Filter parameters
// clt_parameters.stSmplSide, // Sample size (side of a square)
// clt_parameters.stSmplNum, // Number after removing worst
// clt_parameters.stSmplRms, // Maximal RMS of the remaining tiles in a sample
// clt_parameters.stSmplWnd, // boolean smplWnd, // use window functions for the samples
// clt_parameters.fs_max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile
// clt_parameters.fs_max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity
// clt_parameters.fs_damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// clt_parameters.fs_min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// clt_parameters.fs_transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// clt_parameters.fs_far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// clt_parameters.fs_far_power, // 1.0; // Raise disparity to this power before averaging for far objects
clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert
dbg_hist[1] = scan_prev.getSuperTiles().showDisparityHistogram(); dbg_hist[1] = scan_prev.getSuperTiles().showDisparityHistogram();
} }
...@@ -6847,22 +6821,9 @@ ImageDtt.startAndJoin(threads); ...@@ -6847,22 +6821,9 @@ ImageDtt.startAndJoin(threads);
clt_parameters.stStepThreshold, // double step_threshold, clt_parameters.stStepThreshold, // double step_threshold,
clt_parameters.stMinDisparity, // double min_disparity, clt_parameters.stMinDisparity, // double min_disparity,
clt_parameters.grow_disp_max, // double max_disparity, clt_parameters.grow_disp_max, // double max_disparity,
// clt_parameters.stFloor, // double strength_floor,
// clt_parameters.stPow, // double strength_pow,
clt_parameters.stSigma, // with blur double stBlurSigma) clt_parameters.stSigma, // with blur double stBlurSigma)
false, //clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode) false, //clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode)
clt_parameters.mlfp, // Filter parameters clt_parameters.mlfp, // Filter parameters
// clt_parameters.stSmplSide, // Sample size (side of a square)
// clt_parameters.stSmplNum, // Number after removing worst
// clt_parameters.stSmplRms, // Maximal RMS of the remaining tiles in a sample
// clt_parameters.stSmplWnd, // boolean smplWnd, // use window functions for the samples
// clt_parameters.fs_max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile
// clt_parameters.fs_max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity
// clt_parameters.fs_damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// clt_parameters.fs_min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// clt_parameters.fs_transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// clt_parameters.fs_far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// clt_parameters.fs_far_power, // 1.0; // Raise disparity to this power before averaging for far objects
clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert
if (show_st) { // otherwise only blured version is needed if (show_st) { // otherwise only blured version is needed
dbg_hist[2] = scan_prev.getSuperTiles().showDisparityHistogram(); dbg_hist[2] = scan_prev.getSuperTiles().showDisparityHistogram();
...@@ -6884,22 +6845,9 @@ ImageDtt.startAndJoin(threads); ...@@ -6884,22 +6845,9 @@ ImageDtt.startAndJoin(threads);
clt_parameters.stStepThreshold, // double step_threshold, clt_parameters.stStepThreshold, // double step_threshold,
clt_parameters.stMinDisparity, // double min_disparity, clt_parameters.stMinDisparity, // double min_disparity,
clt_parameters.grow_disp_max, // double max_disparity, clt_parameters.grow_disp_max, // double max_disparity,
// clt_parameters.stFloor, // double strength_floor,
// clt_parameters.stPow, // double strength_pow,
0.0, // NO BLUR double stBlurSigma) 0.0, // NO BLUR double stBlurSigma)
clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode) clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode)
clt_parameters.mlfp, // Filter parameters clt_parameters.mlfp, // Filter parameters
// clt_parameters.stSmplSide, // Sample size (side of a square)
// clt_parameters.stSmplNum, // Number after removing worst
// clt_parameters.stSmplRms, // Maximal RMS of the remaining tiles in a sample
// clt_parameters.stSmplWnd, // boolean smplWnd, // use window functions for the samples
// clt_parameters.fs_max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile
// clt_parameters.fs_max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity
// clt_parameters.fs_damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data
// clt_parameters.fs_min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity
// clt_parameters.fs_transition, // 1.0; // Mode transition range (between tilted and maximal disparity)
// clt_parameters.fs_far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity)
// clt_parameters.fs_far_power, // 1.0; // Raise disparity to this power before averaging for far objects
clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert
} }
...@@ -7063,22 +7011,6 @@ ImageDtt.startAndJoin(threads); ...@@ -7063,22 +7011,6 @@ ImageDtt.startAndJoin(threads);
debugLevel, // final int debugLevel) debugLevel, // final int debugLevel)
clt_parameters.tileX, clt_parameters.tileX,
clt_parameters.tileY); clt_parameters.tileY);
/*
if (clt_parameters.plSplitApply) {
while (true) {
int num_added = 0;
num_added += st.fillSquares();
if (debugLevel > -1) {
System.out.println("after fillSquares() added "+num_added);
}
num_added += st.cutCorners();
if (debugLevel > -1) {
System.out.println("after plCutCorners() added (cumulative) "+num_added);
}
if (num_added == 0) break;
}
}
*/
double [][][] dispStrength = st.getDisparityStrengths( double [][][] dispStrength = st.getDisparityStrengths(
clt_parameters.stMeasSel); // int stMeasSel) // = 1; // Select measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert) clt_parameters.stMeasSel); // int stMeasSel) // = 1; // Select measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert)
boolean [][] tileSel = st.getMeasurementSelections( boolean [][] tileSel = st.getMeasurementSelections(
...@@ -7199,25 +7131,6 @@ ImageDtt.startAndJoin(threads); ...@@ -7199,25 +7131,6 @@ ImageDtt.startAndJoin(threads);
clt_parameters.tileY); clt_parameters.tileY);
} // if (clt_parameters.plSplitApply) } // if (clt_parameters.plSplitApply)
/*
while (true) {
int num_added = 0;
if (clt_parameters.plFillSquares){
num_added += st.fillSquares();
}
if (debugLevel > -1) {
System.out.println("after fillSquares() added "+num_added);
}
if (clt_parameters.plCutCorners){
num_added += st.cutCorners();
}
if (debugLevel > -1) {
System.out.println("after plCutCorners() added (cumulative) "+num_added);
}
if (num_added == 0) break;
}
*/
int max_num_tries = 20; int max_num_tries = 20;
if (clt_parameters.plIterations > 0) { if (clt_parameters.plIterations > 0) {
......
...@@ -403,6 +403,16 @@ __constant__ float lpf_corr[64]={ // modify if needed ...@@ -403,6 +403,16 @@ __constant__ float lpf_corr[64]={ // modify if needed
0.02728573f, 0.02374977f, 0.01799322f, 0.01186582f, 0.00681327f, 0.00341565f, 0.00153247f, 0.00074451f 0.02728573f, 0.02374977f, 0.01799322f, 0.01186582f, 0.00681327f, 0.00341565f, 0.00153247f, 0.00074451f
}; };
// Per-coefficient weights for one 8x8 tile quadrant (64 values, stored as 8 rows of 8).
// Every entry is currently 1.0f, so multiplying by this table leaves the data unchanged;
// replace the values to actually shape the spectrum.
// NOTE(review): "LoG" presumably stands for Laplacian-of-Gaussian - confirm with the author.
__constant__ float LoG_corr[64]={ // modify if needed: high-pass filter applied before correlation so the products fit into float range
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f
};
__constant__ int pairs[6][2]={ __constant__ int pairs[6][2]={
{0, 1}, {0, 1},
...@@ -1086,6 +1096,22 @@ extern "C" __global__ void correlate2D_inner( ...@@ -1086,6 +1096,22 @@ extern "C" __global__ void correlate2D_inner(
float * clt_tile1i = clt_tile1 + threadIdx.x; float * clt_tile1i = clt_tile1 + threadIdx.x;
float * clt_tile2i = clt_tile2 + threadIdx.x; float * clt_tile2i = clt_tile2 + threadIdx.x;
#pragma unroll #pragma unroll
#define USE_LOG
#ifdef USE_LOG
// Apply high-pass filter to correlation inputs to reduce dynamic range before multiplication
for (int q = 0; q < 4; q++){
float *log = LoG_corr + threadIdx.x;
for (int i = 0; i < DTT_SIZE; i++){ // copy 32 rows (4 quadrants of 8 rows)
*clt_tile1i= (*gpu_tile1) * (*log);
*clt_tile2i= (*gpu_tile2) * (*log);
clt_tile1i += DTT_SIZE1;
clt_tile2i += DTT_SIZE1;
gpu_tile1 += DTT_SIZE;
gpu_tile2 += DTT_SIZE;
log += DTT_SIZE;
}
}
#else
for (int i = 0; i < DTT_SIZE4; i++){ // copy 32 rows (4 quadrants of 8 rows) for (int i = 0; i < DTT_SIZE4; i++){ // copy 32 rows (4 quadrants of 8 rows)
*clt_tile1i= *gpu_tile1; *clt_tile1i= *gpu_tile1;
*clt_tile2i= *gpu_tile2; *clt_tile2i= *gpu_tile2;
...@@ -1094,6 +1120,7 @@ extern "C" __global__ void correlate2D_inner( ...@@ -1094,6 +1120,7 @@ extern "C" __global__ void correlate2D_inner(
gpu_tile1 += DTT_SIZE; gpu_tile1 += DTT_SIZE;
gpu_tile2 += DTT_SIZE; gpu_tile2 += DTT_SIZE;
} }
#endif //USE_LOG
__syncthreads(); __syncthreads();
#ifdef DBG_TILE #ifdef DBG_TILE
#ifdef DEBUG6 #ifdef DEBUG6
......
...@@ -281,6 +281,32 @@ extern "C" __global__ void calc_rot_deriv( ...@@ -281,6 +281,32 @@ extern "C" __global__ void calc_rot_deriv(
} }
/**
 * Wrapper kernel that launches get_tiles_offsets from the device.
 *
 * The child grid has ceil(num_tiles / TILES_PER_BLOCK_GEOM) blocks, each with
 * NUM_CAMS x TILES_PER_BLOCK_GEOM threads. Only the thread with threadIdx.x == 0
 * performs the launch; all arguments are forwarded unchanged. No device-side
 * synchronization is done after the launch.
 * NOTE(review): presumably this wrapper is always started with a single thread - confirm at the call site.
 *
 * @param gpu_tasks               task array, passed through to get_tiles_offsets
 * @param num_tiles               number of tiles in the task list
 * @param gpu_geometry_correction geometry correction data, passed through
 * @param gpu_correction_vector   correction vector, passed through
 * @param gpu_rByRDist            table whose length should match RBYRDIST_LEN
 * @param gpu_rot_deriv           rotation/derivative data, passed through
 */
extern "C" __global__ void calculate_tiles_offsets(
		struct tp_task     * gpu_tasks,
		int                  num_tiles,
		struct gc          * gpu_geometry_correction,
		struct corr_vector * gpu_correction_vector,
		float              * gpu_rByRDist,
		trot_deriv         * gpu_rot_deriv)
{
	// Exactly one launch per block: every other thread has nothing to do.
	if (threadIdx.x != 0) {
		return;
	}
	// Round up so a partially filled last block still covers the trailing tiles.
	const dim3 block_shape(NUM_CAMS, TILES_PER_BLOCK_GEOM, 1);
	const dim3 grid_shape((num_tiles + TILES_PER_BLOCK_GEOM - 1) / TILES_PER_BLOCK_GEOM, 1, 1);
	get_tiles_offsets<<<grid_shape, block_shape>>>(
			gpu_tasks,
			num_tiles,
			gpu_geometry_correction,
			gpu_correction_vector,
			gpu_rByRDist,
			gpu_rot_deriv);
}
/* /*
* blockDim.x = NUM_CAMS * blockDim.x = NUM_CAMS
* blockDim.y = TILES_PER_BLOCK_GEOM * blockDim.y = TILES_PER_BLOCK_GEOM
...@@ -295,12 +321,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -295,12 +321,7 @@ extern "C" __global__ void get_tiles_offsets(
trot_deriv * gpu_rot_deriv) trot_deriv * gpu_rot_deriv)
{ {
int task_num = blockIdx.x * blockDim.y + threadIdx.y; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y int task_num = blockIdx.x * blockDim.y + threadIdx.y; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y
if (task_num >= num_tiles){
return;
}
int thread_xy = blockDim.x * threadIdx.y + threadIdx.x; int thread_xy = blockDim.x * threadIdx.y + threadIdx.x;
int ncam = threadIdx.x;
// threadIdx.x - numcam, used for per-camera
__shared__ struct gc geometry_correction; __shared__ struct gc geometry_correction;
__shared__ float rByRDist [RBYRDIST_LEN]; __shared__ float rByRDist [RBYRDIST_LEN];
__shared__ struct corr_vector extrinsic_corr; __shared__ struct corr_vector extrinsic_corr;
...@@ -355,6 +376,10 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -355,6 +376,10 @@ extern "C" __global__ void get_tiles_offsets(
} }
} }
__syncthreads(); __syncthreads();
int ncam = threadIdx.x;
if (task_num >= num_tiles){
return;
}
int imu_exists = // todo - calculate once with rot_deriv? int imu_exists = // todo - calculate once with rot_deriv?
(extrinsic_corr.imu_rot[0] != 0.0) || (extrinsic_corr.imu_rot[0] != 0.0) ||
(extrinsic_corr.imu_rot[1] != 0.0) || (extrinsic_corr.imu_rot[1] != 0.0) ||
...@@ -418,7 +443,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -418,7 +443,7 @@ extern "C" __global__ void get_tiles_offsets(
xyz[0] = SCENE_UNITS_SCALE * pXc * geometry_correction.disparityRadius / disparity; xyz[0] = SCENE_UNITS_SCALE * pXc * geometry_correction.disparityRadius / disparity;
xyz[1] = -SCENE_UNITS_SCALE * pYc * geometry_correction.disparityRadius / disparity; xyz[1] = -SCENE_UNITS_SCALE * pYc * geometry_correction.disparityRadius / disparity;
// next radial distortion coefficients are for this, not master camera (may be the same) // next radial distortion coefficients are for this, not master camera (may be the same)
// geometry_correction.rad_coeff[i]; // geometry_correction.rad_coeff[i];
float fl_pix = geometry_correction.focalLength/(0.001 * geometry_correction.pixelSize); // focal length in pixels - this camera float fl_pix = geometry_correction.focalLength/(0.001 * geometry_correction.pixelSize); // focal length in pixels - this camera
float ri_scale = 0.001 * geometry_correction.pixelSize / geometry_correction.distortionRadius; float ri_scale = 0.001 * geometry_correction.pixelSize / geometry_correction.distortionRadius;
...@@ -486,7 +511,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -486,7 +511,7 @@ extern "C" __global__ void get_tiles_offsets(
float pYid = pYci * rD2rND; float pYid = pYci * rD2rND;
pXY[0] = pXid + geometry_correction.pXY0[ncam][0]; pXY[0] = pXid + geometry_correction.pXY0[ncam][0];
pXY[1] = pYid + geometry_correction.pXY0[ncam][1]; pXY[1] = pYid + geometry_correction.pXY0[ncam][1];
// new for ERS // new for ERS
pY_offsets[threadIdx.y][ncam] = pXY[1] - geometry_correction.woi_tops[ncam]; pY_offsets[threadIdx.y][ncam] = pXY[1] - geometry_correction.woi_tops[ncam];
__syncthreads(); __syncthreads();
// Each thread re-calculate same sum // Each thread re-calculate same sum
...@@ -511,9 +536,6 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -511,9 +536,6 @@ extern "C" __global__ void get_tiles_offsets(
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG21 #endif // DEBUG21
// float rvi[3];
float drvi_daz [3]; // drvi_daz = deriv_rots[i][0].times(vi); float drvi_daz [3]; // drvi_daz = deriv_rots[i][0].times(vi);
float drvi_dtl [3]; // drvi_dtl = deriv_rots[i][1].times(vi); float drvi_dtl [3]; // drvi_dtl = deriv_rots[i][1].times(vi);
float drvi_drl [3]; // drvi_drl = deriv_rots[i][2].times(vi); float drvi_drl [3]; // drvi_drl = deriv_rots[i][2].times(vi);
...@@ -547,7 +569,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -547,7 +569,7 @@ extern "C" __global__ void get_tiles_offsets(
float disp_dist[4]; // only for this channel, to be copied to global gpu_tasks in the end float disp_dist[4]; // only for this channel, to be copied to global gpu_tasks in the end
float dpXci_pYci_imu_lin[2][3]; float dpXci_pYci_imu_lin[2][3];
/* /*
double [][] add0 = { double [][] add0 = {
{-rXY[i][0], rXY[i][1], 0.0}, {-rXY[i][0], rXY[i][1], 0.0},
{-rXY[i][1], -rXY[i][0], 0.0}, {-rXY[i][1], -rXY[i][0], 0.0},
...@@ -570,12 +592,11 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -570,12 +592,11 @@ extern "C" __global__ void get_tiles_offsets(
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG21 #endif // DEBUG21
// now first column of 2x2 dd1 - x, y components of derivatives by disparity, second column - derivatives by ortho to disparity (~Y in 2d correlation) // now first column of 2x2 dd1 - x, y components of derivatives by disparity, second column - derivatives by ortho to disparity (~Y in 2d correlation)
// unity vector in the direction of radius // unity vector in the direction of radius
float c_dist = pXci/rNDi; float c_dist = pXci/rNDi;
float s_dist = pYci/rNDi; float s_dist = pYci/rNDi;
//#undef NVRTC_BUG //#undef NVRTC_BUG
float drD2rND_dri = 0.0; float drD2rND_dri = 0.0;
{ {
float rri = 1.0; float rri = 1.0;
...@@ -618,28 +639,16 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -618,28 +639,16 @@ extern "C" __global__ void get_tiles_offsets(
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG21 #endif // DEBUG21
gpu_tasks[task_num].disp_dist[ncam][0] = disp_dist[0]; gpu_tasks[task_num].disp_dist[ncam][0] = disp_dist[0];
gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1]; gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2]; gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3]; gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
// imu = extrinsic_corr.getIMU(i); // currently it is common for all channels // imu = extrinsic_corr.getIMU(i); // currently it is common for all channels
// float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15 // float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
// float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19 geometry_correction.imu_move // float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19 geometry_correction.imu_move
// ERS linear does not yet use per-port rotations, probably not needed // ERS linear does not yet use per-port rotations, probably not needed
if (imu_exists){ if (imu_exists){
/*
float delta_t = disp_dist[2] * disparity * geometry_correction.line_time; // positive for top cameras, negative - for bottom //disp_dist[2]=dd2.get(1, 0)
float ers_Xci = delta_t * (
dpXci_dtilt * extrinsic_corr.imu_rot[0] +
dpXci_dazimuth * extrinsic_corr.imu_rot[1] +
dpXci_droll * extrinsic_corr.imu_rot[2]);
float ers_Yci = delta_t* (
dpYci_dtilt * extrinsic_corr.imu_rot[0] +
dpYci_dazimuth * extrinsic_corr.imu_rot[1] +
dpYci_droll * extrinsic_corr.imu_rot[2]);
*/
float ers_x = float ers_x =
dpXci_dtilt * extrinsic_corr.imu_rot[0] + dpXci_dtilt * extrinsic_corr.imu_rot[0] +
dpXci_dazimuth * extrinsic_corr.imu_rot[1] + dpXci_dazimuth * extrinsic_corr.imu_rot[1] +
...@@ -649,11 +658,8 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -649,11 +658,8 @@ extern "C" __global__ void get_tiles_offsets(
dpYci_dazimuth * extrinsic_corr.imu_rot[1] + dpYci_dazimuth * extrinsic_corr.imu_rot[1] +
dpYci_droll * extrinsic_corr.imu_rot[2]; dpYci_droll * extrinsic_corr.imu_rot[2];
#ifdef DEBUG21 #ifdef DEBUG21
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){ if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
// printf("delta_t = %f, ers_Xci = %f, ers_Yci = %f\n", delta_t, ers_Xci, ers_Yci);
printf("ers_x = %f, ers_y = %f\n", ers_x, ers_y); printf("ers_x = %f, ers_y = %f\n", ers_x, ers_y);
} }
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
...@@ -665,15 +671,8 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -665,15 +671,8 @@ extern "C" __global__ void get_tiles_offsets(
dpXci_pYci_imu_lin[0][0] = -wdisparity / k; // dpx/ dworld_X dpXci_pYci_imu_lin[0][0] = -wdisparity / k; // dpx/ dworld_X
dpXci_pYci_imu_lin[1][1] = wdisparity / k; // dpy/ dworld_Y dpXci_pYci_imu_lin[1][1] = wdisparity / k; // dpy/ dworld_Y
dpXci_pYci_imu_lin[0][2] = (xyz[0] / k) * dwdisp_dz; // dpx/ dworld_Z dpXci_pYci_imu_lin[0][2] = (xyz[0] / k) * dwdisp_dz; // dpx/ dworld_Z
dpXci_pYci_imu_lin[1][2] = (xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z //// dpXci_pYci_imu_lin[1][2] = (xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z
/* dpXci_pYci_imu_lin[1][2] = -(xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z
ers_Xci += delta_t* (
dpXci_pYci_imu_lin[0][0] * extrinsic_corr.imu_move[0] +
dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2]);
ers_Yci += delta_t* (
dpXci_pYci_imu_lin[1][1] * extrinsic_corr.imu_move[1] +
dpXci_pYci_imu_lin[1][2] * extrinsic_corr.imu_move[2]);
*/
ers_x += dpXci_pYci_imu_lin[0][0] * extrinsic_corr.imu_move[0] + ers_x += dpXci_pYci_imu_lin[0][0] * extrinsic_corr.imu_move[0] +
dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2]; dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2];
ers_y += dpXci_pYci_imu_lin[1][1] * extrinsic_corr.imu_move[1] + ers_y += dpXci_pYci_imu_lin[1][1] * extrinsic_corr.imu_move[1] +
...@@ -700,8 +699,6 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -700,8 +699,6 @@ extern "C" __global__ void get_tiles_offsets(
// copy results to global memory pXY, disp_dist // copy results to global memory pXY, disp_dist
gpu_tasks[task_num].xy[ncam][0] = pXY[0]; gpu_tasks[task_num].xy[ncam][0] = pXY[0];
gpu_tasks[task_num].xy[ncam][1] = pXY[1]; gpu_tasks[task_num].xy[ncam][1] = pXY[1];
} }
extern "C" __global__ void calcReverseDistortionTable( extern "C" __global__ void calcReverseDistortionTable(
......
...@@ -149,6 +149,15 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -149,6 +149,15 @@ extern "C" __global__ void get_tiles_offsets(
float * gpu_rByRDist, // length should match RBYRDIST_LEN float * gpu_rByRDist, // length should match RBYRDIST_LEN
trot_deriv * gpu_rot_deriv); trot_deriv * gpu_rot_deriv);
// Declaration of the calculate_tiles_offsets CUDA kernel (__global__, C linkage, returns nothing).
// Its parameter list ends with the same gpu_rByRDist / gpu_rot_deriv pair as the
// get_tiles_offsets declaration directly above.
// NOTE(review): the kernel definition is not visible from this declaration; presumably it is
// the entry point that drives get_tiles_offsets over all tiles - confirm against the kernel body.
extern "C" __global__ void calculate_tiles_offsets(
struct tp_task * gpu_tasks, // tile tasks; presumably indexed per tile like gpu_tasks[task_num] in get_tiles_offsets - TODO confirm
int num_tiles, // number of tiles in task
struct gc * gpu_geometry_correction, // NOTE(review): geometry correction data, judging by the name - confirm layout in struct gc
struct corr_vector * gpu_correction_vector, // NOTE(review): correction vector, judging by the name - confirm layout in struct corr_vector
float * gpu_rByRDist, // length should match RBYRDIST_LEN
trot_deriv * gpu_rot_deriv); // NOTE(review): presumably the rotation data filled by calc_rot_deriv - confirm
// uses NUM_CAMS blocks, (3,3,3) threads // uses NUM_CAMS blocks, (3,3,3) threads
extern "C" __global__ void calc_rot_deriv( extern "C" __global__ void calc_rot_deriv(
struct corr_vector * gpu_correction_vector, struct corr_vector * gpu_correction_vector,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment