diff --git a/src/main/java/com/elphel/imagej/correction/Eyesis_Correction.java b/src/main/java/com/elphel/imagej/correction/Eyesis_Correction.java index 93d99a07263f0cec69b7ba93571f27c5f64cd0d2..2bf1ed129386a553ad8556e845c85e2e0318f941 100644 --- a/src/main/java/com/elphel/imagej/correction/Eyesis_Correction.java +++ b/src/main/java/com/elphel/imagej/correction/Eyesis_Correction.java @@ -7053,7 +7053,7 @@ private Panel panel1, } dpixels[i] = d; } - if (disparity_max > 0) { + if (log_mode && (disparity_max > 0)) { mn = 0.0; double d = disparity_max; if (d < 0.0) { // diff --git a/src/main/java/com/elphel/imagej/tileprocessor/CLTPass3d.java b/src/main/java/com/elphel/imagej/tileprocessor/CLTPass3d.java index 7cdcfec6a48aefc6f21f3a5005dbd4e6be8e6ceb..047601d0a13a791eae01e78526f8633dbb87b760 100644 --- a/src/main/java/com/elphel/imagej/tileprocessor/CLTPass3d.java +++ b/src/main/java/com/elphel/imagej/tileprocessor/CLTPass3d.java @@ -1019,24 +1019,9 @@ public class CLTPass3d{ double step_threshold, double min_disparity, double max_disparity, -// double strength_floor, -// double strength_pow, double stBlurSigma, boolean smplMode, // = true; // Use sample mode (false - regular tile mode) MeasuredLayersFilterParameters mlfp, -// int smplSide, // = 2; // Sample size (side of a square) -// int smplNum, // = 3; // Number after removing worst -// double smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample -// boolean smplWnd, // use window functions for the samples - -// double max_abs_tilt, // 2.0; // pix per tile -// double max_rel_tilt, // 0.2; // (pix / disparity) per tile -// double damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// double min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// double transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// int far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of 
disparity) -// double far_power, // 3.0; // Raise disparity to this power before averaging for far objects - int measSel) { this.superTiles = new SuperTiles( @@ -1046,23 +1031,9 @@ public class CLTPass3d{ step_threshold, min_disparity, max_disparity, -// strength_floor, -// strength_pow, stBlurSigma, smplMode, // = true; // Use sample mode (false - regular tile mode) mlfp, -// smplSide, // = 2; // Sample size (side of a square) -// smplNum, // = 3; // Number after removing worst -// smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample -// smplWnd, // final boolean smplWnd, // use window functions for the samples -// max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile -// max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity -// damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// far_power, // 1.0; // Raise disparity to this power before averaging for far objects -// true, // boolean null_if_none, measSel); return this.superTiles; } @@ -1072,19 +1043,6 @@ public class CLTPass3d{ boolean smplMode, // = true; // Use sample mode (false - regular tile mode) MeasuredLayersFilterParameters mlfp, -// int smplSide, // = 2; // Sample size (side of a square) -// int smplNum, // = 3; // Number after removing worst -// double smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample -// boolean smplWnd, // use window functions for the samples - -// double max_abs_tilt, // 2.0; // pix per tile -// double max_rel_tilt, // 0.2; // (pix / disparity) per tile -// double damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// double min_tilt_disp, // 4.0; // Disparity switch between filtering 
modes - near objects use tilts, far - use max disparity -// double transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// int far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// double far_power, // 3.0; // Raise disparity to this power before averaging for far objects - int measSel) { if (this.superTiles == null){ @@ -1096,19 +1054,6 @@ public class CLTPass3d{ smplMode, // = true; // Use sample mode (false - regular tile mode) mlfp, -// smplSide, // = 2; // Sample size (side of a square) -// smplNum, // = 3; // Number after removing worst -// smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample -// smplWnd, // use window functions for the samples - -// max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile -// max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity -// damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// far_power, // 1.0; // Raise disparity to this power before averaging for far objects - measSel); } diff --git a/src/main/java/com/elphel/imagej/tileprocessor/SuperTiles.java b/src/main/java/com/elphel/imagej/tileprocessor/SuperTiles.java index a302bfacf3174638536943a215ec74ea94be7103..e3c9fffd0848fd4ef8a2737c6b501f377f93ac11 100644 --- a/src/main/java/com/elphel/imagej/tileprocessor/SuperTiles.java +++ b/src/main/java/com/elphel/imagej/tileprocessor/SuperTiles.java @@ -104,24 +104,6 @@ public class SuperTiles{ double stBlurSigma, boolean smplMode, // = true; // Use sample mode (false - regular tile mode) MeasuredLayersFilterParameters mlfp, - -// double strength_floor, -// double strength_pow, -// boolean smplMode, // = true; // Use 
sample mode (false - regular tile mode) -// int smplSide, // = 2; // Sample size (side of a square) -// int smplNum, // = 3; // Number after removing worst -// double smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample -// boolean smplWnd, // use window functions for the samples - -// double max_abs_tilt, // 2.0; // pix per tile -// double max_rel_tilt, // 0.2; // (pix / disparity) per tile -// double damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// double min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// double transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// int far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// double far_power, // 3.0; // Raise disparity to this power before averaging for far objects -// boolean null_if_none, - int measSel) { this.cltPass3d = cltPass3d; @@ -135,21 +117,6 @@ public class SuperTiles{ this.smplMode = smplMode; // Use sample mode (false - regular tile mode) this.mlfp = mlfp.clone(); -// this.strength_floor = strength_floor; -// this.strength_pow = strength_pow; -// this.smplSide = smplSide; // Sample size (side of a square) -// this.smplNum = smplNum; // Number after removing worst -// this.smplRms = smplRms; // Maximal RMS of the remaining tiles in a sample -// this.max_abs_tilt = max_abs_tilt; -// this.max_rel_tilt = max_rel_tilt; -// this.damp_tilt = damp_tilt; -// this.min_tilt_disp = min_tilt_disp; -// this.transition = transition; -// this.far_mode = far_mode; -// this.far_power = far_power; -// this.smplWnd = smplWnd; // Use window functions for the samples - - this.measSel = measSel; this.step_threshold_near = this.step_threshold_far * step_near / this.step_far ; this.bin_far = this.step_threshold_far / this.step_far; @@ -197,18 +164,6 @@ public class SuperTiles{ null, // boolean [][] tile_sel, // null or per-measurement layer, per-tile 
selection. For each layer null - do not use, {} - use all smplMode, // final boolean smplMode, // = true; // Use sample mode (false - regular tile mode) mlfp, -// smplSide, // final int smplSide, // = 2; // Sample size (side of a square) -// smplNum, // final int smplNum, // = 3; // Number after removing worst -// smplRms, // final double smplRms, // = 0.1; // Maximal RMS of the remaining tiles in a sample -// smplWnd, // final boolean smplWnd, // use window functions for the samples - -// max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile -// max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity -// damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// far_power, // 1.0; // Raise disparity to this power before averaging for far objects measSel); // calculate and blur supertiles (for all, not just selected?) 
if (tileProcessor.globalDebugLevel > 0){ diff --git a/src/main/java/com/elphel/imagej/tileprocessor/TileProcessor.java b/src/main/java/com/elphel/imagej/tileprocessor/TileProcessor.java index aed6cc3d4c9beea61880a363ef94caa7dac05868..3d1b9c4e7f8d38a6e54a9ff9d91a9756396ea44a 100644 --- a/src/main/java/com/elphel/imagej/tileprocessor/TileProcessor.java +++ b/src/main/java/com/elphel/imagej/tileprocessor/TileProcessor.java @@ -6774,11 +6774,11 @@ ImageDtt.startAndJoin(threads); //====================== public void showPlanes( - CLTParameters clt_parameters, + CLTParameters clt_parameters, GeometryCorrection geometryCorrection, - final int threadsMax, // maximal number of threads to launch - final boolean updateStatus, - final int debugLevel) + final int threadsMax, // maximal number of threads to launch + final boolean updateStatus, + final int debugLevel) { final boolean batch_mode = clt_parameters.batch_run; //disable any debug images trimCLTPasses(); // make possible to run this method multiple time - remove extra passes added by it last time @@ -6796,22 +6796,9 @@ ImageDtt.startAndJoin(threads); clt_parameters.stStepThreshold, // double step_threshold, clt_parameters.stMinDisparity, // double min_disparity, clt_parameters.grow_disp_max, // double max_disparity, -// clt_parameters.stFloor, // double strength_floor, -// clt_parameters.stPow, // double strength_pow, 0.0, // NO BLUR double stBlurSigma) false, //clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode) clt_parameters.mlfp, // Filter parameters -// clt_parameters.stSmplSide, // Sample size (side of a square) -// clt_parameters.stSmplNum, // Number after removing worst -// clt_parameters.stSmplRms, // Maximal RMS of the remaining tiles in a sample -// clt_parameters.stSmplWnd, // boolean smplWnd, // use window functions for the samples -// clt_parameters.fs_max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile -// clt_parameters.fs_max_rel_tilt, // 0.2; // Maximal relative tilt in 
pixels/tile/disparity -// clt_parameters.fs_damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// clt_parameters.fs_min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// clt_parameters.fs_transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// clt_parameters.fs_far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// clt_parameters.fs_far_power, // 1.0; // Raise disparity to this power before averaging for far objects clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert dbg_hist[0] = scan_prev.getSuperTiles().showDisparityHistogram(); scan_prev.setSuperTiles( @@ -6820,22 +6807,9 @@ ImageDtt.startAndJoin(threads); clt_parameters.stStepThreshold, // double step_threshold, clt_parameters.stMinDisparity, // double min_disparity, clt_parameters.grow_disp_max, // double max_disparity, -// clt_parameters.stFloor, // double strength_floor, -// clt_parameters.stPow, // double strength_pow, 0.0, // NO BLUR double stBlurSigma) clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode) clt_parameters.mlfp, // Filter parameters -// clt_parameters.stSmplSide, // Sample size (side of a square) -// clt_parameters.stSmplNum, // Number after removing worst -// clt_parameters.stSmplRms, // Maximal RMS of the remaining tiles in a sample -// clt_parameters.stSmplWnd, // boolean smplWnd, // use window functions for the samples -// clt_parameters.fs_max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile -// clt_parameters.fs_max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity -// clt_parameters.fs_damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// clt_parameters.fs_min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// 
clt_parameters.fs_transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// clt_parameters.fs_far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// clt_parameters.fs_far_power, // 1.0; // Raise disparity to this power before averaging for far objects clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert dbg_hist[1] = scan_prev.getSuperTiles().showDisparityHistogram(); } @@ -6847,22 +6821,9 @@ ImageDtt.startAndJoin(threads); clt_parameters.stStepThreshold, // double step_threshold, clt_parameters.stMinDisparity, // double min_disparity, clt_parameters.grow_disp_max, // double max_disparity, -// clt_parameters.stFloor, // double strength_floor, -// clt_parameters.stPow, // double strength_pow, clt_parameters.stSigma, // with blur double stBlurSigma) false, //clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode) clt_parameters.mlfp, // Filter parameters -// clt_parameters.stSmplSide, // Sample size (side of a square) -// clt_parameters.stSmplNum, // Number after removing worst -// clt_parameters.stSmplRms, // Maximal RMS of the remaining tiles in a sample -// clt_parameters.stSmplWnd, // boolean smplWnd, // use window functions for the samples -// clt_parameters.fs_max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile -// clt_parameters.fs_max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity -// clt_parameters.fs_damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// clt_parameters.fs_min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// clt_parameters.fs_transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// clt_parameters.fs_far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// clt_parameters.fs_far_power, // 1.0; // Raise disparity to this 
power before averaging for far objects clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert if (show_st) { // otherwise only blured version is needed dbg_hist[2] = scan_prev.getSuperTiles().showDisparityHistogram(); @@ -6884,22 +6845,9 @@ ImageDtt.startAndJoin(threads); clt_parameters.stStepThreshold, // double step_threshold, clt_parameters.stMinDisparity, // double min_disparity, clt_parameters.grow_disp_max, // double max_disparity, -// clt_parameters.stFloor, // double strength_floor, -// clt_parameters.stPow, // double strength_pow, 0.0, // NO BLUR double stBlurSigma) clt_parameters.stSmplMode, // Use sample mode (false - regular tile mode) clt_parameters.mlfp, // Filter parameters -// clt_parameters.stSmplSide, // Sample size (side of a square) -// clt_parameters.stSmplNum, // Number after removing worst -// clt_parameters.stSmplRms, // Maximal RMS of the remaining tiles in a sample -// clt_parameters.stSmplWnd, // boolean smplWnd, // use window functions for the samples -// clt_parameters.fs_max_abs_tilt, // 2.0; // Maximal absolute tilt in pixels/tile -// clt_parameters.fs_max_rel_tilt, // 0.2; // Maximal relative tilt in pixels/tile/disparity -// clt_parameters.fs_damp_tilt, // 0.001; // Damp tilt to handle insufficient (co-linear)data -// clt_parameters.fs_min_tilt_disp, // 4.0; // Disparity switch between filtering modes - near objects use tilts, far - use max disparity -// clt_parameters.fs_transition, // 1.0; // Mode transition range (between tilted and maximal disparity) -// clt_parameters.fs_far_mode, // 1; // Far objects filtering mode (0 - off, 1 - power of disparity) -// clt_parameters.fs_far_power, // 1.0; // Raise disparity to this power before averaging for far objects clt_parameters.stMeasSel); // bitmask of the selected measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert } @@ -7063,22 +7011,6 @@ ImageDtt.startAndJoin(threads); debugLevel, // final int 
debugLevel) clt_parameters.tileX, clt_parameters.tileY); -/* - if (clt_parameters.plSplitApply) { - while (true) { - int num_added = 0; - num_added += st.fillSquares(); - if (debugLevel > -1) { - System.out.println("after fillSquares() added "+num_added); - } - num_added += st.cutCorners(); - if (debugLevel > -1) { - System.out.println("after plCutCorners() added (cumulative) "+num_added); - } - if (num_added == 0) break; - } - } -*/ double [][][] dispStrength = st.getDisparityStrengths( clt_parameters.stMeasSel); // int stMeasSel) // = 1; // Select measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert) boolean [][] tileSel = st.getMeasurementSelections( @@ -7199,25 +7131,6 @@ ImageDtt.startAndJoin(threads); clt_parameters.tileY); } // if (clt_parameters.plSplitApply) -/* - while (true) { - int num_added = 0; - if (clt_parameters.plFillSquares){ - num_added += st.fillSquares(); - } - if (debugLevel > -1) { - System.out.println("after fillSquares() added "+num_added); - } - if (clt_parameters.plCutCorners){ - num_added += st.cutCorners(); - } - if (debugLevel > -1) { - System.out.println("after plCutCorners() added (cumulative) "+num_added); - } - if (num_added == 0) break; - } - -*/ int max_num_tries = 20; if (clt_parameters.plIterations > 0) { diff --git a/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java b/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java index 19a7824ae33fe27396d4ea2fce229616fd4ff030..f8c9e0f2fed5ea88db7d302af4baacfa66a03077 100644 --- a/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java +++ b/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java @@ -9278,15 +9278,15 @@ if (debugLevel > -100) return true; // temporarily ! 
quadCLT_main.tp.clt_3d_passes.get( quadCLT_main.tp.clt_3d_passes.size() -1), false); // boolean force_final); - if (debugLevel > -1) { //-5){ - int scan_index = quadCLT_main.tp.clt_3d_passes.size() -1; - quadCLT_main.tp.showScan( - quadCLT_main.tp.clt_3d_passes.get(scan_index), // CLTPass3d scan, - "test_pre-after-"+scan_index); //String title) - } + if (debugLevel > -1) { //-5){ + int scan_index = quadCLT_main.tp.clt_3d_passes.size() -1; + quadCLT_main.tp.showScan( + quadCLT_main.tp.clt_3d_passes.get(scan_index), // CLTPass3d scan, + "test_pre-after-"+scan_index); //String title) + } + + - - dsi[DSI_DISPARITY_MAIN] = main_last_scan[0]; dsi[DSI_STRENGTH_MAIN] = main_last_scan[1]; if (quadCLT_main.correctionsParameters.clt_batch_dsi) { // Should be always enabled ? @@ -9339,7 +9339,7 @@ if (debugLevel > -100) return true; // temporarily ! updateStatus, // final boolean updateStatus, debugLevel); // final int debugLevel) } - */ + */ // copy regardless of ML generation // See if it will copy all files, not just the main camera ones diff --git a/src/main/resources/kernels/TileProcessor.cuh b/src/main/resources/kernels/TileProcessor.cuh index 2aa544cb2f09286df53d48a0e2ab944c96dab980..02675ababff3c9f1e43ccc1bf1f5e1566798b70d 100644 --- a/src/main/resources/kernels/TileProcessor.cuh +++ b/src/main/resources/kernels/TileProcessor.cuh @@ -403,6 +403,16 @@ __constant__ float lpf_corr[64]={ // modify if needed 0.02728573f, 0.02374977f, 0.01799322f, 0.01186582f, 0.00681327f, 0.00341565f, 0.00153247f, 0.00074451f }; +__constant__ float LoG_corr[64]={ // modify if needed high-pass filter before correlation to fit into float range + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f + }; __constant__ int pairs[6][2]={ {0, 1}, @@ -1086,6 +1096,22 @@ extern "C" __global__ void correlate2D_inner( float * clt_tile1i = clt_tile1 + threadIdx.x; float * clt_tile2i = clt_tile2 + threadIdx.x; #pragma unroll +#define USE_LOG +#ifdef USE_LOG + // Apply high-pass filter to correlation inputs to reduce dynamic range before multiplication + for (int q = 0; q < 4; q++){ + float *log = LoG_corr + threadIdx.x; + for (int i = 0; i < DTT_SIZE; i++){ // copy 32 rows (4 quadrants of 8 rows) + *clt_tile1i= (*gpu_tile1) * (*log); + *clt_tile2i= (*gpu_tile2) * (*log); + clt_tile1i += DTT_SIZE1; + clt_tile2i += DTT_SIZE1; + gpu_tile1 += DTT_SIZE; + gpu_tile2 += DTT_SIZE; + log += DTT_SIZE; + } + } +#else for (int i = 0; i < DTT_SIZE4; i++){ // copy 32 rows (4 quadrants of 8 rows) *clt_tile1i= *gpu_tile1; *clt_tile2i= *gpu_tile2; @@ -1093,7 +1119,8 @@ extern "C" __global__ void correlate2D_inner( clt_tile2i += DTT_SIZE1; gpu_tile1 += DTT_SIZE; gpu_tile2 += DTT_SIZE; - } + } +#endif //USE_LOG __syncthreads(); #ifdef DBG_TILE #ifdef DEBUG6 diff --git a/src/main/resources/kernels/geometry_correction.cu b/src/main/resources/kernels/geometry_correction.cu index 5cc70cccac526891158cce35d78a2f6a3d48b6ab..b125c117dadbbab7a6334d0c80a68dd86305abd4 100644 --- a/src/main/resources/kernels/geometry_correction.cu +++ b/src/main/resources/kernels/geometry_correction.cu @@ -281,6 +281,32 @@ extern "C" __global__ void calc_rot_deriv( } +extern "C" __global__ void calculate_tiles_offsets( + 
struct tp_task * gpu_tasks, + int num_tiles, // number of tiles in task + struct gc * gpu_geometry_correction, + struct corr_vector * gpu_correction_vector, + float * gpu_rByRDist, // length should match RBYRDIST_LEN + trot_deriv * gpu_rot_deriv) +{ + dim3 threads_geom(NUM_CAMS,TILES_PER_BLOCK_GEOM, 1); // child block: NUM_CAMS x TILES_PER_BLOCK_GEOM threads + dim3 grid_geom ((num_tiles+TILES_PER_BLOCK_GEOM-1)/TILES_PER_BLOCK_GEOM, 1, 1); // child grid: num_tiles / TILES_PER_BLOCK_GEOM blocks, rounded up + if (threadIdx.x == 0) { // always 1 + get_tiles_offsets<<<grid_geom, threads_geom>>> ( + gpu_tasks, // struct tp_task * gpu_tasks, + num_tiles, // int num_tiles, // number of tiles in task list + gpu_geometry_correction, // struct gc * gpu_geometry_correction, + gpu_correction_vector, // struct corr_vector * gpu_correction_vector, + gpu_rByRDist, // float * gpu_rByRDist) // length should match RBYRDIST_LEN + gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv); + + } +// __syncthreads();// __syncwarp(); +// cudaDeviceSynchronize(); +// cudaDeviceSynchronize(); +} + + /* * blockDim.x = NUM_CAMS * blockDim.y = TILES_PER_BLOCK_GEOM @@ -295,12 +321,7 @@ extern "C" __global__ void get_tiles_offsets( trot_deriv * gpu_rot_deriv) { int task_num = blockIdx.x * blockDim.y + threadIdx.y; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y - if (task_num >= num_tiles){ - return; - } int thread_xy = blockDim.x * threadIdx.y + threadIdx.x; - int ncam = threadIdx.x; - // threadIdx.x - numcam, used for per-camera __shared__ struct gc geometry_correction; __shared__ float rByRDist [RBYRDIST_LEN]; __shared__ struct corr_vector extrinsic_corr; @@ -355,6 +376,10 @@ extern "C" __global__ void get_tiles_offsets( } } __syncthreads(); + int ncam = threadIdx.x; + if (task_num >= num_tiles){ + return; + } int imu_exists = // todo - calculate once with rot_deriv? 
(extrinsic_corr.imu_rot[0] != 0.0) || (extrinsic_corr.imu_rot[1] != 0.0) || @@ -392,8 +417,8 @@ extern "C" __global__ void get_tiles_offsets( int tileY = (cxy >> 16); #ifdef DEBUG23 if ((ncam == 0) && (tileX == DBG_TILE_X) && (tileY == DBG_TILE_Y)){ - printf ("\n get_tiles_offsets(): Debugging tileX=%d, tileY=%d, ncam = %d\n", tileX,tileY,ncam); - printf("\n"); + printf ("\n get_tiles_offsets(): Debugging tileX=%d, tileY=%d, ncam = %d\n", tileX,tileY,ncam); + printf("\n"); __syncthreads(); } #endif //#ifdef DEBUG23 @@ -418,7 +443,7 @@ extern "C" __global__ void get_tiles_offsets( xyz[0] = SCENE_UNITS_SCALE * pXc * geometry_correction.disparityRadius / disparity; xyz[1] = -SCENE_UNITS_SCALE * pYc * geometry_correction.disparityRadius / disparity; // next radial distortion coefficients are for this, not master camera (may be the same) -// geometry_correction.rad_coeff[i]; + // geometry_correction.rad_coeff[i]; float fl_pix = geometry_correction.focalLength/(0.001 * geometry_correction.pixelSize); // focal length in pixels - this camera float ri_scale = 0.001 * geometry_correction.pixelSize / geometry_correction.distortionRadius; @@ -440,7 +465,7 @@ extern "C" __global__ void get_tiles_offsets( // above is common code, below - per camera (was cycle in Java, here individual threads //for (int ncam = 0; ncam < NUM_CAMS; ncam++){ - // non-distorted XY of the shifted location of the individual sensor + // non-distorted XY of the shifted location of the individual sensor // -------------- Each camera calculated by its own thread ---------------- float pXci0 = pXc - disparity * rXY[0]; // [ncam][0]; // in pixels @@ -486,7 +511,7 @@ extern "C" __global__ void get_tiles_offsets( float pYid = pYci * rD2rND; pXY[0] = pXid + geometry_correction.pXY0[ncam][0]; pXY[1] = pYid + geometry_correction.pXY0[ncam][1]; -// new for ERS + // new for ERS pY_offsets[threadIdx.y][ncam] = pXY[1] - geometry_correction.woi_tops[ncam]; __syncthreads(); // Each thread re-calculate same sum @@ 
-511,9 +536,6 @@ extern "C" __global__ void get_tiles_offsets( __syncthreads();// __syncwarp(); #endif // DEBUG21 - - - // float rvi[3]; float drvi_daz [3]; // drvi_daz = deriv_rots[i][0].times(vi); float drvi_dtl [3]; // drvi_dtl = deriv_rots[i][1].times(vi); float drvi_drl [3]; // drvi_drl = deriv_rots[i][2].times(vi); @@ -547,7 +569,7 @@ extern "C" __global__ void get_tiles_offsets( float disp_dist[4]; // only for this channel, to be copied to global gpu_tasks in the end float dpXci_pYci_imu_lin[2][3]; -/* + /* double [][] add0 = { {-rXY[i][0], rXY[i][1], 0.0}, {-rXY[i][1], -rXY[i][0], 0.0}, @@ -555,7 +577,7 @@ extern "C" __global__ void get_tiles_offsets( Matrix dd0 = new Matrix(add0); Matrix dd1 = rots[i].times(dd0).getMatrix(0, 1,0,1).times(norm_z); // get top left 2x2 sub-matrix - */ + */ float dd1[2][2];// get top left 2x2 sub-matrix dd1[0][0] = (-rot_deriv.rots[ncam][0][0]*rXY[0] -rot_deriv.rots[ncam][0][1]*rXY[1])*norm_z; dd1[0][1] = ( rot_deriv.rots[ncam][0][0]*rXY[1] -rot_deriv.rots[ncam][0][1]*rXY[0])*norm_z; @@ -570,12 +592,11 @@ extern "C" __global__ void get_tiles_offsets( __syncthreads();// __syncwarp(); #endif // DEBUG21 - // now first column of 2x2 dd1 - x, y components of derivatives by disparity, second column - derivatives by ortho to disparity (~Y in 2d correlation) // unity vector in the direction of radius float c_dist = pXci/rNDi; float s_dist = pYci/rNDi; -//#undef NVRTC_BUG + //#undef NVRTC_BUG float drD2rND_dri = 0.0; { float rri = 1.0; @@ -618,28 +639,16 @@ extern "C" __global__ void get_tiles_offsets( __syncthreads();// __syncwarp(); #endif // DEBUG21 - gpu_tasks[task_num].disp_dist[ncam][0] = disp_dist[0]; gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1]; gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2]; gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3]; -// imu = extrinsic_corr.getIMU(i); // currently it is common for all channels -// float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15 -// float 
imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19 geometry_correction.imu_move -// ERS linear does not yet use per-port rotations, probably not needed + // imu = extrinsic_corr.getIMU(i); // currently it is common for all channels + // float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15 + // float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19 geometry_correction.imu_move + // ERS linear does not yet use per-port rotations, probably not needed if (imu_exists){ - /* - float delta_t = disp_dist[2] * disparity * geometry_correction.line_time; // positive for top cameras, negative - for bottom //disp_dist[2]=dd2.get(1, 0) - float ers_Xci = delta_t * ( - dpXci_dtilt * extrinsic_corr.imu_rot[0] + - dpXci_dazimuth * extrinsic_corr.imu_rot[1] + - dpXci_droll * extrinsic_corr.imu_rot[2]); - float ers_Yci = delta_t* ( - dpYci_dtilt * extrinsic_corr.imu_rot[0] + - dpYci_dazimuth * extrinsic_corr.imu_rot[1] + - dpYci_droll * extrinsic_corr.imu_rot[2]); - */ float ers_x = dpXci_dtilt * extrinsic_corr.imu_rot[0] + dpXci_dazimuth * extrinsic_corr.imu_rot[1] + @@ -649,11 +658,8 @@ extern "C" __global__ void get_tiles_offsets( dpYci_dazimuth * extrinsic_corr.imu_rot[1] + dpYci_droll * extrinsic_corr.imu_rot[2]; - - #ifdef DEBUG21 if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){ -// printf("delta_t = %f, ers_Xci = %f, ers_Yci = %f\n", delta_t, ers_Xci, ers_Yci); printf("ers_x = %f, ers_y = %f\n", ers_x, ers_y); } __syncthreads();// __syncwarp(); @@ -665,19 +671,12 @@ extern "C" __global__ void get_tiles_offsets( dpXci_pYci_imu_lin[0][0] = -wdisparity / k; // dpx/ dworld_X dpXci_pYci_imu_lin[1][1] = wdisparity / k; // dpy/ dworld_Y dpXci_pYci_imu_lin[0][2] = (xyz[0] / k) * dwdisp_dz; // dpx/ dworld_Z - dpXci_pYci_imu_lin[1][2] = (xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z - /* - ers_Xci += delta_t* ( - dpXci_pYci_imu_lin[0][0] * extrinsic_corr.imu_move[0] + - dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2]); - ers_Yci += delta_t* ( - dpXci_pYci_imu_lin[1][1] * 
extrinsic_corr.imu_move[1] + - dpXci_pYci_imu_lin[1][2] * extrinsic_corr.imu_move[2]); - */ +//// dpXci_pYci_imu_lin[1][2] = (xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z + dpXci_pYci_imu_lin[1][2] = -(xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z ers_x += dpXci_pYci_imu_lin[0][0] * extrinsic_corr.imu_move[0] + - dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2]; + dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2]; ers_y += dpXci_pYci_imu_lin[1][1] * extrinsic_corr.imu_move[1] + - dpXci_pYci_imu_lin[1][2] * extrinsic_corr.imu_move[2]; + dpXci_pYci_imu_lin[1][2] * extrinsic_corr.imu_move[2]; float delta_t = (pY_offset/ (1.0 - geometry_correction.line_time * ers_y)) * geometry_correction.line_time; // positive for top cameras, negative - for bottom //disp_dist[2]=dd2.get(1, 0) pXY[0] += delta_t * ers_x * rD2rND; // added correction to pixel X @@ -700,8 +699,6 @@ extern "C" __global__ void get_tiles_offsets( // copy results to global memory pXY, disp_dist gpu_tasks[task_num].xy[ncam][0] = pXY[0]; gpu_tasks[task_num].xy[ncam][1] = pXY[1]; - - } extern "C" __global__ void calcReverseDistortionTable( diff --git a/src/main/resources/kernels/geometry_correction.h b/src/main/resources/kernels/geometry_correction.h index 7d36afcc815b0d31fb5c16dccf60fea1425cb719..2783cc79e012788d54bb2b898959355e1f594070 100644 --- a/src/main/resources/kernels/geometry_correction.h +++ b/src/main/resources/kernels/geometry_correction.h @@ -149,6 +149,15 @@ extern "C" __global__ void get_tiles_offsets( float * gpu_rByRDist, // length should match RBYRDIST_LEN trot_deriv * gpu_rot_deriv); +extern "C" __global__ void calculate_tiles_offsets( + struct tp_task * gpu_tasks, + int num_tiles, // number of tiles in task + struct gc * gpu_geometry_correction, + struct corr_vector * gpu_correction_vector, + float * gpu_rByRDist, // length should match RBYRDIST_LEN + trot_deriv * gpu_rot_deriv); + + // uses NUM_CAMS blocks, (3,3,3) threads extern "C" __global__ void calc_rot_deriv( struct corr_vector * 
gpu_correction_vector,