Commit 93480b46 authored by Andrey Filippov

Motion blur correction for rendering only

parent 7d6fb681
......@@ -3842,19 +3842,21 @@ public class GpuQuad{ // quad camera description
final int tilesX = img_width / GPUTileProcessor.DTT_SIZE;
final int tiles = pXpYD.length;
final Matrix [] corr_rots = geometryCorrection.getCorrVector().getRotMatrices(); // get array of per-sensor rotation matrices
final int quad_main = (geometryCorrection != null)? num_cams:0;
final int quad_main = num_cams; // (geometryCorrection != null)? num_cams:0;
final Thread[] threads = ImageDtt.newThreadArray(threadsMax);
final AtomicInteger ai = new AtomicInteger(00);
final AtomicInteger aTiles = new AtomicInteger(0);
final TpTask[][] tp_tasks = new TpTask[2][tiles]; // aTiles.get()]; // [0] - main, [1] - shifted
final double mb_len_scale = -Math.log(1.0 - 1.0/mb_max_gain);
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
@Override
public void run() {
for (int nTile = ai.getAndIncrement(); nTile < tiles; nTile = ai.getAndIncrement())
if ((pXpYD[nTile] != null) && (mb_vectors[nTile] != null) && ((selection == null) || selection[nTile])) {
if ((pXpYD[nTile] != null) &&
!Double.isNaN(mb_vectors[0][nTile]) &&
!Double.isNaN(mb_vectors[1][nTile]) &&
((selection == null) || selection[nTile])) {
int tileY = nTile / tilesX;
int tileX = nTile % tilesX;
TpTask tp_task = new TpTask(num_cams, tileX, tileY);
......@@ -3867,8 +3869,8 @@ public class GpuQuad{ // quad camera description
double [] centerXY = pXpYD[nTile];
tp_task.setCenterXY(centerXY); // this pair of coordinates will be used by GPU to set tp_task.xy and task.disp_dist!
// calculate offset for the secondary tile and weigh
double dx = mb_vectors[nTile][0];
double dy = mb_vectors[nTile][1];
double dx = mb_vectors[0][nTile];
double dy = mb_vectors[1][nTile];
double mb_len = Math.sqrt(dx*dx+dy*dy); // in pixels/s
dx /= mb_len; // unit vector
dy /= mb_len;
......@@ -3887,7 +3889,6 @@ public class GpuQuad{ // quad camera description
double gain_sub = -gain * exp_offs;
tp_task.setScale(gain);
tp_task_sub.setScale(gain_sub);
boolean bad_margins = false;
if (calcPortsCoordinatesAndDerivatives) { // for non-GPU?
double [][] disp_dist = new double[quad_main][]; // used to correct 3D correlations (not yet used here)
......
......@@ -15,7 +15,7 @@ import javax.xml.bind.DatatypeConverter;
import Jama.Matrix;
public class IntersceneLma {
OpticalFlow opticalFlow = null;
// OpticalFlow opticalFlow = null;
QuadCLT [] scenesCLT = null; // now will use just 2 - 0 -reference scene, 1 - scene.
private double [] last_rms = null; // {rms, rms_pure}, matching this.vector
private double [] good_or_bad_rms = null; // just for diagnostics, to read last (failed) rms
......@@ -37,11 +37,11 @@ public class IntersceneLma {
private int num_samples = 0;
private boolean thread_invariant = true; // Do not use DoubleAdder, provide results not dependent on threads
public IntersceneLma(
OpticalFlow opticalFlow,
// OpticalFlow opticalFlow,
boolean thread_invariant
) {
this.thread_invariant = thread_invariant;
this.opticalFlow = opticalFlow;
// this.opticalFlow = opticalFlow;
}
public double [][] getLastJT(){
......@@ -549,7 +549,7 @@ public class IntersceneLma {
{
this.weights = new double [num_samples + parameters_vector.length];
final Thread[] threads = ImageDtt.newThreadArray(opticalFlow.threadsMax);
final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
final AtomicInteger ai = new AtomicInteger(0);
double sum_weights;
if (thread_invariant) {
......@@ -652,7 +652,7 @@ public class IntersceneLma {
private void normalizeWeights()
{
final Thread[] threads = ImageDtt.newThreadArray(opticalFlow.threadsMax);
final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
final AtomicInteger ai = new AtomicInteger(0);
double full_weight, sum_weight_pure;
if (thread_invariant) {
......@@ -763,7 +763,7 @@ public class IntersceneLma {
scene_atr, // double [] atr);
false)[0]; // boolean invert));
final Thread[] threads = ImageDtt.newThreadArray(opticalFlow.threadsMax);
final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
final AtomicInteger ai = new AtomicInteger(0);
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
......@@ -840,7 +840,7 @@ public class IntersceneLma {
final int num_pars2 = num_pars * num_pars;
final int nup_points = jt[0].length;
final double [][] wjtjl = new double [num_pars][num_pars];
final Thread[] threads = ImageDtt.newThreadArray(opticalFlow.threadsMax);
final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
final AtomicInteger ai = new AtomicInteger(0);
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
......@@ -876,7 +876,7 @@ public class IntersceneLma {
final double [] fx,
final double [] rms_fp // null or [2]
) {
final Thread[] threads = ImageDtt.newThreadArray(opticalFlow.threadsMax);
final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
final AtomicInteger ai = new AtomicInteger(0);
final double [] wymfw = new double [fx.length];
double s_rms;
......
......@@ -4888,6 +4888,7 @@ public class OpticalFlow {
scenes_suffix, // String suffix,
ds_vantage[0], // selected_disparity, // double [] ref_disparity,
quadCLTs, // QuadCLT [] quadCLTs,
threadsMax, // int threadsMax,
debugLevel); // int debugLevel);
if (save_mapped_mono_color[col_mode]) {
quadCLTs[ref_index].saveImagePlusInModelDirectory(
......@@ -5811,11 +5812,16 @@ public class OpticalFlow {
String suffix_in,
double [] ref_disparity,
QuadCLT [] quadCLTs,
int threadsMax,
int debugLevel) {
double [] stereo_atr = ZERO3; // maybe later play with rotated camera
boolean um_mono = clt_parameters.imp.um_mono;
double um_sigma = clt_parameters.imp.um_sigma;
double um_weight = clt_parameters.imp.um_weight;
boolean mb_en = clt_parameters.imp.mb_en && (fov_tiles==null) && (mode3d > 0);
double mb_tau = clt_parameters.imp.mb_tau; // 0.008; // time constant, sec
double mb_max_gain = clt_parameters.imp.mb_max_gain; // 5.0; // motion blur maximal gain (if more - move second point more than a pixel
final float fum_weight = (float) um_weight;
boolean merge_all = clt_parameters.imp.merge_all;
......@@ -5836,6 +5842,15 @@ public class OpticalFlow {
for (int i = 0; i < num_sens; i++) if (((sensor_mask >> i) & 1) != 0) channels[nch++] = i;
ImageStack stack_scenes = null;
int dbg_scene = -95;
double [][] ref_pXpYD = transformToScenePxPyD( // now should work with offset ref_scene
fov_tiles, // final Rectangle [] extra_woi, // show larger than sensor WOI (or null)
ref_disparity, // final double [] disparity_ref, // invalid tiles - NaN in disparity
ZERO3, // final double [] scene_xyz, // camera center in world coordinates
ZERO3, // final double [] scene_atr, // camera orientation relative to world frame
quadCLTs[ref_index], // final QuadCLT scene_QuadClt,
quadCLTs[ref_index], // final QuadCLT reference_QuadClt, // now - may be null - for testing if scene is rotated ref
threadsMax); // int threadsMax)
for (int nscene = 0; nscene < quadCLTs.length ; nscene++) if (quadCLTs[nscene] != null){
if (nscene== dbg_scene) {
System.out.println("renderSceneSequence(): nscene = "+nscene);
......@@ -5843,7 +5858,6 @@ public class OpticalFlow {
String ts = quadCLTs[nscene].getImageName();
double [] scene_xyz = ZERO3;
double [] scene_atr = ZERO3;
// if ((nscene != ref_index) && (mode3d >= 0)) {
if (nscene != ref_index) { // Check even for raw, so video frames will match in all modes
scene_xyz = ers_reference.getSceneXYZ(ts);
scene_atr = ers_reference.getSceneATR(ts);
......@@ -5871,7 +5885,80 @@ public class OpticalFlow {
scene_atr = combo_xyzatr[1];
}
int sm = merge_all? -1: sensor_mask;
ImagePlus imp_scene = QuadCLT.renderGPUFromDSI(
ImagePlus imp_scene = null;
double [][] dxyzatr_dt = null;
if (mb_en) {
get_velocities:
{
int nscene0 = nscene - 1;
if ((nscene0 < 0) ||
(quadCLTs[nscene0]== null)||
(ers_reference.getSceneXYZ(quadCLTs[nscene0].getImageName())== null) ||
(ers_reference.getSceneATR(quadCLTs[nscene0].getImageName())== null)) {
nscene0 = nscene;
}
int nscene1 = nscene + 1;
if ((nscene1 > ref_index) || (quadCLTs[nscene1]== null)) {
nscene1 = nscene;
}
if (nscene1 == nscene0) {
System.out.println("**** Isoloated scene!!! skipping... now may only happen for a ref_scene****");
break get_velocities;
}
double dt = quadCLTs[nscene1].getTimeStamp() - quadCLTs[nscene0].getTimeStamp();
String ts0 = quadCLTs[nscene0].getImageName();
String ts1 = quadCLTs[nscene1].getImageName();
double [] scene_xyz0 = ers_reference.getSceneXYZ(ts0);
double [] scene_atr0 = ers_reference.getSceneATR(ts0);
if (scene_xyz0 == null) {
System.out.println ("BUG: No egomotion data for timestamp "+ts0);
break get_velocities;
}
double [] scene_xyz1 = (nscene1== ref_index)? ZERO3:ers_reference.getSceneXYZ(ts1);
double [] scene_atr1 = (nscene1== ref_index)? ZERO3:ers_reference.getSceneATR(ts1);
dxyzatr_dt = new double[2][3];
for (int i = 0; i < 3; i++) {
dxyzatr_dt[0][i] = (scene_xyz1[i]-scene_xyz0[i])/dt;
dxyzatr_dt[1][i] = (scene_atr1[i]-scene_atr0[i])/dt;
}
}
}
if (mb_en && (dxyzatr_dt != null)) {
double [][] motion_blur = getMotionBlur(
quadCLTs[ref_index], // QuadCLT ref_scene,
quadCLTs[nscene], // QuadCLT scene, // can be the same as ref_scene
ref_pXpYD, // double [][] ref_pXpYD, // here it is scene, not reference!
scene_xyz, // double [] camera_xyz,
scene_atr, // double [] camera_atr,
dxyzatr_dt[0], // double [] camera_xyz_dt,
dxyzatr_dt[1], // double [] camera_atr_dt,
0, // int shrink_gaps, // will gaps, but not more that grow by this
debugLevel); // int debug_level)
imp_scene = QuadCLT.renderGPUFromDSI(
sm, // final int sensor_mask,
merge_all, // final boolean merge_channels,
null, // final Rectangle full_woi_in, // show larger than sensor WOI (or null)
clt_parameters, // CLTParameters clt_parameters,
ref_disparity, // double [] disparity_ref,
// motion blur compensation
mb_tau, // double mb_tau, // 0.008; // time constant, sec
mb_max_gain, // double mb_max_gain, // 5.0; // motion blur maximal gain (if more - move second point more than a pixel
motion_blur, // double [][] mb_vectors, //
scene_xyz, // final double [] scene_xyz, // camera center in world coordinates
scene_atr, // final double [] scene_atr, // camera orientation relative to world frame
quadCLTs[nscene], // final QuadCLT scene,
quadCLTs[ref_index], // final QuadCLT ref_scene, // now - may be null - for testing if scene is rotated ref
toRGB, // final boolean toRGB,
(toRGB? clt_parameters.imp.show_color_nan : clt_parameters.imp.show_mono_nan),
"", // String suffix, no suffix here
QuadCLT.THREADS_MAX, // int threadsMax,
debugLevel); // int debugLevel)
} else {
imp_scene = QuadCLT.renderGPUFromDSI(
sm, // final int sensor_mask,
merge_all, // final boolean merge_channels,
fov_tiles, // testr, // null, // final Rectangle full_woi_in, // show larger than sensor WOI (or null)
......@@ -5887,6 +5974,7 @@ public class OpticalFlow {
"", // String suffix, no suffix here
QuadCLT.THREADS_MAX, // int threadsMax,
debugLevel); // int debugLevel)
}
if (stack_scenes == null) {
stack_scenes = new ImageStack(imp_scene.getWidth(),imp_scene.getHeight());
}
......@@ -13164,7 +13252,6 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
iscale); // int iscale) // 8
}
IntersceneLma intersceneLma = new IntersceneLma(
this, // OpticalFlow opticalFlow
clt_parameters.ilp.ilma_thread_invariant);
int nlma = 0;
int lmaResult = -1;
......@@ -13507,11 +13594,15 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
ZERO3, //, // dxyzatr_dt[nscene][0], // double [] ers_xyz_dt,
dxyzatr_dt[nscene][1]); // double [] ers_atr_dt)(ers_scene_original_xyz_dt);
if (dbg_mb_img != null) {
boolean show_corrected = false;
if (nscene == debug_scene) {
System.out.println("nscene = "+nscene);
System.out.println("nscene = "+nscene);
}
dbg_mb_img[nscene] = new double [tilesX*tilesY*2];
Arrays.fill(dbg_mb_img[nscene],Double.NaN);
double [] mb_scene_xyz = (nscene != ref_index)? ers_reference.getSceneXYZ(ts):ZERO3;
double [] mb_scene_atr = (nscene != ref_index)? ers_reference.getSceneATR(ts):ZERO3;
double [][] motion_blur = getMotionBlur(
quadCLTs[ref_index], // QuadCLT ref_scene,
quadCLTs[nscene], // QuadCLT scene, // can be the same as ref_scene
......@@ -13520,17 +13611,13 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
mb_scene_atr, // double [] camera_atr,
dxyzatr_dt[nscene][0], // double [] camera_xyz_dt,
dxyzatr_dt[nscene][1], // double [] camera_atr_dt,
-1, // int shrink_gaps, // will gaps, but not more that grow by this
debugLevel); // int debug_level)
for (int nTile = 0; nTile < motion_blur.length; nTile++) if (motion_blur[nTile] != null) {
for (int nTile = 0; nTile < motion_blur[0].length; nTile++) {
int tx = nTile % tilesX;
int ty = nTile / tilesX;
dbg_mb_img[nscene][tx + tilesX * (ty*2 +0)] = motion_blur[nTile][0];
dbg_mb_img[nscene][tx + tilesX * (ty*2 +1)] = motion_blur[nTile][1];
}
boolean show_corrected = false;
if (nscene == debug_scene) {
System.out.println("nscene = "+nscene);
System.out.println("nscene = "+nscene);
dbg_mb_img[nscene][tx + tilesX * (ty*2 +0)] = mb_tau * motion_blur[0][nTile];
dbg_mb_img[nscene][tx + tilesX * (ty*2 +1)] = mb_tau * motion_blur[1][nTile];
}
while (show_corrected) {
ImagePlus imp_mbc = QuadCLT.renderGPUFromDSI(
......@@ -13667,21 +13754,24 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
* @param camera_atr camera azimuth, tilt, roll relative to the reference
* @param camera_xyz_dt camera linear velocities: x', y', z'
* @param camera_atr_dt camera angular velocities: azimuth', tilt', roll'
* @param shrink_gaps < 0 fill all gaps, 0 - do not fill gaps, >0 expand using growTiles, do not fill farther.
* @param debug_level debug level
* @return per-tile array of {dx/dt, dy/dt} vectors, some may be null
* @return per-tile array of [2][tiles] of dx/dt, dy/dt, some may be NaN
*/
public double [][] getMotionBlur(
public static double [][] getMotionBlur(
QuadCLT ref_scene,
QuadCLT scene, // can be the same as ref_scene
double [][] ref_pXpYD,
double [][] ref_pXpYD, // tilesX * tilesY
double [] camera_xyz,
double [] camera_atr,
double [] camera_xyz_dt,
double [] camera_atr_dt,
// boolean fill_gaps,
int shrink_gaps, // will gaps, but not more that grow by this
int debug_level)
{
int num_passes = 100;
double max_diff = 1E-4;
boolean[] param_select = new boolean[ErsCorrection.DP_NUM_PARS];
final int [] par_indices = new int[] {
ErsCorrection.DP_DSAZ,
......@@ -13696,10 +13786,12 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
final double [] camera_dt = new double[] {
camera_atr_dt[0], camera_atr_dt[1], camera_atr_dt[2],
camera_xyz_dt[0], camera_xyz_dt[1], camera_xyz_dt[2]};
final double [][] mb_vectors = new double [ref_pXpYD.length][];
final double [][] mb_vectors = new double [2][ref_pXpYD.length];
Arrays.fill(mb_vectors[0], Double.NaN);
Arrays.fill(mb_vectors[1], Double.NaN);
final int tilesX = ref_scene.tp.getTilesX();
// final int tilesY = ref_scene.tp.getTilesY();
IntersceneLma intersceneLma = new IntersceneLma(
this, // OpticalFlow opticalFlow
false); // clt_parameters.ilp.ilma_thread_invariant);
intersceneLma.prepareLMA(
camera_xyz, // final double [] scene_xyz0, // camera center in world coordinates (or null to use instance)
......@@ -13713,28 +13805,170 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
false, // boolean first_run,
debug_level); // final int debug_level)
final double [][] last_jt = intersceneLma. getLastJT(); // alternating x,y for each selected parameters
final Thread[] threads = ImageDtt.newThreadArray(threadsMax);
int [] sensor_wh = ref_scene.getGeometryCorrection().getSensorWH();
final double width = sensor_wh[0];
final double height = sensor_wh[1];
final double min_disparity = -0.5;
final double max_disparity = 100.0;
final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
final AtomicInteger ai = new AtomicInteger(0);
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
public void run() {
for (int nTile = ai.getAndIncrement(); nTile < ref_pXpYD.length; nTile = ai.getAndIncrement()) if (ref_pXpYD[nTile] != null){
mb_vectors[nTile]= new double[2];
for (int i = 0; i < par_indices.length; i++) {
mb_vectors[nTile][0] += camera_dt[i] * last_jt[i][2*nTile + 0];
mb_vectors[nTile][1] += camera_dt[i] * last_jt[i][2*nTile + 1];
if ( (ref_pXpYD[nTile][0] < 0) || (ref_pXpYD[nTile][0] >= width) ||
(ref_pXpYD[nTile][1] < 0) || (ref_pXpYD[nTile][1] >= height) ||
(ref_pXpYD[nTile][2] < min_disparity) || (ref_pXpYD[nTile][2] >= max_disparity)) {
continue;
}
if (Double.isNaN(mb_vectors[nTile][0]) || Double.isNaN(mb_vectors[nTile][1])) {
mb_vectors[nTile] = null;
mb_vectors[0][nTile] = 0.0;
mb_vectors[1][nTile] = 0.0;
for (int i = 0; i < par_indices.length; i++) {
mb_vectors[0][nTile] += camera_dt[i] * last_jt[i][2*nTile + 0];
mb_vectors[1][nTile] += camera_dt[i] * last_jt[i][2*nTile + 1];
}
}
}
};
}
ImageDtt.startAndJoin(threads);
if (shrink_gaps != 0) {
for (int dim = 0; dim < mb_vectors.length; dim++) {
mb_vectors[dim] = fillGapsDouble(
mb_vectors[dim], // double [] data,
null, // boolean [] mask_in, // do not process if false (may be null)
tilesX, // int width,
(shrink_gaps > 0) ? shrink_gaps: 0, // int max_grow,
num_passes, // int num_passes,
max_diff, // double max_diff,
QuadCLT.THREADS_MAX, // int threadsMax,
debug_level); // int debug_level)
}
}
return mb_vectors;
}
/**
 * Fills NaN gaps in a per-tile array by repeatedly replacing each gap tile with the
 * weighted average of its non-NaN neighbors (8 directions).
 *
 * The data is handled as a grid of <code>width</code> columns and
 * <code>data.length / width</code> rows. Processing has two phases:
 * <ol>
 * <li>Grow: only tiles that are still NaN are assigned, pass after pass, until every
 *     selected gap tile has a value (or no further tile can be reached). These passes
 *     are not counted against <code>num_passes</code>.</li>
 * <li>Relax: all selected gap tiles are re-averaged on every pass until the largest
 *     per-tile change drops below <code>max_diff</code> or <code>num_passes</code>
 *     passes are done.</li>
 * </ol>
 * Gap tiles that have no path to any defined value (e.g. all-NaN input) are left NaN.
 * Previously such input prevented the pass counter from ever advancing and the method
 * never returned.
 *
 * @param data        per-tile values, NaN marks a gap. Not modified.
 * @param mask_in     tiles allowed to be filled (false - do not process), or null.
 *                    When not null, max_grow is not used.
 * @param width       number of tiles in a row
 * @param max_grow    used only when mask_in is null: 0 - every tile may be filled,
 *                    otherwise the set of defined tiles is expanded with
 *                    TileProcessor.growTiles(max_grow, ...) and only gaps inside the
 *                    expanded selection are filled
 * @param num_passes  maximal number of relaxation passes (grow passes are not counted)
 * @param max_diff    stop relaxation when the maximal absolute change in a pass is below this
 * @param threadsMax  maximal number of threads to use
 * @param debug_level debug level, &gt; 0 enables console output
 * @return a new array with gaps filled; never the <code>data</code> instance itself
 */
public static double[] fillGapsDouble(
		double []  data,
		boolean [] mask_in, // do not process if false (may be null)
		int        width,
		int        max_grow,
		int        num_passes,
		double     max_diff,
		int        threadsMax,
		int        debug_level)
{
	final double max_diff2 = max_diff * max_diff; // compare squared differences
	final double diagonal_weight = 0.5 * Math.sqrt(2.0); // relative to ortho
	double wdiag =  0.25 * diagonal_weight / (diagonal_weight + 1.0);
	double wortho = 0.25 / (diagonal_weight + 1.0);
	// NOTE(review): assumes even TileNeibs directions are orthogonal and odd ones diagonal - confirm
	final double [] neibw = {wortho, wdiag, wortho, wdiag, wortho, wdiag, wortho, wdiag};
	final int tiles =  data.length;
	final int height = tiles/width;
	// Two buffers: every pass reads data_in and writes data_out, so the result does not
	// depend on the order in which threads process tiles.
	final double [] data_in =  data.clone();
	final double [] data_out = data.clone();
	final boolean [] mask = (mask_in==null) ? new boolean[tiles]: mask_in.clone();
	if (mask_in == null) {
		if (max_grow == 0) {
			Arrays.fill(mask, true); // no limit - any gap may be filled
		} else {
			for (int i = 0; i < tiles; i++) {
				mask[i] = !Double.isNaN(data[i]);
			}
			TileProcessor.growTiles(
					max_grow, // grow, // grow tile selection by 1 over non-background tiles 1: 4 directions, 2 - 8 directions, 3 - 8 by 1, 4 by 1 more
					mask,     // tiles,
					null,     // prohibit,
					width,
					height);
		}
	}
	final TileNeibs tn = new TileNeibs(width, height);
	final int [] tile_indices = new int [tiles]; // indices of the selected gap tiles (first num_gaps elements)
	final Thread[] threads = ImageDtt.newThreadArray(threadsMax);
	final AtomicInteger ai = new AtomicInteger(0);
	final AtomicInteger anum_gaps =   new AtomicInteger(0); // gap tiles that could not be assigned in a pass
	final AtomicInteger anum_filled = new AtomicInteger(0); // gap tiles that got their first value in a pass
	final int dbg_tile = -3379;
	// Collect the selected gap tiles once; later passes iterate only over them.
	for (int ithread = 0; ithread < threads.length; ithread++) {
		threads[ithread] = new Thread() {
			@Override
			public void run() {
				for (int nTile = ai.getAndIncrement(); nTile < tiles; nTile = ai.getAndIncrement()) {
					if (mask[nTile] && Double.isNaN(data[nTile])){
						int indx = anum_gaps.getAndIncrement();
						tile_indices[indx] = nTile;
					}
				}
			}
		};
	}
	ImageDtt.startAndJoin(threads);
	ai.set(0);
	final int num_gaps = anum_gaps.get();
	if (num_gaps == 0) {
		return data_in; // no gaps already
	}
	final boolean [] fill_all = {false}; // false - grow phase, true - relaxation phase
	DoubleAccumulator amax_diff =  new DoubleAccumulator (Double::max, Double.NEGATIVE_INFINITY);
	for (int npass = 0; npass < num_passes; npass+= fill_all[0]? 1:0 ) { // do not limit initial passes
		anum_gaps.set(0);
		anum_filled.set(0);
		amax_diff.reset();
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int indx = ai.getAndIncrement(); indx < num_gaps; indx = ai.getAndIncrement()) {
						int nTile = tile_indices[indx];
						if ((debug_level >0) && (nTile == dbg_tile)) {
							System.out.println("fillGapsDouble() nTile="+nTile);
						}
						if (!fill_all[0] && !Double.isNaN(data_in[nTile])) {
							continue; // fill only new
						}
						double swd = 0.0, sw = 0.0;
						for (int dir = 0; dir < 8; dir++) {
							int nt_neib = tn.getNeibIndex(nTile, dir);
							if ((nt_neib >= 0) && !Double.isNaN(data_in[nt_neib])) {
								sw +=  neibw[dir];
								swd += neibw[dir] * data_in[nt_neib];
							}
						}
						if (sw > 0) {
							double new_val = swd/sw;
							if (Double.isNaN(data_in[nTile])) {
								// First value for this tile: there is no previous value to compare with,
								// and a NaN difference would poison the Double::max accumulator.
								anum_filled.getAndIncrement();
							} else {
								double d = new_val - data_in[nTile];
								amax_diff.accumulate(d * d);
							}
							data_out[nTile] = new_val;
						} else {
							anum_gaps.getAndIncrement(); // no defined neighbors yet
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		ai.set(0);
		System.arraycopy(data_out, 0, data_in, 0, tiles);
		if ((debug_level > 0) && fill_all[0]) {
			System.out.println("fillGapsDouble() num_gaps="+num_gaps+", npass="+npass+", change="+Math.sqrt(amax_diff.get())+" ("+max_diff+")");
		}
		if (fill_all[0] && (amax_diff.get() < max_diff2)) {
			break; // all done
		}
		if (anum_gaps.get() == 0) { // every selected gap tile now has a value
			fill_all[0] = true;
		} else if (!fill_all[0] && (anum_filled.get() == 0)) {
			// Grow pass made no progress: the remaining gaps cannot be reached from any
			// defined tile. Switch to the (bounded) relaxation phase instead of looping forever.
			fill_all[0] = true;
		}
		if ((debug_level>0) && (npass == (num_passes-1))){
			System.out.println("fillGapsDouble() LAST PASS ! npass="+npass+", change="+Math.sqrt(amax_diff.get())+" ("+max_diff+")");
		}
	} // for (int npass = 0; npass < num_passes; npass+= fill_all[0]? 1:0 )
	return data_out;
}
public double[][] adjustPairsLMAInterscene(
CLTParameters clt_parameters,
......@@ -13754,7 +13988,6 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
int sensor_mask_inter = clt_parameters.imp.sensor_mask_inter ; //-1;
float [][][] facc_2d_img = new float [1][][];
IntersceneLma intersceneLma = new IntersceneLma(
this, // OpticalFlow opticalFlow
clt_parameters.ilp.ilma_thread_invariant);
int lmaResult = -1;
boolean last_run = false;
......@@ -14029,7 +14262,6 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
iscale); // int iscale) // 8
}
IntersceneLma intersceneLma = new IntersceneLma(
this, // OpticalFlow opticalFlow
clt_parameters.ilp.ilma_thread_invariant);
for (int nlma = 0; nlma < clt_parameters.ilp.ilma_num_corr; nlma++) {
boolean last_run = nlma == ( clt_parameters.ilp.ilma_num_corr - 1);
......@@ -14102,9 +14334,6 @@ public double[][] correlateIntersceneDebug( // only uses GPU and quad
macroTilesX); // int width)
}
// IntersceneLma intersceneLma = new IntersceneLma(
// this); // OpticalFlow opticalFlow
intersceneLma.prepareLMA(
camera_xyz0, // final double [] scene_xyz0, // camera center in world coordinates (or null to use instance)
camera_atr0, // final double [] scene_atr0, // camera orientation relative to world frame (or null to use instance)
......
......@@ -2706,7 +2706,7 @@ public class QuadCLT extends QuadCLTCPU {
// motion blur compensation
double mb_tau, // 0.008; // time constant, sec
double mb_max_gain, // 5.0; // motion blur maximal gain (if more - move second point more than a pixel
double [][] mb_vectors, //
double [][] mb_vectors, // now [2][ntiles];
final double [] scene_xyz, // camera center in world coordinates
final double [] scene_atr, // camera orientation relative to world frame
......@@ -2740,14 +2740,14 @@ public class QuadCLT extends QuadCLTCPU {
for (int i = 0; i < dbg_img.length; i++) {
Arrays.fill(dbg_img[i], Double.NaN);
}
for (int nTile = 0; nTile < pXpYD.length; nTile++) if (pXpYD[nTile] != null){
for (int nTile = 0; nTile < pXpYD.length; nTile++){
if (pXpYD[nTile] != null) {
for (int i = 0; i < pXpYD[nTile].length; i++) {
dbg_img[i][nTile] = pXpYD[nTile][i];
}
if (mb_vectors[nTile]!=null) {
for (int i = 0; i <2; i++) {
dbg_img[3 + i][nTile] = mb_tau * mb_vectors[nTile][i];
}
for (int i = 0; i <2; i++) {
dbg_img[3 + i][nTile] = mb_tau * mb_vectors[i][nTile];
}
}
(new ShowDoubleFloatArrays()).showArrays( // out of boundary 15
......@@ -2804,8 +2804,8 @@ public class QuadCLT extends QuadCLTCPU {
full_woi_in.width * GPUTileProcessor.DTT_SIZE,
full_woi_in.height * GPUTileProcessor.DTT_SIZE};
int erase_clt = show_nan ? 1:0;
boolean test1 = true;
if ((mb_vectors!=null) && test1) {
// boolean test1 = true;
if (mb_vectors!=null) {// && test1) {
image_dtt.setReferenceTDMotionBlur( // change to main?
erase_clt, //final int erase_clt,
wh, // null, // final int [] wh, // null (use sensor dimensions) or pair {width, height} in pixels
......
......@@ -862,6 +862,7 @@ __device__ void convertCorrectTile(
const float centerX,
const float centerY,
const int txy,
const float tscale,
const size_t dstride, // in floats (pixels)
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float * clt_kernels, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
......@@ -3118,7 +3119,7 @@ __global__ void convert_correct_tiles(
int thread0 = threadIdx.x & 1; // 0,1
int thread12 = threadIdx.x >>1; // now 0..3 (total number == (DTT_SIZE), will not change
float * tp = tp0 + tp_task_xy_offset + threadIdx.x;
float * tp = tp0 + TP_TASK_XY_OFFSET + threadIdx.x;
if (thread12 < num_cams) {
tt[tile_in_block].xy[thread12][thread0] = *(tp); // gpu_task -> xy[thread12][thread0];
}
......@@ -3135,7 +3136,9 @@ __global__ void convert_correct_tiles(
if (threadIdx.x == 0){ // only one thread calculates, others - wait
tt[tile_in_block].task = *(int *) (tp0++); // get first integer value
tt[tile_in_block].txy = *(int *) (tp0++); // get second integer value
tt[tile_in_block].target_disparity = *(tp0++); //
tt[tile_in_block].target_disparity = *(tp0); //
tp0 +=3; // skip centerXY and previous increment (was tt[tile_in_block].target_disparity = *(tp0++);
tt[tile_in_block].scale = *(tp0++); // get scale to multiply before accumulating/saving
}
// float centerXY[2] is not used/copied here
......@@ -3168,6 +3171,7 @@ __global__ void convert_correct_tiles(
tt[tile_in_block].xy[ncam][0], // const float centerX,
tt[tile_in_block].xy[ncam][1], // const float centerY,
tt[tile_in_block].txy, // const int txy,
tt[tile_in_block].scale, // const float tscale,
dstride, // size_t dstride, // in floats (pixels)
(float * )(clt_tile [tile_in_block]), // float clt_tile [TILES_PER_BLOCK][NUM_CAMS][num_colors][4][DTT_SIZE][DTT_SIZE])
(float * )(clt_kernels[tile_in_block]), // float clt_tile [num_colors][4][DTT_SIZE][DTT_SIZE],
......@@ -4457,6 +4461,7 @@ __device__ void normalizeTileAmplitude(
* @param centerX full X-offset of the tile center, calculated from the geometry, distortions and disparity
* @param centerY full Y-offset of the tile center
* @param txy integer value combining tile X (low 16 bits) and tile Y (high 16 bits)
* @param tscale float value to scale result. 0 - set. >0 scale and set, <0 subtract
* @param dstride stride (in floats) for the input Bayer images
* @param clt_tile image tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
* @param clt_kernels kernel tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
......@@ -4482,6 +4487,7 @@ __device__ void convertCorrectTile(
const float centerX,
const float centerY,
const int txy,
const float tscale,
const size_t dstride, // in floats (pixels)
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float * clt_kernels, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
......@@ -5078,7 +5084,7 @@ __device__ void convertCorrectTile(
#endif
if (tscale == 0) { // just set w/o scaling
#pragma unroll
for (int j = 0; j < DTT_SIZE * 4; j++){ // all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
......@@ -5086,10 +5092,24 @@ __device__ void convertCorrectTile(
clt_src += DTT_SIZE1;
clt_dst += DTT_SIZE;
}
} else if (tscale > 0) { // positive - scale and set. For motion blur positive should be first
#pragma unroll
for (int j = 0; j < DTT_SIZE * 4; j++){ // all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
*clt_dst = *clt_src * tscale;
clt_src += DTT_SIZE1;
clt_dst += DTT_SIZE;
}
} else { // negative - scale and subtract from existing. For motion blur positive should be first
#pragma unroll
for (int j = 0; j < DTT_SIZE * 4; j++){ // all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
*clt_dst += *clt_src * tscale;
clt_src += DTT_SIZE1;
clt_dst += DTT_SIZE;
}
}
__syncthreads();// __syncwarp();
// just for testing perform imclt, save result to clt_kernels
//#endif
}
......
......@@ -460,11 +460,11 @@ extern "C" __global__ void get_tiles_offsets(
// common code, calculated in parallel
/// int cxy = gpu_tasks[task_num].txy;
/// float disparity = gpu_tasks[task_num].target_disparity;
float disparity = * (gpu_ftasks + task_size * task_num + 2);
float *centerXY = gpu_ftasks + task_size * task_num + tp_task_centerXY_offset;
float disparity = * (gpu_ftasks + task_size * task_num + TP_TASK_DISPARITY_OFFSET);
float *centerXY = gpu_ftasks + task_size * task_num + TP_TASK_CENTERXY_OFFSET;
float px = *(centerXY);
float py = *(centerXY + 1);
int cxy = *(int *) (gpu_ftasks + task_size * task_num + 1);
int cxy = *(int *) (gpu_ftasks + task_size * task_num + TP_TASK_TXY_OFFSET);
int tileX = (cxy & 0xffff);
int tileY = (cxy >> 16);
......@@ -705,7 +705,7 @@ extern "C" __global__ void get_tiles_offsets(
/// gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
/// gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
/// gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
float * disp_dist_p = gpu_ftasks + task_size * task_num + tp_task_xy_offset + num_cams* 2 + ncam * 4; // ncam = threadIdx.x, so each thread will have different offset
float * disp_dist_p = gpu_ftasks + task_size * task_num + TP_TASK_XY_OFFSET + num_cams* 2 + ncam * 4; // ncam = threadIdx.x, so each thread will have different offset
*(disp_dist_p++) = disp_dist[0]; // global memory
*(disp_dist_p++) = disp_dist[1];
*(disp_dist_p++) = disp_dist[2];
......@@ -768,7 +768,7 @@ extern "C" __global__ void get_tiles_offsets(
// gpu_tasks[task_num].xy[ncam][1] = pXY[1];
// float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// .xy goes right after 3 commonn (tak, txy and target_disparity
float * tile_xy_p = gpu_ftasks + task_size * task_num + tp_task_xy_offset + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
float * tile_xy_p = gpu_ftasks + task_size * task_num + TP_TASK_XY_OFFSET + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
*(tile_xy_p++) = pXY[0]; // global memory
*(tile_xy_p++) = pXY[1]; // global memory
}
......
......@@ -64,13 +64,19 @@ struct tp_task {
float target_disparity;
float centerXY[2]; // "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
// if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
float xy[NUM_CAMS][2];
// scale == 0 - old way, just set. Scale !=0 - accumulate. Or make > 0 - set too? only negative - subtract?
float scale; // multiply during direct conversion before accumulating in TD - used for motion blur correction
float xy [NUM_CAMS][2];
float disp_dist[NUM_CAMS][4]; // calculated with getPortsCoordinates()
};
#define get_task_size(x) (sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - x))
#define tp_task_xy_offset 5
#define tp_task_centerXY_offset 3
#define TP_TASK_TASK_OFFSET 0
#define TP_TASK_TXY_OFFSET 1
#define TP_TASK_DISPARITY_OFFSET 2
#define TP_TASK_CENTERXY_OFFSET 3
#define TP_TASK_SCALE_OFFSET 5
#define TP_TASK_XY_OFFSET 6
struct corr_vector{
float tilt [NUM_CAMS-1]; // 0..2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment