Commit 1e2d8ada authored by Andrey Filippov

First run of TD neighbors

parent 8623329f
...@@ -126,6 +126,7 @@ public class GpuQuad{ // quad camera description ...@@ -126,6 +126,7 @@ public class GpuQuad{ // quad camera description
private int sensor_mask_inter = -1; private int sensor_mask_inter = -1;
private int [] corr_mask_indices = null; private int [] corr_mask_indices = null;
private int [] sensor_mask_inter_indices = null; private int [] sensor_mask_inter_indices = null;
private int [] sensor_mask_inter_indices_neibs = null;
public boolean [] getCorrMask() { public boolean [] getCorrMask() {
...@@ -513,13 +514,17 @@ public class GpuQuad{ // quad camera description ...@@ -513,13 +514,17 @@ public class GpuQuad{ // quad camera description
public void setSensorMaskInter(int mask) { public void setSensorMaskInter(int mask) {
sensor_mask_inter = mask & ((1 << num_cams) -1); sensor_mask_inter = mask & ((1 << num_cams) -1);
sensor_mask_inter_indices = new int [getNumCamsInter()+1]; sensor_mask_inter_indices = new int [getNumCamsInter()+1];
sensor_mask_inter_indices_neibs = new int [getNumCamsInter()+2];
int indx = 0; int indx = 0;
for (int i = 0; i < num_cams; i++) { for (int i = 0; i < num_cams; i++) {
if ((sensor_mask_inter & (1 << (i & 31))) != 0) { if ((sensor_mask_inter & (1 << (i & 31))) != 0) {
sensor_mask_inter_indices[indx++] = i; sensor_mask_inter_indices[indx++] = i;
} }
} }
sensor_mask_inter_indices[indx++] = 0xff; // sum sensor_mask_inter_indices[indx] = 0xff; // sum
sensor_mask_inter_indices_neibs[indx++] = 0xff; // sum
sensor_mask_inter_indices_neibs[indx++] = 0xfe; // sumof neibs
} }
public int getNumCamsInter() { public int getNumCamsInter() {
return Integer.bitCount(sensor_mask_inter); return Integer.bitCount(sensor_mask_inter);
...@@ -530,7 +535,24 @@ public class GpuQuad{ // quad camera description ...@@ -530,7 +535,24 @@ public class GpuQuad{ // quad camera description
public int [] getSensInter() { public int [] getSensInter() {
return sensor_mask_inter_indices; return sensor_mask_inter_indices;
} }
/*
public int getSensInterNeib(int indx) { // not used
return sensor_mask_inter_indices_neibs[indx];
}
*/
/**
 * Get the sensor index list used for interscene correlation with neighbors.
 * As filled by setSensorMaskInter(): indices of the selected sensors followed by
 * 0xff (sum of all channels) and 0xfe (sum of neighbors).
 * @return the internal array itself (not a copy); null until it has been assigned
 */
public int [] getSensInterNeib() {
	return this.sensor_mask_inter_indices_neibs;
}
/**
 * Get the sensor index list for interscene correlation with neighbors, either
 * complete or reduced to its two trailing "sum" entries.
 * @param full true - return the complete internal list (same array as getSensInterNeib()),
 *             false - return a new 2-element array holding the last two entries of that
 *             list (set to 0xff - sum of channels and 0xfe - sum of neighbors by
 *             setSensorMaskInter())
 * @return the internal array when full is true, otherwise a freshly allocated int[2]
 */
public int [] getSensInterNeib(boolean full) {
	if (full) {
		return sensor_mask_inter_indices_neibs;
	}
	// only the two combined slices are requested
	final int len = sensor_mask_inter_indices_neibs.length;
	final int [] sums_only = {
			sensor_mask_inter_indices_neibs[len - 2],
			sensor_mask_inter_indices_neibs[len - 1]};
	return sums_only;
}
/*
public int getSensInterNeib(int indx, boolean full) {
return full? sensor_mask_inter_indices_neibs[indx] : getSensInterNeib(false)[indx];
}
*/
public void setGeometryCorrection() { // will reset geometry_correction_set when running GPU kernel public void setGeometryCorrection() { // will reset geometry_correction_set when running GPU kernel
// if (geometry_correction_set) return; // if (geometry_correction_set) return;
...@@ -3029,6 +3051,37 @@ public class GpuQuad{ // quad camera description ...@@ -3029,6 +3051,37 @@ public class GpuQuad{ // quad camera description
} }
/**
 * Extract a single transform-domain (TD) correlation slice for every correlated tile
 * and arrange the result as a 2D tile grid.
 *
 * The input holds num_pairs consecutive slices per tile, where num_pairs is
 * (getNumCamsInter() + 1) when inter is true and getNumUsedPairs() otherwise.
 * The number of tiles is derived from the num_corr_tiles field, which is not set here
 * (NOTE(review): the caller is expected to have called getCorrIndices() first - confirm).
 *
 * @param indices correlation indices, num_pairs entries per tile; the tile number is
 *                taken from the first entry of each group, shifted right by
 *                GPUTileProcessor.CORR_NTILE_SHIFT
 * @param fdata   TD correlation data, 4 * DTT_SIZE * DTT_SIZE floats per slice, ordered
 *                the same way as indices
 * @param inter   true - interscene slice layout, false - intrascene (used pairs) layout
 * @param pair    index of the slice to extract within each tile's group, 0 <= pair < num_pairs
 * @return [tilesY][tilesX][4 * DTT_SIZE * DTT_SIZE] array; elements for tiles that are
 *         not listed in indices stay null
 * @throws IllegalArgumentException if pair is outside of [0, num_pairs) - without this
 *         check an out-of-range value would silently copy data of a different tile
 */
public float [][][] getCorrTilesLayerTD(
		int []   indices,
		float [] fdata,
		boolean  inter,
		int      pair) {
	final int tilesX = img_width / GPUTileProcessor.DTT_SIZE;
	final int tilesY = img_height / GPUTileProcessor.DTT_SIZE;
	final int num_pairs = inter ? (getNumCamsInter() + 1) : getNumUsedPairs(); // slices per tile
	if ((pair < 0) || (pair >= num_pairs)) {
		throw new IllegalArgumentException(
				"getCorrTilesLayerTD(): pair=" + pair + " is out of range [0," + num_pairs + ")");
	}
	final int corr_size_td = 4 * GPUTileProcessor.DTT_SIZE * GPUTileProcessor.DTT_SIZE;
	final int num_tiles = num_corr_tiles / num_pairs;
	final float [][][] corr_tiles = new float [tilesY][tilesX][];
	for (int nt = 0; nt < num_tiles; nt++ ) {
		// tile number is encoded in the first index of each per-tile group
		final int nTile = (indices[nt * num_pairs] >> GPUTileProcessor.CORR_NTILE_SHIFT);
		final int ty = nTile / tilesX;
		final int tx = nTile % tilesX;
		corr_tiles[ty][tx] = new float [corr_size_td];
		System.arraycopy(
				fdata,
				(nt * num_pairs + pair) * corr_size_td, // requested slice of this tile
				corr_tiles[ty][tx],
				0,
				corr_size_td);
	}
	return corr_tiles;
}
public int [] setCorrTilesComboTd( // not used? public int [] setCorrTilesComboTd( // not used?
final float [][][] corr_tiles, // [tileY][tileX][4*64] final float [][][] corr_tiles, // [tileY][tileX][4*64]
......
...@@ -1326,7 +1326,7 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1326,7 +1326,7 @@ public class ImageDtt extends ImageDttCPU {
gpuQuad.execConvertDirect(use_reference_buffer, wh, -1); // erase_clt); // put results into a "reference" buffer gpuQuad.execConvertDirect(use_reference_buffer, wh, -1); // erase_clt); // put results into a "reference" buffer
} }
private float [] prepNeibCorr( private float [] prepNeibCorr0(
int [][] corr_indices_outp, // should be [1][] int [][] corr_indices_outp, // should be [1][]
double [] neib_weights_od, // {orhto, diag} double [] neib_weights_od, // {orhto, diag}
int [] map_corr_indices_in, int [] map_corr_indices_in,
...@@ -1406,10 +1406,121 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1406,10 +1406,121 @@ public class ImageDtt extends ImageDttCPU {
} }
startAndJoin(threads); startAndJoin(threads);
ai.set(0); ai.set(0);
return fcorr_data_out;
}
// TODO: verify there is enough room for longer corr indices/data in GPU memory - yes, it accommodates NUM_PAIRS (120)
private int [] prepNeibCorr(
final boolean use_partial, // find motion vectors for individual pairs, false - for sum only
double [] neib_weights_od, // {orhto, diag}
int [] map_corr_indices_in,
final int debug_tileX,
final int debug_tileY,
final int globalDebugLevel)
{
final int corr_size_td = 4 * GPUTileProcessor.DTT_SIZE * GPUTileProcessor.DTT_SIZE;
final int [] corr_indices_in = gpuQuad.getCorrIndices(); // also sets num_corr_tiles FIXME: update num_corr_tiles?
final float [] fdata_in = gpuQuad.getCorrTdData(); // may be optimized to skip individual channels
final int [] used_sensors_list = gpuQuad.getSensInter(); // last is 0xff - sum of channels
final int [] used_sensors_list_neib = gpuQuad.getSensInterNeib(); // last are 0xff (sum of channels), 0xfe (sum of neibs)
final int num_tiles = corr_indices_in.length / used_sensors_list.length; // number of correlated tiles (not in tp_tasks)
final int num_corr_slices = use_partial ? used_sensors_list_neib.length : 2; // sum and sum_neibs 18 or 2
final int start_in = used_sensors_list_neib.length - num_corr_slices; // 18-18 or 18-2
return fcorr_data_out; final float [] fcorr_data_out = new float [corr_size_td * num_tiles * num_corr_slices]; // combined length
final int [] corr_indices_neib = new int [num_tiles * num_corr_slices];
if (map_corr_indices_in == null) {
map_corr_indices_in = getMapCorr(corr_indices_in);
}
final int [] map_corr_indices = map_corr_indices_in;
final float [][][] fcorr_data_sum = gpuQuad.getCorrTilesLayerTD(
corr_indices_in, // int [] indices,
fdata_in, // float [] fdata,
true,
used_sensors_list.length-1); // last is sum
final float [] weights = {
(float) neib_weights_od[0], (float) neib_weights_od[1],
(float) neib_weights_od[0], (float) neib_weights_od[1],
(float) neib_weights_od[0], (float) neib_weights_od[1],
(float) neib_weights_od[0], (float) neib_weights_od[1]};
final int tilesX= gpuQuad.getTilesX(); // width/transform_size;
final int tilesY= gpuQuad.getTilesY(); // final int tilesY=height/transform_size;
final Thread[] threads = newThreadArray(THREADS_MAX);
final AtomicInteger ai = new AtomicInteger(0);
// create indices for neighbors
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
@Override
public void run() {
int tileY,tileX,nTile; // , chn;
TileNeibs tn = new TileNeibs(tilesX,tilesY);
for (int iCorrTile = ai.getAndIncrement(); iCorrTile < num_tiles; iCorrTile = ai.getAndIncrement()) {
nTile = (corr_indices_in[iCorrTile* used_sensors_list.length] >> GPUTileProcessor.CORR_NTILE_SHIFT);
tileY = nTile / tilesX;
tileX = nTile % tilesX;
// corr_indices_neib[iCorrTile] = corr_indices_in[(iCorrTile + 1) * used_sensors_list.length - 1];
boolean debugTile0 = (tileX == debug_tileX) && (tileY == debug_tileY) && (globalDebugLevel > 2); // 0);
if (debugTile0) {
System.out.println("clt_process_tl_correlations(): tileX="+tileX+", tileY="+tileY+", nTile="+nTile+", nTile="+nTile);
}
// copy all previous data
System.arraycopy(
fdata_in,
(iCorrTile * used_sensors_list.length + start_in) * corr_size_td,
fcorr_data_out,
iCorrTile * num_corr_slices * corr_size_td,
(num_corr_slices - 1) * corr_size_td); // 1 or 17
System.arraycopy(
corr_indices_in,
(iCorrTile * used_sensors_list.length + start_in),
corr_indices_neib,
iCorrTile * num_corr_slices,
num_corr_slices - 1); // 1 or 17
int out_offset = ((iCorrTile + 1) * num_corr_slices -1) * corr_size_td;
System.arraycopy(
fcorr_data_sum[tileY][tileX],
0,
fcorr_data_out,
out_offset, // corr_size_td * iCorrTile,
corr_size_td);
corr_indices_neib[(iCorrTile +1) * num_corr_slices -1] = (nTile << GPUTileProcessor.CORR_NTILE_SHIFT) | 0xfe; // sum of neibs
float sw = 1.0f;
for (int dir = 0; dir < tn.numNeibs(); dir++) {
int nTile1 = tn.getNeibIndex(nTile, dir);
if ((nTile1 >=0) && (map_corr_indices[nTile1] >=0)) {
float w = weights[dir];
sw += w;
float [] fcorr_data_neib = fcorr_data_sum[tn.getY(nTile1)][tn.getX(nTile1)];
int indx = out_offset; // corr_size_td * iCorrTile;
for (int i = 0; i < corr_size_td; i++) {
fcorr_data_out[indx++] += w * fcorr_data_neib[i];
}
} }
}
float s = 1.0f/sw;
int indx0 = out_offset; // corr_size_td * iCorrTile;
int indx1 = indx0+corr_size_td;
for (int i = indx0; i < indx1; i++) {
fcorr_data_out[i] *= s;
}
}
}
};
}
startAndJoin(threads);
ai.set(0);
// set GPU memory
gpuQuad.setCorrIndicesTdData(
num_tiles * num_corr_slices, // int num_tiles, // corr_indices, fdata may be longer than needed
corr_indices_neib, // int [] corr_indices,
fcorr_data_out); // float [] fdata)
return corr_indices_neib;
}
private int[] getMapCorr( private int[] getMapCorr(
int [] corr_indices) { int [] corr_indices) {
final int tilesX= gpuQuad.getTilesX(); // width/transform_size; final int tilesX= gpuQuad.getTilesX(); // width/transform_size;
...@@ -1489,7 +1600,7 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1489,7 +1600,7 @@ public class ImageDtt extends ImageDttCPU {
return null; return null;
} }
final boolean extra_sum = true; // use sum of pixel-domain correlations (TD have artifacts for low contrast final boolean extra_sum = true; // use sum of pixel-domain correlations (TD have artifacts for low contrast
// - maybe -related to float vs. double - not tested yet // - maybe -related to float vs. double - not tested yet . Probably - still FPN with low offset
final int tilesX= gpuQuad.getTilesX(); // width/transform_size; final int tilesX= gpuQuad.getTilesX(); // width/transform_size;
final int tilesY= gpuQuad.getTilesY(); // final int tilesY=height/transform_size; final int tilesY= gpuQuad.getTilesY(); // final int tilesY=height/transform_size;
final double [][][] coord_motion = new double [(pXpYD != null)?2:1][tilesX * tilesY][]; final double [][][] coord_motion = new double [(pXpYD != null)?2:1][tilesX * tilesY][];
...@@ -1526,19 +1637,23 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1526,19 +1637,23 @@ public class ImageDtt extends ImageDttCPU {
} }
} }
// corr_indices has TD sum slot // corr_indices has TD sum slot
final int num_tiles = corr_indices.length / gpuQuad.getSensInter().length; // number of tiles, regardless of correlation slices
//getSensInterNeib(boolean full)
final int [] map_corr_indices = getMapCorr(corr_indices); final int [] map_corr_indices = getMapCorr(corr_indices);
int [][] corr_neibs_indx = new int [1][];
float [] corr_neibs_td = null;
double [] neib_weights_od = {0.7, 0.5}; double [] neib_weights_od = {0.7, 0.5};
final boolean use_full = use_partial || (dcorr_tiles != null) || !use_neibs; // old version always correlated all sensors
final int [] used_sensors_list = use_neibs ? gpuQuad.getSensInterNeib(use_full) : gpuQuad.getSensInter(); // last is 0xff - sum of channels
if (use_neibs) { if (use_neibs) {
corr_neibs_td = prepNeibCorr( corr_indices = prepNeibCorr( // updates GPU memory to run a single execCorr2D_normalize
corr_neibs_indx, // int [][] corr_indices_outp, // should be [1][] use_full, // final boolean use_partial, // find motion vectors for individual pairs, false - for sum only
neib_weights_od, // double [] neib_weights_od, // {orhto, diag} neib_weights_od, // double [] neib_weights_od, // {orhto, diag}
map_corr_indices, // int [] map_corr_indices_in, map_corr_indices, // int [] map_corr_indices_in,
debug_tileX, // final int debug_tileX, debug_tileX, // final int debug_tileX,
debug_tileY, // final int debug_tileY, debug_tileY, // final int debug_tileY,
globalDebugLevel); // final int globalDebugLevel) globalDebugLevel); // final int globalDebugLevel)
} }
// final int num_used_slices = corr_indices.length / num_tiles;
int dbg_imax = 0; int dbg_imax = 0;
for (int ii = 1; ii < corr_indices.length; ii++) { for (int ii = 1; ii < corr_indices.length; ii++) {
...@@ -1551,6 +1666,7 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1551,6 +1666,7 @@ public class ImageDtt extends ImageDttCPU {
return null; return null;
} }
float [] fcorr_weights = ((num_acc != null) || (dcorr_weight != null))? pfcorr_weights[0] : null; float [] fcorr_weights = ((num_acc != null) || (dcorr_weight != null))? pfcorr_weights[0] : null;
gpuQuad.execCorr2D_normalize( gpuQuad.execCorr2D_normalize(
false, // boolean combo, // normalize combo correlations (false - per-pair ones) false, // boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero, // double fat_zero); gpu_fat_zero, // double fat_zero);
...@@ -1560,35 +1676,17 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1560,35 +1676,17 @@ public class ImageDtt extends ImageDttCPU {
final float [][] fcorr2D = gpuQuad.getCorr2D(gpu_corr_rad); // int corr_rad); final float [][] fcorr2D = gpuQuad.getCorr2D(gpu_corr_rad); // int corr_rad);
final int corr_length = fcorr2D[0].length;// all correlation tiles have the same size final int corr_length = fcorr2D[0].length;// all correlation tiles have the same size
// final int num_tiles = corr_indices.length / gpuQuad.getSensInter().length; // number of tiles, regardless of correlation slices
final int [] used_sensors_list = gpuQuad.getSensInter(); // last is 0xff - sum of channels // currently execCorr2D_normalize() output has 17 slices for old variant (no neibs) and 18/2 if (use_neibs)
final int extra_len = extra_sum? 1 : 0; final int extra_len = extra_sum? 1 : 0;
final int corrs_len = (use_partial?used_sensors_list.length:1); // without optional extra_len but including GPU sum // final int corrs_len = ((use_partial || use_neibs) ? used_sensors_list.length:1); // without optional extra_len but including GPU sum
final int corrs_len = (use_neibs || use_partial) ? used_sensors_list.length:1; // without optional extra_len but including GPU sum
final int indx_sum_pd = (extra_len > 0) ? corrs_len : -1;
final int num_tiles = corr_indices.length / used_sensors_list.length; // number of correlated tiles (not in tp_tasks) final int indx_sum_td = use_neibs ? (corrs_len -2): (corrs_len -1);
final int indx_sum_td_neib = use_neibs ? (corrs_len -1): -1;
// now load GPU with neib-averaged TD data and calculate PD 2D correlations (single-layer)
if (use_neibs) {
gpuQuad.setCorrIndicesTdData(
corr_neibs_indx[0].length, // int num_tiles, // corr_indices, fdata may be longer than needed
corr_neibs_indx[0], // int [] corr_indices,
corr_neibs_td); // float [] fdata)
gpuQuad.execCorr2D_normalize(
false, // boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero, // double fat_zero);
null, // fcorr_weights, // float [] fcorr_weights, // null or one per correlation tile (num_corr_tiles) to divide fat zero2
gpu_corr_rad); // int corr_radius
}
final float [][] fcorr2Dneibs = use_neibs ? gpuQuad.getCorr2D(gpu_corr_rad) : null; // int corr_rad);
// Add (and init by caller) if needed, so far static is enough
// if (correlation2d == null) {
// throw new IllegalArgumentException ("clt_process_tl_correlations(): correlation2d == null!");
// }
//num_used_slices
final double [][] corr_wnd = Corr2dLMA.getCorrWnd( final double [][] corr_wnd = Corr2dLMA.getCorrWnd(
transform_size, transform_size,
imgdtt_params.lma_wnd); imgdtt_params.lma_wnd);
...@@ -1601,6 +1699,10 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1601,6 +1699,10 @@ public class ImageDtt extends ImageDttCPU {
} }
} }
} }
final int [] fcorr_indices = corr_indices;
final int [] fpn_indices = use_neibs?
( new int [] {used_sensors_list[used_sensors_list.length-2],used_sensors_list[used_sensors_list.length-1]}) :
(new int [] {used_sensors_list[used_sensors_list.length-1]});
final Thread[] threads = newThreadArray(threadsMax); final Thread[] threads = newThreadArray(threadsMax);
final AtomicInteger ai = new AtomicInteger(0); final AtomicInteger ai = new AtomicInteger(0);
...@@ -1612,7 +1714,7 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1612,7 +1714,7 @@ public class ImageDtt extends ImageDttCPU {
int tileY,tileX,nTile; // , chn; int tileY,tileX,nTile; // , chn;
TileNeibs tn = new TileNeibs(tilesX,tilesY); TileNeibs tn = new TileNeibs(tilesX,tilesY);
for (int iCorrTile = ai.getAndIncrement(); iCorrTile < num_tiles; iCorrTile = ai.getAndIncrement()) { for (int iCorrTile = ai.getAndIncrement(); iCorrTile < num_tiles; iCorrTile = ai.getAndIncrement()) {
nTile = (corr_indices[iCorrTile* used_sensors_list.length] >> GPUTileProcessor.CORR_NTILE_SHIFT); nTile = (fcorr_indices[iCorrTile* used_sensors_list.length] >> GPUTileProcessor.CORR_NTILE_SHIFT);
tileY = nTile / tilesX; tileY = nTile / tilesX;
tileX = nTile % tilesX; tileX = nTile % tilesX;
boolean debugTile0 =(tileX == debug_tileX) && (tileY == debug_tileY) && (globalDebugLevel > 2); // 0); boolean debugTile0 =(tileX == debug_tileX) && (tileY == debug_tileY) && (globalDebugLevel > 2); // 0);
...@@ -1631,12 +1733,11 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1631,12 +1733,11 @@ public class ImageDtt extends ImageDttCPU {
int max_x = (int) Math.min(Math.round(fpn_x + fpn_radius), corr_size-1); int max_x = (int) Math.min(Math.round(fpn_x + fpn_radius), corr_size-1);
int min_y = (int) Math.max(Math.round(fpn_y - fpn_radius),0); int min_y = (int) Math.max(Math.round(fpn_y - fpn_radius),0);
int max_y = (int) Math.min(Math.round(fpn_y + fpn_radius), corr_size-1); int max_y = (int) Math.min(Math.round(fpn_y + fpn_radius), corr_size-1);
int fcorr2D_indx = (iCorrTile + 1)* used_sensors_list.length -1; // last in each group - sum in TD // int fcorr2D_indx = (iCorrTile + 1)* used_sensors_list.length -1; // last in each group - sum in TD
fpn_mask = new boolean[fcorr2D[fcorr2D_indx].length]; fpn_mask = new boolean[fcorr2D[0].length]; // fcorr2D_indx].length];
for (int iy = min_y; iy <= max_y; iy++) { for (int iy = min_y; iy <= max_y; iy++) {
for (int ix = min_x; ix <= max_x; ix++) { for (int ix = min_x; ix <= max_x; ix++) {
int indx = iy * corr_size + ix; int indx = iy * corr_size + ix;
// fcorr2D[fcorr2D_indx][indx] = 0;
fpn_mask[indx] = true; fpn_mask[indx] = true;
} }
} }
...@@ -1644,28 +1745,42 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1644,28 +1745,42 @@ public class ImageDtt extends ImageDttCPU {
min_str_sum = min_str_sum_fpn; min_str_sum = min_str_sum_fpn;
is_fpn = true; is_fpn = true;
} }
double [][] corrs = new double [corrs_len + extra_len][];
double [][] corrs = new double [corrs_len + extra_len][]; // 1/17/2/18 +(0/1)
// copy correlation tiles from the GPU's floating point arrays // copy correlation tiles from the GPU's floating point arrays
double scale = 1.0/getNumSensors(); double scale = 1.0/getNumSensors();
if (extra_sum) { if (extra_sum) {
corrs[corrs_len] = new double [corr_length]; corrs[corrs_len] = new double [corr_length];
} }
for (int isens = corrs_len - 1; isens >= 0; isens--) { // copy all preserved, calculate sum of individual sensors correlations?
int nsens = used_sensors_list.length - corrs_len + isens; // !use_neibs - all slices with individual, corrs_len - may be only combo (1) or all 17
// use_neibs - used_sensors_list.length == corrs_len
for (int isens = corrs_len - 1; isens >= 0; isens--) { // 16..0, 0..0, 17..0, 1..0
int nsens = used_sensors_list.length - corrs_len + isens; // 16..0, 16..16, 17..0, 1..0
corrs[isens] = new double[corr_length]; corrs[isens] = new double[corr_length];
int fcorr2D_indx = iCorrTile * used_sensors_list.length + nsens; int fcorr2D_indx = iCorrTile * used_sensors_list.length + nsens;
// convert to double and scale - all slices used
for (int i = 0; i < corr_length; i++) { for (int i = 0; i < corr_length; i++) {
corrs[isens][i] = gpu_corr_scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles corrs[isens][i] = gpu_corr_scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles
} }
// calculate PD sum of individual sensors correlations
if (use_partial && extra_sum && (used_sensors_list[nsens] < getNumSensors())) { // only for individual sensors
for (int i = 0; i < corr_length; i++) {
corrs[corrs_len][i] += scale*corrs[isens][i];
}
}
/*
if (use_partial && (isens < (corrs_len - 1))) { // not including sum if (use_partial && (isens < (corrs_len - 1))) { // not including sum
for (int i = 0; i < corr_length; i++) { for (int i = 0; i < corr_length; i++) {
corrs[corrs_len][i] += scale*corrs[isens][i]; corrs[corrs_len][i] += scale*corrs[isens][i];
} }
} }
*/
} }
if (!use_partial && extra_sum) { // calculate PD sum of individual sensors correlations if they themselves are not preserved
if (!use_partial && extra_sum && use_full) {
scale *= gpu_corr_scale; scale *= gpu_corr_scale;
for (int nsens = 0; nsens < (used_sensors_list.length - 1); nsens++) { for (int nsens = 0; nsens < (used_sensors_list.length - 1); nsens++) if (used_sensors_list[nsens] < 0xfe){
int fcorr2D_indx = iCorrTile * used_sensors_list.length + nsens; int fcorr2D_indx = iCorrTile * used_sensors_list.length + nsens;
for (int i = 0; i < corr_length; i++) { for (int i = 0; i < corr_length; i++) {
corrs[corrs_len][i] += scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles corrs[corrs_len][i] += scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles
...@@ -1675,17 +1790,24 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1675,17 +1790,24 @@ public class ImageDtt extends ImageDttCPU {
if (is_fpn) { if (is_fpn) {
for (int i = 0; i < corr_length; i++) if (fpn_mask[i]){ for (int i = 0; i < corr_length; i++) if (fpn_mask[i]){
corrs[corrs_len - 1][i] = 0.0; // instead of fcorr2D[fcorr2D_indx][indx] = 0; corrs[corrs_len - 1][i] = 0.0; // instead of fcorr2D[fcorr2D_indx][indx] = 0;
if (use_neibs) {
corrs[corrs_len - 2][i] = 0.0;
}
} }
} }
if (dcorr_tiles != null) { // This will be visualized (only for visualization?) if (dcorr_tiles != null) { // This will be visualized (only for visualization?)
int index_es = getNumSensors() + extra_len; // int index_es = getNumSensors() + extra_len;
dcorr_tiles[iCorrTile] = new double[getNumSensors()+1 + extra_len][]; int index_es = used_sensors_list.length; // last, OK if extra_len==0
//used_sensors_list
// dcorr_tiles[iCorrTile] = new double[getNumSensors()+1 + extra_len][];
dcorr_tiles[iCorrTile] = new double[used_sensors_list.length + extra_len][];
if (extra_sum) { if (extra_sum) {
dcorr_tiles[iCorrTile][index_es] = new double[corr_length]; dcorr_tiles[iCorrTile][index_es] = new double[corr_length];
} }
for (int nsens = 0; nsens < used_sensors_list.length; nsens++) { /*
int abs_sens = used_sensors_list[nsens]; for (int nsens = 0; nsens < used_sensors_list.length; nsens++) { // all but sum
int abs_sens = used_sensors_list[nsens]; // should fork for neibs to full (2 elements)
if (abs_sens >= getNumSensors()) { if (abs_sens >= getNumSensors()) {
abs_sens = getNumSensors(); // last - sum of all sensors abs_sens = getNumSensors(); // last - sum of all sensors
} }
...@@ -1701,9 +1823,28 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1701,9 +1823,28 @@ public class ImageDtt extends ImageDttCPU {
dcorr_tiles[iCorrTile][abs_sens][i] = gpu_corr_scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles dcorr_tiles[iCorrTile][abs_sens][i] = gpu_corr_scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles
} }
} }
*/
for (int nsens = 0; nsens < used_sensors_list.length; nsens++) { // all but sum
int abs_sens = used_sensors_list[nsens]; // should fork for neibs to full (2 elements)
if ((abs_sens < getNumSensors()) && extra_sum) {
int fcorr2D_indx = iCorrTile * used_sensors_list.length + nsens;
for (int i = 0; i < corr_length; i++) {
dcorr_tiles[iCorrTile][index_es][i] += scale * gpu_corr_scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles
}
}
dcorr_tiles[iCorrTile][nsens] = new double[corr_length];
int fcorr2D_indx = iCorrTile * used_sensors_list.length + nsens;
for (int i = 0; i < corr_length; i++) {
dcorr_tiles[iCorrTile][nsens][i] = gpu_corr_scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles
}
}
if (is_fpn) { if (is_fpn) {
for (int i = 0; i < corr_length; i++) if (fpn_mask[i]){ for (int i = 0; i < corr_length; i++) if (fpn_mask[i]){
dcorr_tiles[iCorrTile][used_sensors_list.length-1][i] = 0.0; // instead of fcorr2D[fcorr2D_indx][indx] = 0; dcorr_tiles[iCorrTile][used_sensors_list.length-1][i] = 0.0; // instead of fcorr2D[fcorr2D_indx][indx] = 0;
if (use_neibs) {
dcorr_tiles[iCorrTile][used_sensors_list.length-2][i] = 0.0; // instead of fcorr2D[fcorr2D_indx][indx] = 0;
}
} }
} }
} }
...@@ -1732,9 +1873,9 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1732,9 +1873,9 @@ public class ImageDtt extends ImageDttCPU {
double [] mv_td = new double [3]; double [] mv_td = new double [3];
boolean retry_pd=false, retry_td=false; boolean retry_pd=false, retry_td=false;
boolean neib_en = !(is_fpn && neibs_nofpn_only); boolean neib_en = !(is_fpn && neibs_nofpn_only);
if (pd_weight > 0.0) { if ((pd_weight > 0.0) && (indx_sum_pd >=0)) {
mv_pd = Correlation2d.getMaxXYCm( // last, average mv_pd = Correlation2d.getMaxXYCm( // last, average
corrs[corrs.length-1], // double [] data, corrs[indx_sum_pd], // corrs.length-1], // double [] data,
corr_size, // int data_width, // = 2 * transform_size - 1; corr_size, // int data_width, // = 2 * transform_size - 1;
centroid_radius, // double radius, // 0 - all same weight, > 0 cosine(PI/2*sqrt(dx^2+dy^2)/rad) centroid_radius, // double radius, // 0 - all same weight, > 0 cosine(PI/2*sqrt(dx^2+dy^2)/rad)
n_recenter, // int refine, // re-center window around new maximum. 0 -no refines (single-pass) n_recenter, // int refine, // re-center window around new maximum. 0 -no refines (single-pass)
...@@ -1755,7 +1896,7 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1755,7 +1896,7 @@ public class ImageDtt extends ImageDttCPU {
} }
if (td_weight > 0.0) { if (td_weight > 0.0) {
mv_td = Correlation2d.getMaxXYCm( // pre-last - sharp (in FD) mv_td = Correlation2d.getMaxXYCm( // pre-last - sharp (in FD)
corrs[corrs.length-2], // double [] data, corrs[indx_sum_td], // corrs.length-2], // double [] data,
corr_size, // int data_width, // = 2 * transform_size - 1; corr_size, // int data_width, // = 2 * transform_size - 1;
centroid_radius, // double radius, // 0 - all same weight, > 0 cosine(PI/2*sqrt(dx^2+dy^2)/rad) centroid_radius, // double radius, // 0 - all same weight, > 0 cosine(PI/2*sqrt(dx^2+dy^2)/rad)
n_recenter, // int refine, // re-center window around new maximum. 0 -no refines (single-pass) n_recenter, // int refine, // re-center window around new maximum. 0 -no refines (single-pass)
...@@ -1776,6 +1917,7 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1776,6 +1917,7 @@ public class ImageDtt extends ImageDttCPU {
} }
// calculate averages from neighbors // calculate averages from neighbors
// will replace corrs[] with averages // will replace corrs[] with averages
/*
if (retry_pd || retry_td) { if (retry_pd || retry_td) {
if (redo_both) { if (redo_both) {
retry_pd |= retry_td; // here could be just true retry_pd |= retry_td; // here could be just true
...@@ -1805,7 +1947,7 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1805,7 +1947,7 @@ public class ImageDtt extends ImageDttCPU {
corrs[isens][i] += gpu_corr_scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles corrs[isens][i] += gpu_corr_scale * fcorr2D[fcorr2D_indx][i]; // copy one-by-one converting from floats to doubles
} }
} }
// direcly accumulating, without preservation of per-sensor data // directly accumulating, without preservation of per-sensor data
if (!use_partial && extra_sum) { if (!use_partial && extra_sum) {
for (int nsens = 0; nsens < (used_sensors_list.length - 1); nsens++) { for (int nsens = 0; nsens < (used_sensors_list.length - 1); nsens++) {
int fcorr2D_indx = iCorrTile1 * used_sensors_list.length + nsens; int fcorr2D_indx = iCorrTile1 * used_sensors_list.length + nsens;
...@@ -1893,7 +2035,7 @@ public class ImageDtt extends ImageDttCPU { ...@@ -1893,7 +2035,7 @@ public class ImageDtt extends ImageDttCPU {
} }
} // if (num_neibs > min_num_neibs) { } // if (num_neibs > min_num_neibs) {
} }
*/
if ((mv_td != null) || (mv_pd != null)) { if ((mv_td != null) || (mv_pd != null)) {
double [] mv = new double[3 + (use3D? 2 :0)]; // keep for disparity/strength double [] mv = new double[3 + (use3D? 2 :0)]; // keep for disparity/strength
if (mv_pd != null) { if (mv_pd != null) {
......
...@@ -13152,16 +13152,17 @@ public class OpticalFlow { ...@@ -13152,16 +13152,17 @@ public class OpticalFlow {
} }
boolean use_neibs = clt_parameters.imp.use_neibs; // false; // true; boolean use_neibs = clt_parameters.imp.use_neibs; // false; // true;
boolean use_neibs_pd = true;
boolean neibs_nofpn_only = clt_parameters.imp.neibs_nofpn_only | boolean neibs_nofpn_only = clt_parameters.imp.neibs_nofpn_only |
(initial_adjust && clt_parameters.imp.neibs_nofpn_init); // consolidate neighbors fot non-fpn tiles only! (initial_adjust && clt_parameters.imp.neibs_nofpn_init); // consolidate neighbors fot non-fpn tiles only!
boolean redo_both = clt_parameters.imp.redo_both; // use average of neighbors for both pd,td if any of the center tile tests (td, pd) fails boolean redo_both = clt_parameters.imp.redo_both; // use average of neighbors for both pd,td if any of the center tile tests (td, pd) fails
int min_num_neibs = clt_parameters.imp.min_num_neibs; // plus center, total number >= (min_num_neibs+1) int min_num_neibs = clt_parameters.imp.min_num_neibs; // plus center, total number >= (min_num_neibs+1)
double scale_neibs_pd = use_neibs? clt_parameters.imp.scale_neibs_pd : 0; // scale threshold for the pixel-domain average maximums double scale_neibs_pd = use_neibs_pd? clt_parameters.imp.scale_neibs_pd : 0; // scale threshold for the pixel-domain average maximums
double scale_neibs_td = use_neibs? clt_parameters.imp.scale_neibs_td : 0; // scale threshold for the transform-domain average maximums double scale_neibs_td = use_neibs_pd? clt_parameters.imp.scale_neibs_td : 0; // scale threshold for the transform-domain average maximums
double scale_avg_weight = clt_parameters.imp.scale_avg_weight; // reduce influence of the averaged correlations compared to the single-tile ones double scale_avg_weight = clt_parameters.imp.scale_avg_weight; // reduce influence of the averaged correlations compared to the single-tile ones
int [] corr_indices_dbg = show_2d_correlations? image_dtt.getGPU().getCorrIndices() : null; int [] corr_indices_dbg = show_2d_correlations? image_dtt.getGPU().getCorrIndices() : null;
boolean use_partial = clt_parameters.imp.use_partial;
coord_motion = image_dtt.clt_process_tl_interscene( // convert to pixel domain and process correlations already prepared in fcorr_td and/or fcorr_combo_td coord_motion = image_dtt.clt_process_tl_interscene( // convert to pixel domain and process correlations already prepared in fcorr_td and/or fcorr_combo_td
clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
// only used here to keep extra array element for disparity difference // only used here to keep extra array element for disparity difference
...@@ -13184,7 +13185,7 @@ public class OpticalFlow { ...@@ -13184,7 +13185,7 @@ public class OpticalFlow {
fpn_ignore_border, // final boolean fpn_ignore_border, // only if fpn_mask != null - ignore tile if maximum touches fpn_mask fpn_ignore_border, // final boolean fpn_ignore_border, // only if fpn_mask != null - ignore tile if maximum touches fpn_mask
motion_vectors, // final double [][][] motion_vectors, // [tilesY*tilesX][][] -> [][][num_sel_sensors+1][2] motion_vectors, // final double [][][] motion_vectors, // [tilesY*tilesX][][] -> [][][num_sel_sensors+1][2]
clt_parameters.imp.run_poly, // final boolean run_poly, // polynomial max, if false - centroid clt_parameters.imp.run_poly, // final boolean run_poly, // polynomial max, if false - centroid
clt_parameters.imp.use_partial, // final boolean use_partial, // find motion vectors for individual pairs, false - for sum only use_partial, // final boolean use_partial, // find motion vectors for individual pairs, false - for sum only
clt_parameters.imp.centroid_radius,// final double centroid_radius, // 0 - use all tile, >0 - cosine window around local max clt_parameters.imp.centroid_radius,// final double centroid_radius, // 0 - use all tile, >0 - cosine window around local max
clt_parameters.imp.n_recenter, // final int n_recenter, // when cosine window, re-center window this many times clt_parameters.imp.n_recenter, // final int n_recenter, // when cosine window, re-center window this many times
clt_parameters.imp.td_weight, // final double td_weight, // mix correlations accumulated in TD with clt_parameters.imp.td_weight, // final double td_weight, // mix correlations accumulated in TD with
...@@ -15131,7 +15132,7 @@ public class OpticalFlow { ...@@ -15131,7 +15132,7 @@ public class OpticalFlow {
double disparity_weight = use3D? clt_parameters.ilp.ilma_disparity_weight : 0.0; double disparity_weight = use3D? clt_parameters.ilp.ilma_disparity_weight : 0.0;
int margin = clt_parameters.imp.margin; int margin = clt_parameters.imp.margin;
int sensor_mask_inter = clt_parameters.imp.sensor_mask_inter ; //-1; int sensor_mask_inter = clt_parameters.imp.sensor_mask_inter ; //-1;
float [][][] facc_2d_img = new float [1][][]; float [][][] facc_2d_img = new float [1][][]; // set it to null?
IntersceneLma intersceneLma = new IntersceneLma( IntersceneLma intersceneLma = new IntersceneLma(
clt_parameters.ilp.ilma_thread_invariant, clt_parameters.ilp.ilma_thread_invariant,
disparity_weight); disparity_weight);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment