Commit 8623329f authored by Andrey Filippov's avatar Andrey Filippov

started TD neighbors

parent aec8ae98
......@@ -507,7 +507,7 @@ public class GpuQuad{ // quad camera description
/**
 * @return the current value of the {@code img_width} field
 */
public int getSensorWidth() {
	return img_width;
}
/**
 * @return the current value of the {@code img_height} field
 */
public int getSensorHeight() {
	return img_height;
}
public int getDttSize() {return GPUTileProcessor.DTT_SIZE;}
public static int getDttSize() {return GPUTileProcessor.DTT_SIZE;}
// public int getNumCams() {return GPUTileProcessor.NUM_CAMS;}
/**
 * @return the current value of the {@code sensor_mask_inter} field (the value stored by
 *         {@code setSensorMaskInter(int)} - presumably a per-sensor bitmask, confirm against the setter)
 */
public int getSensorMaskInter() {
	return this.sensor_mask_inter;
}
public void setSensorMaskInter(int mask) {
......@@ -2716,9 +2716,8 @@ public class GpuQuad{ // quad camera description
/**
 * @return the current value of the {@code num_all_pairs} field
 */
public int getNumPairs() {
	return this.num_all_pairs;
}
//
/**
* Generating correlation sequence by CPU to correlate all tiles provided in linescan order.
* Additionally, if both (num_acc != null) and (pfcorr_weights !=null), pfcorr_weights[0]
......@@ -2817,10 +2816,10 @@ public class GpuQuad{ // quad camera description
int ty = tp_tasks[ntile].ty;
int tx = tp_tasks[ntile].tx;
for (int ipair = 0; ipair < num_pairs; ipair++) {
int pair = inter_mode?getSensInter(ipair): getCorrPair(ipair);
int pair = inter_mode?getSensInter(ipair): getCorrPair(ipair); // 0xff for accumulated
int corr_pair = ntile * num_pairs + ipair;
// Below was an error (not visible if all selected, using index of the pair, not an absolute pair )
indices[corr_pair]= // ntile * num_pairs + pair] =
indices[corr_pair]= // ntile * num_pairs + pair] = // low 8 bits - pair, 0xff for sum of all channels
((ty * tilesX + tx) << GPUTileProcessor.CORR_NTILE_SHIFT) +
(pair & ((1 << GPUTileProcessor.CORR_NTILE_SHIFT) -1) );
if (fcorr_weights != null) {
......@@ -2994,9 +2993,44 @@ public class GpuQuad{ // quad camera description
return corr_tiles;
}
/**
 * Read GPU transform-domain (TD) correlation data for a single correlation pair (usually the combo one)
 * into a sparse per-tile array [tilesY][tilesX][].
 * <p>
 * The correlation data is treated as consecutive groups of {@code num_pairs} slots per tile; the tile
 * number of each group is taken from the upper bits (above {@code GPUTileProcessor.CORR_NTILE_SHIFT})
 * of the first index of that group.
 *
 * @param inter true for interscene correlations, false - for the intrascene ones
 * @param pair  correlation pair index (slot inside each tile's group) to read,
 *              {@code 0 <= pair < number of pairs per tile}
 * @return an array of transform-domain correlations mapped to tiles [tilesY][tilesX]. Each tile data is
 *         either null (tile is not present in the correlation indices) or a
 *         [4 * DTT_SIZE * DTT_SIZE] float array
 * @throws IllegalArgumentException if {@code pair} is outside of the range of pairs per tile. Without
 *         this check such a value would silently return data that belongs to a different tile.
 */
public float [][][] getCorrTilesLayerTD(
		boolean inter,
		int     pair) {
	// Interscene mode has one extra slot per tile in addition to the per-camera ones
	// (presumably the sum of all channels - confirm with the correlation kernel).
	final int num_pairs = inter ? (getNumCamsInter() + 1) : getNumUsedPairs();
	if ((pair < 0) || (pair >= num_pairs)) {
		throw new IllegalArgumentException(
				"getCorrTilesLayerTD(): pair=" + pair + " is out of range [0, " + num_pairs + ")");
	}
	final int tilesX = img_width / GPUTileProcessor.DTT_SIZE;
	final int tilesY = img_height / GPUTileProcessor.DTT_SIZE;
	final int corr_size_td = 4 * GPUTileProcessor.DTT_SIZE * GPUTileProcessor.DTT_SIZE;
	final int []   indices = getCorrIndices(); // also sets num_corr_tiles
	final float [] fdata =   getCorrTdData();
	final int num_tiles = num_corr_tiles / num_pairs;
	final float [][][] corr_tiles = new float [tilesY][tilesX][];
	for (int nt = 0; nt < num_tiles; nt++ ) {
		final int group_start = nt * num_pairs; // first slot of this tile's group
		final int nTile = indices[group_start] >> GPUTileProcessor.CORR_NTILE_SHIFT;
		final int ty = nTile / tilesX;
		final int tx = nTile % tilesX;
		final float [] tile_data = new float [corr_size_td];
		System.arraycopy(
				fdata,
				(group_start + pair) * corr_size_td,
				tile_data,
				0,
				corr_size_td);
		corr_tiles[ty][tx] = tile_data;
	}
	return corr_tiles;
}
public int [] setCorrTilesComboTd(
public int [] setCorrTilesComboTd( // not used?
final float [][][] corr_tiles, // [tileY][tileX][4*64]
int ipair) // just to set in the index low bits
{
......@@ -3025,7 +3059,7 @@ public class GpuQuad{ // quad camera description
return indices_trim;
}
public float [][][] getCorrTilesComboTd() // [tileY][tileX][4*64] , read all available pairs
public float [][][] getCorrTilesComboTd() // [tileY][tileX][4*64] , read all available pairs // not used?
{
int tilesX = img_width / GPUTileProcessor.DTT_SIZE;
int tilesY = img_height / GPUTileProcessor.DTT_SIZE;
......
......@@ -11,6 +11,7 @@ import com.elphel.imagej.gpu.GpuQuad;
import com.elphel.imagej.gpu.TpTask;
import ij.ImagePlus;
import jcuda.Pointer;
//import Jama.Matrix;
......@@ -1211,8 +1212,10 @@ public class ImageDtt extends ImageDttCPU {
return;
}
gpuQuad.setSensorMaskInter(sensor_mask_inter);
//Generate 2D phase correlations from the CLT representation
gpuQuad.execCorr2D_inter_TD(
// Generate 2D phase correlations from the CLT representation
// generates sum of the per-channel correlations as the last slot.
// updates gpuQuad.gpu_corr_indices, gpuQuad.gpu_corrs_td and some other
gpuQuad.execCorr2D_inter_TD( //
col_weights); // double [] scales,
if (fcorr_td != null) {
gpuQuad.getCorrTilesTd(
......@@ -1323,11 +1326,121 @@ public class ImageDtt extends ImageDttCPU {
gpuQuad.execConvertDirect(use_reference_buffer, wh, -1); // erase_clt); // put results into a "reference" buffer
}
/**
 * Prepare transform-domain (TD) correlation tiles averaged with their neighbors.
 * <p>
 * For each correlated tile, the last slot of its TD correlation data is read from the GPU
 * (the "sum of channels" slot according to the comments in this file). It is then combined with the same
 * slot of every available neighbor as a weighted average: the center tile has weight 1.0, neighbors
 * in even directions use {@code neib_weights_od[0]} and neighbors in odd directions use
 * {@code neib_weights_od[1]}. The result is normalized by the sum of the weights actually used.
 *
 * @param corr_indices_outp   null or an array with at least one element; element [0] is set to
 *                            the per-tile correlation indices (last index of each tile's group) that
 *                            match the returned data
 * @param neib_weights_od     {ortho, diag} weights of the neighbor tiles (at least 2 elements)
 * @param map_corr_indices_in map from the tile number to the correlation tile number (negative - tile
 *                            is not correlated), or null to build it here with
 *                            {@link #getMapCorr(int[])}
 * @param debug_tileX         X-index of the tile to print debug information for
 * @param debug_tileY         Y-index of the tile to print debug information for
 * @param globalDebugLevel    debug level; tile debug output is enabled when it is above 2
 * @return packed TD correlation data, {@code 4 * DTT_SIZE * DTT_SIZE} floats per correlated tile,
 *         in the same tile order as the GPU correlation indices
 */
private float [] prepNeibCorr(
		int [][]  corr_indices_outp,   // should be [1][]
		double [] neib_weights_od,     // {ortho, diag}
		int []    map_corr_indices_in,
		final int debug_tileX,
		final int debug_tileY,
		final int globalDebugLevel)
{
	final int corr_size_td = 4 * GPUTileProcessor.DTT_SIZE * GPUTileProcessor.DTT_SIZE;
	final int [] corr_indices_in = gpuQuad.getCorrIndices(); // also sets num_corr_tiles FIXME: update num_corr_tiles?
	final int [] used_sensors_list = gpuQuad.getSensInter(); // last is 0xff - sum of channels
	final int num_slots = used_sensors_list.length;          // correlation slots per tile, including the sum
	final int num_tiles = corr_indices_in.length / num_slots; // number of correlated tiles (not in tp_tasks)
	final float [] fcorr_data_out = new float [corr_size_td * num_tiles];
	final int [] map_corr_indices = (map_corr_indices_in != null) ?
			map_corr_indices_in : getMapCorr(corr_indices_in);
	final float [][][] fcorr_data_sum = gpuQuad.getCorrTilesLayerTD(
			true,
			num_slots - 1); // last is sum
	final int [] corr_indices_neib = new int[num_tiles];
	if (corr_indices_outp != null) {
		corr_indices_outp[0] = corr_indices_neib;
	}
	// Neighbor weight depends only on the direction parity: even - ortho, odd - diagonal
	final float [] weights = {(float) neib_weights_od[0], (float) neib_weights_od[1]};
	final int tilesX= gpuQuad.getTilesX(); // width/transform_size;
	final int tilesY= gpuQuad.getTilesY(); // final int tilesY=height/transform_size;
	final Thread[] threads = newThreadArray(THREADS_MAX);
	final AtomicInteger ai = new AtomicInteger(0);
	for (int ithread = 0; ithread < threads.length; ithread++) {
		threads[ithread] = new Thread() {
			@Override
			public void run() {
				final TileNeibs tn = new TileNeibs(tilesX,tilesY);
				// Each thread takes the next unprocessed correlation tile and writes only to that
				// tile's own slice of fcorr_data_out / element of corr_indices_neib.
				for (int iCorrTile = ai.getAndIncrement(); iCorrTile < num_tiles; iCorrTile = ai.getAndIncrement()) {
					final int nTile = (corr_indices_in[iCorrTile * num_slots] >> GPUTileProcessor.CORR_NTILE_SHIFT);
					final int tileY = nTile / tilesX;
					final int tileX = nTile % tilesX;
					// keep the index of the last (sum) slot of this tile
					corr_indices_neib[iCorrTile] = corr_indices_in[(iCorrTile + 1) * num_slots - 1];
					final boolean debugTile0 = (tileX == debug_tileX) && (tileY == debug_tileY) && (globalDebugLevel > 2);
					if (debugTile0) {
						System.out.println("prepNeibCorr(): tileX="+tileX+", tileY="+tileY+", nTile="+nTile+", iCorrTile="+iCorrTile);
					}
					final int indx0 = corr_size_td * iCorrTile;
					final int indx1 = indx0 + corr_size_td;
					// start with the center tile (weight 1.0)
					System.arraycopy(
							fcorr_data_sum[tileY][tileX],
							0,
							fcorr_data_out,
							indx0,
							corr_size_td);
					float sw = 1.0f;
					for (int dir = 0; dir < tn.numNeibs(); dir++) {
						final int nTile1 = tn.getNeibIndex(nTile, dir);
						// skip missing neighbors and neighbors that were not correlated
						if ((nTile1 >= 0) && (map_corr_indices[nTile1] >= 0)) {
							final float w = weights[dir & 1];
							sw += w;
							final float [] fcorr_data_neib = fcorr_data_sum[tn.getY(nTile1)][tn.getX(nTile1)];
							int indx = indx0;
							for (int i = 0; i < corr_size_td; i++) {
								fcorr_data_out[indx++] += w * fcorr_data_neib[i];
							}
						}
					}
					// normalize by the total weight of the tiles actually accumulated
					final float s = 1.0f/sw;
					for (int i = indx0; i < indx1; i++) {
						fcorr_data_out[i] *= s;
					}
				}
			}
		};
	}
	startAndJoin(threads);
	return fcorr_data_out;
}
/**
 * Build a reverse lookup from the tile number to the position of that tile in the list of
 * correlation indices.
 * <p>
 * The indices are treated as consecutive groups, one group per correlated tile, with as many
 * elements as {@code gpuQuad.getSensInter()} has. The tile number is taken from the upper bits (above
 * {@code GPUTileProcessor.CORR_NTILE_SHIFT}) of the first element of each group.
 *
 * @param corr_indices correlation indices, a whole number of groups
 * @return array of {@code tilesX * tilesY} elements: the correlation tile number for each tile, or -1
 *         for tiles that are not present in {@code corr_indices}
 */
private int[] getMapCorr(
		int [] corr_indices) {
	final int total_tiles = gpuQuad.getTilesX() * gpuQuad.getTilesY();
	final int stride = gpuQuad.getSensInter().length; // indices per tile, last is 0xff - sum of channels
	final int corr_tile_count = corr_indices.length / stride; // number of correlated tiles (not in tp_tasks)
	final int [] tile_to_corr = new int[total_tiles];
	Arrays.fill(tile_to_corr, -1); // -1 marks a tile without correlation data
	final AtomicInteger next_corr_tile = new AtomicInteger(0);
	final Thread[] workers = newThreadArray(THREADS_MAX);
	for (int iworker = 0; iworker < workers.length; iworker++) {
		workers[iworker] = new Thread() {
			@Override
			public void run() {
				// grab correlation tiles one at a time until the shared counter runs out
				int corr_tile;
				while ((corr_tile = next_corr_tile.getAndIncrement()) < corr_tile_count) {
					final int tile = corr_indices[corr_tile * stride] >> GPUTileProcessor.CORR_NTILE_SHIFT;
					tile_to_corr[tile] = corr_tile;
				}
			}
		};
	}
	startAndJoin(workers);
	return tile_to_corr;
}
public double [][][] clt_process_tl_interscene( // convert to pixel domain and process correlations already prepared in fcorr_td and/or fcorr_combo_td
final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
// only used here to keep extra array element for disparity difference
boolean use3D, // generate disparity difference
boolean use_neibs,
final float [][][][] fcorr_td, // [tilesY][tilesX][pair][4*64] transform domain representation of all selected corr pairs
float [][][] num_acc, // number of accumulated tiles [tilesY][tilesX][pair] (or null). Can be inner null if not used in tp_tasks
double [] dcorr_weight, // alternative to num_acc, compatible with CPU processing (only one non-zero enough)
......@@ -1370,26 +1483,13 @@ public class ImageDtt extends ImageDttCPU {
final int debug_tileY,
final int threadsMax, // maximal number of threads to launch
final int globalDebugLevel)
{ /*
boolean use_neibs = true; // false; // true;
final boolean neibs_nofpn_only = false; // consolidate neighbors fot non-fpn tiles only!
final double scale_neibs_pd = use_neibs? 0.5 : 0;
final double scale_neibs_td = use_neibs? 0.5 : 0;
final double scale_avg_weight = 0.5; // reduce influence of the averaged correlations compared to the single-tile ones
final int min_num_neibs = 4; // plus center, total number >= (min_num_neibs+1)
final boolean redo_both = true; // use average of neighbors for both pd,td if any of the center tile tests (td, pd) fails
*/
{
if (this.gpuQuad == null) {
System.out.println("clt_process_tl_interscene(): this.gpuQuad is null, bailing out");
return null;
}
//boolean debugTile0 =(tileX == debug_tileX) && (tileY == debug_tileY) && (globalDebugLevel > 2); // 0);
// final int min_neibs = clt_parameters.imp.min_neibs;
final boolean extra_sum = true; // use sum of pixel-domain correlations (TD have artifacts for low contrast
// - maybe -related to float vs. double - not tested yet
// final int width = gpuQuad.getImageWidth();
// final int height = gpuQuad.getImageHeight();
final int tilesX= gpuQuad.getTilesX(); // width/transform_size;
final int tilesY= gpuQuad.getTilesY(); // final int tilesY=height/transform_size;
final double [][][] coord_motion = new double [(pXpYD != null)?2:1][tilesX * tilesY][];
......@@ -1403,11 +1503,12 @@ public class ImageDtt extends ImageDttCPU {
}
}
final float [][] pfcorr_weights = ((num_acc != null) || (dcorr_weight != null))? new float[1][] : null;
// This version obeys tp_task order and fills fcorr_td gaps (should be none_) with zeros.
// This version obeys tp_task order and fills fcorr_td gaps (should be none) with zeros.
int [] corr_indices ;
// now it is always null
if (fcorr_td == null) { // used with no accumulation, assume TD correlation data is still in GPU
corr_indices = gpuQuad.getCorrIndices(); // also sets num_corr_tiles
} else {
} else { // never
if (num_acc != null) { // version with float [][][] num_acc, // number of accumulated tiles [tilesY][tilesX][pair] (or null)
corr_indices = gpuQuad.setCorrTilesTd( // .length = 295866 should set num_corr_tiles!
tp_tasks, // final TpTask [] tp_tasks, // data from the reference frame - will be applied to LMW for the integrated correlations
......@@ -1424,6 +1525,21 @@ public class ImageDtt extends ImageDttCPU {
pfcorr_weights); // float [][] pfcorr_weights) // null or one per correlation tile (num_corr_tiles) to divide fat zero2
}
}
// corr_indices has TD sum slot
final int [] map_corr_indices = getMapCorr(corr_indices);
int [][] corr_neibs_indx = new int [1][];
float [] corr_neibs_td = null;
double [] neib_weights_od = {0.7, 0.5};
if (use_neibs) {
corr_neibs_td = prepNeibCorr(
corr_neibs_indx, // int [][] corr_indices_outp, // should be [1][]
neib_weights_od, // double [] neib_weights_od, // {orhto, diag}
map_corr_indices, // int [] map_corr_indices_in,
debug_tileX, // final int debug_tileX,
debug_tileY, // final int debug_tileY,
globalDebugLevel); // final int globalDebugLevel)
}
int dbg_imax = 0;
for (int ii = 1; ii < corr_indices.length; ii++) {
if (corr_indices[ii] > corr_indices[dbg_imax]) {
......@@ -1445,13 +1561,28 @@ public class ImageDtt extends ImageDttCPU {
final int corr_length = fcorr2D[0].length;// all correlation tiles have the same size
final int [] used_sensors_list = gpuQuad.getSensInter();
final int [] used_sensors_list = gpuQuad.getSensInter(); // last is 0xff - sum of channels
final int extra_len = extra_sum? 1 : 0;
final int corrs_len = (use_partial?used_sensors_list.length:1); // without optional extra_len but including GPU sum
final int num_tiles = corr_indices.length / used_sensors_list.length; // number of correlated tiles (not in tp_tasks)
final int num_tiles = corr_indices.length / used_sensors_list.length; // number of correlated tiles (not in tp_tasks)
// now load GPU with neib-averaged TD data and calculate PD 2D correlations (single-layer)
if (use_neibs) {
gpuQuad.setCorrIndicesTdData(
corr_neibs_indx[0].length, // int num_tiles, // corr_indices, fdata may be longer than needed
corr_neibs_indx[0], // int [] corr_indices,
corr_neibs_td); // float [] fdata)
gpuQuad.execCorr2D_normalize(
false, // boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero, // double fat_zero);
null, // fcorr_weights, // float [] fcorr_weights, // null or one per correlation tile (num_corr_tiles) to divide fat zero2
gpu_corr_rad); // int corr_radius
}
final float [][] fcorr2Dneibs = use_neibs ? gpuQuad.getCorr2D(gpu_corr_rad) : null; // int corr_rad);
// Add (and init by caller) if needed, so far static is enough
// if (correlation2d == null) {
......@@ -1473,23 +1604,7 @@ public class ImageDtt extends ImageDttCPU {
final Thread[] threads = newThreadArray(threadsMax);
final AtomicInteger ai = new AtomicInteger(0);
final int [] map_corr_indices=new int[tilesX*tilesY];
Arrays.fill(map_corr_indices, -1);
// create indices for neighbors
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
@Override
public void run() {
for (int iCorrTile = ai.getAndIncrement(); iCorrTile < num_tiles; iCorrTile = ai.getAndIncrement()) {
int nTile = (corr_indices[iCorrTile* used_sensors_list.length] >> GPUTileProcessor.CORR_NTILE_SHIFT);
map_corr_indices[nTile] = iCorrTile;
}
}
};
}
startAndJoin(threads);
ai.set(0);
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
@Override
......
......@@ -44,6 +44,7 @@ import Jama.Matrix;
import ij.ImageStack;
public class ImageDttCPU {
static final int THREADS_MAX = 100;
static boolean FPGA_COMPARE_DATA= false; // true; // false; //
static int FPGA_SHIFT_BITS = 7; // number of bits for fractional pixel shift
static int FPGA_PIXEL_BITS = 15; // bits to represent pixel data (positive)
......
......@@ -12998,6 +12998,9 @@ public class OpticalFlow {
scene.saveQuadClt(); // to re-load new set of Bayer images to the GPU (do nothing for CPU) and Geometry
float [][][][] fcorr_td = null; // no accumulation, use data in GPU
// Generate 2D phase correlations from the CLT representation
// generates sum of the per-channel correlations as the last slot.
// updates gpuQuad.gpu_corr_indices, gpuQuad.gpu_corrs_td and some other
if (mb_en && (mb_vectors!=null)) {
image_dtt.interCorrTDMotionBlur(
clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
......@@ -13030,7 +13033,7 @@ public class OpticalFlow {
clt_parameters.corr_red, // final double corr_red, // +used
clt_parameters.corr_blue, // final double corr_blue,// +used
sensor_mask_inter, // final int sensor_mask_inter, // The bitmask - which sensors to correlate, -1 - all.
THREADS_MAX, // final int threadsMax, // maximal number of threads to launch
THREADS_MAX, // final int threadsMax, // maximal number of threads to launch
debug_level); // final int globalDebugLevel);
}
if (show_render_ref) {
......@@ -13158,10 +13161,12 @@ public class OpticalFlow {
double scale_neibs_td = use_neibs? clt_parameters.imp.scale_neibs_td : 0; // scale threshold for the transform-domain average maximums
double scale_avg_weight = clt_parameters.imp.scale_avg_weight; // reduce influence of the averaged correlations compared to the single-tile ones
int [] corr_indices_dbg = show_2d_correlations? image_dtt.getGPU().getCorrIndices() : null;
coord_motion = image_dtt.clt_process_tl_interscene( // convert to pixel domain and process correlations already prepared in fcorr_td and/or fcorr_combo_td
clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
// only used here to keep extra array element for disparity difference
use3D, // boolean use3D, // generate disparity difference
use_neibs, // boolean use_neibs,
fcorr_td, // final float [][][][] fcorr_td, // [tilesY][tilesX][pair][4*64] transform domain representation of all selected corr pairs
null, // float [][][] num_acc, // number of accumulated tiles [tilesY][tilesX][pair] (or null). Can be inner null if not used in tp_tasks
null, // double [] dcorr_weight, // alternative to num_acc, compatible with CPU processing (only one non-zero enough)
......@@ -13307,9 +13312,12 @@ public class OpticalFlow {
float [][][] fclt_corr1 = ImageDtt.convertFcltCorr( // partial length, matching corr_indices = gpuQuad.getCorrIndices(); // also sets num_corr_tiles
dcorr_tiles, // double [][][] dcorr_tiles,// [tile][sparse, correlation pair][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
fclt_corr); // float [][][] fclt_corr) // new float [tilesX * tilesY][][] or null
if (use_neibs) {
}
float [][] dbg_corr_rslt_partial = ImageDtt.corr_partial_dbg( // not used in lwir
fclt_corr1, // final float [][][] fcorr_data, // [tile][pair][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
image_dtt.getGPU().getCorrIndices(), // tp_tasks, // final TpTask [] tp_tasks, //
corr_indices_dbg, // image_dtt.getGPU().getCorrIndices(), // tp_tasks, // final TpTask [] tp_tasks, //
tilesX, //final int tilesX,
tilesY, //final int tilesX,
2*image_dtt.transform_size - 1, // final int corr_size,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment