Commit 08bccd0d authored by Andrey Filippov's avatar Andrey Filippov

implementing moving objects masking for ERS fitting, this is debug

version for GPU geometry correction
parent fabadb74
......@@ -28,345 +28,392 @@ package com.elphel.imagej.common;
public class DoubleGaussianBlur {
/* the standard deviation of the Gaussian*/
// private static double sigma = 2.0;
/* whether sigma is given in units corresponding to the pixel scale (not pixels)*/
// private static boolean sigmaScaled = false;
/* The flags specifying the capabilities and needs */
// private int flags = DOES_ALL|SUPPORTS_MASKING|PARALLELIZE_STACKS|KEEP_PREVIEW;
// private ImagePlus imp; // The ImagePlus of the setup call, needed to get the spatial calibration
// private boolean hasScale = false; // whether the image has an x&y scale
private int nPasses = 1; // The number of passes (filter directions * color channels * stack slices)
// private int nChannels = 1; // The number of color channels
private int pass; // Current pass
/* the standard deviation of the Gaussian*/
// private static double sigma = 2.0;
/* whether sigma is given in units corresponding to the pixel scale (not pixels)*/
// private static boolean sigmaScaled = false;
/* The flags specifying the capabilities and needs */
// private int flags = DOES_ALL|SUPPORTS_MASKING|PARALLELIZE_STACKS|KEEP_PREVIEW;
// private ImagePlus imp; // The ImagePlus of the setup call, needed to get the spatial calibration
// private boolean hasScale = false; // whether the image has an x&y scale
private int nPasses = 1; // The number of passes (filter directions * color channels * stack slices)
// private int nChannels = 1; // The number of color channels
private int pass; // Current pass
/* Default constructor */
public DoubleGaussianBlur() {
}
/* Default constructor */
public DoubleGaussianBlur() {
}
public double [] blurWithNaN(
double[] pixels,
double [] in_weight, // or null
int width,
int height,
double sigmaX,
double sigmaY,
double accuracy
) {
double [] weight;
double [] blured = new double[pixels.length];
if (in_weight == null) {
weight = new double [pixels.length];
for (int i = 0; i < weight.length; i++) {
weight[i] = 1.0;
}
} else {
weight = in_weight.clone();
}
for (int i = 0; i < pixels.length; i++) {
if (Double.isNaN(pixels[i])) {
weight[i] = 0.0;
} else {
blured[i] = pixels[i] * weight[i];
}
}
blurDouble(
blured,
width,
height,
sigmaX,
sigmaY,
accuracy);
blurDouble(
weight,
width,
height,
sigmaX,
sigmaY,
accuracy);
for (int i = 0; i < pixels.length; i++) {
blured[i] /= weight[i];
}
return blured;
}
public void blurDouble(double[] pixels,
int width,
int height,
double sigmaX,
double sigmaY,
double accuracy) {
if (sigmaX > 0)
blur1Direction(pixels, width,height, sigmaX, accuracy, true);
if (Thread.currentThread().isInterrupted()) return; // interruption for new parameters during preview?
if (sigmaY > 0)
blur1Direction(pixels, width,height, sigmaY, accuracy, false);
return;
}
public void blurDouble(
double[] pixels,
int width,
int height,
double sigmaX,
double sigmaY,
double accuracy) {
if (sigmaX > 0)
blur1Direction(pixels, width,height, sigmaX, accuracy, true);
if (Thread.currentThread().isInterrupted()) return; // interruption for new parameters during preview?
if (sigmaY > 0)
blur1Direction(pixels, width,height, sigmaY, accuracy, false);
return;
}
/* Blur an image in one direction (x or y) by a Gaussian.
* @param ip The Image with the original data where also the result will be stored
* @param sigma Standard deviation of the Gaussian
* @param accuracy Accuracy of kernel, should not be > 0.02
* @param xDirection True for blurring in x direction, false for y direction
* @param extraLines Number of lines (parallel to the blurring direction)
* below and above the roi bounds that should be processed.
*/
public void blur1Direction(double [] pixels,
int width,
int height,
double sigma,
double accuracy,
boolean xDirection
// int extraLines
) {
final int UPSCALE_K_RADIUS = 2; //number of pixels to add for upscaling
final double MIN_DOWNSCALED_SIGMA = 4.; //minimum standard deviation in the downscaled image
// float[] pixels = (float[])ip.getPixels();
// int width = ip.getWidth();
// int height = ip.getHeight();
// Rectangle roi = ip.getRoi();
int length = xDirection ? width : height; //number of points per line (line can be a row or column)
int pointInc = xDirection ? 1 : width; //increment of the pixels array index to the next point in a line
int lineInc = xDirection ? width : 1; //increment of the pixels array index to the next line
// int lineFrom = (xDirection ? roi.y : roi.x) - extraLines; //the first line to process
// if (lineFrom < 0) lineFrom = 0;
int lineFrom = 0; //the first line to process
// int lineTo = (xDirection ? roi.y+roi.height : roi.x+roi.width) + extraLines; //the last line+1 to process
// if (lineTo > (xDirection ? height:width)) lineTo = (xDirection ? height:width);
int lineTo = (xDirection ? height:width);
// int writeFrom = xDirection? roi.x : roi.y; //first point of a line that needs to be written
// int writeTo = xDirection ? roi.x+roi.width : roi.y+roi.height;
int writeFrom = 0; //first point of a line that needs to be written
int writeTo = xDirection ? width : height;
// int inc = Math.max((lineTo-lineFrom)/(100/(nPasses>0?nPasses:1)+1),20);
pass++;
if (pass>nPasses) pass =1;
// Thread thread = Thread.currentThread(); // needed to check for interrupted state
if (sigma > 2*MIN_DOWNSCALED_SIGMA + 0.5) {
/* large radius (sigma): scale down, then convolve, then scale up */
int reduceBy = (int)Math.floor(sigma/MIN_DOWNSCALED_SIGMA); //downscale by this factor
if (reduceBy > length) reduceBy = length;
/* Downscale gives std deviation sigma = 1/sqrt(3); upscale gives sigma = 1/2. (in downscaled pixels) */
/* All sigma^2 values add to full sigma^2 */
double sigmaGauss = Math.sqrt(sigma*sigma/(reduceBy*reduceBy) - 1./3. - 1./4.);
int maxLength = (length+reduceBy-1)/reduceBy + 2*(UPSCALE_K_RADIUS + 1); //downscaled line can't be longer
double[][] gaussKernel = makeGaussianKernel(sigmaGauss, accuracy, maxLength);
int kRadius = gaussKernel[0].length*reduceBy; //Gaussian kernel radius after upscaling
int readFrom = (writeFrom-kRadius < 0) ? 0 : writeFrom-kRadius; //not including broadening by downscale&upscale
int readTo = (writeTo+kRadius > length) ? length : writeTo+kRadius;
int newLength = (readTo-readFrom+reduceBy-1)/reduceBy + 2*(UPSCALE_K_RADIUS + 1);
int unscaled0 = readFrom - (UPSCALE_K_RADIUS + 1)*reduceBy; //input point corresponding to cache index 0
//IJ.log("reduce="+reduceBy+", newLength="+newLength+", unscaled0="+unscaled0+", sigmaG="+(float)sigmaGauss+", kRadius="+gaussKernel[0].length);
double[] downscaleKernel = makeDownscaleKernel(reduceBy);
double[] upscaleKernel = makeUpscaleKernel(reduceBy);
double[] cache1 = new double[newLength]; //holds data after downscaling
double[] cache2 = new double[newLength]; //holds data after convolution
int pixel0 = lineFrom*lineInc;
for (int line=lineFrom; line<lineTo; line++, pixel0+=lineInc) {
downscaleLine(pixels, cache1, downscaleKernel, reduceBy, pixel0, unscaled0, length, pointInc, newLength);
convolveLine(cache1, cache2, gaussKernel, 0, newLength, 1, newLength-1, 0, 1);
upscaleLine(cache2, pixels, upscaleKernel, reduceBy, pixel0, unscaled0, writeFrom, writeTo, pointInc);
}
} else {
/* small radius: normal convolution */
double[][] gaussKernel = makeGaussianKernel(sigma, accuracy, length);
int kRadius = gaussKernel[0].length;
double[] cache = new double[length]; //input for convolution, hopefully in CPU cache
int readFrom = (writeFrom-kRadius < 0) ? 0 : writeFrom-kRadius;
int readTo = (writeTo+kRadius > length) ? length : writeTo+kRadius;
int pixel0 = lineFrom*lineInc;
for (int line=lineFrom; line<lineTo; line++, pixel0+=lineInc) {
int p = pixel0 + readFrom*pointInc;
for (int i=readFrom; i<readTo; i++ ,p+=pointInc)
cache[i] = pixels[p];
convolveLine(cache, pixels, gaussKernel, readFrom, readTo, writeFrom, writeTo, pixel0, pointInc);
}
}
return;
}
/* Create a 1-dimensional normalized Gaussian kernel with standard deviation sigma
* and the running sum over the kernel
* Note: this is one side of the kernel only, not the full kernel as used by the
* Convolver class of ImageJ.
* To avoid a step due to the cutoff at a finite value, the near-edge values are
* replaced by a 2nd-order polynomial with its minimum=0 at the first out-of-kernel
* pixel. Thus, the kernel function has a smooth 1st derivative in spite of finite
* length.
*
* @param sigma Standard deviation, i.e. radius of decay to 1/sqrt(e), in pixels.
* @param accuracy Relative accuracy; for best results below 0.01 when processing
* 8-bit images. For short or float images, values of 1e-3 to 1e-4
* are better (but increase the kernel size and thereby the
* processing time). Edge smoothing will fail with very poor
* accuracy (above approx. 0.02)
* @param maxRadius Maximum radius of the kernel: Limits kernel size in case of
* large sigma, should be set to image width or height. For small
* values of maxRadius, the kernel returned may have a larger
* radius, however.
* @return A 2*n array. Array[0][n] is the kernel, decaying towards zero,
* which would be reached at kernel.length (unless kernel size is
* limited by maxRadius). Array[1][n] holds the sum over all kernel
* values > n, including non-calculated values in case the kernel
* size is limited by <code>maxRadius</code>.
*/
public double[][] makeGaussianKernel(double sigma, double accuracy, int maxRadius) {
int kRadius = (int)Math.ceil(sigma*Math.sqrt(-2*Math.log(accuracy)))+1;
if (maxRadius < 50) maxRadius = 50; // too small maxRadius would result in inaccurate sum.
if (kRadius > maxRadius) kRadius = maxRadius;
double[][] kernel = new double[2][kRadius];
for (int i=0; i<kRadius; i++) // Gaussian function
kernel[0][i] = (Math.exp(-0.5*i*i/sigma/sigma));
if (kRadius < maxRadius && kRadius > 3) { // edge correction
double sqrtSlope = Double.MAX_VALUE;
int r = kRadius;
while (r > kRadius/2) {
r--;
double a = Math.sqrt(kernel[0][r])/(kRadius-r);
if (a < sqrtSlope)
sqrtSlope = a;
else
break;
}
for (int r1 = r+2; r1 < kRadius; r1++)
kernel[0][r1] = (kRadius-r1)*(kRadius-r1)*sqrtSlope*sqrtSlope;
}
double sum; // sum over all kernel elements for normalization
if (kRadius < maxRadius) {
sum = kernel[0][0];
for (int i=1; i<kRadius; i++)
sum += 2*kernel[0][i];
} else
sum = sigma * Math.sqrt(2*Math.PI);
/* Blur an image in one direction (x or y) by a Gaussian.
* @param ip The Image with the original data where also the result will be stored
* @param sigma Standard deviation of the Gaussian
* @param accuracy Accuracy of kernel, should not be > 0.02
* @param xDirection True for blurring in x direction, false for y direction
* @param extraLines Number of lines (parallel to the blurring direction)
* below and above the roi bounds that should be processed.
*/
public void blur1Direction(double [] pixels,
int width,
int height,
double sigma,
double accuracy,
boolean xDirection
// int extraLines
) {
final int UPSCALE_K_RADIUS = 2; //number of pixels to add for upscaling
final double MIN_DOWNSCALED_SIGMA = 4.; //minimum standard deviation in the downscaled image
// float[] pixels = (float[])ip.getPixels();
// int width = ip.getWidth();
// int height = ip.getHeight();
// Rectangle roi = ip.getRoi();
int length = xDirection ? width : height; //number of points per line (line can be a row or column)
int pointInc = xDirection ? 1 : width; //increment of the pixels array index to the next point in a line
int lineInc = xDirection ? width : 1; //increment of the pixels array index to the next line
// int lineFrom = (xDirection ? roi.y : roi.x) - extraLines; //the first line to process
// if (lineFrom < 0) lineFrom = 0;
int lineFrom = 0; //the first line to process
// int lineTo = (xDirection ? roi.y+roi.height : roi.x+roi.width) + extraLines; //the last line+1 to process
// if (lineTo > (xDirection ? height:width)) lineTo = (xDirection ? height:width);
int lineTo = (xDirection ? height:width);
// int writeFrom = xDirection? roi.x : roi.y; //first point of a line that needs to be written
// int writeTo = xDirection ? roi.x+roi.width : roi.y+roi.height;
int writeFrom = 0; //first point of a line that needs to be written
int writeTo = xDirection ? width : height;
// int inc = Math.max((lineTo-lineFrom)/(100/(nPasses>0?nPasses:1)+1),20);
pass++;
if (pass>nPasses) pass =1;
// Thread thread = Thread.currentThread(); // needed to check for interrupted state
if (sigma > 2*MIN_DOWNSCALED_SIGMA + 0.5) {
/* large radius (sigma): scale down, then convolve, then scale up */
int reduceBy = (int)Math.floor(sigma/MIN_DOWNSCALED_SIGMA); //downscale by this factor
if (reduceBy > length) reduceBy = length;
/* Downscale gives std deviation sigma = 1/sqrt(3); upscale gives sigma = 1/2. (in downscaled pixels) */
/* All sigma^2 values add to full sigma^2 */
double sigmaGauss = Math.sqrt(sigma*sigma/(reduceBy*reduceBy) - 1./3. - 1./4.);
int maxLength = (length+reduceBy-1)/reduceBy + 2*(UPSCALE_K_RADIUS + 1); //downscaled line can't be longer
double[][] gaussKernel = makeGaussianKernel(sigmaGauss, accuracy, maxLength);
int kRadius = gaussKernel[0].length*reduceBy; //Gaussian kernel radius after upscaling
int readFrom = (writeFrom-kRadius < 0) ? 0 : writeFrom-kRadius; //not including broadening by downscale&upscale
int readTo = (writeTo+kRadius > length) ? length : writeTo+kRadius;
int newLength = (readTo-readFrom+reduceBy-1)/reduceBy + 2*(UPSCALE_K_RADIUS + 1);
int unscaled0 = readFrom - (UPSCALE_K_RADIUS + 1)*reduceBy; //input point corresponding to cache index 0
//IJ.log("reduce="+reduceBy+", newLength="+newLength+", unscaled0="+unscaled0+", sigmaG="+(float)sigmaGauss+", kRadius="+gaussKernel[0].length);
double[] downscaleKernel = makeDownscaleKernel(reduceBy);
double[] upscaleKernel = makeUpscaleKernel(reduceBy);
double[] cache1 = new double[newLength]; //holds data after downscaling
double[] cache2 = new double[newLength]; //holds data after convolution
int pixel0 = lineFrom*lineInc;
for (int line=lineFrom; line<lineTo; line++, pixel0+=lineInc) {
downscaleLine(pixels, cache1, downscaleKernel, reduceBy, pixel0, unscaled0, length, pointInc, newLength);
convolveLine(cache1, cache2, gaussKernel, 0, newLength, 1, newLength-1, 0, 1);
upscaleLine(cache2, pixels, upscaleKernel, reduceBy, pixel0, unscaled0, writeFrom, writeTo, pointInc);
}
} else {
/* small radius: normal convolution */
double[][] gaussKernel = makeGaussianKernel(sigma, accuracy, length);
int kRadius = gaussKernel[0].length;
double[] cache = new double[length]; //input for convolution, hopefully in CPU cache
int readFrom = (writeFrom-kRadius < 0) ? 0 : writeFrom-kRadius;
int readTo = (writeTo+kRadius > length) ? length : writeTo+kRadius;
int pixel0 = lineFrom*lineInc;
for (int line=lineFrom; line<lineTo; line++, pixel0+=lineInc) {
int p = pixel0 + readFrom*pointInc;
for (int i=readFrom; i<readTo; i++ ,p+=pointInc)
cache[i] = pixels[p];
convolveLine(cache, pixels, gaussKernel, readFrom, readTo, writeFrom, writeTo, pixel0, pointInc);
}
}
return;
}
/* Create a 1-dimensional normalized Gaussian kernel with standard deviation sigma
* and the running sum over the kernel
* Note: this is one side of the kernel only, not the full kernel as used by the
* Convolver class of ImageJ.
* To avoid a step due to the cutoff at a finite value, the near-edge values are
* replaced by a 2nd-order polynomial with its minimum=0 at the first out-of-kernel
* pixel. Thus, the kernel function has a smooth 1st derivative in spite of finite
* length.
*
* @param sigma Standard deviation, i.e. radius of decay to 1/sqrt(e), in pixels.
* @param accuracy Relative accuracy; for best results below 0.01 when processing
* 8-bit images. For short or float images, values of 1e-3 to 1e-4
* are better (but increase the kernel size and thereby the
* processing time). Edge smoothing will fail with very poor
* accuracy (above approx. 0.02)
* @param maxRadius Maximum radius of the kernel: Limits kernel size in case of
* large sigma, should be set to image width or height. For small
* values of maxRadius, the kernel returned may have a larger
* radius, however.
* @return A 2*n array. Array[0][n] is the kernel, decaying towards zero,
* which would be reached at kernel.length (unless kernel size is
* limited by maxRadius). Array[1][n] holds the sum over all kernel
* values > n, including non-calculated values in case the kernel
* size is limited by <code>maxRadius</code>.
*/
public double[][] makeGaussianKernel(double sigma, double accuracy, int maxRadius) {
int kRadius = (int)Math.ceil(sigma*Math.sqrt(-2*Math.log(accuracy)))+1;
if (maxRadius < 50) maxRadius = 50; // too small maxRadius would result in inaccurate sum.
if (kRadius > maxRadius) kRadius = maxRadius;
double[][] kernel = new double[2][kRadius];
for (int i=0; i<kRadius; i++) // Gaussian function
kernel[0][i] = (Math.exp(-0.5*i*i/sigma/sigma));
if (kRadius < maxRadius && kRadius > 3) { // edge correction
double sqrtSlope = Double.MAX_VALUE;
int r = kRadius;
while (r > kRadius/2) {
r--;
double a = Math.sqrt(kernel[0][r])/(kRadius-r);
if (a < sqrtSlope)
sqrtSlope = a;
else
break;
}
for (int r1 = r+2; r1 < kRadius; r1++)
kernel[0][r1] = (kRadius-r1)*(kRadius-r1)*sqrtSlope*sqrtSlope;
}
double sum; // sum over all kernel elements for normalization
if (kRadius < maxRadius) {
sum = kernel[0][0];
for (int i=1; i<kRadius; i++)
sum += 2*kernel[0][i];
} else
sum = sigma * Math.sqrt(2*Math.PI);
double rsum = 0.5 + 0.5*kernel[0][0]/sum;
for (int i=0; i<kRadius; i++) {
double v = (kernel[0][i]/sum);
kernel[0][i] = v;
rsum -= v;
kernel[1][i] = rsum;
//IJ.log("k["+i+"]="+(float)v+" sum="+(float)rsum);
}
return kernel;
}
/* Scale a line (row or column of a FloatProcessor or part thereof)
* down by a factor <code>reduceBy</code> and write the result into
* <code>cache</code>.
* Input line pixel # <code>unscaled0</code> will correspond to output
* line pixel # 0. <code>unscaled0</code> may be negative. Out-of-line
* pixels of the input are replaced by the edge pixels.
*/
void downscaleLine(double[] pixels, double[] cache, double[] kernel,
int reduceBy, int pixel0, int unscaled0, int length, int pointInc, int newLength) {
if (pixel0 > pixels.length) {
System.out.println("++++++ Error in DoubleGaussianBlur, pixel0="+pixel0+", pixels.length="+(pixels.length));
return;
}
double first = pixels[pixel0];
double last = pixels[pixel0 + pointInc*(length-1)];
int xin = unscaled0 - reduceBy/2;
int p = pixel0 + pointInc*xin;
for (int xout=0; xout<newLength; xout++) {
double v = 0;
for (int x=0; x<reduceBy; x++, xin++, p+=pointInc) {
v += kernel[x] * ((xin-reduceBy < 0) ? first : ((xin-reduceBy >= length) ? last : pixels[p-pointInc*reduceBy]));
v += kernel[x+reduceBy] * ((xin < 0) ? first : ((xin >= length) ? last : pixels[p]));
v += kernel[x+2*reduceBy] * ((xin+reduceBy < 0) ? first : ((xin+reduceBy >= length) ? last : pixels[p+pointInc*reduceBy]));
}
cache[xout] = v;
}
}
double rsum = 0.5 + 0.5*kernel[0][0]/sum;
for (int i=0; i<kRadius; i++) {
double v = (kernel[0][i]/sum);
kernel[0][i] = v;
rsum -= v;
kernel[1][i] = rsum;
//IJ.log("k["+i+"]="+(float)v+" sum="+(float)rsum);
}
return kernel;
}
/* Scale a line (row or column of a FloatProcessor or part thereof)
* down by a factor <code>reduceBy</code> and write the result into
* <code>cache</code>.
* Input line pixel # <code>unscaled0</code> will correspond to output
* line pixel # 0. <code>unscaled0</code> may be negative. Out-of-line
* pixels of the input are replaced by the edge pixels.
*/
void downscaleLine(double[] pixels, double[] cache, double[] kernel,
int reduceBy, int pixel0, int unscaled0, int length, int pointInc, int newLength) {
if (pixel0 > pixels.length) {
System.out.println("++++++ Error in DoubleGaussianBlur, pixel0="+pixel0+", pixels.length="+(pixels.length));
return;
}
double first = pixels[pixel0];
double last = pixels[pixel0 + pointInc*(length-1)];
int xin = unscaled0 - reduceBy/2;
int p = pixel0 + pointInc*xin;
for (int xout=0; xout<newLength; xout++) {
double v = 0;
for (int x=0; x<reduceBy; x++, xin++, p+=pointInc) {
v += kernel[x] * ((xin-reduceBy < 0) ? first : ((xin-reduceBy >= length) ? last : pixels[p-pointInc*reduceBy]));
v += kernel[x+reduceBy] * ((xin < 0) ? first : ((xin >= length) ? last : pixels[p]));
v += kernel[x+2*reduceBy] * ((xin+reduceBy < 0) ? first : ((xin+reduceBy >= length) ? last : pixels[p+pointInc*reduceBy]));
}
cache[xout] = v;
}
}
/* Create a kernel for downscaling. The kernel function preserves
* norm and 1st moment (i.e., position) and has fixed 2nd moment,
* (in contrast to linear interpolation).
* In scaled space, the length of the kernel runs from -1.5 to +1.5,
* and the standard deviation is 1/2.
* Array index corresponding to the kernel center is
* unitLength*3/2
*/
double[] makeDownscaleKernel (int unitLength) {
int mid = unitLength*3/2;
double[] kernel = new double[3*unitLength];
for (int i=0; i<=unitLength/2; i++) {
double x = i/(double)unitLength;
double v = (0.75-x*x)/unitLength;
kernel[mid-i] = v;
kernel[mid+i] = v;
}
for (int i=unitLength/2+1; i<(unitLength*3+1)/2; i++) {
double x = i/(double)unitLength;
double v = (0.125 + 0.5*(x-1)*(x-2))/unitLength;
kernel[mid-i] = v;
kernel[mid+i] = v;
}
return kernel;
}
/* Create a kernel for downscaling. The kernel function preserves
* norm and 1st moment (i.e., position) and has fixed 2nd moment,
* (in contrast to linear interpolation).
* In scaled space, the length of the kernel runs from -1.5 to +1.5,
* and the standard deviation is 1/2.
* Array index corresponding to the kernel center is
* unitLength*3/2
*/
double[] makeDownscaleKernel (int unitLength) {
int mid = unitLength*3/2;
double[] kernel = new double[3*unitLength];
for (int i=0; i<=unitLength/2; i++) {
double x = i/(double)unitLength;
double v = (0.75-x*x)/unitLength;
kernel[mid-i] = v;
kernel[mid+i] = v;
}
for (int i=unitLength/2+1; i<(unitLength*3+1)/2; i++) {
double x = i/(double)unitLength;
double v = (0.125 + 0.5*(x-1)*(x-2))/unitLength;
kernel[mid-i] = v;
kernel[mid+i] = v;
}
return kernel;
}
/* Scale a line up by factor <code>reduceBy</code> and write as a row
* or column (or part thereof) to the pixels array of a FloatProcessor.
*/
void upscaleLine (double[] cache, double[] pixels, double[] kernel,
int reduceBy, int pixel0, int unscaled0, int writeFrom, int writeTo, int pointInc) {
int p = pixel0 + pointInc*writeFrom;
for (int xout = writeFrom; xout < writeTo; xout++, p+=pointInc) {
int xin = (xout-unscaled0+reduceBy-1)/reduceBy; //the corresponding point in the cache (if exact) or the one above
int x = reduceBy - 1 - (xout-unscaled0+reduceBy-1)%reduceBy;
pixels[p] = cache[xin-2]*kernel[x]
+ cache[xin-1]*kernel[x+reduceBy]
+ cache[xin]*kernel[x+2*reduceBy]
+ cache[xin+1]*kernel[x+3*reduceBy];
}
}
/* Scale a line up by factor <code>reduceBy</code> and write as a row
* or column (or part thereof) to the pixels array of a FloatProcessor.
*/
void upscaleLine (double[] cache, double[] pixels, double[] kernel,
int reduceBy, int pixel0, int unscaled0, int writeFrom, int writeTo, int pointInc) {
int p = pixel0 + pointInc*writeFrom;
for (int xout = writeFrom; xout < writeTo; xout++, p+=pointInc) {
int xin = (xout-unscaled0+reduceBy-1)/reduceBy; //the corresponding point in the cache (if exact) or the one above
int x = reduceBy - 1 - (xout-unscaled0+reduceBy-1)%reduceBy;
pixels[p] = cache[xin-2]*kernel[x]
+ cache[xin-1]*kernel[x+reduceBy]
+ cache[xin]*kernel[x+2*reduceBy]
+ cache[xin+1]*kernel[x+3*reduceBy];
}
}
/* Create a kernel for upscaling. The kernel function is a convolution
* of four unit squares, i.e., four uniform kernels with value +1
* from -0.5 to +0.5 (in downscaled coordinates). The second derivative
* of this kernel is smooth, the third is not. Its standard deviation
* is 1/sqrt(3) in downscaled cordinates.
* The kernel runs from [-2 to +2[, corresponding to array index
* 0 ... 4*unitLength (whereby the last point is not in the array any more).
*/
double[] makeUpscaleKernel (int unitLength) {
double[] kernel = new double[4*unitLength];
int mid = 2*unitLength;
kernel[0] = 0;
for (int i=0; i<unitLength; i++) {
double x = i/(double)unitLength;
double v = ((2./3. -x*x*(1-0.5*x)));
kernel[mid+i] = v;
kernel[mid-i] = v;
}
for (int i=unitLength; i<2*unitLength; i++) {
double x = i/(double)unitLength;
double v = (2.-x)*(2.-x)*(2.-x)/6.;
kernel[mid+i] = v;
kernel[mid-i] = v;
}
return kernel;
}
/* Create a kernel for upscaling. The kernel function is a convolution
* of four unit squares, i.e., four uniform kernels with value +1
* from -0.5 to +0.5 (in downscaled coordinates). The second derivative
* of this kernel is smooth, the third is not. Its standard deviation
* is 1/sqrt(3) in downscaled cordinates.
* The kernel runs from [-2 to +2[, corresponding to array index
* 0 ... 4*unitLength (whereby the last point is not in the array any more).
*/
double[] makeUpscaleKernel (int unitLength) {
double[] kernel = new double[4*unitLength];
int mid = 2*unitLength;
kernel[0] = 0;
for (int i=0; i<unitLength; i++) {
double x = i/(double)unitLength;
double v = ((2./3. -x*x*(1-0.5*x)));
kernel[mid+i] = v;
kernel[mid-i] = v;
}
for (int i=unitLength; i<2*unitLength; i++) {
double x = i/(double)unitLength;
double v = (2.-x)*(2.-x)*(2.-x)/6.;
kernel[mid+i] = v;
kernel[mid-i] = v;
}
return kernel;
}
/* Convolve a line with a symmetric kernel and write to a separate array,
* possibly the pixels array of a FloatProcessor (as a row or column or part thereof)
*
* @param input Input array containing the line
* @param pixels Float array for output, can be the pixels of a FloatProcessor
* @param kernel "One-sided" kernel array, kernel[0][n] must contain the kernel
* itself, kernel[1][n] must contain the running sum over all
* kernel elements from kernel[0][n+1] to the periphery.
* The kernel must be normalized, i.e. sum(kernel[0][n]) = 1
* where n runs from the kernel periphery (last element) to 0 and
* back. Normalization should include all kernel points, also these
* not calculated because they are not needed.
* @param readFrom First array element of the line that must be read.
* <code>writeFrom-kernel.length</code> or 0.
* @param readTo Last array element+1 of the line that must be read.
* <code>writeTo+kernel.length</code> or <code>input.length</code>
* @param writeFrom Index of the first point in the line that should be written
* @param writeTo Index+1 of the last point in the line that should be written
* @param point0 Array index of first element of the 'line' in pixels (i.e., lineNumber * lineInc)
* @param pointInc Increment of the pixels array index to the next point (for an ImageProcessor,
* it should be <code>1</code> for a row, <code>width</code> for a column)
*/
public void convolveLine(double[] input, double[] pixels, double[][] kernel, int readFrom,
int readTo, int writeFrom, int writeTo, int point0, int pointInc) {
int length = input.length;
double first = input[0]; //out-of-edge pixels are replaced by nearest edge pixels
double last = input[length-1];
double[] kern = kernel[0]; //the kernel itself
double kern0 = kern[0];
double[] kernSum = kernel[1]; //the running sum over the kernel
int kRadius = kern.length;
int firstPart = kRadius < length ? kRadius : length;
int p = point0 + writeFrom*pointInc;
int i = writeFrom;
for (; i<firstPart; i++,p+=pointInc) { //while the sum would include pixels < 0
double result = input[i]*kern0;
result += kernSum[i]*first;
if (i+kRadius>length) result += kernSum[length-i-1]*last;
for (int k=1; k<kRadius; k++) {
double v = 0;
if (i-k >= 0) v += input[i-k];
if (i+k<length) v+= input[i+k];
result += kern[k] * v;
}
pixels[p] = result;
}
int iEndInside = length-kRadius<writeTo ? length-kRadius : writeTo;
for (;i<iEndInside;i++,p+=pointInc) { //while only pixels within the line are be addressed (the easy case)
double result = input[i]*kern0;
for (int k=1; k<kRadius; k++)
result += kern[k] * (input[i-k] + input[i+k]);
pixels[p] = result;
}
for (; i<writeTo; i++,p+=pointInc) { //while the sum would include pixels >= length
double result = input[i]*kern0;
if (i<kRadius) result += kernSum[i]*first;
if (i+kRadius>=length) result += kernSum[length-i-1]*last;
for (int k=1; k<kRadius; k++) {
double v = 0;
if (i-k >= 0) v += input[i-k];
if (i+k<length) v+= input[i+k];
result += kern[k] * v;
}
pixels[p] = result;
}
}
/* Convolve a line with a symmetric kernel and write to a separate array,
* possibly the pixels array of a FloatProcessor (as a row or column or part thereof)
*
* @param input Input array containing the line
* @param pixels Float array for output, can be the pixels of a FloatProcessor
* @param kernel "One-sided" kernel array, kernel[0][n] must contain the kernel
* itself, kernel[1][n] must contain the running sum over all
* kernel elements from kernel[0][n+1] to the periphery.
* The kernel must be normalized, i.e. sum(kernel[0][n]) = 1
* where n runs from the kernel periphery (last element) to 0 and
* back. Normalization should include all kernel points, also these
* not calculated because they are not needed.
* @param readFrom First array element of the line that must be read.
* <code>writeFrom-kernel.length</code> or 0.
* @param readTo Last array element+1 of the line that must be read.
* <code>writeTo+kernel.length</code> or <code>input.length</code>
* @param writeFrom Index of the first point in the line that should be written
* @param writeTo Index+1 of the last point in the line that should be written
* @param point0 Array index of first element of the 'line' in pixels (i.e., lineNumber * lineInc)
* @param pointInc Increment of the pixels array index to the next point (for an ImageProcessor,
* it should be <code>1</code> for a row, <code>width</code> for a column)
*/
public void convolveLine(double[] input, double[] pixels, double[][] kernel, int readFrom,
int readTo, int writeFrom, int writeTo, int point0, int pointInc) {
int length = input.length;
double first = input[0]; //out-of-edge pixels are replaced by nearest edge pixels
double last = input[length-1];
double[] kern = kernel[0]; //the kernel itself
double kern0 = kern[0];
double[] kernSum = kernel[1]; //the running sum over the kernel
int kRadius = kern.length;
int firstPart = kRadius < length ? kRadius : length;
int p = point0 + writeFrom*pointInc;
int i = writeFrom;
for (; i<firstPart; i++,p+=pointInc) { //while the sum would include pixels < 0
double result = input[i]*kern0;
result += kernSum[i]*first;
if (i+kRadius>length) result += kernSum[length-i-1]*last;
for (int k=1; k<kRadius; k++) {
double v = 0;
if (i-k >= 0) v += input[i-k];
if (i+k<length) v+= input[i+k];
result += kern[k] * v;
}
pixels[p] = result;
}
int iEndInside = length-kRadius<writeTo ? length-kRadius : writeTo;
for (;i<iEndInside;i++,p+=pointInc) { //while only pixels within the line are be addressed (the easy case)
double result = input[i]*kern0;
for (int k=1; k<kRadius; k++)
result += kern[k] * (input[i-k] + input[i+k]);
pixels[p] = result;
}
for (; i<writeTo; i++,p+=pointInc) { //while the sum would include pixels >= length
double result = input[i]*kern0;
if (i<kRadius) result += kernSum[i]*first;
if (i+kRadius>=length) result += kernSum[length-i-1]*last;
for (int k=1; k<kRadius; k++) {
double v = 0;
if (i-k >= 0) v += input[i-k];
if (i+k<length) v+= input[i+k];
result += kern[k] * v;
}
pixels[p] = result;
}
}
......
......@@ -106,6 +106,7 @@ public class GPUTileProcessor {
static String GPU_RBGA_NAME = "generate_RBGA"; // name in C code
static String GPU_ROT_DERIV = "calc_rot_deriv"; // calculate rotation matrices and derivatives
static String GPU_SET_TILES_OFFSETS = "get_tiles_offsets"; // calculate pixel offsets and disparity distortions
static String GPU_CALCULATE_TILES_OFFSETS = "calculate_tiles_offsets"; // calculate pixel offsets and disparity distortions
static String GPU_CALC_REVERSE_DISTORTION = "calcReverseDistortionTable"; // calculate reverse radial distortion table from gpu_geometry_correction
// pass some defines to gpu source code with #ifdef JCUDA
......@@ -168,7 +169,8 @@ public class GPUTileProcessor {
private CUfunction GPU_TEXTURES_kernel = null;
private CUfunction GPU_RBGA_kernel = null;
private CUfunction GPU_ROT_DERIV_kernel = null;
private CUfunction GPU_SET_TILES_OFFSETS_kernel = null;
// private CUfunction GPU_SET_TILES_OFFSETS_kernel = null;
private CUfunction GPU_CALCULATE_TILES_OFFSETS_kernel = null;
private CUfunction GPU_CALC_REVERSE_DISTORTION_kernel = null;
CUmodule module; // to access constants memory
......@@ -393,7 +395,8 @@ public class GPUTileProcessor {
GPU_TEXTURES_NAME,
GPU_RBGA_NAME,
GPU_ROT_DERIV,
GPU_SET_TILES_OFFSETS,
// GPU_SET_TILES_OFFSETS,
GPU_CALCULATE_TILES_OFFSETS,
GPU_CALC_REVERSE_DISTORTION
};
CUfunction[] functions = createFunctions(kernelSources,
......@@ -408,7 +411,8 @@ public class GPUTileProcessor {
GPU_TEXTURES_kernel= functions[5];
GPU_RBGA_kernel= functions[6];
GPU_ROT_DERIV_kernel = functions[7];
GPU_SET_TILES_OFFSETS_kernel = functions[8];
// GPU_SET_TILES_OFFSETS_kernel = functions[8];
GPU_CALCULATE_TILES_OFFSETS_kernel = functions[8];
GPU_CALC_REVERSE_DISTORTION_kernel = functions[9];
System.out.println("GPU kernel functions initialized");
......@@ -420,7 +424,8 @@ public class GPUTileProcessor {
System.out.println(GPU_TEXTURES_kernel.toString());
System.out.println(GPU_RBGA_kernel.toString());
System.out.println(GPU_ROT_DERIV_kernel.toString());
System.out.println(GPU_SET_TILES_OFFSETS_kernel.toString());
// System.out.println(GPU_SET_TILES_OFFSETS_kernel.toString());
System.out.println(GPU_CALCULATE_TILES_OFFSETS_kernel.toString());
System.out.println(GPU_CALC_REVERSE_DISTORTION_kernel.toString());
// GPU data structures are now initialized through GpuQuad instances
......@@ -1603,7 +1608,8 @@ public class GPUTileProcessor {
/**
* Calculate tiles offsets (before each direct conversion run)
*/
public void execSetTilesOffsets() {
/*
public void execSetTilesOffsetsOld() {
execCalcReverseDistortions(); // will check if it is needed first
execRotDerivs(); // will check if it is needed first
if (GPU_SET_TILES_OFFSETS_kernel == null)
......@@ -1612,7 +1618,7 @@ public class GPUTileProcessor {
return;
}
// kernel parameters: pointer to pointers
int [] GridFullWarps = {(num_task_tiles + TILES_PER_BLOCK_GEOM - 1)/TILES_PER_BLOCK_GEOM, 1, 1}; // round up
int [] GridFullWarps = {(num_task_tiles + 2 * TILES_PER_BLOCK_GEOM - 1)/TILES_PER_BLOCK_GEOM, 1, 1}; // round up
int [] ThreadsFullWarps = {num_cams, TILES_PER_BLOCK_GEOM, 1}; // 4,8,1
Pointer kernelParameters = Pointer.to(
Pointer.to(gpu_tasks), // struct tp_task * gpu_tasks,
......@@ -1632,6 +1638,42 @@ public class GPUTileProcessor {
System.out.println("======execSetTilesOffsets()");
}
}
*/
public void execSetTilesOffsets() {
execCalcReverseDistortions(); // will check if it is needed first
execRotDerivs(); // will check if it is needed first
if (GPU_CALCULATE_TILES_OFFSETS_kernel == null)
{
IJ.showMessage("Error", "No GPU kernel: GPU_CALCULATE_TILES_OFFSETS_kernel");
return;
}
if (gpu_debug_level > -1) {
System.out.println("num_task_tiles="+num_task_tiles);
}
// kernel parameters: pointer to pointers
// int [] GridFullWarps = {(num_task_tiles + 2 * TILES_PER_BLOCK_GEOM - 1)/TILES_PER_BLOCK_GEOM, 1, 1}; // round up
// int [] ThreadsFullWarps = {num_cams, TILES_PER_BLOCK_GEOM, 1}; // 4,8,1
int [] GridFullWarps = {1, 1, 1}; // round up
int [] ThreadsFullWarps = {1, 1, 1}; // 4,8,1
Pointer kernelParameters = Pointer.to(
Pointer.to(gpu_tasks), // struct tp_task * gpu_tasks,
Pointer.to(new int[] { num_task_tiles }),// int num_tiles, // number of tiles in task list
Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
Pointer.to(gpu_correction_vector), // struct corr_vector * gpu_correction_vector,
Pointer.to(gpu_rByRDist), // float * gpu_rByRDist) // length should match RBYRDIST_LEN
Pointer.to(gpu_rot_deriv)); // trot_deriv * gpu_rot_deriv);
cuCtxSynchronize();
cuLaunchKernel(GPU_CALCULATE_TILES_OFFSETS_kernel,
GridFullWarps[0], GridFullWarps[1], GridFullWarps[2], // Grid dimension
ThreadsFullWarps[0], ThreadsFullWarps[1],ThreadsFullWarps[2],// Block dimension
0, null, // Shared memory size and stream (shared - only dynamic, static is in code)
kernelParameters, null); // Kernel- and extra parameters
cuCtxSynchronize(); // remove later
if (gpu_debug_level > -1) {
System.out.println("======execSetTilesOffsets()");
}
}
/**
* Direct CLT conversion and aberration correction
......
package com.elphel.imagej.tileprocessor;
import com.elphel.imagej.common.DoubleGaussianBlur;
import com.elphel.imagej.common.ShowDoubleFloatArrays;
import Jama.Matrix;
......@@ -81,7 +82,7 @@ public class ExtrinsicAdjustment {
public int clustersX;
public int clustersY;
public double dbg_delta = 1.0E-5; // if not null - use delta instead of the derivatives in getJacobianTransposed
public double dbg_delta = 0; // 1.0E-5; // if not 0 - use delta instead of the derivatives in getJacobianTransposed
public double [] getOldNewRMS() {
double [] on_rms = new double[2];
......@@ -272,7 +273,7 @@ public class ExtrinsicAdjustment {
double min_fg_disp = 0.0; // 25.0; // minimal disparity to boost foreground objects
double min_rel_over = 0.25; // minimal relative disparity over average for a row to boost foreground objects
int min_num_fg = min_num_forced;
double fb_boost_fraction = 0.5;
double fg_boost_fraction = 0.5;
if (min_fg_disp > 0.0) {
boolean [] select_ers = selectERS(
measured_dsxy, // double [][] measured_dsxy,
......@@ -283,10 +284,62 @@ public class ExtrinsicAdjustment {
this.weights, // double [] weights, // will be updated
select_ers, // boolean [] fg,
min_num_fg, // int min_num_fg,
fb_boost_fraction); // double fg_boost_fraction)
fg_boost_fraction); // double fg_boost_fraction)
}
double max_ers_disparity = 175.0;
limitDisparity(
measured_dsxy, // double [][] measured_dsxy,
this.weights, // double [] weights, // will be updated
max_ers_disparity); // double max_disparity
// remove moving objects here
if (debugLevel > -10) { // temporary
dbgYminusFxWeight(
this.last_ymfx,
this.weights, // will use current weights
"Initial y-fX");
double [][] ers_tilt_az = getMovingObjects(
corr_vector, // GeometryCorrection.CorrVector corr_vector,
null ); // double [] fx
showMovingObjects(
"Moving_objects", // String title,
ers_tilt_az);
double moving_sigma = 1.0;
double [] ers_blured = showMovingObjects(
"Moving_objects_sigma"+moving_sigma, // String title,
ers_tilt_az,
moving_sigma, // double sigmaX,
0.01); // double accuracy
System.out.println("Moving objects");
double mov_avg = 0.0;
int clusters = clustersX * clustersY;
for (int cluster = 0; cluster < clusters; cluster++) {
if (!Double.isNaN(ers_blured[cluster])) mov_avg += ers_blured[cluster];
}
mov_avg /= clusters;
System.out.println("Average moving object detection value = "+mov_avg+
" (use to abandon detection if too high?");
// double min_l2 = 1.0 * mov_avg;
double min_l2 = 1.0 * mov_avg;
boolean [] moving_maybe = extractMovingObjects(
"Moving_objects_filtering", // String title,
ers_blured, //double [] ers_blured, // has NaNs
min_l2, // double min_l2); // minimal value (can use average or fraction of it?
4, // int shrink, // prevents growing outer border (==4)
2); // int grow) { // located moving areas
int num_moving = 0;
for (int cluster = 0; cluster < clusters; cluster++) {
if (moving_maybe[cluster]) num_moving++;
}
System.out.println("Number of clusters to remove from LMA fitting = "+num_moving);
System.out.println();
blockSelectedClusters(
measured_dsxy, // double [][] measured_dsxy,
this.weights, // double [] weights, // will be updated
moving_maybe); // boolean [] prohibit
}
double lambda = 0.1;
double lambda_scale_good = 0.5;
......@@ -606,6 +659,78 @@ public class ExtrinsicAdjustment {
return;
}
private void limitDisparity(
double [][] measured_dsxy,
double [] weights,
double max_disparity
) {
double sw = 0.0;
double sw_near = 0.0;
int clusters = clustersX * clustersY;
for (int cluster = 0; cluster < clusters; cluster++) if (measured_dsxy[cluster] != null){
double disparity = measured_dsxy[cluster][ExtrinsicAdjustment.INDX_DISP];
if (disparity <= max_disparity) {
for (int i = 0; i < POINTS_SAMPLE; i++) {
sw_near += weights[cluster * POINTS_SAMPLE + i];
}
}
for (int i = 0; i < POINTS_SAMPLE; i++) {
sw += weights[cluster * POINTS_SAMPLE + i];
}
}
if (sw_near > 0.0) {
double wboost = sw/sw_near;
for (int cluster = 0; cluster < clusters; cluster++) if (measured_dsxy[cluster] != null){
double disparity = measured_dsxy[cluster][ExtrinsicAdjustment.INDX_DISP];
if (disparity <= max_disparity) {
for (int i = 0; i < POINTS_SAMPLE; i++) {
weights[cluster * POINTS_SAMPLE + i] *= wboost;
}
} else {
for (int i = 0; i < POINTS_SAMPLE; i++) {
weights[cluster * POINTS_SAMPLE + i] = 0;
}
}
}
}
}
private void blockSelectedClusters(
double [][] measured_dsxy,
double [] weights,
boolean [] prohibit
) {
double sw = 0.0;
double sw_en = 0.0;
int clusters = clustersX * clustersY;
for (int cluster = 0; cluster < clusters; cluster++) if (measured_dsxy[cluster] != null){
if (!prohibit[cluster]) {
for (int i = 0; i < POINTS_SAMPLE; i++) {
sw_en += weights[cluster * POINTS_SAMPLE + i];
}
}
for (int i = 0; i < POINTS_SAMPLE; i++) {
sw += weights[cluster * POINTS_SAMPLE + i];
}
}
if (sw_en > 0.0) {
double wboost = sw/sw_en;
for (int cluster = 0; cluster < clusters; cluster++) if (measured_dsxy[cluster] != null){
if (!prohibit[cluster]) {
for (int i = 0; i < POINTS_SAMPLE; i++) {
weights[cluster * POINTS_SAMPLE + i] *= wboost;
}
} else {
for (int i = 0; i < POINTS_SAMPLE; i++) {
weights[cluster * POINTS_SAMPLE + i] = 0;
}
}
}
}
}
private double [] getWeights(
......@@ -683,31 +808,36 @@ public class ExtrinsicAdjustment {
Matrix [][] deriv_rots = corr_vector.getRotDeriveMatrices();
double [] imu = corr_vector.getIMU(); // i)
double [] y_minus_fx = new double [clusters * POINTS_SAMPLE];
for (int cluster = 0; cluster < clusters; cluster++) if (measured_dsxy[cluster] != null){
if ((cluster == 1735 ) || (cluster==1736)){
System.out.print("");
}
double [] ddnd = geometryCorrection.getPortsDDNDAndDerivativesNew( // USED in lwir
geometryCorrection, // GeometryCorrection gc_main,
use_rig_offsets, // boolean use_rig_offsets,
corr_rots, // Matrix [] rots,
deriv_rots, // Matrix [][] deriv_rots,
null, // double [][] DDNDderiv, // if not null, should be double[8][]
pY_offset[cluster], // double [] py_offset, // array of per-port average pY offset from the center (to correct ERS) or null (for no ERS)
imu, // double [] imu,
x0y0[cluster], // double [] pXYND0, // per-port non-distorted coordinates corresponding to the correlation measurements
world_xyz[cluster], // double [] xyz, // world XYZ for ERS correction
measured_dsxy[cluster][ExtrinsicAdjustment.INDX_PX + 0], // double px,
measured_dsxy[cluster][ExtrinsicAdjustment.INDX_PX + 1], // double py,
measured_dsxy[cluster][ExtrinsicAdjustment.INDX_TARGET]); // double disparity);
//arraycopy(Object src, int srcPos, Object dest, int destPos, int length)
// System.arraycopy(src_pixels, 0, dst_pixels, 0, src_pixels.length); /* for the borders closer to 1/2 kernel size*/
ddnd[0] = ddnd[0]; // ?
for (int i = 0; i < NUM_SENSORS; i++) {
ddnd[i + 1] = ddnd[i + 1];
ddnd[i + 5] = ddnd[i + 5];
for (int cluster = 0; cluster < clusters; cluster++) {
// if ((cluster == 1892) || (cluster == 1894) ||(cluster == 3205)) {
// System.out.println("getFx() cluster="+cluster);
// }
if (measured_dsxy[cluster] != null){
// if ((cluster == 1735 ) || (cluster==1736)){
// System.out.print("");
// }
double [] ddnd = geometryCorrection.getPortsDDNDAndDerivativesNew( // USED in lwir
geometryCorrection, // GeometryCorrection gc_main,
use_rig_offsets, // boolean use_rig_offsets,
corr_rots, // Matrix [] rots,
deriv_rots, // Matrix [][] deriv_rots,
null, // double [][] DDNDderiv, // if not null, should be double[8][]
pY_offset[cluster], // double [] py_offset, // array of per-port average pY offset from the center (to correct ERS) or null (for no ERS)
imu, // double [] imu,
x0y0[cluster], // double [] pXYND0, // per-port non-distorted coordinates corresponding to the correlation measurements
world_xyz[cluster], // double [] xyz, // world XYZ for ERS correction
measured_dsxy[cluster][ExtrinsicAdjustment.INDX_PX + 0], // double px,
measured_dsxy[cluster][ExtrinsicAdjustment.INDX_PX + 1], // double py,
measured_dsxy[cluster][ExtrinsicAdjustment.INDX_TARGET]); // double disparity);
//arraycopy(Object src, int srcPos, Object dest, int destPos, int length)
// System.arraycopy(src_pixels, 0, dst_pixels, 0, src_pixels.length); /* for the borders closer to 1/2 kernel size*/
ddnd[0] = ddnd[0]; // ?
for (int i = 0; i < NUM_SENSORS; i++) {
ddnd[i + 1] = ddnd[i + 1];
ddnd[i + 5] = ddnd[i + 5];
}
System.arraycopy(ddnd, 0, y_minus_fx, cluster*POINTS_SAMPLE, POINTS_SAMPLE);
}
System.arraycopy(ddnd, 0, y_minus_fx, cluster*POINTS_SAMPLE, POINTS_SAMPLE);
}
return y_minus_fx;
}
......@@ -751,7 +881,7 @@ public class ExtrinsicAdjustment {
double delta,
boolean graphic) {
// delta *= 0.1;
int gap = 10; //pix
int gap = 10+0; //pix
int clusters = clustersX * clustersY;
int num_pars = getNumPars();
String [] titles = getSymNames();
......@@ -902,10 +1032,168 @@ public class ExtrinsicAdjustment {
titles);
}
private double [][] getMovingObjects(
GeometryCorrection.CorrVector corr_vector,
double [] fx // or null;
) {
int ers_tilt_index = GeometryCorrection.CorrVector.IMU_INDEX;
int ers_azimuth_index = GeometryCorrection.CorrVector.IMU_INDEX + 1;
int clusters = clustersX * clustersY;
double [][] ers_tilt_az = new double [clusters][];
double [][] jt = getJacobianTransposed(corr_vector);
for (int cluster = 0; cluster < clusters; cluster++) if ( measured_dsxy[cluster] != null) {
ers_tilt_az[cluster] = new double [2];
for (int i = 0; i < (POINTS_SAMPLE-1); i++) {
int indx = cluster * POINTS_SAMPLE + i + 1; // skipping disparity
double d = measured_dsxy[cluster][ExtrinsicAdjustment.INDX_DD0 + i];
if (fx != null) {
d += fx[indx];
}
ers_tilt_az[cluster][0] += d * jt[ers_tilt_index][indx];
ers_tilt_az[cluster][1] += d * jt[ers_azimuth_index][indx];
}
}
return ers_tilt_az;
}
private void showMovingObjects(
String title,
double [][] ers_tilt_az) {
showMovingObjects(
title,
ers_tilt_az,
0, // double sigma,
0);
}
private double [] showMovingObjects(
String title,
double [][] ers_tilt_az,
double sigma,
double accuracy
)
{
int clusters = clustersX * clustersY;
String [] titles = {"vert", "hor", "amplitude"};
double [][] dbg_img = new double [titles.length][clusters];
double max_reasonable = 100.0;
for (int cluster = 0; cluster < clusters; cluster++) {
if (ers_tilt_az[cluster] != null) {
dbg_img[0][cluster] = ers_tilt_az[cluster][0];
dbg_img[1][cluster] = ers_tilt_az[cluster][1];
dbg_img[2][cluster] = Math.sqrt(0.5*(ers_tilt_az[cluster][0]*ers_tilt_az[cluster][0] + ers_tilt_az[cluster][1]*ers_tilt_az[cluster][1]));
if (dbg_img[2][cluster] > max_reasonable) {
dbg_img[0][cluster] = Double.NaN;
dbg_img[1][cluster] = Double.NaN;
dbg_img[2][cluster] = Double.NaN;
}
} else {
dbg_img[0][cluster] = Double.NaN;
dbg_img[1][cluster] = Double.NaN;
dbg_img[2][cluster] = Double.NaN;
}
}
if (sigma > 0.0){
double [] cluster_weights = new double [clusters];
for (int cluster = 0; cluster < clusters; cluster++) {
for (int i = 0; i < POINTS_SAMPLE; i++) {
cluster_weights[cluster] += this.weights[cluster * POINTS_SAMPLE + i];
}
}
for (int i = 0; i < dbg_img.length; i++) {
dbg_img[i] = (new DoubleGaussianBlur()).blurWithNaN(
dbg_img[i], // double[] pixels,
cluster_weights, // null, // double [] in_weight, // or null
clustersX,// int width,
clustersY,// int height,
sigma, // double sigmaX,
sigma, // double sigmaY,
0.01); // double accuracy
}
}
(new ShowDoubleFloatArrays()).showArrays(
dbg_img,
clustersX,
clustersY,
true,
title,
titles);
return dbg_img[2];
}
public boolean [] extractMovingObjects(
String title,
double [] ers_blured, // has NaNs
double min_l2, // minimal value (can use average or fraction of it?
int shrink, // prevents growing outer border (==4)
int grow) { // located moving areas
int clusters = clustersX * clustersY;
TileNeibs tn = new TileNeibs(clustersX, clustersY);
boolean [] terrain = new boolean[clusters];
for (int i = 0; i < clusters; i++) {
terrain[i] = ers_blured[i] <= min_l2; // NaNs not included
}
// find largest terrain area
int [] iterrains = tn. enumerateClusters(
terrain, // boolean [] tiles,
true); //boolean ordered)
boolean [] terrain_main = new boolean[clusters];
for (int i = 0; i < clusters; i++) {
terrain_main[i] = (iterrains[i] == 1); // largest cluster
}
boolean [] filled = tn.fillConcave(terrain_main);
tn.shrinkSelection(
shrink, // int shrink,
filled, // boolean [] tiles,
null); // boolean [] prohibit)
boolean [] moving = filled.clone();
for (int i = 0; i < clusters; i++) {
moving[i] &= !terrain_main[i];
}
boolean [] grown = moving.clone();
tn.growSelection(
grow, // int grow, // grow tile selection by 1 over non-background tiles 1: 4 directions, 2 - 8 directions, 3 - 8 by 1, 4 by 1 more
grown, // boolean [] tiles,
null); // boolean [] prohibit)
if (title != null) {
String [] titles = {"terrain","clusters","main_terrain","filled","moving","grown"};
double [][] dbg_img = new double [titles.length][clusters];
for (int cluster = 0; cluster < clusters; cluster++) {
dbg_img[0][cluster] = terrain[cluster]? 1.0 : 0.0;
dbg_img[1][cluster] = iterrains[cluster];
dbg_img[2][cluster] = terrain_main[cluster]? 1.0 : 0.0;
dbg_img[3][cluster] = filled[cluster]? 1.0 : 0.0;
dbg_img[4][cluster] = moving[cluster]? 1.0 : 0.0;
dbg_img[5][cluster] = grown[cluster]? 1.0 : 0.0;
}
(new ShowDoubleFloatArrays()).showArrays(
dbg_img,
clustersX,
clustersY,
true,
title,
titles);
}
return grown;
}
private void dbgYminusFxWeight(
double [] fx,
double [] weights,
String title) {
if (fx == null) {
fx = new double [weights.length];
}
int gap = 10; //pix
int clusters = clustersX * clustersY;
String [] titles = {"Y", "-fX", "Y+fx", "Weight", "W*(Y+fx)", "Masked Y+fx"};
......@@ -979,23 +1267,33 @@ public class ExtrinsicAdjustment {
double [][] xy = getXYNondistorted(corr_vector, false);
double [][] dbg_img = new double [3][width*height];
for (int mode = 0; mode < 2 * NUM_SENSORS; mode++) {
double [][] moving_objects = new double [3][clusters];
for (int mode = 0; mode < 2 * NUM_SENSORS + 1; mode++) {
int x0 = (mode % 3) * (clustersX + gap);
int y0 = (mode / 3) * (clustersY + gap);
for (int cluster = 0; cluster < clusters; cluster++) {
int x = x0 + (cluster % clustersX);
int y = y0 + (cluster / clustersX);
int pix = x + y * width;
// int indx = cluster * POINTS_SAMPLE + mode;
if (measured_dsxy[cluster] != null){
if (mode < (2 * NUM_SENSORS)) {
dbg_img[0][pix] = measured_dsxy[cluster][ExtrinsicAdjustment.INDX_X0+mode];
dbg_img[1][pix] = xy[cluster][mode] - x0y0[cluster][mode];
dbg_img[2][pix] = xy[cluster][mode] - x0y0[cluster][mode] - measured_dsxy[cluster][ExtrinsicAdjustment.INDX_X0+mode];
dbg_img[2][pix] = measured_dsxy[cluster][ExtrinsicAdjustment.INDX_X0+mode] - (xy[cluster][mode] - x0y0[cluster][mode]);
moving_objects[0][cluster] += dbg_img[0][pix]*dbg_img[0][pix];
moving_objects[1][cluster] += dbg_img[1][pix]*dbg_img[0][pix];
moving_objects[2][cluster] += dbg_img[2][pix]*dbg_img[0][pix];
} else {
dbg_img[0][pix] = Math.sqrt(moving_objects[0][cluster]/(2 * NUM_SENSORS));
dbg_img[1][pix] = Math.sqrt(moving_objects[0][cluster]/(2 * NUM_SENSORS));
dbg_img[2][pix] = Math.sqrt(moving_objects[0][cluster]/(2 * NUM_SENSORS));
}
} else {
dbg_img[ 0][pix] = Double.NaN;
dbg_img[ 1][pix] = Double.NaN;
dbg_img[ 2][pix] = Double.NaN;
}
}
}
(new ShowDoubleFloatArrays()).showArrays(
......@@ -1005,6 +1303,7 @@ public class ExtrinsicAdjustment {
true,
title,
titles);
System.out.println("dbgXY() DONE");
}
......@@ -1176,16 +1475,54 @@ public class ExtrinsicAdjustment {
// this.last_jt); // double [][] jt) { // should be either [vector.length][samples.size()] or null - then only fx is calculated
this.last_jt = getJacobianTransposed(corr_vector); // new double [num_pars][num_points];
this.last_ymfx = getFx(corr_vector);
if (debug_level > 2) {
if (debug_level > -10) { // temporary
dbgYminusFxWeight(
this.last_ymfx,
this.weights,
"Initial y-fX");
// dbgYminusFx(this.last_ymfx, "Initial y-fX");
"Initial_y-fX_after_moving_objects");
/*
double [][] ers_tilt_az = getMovingObjects(
corr_vector, // GeometryCorrection.CorrVector corr_vector,
null ); // double [] fx
showMovingObjects(
"Moving_objects", // String title,
ers_tilt_az);
double moving_sigma = 1.0;
double [] ers_blured = showMovingObjects(
"Moving_objects_sigma"+moving_sigma, // String title,
ers_tilt_az,
moving_sigma, // double sigmaX,
0.01); // double accuracy
System.out.println("Moving objects");
double mov_avg = 0.0;
int clusters = clustersX * clustersY;
for (int cluster = 0; cluster < clusters; cluster++) {
if (!Double.isNaN(ers_blured[cluster])) mov_avg += ers_blured[cluster];
}
mov_avg /= clusters;
System.out.println("Average moving object detection value = "+mov_avg+
" (use to abandon detection if too high?");
double min_l2 = 1.0 * mov_avg;
boolean [] moving_maybe = extractMovingObjects(
"Moving_objects_filtering", // String title,
ers_blured, //double [] ers_blured, // has NaNs
min_l2, // double min_l2); // minimal value (can use average or fraction of it?
4, // int shrink, // prevents growing outer border (==4)
2); // int grow) { // located moving areas
int num_moving = 0;
for (int cluster = 0; cluster < clusters; cluster++) {
if (moving_maybe[cluster]) num_moving++;
}
System.out.println("Number of clusters to remove from LMA fitting = "+num_moving);
System.out.println();
*/
}
if (last_ymfx == null) {
return null; // need to re-init/restart LMA
}
......
......@@ -655,6 +655,639 @@ public class ImageDtt extends ImageDttCPU {
// return clt_data;
}
public GPUTileProcessor.TpTask[][] clt_aberrations_quad_corr_GPU_test(
final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
final int macro_scale, // to correlate tile data instead of the pixel data: 1 - pixels, 8 - tiles
final int [][] tile_op, // [tilesY][tilesX] - what to do - 0 - nothing for this tile
final double [][] disparity_array, // [tilesY][tilesX] - individual per-tile expected disparity
final float [][][][] fcorr_td, // [tilesY][tilesX][pair][4*64] transform domain representation of 6 corr pairs
final float [][][][] fcorr_combo_td, // [4][tilesY][tilesX][pair][4*64] TD of combo corrs: qud, cross, hor,vert
// each of the top elements may be null to skip particular combo type
final float [][][][] fdisp_dist, // [tilesY][tilesX][cams][4], // disparity derivatives vectors or null
final float [][][][] fpxpy, // [tilesY][tilesX][cams][2], tile {pX,pY}
final float [][][][] fpxpy_test, // [tilesY][tilesX][cams][2], tile {pX,pY}
final double [][][][] clt_corr_combo, // [type][tilesY][tilesX][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
// [type][tilesY][tilesX] should be set by caller
// types: 0 - selected correlation (product+offset), 1 - sum
final double [][][][][] clt_corr_partial,// [tilesY][tilesX][quad]color][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
// [tilesY][tilesX] should be set by caller
// When clt_mismatch is non-zero, no far objects extraction will be attempted
final double [][] clt_mismatch, // [12][tilesY * tilesX] // ***** transpose unapplied ***** ?. null - do not calculate
// values in the "main" directions have disparity (*_CM) subtracted, in the perpendicular - as is
final double [][] disparity_map, // [8][tilesY][tilesX], only [6][] is needed on input or null - do not calculate
// last 2 - contrast, avg/ "geometric average)
final int disparity_modes, // bit mask of disparity_map slices to calculate/return
final double [][][][] texture_tiles, // compatible with the CPU ones
final float [][] texture_img, // null or [3][] (RGB) or [4][] RGBA
final Rectangle texture_woi_pix, // null or generated texture location/size in pixels
final float [][][] iclt_fimg, // will return quad images or null to skip, use quadCLT.linearStackToColor
// new parameters, will replace some other?
final double gpu_corr_scale, // 0.75; // reduce GPU-generated correlation values
final double gpu_fat_zero, // clt_parameters.getGpuFatZero(is_mono);absolute == 30.0
final double gpu_sigma_r, // 0.9, 1.1
final double gpu_sigma_b, // 0.9, 1.1
final double gpu_sigma_g, // 0.6, 0.7
final double gpu_sigma_m, // = 0.4; // 0.7;
final double gpu_sigma_rb_corr, // = 0.5; // apply LPF after accumulating R and B correlation before G, monochrome ? 1.0 : gpu_sigma_rb_corr;
final double gpu_sigma_corr, // = 0.9;gpu_sigma_corr_m
final int gpu_corr_rad, // = transform_size - 1 ?
final double corr_red, // +used
final double corr_blue,// +used
final double max_corr_radius, // 3.9;
final int corr_mode, // Correlation mode: 0 - integer max, 1 - center of mass, 2 - polynomial
final double min_shot, // +10.0; // Do not adjust for shot noise if lower than
final double scale_shot, // +3.0; // scale when dividing by sqrt ( <0 - disable correction)
final double diff_sigma, // +5.0;//RMS difference from average to reduce weights (~ 1.0 - 1/255 full scale image)
final double diff_threshold, // +5.0; // RMS difference from average to discard channel (~ 1.0 - 1/255 full scale image)
final boolean diff_gauss, // true; // when averaging images, use Gaussian around average as weight (false - sharp all/nothing)
final double min_agree, // +3.0; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
final boolean dust_remove, // +Do not reduce average weight when only one image differs much from the average
final GeometryCorrection geometryCorrection, // for GPU TODO: combine geometry corrections if geometryCorrection_main is not null
final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
final int window_type, // GPU: will not be used
final double disparity_corr, // disparity at infinity
final int debug_tileX,
final int debug_tileY,
final int threadsMax, // maximal number of threads to launch
final int globalDebugLevel)
{
if (this.gpuQuad == null) {
System.out.println("clt_aberrations_quad_corr_GPU(): this.gpuQuad is null, bailing out");
return null;
}
final boolean [][] saturation_imp = gpuQuad.quadCLT.saturation_imp; // boolean [][] saturation_imp, // (near) saturated pixels or null
//gpuQuad
final boolean debug_distort= globalDebugLevel > 0; ///false; // true;
final double [][] debug_offsets = new double[imgdtt_params.lma_dbg_offset.length][2];
for (int i = 0; i < debug_offsets.length; i++) for (int j = 0; j < debug_offsets[i].length; j++) {
debug_offsets[i][j] = imgdtt_params.lma_dbg_offset[i][j]*imgdtt_params.lma_dbg_scale;
}
final boolean macro_mode = macro_scale != 1; // correlate tile data instead of the pixel data
final int quad = 4; // number of subcameras
// final int numcol = 3; // number of colors // keep the same, just do not use [0] and [1], [2] - green
final int numcol = isMonochrome()?1:3;
final int width = gpuQuad.getImageWidth();
final int height = gpuQuad.getImageHeight();
final int tilesX=gpuQuad.getTilesX(); // width/transform_size;
final int tilesY=gpuQuad.getTilesY(); // final int tilesY=height/transform_size;
//// final int nTilesInChn=tilesX*tilesY;
//// final double [][][][][][] clt_data = new double[quad][numcol][tilesY][tilesX][][];
final Thread[] threads = newThreadArray(threadsMax);
final AtomicInteger ai = new AtomicInteger(0);
final double [] col_weights= new double [numcol]; // colors are RBG
final double [][] dbg_distort = debug_distort? (new double [4*quad][tilesX*tilesY]) : null;
// not yet used with GPU
/**
final double [][] corr_wnd = Corr2dLMA.getCorrWnd(
transform_size,
imgdtt_params.lma_wnd);
final double [] corr_wnd_inv_limited = (imgdtt_params.lma_min_wnd <= 1.0)? new double [corr_wnd.length * corr_wnd[0].length]: null;
if (corr_wnd_inv_limited != null) {
double inv_pwr = imgdtt_params.lma_wnd_pwr - (imgdtt_params.lma_wnd - 1.0); // compensate for lma_wnd
for (int i = imgdtt_params.lma_hard_marg; i < (corr_wnd.length - imgdtt_params.lma_hard_marg); i++) {
for (int j = imgdtt_params.lma_hard_marg; j < (corr_wnd.length - imgdtt_params.lma_hard_marg); j++) {
corr_wnd_inv_limited[i * (corr_wnd.length) + j] = 1.0/Math.max(Math.pow(corr_wnd[i][j],inv_pwr), imgdtt_params.lma_min_wnd);
}
}
}
*/
// keep for now for mono, find out what do they mean for macro mode
if (macro_mode) { // all the same as they now mean different
//compensating Bayer correction
col_weights[0] = 0.25; // 1.0/3;
col_weights[1] = 0.25; // 1.0/3;
col_weights[2] = 0.5; // 1.0/3;
} else {
if (isMonochrome()) {
col_weights[2] = 1.0;// green color/mono
col_weights[0] = 0;
col_weights[1] = 0;
} else {
col_weights[2] = 1.0/(1.0 + corr_red + corr_blue); // green color
col_weights[0] = corr_red * col_weights[2];
col_weights[1] = corr_blue * col_weights[2];
}
}
final int corr_size = transform_size * 2 - 1;
if ((globalDebugLevel > -10) && (disparity_corr != 0.0)){
System.out.println(String.format("Using manual infinity disparity correction of %8.5f pixels",disparity_corr));
}
// reducing weight of on-axis correlation values to enhance detection of vertical/horizontal lines
// multiply correlation results inside the horizontal center strip 2*enhortho_width - 1 wide by enhortho_scale
final double [] enh_ortho_scale = new double [corr_size];
for (int i = 0; i < corr_size; i++){
if ((i < (transform_size - imgdtt_params.getEnhOrthoWidth(isAux()))) || (i > (transform_size - 2 + imgdtt_params.getEnhOrthoWidth(isAux())))) {
enh_ortho_scale[i] = 1.0;
} else {
enh_ortho_scale[i] = imgdtt_params.getEnhOrthoScale(isAux());
}
if (i == (transform_size-1)) enh_ortho_scale[i] = 0.0 ; // hardwired 0 in the center
enh_ortho_scale[i] *= Math.sin(Math.PI*(i+1.0)/(2*transform_size));
}
if (globalDebugLevel > 1){
System.out.println("getEnhOrthoWidth(isAux())="+ imgdtt_params.getEnhOrthoWidth(isAux())+" getEnhOrthoScale(isAux())="+ imgdtt_params.getEnhOrthoScale(isAux()));
for (int i = 0; i < corr_size; i++){
System.out.println(" enh_ortho_scale["+i+"]="+ enh_ortho_scale[i]);
}
}
// Create window to select center correlation strip using
// ortho_height - full width of non-zero elements
// ortho_eff_height - effective height (ration of the weighted column sum to the center value)
int wcenter = transform_size - 1;
final double [] ortho_weights = new double [corr_size]; // [15]
for (int i = 0; i < corr_size; i++){
if ((i >= wcenter - imgdtt_params.ortho_height/2) && (i <= wcenter + imgdtt_params.ortho_height/2)) {
double dx = 1.0*(i-wcenter)/(imgdtt_params.ortho_height/2 + 1);
ortho_weights[i] = 0.5*(1.0+Math.cos(Math.PI*dx))/imgdtt_params.ortho_eff_height;
}
}
if (globalDebugLevel > 0){
System.out.println("ortho_height="+ imgdtt_params.ortho_height+" ortho_eff_height="+ imgdtt_params.ortho_eff_height);
for (int i = 0; i < corr_size; i++){
System.out.println(" ortho_weights["+i+"]="+ ortho_weights[i]);
}
}
if (globalDebugLevel > 0) {
System.out.println("clt_aberrations_quad_corr(): width="+width+" height="+height+" transform_size="+transform_size+
" debug_tileX="+debug_tileX+" debug_tileY="+debug_tileY+" globalDebugLevel="+globalDebugLevel);
}
// add optional initialization of debug layers here
boolean need_macro = false;
boolean need_corr = (clt_mismatch != null) || (fcorr_combo_td !=null) || (fcorr_td !=null) ; // (not the only reason)
// skipping DISPARITY_VARIATIONS_INDEX - it was not used
if (disparity_map != null){
for (int i = 0; i<disparity_map.length;i++) if ((disparity_modes & (1 << i)) != 0){
if ((i == OVEREXPOSED) && (saturation_imp == null)) {
continue;
}
disparity_map[i] = new double [tilesY*tilesX];
if ((i >= IMG_TONE_RGB) || ((i >= IMG_DIFF0_INDEX) && (i < (IMG_DIFF0_INDEX + 4)))) {
need_macro = true;
}
if (i <=DISPARITY_STRENGTH_INDEX) {
need_corr = true;
}
}
}
if (clt_mismatch != null){
for (int i = 0; i<clt_mismatch.length;i++){
clt_mismatch[i] = new double [tilesY*tilesX]; // will use only "center of mass" centers
}
}
DttRad2 dtt = new DttRad2(transform_size);
dtt.set_window(window_type);
final double [] lt_window = dtt.getWin2d(); // [256] - never used
final double [] lt_window2 = new double [lt_window.length]; // squared - never used
for (int i = 0; i < lt_window.length; i++) lt_window2[i] = lt_window[i] * lt_window[i];
if (globalDebugLevel > 1) {
ShowDoubleFloatArrays sdfa_instance = new ShowDoubleFloatArrays(); // just for debugging?
sdfa_instance.showArrays(lt_window, 2*transform_size, 2*transform_size, "lt_window");
}
if (globalDebugLevel > 0) {
System.out.println("macro_mode="+macro_mode);
}
final boolean use_main = geometryCorrection_main != null;
boolean [] used_corrs = new boolean[1];
final int all_pairs = imgdtt_params.dbg_pair_mask; //TODO: use tile tasks
final GPUTileProcessor.TpTask[] tp_tasks = gpuQuad.setTpTask(
disparity_array, // final double [][] disparity_array, // [tilesY][tilesX] - individual per-tile expected disparity
disparity_corr, // final double disparity_corr,
used_corrs, // final boolean [] need_corrs, // should be initialized to boolean[1] or null
tile_op, // final int [][] tile_op, // [tilesY][tilesX] - what to do - 0 - nothing for this tile
all_pairs, // final int corr_mask, // <0 - use corr mask from the tile tile_op, >=0 - overwrite all with non-zero corr_mask_tp
threadsMax); // final int threadsMax, // maximal number of threads to launch
if (tp_tasks.length == 0) {
System.out.println("Empty tasks - nothing to do");
return null;
}
//texture_tiles
final boolean fneed_macro = need_macro;
final boolean fneed_corr = need_corr && used_corrs[0]; // *** tasks should include correlation
final float [][] lpf_rgb = new float[][] {
floatGetCltLpfFd(gpu_sigma_r),
floatGetCltLpfFd(gpu_sigma_b),
floatGetCltLpfFd(gpu_sigma_g),
floatGetCltLpfFd(gpu_sigma_m)
};
gpuQuad.setLpfRbg( // constants memory - same for all cameras
lpf_rgb,
globalDebugLevel > -1);
final float [] lpf_flat = floatGetCltLpfFd(gpu_sigma_corr);
gpuQuad.setLpfCorr(// constants memory - same for all cameras
"lpf_corr", // String const_name, // "lpf_corr"
lpf_flat,
globalDebugLevel > -1);
final float [] lpf_rb_flat = floatGetCltLpfFd(gpu_sigma_rb_corr);
gpuQuad.setLpfCorr(// constants memory - same for all cameras
"lpf_rb_corr", // String const_name, // "lpf_corr"
lpf_rb_flat,
globalDebugLevel > -1);
gpuQuad.setTasks( // copy tp_tasks to the GPU memory
tp_tasks, // TpTask [] tile_tasks,
use_main, // use_aux); // boolean use_aux)
imgdtt_params.gpu_verify); // boolean verify
gpuQuad.execSetTilesOffsets(); // prepare tiles offsets in GPU memory
GPUTileProcessor.TpTask[][] test_tasks = new GPUTileProcessor.TpTask[3][];
test_tasks[2] = tp_tasks;
if ((fdisp_dist != null) || (fpxpy != null)) {
final GPUTileProcessor.TpTask[] tp_tasks_full = gpuQuad.getTasks(use_main);
test_tasks[0] = tp_tasks_full;
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
@Override
public void run() {
for (int indx_tile = ai.getAndIncrement(); indx_tile < tp_tasks_full.length; indx_tile = ai.getAndIncrement()) {
GPUTileProcessor.TpTask task = tp_tasks_full[indx_tile];
if (fdisp_dist != null) {
fdisp_dist[task.getTileY()][task.getTileX()] = task.getDispDist();
}
if (fpxpy != null) {
fpxpy[task.getTileY()][task.getTileX()] = task.getXY(use_main); // boolean use_aux);
}
} // end of tile
}
};
}
startAndJoin(threads);
ai.set(0);
}
if (fpxpy_test != null) {
gpuQuad.execSetTilesOffsets(); // prepare tiles offsets in GPU memory
final GPUTileProcessor.TpTask[] tp_tasks_full = gpuQuad.getTasks(use_main); // reads the same
test_tasks[1] = tp_tasks_full;
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
@Override
public void run() {
for (int indx_tile = ai.getAndIncrement(); indx_tile < tp_tasks_full.length; indx_tile = ai.getAndIncrement()) {
GPUTileProcessor.TpTask task = tp_tasks_full[indx_tile];
if (fpxpy != null) {
fpxpy_test[task.getTileY()][task.getTileX()] = task.getXY(use_main); // boolean use_aux);
}
} // end of tile
}
};
}
startAndJoin(threads);
ai.set(0);
}
// GPUTileProcessor.TpTask[] tp_tasks_full1,
// GPUTileProcessor.TpTask[] tp_tasks_full2)
gpuQuad.execConvertDirect();
if (iclt_fimg != null) {
gpuQuad.execImcltRbgAll(isMonochrome()); // execute GPU kernel
for (int ncam = 0; ncam < iclt_fimg.length; ncam++) {
iclt_fimg[ncam] = gpuQuad.getRBG(ncam); // retrieve data from GPU
}
} else {gpuQuad.execImcltRbgAll(isMonochrome());} // just for testing
// does it need texture tiles to be output?
if (texture_img != null) {
Rectangle woi = new Rectangle(); // will be filled out to match actual available image
gpuQuad.execRBGA(
col_weights, // double [] color_weights,
isLwir(), // boolean is_lwir,
min_shot, // double min_shot, // 10.0
scale_shot, // double scale_shot, // 3.0
diff_sigma, // double diff_sigma, // pixel value/pixel change
diff_threshold, // double diff_threshold, // pixel value/pixel change
min_agree, // double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
dust_remove); // boolean dust_remove,
float [][] rbga = gpuQuad.getRBGA(
(isMonochrome() ? 1 : 3), // int num_colors,
(texture_woi_pix != null)? texture_woi_pix : woi);
for (int ncol = 0; ncol < texture_img.length; ncol++) if (ncol < rbga.length) {
texture_img[ncol] = rbga[ncol];
}
}
// does it need macro data?
if (fneed_macro) {
//Generate non-overlapping (16x16) texture tiles, prepare
gpuQuad.execTextures(
col_weights, // double [] color_weights,
isLwir(), // boolean is_lwir,
min_shot, // double min_shot, // 10.0
scale_shot, // double scale_shot, // 3.0
diff_sigma, // double diff_sigma, // pixel value/pixel change
diff_threshold, // double diff_threshold, // pixel value/pixel change - never used in GPU ?
min_agree, // double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
dust_remove, // boolean dust_remove, // Do not reduce average weight when only one image differs much from the average
false, // boolean calc_textures,
true); // boolean calc_extra)
float [][] extra = gpuQuad.getExtra();
int num_cams = gpuQuad.getNumCams();
for (int ncam = 0; ncam < num_cams; ncam++) {
int indx = ncam + IMG_DIFF0_INDEX;
if ((disparity_modes & (1 << indx)) != 0){
disparity_map[indx] = new double [extra[ncam].length];
for (int i = 0; i < extra[ncam].length; i++) {
disparity_map[indx][i] = extra[ncam][i];
}
}
}
for (int nc = 0; nc < (extra.length - num_cams); nc++) {
int sindx = nc + num_cams;
int indx = nc + IMG_TONE_RGB;
if ((disparity_modes & (1 << indx)) != 0){
disparity_map[indx] = new double [extra[sindx].length];
for (int i = 0; i < extra[sindx].length; i++) {
disparity_map[indx][i] = extra[sindx][i];
}
}
}
}
// does it need non-overlapping texture tiles
if (texture_tiles != null) { // compatible with the CPU ones
//Generate non-overlapping (16x16) texture tiles, prepare
gpuQuad.execTextures(
col_weights, // double [] color_weights,
isLwir(), // boolean is_lwir,
min_shot, // double min_shot, // 10.0
scale_shot, // double scale_shot, // 3.0
diff_sigma, // double diff_sigma, // pixel value/pixel change
diff_threshold, // double diff_threshold, // pixel value/pixel change - never used in GPU ?
min_agree, // double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
dust_remove, // boolean dust_remove, // Do not reduce average weight when only one image differs much from the average
true, // boolean calc_textures,
false); // boolean calc_extra)
int [] texture_indices = gpuQuad.getTextureIndices();
int num_src_slices = numcol + 1; // + (clt_parameters.keep_weights?(ports + numcol + 1):0); // 12 ; // calculate
float [] flat_textures = gpuQuad.getFlatTextures( // fatal error has been detected by the Java Runtime Environment:
texture_indices.length,
numcol, // int num_colors,
false); // clt_parameters.keep_weights); // boolean keep_weights);
gpuQuad.doubleTextures(
new Rectangle(0, 0, tilesX, tilesY), // Rectangle woi,
texture_tiles, // double [][][][] texture_tiles, // null or [tilesY][tilesX]
texture_indices, // int [] indices,
flat_textures, // float [][][] ftextures,
tilesX, // int full_width,
4, // rbga only /int num_slices
num_src_slices // int num_src_slices
);
}
// does it need correlations?
if (fneed_corr) {
//Generate 2D phase correlations from the CLT representation
gpuQuad.execCorr2D_TD(col_weights); // Get TD version of correlations (may be read out and saved)
final int [] corr_indices = gpuQuad.getCorrIndices();
if (fcorr_td != null) {
gpuQuad.getCorrTilesTd(fcorr_td); // generate time domain correlation pairs
}
gpuQuad.execCorr2D_normalize(
false, // boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero, // double fat_zero);
gpu_corr_rad); // int corr_radius
final float [][] fcorr2D = gpuQuad.getCorr2D(gpu_corr_rad); // int corr_rad);
// Combine 4 ortho pairs
gpuQuad.execCorr2D_combine( // calculate cross pairs
true, // boolean init_corr, // initialize output tiles (false - add to current)
GPUTileProcessor.NUM_PAIRS, // int num_pairs_in, // typically 6 - number of pairs per tile (tile task should have same number per each tile
0x0f); // int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
if ((fcorr_combo_td != null) && (fcorr_combo_td.length >= 0) && (fcorr_combo_td[0] != null)) {
gpuQuad.getCorrTilesComboTd(fcorr_combo_td[0]); // generate time domain correlation pairs for quad ortho combination
}
// normalize and convert to pixel domain
gpuQuad.execCorr2D_normalize(
true, // boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero, // double fat_zero);
gpu_corr_rad); // int corr_radius
final int [] corr_quad_indices = gpuQuad.getCorrComboIndices(); // get quad
final float [][] fcorr2D_quad = gpuQuad.getCorr2DCombo(gpu_corr_rad);
// Combine 2 diagonal pairs
gpuQuad.execCorr2D_combine( // calculate cross pairs
true, // boolean init_corr, // initialize output tiles (false - add to current)
GPUTileProcessor.NUM_PAIRS, // int num_pairs_in, // typically 6 - number of pairs per tile (tile task should have same number per each tile
0x30); // int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
if ((fcorr_combo_td != null) && (fcorr_combo_td.length >= 1) && (fcorr_combo_td[1] != null)) {
gpuQuad.getCorrTilesComboTd(fcorr_combo_td[1]); // generate time domain correlation pairs for cross diagonal combination
}
gpuQuad.execCorr2D_normalize(
true, // boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero, // double fat_zero);
gpu_corr_rad); // int corr_radius
final float [][] fcorr2D_cross = gpuQuad.getCorr2DCombo(gpu_corr_rad);
// Combine 2 horizontal pairs
gpuQuad.execCorr2D_combine( // calculate cross pairs
true, // boolean init_corr, // initialize output tiles (false - add to current)
GPUTileProcessor.NUM_PAIRS, // int num_pairs_in, // typically 6 - number of pairs per tile (tile task should have same number per each tile
0x03); // int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
if ((fcorr_combo_td != null) && (fcorr_combo_td.length >= 2) && (fcorr_combo_td[2] != null)) {
gpuQuad.getCorrTilesComboTd(fcorr_combo_td[2]); // generate time domain correlation pairs for horizontal combination
}
gpuQuad.execCorr2D_normalize(
true, // boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero, // double fat_zero);
gpu_corr_rad); // int corr_radius
final float [][] fcorr2D_hor = gpuQuad.getCorr2DCombo(gpu_corr_rad);
// Combine 2 vertical pairs
gpuQuad.execCorr2D_combine( // calculate cross pairs
true, // boolean init_corr, // initialize output tiles (false - add to current)
GPUTileProcessor.NUM_PAIRS, // int num_pairs_in, // typically 6 - number of pairs per tile (tile task should have same number per each tile
0x0c); // int pairs_mask // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
if ((fcorr_combo_td != null) && (fcorr_combo_td.length >= 3) && (fcorr_combo_td[3] != null)) {
gpuQuad.getCorrTilesComboTd(fcorr_combo_td[3]); // generate time domain correlation pairs for vertical combination
}
gpuQuad.execCorr2D_normalize(
true, // boolean combo, // normalize combo correlations (false - per-pair ones)
gpu_fat_zero, // double fat_zero);
gpu_corr_rad); // int corr_radius
final float [][] fcorr2D_vert = gpuQuad.getCorr2DCombo(gpu_corr_rad);
if (corr_indices.length > 0) {
/*
if (true) { // debugging only
int [] wh = new int[2];
double [][] dbg_corr = GPUTileProcessor.getCorr2DView(
tilesX,
tilesY,
corr_indices,
fcorr2D,
wh);
(new ShowDoubleFloatArrays()).showArrays(
dbg_corr,
wh[0],
wh[1],
true,
"dbg-corr2D-initial", // name+"-CORR2D-D"+clt_parameters.disparity,
GPUTileProcessor.getCorrTitles());
}
*/
final int corr_length = fcorr2D[0].length;// all correlation tiles have the same size
// assuming that the correlation pairs sets are the same for each tile that has correlations
// find this number
int nt0 = (corr_indices[0] >> GPUTileProcessor.CORR_NTILE_SHIFT);
int nc0 = 1;
for (int i = 1; (i < corr_indices.length) && ((corr_indices[i] >> GPUTileProcessor.CORR_NTILE_SHIFT) == nt0) ; i++) {
nc0++;
}
final int num_tile_corr = nc0; // normally 6
final int num_tiles = corr_indices.length / num_tile_corr;
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
@Override
public void run() {
// int tileY,tileX,tIndex;
// double [][] corrs = new double [GPUTileProcessor.NUM_PAIRS][corr_length]; // 225-long (15x15)
Correlation2d corr2d = new Correlation2d(
imgdtt_params, // ImageDttParameters imgdtt_params,
transform_size, // int transform_size,
2.0, // double wndx_scale, // (wndy scale is always 1.0)
isMonochrome(), // boolean monochrome,
(globalDebugLevel > -1)); // boolean debug)
corr2d.createOrtoNotch(
imgdtt_params.getEnhOrthoWidth(isAux()), // double getEnhOrthoWidth(isAux()),
imgdtt_params.getEnhOrthoScale(isAux()), //double getEnhOrthoScale(isAux()),
(imgdtt_params.lma_debug_level > 1)); // boolean debug);
for (int indx_tile = ai.getAndIncrement(); indx_tile < num_tiles; indx_tile = ai.getAndIncrement()) {
// double [][] corrs = new double [GPUTileProcessor.NUM_PAIRS][corr_length]; // 225-long (15x15)
// added quad and cross combos
double [][] corrs = new double [GPUTileProcessor.NUM_PAIRS + 4][corr_length]; // 225-long (15x15)
int indx_corr = indx_tile * num_tile_corr;
int nt = (corr_indices[indx_corr] >> GPUTileProcessor.CORR_NTILE_SHIFT);
int tileX = nt % tilesX;
int tileY = nt / tilesX;
int tIndex = tileY * tilesX + tileX;
// Prepare the same (currently 10-layer) corrs as double [][], as in CPU version
int pair_mask = 0;
for (int indx_pair = 0; indx_pair < num_tile_corr; indx_pair++) {
int pair = corr_indices[indx_corr] & GPUTileProcessor.CORR_PAIRS_MASK; // ((1 << CORR_NTILE_SHIFT) - 1); // np should
assert pair < GPUTileProcessor.NUM_PAIRS : "invalid correllation pair";
pair_mask |= (1 << pair);
for (int i = 0; i < corr_length; i++) {
corrs[pair][i] = gpu_corr_scale * fcorr2D[indx_corr][i]; // from float to double
}
indx_corr++;
}
// add 4 combo layers : quad, cross, hor, vert
int pair = GPUTileProcessor.NUM_PAIRS; // 6
nt = (corr_quad_indices[indx_tile] >> GPUTileProcessor.CORR_NTILE_SHIFT); // corr_quad_indices - different sequence
for (int i = 0; i < corr_length; i++) {
corrs[pair][i] = gpu_corr_scale * fcorr2D_quad[indx_tile][i]; // from float to double
}
// indices for cross are the same as for quad
pair++;
for (int i = 0; i < corr_length; i++) {
corrs[pair][i] = gpu_corr_scale * fcorr2D_cross[indx_tile][i]; // from float to double
}
// indices for hor are the same as for quad
pair++;
for (int i = 0; i < corr_length; i++) {
corrs[pair][i] = gpu_corr_scale * fcorr2D_hor[indx_tile][i]; // from float to double
}
// indices for vert are the same as for quad
pair++;
for (int i = 0; i < corr_length; i++) {
corrs[pair][i] = gpu_corr_scale * fcorr2D_vert[indx_tile][i]; // from float to double
}
int used_pairs = pair_mask; // imgdtt_params.dbg_pair_mask; //TODO: use tile tasks
int tile_lma_debug_level = ((tileX == debug_tileX) && (tileY == debug_tileY))? (imgdtt_params.lma_debug_level-1) : -2;
boolean debugTile =(tileX == debug_tileX) && (tileY == debug_tileY) && (globalDebugLevel > -1);
corr_common_GPU(
imgdtt_params, // final ImageDttParameters imgdtt_params,
clt_corr_partial, // final double [][][][][] clt_corr_partial,
used_pairs, // final int used_pairs,
disparity_map, // final double [][] disparity_map,
clt_mismatch, // final double [][] clt_mismatch,
saturation_imp, // final boolean [][] saturation_imp,
fneed_macro, // final boolean fneed_macro,
corr2d, // final Correlation2d corr2d,
corrs, // final double [][] corrs,
tileX, // final int tileX,
tileY, // final int tileY,
max_corr_radius, // final double max_corr_radius, // 3.9;
tile_lma_debug_level, // int tile_lma_debug_level,
debugTile, // boolean debugTile,
globalDebugLevel); // final int globalDebugLevel)
// double extra_disparity = 0.0; // used for textures: if allowed, shift images extra before trying to combine
/*
// Disabled for GPU
if (corr_mode == 0) extra_disparity = disparity_map[DISPARITY_INDEX_INT][tIndex];
else if (corr_mode == 1) extra_disparity = disparity_map[DISPARITY_INDEX_CM][tIndex];
else if (corr_mode == 2) extra_disparity = disparity_map[DISPARITY_INDEX_POLY][tIndex];
else if (corr_mode == 3) extra_disparity = disparity_map[DISPARITY_INDEX_HOR][tIndex]; // not used in lwir
else if (corr_mode == 4) extra_disparity = disparity_map[DISPARITY_INDEX_VERT][tIndex]; // not used in lwir
if (Double.isNaN(extra_disparity)) extra_disparity = 0; // used in lwir
*/
if ((disparity_map != null) && Double.isNaN(disparity_map[DISPARITY_STRENGTH_INDEX][tIndex])) {
System.out.println("BUG: 3. disparity_map[DISPARITY_STRENGTH_INDEX][tIndex] should not be NaN");
}
// only debug is left
// old (per-color correlation)
// removed
} // end of tile
}
};
}
startAndJoin(threads);
} else {
// no correlation tiles to process
}
}
if ((dbg_distort != null) &&(globalDebugLevel >=0)) {
(new ShowDoubleFloatArrays()).showArrays(dbg_distort, tilesX, tilesY, true, "disparity_distortions"); // , dbg_titles);
}
return test_tasks;
// GPUTileProcessor.TpTask[] tp_tasks_full1,
// GPUTileProcessor.TpTask[] tp_tasks_full2)
}
public void clt_process_tl_correlations_GPU( // convert to pixel domain and process correlations already prepared in fcorr_td and/or fcorr_combo_td
final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
......
......@@ -48,6 +48,7 @@ import com.elphel.imagej.gpu.GPUTileProcessor;
import ij.ImagePlus;
import ij.ImageStack;
public class QuadCLT extends QuadCLTCPU {
int dbg_lev = 1;
private GPUTileProcessor.GpuQuad gpuQuad = null;
public QuadCLT(
String prefix,
......@@ -2314,33 +2315,38 @@ public class QuadCLT extends QuadCLTCPU {
threadsMax,
debugLevel - 2);
} else {// GPU
// First - measure and get fcorr_td, fdisp_dist, fpxpy
// First - measure and get fcorr_td, fdisp_dist, fpxpy
float [][][][] fcorr_td = new float[tilesY][tilesX][][];
float [][][][] fdisp_dist = new float[tilesY][tilesX][][];
float [][][][] fpxpy = new float[tilesY][tilesX][][];
float [][][][] fpxpy_test = new float[tilesY][tilesX][][];
float [][][][] fpxpy_test1 = new float[tilesY][tilesX][][];
float [][][][] fpxpy_test2 = new float[tilesY][tilesX][][];
final double gpu_fat_zero = clt_parameters.getGpuFatZero(isMonochrome());
image_dtt.clt_aberrations_quad_corr_GPU( // USED in LWIR
GPUTileProcessor.TpTask[][] test_tasks0 = image_dtt.clt_aberrations_quad_corr_GPU_test( // USED in LWIR
clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
1, // final int macro_scale, // to correlate tile data instead of the pixel data: 1 - pixels, 8 - tiles
tile_op, // per-tile operation bit codes
disparity_array, // clt_parameters.disparity, // final double disparity,
fcorr_td, // final float [][][][] corr_td, // [tilesY][tilesX][pair][4*64] transform domain representation of 6 corr pairs
null, // final float [][][][] corr_combo_td, // [4][tilesY][tilesX][pair][4*64] TD of combo corrs: qud, cross, hor,vert
// each of the top elements may be null to skip particular combo type
// each of the top elements may be null to skip particular combo type
fdisp_dist, // final float [][][][] fdisp_dist, // [tilesY][tilesX][cams][4], // disparity derivatives vectors or null
fpxpy, // final float [][][][] fpxpy, // [tilesY][tilesX][cams][2], tile {pX,pY}
fpxpy_test, // final float [][][][] fpxpy, // [tilesY][tilesX][cams][2], tile {pX,pY}
fpxpy_test1, // final float [][][][] fpxpy, // [tilesY][tilesX][cams][2], tile {pX,pY}
//// Uses quadCLT from gpuQuad
// correlation results - final and partial
// correlation results - final and partial
null, // [type][tilesY][tilesX][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
// [type][tilesY][tilesX] should be set by caller
// types: 0 - selected correlation (product+offset), 1 - sum
// [type][tilesY][tilesX] should be set by caller
// types: 0 - selected correlation (product+offset), 1 - sum
null, // clt_corr_partial,// [tilesY][tilesX][quad]color][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
// [tilesY][tilesX] should be set by caller
// When clt_mismatch is non-zero, no far objects extraction will be attempted
// [tilesY][tilesX] should be set by caller
// When clt_mismatch is non-zero, no far objects extraction will be attempted
null, // clt_mismatch, // [12][tilesY * tilesX] // ***** transpose unapplied ***** ?. null - do not calculate
// values in the "main" directions have disparity (*_CM) subtracted, in the perpendicular - as is
// values in the "main" directions have disparity (*_CM) subtracted, in the perpendicular - as is
null, // disparity_map, // [8][tilesY][tilesX], only [6][] is needed on input or null - do not calculate
// last 2 - contrast, avg/ "geometric average)
// last 2 - contrast, avg/ "geometric average)
0, // disparity_modes, // disparity_modes, // bit mask of disparity_map slices to calculate/return
null, // final double [][][][] texture_tiles, // compatible with the CPU ones
null, // texture_img, // texture_img, // null or [3][] (RGB) or [4][] RGBA
......@@ -2375,6 +2381,212 @@ public class QuadCLT extends QuadCLTCPU {
threadsMax,
debugLevel);
GPUTileProcessor.TpTask[][] test_tasks1 = image_dtt.clt_aberrations_quad_corr_GPU_test( // USED in LWIR
clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
1, // final int macro_scale, // to correlate tile data instead of the pixel data: 1 - pixels, 8 - tiles
tile_op, // per-tile operation bit codes
disparity_array, // clt_parameters.disparity, // final double disparity,
fcorr_td, // final float [][][][] corr_td, // [tilesY][tilesX][pair][4*64] transform domain representation of 6 corr pairs
null, // final float [][][][] corr_combo_td, // [4][tilesY][tilesX][pair][4*64] TD of combo corrs: qud, cross, hor,vert
// each of the top elements may be null to skip particular combo type
fdisp_dist, // final float [][][][] fdisp_dist, // [tilesY][tilesX][cams][4], // disparity derivatives vectors or null
fpxpy, // final float [][][][] fpxpy, // [tilesY][tilesX][cams][2], tile {pX,pY}
fpxpy_test2, // final float [][][][] fpxpy_test, // [tilesY][tilesX][cams][2], tile {pX,pY}
//// Uses quadCLT from gpuQuad
// correlation results - final and partial
null, // [type][tilesY][tilesX][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
// [type][tilesY][tilesX] should be set by caller
// types: 0 - selected correlation (product+offset), 1 - sum
null, // clt_corr_partial,// [tilesY][tilesX][quad]color][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
// [tilesY][tilesX] should be set by caller
// When clt_mismatch is non-zero, no far objects extraction will be attempted
null, // clt_mismatch, // [12][tilesY * tilesX] // ***** transpose unapplied ***** ?. null - do not calculate
// values in the "main" directions have disparity (*_CM) subtracted, in the perpendicular - as is
null, // disparity_map, // [8][tilesY][tilesX], only [6][] is needed on input or null - do not calculate
// last 2 - contrast, avg/ "geometric average)
0, // disparity_modes, // disparity_modes, // bit mask of disparity_map slices to calculate/return
null, // final double [][][][] texture_tiles, // compatible with the CPU ones
null, // texture_img, // texture_img, // null or [3][] (RGB) or [4][] RGBA
null, // texture_woi, // texture_woi, // null or generated texture location/size
null, // iclt_fimg, // will return quad images or null to skip, use quadCLT.linearStackToColor
clt_parameters.gpu_corr_scale, //.gpu_corr_scale, // gpu_corr_scale, // 0.75; // reduce GPU-generated correlation values
gpu_fat_zero, // final double gpu_fat_zero, // clt_parameters.getGpuFatZero(is_mono);absolute == 30.0\
clt_parameters.gpu_sigma_r, // 0.9, 1.1
clt_parameters.gpu_sigma_b, // 0.9, 1.1
clt_parameters.gpu_sigma_g, // 0.6, 0.7
clt_parameters.gpu_sigma_m, // = 0.4; // 0.7;
(isMonochrome()? 1.0 : clt_parameters.gpu_sigma_rb_corr), // final double gpu_sigma_rb_corr, // = 0.5; // apply LPF after accumulating R and B correlation before G, monochrome ? 1.0 : gpu_sigma_rb_corr;
clt_parameters.gpu_sigma_corr, // = 0.9;gpu_sigma_corr_m
image_dtt.transform_size - 1, // clt_parameters.gpu_corr_rad, // = transform_size - 1 ?
clt_parameters.corr_red, // +used
clt_parameters.corr_blue, // +used
clt_parameters.max_corr_radius,// 3.9;
clt_parameters.corr_mode, // Correlation mode: 0 - integer max, 1 - center of mass, 2 - polynomial
clt_parameters.min_shot, // 10.0; // Do not adjust for shot noise if lower than
clt_parameters.scale_shot, // 3.0; // scale when dividing by sqrt ( <0 - disable correction)
clt_parameters.diff_sigma, // 5.0;//RMS difference from average to reduce weights (~ 1.0 - 1/255 full scale image)
clt_parameters.diff_threshold, // 5.0; // RMS difference from average to discard channel (~ 1.0 - 1/255 full scale image)
clt_parameters.diff_gauss, // true; // when averaging images, use gaussian around average as weight (false - sharp all/nothing)
clt_parameters.min_agree, // 3.0; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
clt_parameters.dust_remove, // Do not reduce average weight when only one image differes much from the average
geometryCorrection, // final GeometryCorrection geometryCorrection,
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_parameters.clt_window,
disparity_corr, // final double disparity_corr, // disparity at infinity
clt_parameters.tileX, // final int debug_tileX,
clt_parameters.tileY, // final int debug_tileY,
threadsMax,
debugLevel);
if (dbg_lev > 0) {
double [][] dbg_disp = new double [16][tilesX*tilesY];
double [][] dbg_pxy = new double [8] [tilesX*tilesY];
double [][] dbg_pxy_test = new double [8] [tilesX*tilesY];
double [][] dbg_pxy_diff = new double [8] [tilesX*tilesY];
for (int tY = 0; tY<tilesY; tY++) {
for (int tX = 0; tX<tilesX; tX++) {
int indx = tY * tilesX + tX;
if (fdisp_dist[tY][tX] != null) {
for (int ncam = 0; ncam < 4; ncam++) {
for (int i = 0; i < 4; i++) {
dbg_disp[ncam * 4 + i][indx] = fdisp_dist[tY][tX][ncam][i];
}
for (int i = 0; i < 2; i++) {
dbg_pxy[ncam * 2 + i][indx] = fpxpy[tY][tX][ncam][i];
}
}
} else {
for (int ncam = 0; ncam < 4; ncam++) {
for (int i = 0; i < 4; i++) {
dbg_disp[ncam * 4 + i][indx] = Double.NaN;
}
for (int i = 0; i < 2; i++) {
dbg_pxy[ncam * 2 + i][indx] = Double.NaN;
}
}
}
if (fpxpy_test[tY][tX] != null) {
for (int ncam = 0; ncam < 4; ncam++) {
for (int i = 0; i < 2; i++) {
dbg_pxy_test[ncam * 2 + i][indx] = fpxpy_test[tY][tX][ncam][i];
}
}
} else {
for (int ncam = 0; ncam < 4; ncam++) {
for (int i = 0; i < 2; i++) {
dbg_pxy_test[ncam * 2 + i][indx] = Double.NaN;
}
}
}
}
}
double d2 = 0.0;
for (int n = 0; n < dbg_pxy_diff.length; n++) {
for (int i = 0; i < dbg_pxy_diff[n].length; i++) {
dbg_pxy_diff[n][i] = dbg_pxy[n][i] - dbg_pxy_test[n][i];
if (!Double.isNaN(dbg_pxy_diff[n][i])) {
d2+=dbg_pxy_diff[n][i]*dbg_pxy_diff[n][i];
if (dbg_pxy_diff[n][i] != 0.0) {
System.out.println("dbg_pxy - dbg_pxy_test mismatch @ n ="+n+" tile="+i);
// find tasks
int ty = i / tilesX;
int tx = i % tilesX;
System.out.println("tx= "+tx+", ty = "+ty);
// find index in test_tasks0[0]
int t0_indx = -1;
for (int ti = 0; ti < test_tasks0[0].length; ti++) {
GPUTileProcessor.TpTask t0 = test_tasks0[0][ti];
if ((t0.tx == tx) && (t0.ty == ty)) {
t0_indx = ti;
break;
}
}
int t1_indx = -1;
for (int ti = 0; ti < test_tasks1[0].length; ti++) {
GPUTileProcessor.TpTask t1 = test_tasks1[0][ti];
if ((t1.tx == tx) && (t1.ty == ty)) {
t1_indx = ti;
break;
}
}
System.out.println("tx= "+tx+", ty = "+ty+ " t0_indx="+t0_indx+ " t1_indx="+t1_indx);
System.out.print("");
}
}
}
}
System.out.println("(dbg_pxy - dbg_pxy_test) squared = "+d2);
d2 = 0.0;
for (int tY = 0; tY<tilesY; tY++) {
for (int tX = 0; tX<tilesX; tX++) {
int indx = tY * tilesX + tX;
if (fpxpy[tY][tX] != null) {
for (int ncam = 0; ncam < 4; ncam++) {
for (int i = 0; i < 2; i++) {
double diff = fpxpy[tY][tX][ncam][i] - fpxpy_test2[tY][tX][ncam][i];
if (diff != 0.0) {
System.out.println("fpxpy - fpxpy_test2 mismatch @ tile="+indx+" n ="+ncam+" i="+i);
System.out.print("");
}
d2 += diff*diff;
}
}
}
}
}
System.out.println("(fpxpy - fpxpy_test2) squared = "+d2);
d2 = 0.0;
for (int tY = 0; tY<tilesY; tY++) {
for (int tX = 0; tX<tilesX; tX++) {
int indx = tY * tilesX + tX;
if (fpxpy[tY][tX] != null) {
for (int ncam = 0; ncam < 4; ncam++) {
for (int i = 0; i < 2; i++) {
double diff = fpxpy_test[tY][tX][ncam][i] - fpxpy_test1[tY][tX][ncam][i];
if (diff != 0.0) {
System.out.println("fpxpy_test - fpxpy_test1 mismatch @ tile="+indx+" n ="+ncam+" i="+i);
System.out.print("");
}
d2 += diff*diff;
}
}
}
}
}
System.out.println("(fpxpy_test - fpxpy_test1) squared = "+d2);
(new ShowDoubleFloatArrays()).showArrays(
dbg_pxy,
tilesX,
tilesY,
true,
"fpxpy");
(new ShowDoubleFloatArrays()).showArrays(
dbg_pxy_test,
tilesX,
tilesY,
true,
"fpxpy_test");
(new ShowDoubleFloatArrays()).showArrays(
dbg_pxy_diff,
tilesX,
tilesY,
true,
"fpxpy_diff");
(new ShowDoubleFloatArrays()).showArrays(
dbg_disp,
tilesX,
tilesY,
true,
"fdisp_dist");
}
lazy_eye_data = image_dtt.cltMeasureLazyEyeGPU ( // returns d,s lazy eye parameters
clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
disparity_array, // final double [][] disparity_array, // [tilesY][tilesX] - individual per-tile expected disparity
......@@ -2391,6 +2603,51 @@ public class QuadCLT extends QuadCLTCPU {
clt_parameters.tileY, // final int debug_tileY,
threadsMax,
debugLevel);
if (dbg_lev > 0) {
double [][] dbg_disp = new double [16][tilesX*tilesY];
double [][] dbg_pxy = new double [8] [tilesX*tilesY];
for (int tY = 0; tY<tilesY; tY++) {
for (int tX = 0; tX<tilesX; tX++) {
int indx = tY * tilesX + tX;
if (fdisp_dist[tY][tX] != null) {
for (int ncam = 0; ncam < 4; ncam++) {
for (int i = 0; i < 4; i++) {
dbg_disp[ncam * 4 + i][indx] = fdisp_dist[tY][tX][ncam][i];
}
for (int i = 0; i < 2; i++) {
dbg_pxy[ncam * 2 + i][indx] = fpxpy[tY][tX][ncam][i];
}
}
} else {
for (int ncam = 0; ncam < 4; ncam++) {
for (int i = 0; i < 4; i++) {
dbg_disp[ncam * 4 + i][indx] = Double.NaN;
}
for (int i = 0; i < 2; i++) {
dbg_pxy[ncam * 2 + i][indx] = Double.NaN;
}
}
}
}
}
(new ShowDoubleFloatArrays()).showArrays(
dbg_disp,
tilesX,
tilesY,
true,
"fdisp_dist-after");
(new ShowDoubleFloatArrays()).showArrays(
dbg_pxy,
tilesX,
tilesY,
true,
"fpxpy-after");
}
}
scan.setLazyEyeData(lazy_eye_data);
......@@ -2399,6 +2656,7 @@ public class QuadCLT extends QuadCLTCPU {
scan.resetProcessed();
return scan;
}
public void gpuResetCorrVector() {
if (getGPU() != null) {
getGPU().resetGeometryCorrectionVector();
......
......@@ -7904,7 +7904,7 @@ public class QuadCLTCPU {
CLTPass3d scan = tp.clt_3d_passes.get(combo_scan);
// for the second half of runs (always for single run) - limit infinity min/max
boolean debug_actual_LY_derivs = true; // debugLevel > 9;
boolean debug_actual_LY_derivs = debugLevel > 9; // true
if (debug_actual_LY_derivs) {
debugLYDerivatives(
......
......@@ -513,4 +513,41 @@ public class TileNeibs{
return num_total_removed;
}
public boolean [] fillConcave(
boolean [] tiles // should be the same size
) {
boolean [] filled = tiles.clone();
int min_neibs = 4;
ArrayList<Integer> front = new ArrayList<Integer>();
for (int start_indx = 0; start_indx < tiles.length; start_indx++) if (!filled[start_indx]) {
int nneib = 0;
for (int dir = 0; dir < dirs; dir++) {
int id = getNeibIndex(start_indx, dir);
if ((id >= 0) && filled[id]) nneib++;
}
if (nneib >= min_neibs) {
front.add(start_indx); // do not mark yet
filled[start_indx] = true;
while (!front.isEmpty()) {
int seed = front.remove(0);// get oldest element
for (int d = 0; d < dirs; d++){
int indx = getNeibIndex(seed, d);
if ((indx >=0) && !filled[indx]) {
nneib = 0;
for (int dir = 0; dir < dirs; dir++) {
int id = getNeibIndex(indx, dir);
if ((id >= 0) && filled[id]) nneib++;
}
if (nneib >= min_neibs) {
front.add(indx); // do not mark yet
filled[indx] = true;
}
}
}
}
}
}
return filled;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment