Commit 575e240c authored by Andrey Filippov's avatar Andrey Filippov

speeding up

parent 689efe82
......@@ -30,6 +30,8 @@ import ij.ImageStack;
import java.util.concurrent.atomic.AtomicInteger;
import javax.swing.SwingUtilities;
public class DebayerScissors {
// showDoubleFloatArrays SDFA_INSTANCE= new showDoubleFloatArrays();
......@@ -96,7 +98,29 @@ public class DebayerScissors {
final Thread[] threads = newThreadArray(threadsMax);
final AtomicInteger ai = new AtomicInteger(0);
final int numberOfKernels=tilesY*tilesX;
int indx,dx,dy,tx,ty,li;
final int [] nonOverlapSeq = new int[numberOfKernels];
int [] nextFirstFindex=new int[4];
indx = 0;
li=0;
for (dy=0;dy<2;dy++) for (dx=0;dx<2;dx++) {
for (ty=dy; ty < tilesY; ty+=2) for (tx=dx; tx < tilesX; tx+=2){
nonOverlapSeq[indx++] = ty*tilesX + tx;
}
nextFirstFindex[li++] = indx;
}
final AtomicInteger aStopIndex = new AtomicInteger(0);
final long startTime = System.nanoTime();
final AtomicInteger tilesFinishedAtomic = new AtomicInteger(1); // first finished will be 1
for (li = 0; li < nextFirstFindex.length; li++){
aStopIndex.set(nextFirstFindex[li]);
if (li>0){
ai.set(nextFirstFindex[li-1]);
}
// System.out.println("\n=== nextFirstFindex["+li+"] =" + nextFirstFindex[li]+" === ");
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
public void run() {
......@@ -114,17 +138,20 @@ public class DebayerScissors {
debayerParameters.debayerRelativeWidthRedblue, // result red/blue mask mpy by scaled default (square)
debayerParameters.debayerRelativeWidthRedblueMain, // green mask when applied to red/blue, main (center)
debayerParameters.debayerRelativeWidthRedblueClones);// green mask when applied to red/blue, clones
for (int nTile = ai.getAndIncrement(); nTile < numberOfKernels; nTile = ai.getAndIncrement()) {
// for (int nTile0 = ai.getAndIncrement(); nTile0 < numberOfKernels; nTile0 = ai.getAndIncrement()) {
for (int nTile0 = ai.getAndIncrement(); nTile0 < aStopIndex.get(); nTile0 = ai.getAndIncrement()) {
int nTile = nonOverlapSeq[nTile0];
tileY = nTile /tilesX;
tileX = nTile % tilesX;
if (tileX==0) {
if (updateStatus) IJ.showStatus("(1)Reducing sampling aliases, row "+(tileY+1)+" of "+tilesY);
if (tileX < 2) {
int trow=(tileY+((tileY & 1)*tilesY))/2;
if (updateStatus) IJ.showStatus("Reducing sampling aliases, row "+(trow+1)+" of "+tilesY);
// System.out.println("(1)Reducing sampling aliases, row "+(tileY+1)+" of "+tilesY+" ("+nTile+"/"+nTile0+") col="+(tileX+1));
if (globalDebugLevel>2) System.out.println("(1)Reducing sampling aliases, row "+(tileY+1)+" of "+tilesY+" : "+IJ.d2s(0.000000001*(System.nanoTime()-startTime),3));
}
// if ((tileY==yTileDebug) && (tileX==xTileDebug)) this.debugLevel=4;
// else this.debugLevel=wasDebugLevel;
// if ((tileY==yTileDebug) && (tileX==xTileDebug)) this.debugLevel=4;
// else this.debugLevel=wasDebugLevel;
for (chn=0;chn<nChn;chn++){
extractSquareTile( pixels[chn], // source pixel array,
tile[chn], // will be filled, should have correct size before call
......@@ -149,8 +176,8 @@ public class DebayerScissors {
debayerParameters.debayerThreshold, // no high frequencies - use default uniform filter
debayerParameters.debayerGamma, // power function applied to the amplitudes before generating spectral masks
debayerParameters.debayerBonus, // scale far pixels as (1.0+bonus*r/rmax)
debayerParameters.mainToAlias,// relative main/alias amplitudes to enable lixels (i.e. 0.5 means that if alias is >0.5*main, the pixel will be masked out)
debayerParameters.debayerMaskBlur, // for both masks sigma for gaussian blur of the binary masks (<0 -do not use "scissors")
debayerParameters.mainToAlias,// relative main/alias amplitudes to enable pixels (i.e. 0.5 means that if alias is >0.5*main, the pixel will be masked out)
debayerParameters.debayerMaskBlur, // for both masks sigma for Gaussian blur of the binary masks (<0 -do not use "scissors")
debayerParameters.debayerUseScissors, // use "scissors", if false - just apply "diamond" ands "square" with DEBAYER_PARAMETERS.debayerRelativeWidthGreen and DEBAYER_PARAMETERS.debayerRelativeWidthRedblue
((tileY==yTileDebug) && (tileX==xTileDebug))?4:1);
// 1); // internal debug level ((this.debugLevel>2) && (yTile==yTile0) && (xTile==xTile0))?3:1;
......@@ -167,22 +194,32 @@ public class DebayerScissors {
fht_instance.swapQuadrants(tile[chn]);
/* accumulate result */
/*This is synchronized method. It is possible to make threads to write to non-overlapping regions of the outPixles, but as the accumulation
* takes just small fraction of severtal FHTs, it should be OK - reasonable number of threads will spread and not "stay in line"
* takes just small fraction of several FHTs, it should be OK - reasonable number of threads will spread and not "stay in line"
*/
accumulateSquareTile(outPixles[chn], // float pixels array to accumulate tile
//accumulateSquareTile(
nonSyncAccumulateSquareTile (
outPixles[chn], // float pixels array to accumulate tile
tile[chn], // data to accumulate to the pixels array
imgWidth, // width of pixels array
tileX*step, // left corner X
tileY*step); // top corner Y
}
if ((tileY==yTileDebug) && (tileX==xTileDebug) && (SDFA_instance!=null)) SDFA_instance.showArrays (tile.clone(),debayerParameters.size,debayerParameters.size, "B00");
final int numFinished=tilesFinishedAtomic.getAndIncrement();
SwingUtilities.invokeLater(new Runnable() {
public void run() {
IJ.showProgress(numFinished,numberOfKernels);
}
});
}
}
};
}
startAndJoin(threads);
}
if (updateStatus) IJ.showStatus("Reducing sampling aliases DONE");
IJ.showProgress(1.0);
// this.debugLevel=wasDebugLevel;
/* prepare result stack to return */
ImageStack outStack=new ImageStack(imgWidth,imgHeight);
......@@ -193,7 +230,7 @@ public class DebayerScissors {
// if (debayerParameters.showEnergy) {
// SDFA_INSTANCE.showArrays (debayerEnergy,tilesX,tilesY, "Debayer-Energy");
// }
if (globalDebugLevel>0) System.out.println("(1)Reducing sampling aliases done in "+IJ.d2s(0.000000001*(System.nanoTime()-startTime),3));
return outStack;
}
......@@ -273,6 +310,31 @@ public class DebayerScissors {
}
}
}
void nonSyncAccumulateSquareTile(
float [] pixels, // float pixels array to accumulate tile
double [] tile, // data to accumulate to the pixels array
int width, // width of pixels array
int x0, // left corner X
int y0) { // top corner Y
int length=tile.length;
int size=(int) Math.sqrt(length);
int i,j,x,y;
int height=pixels.length/width;
int index=0;
for (i=0;i<size;i++) {
y=y0+i;
if ((y>=0) && (y<height)) {
index=i*size;
for (j=0;j<size;j++) {
x=x0+j;
if ((x>=0) && (x<width)) pixels[y*width+x]+=tile [index];
index++;
}
}
}
}
synchronized void accumulateSquareTile(
double [] pixels, // float pixels array to accumulate tile
double [] tile, // data to accumulate to the pixels array
......
......@@ -39,6 +39,8 @@ import ij.process.ImageProcessor;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import javax.swing.SwingUtilities;
import loci.common.services.DependencyException;
import loci.common.services.ServiceException;
import loci.formats.FormatException;
......@@ -473,7 +475,7 @@ public class EyesisCorrections {
this.defectsDiff[srcChannel]=this.pixelMapping.getDefectsDiff(srcChannel);
if (this.debugLevel>0){
if (this.defectsXY[srcChannel]==null){
System.out.println("No pixel defects info is availabele for channel "+srcChannel);
System.out.println("No pixel defects info is available for channel "+srcChannel);
} else {
System.out.println("Extracted "+this.defectsXY[srcChannel].length+" pixel outlayers for channel "+srcChannel+
" (x:y:difference");
......@@ -1712,14 +1714,31 @@ public class EyesisCorrections {
final AtomicInteger ai = new AtomicInteger(0);
final int numberOfKernels= tilesY*tilesX*nChn;
final int numberOfKernelsInChn=tilesY*tilesX;
// if (globalDebugLevel>1)
System.out.println("Eyesis_Correction:convolveStackWithKernelStack :\n"+
int ichn,indx,dx,dy,tx,ty,li;
final int [] nonOverlapSeq = new int[numberOfKernels];
int [] nextFirstFindex=new int[16*nChn];
indx = 0;
li=0;
for (ichn=0;ichn<nChn;ichn++) for (dy=0;dy<4;dy++) for (dx=0;dx<4;dx++) {
for (ty=dy; ty < tilesY; ty+=4) for (tx=dx; tx < tilesX; tx+=4){
nonOverlapSeq[indx++] = ichn*numberOfKernelsInChn+ ty*tilesX + tx;
}
nextFirstFindex[li++] = indx;
}
final AtomicInteger aStopIndex = new AtomicInteger(0);
final AtomicInteger tilesFinishedAtomic = new AtomicInteger(1); // first finished will be 1
if (globalDebugLevel>1)
System.out.println("Eyesis_Corrections:convolveStackWithKernelStack :\n"+
"globalDebugLevel="+globalDebugLevel+"\n"+
"imgWidth="+imgWidth+"\n"+
"imgHeight="+imgHeight+"\n"+
"tilesX="+tilesX+"\n"+
"tilesY="+tilesY+"\n"+
"nChn="+nChn+"\n"+
"step="+step+"\n"+
"size="+size+"\n"+
"kernelSize="+kernelSize+"\n"+
"kernelWidth="+kernelWidth+"\n"+
"kernelNumHor="+kernelNumHor+"\n"+
......@@ -1727,6 +1746,12 @@ public class EyesisCorrections {
final long startTime = System.nanoTime();
for (li = 0; li < nextFirstFindex.length; li++){
aStopIndex.set(nextFirstFindex[li]);
if (li>0){
ai.set(nextFirstFindex[li-1]);
}
// System.out.println("\n=== nextFirstFindex["+li+"] =" + nextFirstFindex[li]+" === ");
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
public void run() {
......@@ -1738,15 +1763,19 @@ public class EyesisCorrections {
double [] doubleKernel= new double[size * size];
int chn,tileY,tileX;
int chn0=-1;
// double debug_sum;
// int i;
// double debug_sum;
// int i;
DoubleFHT fht_instance =new DoubleFHT(); // provide DoubleFHT instance to save on initializations (or null)
for (int nTile = ai.getAndIncrement(); nTile < numberOfKernels; nTile = ai.getAndIncrement()) {
// for (int nTile0 = ai.getAndIncrement(); nTile0 < numberOfKernels; nTile0 = ai.getAndIncrement()) {
for (int nTile0 = ai.getAndIncrement(); nTile0 < aStopIndex.get(); nTile0 = ai.getAndIncrement()) {
//aStopIndex
int nTile = nonOverlapSeq[nTile0];
chn=nTile/numberOfKernelsInChn;
tileY =(nTile % numberOfKernelsInChn)/tilesX;
tileX = nTile % tilesX;
if (tileX==0) {
if (updateStatus) IJ.showStatus("Convolving image with kernels, channel "+(chn+1)+" of "+nChn+", row "+(tileY+1)+" of "+tilesY);
if (tileX < 4) {
int trow=(tileY+ ((tileY & 3) * tilesY))/4;
if (updateStatus) IJ.showStatus("Convolving image with kernels, channel "+(chn+1)+" of "+nChn+", row "+(trow+1)+" of "+tilesY);
if (globalDebugLevel>2) System.out.println("Processing kernels, channel "+(chn+1)+" of "+nChn+", row "+(tileY+1)+" of "+tilesY+" : "+IJ.d2s(0.000000001*(System.nanoTime()-startTime),3));
}
......@@ -1777,9 +1806,9 @@ public class EyesisCorrections {
tileY); // vertical number of kernel to extract
/* zero pad twice the original size*/
doubleKernel=extendFFTInputTo (kernel, size);
// debug_sum=0;
// for (i=0;i<doubleKernel.length;i++) debug_sum+=doubleKernel[i];
// if (globalDebugLevel>1) System.out.println("kernel sum="+debug_sum);
// debug_sum=0;
// for (i=0;i<doubleKernel.length;i++) debug_sum+=doubleKernel[i];
// if (globalDebugLevel>1) System.out.println("kernel sum="+debug_sum);
//if ((tileY==tilesY/2) && (tileX==tilesX/2)) SDFA_INSTANCE.showArrays(doubleKernel,size,size, "doubleKernel-"+chn);
/* FHT transform of the kernel */
......@@ -1793,25 +1822,43 @@ public class EyesisCorrections {
/* accumulate result */
//if ((tileY==tilesY/2) && (tileX==tilesX/2)) SDFA_INSTANCE.showArrays(outTile,size,size, "out-"+chn);
/*This is synchronized method. It is possible to make threads to write to non-overlapping regions of the outPixels, but as the accumulation
* takes just small fraction of severtal FHTs, it should be OK - reasonable number of threads will spread and not "stay in line"
* takes just small fraction of several FHTs, it should be OK - reasonable number of threads will spread and not "stay in line"
*/
accumulateSquareTile(outPixels[chn], // float pixels array to accumulate tile
// Add smart synchronization - wait only if is too far ahead. First test - no synchronization at all
//accumulateSquareTile(
// System.out.print(tileY+":"+tileX+"/"+chn+"("+nTile0+"/"+nTile+") ");
// if (tileX < 4)System.out.println();
nonSyncAccumulateSquareTile(
outPixels[chn], // float pixels array to accumulate tile
outTile, // data to accumulate to the pixels array
imgWidth, // width of pixels array
(tileX-1)*step, // left corner X
(tileY-1)*step); // top corner Y
final int numFinished=tilesFinishedAtomic.getAndIncrement();
SwingUtilities.invokeLater(new Runnable() {
public void run() {
IJ.showProgress(numFinished,numberOfKernels);
}
});
//numberOfKernels
}
}
};
}
startAndJoin(threads);
if (globalDebugLevel > 1) System.out.println("Threads done at "+IJ.d2s(0.000000001*(System.nanoTime()-startTime),3));
}
if (updateStatus) IJ.showStatus("Convolution DONE");
if (globalDebugLevel > 1) System.out.println("Threads done in "+IJ.d2s(0.000000001*(System.nanoTime()-startTime),3));
IJ.showProgress(1.0);
/* prepare result stack to return */
ImageStack outStack=new ImageStack(imgWidth,imgHeight);
for (i=0;i<nChn;i++) {
outStack.addSlice(imageStack.getSliceLabel(i+1), outPixels[i]);
}
if (globalDebugLevel > 0) System.out.println("Convolution done in "+IJ.d2s(0.000000001*(System.nanoTime()-startTime),3));
return outStack;
}
/* Adds zero pixels around the image, "extending canvas" */
......@@ -1942,6 +1989,32 @@ public class EyesisCorrections {
}
}
}
void nonSyncAccumulateSquareTile(
float [] pixels, // float pixels array to accumulate tile
double [] tile, // data to accumulate to the pixels array
int width, // width of pixels array
int x0, // left corner X
int y0) { // top corner Y
int length=tile.length;
int size=(int) Math.sqrt(length);
int i,j,x,y;
int height=pixels.length/width;
int index=0;
for (i=0;i<size;i++) {
y=y0+i;
if ((y>=0) && (y<height)) {
index=i*size;
for (j=0;j<size;j++) {
x=x0+j;
if ((x>=0) && (x<width)) pixels[y*width+x]+=tile [index];
index++;
}
}
}
}
synchronized void accumulateSquareTile(
double [] pixels, // float pixels array to accumulate tile
double [] tile, // data to accumulate to the pixels array
......
......@@ -3967,7 +3967,7 @@ private Panel panel1,panel2,panel3,panel4,panel5,panel5a, panel6,panel7,panelPos
final AtomicInteger ai = new AtomicInteger(0);
final int numberOfKernels= tilesY*tilesX*nChn;
final int numberOfKernelsInChn=tilesY*tilesX;
// if (MASTER_DEBUG_LEVEL>1)
if (MASTER_DEBUG_LEVEL>1)
System.out.println("Eyesis_Correction:convolveStackWithKernelStack :\n"+
"MASTER_DEBUG_LEVEL="+MASTER_DEBUG_LEVEL+"\n"+
"imgWidth="+imgWidth+"\n"+
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment