package com.elphel.imagej.orthomosaic;

import java.awt.Rectangle;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;

import com.elphel.imagej.common.DoubleGaussianBlur;
import com.elphel.imagej.common.ShowDoubleFloatArrays;
import com.elphel.imagej.tileprocessor.ImageDtt;
import com.elphel.imagej.tileprocessor.TileNeibs;
import com.elphel.imagej.tileprocessor.TileProcessor;

import ij.ImagePlus;
import ij.ImageStack;
import ij.Prefs;
import ij.io.FileSaver;

public class OrangeTest {
	public static double [][][] matching_points_initial= {
			{ // 1697877488_162849 - 117 sfm 37.1
				{ 110.500, 452.500},  
				{ 287.500, 121.500},  
				{1294.833, 545.833},
				{1307.500, 162.167}				
			},
			{ // 1697877490_930437 -  42 sfm  6.4
				{ 410.167, 979.833},
				{ 585.500, 649.500},
				{1579.167,1126.833},
				{1604.750, 737.000}				
			},
			{ // 1697877491_613999 -  76 sfm 12.2 0, 76
				{ 423.500, 1260.250},
				{ 598.833,  937.500},
				{1587.000, 1404.000},
				{1613.167, 1017.167}				
			},
			{ // 1697877491_997460 -  85 sfm 14.2 master
				{ 464.000, 1551.000},
				{ 617.167, 1222.500},
				{1630.167, 1624.500},
				{1630.250, 1239.500}				
			},
			{ // 1697877492_314232 -  43 sfm 5.1
				{ 637.000, 1830.000},
				{ 763.167, 1489.167},
				{1807.167, 1807.167},
				{1772.833, 1420.500}				
			},
			{ // 1697877492_614332 -  38 sfm 4.2
				{ 731.000, 1952.500},
				{ 828.500, 1606.500},
				{1893.000, 1829.500},
				{1826.500, 1453.000}				
			}
	};
	
	public static double [][][] matching_points_first_corr= { // /media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/02-first_point_correction/
			{ // 1697877488_162849 - 117 sfm 37.1
				{ 105.500, 452.500},  
				{ 289.500, 121.500},  
				{1298.833, 550.833},
				{1305.500, 164.167}				
			},
			{ // 1697877490_930437 -  42 sfm  6.4
				{ 410.167, 981.833},
				{ 585.500, 651.500},
				{1579.167,1128.833},
				{1606.750, 741.000}				
			},
			{ // 1697877491_613999 -  76 sfm 12.2 0, 76
				{ 419.500, 1260.250},
				{ 596.833,  937.500},
				{1585.000, 1404.000},
				{1613.167, 1019.167}				
			},
			{ // 1697877491_997460 -  85 sfm 14.2 MASTER
				{ 464.000, 1551.000},
				{ 617.167, 1222.500},
				{1630.167, 1624.500},
				{1630.250, 1239.500}				
			},
			{ // 1697877492_314232 -  43 sfm 5.1
				{ 637.000, 1828.000},
				{ 765.167, 1489.167},
				{1805.167, 1805.167},
				{1770.833, 1422.500}				
			},
			{ // 1697877492_614332 -  38 sfm 4.2
				{ 733.000, 1950.500},
				{ 832.500, 1604.500},
				{1891.000, 1829.500},
				{1826.500, 1455.000}				
			}
	};
	public static double [][][] matching_points_second_corr= { // /media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/02-first_point_correction/
			{ // 1697877488_162849 - 117 sfm 37.1
				{ 106.500, 453.500},  //+1,+1
				{ 290.500, 123.500},  //+1,+2
				{1299.833, 551.833},  //+1,+1
				{1305.500, 164.667}   // 0,+.5				
			},
			{ // 1697877490_930437 -  42 sfm  6.4
				{ 411.167, 981.833},  //+1,0
				{ 585.500, 652.500},  // 0,+1
				{1579.167,1128.833},  // 0, 0
				{1606.750, 741.500}	  // 0,+.5			
			},
			{ // 1697877491_613999 -  76 sfm 12.2 0, 76
				{ 420.500, 1260.250}, //+1, 0
				{ 596.833,  937.500}, // 0, 0
				{1586.000, 1404.000}, //+1, 0
				{1613.167, 1019.667}  // 0,+.5				
			},
			{ // 1697877491_997460 -  85 sfm 14.2 MASTER
				{ 464.000, 1551.000},
				{ 617.167, 1222.500},
				{1630.167, 1624.500},
				{1630.250, 1239.500}				
			},
			{ // 1697877492_314232 -  43 sfm 5.1
				{ 637.000, 1828.000}, // 0, 0
				{ 765.167, 1489.167}, // 0, 0
				{1805.167, 1805.167}, //+1,+1
				{1770.833, 1422.500}  // 0,+1				
			},
			{ // 1697877492_614332 -  38 sfm 4.2
				{ 732.000, 1950.500}, //-1, 0
				{ 831.500, 1604.500}, //-1, 0
				{1892.000, 1830.500}, //+1,+1
				{1826.000, 1456.000}  //-.5,+1				
			}
	};

	public static int NUM_SPACE_KERNELS = 9;
	public static String [] src_files = {
		"/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/1697877488_162849-TERRAIN-MERGED.tiff",
		"/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/1697877490_930437-TERRAIN-MERGED.tiff",
		"/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/1697877491_613999-TERRAIN-MERGED.tiff",
		"/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/1697877491_997460-TERRAIN-MERGED.tiff",
		"/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/1697877492_314232-TERRAIN-MERGED.tiff",
		"/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/1697877492_614332-TERRAIN-MERGED.tiff"};
	public static double [] value_offsets = {33.0,  0.0,  0.0,  0.0,  0.0, 0.0};
//	public static String merged_file = "/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/04-third-corr/merged_seq_scale2.0-slices260_x92_y280_w320_h120_zeronan.tiff";
	public static String merged_file = "/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/04-third-corr/merged_weighted/merged_seq_scale2.0-slices260_x92_y280_w320_h120_zeronan.tiff";
	public static String convolutions_dir = "/media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/04-third-corr/convolutions/";
	public static double points_scale = 4.0;

	public static double [][][] matching_points= { // /media/elphel/SSD3-4GB/lwir16-proc/berdich3/debug/agent_orange/terrain-merged/02-first_point_correction/
			{ // 1697877488_162849 - 117 sfm 37.1
				{ 105.500, 453.500},  //-1, 0
				{ 292.500, 123.500},  //+2, 0
				{1299.833, 552.333},  // 0, +.5
				{1304.500, 164.667}   //-1, 0				
			},
			{ // 1697877490_930437 -  42 sfm  6.4
				{ 411.167, 982.833},  // 0,+1
				{ 585.500, 653.500},  // 0,+1
				{1578.167,1128.833},  //-1, 0
				{1606.750, 741.500}	  // 0, 0			
			},
			{ // 1697877491_613999 -  76 sfm 12.2 0, 76
				{ 421.000, 1260.250}, //+.5,0
				{ 596.833,  937.500}, // 0, 0
				{1586.000, 1404.000}, // 0, 0
				{1613.167, 1020.167}  // 0,+.5				
			},
			{ // 1697877491_997460 -  85 sfm 14.2 MASTER
				{ 464.000, 1551.000},
				{ 617.167, 1222.500},
				{1630.167, 1624.500},
				{1630.250, 1239.500}				
			},
			{ // 1697877492_314232 -  43 sfm 5.1
				{ 637.000, 1828.000}, // 0, 0
				{ 764.667, 1489.167}, //-.5,0 
				{1806.167, 1806.167}, //+1,+1
				{1770.833, 1422.000}  //0,-.5				
			},
			{ // 1697877492_614332 -  38 sfm 4.2
				{ 732.000, 1950.000}, // 0,-.5
				{ 830.500, 1604.500}, //-1, 0
				{1893.000, 1830.500}, //+1, 0
				{1826.000, 1455.000}  // 0,-1				
			}
	};
	public static int [][]  full_segments =   {{ 0,117},{0,42},{0,76},{ 0,85},{ 0,43},{ 0,38}};
	public static int [][]  match_segments =  {{ 0,117},{0, 7},{0,76},{62,85},{24,43},{20,38}}; // maybe improve to cut symmetrically?
	public static int [][]  single_segments = {{60,61},{0,1},{0,1},{0,1},{0,1},{0,1}};
	
	public static double [][] patterns_integrate0 = {
			{1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}, // first scale
			{1.1, 1.1, 1.1, 1.1, 0.0, 0.0, 0.0, 0.0, 0.0}, // second scale
			{1.2, 1.2, 1.2, 1.2, 0.0, 0.0, 0.0, 0.0, 0.4}  // third scale
	};
	public static double [][] patterns_integrate1 = {
			{1.0, 1.0, 1.0, 1.0, 0.65, 0.65, 0.65, 0.65, 0.0}, // first scale
			{0.0, 0.0, 0.0, 0.0, 0.0,  0.0,  0.0,  0.0,  0.0}, // second scale
			{1.3, 1.3, 1.3, 1.3, 0.85, 0.85, 0.85, 0.85, 0.0}  // third scale
	};

	public static double [][] patterns_integrate = {  // only sharp
			{1.0, 1.0, 1.0, 1.0, 0.65, 0.65, 0.65, 0.65, 0.0}, // first scale
			{0.0, 0.0, 0.0, 0.0, 0.0,  0.0,  0.0,  0.0,  0.0}, // second scale
			{0.0, 0.0, 0.0, 0.0, 0.0,  0.0,  0.0,  0.0,  0.0}  // third scale
	};

	
	public static void processMerged() {
		boolean  save_convolutions =       true;
		boolean  convolve_and_exit =      true;
		boolean  use_saved_convolutions = true; // false; // true;
		boolean  repeat_border=   true;
		boolean  show_orig_dt =   false; // true; // false;
		boolean  show_derivs_dt = true;
		boolean  show_kernels =   true; // false; // true;
		boolean  full_convolves_only = true;
		
		boolean  show_pre_best =   true; // false;
		boolean  show_best     =   true;
		boolean  show_best_temp  = true;
		boolean  show_result =     true;

		
		int      num_vert = 1; //4;
		int      vgap =    0; // 10;
		double [][] time_rt_scales =  {{2.9,2.9},{2.9,4.9},{4.9,4.9}, {9.9,2.9}};
		double [][] space_rt_scales = {{2.9,2.9},{2.9,4.9},{4.9,4.9}};
		double []   blur3d =          {2.9, 4.9}; // blur result weights before application 
		int         num_tscales = time_rt_scales.length;
		int         num_sscales = space_rt_scales.length;
		
		double   frac_outliers = 0.1;
		String conv_dir = save_convolutions ? convolutions_dir : null;

		// Create kernels
		double [][][][] kernels_diff_time= new double [num_tscales][][][];
		for (int nk = 0; nk < num_tscales; nk++) {
			kernels_diff_time[nk] = kernelDiffTime(
					time_rt_scales[nk][0], //double rt,
					time_rt_scales[nk][1]); // double rxy)
			if (show_kernels) {
				showKernel(
						kernels_diff_time[nk], // double [][][] kernel,
						"kernel_diff_time_num"+ nk + "_rt"+time_rt_scales[nk][0]+"_rxy"+time_rt_scales[nk][1]); // String title)
			}
		}
		System.out.println("Created kernels");
		double [][][][][] space_kernels= new double[num_sscales][NUM_SPACE_KERNELS][][][];
		for (int nk = 0; nk < num_sscales; nk++) {
			for (int dir = 0; dir < space_kernels[nk].length; dir++) {
				space_kernels[nk][dir]=  kernelXY(
						dir,        // int    dir, // 0: +y, 1: +x+y, 2:+x, 3 +x-t, 4: 0: +center will be voting 1 of 10 (each plus and minus), weighted by abs(kernelDiffTime)
						space_rt_scales[nk][0],   // double rt,
						space_rt_scales[nk][1]); // double rxy)
			}
			if (show_kernels) {
				showKernels(
						space_kernels[nk], // double [][][][] kernels,
						"space_kernels_num"+nk+"_rt"+space_rt_scales[nk][0]+"_rxy"+space_rt_scales[nk][1]); // String title)
			}

		}
		double [][][] blur_kernel =  kernelBlur3d(
				blur3d[0],  // double rt,
				blur3d[1]); // 		double rxy)
		if (show_kernels) {
			showKernels(
					new double [][][][] {blur_kernel}, // double [][][][] kernels,
					"blur_kernel_rt"+blur3d[0]+"_rxy"+blur3d[1]); // String title)
		}
		
		// Read source data
		ImagePlus imp_merged = new ImagePlus(merged_file);
		ImageStack stack = imp_merged.getImageStack();
        final int width = imp_merged.getWidth();
        final int height = imp_merged.getHeight();
		final double [][] dpixels_all = new double [stack.getSize()][];
		String [] titles_spatial_all = new String[stack.getSize()];
		for (int i = 0; i < stack.getSize(); i++) {
			titles_spatial_all[i] = stack.getSliceLabel(i+1); 
		}
		String [] titles_spatial = new String [titles_spatial_all.length - 2];
		System.arraycopy(titles_spatial_all, 0, titles_spatial, 0, titles_spatial.length);
		for (int i = 0; i < dpixels_all.length; i++) {
			float [] fpixels = (float[]) stack.getPixels(i+1);
			dpixels_all[i] = new double[fpixels.length];
			for (int j = 0; j < fpixels.length; j++) {
				dpixels_all[i][j] = fpixels[j];
			}
		}
		
		String [] titles_temporal = new String[dpixels_all[0].length/width]; // height
		for (int t = 0; t < titles_temporal.length; t++) {
			if (num_vert == 1) {
				titles_temporal[t]=String.format("row %d",t);
			} else {
				titles_temporal[t]=String.format("rows: %d-%d",t*num_vert,(t+1)*num_vert-1);
			}
		}
		String [] titles_pattern = new String [num_sscales * 2 * NUM_SPACE_KERNELS];
		for (int nscale = 0; nscale < num_sscales; nscale++) {
			for (int nsign = 0; nsign<2; nsign++) {
				for (int npatt = 0; npatt <NUM_SPACE_KERNELS; npatt++) {
					int indx = npatt + nsign * NUM_SPACE_KERNELS + nscale * (NUM_SPACE_KERNELS * 2);
					titles_pattern[indx] = String.format("%d: scale=%d, sign=%d, patt=%d", indx, nscale, nsign, npatt);
				}
			}
		}
		
		
		
		
		if (show_orig_dt) { 
			showOrigDt (
					dpixels_all,      // final double [][] multi_image,
					width, // final int         width,
					num_vert,     // final int         num_vert,
					vgap);        // final int         vgap)
		}
		final int remove_last = 2;
		final double [][] dpixels = new double[dpixels_all.length - remove_last][];
		System.arraycopy(dpixels_all, 0, dpixels, 0, dpixels.length);
		final double [] dpixels_avg =    dpixels_all[dpixels_all.length - 2];
		final double [] dpixels_weight = dpixels_all[dpixels_all.length - 1];
		System.out.println("Read source data");
		// Convolve multi-scale temporal derivative kernels with source data
		double [][][]   time_derivs = new double[num_tscales][][];
		double [][][][] space_conv = new double[num_sscales][NUM_SPACE_KERNELS][][];
		
		String title_space_conv = "space_conv";
		for (int nk = 0; nk < num_sscales; nk++) {
			title_space_conv += "_"+space_rt_scales[nk][0]+"-"+space_rt_scales[nk][1];
		}

		String title_time_derivs = "time_derivs";
		int num_temp_scales = num_tscales;
		num_temp_scales = 3; //////////////////// Temporary
		for (int nk = 0; nk < num_temp_scales; nk++) {
			title_time_derivs += "_"+time_rt_scales[nk][0]+"-"+time_rt_scales[nk][1];
		}
		
		
		if (use_saved_convolutions) {
			String dir = conv_dir;
			if (!dir.endsWith(Prefs.getFileSeparator())){
				dir+=Prefs.getFileSeparator();
			}
			double [][][]   t_d = restoreTemporalConvolutions(dir + title_time_derivs+".tiff");
			double [][][][] s_c = restoreSpaceConvolutions   (dir + title_space_conv+".tiff");
			System.arraycopy(t_d, 0, time_derivs, 0, t_d.length);
			System.arraycopy(s_c, 0, space_conv,  0, s_c.length);
		} else {
			for (int nk = 0; nk < num_tscales; nk++) {
				time_derivs[nk] = convolve3d(
						dpixels,               // final double [][]   data_in,        // fill NaN before running, will treat NaN as 0 on input, skip if center is NaN
						width,                 // final int           width,
						kernels_diff_time[nk], // final double [][][] kernel,         // (odd)x(odd)x(odd}
						repeat_border,         // final boolean       repeat_border)
						full_convolves_only); // final boolean       full_convolves_only)

			}
			// Convolve multi-scale space kernels with source data 
			System.out.println("Convolved temporal derivatives, created time_derivs");

			double [][] space_scales = new double [num_sscales][];
			for (int nk = 0; nk < num_sscales; nk++) {
				for (int dir = 0; dir < space_kernels[nk].length; dir++) {
					space_conv[nk][dir] = convolve3d(
							dpixels,                // final double [][]   data_in,        // fill NaN before running, will treat NaN as 0 on input, skip if center is NaN
							width,                  // final int           width,
							space_kernels[nk][dir], // final double [][][] kernel,         // (odd)x(odd)x(odd}
							repeat_border,          // final boolean       repeat_border)
							full_convolves_only);   // final boolean       full_convolves_only)

				}
				space_scales[nk] = normalizeSpaceKernels(
						space_conv[nk]); // double [][][] conv_data) // same data convolved with different kernels, may have NaNs
				System.out.println("Convoved with kernels scale "+nk);
			}
			double average_space = space_scales[0][NUM_SPACE_KERNELS];
			System.out.println("Space kernels normalization:");
			for (int nk = 0; nk < num_sscales; nk++) {
				System.out.println("Scale number "+nk);
				for (int dir = 0; dir < space_kernels[nk].length; dir++) {
					System.out.println("dir number "+dir+", scale "+space_scales[nk][dir]);
				}
				System.out.println("average value "+space_scales[nk][space_scales[nk].length-1]);
			}
			System.out.println("average_space = "+average_space);

			// normalize all kernel groups for different scales together (match to the first group
			for (int nk = 0; nk < num_sscales; nk++) {
				for (int i = 0; i < NUM_SPACE_KERNELS; i++) {
					space_scales[nk][i] *= average_space/space_scales[nk][NUM_SPACE_KERNELS]; 
				}
				for (int dir = 0; dir < NUM_SPACE_KERNELS; dir++) {
					scaleConvolutions(
							space_conv[nk][dir],    // final double [][] conv_data, // same data convolved with different kernels, may have NaNs
							space_scales[nk][dir]); // final double scale)
				}
			}

			// normalize time derivatives to match those of the spatial ones
			double [][] time_averages = new double [num_tscales][];
			double [] time_std = new double [num_tscales];
			for (int nk = 0; nk < num_tscales; nk++) {
				time_averages[nk] = normalizeTimeDerivs( // return {std, average abs()}
						time_derivs[nk],     // final double [][] conv_data,
						frac_outliers);  // final double frac_outliers)
				time_std[nk] = time_averages[nk][0];  // [1] - average of the absolute values;
				scaleConvolutions(
						time_derivs[nk], // final double [][] conv_data, // same data convolved with different kernels, may have NaNs
						average_space/time_std[nk]); // final double scale)
			}
			System.out.println("Temporal kernels normalization:");
			for (int nk = 0; nk < num_tscales; nk++) {
				System.out.println("Scale number "+nk+" std="+time_averages[nk][0]+" abs="+time_averages[nk][1]);

			}

			saveSpaceConvolutions(
					space_conv,        // double [][][][] space_convolutions,
					width,             // int             width,
					title_space_conv,  // String          title,
					conv_dir);         // String          save_dir) // do not save if null, but show

			saveTemporalConvolutions(
					time_derivs,       // double [][][] time_derivs,
					width,             // int             width,
					title_time_derivs, // String          title,
					conv_dir);         // String          save_dir) // do not save if null, but show
			if (convolve_and_exit) {
				return;
			}
		}
		
		// calculate time derivative with the selected time derivative kernel for each spatial convolution
		System.out.println("Calculating space convolution derivatives with respect to time");
		final int time_deriv_patt = 3; // convolve with a single derivative
		double [][][][] space_conv_dt = new double [space_conv.length][space_conv[0].length][][];
		for (int nscale = 0; nscale <  space_conv.length; nscale++) {
			for (int npatt = 0; npatt < space_conv[0].length; npatt++) {
				space_conv_dt[nscale][npatt] =  convolve3d(
						space_conv[nscale][npatt],          // final double [][]   data_in,        // fill NaN before running, will treat NaN as 0 on input, skip if center is NaN
						width,                              // final int           width,
						kernels_diff_time[time_deriv_patt], // final double [][][] kernel,         // (odd)x(odd)x(odd}
						repeat_border,                      // final boolean       repeat_border)
						full_convolves_only);               // final boolean       full_convolves_only)
			}
		}
		System.out.println("Done calculating space convolution derivatives with respect to time");
		{
			String title_space_dt = title_space_conv+"_dt_"+time_rt_scales[time_deriv_patt][0]+"-"+time_rt_scales[time_deriv_patt][1];
			saveSpaceConvolutions(
					space_conv_dt,        // double [][][][] space_convolutions,
					width,             // int             width,
					title_space_dt,    // String          title,
					null); // conv_dir);         // String          save_dir) // do not save if null, but show

		}
		// combine kernel derivatives fotr future exclusion from integration
		boolean         use_max = false; // true;
		double [][] combo_dt =  combineSelectedConvolutions(
						space_conv_dt, // final double [][][][] space_conv_dt,
						patterns_integrate, // final double [][]     pattern_selection)
						use_max); // final boolean         use_max)
		double [][] combo_dt1 =  combineSelectedConvolutions(
				space_conv_dt, // final double [][][][] space_conv_dt,
				patterns_integrate, // final double [][]     pattern_selection)
				!use_max); // final boolean         use_max)
		{
			String title_combo_dt = title_space_conv+"_combo_dt_"+time_rt_scales[time_deriv_patt][0]+"-"+time_rt_scales[time_deriv_patt][1]+(use_max?"-max":"-avg");
			ShowDoubleFloatArrays.showArrays(
					combo_dt,
					width,
					height,
					true,
					title_combo_dt+".tiff",
					titles_spatial);
			String title_combo_dt1 = title_space_conv+"_combo_dt_"+time_rt_scales[time_deriv_patt][0]+"-"+time_rt_scales[time_deriv_patt][1]+((!use_max)?"-max":"-avg");
			ShowDoubleFloatArrays.showArrays(
					combo_dt1,
					width,
					height,
					true,
					title_combo_dt1+".tiff",
					titles_spatial);
		}
		
		double [][] weights_pre =  weightsMinMax(
				combo_dt,     // final double [][] weights_in, // same data convolved with different kernels, may have NaNs
				20.0,         // final double      min,
				40.0,         // final double      max,
				true);        // final boolean     invert)
		{
			String title_weights_pre = title_space_conv+"_weights-pre_"+time_rt_scales[time_deriv_patt][0]+"-"+time_rt_scales[time_deriv_patt][1]+(use_max?"-max":"-avg");
			ShowDoubleFloatArrays.showArrays(
					weights_pre,
					width,
					height,
					true,
					title_weights_pre+".tiff",
					titles_spatial);
		}
		
		int weights_pre_shrink =  2;
		int weights_pre_min_seq = 5;
		double [][] weights_pre_filt = weightsShrink(
				weights_pre, // final double [][] weights_in, // same data convolved with different kernels, may have NaNs
				width, // final int         width,
				weights_pre_shrink, // final int         shrink,
				weights_pre_min_seq); // final int         min_frames)
		{
			String title_weights_pre_filt = title_space_conv+"_weights-pre-filt_"+time_rt_scales[time_deriv_patt][0]+"-"+time_rt_scales[time_deriv_patt][1]+(use_max?"-max":"-avg");
			ShowDoubleFloatArrays.showArrays(
					weights_pre_filt,
					width,
					height,
					true,
					title_weights_pre_filt+".tiff",
					titles_spatial);
		}
		
		boolean          no_gradients = true; // do not consider patterns 0..3 - when they are near the edge they are too strong
		double [][] integrated_pattern_strengths = integrateSpaceConv(
				space_conv,        // final double [][][][]  space_conv,
				weights_pre_filt,  // weights_pre);  // final double [][] weights)  // may be null [num_images][num_pixels]
				false); // no_gradients) ;    // final boolean          no_gradients) { // do not consider patterns 0..3 - when they are near the edge they are too strong
		
		if (show_best && show_pre_best) {
			ShowDoubleFloatArrays.showArrays(
					integrated_pattern_strengths,
					width,
					height,
					true,
					"integrated_pattern_strengths.tiff",
					titles_pattern);
		}
		int [][] best_pattern_frame = getBestConv(
				space_conv, // final double [][][][]  space_conv)
				no_gradients) ;    // final boolean          no_gradients) { // do not consider patterns 0..3 - when they are near the edge they are too strong				
		int patt_mod = 18; // = 54;
		if (show_best && show_pre_best) {
			double [][] best_pattern_frame_dbg = new double [best_pattern_frame.length][best_pattern_frame[0].length];
			for (int nimg = 0; nimg < best_pattern_frame_dbg.length; nimg++) {
				Arrays.fill(best_pattern_frame_dbg[nimg],Double.NaN);
				for (int ipix = 0; ipix < best_pattern_frame_dbg[nimg].length; ipix++) if (best_pattern_frame[nimg][ipix] >= 0){
					best_pattern_frame_dbg[nimg][ipix] = best_pattern_frame[nimg][ipix] % patt_mod; 
				}
			}
			ShowDoubleFloatArrays.showArrays(
					best_pattern_frame_dbg,
					width,
					height,
					true,
					"best_pattern_frame_mod"+patt_mod+".tiff",
					titles_pattern);
		}
		
		int [] best_pattern = getBestIntegratedConv(
				integrated_pattern_strengths,        // double [][] integ_conv){ // [kern][pix]
				no_gradients) ;    // final boolean          no_gradients) { // do not consider patterns 0..3 - when they are near the edge they are too strong				
		
		if (show_best && show_pre_best) {
			double [] best_pattern_dbg = new double [best_pattern.length];
			Arrays.fill(best_pattern_dbg,Double.NaN);
			for (int ipix = 0; ipix < best_pattern_dbg.length; ipix++) if (best_pattern[ipix] >= 0){
				best_pattern_dbg[ipix] = best_pattern[ipix] % patt_mod; 
			}
			
			ShowDoubleFloatArrays.showArrays(
					new double[][] {best_pattern_dbg},
					width,
					height,
					true,
					"best_pattern_mod"+patt_mod+".tiff",
					titles_pattern);
		}		
		double [][] patt_strength_abs = getPatternStrengths(
				space_conv,   // final double [][][][]  space_conv,
				best_pattern, // final int []           best_patt_integ, // best pattern index for integrated each pixel    
				null);        // final int [][]         best_patt_frame){ // best pattern index for each frame, each pixel. If !=null calculate strength relative to the best pattern,  null - absolute    

		double [][] patt_strength_rel = getPatternStrengths(
				space_conv,           // final double [][][][]  space_conv,
				best_pattern,         // final int []           best_patt_integ, // best pattern index for integrated each pixel    
				best_pattern_frame);  // final int [][]         best_patt_frame){ // best pattern index for each frame, each pixel. If !=null calculate strength relative to the best pattern,  null - absolute    
		double strength_rel_min = 0.5;
		double strength_rel_max = 0.9;
		double [][] weights_mm = weightsMinMax(
				patt_strength_rel, //final double [][] weights_in, // same data convolved with different kernels, may have NaNs
				strength_rel_min,  // final double      min,
				strength_rel_max,  // final double      max)
				false); // final boolean     invert) { // should be > min
		// combine weights_pre (removed fg/bg borders with tempooral filtering) and  weights_mm (by same pattern)
		double [][] weights_mul =  weightsMultiply(
				weights_pre, // final double [][] weights0,
				weights_mm); // final double [][] weights1)
		// blur weights 3d with kernel
		double [][] weights_blurred = convolve3d(
				weights_mul,            // final double [][]   data_in,        // fill NaN before running, will treat NaN as 0 on input, skip if center is NaN
				width,                  // final int           width,
				blur_kernel,            // final double [][][] kernel,         // (odd)x(odd)x(odd}
				repeat_border,          // final boolean       repeat_border)
				full_convolves_only);   // final boolean       full_convolves_only)
		if (show_result) {
			double [][][] weights_dbg = {weights_pre, weights_mm, weights_mul, weights_blurred,dpixels};
			String [] titles_weights=   {"weights_pre", "weights_mm", "weights_mul", "weights_blurred","dpixels"};
			ShowDoubleFloatArrays.showArraysHyperstack(
					weights_dbg, // double[][][] pixels, 
					width,                    // int          width, 
					"weights_combining",      // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
					titles_spatial,           // String []    titles, // all slices*frames titles or just slice titles or null
					titles_weights,           // String []    frame_titles, // frame titles or null
					true);                    // boolean      show)
		}
		
		double [][] applied_weight = applyWeights(
				dpixels, // final double [][] data_in,
				weights_blurred); // weights_mm); // final double [][] weights)
		
		if (show_result) {
			double [][] result_img = {dpixels_avg, applied_weight[0], applied_weight[1]};
			String [] titles_result = {"average", "weighted", "sum.weights"};
			ShowDoubleFloatArrays.showArrays(
					result_img,
					width,
					height,
					true,
					"integrated_frames_min"+strength_rel_min+"_max"+strength_rel_max+".tiff",
					titles_result);
		}
		
		
		if (show_best) {
			ShowDoubleFloatArrays.showArrays(
					patt_strength_abs,
					width,
					height,
					true,
					"patt_strength_abs.tiff",
					titles_spatial);
			ShowDoubleFloatArrays.showArrays(
					patt_strength_rel,
					width,
					height,
					true,
					"patt_strength_rel.tiff",
					titles_spatial);
		}
		
		if (show_best_temp) {
			double [][] patt_strength_abs_temp = timeToY(
					patt_strength_abs,    // final double [][] multi_image,
					width,          // final int         width,
					num_vert,       // final int         num_vert,
					vgap);          // final int         vgap)

			double [][] patt_strength_rel_temp = timeToY(
					patt_strength_rel,    // final double [][] multi_image,
					width,          // final int         width,
					num_vert,       // final int         num_vert,
					vgap);          // final int         vgap)
			ShowDoubleFloatArrays.showArrays(
					patt_strength_abs_temp,
					width,
					height,
					true,
					"patt_strength_abs_temp.tiff",
					titles_temporal);
			ShowDoubleFloatArrays.showArrays(
					patt_strength_rel_temp,
					width,
					height,
					true,
					"patt_strength_rel_temp.tiff",
					titles_temporal);
			
			return;
			
		}
		
		
		
		
		double [][][][] space_conv_temp = new double[num_sscales][NUM_SPACE_KERNELS][][];
		double [][][]  time_derivs_temp = new double[num_tscales][][];
		
		
			
		// Find the best space kernel for each time and XY
		int [][]    best_space_dir = new int [space_conv[0][0].length][];
		double [][] best_space_value = getBestSpace(
				space_conv,      // final double [][][][]  space_conv
				best_space_dir); // final int [][]      best_space) // null or int [nimg][]
		System.out.println("Generated best_space_value");

		
		
		
		
		
		if (show_derivs_dt) {
			for (int nk = 0; nk < num_sscales; nk++) {
				for (int dir = 0; dir < space_conv[nk].length; dir++) {
					space_conv_temp[nk][dir] =			timeToY(
							space_conv[nk][dir],    // final double [][] multi_image,
							width,          // final int         width,
							num_vert,       // final int         num_vert,
							vgap);          // final int         vgap)
//					System.out.println("space_conv_temp["+nk+"]["+dir+"].length="+space_conv_temp[nk][dir].length);
				}		
			}
//		double [][][]  time_derivs_temp = new double[num_tscales][][];
			for (int nk = 0; nk < num_tscales; nk++) {
				time_derivs_temp[nk] = timeToY(
						time_derivs[nk],    // final double [][] multi_image,
						width,          // final int         width,
						num_vert,       // final int         num_vert,
						vgap);          // final int         vgap)
			}
			int out_height = time_derivs_temp[0][0].length/width;
			int num_slices = time_derivs_temp[0].length;
			String [] frame_titles = new String[num_tscales];
			for (int nk = 0; nk < num_tscales; nk++) {
				frame_titles[nk]=String.format("kern %d: ",nk);
			}

			// show time representations of temporal derivatives with different kernels
			ShowDoubleFloatArrays.showArraysHyperstack(
					time_derivs_temp, // double[][][] pixels, 
					width,            // int          width, 
					title_time_derivs+"-temp",// String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
					titles_temporal,           // String []    titles, // all slices*frames titles or just slice titles or null
					frame_titles,     // String []    frame_titles, // frame titles or null
					true);            // boolean      show)
			System.out.println("Displayed time_derivs_temp");
			
			double [][][] space_conv_temp_combo = new double [space_conv_temp.length*space_conv_temp[0].length][][];
			System.out.println("space_conv_temp_combo.length="+space_conv_temp_combo.length);
			String []  frame_titles_combo = new String[space_conv_temp_combo.length];   
			for (int nk = 0; nk < space_conv_temp.length; nk++) {
				for (int dir = 0; dir < space_conv_temp[0].length; dir++) {
					int indx = dir + nk*space_conv_temp[0].length;
					space_conv_temp_combo[indx] = space_conv_temp[nk][dir];
					frame_titles_combo[indx] = String.format("scale%d-dir%d",nk,dir);
//					System.out.println("nk="+nk+" dir="+dir+" indx="+indx+" frame_titles_combo="+frame_titles_combo[indx]);
//					System.out.println("space_conv_temp_combo["+indx+"].length="+space_conv_temp_combo[indx].length);
				}
			}
			System.out.println("title_space_conv="+title_space_conv);
//			System.out.println("space_conv_temp_combo.length="+space_conv_temp_combo.length);
//space_conv-dir3-rt5.5-rxy5.5.tif
			ShowDoubleFloatArrays.showArraysHyperstack(
					space_conv_temp_combo, // double[][][] pixels, 
					width,                 // int          width, 
					title_space_conv,      // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
					titles_temporal,                // String []    titles, // "space_conv-dir"+dir+"-rt"+space_rt+"-rxy"+space_rxy,
					frame_titles_combo,    // String []    frame_titles) // frame titles or null,
					true);            // boolean      show)
			System.out.println("Displayed space_conv_temp_combo");

			
			double [][] best_space_value_temp = timeToY(
					best_space_value,  // final double [][] multi_image,
					width,             // final int         width,
					num_vert,          // final int         num_vert,
					vgap);             // final int         vgap)
		
			String title_best_space_value = "best_space_value";
			for (int nk = 0; nk < num_sscales; nk++) {
				title_best_space_value += "_"+space_rt_scales[nk][0]+"-"+space_rt_scales[nk][1];
			}
			ShowDoubleFloatArrays.showArrays(
					best_space_value_temp,
					width,
					out_height,
					true,
					title_best_space_value,
					titles_temporal);
			System.out.println("Displayed best_space_value_temp");

			double [][] best_space_dir_dbg = new double [best_space_dir.length][best_space_dir[0].length];
			for (int nimg = 0; nimg < best_space_dir.length; nimg++) {
				Arrays.fill (best_space_dir_dbg[nimg], Double.NaN);
				for (int i = 0; i < best_space_dir[nimg].length; i++)  if (best_space_dir[nimg][i] >= 0){
					best_space_dir_dbg[nimg][i] = best_space_dir[nimg][i];
				}
			}

			double [][] best_space_dir_temp = timeToY(
					best_space_dir_dbg,  // final double [][] multi_image,
					width,             // final int         width,
					num_vert,          // final int         num_vert,
					vgap);             // final int         vgap)
			
			String title_bdest_space_dir = "best_space_dir";
			for (int nk = 0; nk < num_sscales; nk++) {
				title_bdest_space_dir += "_"+space_rt_scales[nk][0]+"-"+space_rt_scales[nk][1];
			}
			ShowDoubleFloatArrays.showArrays(
					best_space_dir_temp,
					width,
					out_height,
					true,
					title_bdest_space_dir,
					titles_temporal);
			System.out.println("Displayed best_space_dir_temp");
		}
		return;
	}
	
	public static ImagePlus saveSpaceConvolutions(
			double [][][][] space_convolutions,
			int             width,
			String          title,
			String          save_dir) { // do not save if null, but show
		int num_scales =  space_convolutions.length;          // number of scales for kernel sets
		int num_kernels = space_convolutions[0].length;       // number of kernels for each scale
		int num_times =   space_convolutions[0][0].length;    // number of time slices
//		int num_pix =     space_convolutions[0][0][0].length; // number of pixels in each slice
		int num_slices = num_scales * num_kernels * num_times;
//		System.out.println("num_scales="+num_scales+" num_kernels="+num_kernels+" num_times="+num_times);
		String [] titles = new String[num_slices];
		double [][][] data = new double [num_scales * num_kernels][][];
		int indx = 0;
		for (int s = 0; s < num_scales; s++) {
			for (int k = 0; k < num_kernels; k++) {
				for (int t = 0; t < num_times; t++) {
					titles [indx++] = String.format("%d:%d:%d", s,k,t);
//					System.out.println("titles[" + (indx - 1) + "]=" + titles[indx - 1]);
				}
				data[s * num_kernels + k] = space_convolutions[s][k];	
			}
		}
		ImagePlus imp =  ShowDoubleFloatArrays.showArraysHyperstack(
				data,              // double[][][] pixels, 
				width,             // int          width, 
				title,             // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
				titles,            // String []    titles, // "space_conv-dir"+dir+"-rt"+space_rt+"-rxy"+space_rxy,
				null,              // String []    frame_titles) // frame titles or null,
				(save_dir==null)); // boolean      show)
		if (save_dir != null) {
			if (!save_dir.endsWith(Prefs.getFileSeparator())) {
				save_dir += Prefs.getFileSeparator();
			}
			String file_path = save_dir+title+".tiff";
			FileSaver fs=new FileSaver(imp);
			fs.saveAsTiff(file_path);
		}
		return imp;
	}
	
	public static double [][][][] restoreSpaceConvolutions(String path){
		ImagePlus imp = new ImagePlus(path);
		final ImageStack stack = imp.getImageStack();
		final int num_slices = stack.getSize(); // .length;
		final String [] labels = new String[num_slices];
		System.arraycopy(
				stack.getSliceLabels(),
				0,
				labels,
				0,
				num_slices);
		int nscales =  0;   // number of scales for kernel sets
		int nkernels = 0;   // number of kernels for each scale
		int ntimes =   0;   // number of time slices
		final int num_pixels = ((float[]) stack.getPixels(1)).length;
		for (int i = 0; i < labels.length; i++) {
			String [] tokens = labels[i].split(":");
			int s = Integer.parseInt(tokens[0]);
			int k = Integer.parseInt(tokens[1]);
			int t = Integer.parseInt(tokens[2]);
			nscales =  Math.max(nscales,  s);
			nkernels = Math.max(nkernels, k);
			ntimes =   Math.max(ntimes,   t);
		}
		final int num_scales =  nscales+1;   // number of scales for kernel sets
		final int num_kernels = nkernels+1;   // number of kernels for each scale
		final int num_times =   ntimes+1;   // number of time slices

		final double [][][][] data = new double [num_scales][num_kernels][num_times][num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int slice = ai.getAndIncrement(); slice < num_slices; slice = ai.getAndIncrement()) {
						int s= slice / (num_kernels * num_times);
						int kt = slice % (num_kernels * num_times);
						int k = kt / num_times;
						int t = kt % num_times;
						float [] pixels = (float []) stack.getPixels(slice+1);
						for (int i = 0; i < num_pixels; i++) {
							data[s][k][t][i]=pixels[i];
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return data;
	}
	
	

	public static ImagePlus saveTemporalConvolutions(
			double [][][] time_derivs,
			int             width,
			String          title,
			String          save_dir) { // do not save if null, but show
		int num_scales =  time_derivs.length;          // number of scales for kernel sets
		int num_times =   time_derivs[0].length;    // number of time slices
//		int num_pix =     time_derivs[0][0].length; // number of pixels in each slice
		int num_slices = num_scales * num_times;
		String [] titles = new String[num_slices];
		int indx = 0;
		for (int s = 0; s < num_scales; s++) {
				for (int t = 0; t < num_times; t++) {
					titles [indx++] = String.format("%d:%d", s,t);
				}
		}
		ImagePlus imp =  ShowDoubleFloatArrays.showArraysHyperstack(
				time_derivs,              // double[][][] pixels, 
				width,             // int          width, 
				title,             // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
				titles,            // String []    titles, // "space_conv-dir"+dir+"-rt"+space_rt+"-rxy"+space_rxy,
				null,              // String []    frame_titles) // frame titles or null,
				(save_dir==null)); // boolean      show)
		if (save_dir != null) {
			if (!save_dir.endsWith(Prefs.getFileSeparator())) {
				save_dir += Prefs.getFileSeparator();
			}
			String file_path = save_dir+title+".tiff";
			FileSaver fs=new FileSaver(imp);
			fs.saveAsTiff(file_path);
		}
		return imp;
	}
	
	
	public static double [][][] restoreTemporalConvolutions(String path){
		ImagePlus imp = new ImagePlus(path);
		final ImageStack stack = imp.getImageStack();
		final int num_slices = stack.getSize(); // .length;
		final String [] labels = new String[num_slices];
		System.arraycopy(
				stack.getSliceLabels(),
				0,
				labels,
				0,
				num_slices);
		int nscales =  0;   // number of scales for kernel sets
		int ntimes =   0;   // number of time slices
		final int num_pixels = ((float[]) stack.getPixels(1)).length;
		for (int i = 0; i < labels.length; i++) {
//			System.out.println(i+":"+labels[i]);
			String [] tokens = labels[i].split(":");
			int s = Integer.parseInt(tokens[0]);
			int t = Integer.parseInt(tokens[1]);
			nscales =  Math.max(nscales,  s);
			ntimes =   Math.max(ntimes,   t);
		}
		final int num_scales =  nscales+1;   // number of scales for kernel sets
		final int num_times =   ntimes+1;   // number of time slices
		final double [][][] data = new double [num_scales][num_times][num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int slice = ai.getAndIncrement(); slice < num_slices; slice = ai.getAndIncrement()) {
						int s= slice / num_times;
						int t = slice % num_times;
						float [] pixels = (float []) stack.getPixels(slice+1);
						for (int i = 0; i < num_pixels; i++) {
							data[s][t][i]=pixels[i];
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return data;
	}
	
	
	
	
	public static void showOrigDt (
			final double [][] multi_image,
			final int         width,
			final int         num_vert,
			final int         vgap){
		double [][] out_data_dt = timeToY(
				multi_image,      // final double [][] multi_image,
				width, // final int         width,
				num_vert,     // final int         num_vert,
				vgap);        // final int         vgap)
		int out_height = out_data_dt[0].length/width;
		String [] titles = new String[out_data_dt.length];
		for (int i = 0; i < titles.length; i++) {
			if (num_vert == 1) {
				titles[i]=String.format("row %d",i);
			} else {
				titles[i]=String.format("rows: %d-%d",i*num_vert,(i+1)*num_vert-1);
			}
		}
		ShowDoubleFloatArrays.showArrays(
				out_data_dt,
				width,
				out_height,
				true,
				"time_section-"+out_data_dt.length,
				titles);

	}
	
	
	
	public static double [][]      getBestSpace(
			final double [][][][]  space_conv,
			final int    [][]      best_space){ // null or int [nimg][]
		final int num_scales =  space_conv.length; // may be modified, if there will be extra layers
		final int num_kernels = space_conv[0].length; // may be modified, if there will be extra layers
		final int num_images = space_conv[0][0].length;
		final int num_pixels = space_conv[0][0][0].length;
		final double [][] best_val = new double [num_images][num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int nimg = 0; nimg < num_images; nimg++) {
			final int fimg = nimg;
			Arrays.fill(best_val[fimg], Double.NaN);
			if (best_space != null) {
				best_space[fimg] = new int[num_pixels];
				Arrays.fill(best_space[fimg], -1);
			}
			ai.set(0);
			//			ati.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) {
							int best_scale = -1;
							int best_dir = -1;
							int best_sgn = 0;
							double bestv = 0;
							for (int nscale = 0; nscale < num_scales; nscale++) {
								for (int dir = 0; dir < num_kernels; dir++) {
									double d = space_conv[nscale][dir][fimg][ipix];
									if (!Double.isNaN(d)) {
										for (int sgn = 0; sgn<2; sgn++) {
											if (sgn > 0) {
												d = -d;
											}
											if (d > bestv) {
												best_scale = nscale;
												best_dir = dir;
												best_sgn = sgn;
												bestv = d;
											}
										}
									}
								}
							}
							if (best_dir >= 0) {
								best_val[fimg][ipix] = bestv;
								if (best_space != null) {
									if (best_dir < 8) {
										best_space[fimg][ipix] = best_dir + best_sgn * 8 + best_scale * 16 ;
									} else {
										best_space[fimg][ipix] = 16* num_scales + best_sgn + 2 * best_scale;
									}
								}
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		return best_val;	
	}
	
	
	public static int [][]      getBestConv(
			final double [][][][]  space_conv,
			final boolean          no_gradients) { // do not consider patterns 0..3 - when they are near the edge they are too strong			
		final int num_scales =  space_conv.length; // may be modified, if there will be extra layers
		final int num_kernels = space_conv[0].length; // may be modified, if there will be extra layers
		final int num_images = space_conv[0][0].length;
		final int num_pixels = space_conv[0][0][0].length;
		final int num_signs = 2;
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		final int [][] best_conv = new int [num_images][num_pixels];
		for (int nimg = 0; nimg < num_images; nimg++) {
			final int fimg = nimg;
			Arrays.fill(best_conv[fimg], -1);
			ai.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) {
							int best_scale = -1;
							int best_dir = -1;
							int best_sgn = 0;
							double bestv = 0;
							for (int nscale = 0; nscale < num_scales; nscale++) {
								int low_dir = no_gradients? 4: 0;
								for (int dir = low_dir; dir < num_kernels; dir++) {
									double d = space_conv[nscale][dir][fimg][ipix];
									if (!Double.isNaN(d)) {
										for (int sgn = 0; sgn<2; sgn++) {
											if (sgn > 0) {
												d = -d;
											}
											if (d > bestv) {
												best_scale = nscale;
												best_dir = dir;
												best_sgn = sgn;
												bestv = d;
											}
										}
									}
								}
							}
							if (best_dir >= 0) {
								best_conv[fimg][ipix] = best_dir + num_kernels * best_sgn + best_scale *  num_kernels * num_signs;
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		return best_conv;	
	}
	
	
	public static double [][] integrateSpaceConv(
		final double [][][][]  space_conv,
		final double [][]      weights, // may be null [num_images][num_pixels]
		final boolean          no_gradients) { // do not consider patterns 0..3 - when they are near the edge they are too strong
		final int num_scales =  space_conv.length; // may be modified, if there will be extra layers
		final int num_kernels = space_conv[0].length; // may be modified, if there will be extra layers
		final int num_images = space_conv[0][0].length;
		final int num_pixels = space_conv[0][0][0].length;
		final int num_signs = 2;
		final double [][] integrated_convs = new double [num_scales*num_signs*num_kernels][num_pixels];
		final double []   sum_weights = new double [num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int nimg = 0; nimg < num_images; nimg++) {
			final int fimg = nimg;
			ai.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) {
							double w = (weights == null)? 1.0 : weights[fimg][ipix] ;
							sum_weights[ipix] += w;
							for (int nscale = 0; nscale < num_scales; nscale++) {
								int low_dir = no_gradients? 4: 0;
								for (int dir = low_dir; dir < num_kernels; dir++) {
									double d = space_conv[nscale][dir][fimg][ipix];
									if (!Double.isNaN(d)) {
										for (int sgn = 0; sgn<num_signs; sgn++) {
											if (sgn > 0) {
												d = -d;
											}
											if (d > 0) {
												if (weights != null) {
													d *= weights[fimg][ipix];
												}
												int indx = dir + num_kernels * sgn + nscale *  num_kernels * num_signs;
												integrated_convs[indx][ipix] += d*d;
											}
										}
									}
								}
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		// normalize
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) {
						if (sum_weights[ipix] > 0 ) {
							for (int i = 0; i < integrated_convs.length; i++) {
								if (integrated_convs[i][ipix] >0) {
									integrated_convs[i][ipix] = Math.sqrt(integrated_convs[i][ipix])/ sum_weights[ipix];
								}
							}
						} else {
							for (int i = 0; i < integrated_convs.length; i++) {
								integrated_convs[i][ipix] = Double.NaN;
							}
							
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return integrated_convs;	
	}
	
	public static int [] getBestIntegratedConv(
			double [][] integ_conv, // [kern][pix]
			final boolean          no_gradients) { // do not consider patterns 0..3 - when they are near the edge they are too strong
		final int dbg_pix = 135+160*640;
		final int num_patt =      integ_conv.length;
		final int num_pixels =    integ_conv[0].length;
		final int [] best_index = new int [num_pixels];
		final Thread[] threads =  ImageDtt.newThreadArray();
		final AtomicInteger ai =  new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) {
						if (ipix == dbg_pix) {
							System.out.println("ipix="+ipix);
						}
						int best_patt = -1;
						double bestv = 0;
						for (int ipatt = 0; ipatt < num_patt; ipatt++) {
							int npatt = ipatt % 9;
							if (no_gradients && npatt < 4) {
								continue;
							}
							double d = integ_conv[ipatt][ipix];
							if (d > bestv) { // cares of NaN
								bestv = d;
								best_patt = ipatt;
							}
						}
						if (best_patt >=0) {
							best_index[ipix] = best_patt;
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return best_index;
	}
	
	public static double [][] combineSelectedConvolutions(
			final double [][][][] space_conv_dt,
			final double [][]     pattern_selection,
			final boolean         use_max) {
		final int num_scales =   space_conv_dt.length; // may be modified, if there will be extra layers
		final int num_patterns = space_conv_dt[0].length; // may be modified, if there will be extra layers
		final int num_images =   space_conv_dt[0][0].length;
		final int num_pixels =   space_conv_dt[0][0][0].length;
		if ((pattern_selection.length != num_scales) || (pattern_selection[0].length != num_patterns)) {
			throw new IllegalArgumentException("pattern_selection does not match space_conv_dt in dimensions");
		}
		final double [][] combo_dt = new double [num_images][num_pixels];
		for (int nimg = 0; nimg < num_images;nimg++) {
			Arrays.fill(combo_dt[nimg], Double.NaN);
		}
		final Thread[] threads =  ImageDtt.newThreadArray();
		final AtomicInteger ai =  new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) {
						for (int nimg = 0; nimg< num_images; nimg++) {
							int num_valid = 0;
							double s2 = 0;
							for (int nscale = 0; nscale < num_scales; nscale++) {
								for (int npatt = 0; npatt < num_patterns; npatt++) if (pattern_selection[nscale][npatt] > 0){
									double d = space_conv_dt[nscale][npatt][nimg][ipix];
									if (!Double.isNaN(d)) {
										d*= pattern_selection[nscale][npatt]; // scaling d before squaring
										d*=d;
										if (use_max) {
											s2 = Math.max(s2,d);
											num_valid = 1;
										} else {
											s2 += d;
											num_valid++;
										}
									}
								}
							}
							if (num_valid > 0) {
								combo_dt[nimg][ipix] = Math.sqrt(s2/num_valid);
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return combo_dt;
	}
	
	public static double [][] getPatternStrengths(
			final double [][][][]  space_conv,
			final int []           best_patt_integ, // best pattern index for integrated each pixel    
			final int [][]         best_patt_frame){ // best pattern index for each frame, each pixel. If !=null calculate strength relative to the best pattern,  null - absolute    
		final boolean          relative = (best_patt_frame != null); // false - just strength, true - relative to the best pattern for each frame/pix
//		final int num_scales =   space_conv.length; // may be modified, if there will be extra layers
		final int num_patterns = space_conv[0].length; // may be modified, if there will be extra layers
		final int num_images =   space_conv[0][0].length;
		final int num_pixels =   space_conv[0][0][0].length;
		final int num_signs = 2;
		final double [][] pattern_strengths = new double [num_images][num_pixels];
		final Thread[] threads =  ImageDtt.newThreadArray();
		final AtomicInteger ai =  new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) {
						int indx = best_patt_integ[ipix];
						if (indx >= 0) {
							int npatt =   indx % num_patterns;
							int r = indx/ num_patterns; 
							int sgn =    r % num_signs;
							int nscale = r / num_signs;
							for (int nimg = 0; nimg < num_images; nimg++) {
								double s = space_conv[nscale][npatt][nimg][ipix];
								if (!Double.isNaN(s)) {
									if (sgn > 0) {
										s = -s;
									}
									if (s > 0) {
										if (relative) {
											int indx_best =best_patt_frame[nimg][ipix];
											if (indx_best >=0) {
												int npatt_best = indx_best % num_patterns;
												int r_best =  indx_best / num_patterns;
												int sgn_best =    r_best % num_signs;
												int nscale_best = r_best / num_signs;
												double s_best = space_conv[nscale_best][npatt_best][nimg][ipix];
												if (sgn_best > 0) {
													s_best = -s_best;
												}
												pattern_strengths[nimg][ipix] = s/s_best;
											}
										} else {
											pattern_strengths[nimg][ipix] = s;
										}
									}
								}
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return pattern_strengths;
	}
	
	// TODO: Normalize strengths with power and upper limit or fraction
	
	
	public static double [] normalizeSpaceKernels(
			double [][][] conv_data) { // same data convolved with different kernels, may have NaNs
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		final AtomicInteger ati = new AtomicInteger(0);
		final double [][] s0t =  new double [threads.length][conv_data.length];
		final double [][] sdt =  new double [threads.length][conv_data.length];
		final double [][] sdat = new double [threads.length][conv_data.length]; // will compare l2 and abs
		final double [][] sd2t = new double [threads.length][conv_data.length];
		final double [] s0 =  new double [conv_data.length];
		final double [] sd =  new double [conv_data.length];
		final double [] sda = new double [conv_data.length]; // will compare l2 and abs
		final double [] sd2 = new double [conv_data.length];
		final double [] std = new double [conv_data.length];
		final double [] coeffs_std = new double [conv_data.length+1];
		final double [] coeffs_abs = new double [conv_data.length+1];

		for (int nkern = 0; nkern < conv_data.length; nkern++) {
			final int fnkern = nkern;
			for (int nimg = 0; nimg < conv_data[fnkern].length; nimg++) {
				final int fimg = nimg;
				ai.set(0);
				ati.set(0);
				for (int ithread = 0; ithread < threads.length; ithread++) {
					threads[ithread] = new Thread() {
						public void run() {
							int nthread = ati.getAndIncrement();
							for (int ipix = ai.getAndIncrement(); ipix < conv_data[fnkern][fimg].length; ipix = ai.getAndIncrement()) {
								double d =  conv_data[fnkern][fimg][ipix];
								if (!Double.isNaN(d)) {
									s0t[nthread][fnkern] += 1.0;
									sdt[nthread][fnkern] += d;
									sd2t[nthread][fnkern] += d*d;
									sdat[nthread][fnkern] += Math.abs(d);
								}
							}
						}
					};
				}		      
				ImageDtt.startAndJoin(threads);
			}
			for (int nthread = 0; nthread < s0t.length; nthread++) {
				s0 [nkern] += s0t [nthread][nkern];  
				sd [nkern] += sdt [nthread][nkern];  
				sd2[nkern] += sd2t[nthread][nkern];  
				sda[nkern] += sdat[nthread][nkern];  
			}
		}

		double avg_std = 0.0;
		double avg_abs = 0.0;
		
		for (int nkern = 0; nkern < conv_data.length; nkern++) {
			sd[nkern] /= s0[nkern];
			sda[nkern] /= s0[nkern];
			sd2[nkern] /= s0[nkern];
			std[nkern] = Math.sqrt(sd2[nkern] - sd[nkern] * sd[nkern]);
			avg_std+= std[nkern];
			avg_abs+= sda[nkern];
		}		
		avg_std/=conv_data.length;
		avg_abs/=conv_data.length;
		for (int nkern = 0; nkern < conv_data.length; nkern++) {
			coeffs_std[nkern] = avg_std/ std[nkern];
			coeffs_abs[nkern] = avg_abs/ sda[nkern];
		}		
		coeffs_std[conv_data.length] = avg_std;  // last element - average
		coeffs_abs[conv_data.length] = avg_abs; 
		return coeffs_std;
	}
	
	public static double [] normalizeTimeDerivs( // return {std, average abs()}
			final double [][] conv_data,
			final double frac_outliers) {
		final int num_bins = 10000;
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		final AtomicInteger ati = new AtomicInteger(0);
		final double [] max_abs = new double [threads.length];
		for (int nimg = 0; nimg < conv_data.length; nimg++) {
			final double [] data =  conv_data[nimg];
			ai.set(0);
			ati.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						int nthread = ati.getAndIncrement();
						for (int ipix = ai.getAndIncrement(); ipix < data.length; ipix = ai.getAndIncrement()) if (!Double.isNaN(data[ipix])) {
							max_abs[nthread] = Math.max(max_abs[nthread],Math.abs(data[ipix]));
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		double amax = 0;
		for (int nthread = 0; nthread < max_abs.length; nthread++) {
			amax=Math.max(amax, max_abs[nthread]);
		}
//		final double fmax = amax;
//		final double scale = amax/num_bins;
		final double step = amax/num_bins;
		final double [][] histograms = new double [threads.length][num_bins];
		for (int nimg = 0; nimg < conv_data.length; nimg++) {
			final double [] data =  conv_data[nimg];
			ai.set(0);
			ati.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						int nthread = ati.getAndIncrement();
						for (int ipix = ai.getAndIncrement(); ipix < data.length; ipix = ai.getAndIncrement()) if (!Double.isNaN(data[ipix])) {
							double ad = Math.abs(data[ipix]);
							int bin = (int) Math.floor(ad/step);
							if ((bin >=0) && (bin < num_bins)) {
								histograms[nthread][bin] += 1.0;
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		final double [] histogram = new double [num_bins];
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int bin = ai.getAndIncrement(); bin < num_bins; bin = ai.getAndIncrement()) {
						for (int i = 0; i < histograms.length; i++) {
							histogram[bin]+=histograms[i][bin];
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		double sum = 0;
		for (int i = 0; i < num_bins; i++) {
			sum+=histogram[i];
		}
		double threshold = sum * frac_outliers;
		double run_sum = 0;
		double prev_sum = 0;
		double alim = 0;
		for (int bin= num_bins-1; bin >=0; bin--) {
			prev_sum = run_sum;
			run_sum +=  histogram[bin];
			if (run_sum >=threshold) {
				alim = amax *(bin + (run_sum - threshold)/(run_sum - prev_sum))/num_bins;
				break;
			}
		}
		final double flim = alim;
		// alim - absolute limit to remove outliers
		final double [] s0t =  new double [threads.length];
		final double [] sdt =  new double [threads.length];
		final double [] sdat = new double [threads.length]; // will compare l2 and abs
		final double [] sd2t = new double [threads.length];
		for (int nimg = 0; nimg < conv_data.length; nimg++) {
			final double [] data =  conv_data[nimg];
			ai.set(0);
			ati.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						int nthread = ati.getAndIncrement();
						for (int ipix = ai.getAndIncrement(); ipix < data.length; ipix = ai.getAndIncrement()) {
							double d =  data[ipix];
							if (!Double.isNaN(d) && (Math.abs(d) < flim)) {
								s0t[nthread] += 1.0;
								sdt[nthread] += d;
								sd2t[nthread] += d*d;
								sdat[nthread] += Math.abs(d);
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		double s0=0, sd=0,sd2=0,sda=0;
		for (int nthread = 0; nthread < s0t.length; nthread++) {
			s0  += s0t [nthread];  
			sd  += sdt [nthread];  
			sd2 += sd2t[nthread];  
			sda += sdat[nthread];  
		}
		sd /= s0;
		sda /= s0;
		sd2 /= s0;
		double std =  Math.sqrt(sd2 - sd * sd);
		return new double [] {std, sda};
	}
	
	
	
	public static void scaleConvolutions(
			final double [][] conv_data, // same data convolved with different kernels, may have NaNs
			final double scale) {
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int nimg = 0; nimg < conv_data.length; nimg++) {
			final double [] data = conv_data[nimg];
			ai.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						for (int ipix = ai.getAndIncrement(); ipix < data.length; ipix = ai.getAndIncrement()) {
							data[ipix] *= scale; 
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
	}

	public static double [][] weightsMultiply(
			final double [][] weights0,
			final double [][] weights1) {
		
		final int num_frames = weights0.length;
		final int num_pixels = weights0[0].length;
		final double [][] weights = new double[num_frames][num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()){ // if (w_frame[ipix] > min){
						for (int nimg = 0; nimg < num_frames; nimg++) {
							double w = weights0[nimg][ipix]*weights1[nimg][ipix];
							if (w > 0) { // NaN-aware
								weights[nimg][ipix] = w;
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return weights;
	}
	
/*
		for (int nimg = 0; nimg < num_frames; nimg++) {
			final double [] w_frame = weights_in[nimg];
			final double [] w_out = weights[nimg];
 	
 */
	
	public static double [][]   weightsMinMax(
			final double [][] weights_in, // same data convolved with different kernels, may have NaNs
			final double      min,
			final double      max,
			final boolean     invert) { // should be > min
		
		final double scale = 1.0/(max-min);
		final int num_frames = weights_in.length;
		final int num_pixels = weights_in[0].length;
		final double [][] weights = new double[num_frames][num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int nimg = 0; nimg < num_frames; nimg++) {
			final double [] w_frame = weights_in[nimg];
			final double [] w_out = weights[nimg];
			ai.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) if (!Double.isNaN(w_frame[ipix])){ // if (w_frame[ipix] > min){
							double w = (Math.min(Math.max(w_frame[ipix] , min), max) - min) *scale;
							w_out[ipix] = invert ? (1.0-w) : w;
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		return weights;
	}

	public static double [][]   weightsShrink(
			final double [][] weights_in, // same data convolved with different kernels, may have NaNs
			final int         width,
			final int         shrink,
			final int         min_frames) {
		
		final int num_frames = weights_in.length;
		final int num_pixels = weights_in[0].length;
		final double [][] weights = new double[num_frames][num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final boolean [][] mask = new boolean [num_frames][num_pixels];
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				TileNeibs tn = new TileNeibs(width, num_pixels/width);
				public void run() {
					for (int nframe = ai.getAndIncrement(); nframe < num_frames; nframe = ai.getAndIncrement()){ // if (w_frame[ipix] > min){
						System.arraycopy(weights_in[nframe], 0, weights[nframe], 0, num_pixels);
						if (shrink > 0) {
						for (int ipix=0; ipix < num_pixels; ipix++) {
							mask[nframe][ipix] = !(weights_in[nframe][ipix] > 0);
						}
						tn.growSelection(
								shrink, // int              grow,           // grow tile selection by 1 over non-background tiles 1: 4 directions, 2 - 8 directions, 3 - 8 by 1, 4 by 1 more
								mask[nframe], // final boolean [] tiles,
								null); // final boolean [] prohibit)
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		// filter by sequence length
		if (min_frames > 0)  {
			ai.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						int [] seq_length = new int [num_frames];					
						for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()){
							int prev = 0;
							for (int nframe = 0; nframe <  num_frames; nframe++) {
								if (weights_in[nframe][ipix] > 0) {
									seq_length[nframe] = ++prev;
								} else {
									prev = 0;
									seq_length[nframe] = 0;
								}
							}
							for (int nframe = num_frames-1; nframe >= 0; nframe--) {
								if (seq_length[nframe] > 0) {
									if (seq_length[nframe] < min_frames) {
										for (int i = 0; i < seq_length[nframe]; i++) {
											mask[nframe-i][ipix] = true;
										}
									}
									nframe -= seq_length[nframe]-1; // will be decrease by one more 
								}
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}

		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int nframe = ai.getAndIncrement(); nframe < num_frames; nframe = ai.getAndIncrement()){ // if (w_frame[ipix] > min){
						for (int ipix=0; ipix < num_pixels; ipix++) {
							if (mask[nframe][ipix]) {
								weights[nframe][ipix] = 0;
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return weights;
	}
	
	
	
	public static double [][] timeToY(
			final double [][] multi_image,
			final int         width,
			final int         num_vert,
			final int         vgap){
		final int height = multi_image[0].length/width;
		final int num_img_in = multi_image.length - 1; //last slice is average
		final int num_slices = (height+num_vert-1)/ num_vert;
		final int out_height = num_img_in*num_vert + vgap*(num_vert-1);
		final double [][] out_data = new double [num_slices][width * out_height];
		for (int i = 0; i < out_data.length; i++) {
			Arrays.fill(out_data[i], Double.NaN);
		}
		for (int nimg = 0; nimg <num_img_in; nimg++) {
			for (int nrow = 0; nrow < height; nrow++) {
				int nout_slice = nrow/num_vert;
				int nsub = nrow%num_vert;
				int offs_out =  ((num_img_in + vgap)* nsub + nimg)*width;
				// copy row
				System.arraycopy(
						multi_image[nimg],
						nrow*width,
						out_data[nout_slice],
						offs_out,
						width);
			}
		}
		return out_data;
	}
	
	public static void showKernel(
			double [][][] kernel,
			String title) {
		final int rT = (kernel.length-1)/2; 
		if (kernel.length!= (2 * rT+1)) {
			throw new IllegalArgumentException("Not all kernel dimensions are odd");
		}
		int height = kernel[0].length;
		int width = kernel[0][0].length;
		String [] titles = new String [kernel.length];
		double [][] data = new double [kernel.length][width*height];
		for (int t = 0; t < kernel.length; t++) {
			titles[t] = "t "+(t-rT);
			for (int row = 0; row < kernel[0].length; row++) {
				System.arraycopy(
						kernel[t][row],
						0,
						data[t],
						width*row,
						width);
			}
		}
		ShowDoubleFloatArrays.showArrays(
				data,
				width,
				height,
				true,
				title,
				titles);
	}

	public static void showKernels(
			double [][][][] kernels,
			String title) {
		final int num_kernels = kernels.length;
		final int num_times = kernels[0].length;
		final int height = kernels[0][0].length;
		final int width = kernels[0][0][0].length;
		final int num_slices = num_kernels*num_times;
		final int rT = (num_times - 1)/2; 
		if (kernels[0].length!= (2 * rT+1)) {
			throw new IllegalArgumentException("Not all kernel dimensions are odd");
		}
		String [] titles = new String [num_slices];
		double [][] data = new double [num_slices][width*height];
		for (int k = 0; k < num_kernels; k++) {
			for (int t = 0; t < num_times; t++) {
				int slice = t+ num_times *  k; 
				titles[slice] = "k "+k+" t "+(t-rT);
				for (int row = 0; row < height; row++) {
					System.arraycopy(
							kernels[k][t][row],
							0,
							data[slice],
							width*row,
							width);
				}
			}
		}
		ShowDoubleFloatArrays.showArraysHyperstack(
				data,      // double[][] pixels,
				width,     // int width,
				height,    // int height,
				num_times, // int slices,
				title,     // String title,
				titles,    // String [] titles,
				true);     // boolean show)
	}
	
	
	
	public static double [][] convolve3d(
			final double [][]   data_in,        // fill NaN before running, will treat NaN as 0 on input, skip if center is NaN
			final int           width,
			final double [][][] kernel,         // (odd)x(odd)x(odd}
			final boolean       repeat_border,
			final boolean       full_convolves_only){
		final int height = data_in[0].length/width;
		final int rT = (kernel.length-1)/2; 
		final int rY = (kernel[0].length-1)/2;
		final int rX = (kernel[0][0].length-1)/2;
		if ((kernel.length!= (2 * rT+1)) || (kernel[0].length!= (2 * rY+1)) || (kernel[0][0].length!= (2 * rX+1))) {
			throw new IllegalArgumentException("Not all kernel dimensions are odd");
		}
		final double [][] data_out = new double [data_in.length][data_in[0].length];
		for (int i = 0; i < data_out.length; i++) {
			Arrays.fill(data_out[i], Double.NaN);
		}
		final int lenXY = data_in[0].length;
		final int lenT = data_in.length;
		final int total_pix = lenXY * lenT;
		final int lenTm1 = lenT-1;
		final int lenYm1 = height-1;
		final int lenXm1 = width-1;
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
        for (int ithread = 0; ithread < threads.length; ithread++) {
            threads[ithread] = new Thread() {
            	public void run() {
            		for (int ipix = ai.getAndIncrement(); ipix < total_pix; ipix = ai.getAndIncrement()) {
            			int iT =  ipix / lenXY;
            			int iXY = ipix % lenXY;
            			if (!Double.isNaN(data_in[iT][iXY])) {
            				int iY =  iXY  / width;
            				int iX =  iXY  % width;
            				int t0 = iT-rT, t1 = iT+rT, y0 = iY-rY, y1 = iY + rY, x0= iX-rX, x1=iX+rX;
            				int kTindx = 2*rT, kYindx = 2*rY, kXindx = 2*rX;
            				boolean limited = (t0 < 0) || (t1 >= lenT) || (y0 < 0) || (y1 >= height) || (x0 < 0) || (x1 >= width);
            				if (limited) {
            					if (!repeat_border) {
            						if (full_convolves_only) {
            							continue;
            						}
            						if (t0 < 0) {
            							kTindx += t0; // negative 
            							t0 = 0;
            						}
            						if (t1 >= lenT)   t1 = lenT-1;
            						if (y0 < 0) {
            							kYindx += y0; // negative 
            							y0 = 0;
            						}
            						if (y1 >= height) y1 = height-1;
            						if (x0 < 0) {
            							kXindx += x0; // negative 
            							x0 = 0;
            						}
            						if (x1 >= width)  x1 = width-1;
            					}
            					no_nan_limited: {
                					int kTi = kTindx;
                					double s = 0;
            						for (int tt = t0; tt <= t1; tt++) {
            							int t = Math.min(lenTm1, Math.max(tt, 0)); // if repeating
            							int kYi = kYindx;
            							for (int yy = y0; yy <= y1; yy++ ) {
            								int y = Math.min(lenYm1, Math.max(yy, 0)); // if repeating
            								int kXi = kXindx;
            								for (int xx = x0; xx <= x1; xx++ ) {
            									int x = Math.min(lenXm1, Math.max(xx, 0)); // if repeating
            									int xy = x + width * y;
            									double d = data_in[t][xy];
            									if (!Double.isNaN(d)) {
            										s += d*kernel[kTi][kYi][kXi];
            									} else if (full_convolves_only) {
            										break no_nan_limited;
            									}
            									kXi--;
            								}
            								kYi--;
            							}
            							kTi--;
            						}
            						data_out[iT][iXY] = s;
            					}
            				} else { // same faster, w/o tests
            					no_nan: {
            						int kTi = kTindx;
            						double s = 0;
            						for (int t = t0; t <= t1; t++) {
            							int kYi = kYindx;
            							for (int y = y0; y <= y1; y++ ) {
            								int kXi = kXindx;
            								for (int x = x0; x <= x1; x++ ) {
            									int xy = x + width * y;
            									double d = data_in[t][xy];
            									if (!Double.isNaN(d)) {
            										s += d*kernel[kTi][kYi][kXi];
            									} else if (full_convolves_only) {
            										break no_nan;
            									}
            									kXi--;
            								}
            								kYi--;
            							}
            							kTi--;
            						}
            						data_out[iT][iXY] = s;
            					}
            				}
            			}
            		}
            	}
            };
        }		      
        ImageDtt.startAndJoin(threads);
		return data_out;
	}

	public static double [][][] kernelBlur3d(
			double rt,
			double rxy) {
		int irt =  (int) Math.floor(rt);
		int irxy = (int) Math.floor(rxy);
		int lt = 2 * irt + 1;
		int lxy = 2 * irxy + 1;
		double [][][] kernel = new double [lt][lxy][lxy];
		for (int it = 0; it < lt; it++) {
			double t = it - irt;
			double wt = Math.cos(0.5*Math.PI* t / rt);
			for (int iy = 0; iy < lxy; iy++) {
				double y = iy- irxy;
				double wty = wt * Math.cos(0.5*Math.PI* y / rxy);
				for (int ix = 0; ix < lxy; ix++) {
					double x = ix- irxy;
					kernel[it][iy][ix] = wty * Math.cos(0.5*Math.PI* x / rxy);
				}
			}
		}
		
		double [][][] norm_kernel = kernelNormalize2(kernel); 
		return norm_kernel;
	}
	
	
	
	public static double [][][] kernelDiffTime(
			double rt,
			double rxy) {
		int irt =  (int) Math.floor(rt);
		int irxy = (int) Math.floor(rxy);
		int lt = 2 * irt + 1;
		int lxy = 2 * irxy + 1;
		double [][][] kernel = new double [lt][lxy][lxy];
		for (int it = 0; it < lt; it++) {
			double t = it - irt;
			double wt = -t * Math.cos(0.5*Math.PI* t / rt);
			for (int iy = 0; iy < lxy; iy++) {
				double y = iy- irxy;
				double wty = wt * Math.cos(0.5*Math.PI* y / rxy);
				for (int ix = 0; ix < lxy; ix++) {
					double x = ix- irxy;
					kernel[it][iy][ix] = wty * Math.cos(0.5*Math.PI* x / rxy);
				}
			}
		}
		
		double [][][] norm_kernel = kernelNormalize2(kernel); 
		return norm_kernel;
	}
	
	/**
	 * Adding more dirs:
	 * 0-3 - tilts (0:+y, 1:+x+y, 2:+x, 3:+x-y),
	 * 4-7 - lines (4:along y, 5: along x+y, 6:along x, 7: along x-y)
	 * 8 - center, 
	 * @param dir
	 * @param rt
	 * @param rxy
	 * @return
	 */
	public static double [][][] kernelXY(
			
			int    dir, // 0: +y, 1: x+y, 2:x, 3 +x-y, 4: 0: +center will be voting 1 of 10 (each plus and minus), weighted by abs(kernelDiffTime)
			double rt,
			double rxy) {
		double rxy2 = rxy*rxy;
		int irt =  (int) Math.floor(rt);
		int irxy = (int) Math.floor(rxy);
		int lt = 2 * irt + 1;
		int lxy = 2 * irxy + 1;
		double sqrt05 = Math.sqrt(0.5);
		double [][][] kernel = new double [lt][lxy][lxy];
		for (int it = 0; it < lt; it++) {
			double sum_cos = 0;
			double sum_patt = 0;
			double [][] cos_patt = new double [lxy][lxy];
			double t = it - irt;
			double wt = Math.cos(0.5*Math.PI* t / rt);
			for (int iy = 0; iy < lxy; iy++) {
				double y = iy- irxy;
//				double wty = wt * Math.cos(0.5*Math.PI* y / rxy);
				for (int ix = 0; ix < lxy; ix++) {
					double x = ix- irxy;
					double r2=(x*x+y*y)/rxy2;
					double w = 0;
					if (r2 < 1) {
						double r = Math.sqrt(r2);
						w = wt * Math.cos(0.5*Math.PI * r);
						cos_patt[iy][ix] = w;
						sum_cos += w;
						switch (dir) {
						case 0: w *= y; break;
						case 1: w *= y - x; break;
						case 2: w *= -x; break;
						case 3: w *= -y - x; break;
						case 4: w *= Math.cos(Math.PI * x / rxy); break;
						case 5: w *= Math.cos(Math.PI * sqrt05 * (x - y) / rxy); break;
						case 6: w *= Math.cos(Math.PI * y / rxy); break;
						case 7: w *= Math.cos(Math.PI * sqrt05 * (x + y) / rxy); break;
						case 8: w *= Math.cos(Math.PI * r); break; // center
						}
						sum_patt+=w;
					}
					kernel[it][iy][ix] = w;
				}
			}
			double cos_corr = sum_patt/sum_cos; // balance average=0
			for (int iy = 0; iy < lxy; iy++) {
				for (int ix = 0; ix < lxy; ix++) {
					kernel[it][iy][ix] -= cos_corr*cos_patt[iy][ix];
				}
			}
			
		}
		return kernelNormalize2(kernel);
	}

	
	
	
	
	public static double [][][] kernelNormalize2(double [][][] kernel){
		double [][][] kernel_out = new double [kernel.length][kernel[0].length][kernel[0][0].length];
		double s = 0;
		for (int t = 0; t < kernel.length; t++) {
			for (int y = 0; y < kernel[t].length; y++) {
				for (int x = 0; x < kernel[t][y].length; x++) {
					double d = kernel[t][y][x];
					s+=d * d;
				}
			}
		}
		if (s == 0 ) {
			throw new IllegalArgumentException("Kernel of all zeros");
		}
		double k = 1.0/Math.sqrt(s);
		for (int t = 0; t < kernel.length; t++) {
			for (int y = 0; y < kernel[t].length; y++) {
				for (int x = 0; x < kernel[t][y].length; x++) {
					kernel_out[t][y][x] = kernel[t][y][x] * k;
				}
			}
		}
		return kernel_out;
	}
	
	public static double [][] applyWeights(
			final double [][] data_in,
			final double [][] weights) {
		final int num_frames = data_in.length;
		final int num_pixels = data_in[0].length;
		if ((weights.length != num_frames) || (weights[0].length != num_pixels)) {
			throw new IllegalArgumentException("Weights do not match data");
		}
		
		
		final double [] data_out =    new double [num_pixels];
		final double [] sum_weights = new double [num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int nimg = 0; nimg < weights.length; nimg++) {
			final int fimg = nimg;
	        ai.set(0);
	        for (int ithread = 0; ithread < threads.length; ithread++) {
	            threads[ithread] = new Thread() {
	            	public void run() {
	            		for (int ipix = ai.getAndIncrement(); ipix < data_in[fimg].length; ipix = ai.getAndIncrement()) {
	            			double w = weights[fimg][ipix];
	            			if (w > 0 ) {
	            				double d = data_in[fimg][ipix];
	            				if (!Double.isNaN(d)) {
	            					sum_weights[ipix] += w;
	            					data_out[ipix] += w * d;
	            				}
	            				
	            			}
	            		}
	            	}
	            };
	        }		      
	        ImageDtt.startAndJoin(threads);
		}
        ai.set(0);
        for (int ithread = 0; ithread < threads.length; ithread++) {
            threads[ithread] = new Thread() {
            	public void run() {
            		for (int ipix = ai.getAndIncrement(); ipix < data_out.length; ipix = ai.getAndIncrement()) {
            			if (sum_weights[ipix] > 0) {
            				data_out[ipix] /= sum_weights[ipix]; 
            			} else {
            				data_out[ipix] = Double.NaN;
            			}
            		}
            	}
            };
        }		      
        ImageDtt.startAndJoin(threads);
		return new double [][] { data_out, sum_weights};
	}
	
	
	
////	
	public static void testOrange0() {
//		double shrink= 8.0; // multiply by oscale * 2
//		double fade_sigma = 4.0;
		boolean zero_is_nan=true;
		int master = 3;
		final double [] voffsets = value_offsets;
		int [][] segments = match_segments; // full_segments; // single_segments;
		double    oscale = 2.0;
//		Rectangle woi = new Rectangle(0,0,640,512); // full window;
//		Rectangle woi = new Rectangle(92,223,320,240); // approx. overlap
		Rectangle woi = new Rectangle(92,280,320,120); // minimal overlap
		final Rectangle owoi = new Rectangle (
				(int) Math.round(oscale *woi.x ),
				(int) Math.round(oscale *woi.y ),
				(int) Math.round(oscale *woi.width),
				(int) Math.round(oscale *woi.height));
		int [][] seg_offs_len = new int [segments.length][2];
		int offs = 0;
		for (int nimg = 0; nimg < seg_offs_len.length; nimg++) {
			seg_offs_len[nimg][0] = offs;
			int len = segments[nimg][1] - segments[nimg][0]; // length;
			seg_offs_len[nimg][1] = len;
			offs += len;
		}
		final int indx_avg = offs;
		final int olen = owoi.width * owoi.height;
		final int owidth =  owoi.width;
		final double [][] out_data = new double [offs+1][owoi.width * owoi.height];
		final int []   num_valid = new int [owoi.width * owoi.height];
		String [] titles = new String[out_data.length];
		titles [indx_avg] = "average";
		for (int i = 0; i < offs; i++) {
			Arrays.fill(out_data[i], Double.NaN);
		}

		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
//		final AtomicInteger anans = new AtomicInteger(0);
//		final boolean [] nan_pixels = new boolean [olen];
//		final double [] weights = new double [olen];
//		final double [] sum_weights = new double [olen];
//		final TileNeibs tn = new TileNeibs (owoi.width, owoi.height);
//		final int grow_nans = (int) Math.round(shrink * oscale * 2);
		
		
		
		for (int nimg = 0; nimg < seg_offs_len.length; nimg++) {
//			Arrays.fill(nan_pixels, false);
			final double voffs = +voffsets[nimg];
			final int foffs = seg_offs_len[nimg][0];
			ImagePlus imp_src = new ImagePlus(src_files[nimg]);
			ImageStack stack = imp_src.getImageStack();
	        final int src_width = imp_src.getWidth();
	        final int src_height = imp_src.getHeight();
			final int num_slices = stack.getSize(); // .length;
			final String [] stack_labels = new String[num_slices];
			System.arraycopy(
					stack.getSliceLabels(),
					0,
					stack_labels,
					0,
					num_slices);
			final double [][] dpixels = new double [seg_offs_len[nimg][1]][];
			for (int i = 0; i < seg_offs_len[nimg][1]; i++) {
				titles[seg_offs_len[nimg][0] + i] =nimg+":"+stack_labels[segments[nimg][0]+i];
				float [] fpixels = (float[]) stack.getPixels(segments[nimg][0]+i+1);
				dpixels[i] = new double [fpixels.length];
				for (int j = 0; j < dpixels[i].length; j++) {
					dpixels[i][j] = fpixels[j];
					if (zero_is_nan && (dpixels[i][j] == 0.0)) {
						dpixels[i][j] = Double.NaN;
					}
				}
			}
			final double [][] points0 = new double[matching_points[master].length][];
			final double [][] points1 = new double[points0.length][]; // matching_points[nimg];
			for (int i = 0; i < points0.length;i++) {
				points0[i] = new double [] {matching_points[master][i][0]/points_scale, matching_points[master][i][1]/points_scale};
				points1[i] = new double [] {matching_points[nimg  ][i][0]/points_scale, matching_points[nimg  ][i][1]/points_scale};
			}
			
			//points_scale
			
			double [][] ab1 = ComboMatch.getAffineMatrix(
					points0, //  double [][] x,
					points1); // double [][] y){
			double k = 1.0/oscale;
	        final double [][] a1 = {{k*ab1[0][0],k*ab1[0][1]},{k*ab1[1][0],k*ab1[1][1]}};
	        final double []   b1 = {
	        		ab1[2][0] + a1[0][0]*owoi.x + a1[0][1]*owoi.y,
	        		ab1[2][1] + a1[1][0]*owoi.x + a1[1][1]*owoi.y}; // from output coordinates to image2 coordinates
	        ai.set(0);
//	        anans.set(0);
	        for (int ithread = 0; ithread < threads.length; ithread++) {
	            threads[ithread] = new Thread() {
	                public void run() {
	                    for (int ipix = ai.getAndIncrement(); ipix < olen; ipix = ai.getAndIncrement()) {
	                    	int y = ipix / owidth; // output image coordinates
	                    	int x = ipix % owidth; // output image coordinates
	                    	double fx = a1[0][0] * x + a1[0][1] * y + b1[0];
	                    	double fy = a1[1][0] * x + a1[1][1] * y + b1[1];
	                    	int ix0 = (int) Math.floor(fx);
	                    	int iy0 = (int) Math.floor(fy);
	                    	int ix1 = ix0 + 1;
	                    	int iy1 = iy0 + 1;
	                    	if ((ix1 >=0) && (iy1 >= 0) && (ix0 < src_width)  && (iy0 < src_height)) {
	                    		double dx = fx - ix0;
	                    		double dy = fy - iy0;
	                    		if (ix0 < 0) ix0 = 0; 
	                    		if (iy0 < 0) iy0 = 0;
	                    		if (ix1 >= src_width)  ix1 = src_width - 1;
	                    		if (iy1 >= src_height) iy1 = src_height - 1;
                    			int indx00 = ix0 + iy0 * src_width;
                    			int indx01 = ix1 + iy0 * src_width;
                    			int indx10 = ix0 + iy1 * src_width;
                    			int indx11 = ix1 + iy1 * src_width;
	                    		for (int islice = 0; islice < dpixels.length; islice++) {
	                    			int oslice = islice + foffs;
	                    			double d00 = dpixels[islice][indx00];
	                    			double d01 = dpixels[islice][indx01];
	                    			double d10 = dpixels[islice][indx10];
	                    			double d11 = dpixels[islice][indx11];
	                    			double d = d00*(1-dx)*(1-dy)+d01*dx*(1-dy)+d10*(1-dx)*dy+d11*dx*dy + voffs;
	                    			out_data[oslice][ipix]= d;
	                    			if (!Double.isNaN(d)) {
	                    				out_data[indx_avg][ipix] += d;
	                    				num_valid[ipix]++;
	                    			} else {
//	                    				nan_pixels[ipix] = true;
//	                    				anans.getAndIncrement();
	                    			}
	                    		}
	                    	}
	                    }
	                }
	            };
	        }		      
	        ImageDtt.startAndJoin(threads);
	        /*
	        if (anans.get() == 0) {
	        	tn.growSelection(
	        			grow_nans,  // int              grow,           // grow tile selection by 1 over non-background tiles 1: 4 directions, 2 - 8 directions, 3 - 8 by 1, 4 by 1 more
	        			nan_pixels, // final boolean [] tiles,
	        			null);      // final boolean [] prohibit)
	        	for (int i = 0; i < nan_pixels.length; i++) {
	        		weights[i] = nan_pixels[i]? 0.0 : 1.0;
	        	}
	    		(new DoubleGaussianBlur()).blurDouble(
	    				weights,       // double[] pixels,
	    				tn.getSizeX(), // int width,
	    				tn.getSizeY(), // int height,
	    				fade_sigma,    // double sigmaX,
	    				fade_sigma,    // double sigmaY,
	    				0.01);         // double accuracy);
	        } else {
	        	Arrays.fill(weights, 1.0);
	        }
	        ai.set(0);
	        for (int ithread = 0; ithread < threads.length; ithread++) {
	            threads[ithread] = new Thread() {
	                public void run() {
	                    for (int ipix = ai.getAndIncrement(); ipix < olen; ipix = ai.getAndIncrement()) {
	                    	
	                    	
	                    }
	                }
	            };
	        }		      
	        ImageDtt.startAndJoin(threads);
	        */
		}
		
		for (int ipix=0; ipix < olen; ipix++) {
			if (num_valid[ipix] == 0) {
				out_data[indx_avg][ipix] = Double.NaN;
			} else {
				out_data[indx_avg][ipix]/=num_valid[ipix];
			}
		}
		ShowDoubleFloatArrays.showArrays(
				out_data,
				owoi.width,
				owoi.height,
				true,
				"merged_seq_scale"+oscale+"-slices"+indx_avg+"_x"+woi.x+"_y"+woi.y+"_w"+woi.width+"_h"+woi.height+(zero_is_nan?"_zeronan":""),
				titles);
		return;
	}
	
	public static double [][] getFadingWeights(
			final double [][] data, // input data from one file, with NaNs
			final int         width,
			final int         shrink, // 2 for 8 directions
			final double      fade_sigma, // gaussian blur
			final boolean     nan_only){
		final int hshrink =   shrink/2;
		final int num_scenes = data.length; 
		final int num_pixels = data[0].length;
		final int height = num_pixels/width;
		final double [][] weights = new double [num_scenes][num_pixels];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
        for (int ithread = 0; ithread < threads.length; ithread++) {
            threads[ithread] = new Thread() {
                public void run() {
                	boolean [] nan_pixels = new boolean [num_pixels];
            		final TileNeibs tn = new TileNeibs (width, height);
                    for (int nscene = ai.getAndIncrement(); nscene < num_scenes; nscene = ai.getAndIncrement()) {
                    	int num_nans = 0;
                    	Arrays.fill(nan_pixels, false);
                    	Arrays.fill(weights[nscene], 1.0);
                    	for (int ipix = 0; ipix < num_pixels; ipix++) if (Double.isNaN(data[nscene][ipix])){
                    		nan_pixels[ipix] = true; // invert
                    		num_nans++;
                    	}
                    	if (num_nans > 0) {
                    		// grow here, before borders
                    		tn.growSelection(
                    				shrink,     // int              grow,           // grow tile selection by 1 over non-background tiles 1: 4 directions, 2 - 8 directions, 3 - 8 by 1, 4 by 1 more
                    				nan_pixels, // final boolean [] tiles,
                    				null);      // final boolean [] prohibit)
                    		for (int i = 0; i < nan_pixels.length; i++) if (nan_pixels[i]){
                    			weights[nscene][i] = 0.0;
                    		}
                    	}
                    	if (!nan_only) {
                    		for (int n = 0; n < hshrink; n++) {
                    			int indx0 = n * width;
                    			int indx1 = (height-n-1) * width;
                    			int n1 = width-n-1;
                    			for (int i = 0; i < width; i++) {
                    				weights[nscene][indx0 + i] = 0;
                    				weights[nscene][indx1 + i] = 0;
                    			}
                    			for (int i = hshrink; i < height-hshrink; i++) {
                    				weights[nscene][i*width + n] =  0;
                    				weights[nscene][i*width + n1] = 0;
                    			}
                    		}
                    	}
        	    		(new DoubleGaussianBlur()).blurDouble(
        	    				weights[nscene],       // double[] pixels,
        	    				tn.getSizeX(), // int width,
        	    				tn.getSizeY(), // int height,
        	    				fade_sigma,    // double sigmaX,
        	    				fade_sigma,    // double sigmaY,
        	    				0.01);         // double accuracy);
                    }
                }
            };
        }		      
        ImageDtt.startAndJoin(threads);
		return weights;
	}
	
	
	
	
	public static double [][] warpImageSequenceBilinear(
			final double [][] data_in,
			final double [][] weights, // may be null?
			final int         width,
			final double [][] ab1, // {{a00, a01},{a10,a11},{b0,b1}}
			final Rectangle   owoi,
			final double      offs, // add to data out
			final double      oscale) {
		final int num_scenes =  data_in.length; 
		final int num_pixels =  data_in[0].length;
		final int height =      num_pixels/width;
		final int owidth =      owoi.width;
		final int oheight =     owoi.height;
		final int num_opixels = owidth*oheight;
		final double [][] data_out = new double [num_scenes+2][num_opixels]; // {..., average, sum_weight}
		final int indx_average =    num_scenes;
		final int indx_sumweights = num_scenes+1;
		for (int nscene = 0; nscene <= indx_average; nscene++) {
			Arrays.fill(data_out[nscene], Double.NaN); // [indx_sumweights] = 0 
		}
		
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		double k = 1.0/oscale;
        final double [][] a1 = {{k*ab1[0][0],k*ab1[0][1]},{k*ab1[1][0],k*ab1[1][1]}};
        final double []   b1 = {
        		ab1[2][0] + a1[0][0]*owoi.x + a1[0][1]*owoi.y,
        		ab1[2][1] + a1[1][0]*owoi.x + a1[1][1]*owoi.y}; // from output coordinates to image2 coordinates
        for (int ithread = 0; ithread < threads.length; ithread++) {
        	threads[ithread] = new Thread() {
        		public void run() {
        			for (int ipix = ai.getAndIncrement(); ipix < num_opixels; ipix = ai.getAndIncrement()) {
        				int y = ipix / owidth; // output image coordinates
        				int x = ipix % owidth; // output image coordinates
        				double fx = a1[0][0] * x + a1[0][1] * y + b1[0];
        				double fy = a1[1][0] * x + a1[1][1] * y + b1[1];
        				int ix0 = (int) Math.floor(fx);
        				int iy0 = (int) Math.floor(fy);
        				int ix1 = ix0 + 1;
        				int iy1 = iy0 + 1;
        				if ((ix1 >=0) && (iy1 >= 0) && (ix0 < width)  && (iy0 < height)) {
        					double dx = fx - ix0;
        					double dy = fy - iy0;
        					if (ix0 < 0) ix0 = 0; 
        					if (iy0 < 0) iy0 = 0;
        					if (ix1 >= width)  ix1 = width - 1;
        					if (iy1 >= height) iy1 = height - 1;
        					int indx00 = ix0 + iy0 * width;
        					int indx01 = ix1 + iy0 * width;
        					int indx10 = ix0 + iy1 * width;
        					int indx11 = ix1 + iy1 * width;
        					for (int nscene = 0; nscene < num_scenes; nscene++) {
        						double d00 = data_in[nscene][indx00];
        						double d01 = data_in[nscene][indx01];
        						double d10 = data_in[nscene][indx10];
        						double d11 = data_in[nscene][indx11];
        						double w00 = weights[nscene][indx00];
        						double w01 = weights[nscene][indx01];
        						double w10 = weights[nscene][indx10];
        						double w11 = weights[nscene][indx11];
        						double d = d00*(1-dx)*(1-dy)+d01*dx*(1-dy)+d10*(1-dx)*dy+d11*dx*dy + offs;
        						double w = w00*(1-dx)*(1-dy)+w01*dx*(1-dy)+w10*(1-dx)*dy+w11*dx*dy;
        						data_out[nscene][ipix]= d;
        						if (!Double.isNaN(d) && (w > 0)) {
        							double d_old = data_out[indx_average][ipix];
        							if (Double.isNaN(d_old)) {
        								data_out[indx_average][ipix] =    d;
        								data_out[indx_sumweights][ipix] = w;
        							} else {
            							double w_old = data_out[indx_sumweights][ipix];
            							double w_new = w_old + w;
            							data_out[indx_average][ipix] = (d_old*w_old + d * w)/w_new;
        								data_out[indx_sumweights][ipix] = w_new;
        							}
        						}
        					}
        				}
        			}
        		}
        	};
        }		      
        ImageDtt.startAndJoin(threads);
		return data_out;
	}
	
	public static void combineAverages(
			final double [] new_data,
			final double [] new_weight,
			final double [] avg_data,
			final double [] sum_weight) {
		final int num_pixels = new_data.length;
		if ((new_weight.length != num_pixels) || (avg_data.length != num_pixels) || (sum_weight.length != num_pixels)) {
			throw new IllegalArgumentException("Not all data arrays are the same length");
		}
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
        for (int ithread = 0; ithread < threads.length; ithread++) {
            threads[ithread] = new Thread() {
                public void run() {
                    for (int ipix = ai.getAndIncrement(); ipix < num_pixels; ipix = ai.getAndIncrement()) {
                    	double d = new_data[ipix];
                    	double w = new_weight[ipix];
                    	if (!Double.isNaN(d) && (w > 0)) {
                    		double d_old = avg_data[ipix];
							if (Double.isNaN(d_old)) {
								avg_data[ipix] =   d;
								sum_weight[ipix] = w;
							} else {
    							double w_old = sum_weight[ipix];
    							double w_new = w_old + w;
    							avg_data[ipix] = (d_old*w_old + d * w)/w_new;
    							sum_weight[ipix] = w_new;
							}
                    		
                    	}
                    }
                }
            };
        }		      
        ImageDtt.startAndJoin(threads);

	}
	
	
	public static void testOrange() {
		int    shrink_src2=  16; // shrink source by 8 pixels  
		double fade_sigma = 4.0;
		final boolean     nan_only = false; // shrink from borders also
		boolean show_debug = true;
		boolean zero_is_nan=true;
		int master = 3;
		final double [] voffsets = value_offsets;
		int [][] segments = match_segments; // full_segments; // single_segments;
		double    oscale = 2.0;
//		Rectangle woi = new Rectangle(0,0,640,512); // full window;
//		Rectangle woi = new Rectangle(92,223,320,240); // approx. overlap
		Rectangle woi = new Rectangle(92,280,320,120); // minimal overlap
		final Rectangle owoi = new Rectangle (
				(int) Math.round(oscale *woi.x ),
				(int) Math.round(oscale *woi.y ),
				(int) Math.round(oscale *woi.width),
				(int) Math.round(oscale *woi.height));
		int [][] seg_offs_len = new int [segments.length][2];
		int offs = 0;
		for (int nimg = 0; nimg < seg_offs_len.length; nimg++) {
			seg_offs_len[nimg][0] = offs;
			int len = segments[nimg][1] - segments[nimg][0]; // length;
			seg_offs_len[nimg][1] = len;
			offs += len;
		}
		final int num_scenes = offs;
		final int olen = owoi.width * owoi.height;
//		final int owidth =  owoi.width;
		final double [][] out_data = new double [num_scenes+2][]; // [owoi.width * owoi.height];
		final int indx_average =    num_scenes;
		final int indx_sumweights = num_scenes+1;
		out_data[indx_average] =    new double [olen];
		Arrays.fill(out_data[indx_average], Double.NaN); // [indx_sumweights] = 0 

		out_data[indx_sumweights] = new double [olen];
		
		
		String [] titles = new String[out_data.length];
		titles [indx_average] = "average";
		titles [indx_sumweights] = "weight";
		
		
		for (int nimg = 0; nimg < seg_offs_len.length; nimg++) {
			ImagePlus imp_src = new ImagePlus(src_files[nimg]);
			ImageStack stack = imp_src.getImageStack();
	        final int src_width = imp_src.getWidth();
			final int num_slices = stack.getSize(); // .length;
			final String [] stack_labels = new String[num_slices];
			System.arraycopy(
					stack.getSliceLabels(),
					0,
					stack_labels,
					0,
					num_slices);
			final double [][] dpixels = new double [seg_offs_len[nimg][1]][];
			for (int i = 0; i < seg_offs_len[nimg][1]; i++) {
				titles[seg_offs_len[nimg][0] + i] =nimg+":"+stack_labels[segments[nimg][0]+i];
				float [] fpixels = (float[]) stack.getPixels(segments[nimg][0]+i+1);
				dpixels[i] = new double [fpixels.length];
				for (int j = 0; j < dpixels[i].length; j++) {
					dpixels[i][j] = fpixels[j];
					if (zero_is_nan && (dpixels[i][j] == 0.0)) {
						dpixels[i][j] = Double.NaN;
					}
				}
			}
			final double [][] points0 = new double[matching_points[master].length][];
			final double [][] points1 = new double[points0.length][]; // matching_points[nimg];
			for (int i = 0; i < points0.length;i++) {
				points0[i] = new double [] {matching_points[master][i][0]/points_scale, matching_points[master][i][1]/points_scale};
				points1[i] = new double [] {matching_points[nimg  ][i][0]/points_scale, matching_points[nimg  ][i][1]/points_scale};
			}
			double [][] ab1 = ComboMatch.getAffineMatrix(
					points0, //  double [][] x,
					points1); // double [][] y){
			
			final double [][] weights_in =  getFadingWeights(
					dpixels,        // final double [][] data, // input data from one file, with NaNs
					src_width,      // final int         width,
					shrink_src2,    // final int         shrink, // 2 for 8 directions
					fade_sigma,     // final double      fade_sigma, // gaussian blur
					nan_only);      // final boolean     nan_only)
			final double [][] data_warped =   warpImageSequenceBilinear(
					dpixels,        // final double [][] data_in,
					weights_in,     // final double [][] weights, // may be null?
					src_width,      // final int         width,
					ab1,            // final double [][] ab1, // {{a00, a01},{a10,a11},{b0,b1}}
					owoi,           // final Rectangle   owoi,
					voffsets[nimg], // final double      offs, // add to data out
					oscale);        // final double      oscale)
			if (show_debug) {
				String [] dbg_titles = new String[data_warped.length];
				for (int i = 0; i < seg_offs_len[nimg][1]; i++) {
					dbg_titles[ i] =i+":"+stack_labels[segments[nimg][0]+i];
				}
				dbg_titles[dbg_titles.length-2]="average";
				dbg_titles[dbg_titles.length-1]="weight";
				ShowDoubleFloatArrays.showArrays(
						data_warped,
						owoi.width,
						owoi.height,
						true,
						"merged_seq_frag_"+imp_src.getTitle()+"-scale"+oscale+"-slices"+data_warped.length+"_x"+woi.x+"_y"+woi.y+"_w"+woi.width+"_h"+woi.height+(zero_is_nan?"_zeronan":""),
						titles);
			}
			
			
			// copy individual images
			System.arraycopy(
					data_warped,
					0,
					out_data,
					seg_offs_len[nimg][0],   // scene offset
					seg_offs_len[nimg][1]); // number of scenes in a file
			// combine averages and weights
			combineAverages(
					data_warped[data_warped.length-2], // final double [] new_data,
					data_warped[data_warped.length-1], // final double [] new_weight,
					out_data[indx_average],         // final double [] avg_data,
					out_data[indx_sumweights]);     // final double [] sum_weight)
		}
		ShowDoubleFloatArrays.showArrays(
				out_data,
				owoi.width,
				owoi.height,
				true,
				"merged_seq_scale"+oscale+"-slices"+indx_average+"_x"+woi.x+"_y"+woi.y+"_w"+woi.width+"_h"+woi.height+(zero_is_nan?"_zeronan":""),
				titles);
		return;
	}
	
	
	
	
	

}