/**
 ** Render3D - render 3D view to the specified plane
 **
 ** Copyright (C) 2023 Elphel, Inc.
 **
 ** -----------------------------------------------------------------------------**
 **
 **  Render3D.java is free software: you can redistribute it and/or modify
 **  it under the terms of the GNU General Public License as published by
 **  the Free Software Foundation, either version 3 of the License, or
 **  (at your option) any later version.
 **
 **  This program is distributed in the hope that it will be useful,
 **  but WITHOUT ANY WARRANTY; without even the implied warranty of
 **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 **  GNU General Public License for more details.
 **
 **  You should have received a copy of the GNU General Public License
 **  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ** -----------------------------------------------------------------------------**
 **
 */
package com.elphel.imagej.x3d.export;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.math3.analysis.interpolation.PiecewiseBicubicSplineInterpolatingFunction;
import org.apache.commons.math3.geometry.euclidean.threed.Vector3D;
import org.apache.commons.math3.util.FastMath;

import com.elphel.imagej.tileprocessor.ErsCorrection;
import com.elphel.imagej.tileprocessor.ImageDtt;
import com.elphel.imagej.tileprocessor.QuadCLT;

public class Render3D {
	// Upper limit passed to ImageDtt.newThreadArray() when creating worker threads
	static final int  THREADS_MAX = 100;  // maximal number of threads to launch
	// Zero-filled 3-element array (not referenced in the visible part of this file)
	static double []  ZERO3 = new double[3];
	// Transformation given to the constructor; applied with ErsCorrection.applyXYZATR()
	// to convert mesh coordinates to ground-plane coordinates
	final double [][] toground;
	// Inverse of toground, calculated by ErsCorrection.invertXYZATR() in the constructor
	final double [][] tocam;
	// Reference scene (stored by the constructor, all coordinates are relative to it)
	final QuadCLT     ref_scene;
	final double      pixel_per_m;  // pixels per meter (1.0 / pixel_size)
	final int         out_width;    // output rendered image width in pixels
	final int         out_height;   // output rendered image height in pixels
//	final String      x3d_dir;
//	final String      model_name;
	// Ground-plane {x, y} of the output render origin (subtracted before scaling to pixels)
	final double  []  x0_y0; 
	// The x0_y0 point of the ground plane (z = 0) converted with tocam
	final Vector3D    ground_origin;
	// Unit vector of the ground plane X direction, converted with tocam
	final Vector3D    ground_x;
	// Unit vector of the ground plane Y direction, converted with tocam
	final Vector3D    ground_y;
	// ground_origin with its components along ground_x and ground_y removed
	final Vector3D    ground_normal;
	// Normalized ground_normal (or normalized ground_x cross ground_y if ground_normal can not be normalized)
	final Vector3D    ground_normal_unit;
	// Length of ground_normal
	final double      above_ground;
	// {ground_x . ground_origin, ground_y . ground_origin} - in-plane offsets subtracted by projectToPlane*()
	final double []   xy_offs;

	
	/**
	 * Prepare rendering of a 3D model to a plane. Stores the provided parameters,
	 * inverts the to-ground transformation and pre-calculates the plane origin, its
	 * in-plane unit vectors and the normal, all converted with the inverted transformation.
	 *
	 * @param ref_scene  reference scene - all coordinates are relative to this scene
	 * @param toground   projection plane center relative to the reference scene
	 *                   (in the form accepted by ErsCorrection.invertXYZATR()/applyXYZATR())
	 * @param pixel_size output pixel size in meters
	 * @param x0_y0      plane coordinates {x, y} of the output render origin (usually negative)
	 * @param out_width  output rendered image width in pixels
	 * @param out_height output rendered image height in pixels
	 */
	public Render3D (
			QuadCLT      ref_scene,
			double [][]  toground,
			double       pixel_size,
			double []    x0_y0,
			int          out_width,
			int          out_height){
		this.ref_scene =   ref_scene;
		this.pixel_per_m = 1.0 / pixel_size;
		this.out_width =   out_width;
		this.out_height =  out_height;
		this.toground =    toground;
		this.x0_y0 =       x0_y0; // used in parallel projection BOTTOM-left
		this.tocam =       ErsCorrection.invertXYZATR(this.toground);
		// Three points of the ground plane (origin, origin + 1 along X, origin + 1 along Y)
		// converted with the inverted transformation
		final double plane_x0 = x0_y0[0];
		final double plane_y0 = x0_y0[1];
		final Vector3D origin =  new Vector3D(ErsCorrection.applyXYZATR(tocam, new double [] {plane_x0,       plane_y0,       0.0}));
		final Vector3D x_plus1 = new Vector3D(ErsCorrection.applyXYZATR(tocam, new double [] {plane_x0 + 1.0, plane_y0,       0.0}));
		final Vector3D y_plus1 = new Vector3D(ErsCorrection.applyXYZATR(tocam, new double [] {plane_x0,       plane_y0 + 1.0, 0.0}));
		this.ground_origin = origin;
		this.ground_x = x_plus1.subtract(origin).normalize(); // unity in plane X direction
		this.ground_y = y_plus1.subtract(origin).normalize(); // unity in plane Y direction
		// Remove in-plane components of the origin vector, what remains is normal to the plane
		final Vector3D along_x = ground_x.scalarMultiply(origin.dotProduct(ground_x));
		final Vector3D along_y = ground_y.scalarMultiply(origin.dotProduct(ground_y));
		this.ground_normal = origin.subtract(along_x).subtract(along_y);
		Vector3D unit_normal;
		try {
			unit_normal = ground_normal.normalize();
		} catch (Exception e) {
			// ground_normal could not be normalized - fall back to the cross product of the in-plane vectors
			System.out.println("Zero above ground");
			unit_normal = ground_x.crossProduct(ground_y).normalize();
		}
		this.ground_normal_unit = unit_normal; // only used for center projection where ground_origin != 0;
		this.above_ground = ground_normal.getNorm();
		this.xy_offs = new double[] {ground_x.dotProduct(origin), ground_y.dotProduct(origin)};
	}
	
	/**
	 * Project a 3D point to the ground plane along the line through the coordinate
	 * origin and get in-plane coordinates in the same linear units as the input.
	 * Not used in the visible part of this file.
	 *
	 * @param v3 point x,y,z to project
	 * @return {x, y, z}: x,y - in-plane coordinates relative to the ground origin,
	 *         z - projection of v3 on the unit normal of the plane
	 */
	public double [] projectToPlaneLinear(Vector3D v3) {
		final double   along_normal = ground_normal_unit.dotProduct(v3);
		// scale the point so its projection on the normal equals above_ground
		final Vector3D on_plane =     v3.scalarMultiply(above_ground/along_normal);
		return new double[] {
				ground_x.dotProduct(on_plane) - xy_offs[0],
				ground_y.dotProduct(on_plane) - xy_offs[1],
				along_normal};
	}
	
	/**
	 * Project a 3D point to the ground plane along the line through the coordinate
	 * origin and get in-plane coordinates scaled to output pixels (center projection).
	 *
	 * @param v3 point x,y,z to project
	 * @return {x, y, z}: x,y - in-plane pixel coordinates relative to the ground origin,
	 *         z - projection of v3 on the unit normal of the plane, scaled by pixel_per_m
	 */
	public double [] projectToPlanePixels(Vector3D v3) {
		final double   along_normal = ground_normal_unit.dotProduct(v3);
		// scale the point so its projection on the normal equals above_ground
		final Vector3D on_plane =     v3.scalarMultiply(above_ground/along_normal);
		final double   plane_x =      ground_x.dotProduct(on_plane) - xy_offs[0];
		final double   plane_y =      ground_y.dotProduct(on_plane) - xy_offs[1];
		return new double[] {
				pixel_per_m * plane_x,
				pixel_per_m * plane_y,
				pixel_per_m * along_normal}; // same linear scale as in-plane coordinates
	}
	/**
	 * Z-component of the cross product of two 2D vectors.
	 * @param v1 first vector, elements [0] and [1] are used
	 * @param v2 second vector, elements [0] and [1] are used
	 * @return v1.x * v2.y - v1.y * v2.x
	 */
	public static double cross2 (double [] v1, double [] v2) {
		final double x1_y2 = v1[0] * v2[1];
		final double y1_x2 = v1[1] * v2[0];
		return x1_y2 - y1_x2;
	}
	/**
	 * Test the sign of the 2D cross product.
	 * @param v1 first vector, elements [0] and [1] are used
	 * @param v2 second vector, elements [0] and [1] are used
	 * @return true if v1.x * v2.y - v1.y * v2.x is strictly positive
	 *         (false for collinear vectors and for NaN)
	 */
	public static boolean cross2ccw (double [] v1, double [] v2) {
		final double cross_z = v1[0] * v2[1] - v1[1] * v2[0];
		return cross_z > 0;
	}

	/**
	 * Dot product of two 2D vectors.
	 * @param v1 first vector, elements [0] and [1] are used
	 * @param v2 second vector, elements [0] and [1] are used
	 * @return v1.x * v2.x + v1.y * v2.y
	 */
	public static double dot2 (double [] v1, double [] v2) {
		final double x_part = v1[0] * v2[0];
		final double y_part = v1[1] * v2[1];
		return x_part + y_part;
	}
	/**
	 * Scale a 2D vector to unit length.
	 * @param v source vector, elements [0] and [1] are used (not modified)
	 * @return new 2-element array - v divided by its Euclidean length
	 *         (no special handling of a zero-length input)
	 */
	public static double [] normalize2 (double [] v) {
		final double length = FastMath.sqrt (v[0] * v[0] + v[1]*v[1]);
		final double [] unit = new double[2];
		unit[0] = v[0]/length;
		unit[1] = v[1]/length;
		return unit;
	}
	
	/**
	 * Get a 2D vector w that is orthogonal to v1 and has a unity dot product with v2:
	 * w.dot(v1) == 0, w.dot(v2) == 1 (for non-collinear v1, v2).
	 * @param v1 vector the result is orthogonal to
	 * @param v2 vector the result has unity dot product with
	 * @return new 2-element array w
	 */
	public static double [] orthonormSingle2(double [] v1, double [] v2) {
		final double v1_v1 = dot2(v1, v1);
		final double v1_v2 = dot2(v1, v2);
		final double v2_v2 = dot2(v2, v2);
		final double proj =  v1_v2/v1_v1; // v2 component along v1, in units of v1
		final double scale = v1_v1/(v2_v2*v1_v1 - v1_v2 * v1_v2);
		final double [] w = new double[2];
		for (int i = 0; i < w.length; i++) {
			w[i] = scale * (v2[i] - v1[i]*proj);
		}
		return w;
	}
	
	/**
	 * Get a pair of vectors to decompose any 2D vector p along v1 and v2:
	 *  p = v1 * (orthonorm2(v1,v2)[0]).dot(p)) + v2 * (orthonorm2(v1,v2)[1]).dot(p))
	 * @param v1 first basis vector
	 * @param v2 second basis vector (should not be collinear with v1)
	 * @return pair of 2D vectors: [0] to get the coefficient for v1, [1] - for v2
	 */
	public static double [][] orthonorm2(double [] v1, double [] v2) {
		final double [] for_v1 = orthonormSingle2(v2, v1); // orthogonal to v2, unity dot product with v1
		final double [] for_v2 = orthonormSingle2(v1, v2); // orthogonal to v1, unity dot product with v2
		return new double [][] {for_v1, for_v2};
	}
	
	/**
	 * Crop a multi-channel image stored as [channel][pixel] in line-scan order.
	 * Optionally removes empty margins first, then removes additional crop_extra
	 * pixels from each side.
	 *
	 * A pixel is considered non-empty if channel 0 is not NaN and, when indx_alpha
	 * is non-negative, the value of the alpha channel is not &lt;= 0.0.
	 *
	 * @param crop_empty  remove margins that contain only empty pixels
	 * @param indx_alpha  index of the alpha channel or negative to ignore alpha
	 * @param crop_extra  number of pixels to additionally remove from each of 4 sides
	 * @param width       source image width in pixels
	 * @param ltwh        should be initialized to int [4], will be set to
	 *                    {left, top, width, height} of the cropped area (not modified if null is returned)
	 * @param img_src     source image [channel][width*height]
	 * @return cropped image [channel][out_width*out_height] or null if crop_empty
	 *         is set and there are no non-empty pixels
	 */
	public static double [][] cropRectified(
			boolean crop_empty,
			int     indx_alpha,
			int     crop_extra,
			int     width,
			int []  ltwh, // should be initialized to int [4] (left,top,width, height)
			double [][] img_src){
		final int height = img_src[0].length/width;
		int left = 0, top = 0, right = 0, bottom = 0; // margins to remove
		if (crop_empty) {
			final boolean use_alpha = indx_alpha >= 0;
			// bounding box of non-empty pixels, max_x < 0 means nothing found yet
			int min_x = Integer.MAX_VALUE, max_x = -1;
			int min_y = Integer.MAX_VALUE, max_y = -1;
			for (int iy = 0, row_start = 0; iy < height; iy++, row_start += width) {
				for (int ix = 0; ix < width; ix++) {
					final int pix = row_start + ix;
					if (Double.isNaN(img_src[0][pix])) {
						continue; // undefined pixel
					}
					if (use_alpha && (img_src[indx_alpha][pix] <= 0.0)) {
						continue; // transparent pixel
					}
					min_x = Math.min(min_x, ix);
					max_x = Math.max(max_x, ix);
					min_y = Math.min(min_y, iy);
					max_y = Math.max(max_y, iy);
				}
			}
			if (max_x < 0) {
				return null; // all pixels are empty
			}
			left =   min_x;
			top =    min_y;
			right =  width -  1 - max_x;
			bottom = height - 1 - max_y;
		}
		left +=   crop_extra;
		top +=    crop_extra;
		right +=  crop_extra;
		bottom += crop_extra;
		final int crop_width =  width - left - right;
		final int crop_height = height - top - bottom;
		ltwh[0] = left;
		ltwh[1] = top;
		ltwh[2] = crop_width;
		ltwh[3] = crop_height;
		final double [][] cropped = new double [img_src.length][crop_width*crop_height];
		for (int chn = 0; chn < cropped.length; chn++) {
			int src_pos = top * width + left;
			int dst_pos = 0;
			for (int row = 0; row < crop_height; row++) {
				System.arraycopy(img_src[chn], src_pos, cropped[chn], dst_pos, crop_width);
				src_pos += width;
				dst_pos += crop_width;
			}
		}
		return cropped;
	}
	
	/**
	 * Find the bounding box of all mesh vertices after applying the provided
	 * transformation. Vertices are processed by multiple threads, each thread
	 * accumulates its own minimum/maximum, the partial results are combined in the end.
	 *
	 * @param tri_meshes       list of meshes, vertices are taken from getCoordinates()
	 * @param xyzatr_toground  transformation applied to each vertex with ErsCorrection.applyXYZATR()
	 * @param debugLevel       debug level (not used)
	 * @return array [3][2] of {min, max} for x, y and z of the transformed vertices.
	 *         Elements remain null if no vertex produced a valid (non-null, non-NaN) result.
	 */
	public static double [][] getBounds(
			final ArrayList<TriMesh> tri_meshes,
			final double [][]        xyzatr_toground,
			int                      debugLevel){  // debug level
		final Thread[] workers = ImageDtt.newThreadArray(THREADS_MAX);
		final AtomicInteger next_vertex = new AtomicInteger(0); // shared index of the next vertex to process
		final AtomicInteger next_slot =   new AtomicInteger(0); // assigns a private accumulator to each thread
		final double [][][] partial = new double [workers.length][3][]; // per-thread {min,max} for x,y,z
		for (TriMesh mesh:tri_meshes) {
			final double [][] vertices = mesh.getCoordinates();
			next_vertex.set(0);
			next_slot.set(0);
			for (int nthread = 0; nthread < workers.length; nthread++) {
				workers[nthread] = new Thread() {
					public void run() {
						final double [][] acc = partial[next_slot.getAndIncrement()]; // this thread accumulator
						int nvert;
						while ((nvert = next_vertex.getAndIncrement()) < vertices.length) {
							final double [] gnd = ErsCorrection.applyXYZATR(xyzatr_toground, vertices[nvert]);
							// maybe no need to check for Double.isNaN()?
							if (gnd == null) {
								continue;
							}
							if (Double.isNaN(gnd[0]) || Double.isNaN(gnd[1]) || Double.isNaN(gnd[2])) {
								continue;
							}
							if (acc[0] == null) { // first valid vertex for this thread
								for (int j = 0; j < gnd.length; j++) {
									acc[j] = new double[] {gnd[j],gnd[j]};
								}
							} else {
								for (int j = 0; j < acc.length; j++) {
									acc[j][0] = Math.min(acc[j][0],  gnd[j]);
									acc[j][1] = Math.max(acc[j][1],  gnd[j]);
								}
							}
						}
					}
				};
			}
			ImageDtt.startAndJoin(workers);
		}
		// combine per-thread results, skipping threads that did not get any valid vertex
		final double [][] bounds = new double[3][];
		for (double [][] acc: partial) {
			if (acc[0] == null) {
				continue;
			}
			if (bounds[0] == null) {
				for (int j = 0; j < bounds.length; j++) {
					bounds[j] = new double [] {acc[j][0],acc[j][1]};
				}
			} else {
				for (int j = 0; j < bounds.length; j++) {
					bounds[j][0] = Math.min(bounds[j][0], acc[j][0]);
					bounds[j][1] = Math.max(bounds[j][1], acc[j][1]);
				}
			}
		}
		return bounds; // y is up
	}
	
	
	/**
	 * Render textured triangular meshes to the ground plane using parallel projection
	 * and bilinear interpolation of the texture.
	 *
	 * Each vertex is converted to the ground coordinates with toground, shifted by x0_y0
	 * and scaled by pixel_per_m. Triangles that fail the direction test
	 * cross2ccw(d02,d01) are skipped. Each thread renders its share of triangles to a
	 * private sparse per-pixel buffer (keeping the sample with the lower z for each pixel),
	 * the buffers are merged after all triangles are processed.
	 *
	 * @param tri_meshes    list of meshes to render. Number of texture channels is taken from the first mesh
	 * @param last_is_alpha last texture channel is alpha: samples with interpolated alpha below 0.5
	 *                      are not rendered, and the alpha channel of the result is not pre-filled with NaN
	 * @param debugLevel    debug level, summary is printed if above -2
	 * @return rendered image [channels + 1][out_width * out_height] in line-scan order (top row first),
	 *         the last slice is interpolated z. Color channels of not rendered pixels are NaN.
	 *         Returns null if tri_meshes is null, empty or the first mesh has no texture.
	 */
	public  double [][] render3dPlaneParallelProj(
			final ArrayList<TriMesh> tri_meshes,
			final boolean            last_is_alpha,
			int                      debugLevel){  // debug level
		// TODO: add crop - add to the caller
		if ((tri_meshes == null) || tri_meshes.isEmpty() || (tri_meshes.get(0).getTexturePixels() == null)) {
			return null;
		}
		final boolean export_z = true;
		final int dbg_ipix=1673752; // debug: output pixel index to report when it is rendered
		// get total number of triangles
		int num_tri=0;
		for (TriMesh tri: tri_meshes) {
			num_tri += tri.getTriangles().length;
		}
		// flat list of all triangles: {mesh index, triangle index in the mesh}
		int indx=0;
		int num_mesh = 0;
		final int [][] tri_index = new int[num_tri][2];
		for (TriMesh tri: tri_meshes) {
			int num_tri_mesh = tri.getTriangles().length;
			for (int i = 0; i < num_tri_mesh; i++)
			{
				tri_index[indx][0] =   num_mesh;
				tri_index[indx++][1] = i;
			}
			num_mesh++;
		}
		if (debugLevel > -2) {
			System.out.println("Prepare to render "+num_tri+" triangles in "+num_mesh+" meshes");
		}
		final int z_index = tri_meshes.get(0).getTexturePixels().length; // number of texture channels, index of z
		final double [][] full_rendered = new double[z_index+ (export_z? 1:0)][out_width * out_height];
		int alpha_index = last_is_alpha ? (z_index - 1) : z_index;
		for (int chn = 0; chn < alpha_index; chn++) { // alpha (if any) and z slices stay 0.0
			Arrays.fill(full_rendered[chn], Double.NaN);
		}
		// create z-buffer array per each thread, in the end - merge them
		final Thread[] threads = ImageDtt.newThreadArray(THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		final AtomicInteger ati = new AtomicInteger(0);
		// [thread][output pixel] -> null (nothing rendered) or {channels..., z}
		final double [][][] rendered = new double [threads.length][full_rendered[0].length][];
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					int ti = ati.getAndIncrement();
					double [][] rend = rendered[ti]; // this thread rendered results
					for (int indx = ai.getAndIncrement(); indx < tri_index.length; indx = ai.getAndIncrement()) {
						TriMesh tri=tri_meshes.get(tri_index[indx][0]); // mesh to process;
						int tri_indx = tri_index[indx][1]; // triangle index in a mesh
						double [][] texture =       tri.getTexturePixels();
						int texture_width=          tri.getTextureWidth();
						int texture_height=         tri.getTextureHeight();
						double[][] mesh_coord =     tri.getCoordinates();
						double[][] mesh_tex_coord = tri.getTexCoord();
						int [] triangle = tri.getTriangles()[tri_indx];
						double [][] tri_out2=  new double[3][]; // vertices in output pixels {x,y,z}
						double [][] tri_text2= new double[3][]; // vertices texture coordinates
						double [][] min_max_xyz = new double[3][2]; // bounding box of the triangle
						for (int i = 0; i < 3; i++) {
							double [] gxyz=ErsCorrection.applyXYZATR(toground, mesh_coord[triangle[i]]);
							tri_out2[i] =  new double [] {
									pixel_per_m * (gxyz[0] - x0_y0[0]), 
									pixel_per_m * (gxyz[1] - x0_y0[1]),
									-pixel_per_m * gxyz[2] // make it positive?  
							};
							//[2] - distance from the camera in "pixels" - same linear scale as on the ground. lower values obscure higher.
							tri_text2[i] = mesh_tex_coord[triangle[i]];
							for (int j = 0; j < 3; j++) {
								if ((i==0) || (tri_out2[i][j] < min_max_xyz[j][0])) min_max_xyz[j][0] = tri_out2[i][j]; 
								if ((i==0) || (tri_out2[i][j] > min_max_xyz[j][1])) min_max_xyz[j][1] = tri_out2[i][j]; 
							}
						}
						// Check plane direction
						double [] d01 = new double [] {tri_out2[1][0]-tri_out2[0][0], tri_out2[1][1]-tri_out2[0][1]};
						double [] d02 = new double [] {tri_out2[2][0]-tri_out2[0][0], tri_out2[2][1]-tri_out2[0][1]};
						if (!cross2ccw(d02,d01)) {
							continue; // wrong direction or degenerate triangle
						}
						
						int ipx_min = (int) Math.floor(min_max_xyz[0][0]);
						int ipx_max = (int) Math.ceil (min_max_xyz[0][1]);
						int ipy_min = (int) Math.floor(min_max_xyz[1][0]);
						int ipy_max = (int) Math.ceil (min_max_xyz[1][1]);
						// apply bounds
						if (ipx_min < 0) ipx_min = 0;
						if (ipy_min < 0) ipy_min = 0;
						if (ipx_max >= out_width) ipx_max = out_width - 1;
						if (ipy_max >= out_height) ipy_max = out_height - 1;
						if ((ipx_min > ipx_max) || (ipy_min > ipy_max)) {
							continue; // triangle completely outside rendering area
						}
						// texture vectors from vertex 0 to vertices 1 and 2
						double [] t01 = new double [] {tri_text2[1][0]-tri_text2[0][0], tri_text2[1][1]-tri_text2[0][1]};
						double [] t02 = new double [] {tri_text2[2][0]-tri_text2[0][0], tri_text2[2][1]-tri_text2[0][1]};
						// vectors to get coefficients of the (d01, d02) decomposition of a point
						double [][] orto2 = orthonorm2(d01, d02);
						// vector from 1 to 2
						double [] d12 = new double [] {tri_out2[2][0]-tri_out2[1][0], tri_out2[2][1]-tri_out2[1][1]};
						for (int ipy = ipy_min; ipy <= ipy_max; ipy++) {
							for (int ipx = ipx_min; ipx <= ipx_max; ipx++) {
								// check it is inside triangle (strictly, pixels exactly on an edge are skipped)
								double [] d0p = new double[] {ipx-tri_out2[0][0],ipy-tri_out2[0][1]};
								if (!cross2ccw(d0p,d01)) continue; 
								if (!cross2ccw(d02,d0p)) continue; 
								double [] d1p = new double[] {ipx-tri_out2[1][0],ipy-tri_out2[1][1]};
								if (!cross2ccw(d1p,d12)) continue;
								int ipix = ipx +(out_height - 1 -ipy) * out_width; // Y goes down
								if (ipix== dbg_ipix) {
									System.out.println("ipix="+ipix);
								}
								// See if the rendered pixel is closer than the closest of the corners
								if ((rend[ipix] != null ) && (rend[ipix][z_index] < min_max_xyz[2][0])) {
									continue; 
								}
								// coefficients of d0p decomposition along d01 and d02
								double kx = dot2(d0p, orto2[0]);
								double ky = dot2(d0p, orto2[1]);
								// interpolate z
								double z_interp = tri_out2[0][2] +
										kx * (tri_out2[1][2]-tri_out2[0][2]) +
										ky * (tri_out2[2][2]-tri_out2[0][2]);
								if ((rend[ipix] != null ) && (rend[ipix][z_index] < z_interp)) {
									continue; // already rendered pixel has lower z
								}
								// Get corresponding texture coordinates
								double text_x = tri_text2[0][0] + kx * t01[0] + ky*t02[0]; // texture relative coordinates (0,1)
								double text_y = tri_text2[0][1] + kx * t01[1] + ky*t02[1]; // y - up!
								double px = text_x * texture_width - 0.5; // (0.0,0.0) - center of top-left texture pixel
								double py = (1.0-text_y) * texture_height -0.5;
								int ipx0 = (int) Math.floor(px);
								int ipy0 = (int) Math.floor(py);
								double fx = px - ipx0;
								double fy = py - ipy0;
								int ipx1 = ipx0+1;
								int ipy1 = ipy0+1;
								// Row index is compared to the texture height (was compared to the width,
								// that caused out of bounds access or dropped valid rows of non-square textures)
								if ((ipx1 < 0) || (ipy1 < 0) || (ipx0 >= texture_width) || (ipy0 >= texture_height)) {
									continue; // outside bounds
								}
								// limit if just on the edge
								if (ipx0 < 0) ipx0=ipx1;
								if (ipy0 < 0) ipy0=ipy1;
								if (ipx1 >= texture_width)  ipx1=ipx0;
								if (ipy1 >= texture_height) ipy1=ipy0;
								int indx00 = ipx0+texture_width*ipy0;
								int indx10 = ipx1+texture_width*ipy0;
								int indx01 = ipx0+texture_width*ipy1;
								int indx11 = ipx1+texture_width*ipy1;
								double [] pix_val = new double[z_index+1];
								pix_val[z_index] = z_interp;
								// bilinear interpolation of each texture channel
								for (int chn = 0; chn < z_index; chn++) {
									pix_val[chn] =
											(1.0 - fy) * (1.0 - fx) * texture[chn][indx00] + 
											(1.0 - fy) * (      fx) * texture[chn][indx10] + 
											(      fy) * (1.0 - fx) * texture[chn][indx01] + 
											(      fy) * (      fx) * texture[chn][indx11]; 
								}
								// handle alpha
								if (last_is_alpha && (pix_val[z_index-1] < 0.5)) {
									continue; // low alpha -> transparent
								}
								rend[ipix] = pix_val;
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		// merge partial renders:
		// NOTE(review): this merge keeps the sample with the HIGHER z among threads, while
		// the per-thread rendering above keeps the LOWER z ("lower values obscure higher").
		// Looks inconsistent - confirm the intended direction before changing.
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int indx = ai.getAndIncrement(); indx < full_rendered[0].length; indx = ai.getAndIncrement()) {
						double z = Double.NaN;
						for (int sub_render = 0; sub_render < rendered.length; sub_render++) if (rendered[sub_render][indx] != null){
							if (!(rendered[sub_render][indx][z_index] <= z)) { // OK previous NaN
								z = rendered[sub_render][indx][z_index];
								for (int chn = 0; chn < full_rendered.length; chn++) { // z_index; chn++) {
									full_rendered[chn][indx] = rendered[sub_render][indx][chn];
								}
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return full_rendered;
	}
//https://commons.apache.org/proper/commons-math/javadocs/api-3.6.1/org/apache/commons/math3/analysis/interpolation/PiecewiseBicubicSplineInterpolatingFunction.html	
	/**
	 * Render textured triangular meshes to the ground plane using parallel projection
	 * and bicubic spline interpolation of the texture.
	 *
	 * A PiecewiseBicubicSplineInterpolatingFunction is built for each channel of each mesh
	 * texture. For each rendered pixel the bilinear value is calculated first and then
	 * replaced by the spline value if the texture point is valid for the spline.
	 * Everything else matches the bilinear parallel projection rendering: vertices are
	 * converted with toground, shifted by x0_y0 and scaled by pixel_per_m, each thread
	 * renders to a private per-pixel buffer and the buffers are merged in the end.
	 *
	 * @param tri_meshes    list of meshes to render. Number of texture channels is taken from the first mesh
	 * @param last_is_alpha last texture channel is alpha: samples with interpolated alpha below 0.5
	 *                      are not rendered, and the alpha channel of the result is not pre-filled with NaN
	 * @param debugLevel    debug level, summary is printed if above -2
	 * @return rendered image [channels + 1][out_width * out_height] in line-scan order (top row first),
	 *         the last slice is interpolated z. Color channels of not rendered pixels are NaN.
	 *         Returns null if tri_meshes is null, empty or the first mesh has no texture.
	 */
	public  double [][] render3dPlaneParallelProjBiCubic(
			final ArrayList<TriMesh> tri_meshes,
			final boolean            last_is_alpha,
			int                      debugLevel){  // debug level
		// TODO: add crop - add to the caller
		if ((tri_meshes == null) || tri_meshes.isEmpty() || (tri_meshes.get(0).getTexturePixels() == null)) {
			return null;
		}
		final boolean export_z = true;
		final int dbg_ipix=1673752; // debug: output pixel index to report when it is rendered
		// get total number of triangles
		int num_tri=0;
		for (TriMesh tri: tri_meshes) {
			num_tri += tri.getTriangles().length;
		}
		int indx=0;
		int num_mesh = 0;
		final int z_index = tri_meshes.get(0).getTexturePixels().length; // number of texture channels, index of z
		// flat list of all triangles: {mesh index, triangle index in the mesh}
		final int [][] tri_index = new int[num_tri][2];
		for (TriMesh tri: tri_meshes) {
			int num_tri_mesh = tri.getTriangles().length;
			for (int i = 0; i < num_tri_mesh; i++)
			{
				tri_index[indx][0] =   num_mesh;
				tri_index[indx++][1] = i;
			}
			num_mesh++;
		}
		// Spline interpolators [mesh][channel] and their source data
		final PiecewiseBicubicSplineInterpolatingFunction [][] pbsif =
				new PiecewiseBicubicSplineInterpolatingFunction[tri_meshes.size()][z_index];
		final double [][] x = new double [tri_meshes.size()][];
		final double [][] y = new double [tri_meshes.size()][];
		final double [][][][] f = new double [tri_meshes.size()][z_index][][];
		
		for (int nm =0; nm < pbsif.length; nm++) {
			TriMesh tri = tri_meshes.get(nm);
			int texture_width=  tri.getTextureWidth();
			int texture_height= tri.getTextureHeight();
			double [][] texture =       tri.getTexturePixels();
			// spline knots are at integer texture pixel indices
			x[nm] = new double[texture_width];
			y[nm] = new double[texture_height];
			for (int i = 0; i < x[nm].length; i++) {
				x[nm][i] = i;  // add 0.5? Linear does not
			}
			for (int i = 0; i < y[nm].length; i++) {
				y[nm][i] = i;  // add 0.5? Linear does not
			}
			for (int chn = 0; chn < z_index; chn++) {
				// transpose line-scan texture to [column][row] as the interpolator is called with (x[], y[], f[x][y])
				f[nm][chn] = new double [texture_width][texture_height];
				for (int row = 0; row < texture_height; row++) {
					for (int col = 0; col < texture_width; col++) {
						f[nm][chn][col][row] = texture[chn][col + row * texture_width];	
					}
				}
				// clones x,y,f (but not f[]). f is in columns, not rows! f[width][height]
				pbsif[nm][chn] = new PiecewiseBicubicSplineInterpolatingFunction(x[nm],y[nm],f[nm][chn]);
			}
		}		
		
		
		if (debugLevel > -2) {
			System.out.println("Prepare to render "+num_tri+" triangles in "+num_mesh+" meshes");
		}
		final double [][] full_rendered = new double[z_index+ (export_z? 1:0)][out_width * out_height];
		int alpha_index = last_is_alpha ? (z_index - 1) : z_index;
		for (int chn = 0; chn < alpha_index; chn++) { // alpha (if any) and z slices stay 0.0
			Arrays.fill(full_rendered[chn], Double.NaN);
		}
		// create z-buffer array per each thread, in the end - merge them
		final Thread[] threads = ImageDtt.newThreadArray(THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		final AtomicInteger ati = new AtomicInteger(0);
		// [thread][output pixel] -> null (nothing rendered) or {channels..., z}
		final double [][][] rendered = new double [threads.length][full_rendered[0].length][];
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					int ti = ati.getAndIncrement();
					double [][] rend = rendered[ti]; // this thread rendered results
					for (int indx = ai.getAndIncrement(); indx < tri_index.length; indx = ai.getAndIncrement()) {
						TriMesh tri=tri_meshes.get(tri_index[indx][0]); // mesh to process;
						int tri_indx = tri_index[indx][1]; // triangle index in a mesh
						double [][] texture =       tri.getTexturePixels();
						int texture_width=          tri.getTextureWidth();
						int texture_height=         tri.getTextureHeight();
						double[][] mesh_coord =     tri.getCoordinates();
						double[][] mesh_tex_coord = tri.getTexCoord();
						int [] triangle = tri.getTriangles()[tri_indx];
						double [][] tri_out2=  new double[3][]; // vertices in output pixels {x,y,z}
						double [][] tri_text2= new double[3][]; // vertices texture coordinates
						double [][] min_max_xyz = new double[3][2]; // bounding box of the triangle
						for (int i = 0; i < 3; i++) {
							double [] gxyz=ErsCorrection.applyXYZATR(toground, mesh_coord[triangle[i]]);
							tri_out2[i] =  new double [] {
									pixel_per_m * (gxyz[0] - x0_y0[0]), 
									pixel_per_m * (gxyz[1] - x0_y0[1]),
									-pixel_per_m * gxyz[2] // make it positive?  
							};
							//[2] - distance from the camera in "pixels" - same linear scale as on the ground. lower values obscure higher.
							tri_text2[i] = mesh_tex_coord[triangle[i]];
							for (int j = 0; j < 3; j++) {
								if ((i==0) || (tri_out2[i][j] < min_max_xyz[j][0])) min_max_xyz[j][0] = tri_out2[i][j]; 
								if ((i==0) || (tri_out2[i][j] > min_max_xyz[j][1])) min_max_xyz[j][1] = tri_out2[i][j]; 
							}
						}
						// Check plane direction
						double [] d01 = new double [] {tri_out2[1][0]-tri_out2[0][0], tri_out2[1][1]-tri_out2[0][1]};
						double [] d02 = new double [] {tri_out2[2][0]-tri_out2[0][0], tri_out2[2][1]-tri_out2[0][1]};
						if (!cross2ccw(d02,d01)) {
							continue; // wrong direction or degenerate triangle
						}
						int ipx_min = (int) Math.floor(min_max_xyz[0][0]);
						int ipx_max = (int) Math.ceil (min_max_xyz[0][1]);
						int ipy_min = (int) Math.floor(min_max_xyz[1][0]);
						int ipy_max = (int) Math.ceil (min_max_xyz[1][1]);
						// apply bounds
						if (ipx_min < 0) ipx_min = 0;
						if (ipy_min < 0) ipy_min = 0;
						if (ipx_max >= out_width) ipx_max = out_width - 1;
						if (ipy_max >= out_height) ipy_max = out_height - 1;
						if ((ipx_min > ipx_max) || (ipy_min > ipy_max)) {
							continue; // triangle completely outside rendering area
						}
						
						// texture vectors from vertex 0 to vertices 1 and 2
						double [] t01 = new double [] {tri_text2[1][0]-tri_text2[0][0], tri_text2[1][1]-tri_text2[0][1]};
						double [] t02 = new double [] {tri_text2[2][0]-tri_text2[0][0], tri_text2[2][1]-tri_text2[0][1]};
						// vectors to get coefficients of the (d01, d02) decomposition of a point
						double [][] orto2 = orthonorm2(d01, d02);
						// vector from 1 to 2
						double [] d12 = new double [] {tri_out2[2][0]-tri_out2[1][0], tri_out2[2][1]-tri_out2[1][1]};
						for (int ipy = ipy_min; ipy <= ipy_max; ipy++) {
							for (int ipx = ipx_min; ipx <= ipx_max; ipx++) {
								// check it is inside triangle (strictly, pixels exactly on an edge are skipped)
								double [] d0p = new double[] {ipx-tri_out2[0][0],ipy-tri_out2[0][1]};
								if (!cross2ccw(d0p,d01)) continue; 
								if (!cross2ccw(d02,d0p)) continue; 
								double [] d1p = new double[] {ipx-tri_out2[1][0],ipy-tri_out2[1][1]};
								if (!cross2ccw(d1p,d12)) continue;
								int ipix = ipx +(out_height - 1 -ipy) * out_width; // Y goes down
								if (ipix== dbg_ipix) {
									System.out.println("ipix="+ipix);
								}
								// See if the rendered pixel is closer than the closest of the corners
								if ((rend[ipix] != null ) && (rend[ipix][z_index] < min_max_xyz[2][0])) {
									continue; 
								}
								// coefficients of d0p decomposition along d01 and d02
								double kx = dot2(d0p, orto2[0]);
								double ky = dot2(d0p, orto2[1]);
								// interpolate z
								double z_interp = tri_out2[0][2] +
										kx * (tri_out2[1][2]-tri_out2[0][2]) +
										ky * (tri_out2[2][2]-tri_out2[0][2]);
								if ((rend[ipix] != null ) && (rend[ipix][z_index] < z_interp)) {
									continue; // already rendered pixel has lower z
								}
								// Get corresponding texture coordinates
								double text_x = tri_text2[0][0] + kx * t01[0] + ky*t02[0]; // texture relative coordinates (0,1)
								double text_y = tri_text2[0][1] + kx * t01[1] + ky*t02[1]; // y - up!
								double px = text_x * texture_width - 0.5; // (0.0,0.0) - center of top-left texture pixel
								double py = (1.0-text_y) * texture_height -0.5;
								int ipx0 = (int) Math.floor(px);
								int ipy0 = (int) Math.floor(py);
								double fx = px - ipx0;
								double fy = py - ipy0;
								int ipx1 = ipx0+1;
								int ipy1 = ipy0+1;
								// Row index is compared to the texture height (was compared to the width,
								// that caused out of bounds access or dropped valid rows of non-square textures)
								if ((ipx1 < 0) || (ipy1 < 0) || (ipx0 >= texture_width) || (ipy0 >= texture_height)) {
									continue; // outside bounds
								}
								// limit if just on the edge
								if (ipx0 < 0) ipx0=ipx1;
								if (ipy0 < 0) ipy0=ipy1;
								if (ipx1 >= texture_width)  ipx1=ipx0;
								if (ipy1 >= texture_height) ipy1=ipy0;
								int indx00 = ipx0+texture_width*ipy0;
								int indx10 = ipx1+texture_width*ipy0;
								int indx01 = ipx0+texture_width*ipy1;
								int indx11 = ipx1+texture_width*ipy1;
								double [] pix_val = new double[z_index+1];
								pix_val[z_index] = z_interp;
								for (int chn = 0; chn < z_index; chn++) { // just for testing - use bilinear first
									pix_val[chn] =
											(1.0 - fy) * (1.0 - fx) * texture[chn][indx00] + 
											(1.0 - fy) * (      fx) * texture[chn][indx10] + 
											(      fy) * (1.0 - fx) * texture[chn][indx01] + 
											(      fy) * (      fx) * texture[chn][indx11];
									if (pbsif[tri_index[indx][0]][chn].isValidPoint(px, py)) { // then overwrite with bicubic
										pix_val[chn] = pbsif[tri_index[indx][0]][chn].value(px,py);   
									}
								}
								// handle alpha
								if (last_is_alpha && (pix_val[z_index-1] < 0.5)) {
									continue; // low alpha -> transparent
								}
								rend[ipix] = pix_val;
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		// merge partial renders:
		// NOTE(review): this merge keeps the sample with the HIGHER z among threads, while
		// the per-thread rendering above keeps the LOWER z ("lower values obscure higher").
		// Looks inconsistent - confirm the intended direction before changing.
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int indx = ai.getAndIncrement(); indx < full_rendered[0].length; indx = ai.getAndIncrement()) {
						double z = Double.NaN;
						for (int sub_render = 0; sub_render < rendered.length; sub_render++) if (rendered[sub_render][indx] != null){
							if (!(rendered[sub_render][indx][z_index] <= z)) { // OK previous NaN
								z = rendered[sub_render][indx][z_index];
								for (int chn = 0; chn < full_rendered.length; chn++) { // z_index; chn++) {
									full_rendered[chn][indx] = rendered[sub_render][indx][chn];
								}
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return full_rendered;
	}
	
	
	/**
	 * Render textured triangular meshes into an {@code out_width * out_height} raster.
	 * <p>
	 * Triangles from all meshes are distributed between worker threads. Each thread
	 * rasterizes its triangles into a private sparse buffer (one {@code double[]} per
	 * touched pixel, holding the texture channels followed by the interpolated depth).
	 * Within a buffer the sample with the lowest depth is kept. After all triangles are
	 * processed, the per-thread buffers are merged pixel by pixel, again keeping the
	 * sample with the lowest depth.
	 * <p>
	 * Texture values are sampled with bilinear interpolation. Triangles whose projected
	 * winding fails the {@code cross2ccw(d02, d01)} test are skipped.
	 *
	 * @param tri_meshes    meshes to render. The number of texture channels is taken from
	 *                      the first mesh; the other meshes are assumed to provide at least
	 *                      as many channels (not verified here).
	 * @param last_is_alpha when true, the last texture channel is treated as alpha: samples
	 *                      with interpolated alpha below 0.5 are discarded, and the alpha
	 *                      plane of the result is left at 0.0 (not NaN) where nothing was
	 *                      rendered.
	 * @param debugLevel    when above -2, the triangle and mesh counts are printed.
	 * @return array {@code [channel][pixel]} with NaN in non-alpha channels for pixels that
	 *         were not rendered, or {@code null} if the mesh list is null/empty or the first
	 *         mesh has no texture pixels.
	 */
	public  double [][] render3dPlaneCenterProj(
			final ArrayList<TriMesh> tri_meshes,
			final boolean            last_is_alpha,
			int                      debugLevel){  // debug level
		// TODO: add crop - add to the caller
		if ((tri_meshes == null) || tri_meshes.isEmpty() || (tri_meshes.get(0).getTexturePixels() == null)) {
			return null;
		}
		final int dbg_ipix=1673752;
		// get total number of triangles
		int num_tri=0;
		for (TriMesh tri: tri_meshes) {
			num_tri += tri.getTriangles().length;
		}
		// Flat list of {mesh index, triangle index in that mesh} so threads can share one counter
		int indx=0;
		int num_mesh = 0;
		final int [][] tri_index = new int[num_tri][2];
		for (TriMesh tri: tri_meshes) {
			int num_tri_mesh = tri.getTriangles().length;
			for (int i = 0; i < num_tri_mesh; i++) {
				tri_index[indx][0] =   num_mesh;
				tri_index[indx++][1] = i;
			}
			num_mesh++;
		}
		if (debugLevel > -2) {
			System.out.println("Prepare to render "+num_tri+" triangles in "+num_mesh+" meshes");
		}
		// Number of texture channels; also the index of the depth value in per-pixel arrays
		final int z_index = tri_meshes.get(0).getTexturePixels().length;
		final double [][] full_rendered = new double[z_index][out_width * out_height];
		// The alpha plane (if any) is intentionally left at 0.0 for empty pixels
		int alpha_index = last_is_alpha ? (z_index - 1) : z_index;
		for (int chn = 0; chn < alpha_index; chn++) {
			Arrays.fill(full_rendered[chn], Double.NaN);
		}
		// create z-buffer array per each thread, in the end - merge them
		final Thread[] threads = ImageDtt.newThreadArray(THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		final AtomicInteger ati = new AtomicInteger(0);
		final double [][][] rendered = new double [threads.length][full_rendered[0].length][];
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					int ti = ati.getAndIncrement();
					double [][] rend = rendered[ti]; // this thread rendered results
					for (int indx = ai.getAndIncrement(); indx < tri_index.length; indx = ai.getAndIncrement()) {
						TriMesh tri=tri_meshes.get(tri_index[indx][0]); // mesh to process;
						int tri_indx = tri_index[indx][1]; // triangle index in a mesh
						double [][] texture =       tri.getTexturePixels();
						int texture_width=          tri.getTextureWidth();
						int texture_height=         tri.getTextureHeight();
						double[][] mesh_coord =     tri.getCoordinates();
						double[][] mesh_tex_coord = tri.getTexCoord();
						int [] triangle = tri.getTriangles()[tri_indx];
						double [][] tri_out2=  new double[3][];
						double [][] tri_text2= new double[3][];
						double [][] min_max_xyz = new double[3][2]; // [x,y,z][min,max] over the 3 vertices
						for (int i = 0; i < 3; i++) {
							tri_out2[i] =  projectToPlanePixels(new Vector3D(mesh_coord[triangle[i]]));
							//[2] - distance from the camera in "pixels" - same linear scale as on the ground. lower values obscure higher.
							tri_text2[i] = mesh_tex_coord[triangle[i]];
							for (int j = 0; j < 3; j++) {
								if ((i==0) || (tri_out2[i][j] < min_max_xyz[j][0])) min_max_xyz[j][0] = tri_out2[i][j];
								if ((i==0) || (tri_out2[i][j] > min_max_xyz[j][1])) min_max_xyz[j][1] = tri_out2[i][j];
							}
						}
						// Check plane direction
						double [] d01 = new double [] {tri_out2[1][0]-tri_out2[0][0], tri_out2[1][1]-tri_out2[0][1]};
						double [] d02 = new double [] {tri_out2[2][0]-tri_out2[0][0], tri_out2[2][1]-tri_out2[0][1]};
						if (!cross2ccw(d02,d01)) {
							continue;
						}

						// Integer bounding box of the projected triangle
						int ipx_min = (int) Math.floor(min_max_xyz[0][0]);
						int ipx_max = (int) Math.ceil (min_max_xyz[0][1]);
						int ipy_min = (int) Math.floor(min_max_xyz[1][0]);
						int ipy_max = (int) Math.ceil (min_max_xyz[1][1]);
						// apply bounds
						if (ipx_min < 0) ipx_min = 0;
						if (ipy_min < 0) ipy_min = 0;
						if (ipx_max >= out_width) ipx_max = out_width - 1;
						if (ipy_max >= out_height) ipy_max = out_height - 1;
						if ((ipx_min > ipx_max) || (ipy_min > ipy_max)) {
							continue; // triangle completely outside rendering area
						}
						// Texture-space edge vectors from vertex 0 to vertices 1 and 2
						double [] t01 = new double [] {tri_text2[1][0]-tri_text2[0][0], tri_text2[1][1]-tri_text2[0][1]};
						double [] t02 = new double [] {tri_text2[2][0]-tri_text2[0][0], tri_text2[2][1]-tri_text2[0][1]};
						double [][] orto2 = orthonorm2(d01, d02);
						// vector from 1 to 2
						double [] d12 = new double [] {tri_out2[2][0]-tri_out2[1][0], tri_out2[2][1]-tri_out2[1][1]};
						for (int ipy = ipy_min; ipy <= ipy_max; ipy++) {
							for (int ipx = ipx_min; ipx <= ipx_max; ipx++) {
								// check it is inside triangle
								double [] d0p = new double[] {ipx-tri_out2[0][0],ipy-tri_out2[0][1]};
								if (!cross2ccw(d0p,d01)) continue;
								if (!cross2ccw(d02,d0p)) continue;
								double [] d1p = new double[] {ipx-tri_out2[1][0],ipy-tri_out2[1][1]};
								if (!cross2ccw(d1p,d12)) continue;
								int ipix = ipx +(out_height - 1 -ipy) * out_width; // Y goes down
								if (ipix== dbg_ipix) {
									System.out.println("ipix="+ipix);
								}
								// See if the rendered pixel is closer than the closest of the corners
								if ((rend[ipix] != null ) && (rend[ipix][z_index] < min_max_xyz[2][0])) {
									continue;
								}
								// Coefficients of d0p along d01 and d02 (as produced by orthonorm2)
								double kx = dot2(d0p, orto2[0]);
								double ky = dot2(d0p, orto2[1]);
								// interpolate z
								double z_interp = tri_out2[0][2] +
										kx * (tri_out2[1][2]-tri_out2[0][2]) +
										ky * (tri_out2[2][2]-tri_out2[0][2]);
								if ((rend[ipix] != null ) && (rend[ipix][z_index] < z_interp)) {
									continue; // already have a closer sample in this thread's buffer
								}
								// Get corresponding texture coordinates
								double text_x = tri_text2[0][0] + kx * t01[0] + ky*t02[0]; // texture relative coordinates (0,1)
								double text_y = tri_text2[0][1] + kx * t01[1] + ky*t02[1]; // y - up!
								double px = text_x * texture_width - 0.5; // (0.0,0.0) - center of top-left texture pixel
								double py = (1.0-text_y) * texture_height -0.5;
								int ipx0 = (int) Math.floor(px);
								int ipy0 = (int) Math.floor(py);
								double fx = px - ipx0;
								double fy = py - ipy0;
								int ipx1 = ipx0+1;
								int ipy1 = ipy0+1;
								// Vertical bound must use texture_height: comparing ipy0 with the width
								// either indexes past the texture (wide textures) or drops valid rows (tall ones)
								if ((ipx1 < 0) || (ipy1 < 0) || (ipx0 >= texture_width) || (ipy0 >= texture_height)) {
									continue; // outside bounds
								}
								// limit if just on the edge
								if (ipx0 < 0) ipx0=ipx1;
								if (ipy0 < 0) ipy0=ipy1;
								if (ipx1 >= texture_width)  ipx1=ipx0;
								if (ipy1 >= texture_height) ipy1=ipy0;
								int indx00 = ipx0+texture_width*ipy0;
								int indx10 = ipx1+texture_width*ipy0;
								int indx01 = ipx0+texture_width*ipy1;
								int indx11 = ipx1+texture_width*ipy1;
								double [] pix_val = new double[z_index+1];
								pix_val[z_index] = z_interp;
								// bilinear interpolation of each texture channel
								for (int chn = 0; chn < z_index; chn++) {
									pix_val[chn] =
											(1.0 - fy) * (1.0 - fx) * texture[chn][indx00] +
											(1.0 - fy) * (      fx) * texture[chn][indx10] +
											(      fy) * (1.0 - fx) * texture[chn][indx01] +
											(      fy) * (      fx) * texture[chn][indx11];
								}
								// handle alpha
								if (last_is_alpha && (pix_val[z_index-1] < 0.5)) {
									continue; // low alpha -> transparent
								}
								rend[ipix] = pix_val;
							}
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		// merge partial renders:
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int indx = ai.getAndIncrement(); indx < full_rendered[0].length; indx = ai.getAndIncrement()) {
						double z = Double.NaN;
						for (int sub_render = 0; sub_render < rendered.length; sub_render++) {
							double [] pix = rendered[sub_render][indx];
							if (pix == null) {
								continue; // this thread did not render the pixel
							}
							// Keep the sample with the lowest depth, matching the per-thread z-buffer rule
							// (lower values obscure higher). The negated >= also accepts the first
							// sample, because any comparison with the initial NaN is false.
							if (!(pix[z_index] >= z)) {
								z = pix[z_index];
								for (int chn = 0; chn < z_index; chn++) {
									full_rendered[chn][indx] = pix[chn];
								}
							}
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		return full_rendered;
	}
}
