Commit d786da7b authored by Andrey Filippov

committed old modified files

parent 8ba9b224
Pipeline #2436 failed with stages
#!/usr/bin/env python3
#from numpy import float64
__copyright__ = "Copyright 2018, Elphel, Inc."
__license__ = "GPL-3.0+"
__email__ = "andrey@elphel.com"
import os
import sys
import glob
import imagej_tiff as ijt
import numpy as np
import resource
#import timeit
import matplotlib.pyplot as plt
from scipy.ndimage.filters import gaussian_filter
import time
import tensorflow as tf
#http://stackoverflow.com/questions/287871/print-in-terminal-with-colors-using-python
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[38;5;214m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
BOLDWHITE = '\033[1;37m'
UNDERLINE = '\033[4m'
TIME_START = time.time()
TIME_LAST = TIME_START
def print_time(txt="",end="\n"):
global TIME_LAST
t = time.time()
if txt:
txt +=" "
print(("%s"+bcolors.BOLDWHITE+"at %.4fs (+%.4fs)"+bcolors.ENDC)%(txt,t-TIME_START,t-TIME_LAST), end = end)
TIME_LAST = t
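# Example (illustration only): print_time("Read dataset") prints something like
# "Read dataset at 12.3456s (+0.7890s)" - time since program start and since the previous call.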
def _dtype_feature(ndarray):
"""match appropriate tf.train.Feature class with dtype of ndarray. """
assert isinstance(ndarray, np.ndarray)
dtype_ = ndarray.dtype
if dtype_ == np.float64 or dtype_ == np.float32:
return lambda array: tf.train.Feature(float_list=tf.train.FloatList(value=array))
elif dtype_ == np.int64:
return lambda array: tf.train.Feature(int64_list=tf.train.Int64List(value=array))
else:
raise ValueError("The input should be numpy ndarray. \
Instead got {}".format(ndarray.dtype))
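# Illustrative usage sketch (not part of the pipeline itself): the returned converter is
# applied per row when building tf.train.Example protos, e.g.:
# to_feature = _dtype_feature(corr2d) # corr2d: float32/float64 ndarray
# feature = to_feature(corr2d[0]) # -> tf.train.Feature(float_list=tf.train.FloatList(...))
# example = tf.train.Example(features=tf.train.Features(feature={'corr2d': feature}))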
def readTFRewcordsEpoch(train_filename):
# filenames = [train_filename]
# dataset = tf.data.TFRecordDataset(filenames)
if not '.tfrecords' in train_filename:
train_filename += '.tfrecords'
record_iterator = tf.python_io.tf_record_iterator(path=train_filename)
corr2d_list=[]
target_disparity_list=[]
gt_ds_list = []
for string_record in record_iterator:
example = tf.train.Example()
example.ParseFromString(string_record)
corr2d_list.append(np.array(example.features.feature['corr2d'] .float_list .value))
target_disparity_list.append(np.array(example.features.feature['target_disparity'] .float_list .value[0]))
gt_ds_list.append(np.array(example.features.feature['gt_ds'] .float_list .value))
corr2d= np.array(corr2d_list)
target_disparity = np.array(target_disparity_list)
gt_ds = np.array(gt_ds_list)
return corr2d, target_disparity, gt_ds
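# Usage sketch (assumes TensorFlow 1.x, where tf.python_io is available); shapes depend on how
# the records were written - for single-tile records corr2d is (num_tiles, layers*81),
# target_disparity is (num_tiles,) and gt_ds is (num_tiles, 2):
# corr2d, target_disparity, gt_ds = readTFRewcordsEpoch(train_filenameTFR) # see the commented example in __main__ below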
def writeTFRewcordsImageTiles(img_path, tfr_filename): # test_set=False):
num_tiles = 242*324 # fixme
all_image_tiles = np.array(range(num_tiles))
corr_layers = ['hor-pairs', 'vert-pairs','diagm-pair', 'diago-pair']
img = ijt.imagej_tiff(img_path, corr_layers, all_image_tiles)
"""
Values read from correlation file, it now may differ from the COMBO-DSI:
1) The target disparities used for correlations are replaced if they are too far from the rig (GT) values and
replaced by interpolation from available neighbors. If there are no suitable neighbors, target disparity is
derived from the rig data by adding a random offset (specified in ImageJ plugin configuration ML section)
2) correlation is performed around the defined tiles extrapolating disparity. rig data may be 0 disparity,
0 strength if there is no rig data for those tiles. That means that such tiles can only be used as peripherals
i (now 5x5) clusters, not for the cluster centers where GT is needed.
"""
corr2d = img.corr2d.reshape((num_tiles,-1))
target_disparity = img.target_disparity.reshape((num_tiles,-1))
gt_ds = img.gt_ds.reshape((num_tiles,-1))
"""
Replace GT data with zero strength with nan, zero strength
nan2 = np.array((np.nan,0), dtype=np.float32)
gt_ds[np.where(gt_ds[:,1]==0)] = nan2
"""
if not '.tfrecords' in tfr_filename:
tfr_filename += '.tfrecords'
tfr_filename=tfr_filename.replace(' ','_')
try:
os.makedirs(os.path.dirname(tfr_filename))
except OSError: # the directory may already exist
pass
writer = tf.python_io.TFRecordWriter(tfr_filename)
dtype_feature_corr2d = _dtype_feature(corr2d)
dtype_target_disparity = _dtype_feature(target_disparity)
dtype_feature_gt_ds = _dtype_feature(gt_ds)
for i in range(num_tiles):
x = corr2d[i].astype(np.float32)
y = target_disparity[i].astype(np.float32)
z = gt_ds[i].astype(np.float32)
d_feature = {'corr2d': dtype_feature_corr2d(x),
'target_disparity':dtype_target_disparity(y),
'gt_ds': dtype_feature_gt_ds(z)}
example = tf.train.Example(features=tf.train.Features(feature=d_feature))
writer.write(example.SerializeToString())
pass
writer.close()
sys.stdout.flush()
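# Usage sketch (mirrors the __main__ section below): convert one ML correlation TIFF into a
# per-tile .tfrecords file, one record per tile of the fixed 242x324 grid:
# writeTFRewcordsImageTiles(test_corr, img_filenameTFR)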
class ExploreData:
"""
TODO: add to constructor parameters
"""
PATTERN = "*-DSI_COMBO.tiff"
# ML_DIR = "ml"
# ML_PATTERN = "*-ML_DATA*OFFS*.tiff"
# ML_PATTERN = "*-ML_DATA*MAIN*.tiff"
# ML_PATTERN = "*-ML_DATA*MAIN.tiff"
# ML_PATTERN = "*-ML_DATA*MAIN_RND*.tiff"
## ML_PATTERN = "*-ML_DATA*RIG_RND*.tiff"
# ML_PATTERN = "*-ML_DATA*OFFS-0.20000_0.20000.tiff"
"""
1527182801_296892-ML_DATARND-32B-O-FZ0.05-OFFS-0.20000_0.20000.tiff
1527182805_696892-ML_DATA-32B-O-FZ0.05-RIG_RND2.00000.tiff
"""
def getComboList(self, top_dir, latest_version_only):
if not top_dir:
return []
# patt = "*-DSI_COMBO.tiff"
tlist = []
for i in range(5):
pp = top_dir#) ,'**', patt) # works
for _ in range (i):
pp = os.path.join(pp,'*')
pp = os.path.join(pp, ExploreData.PATTERN)
tlist += glob.glob(pp)
if (self.debug_level > 0):
print (pp+" "+str(len(tlist)))
if (self.debug_level > 0):
print("Found "+str(len(tlist))+" combo DSI files in "+top_dir+" :")
if (self.debug_level > 1):
print("\n".join(tlist))
if latest_version_only:
models = {}
for p in tlist:
model = os.path.dirname(os.path.dirname(p))
if (not model in models) or ( models[model]< p):
models[model] = p
tlist = [v for v in models.values()]
if (self.debug_level > 0):
print("After filtering the latest versions only, left "+str(len(tlist))+" combo DSI files in "+top_dir+" :")
if (self.debug_level > 1):
print("\n".join(tlist))
tlist.sort()
return tlist
def loadComboFiles(self, tlist):
indx = 0
images = []
if (self.debug_level>2):
print(str(resource.getrusage(resource.RUSAGE_SELF)))
layers = ['disparity_rig','strength_rig','disparity_main']
for combo_file in tlist:
tiff = ijt.imagej_tiff(combo_file,layers)
if not indx:
images = np.empty((len(tlist), tiff.image.shape[0],tiff.image.shape[1],tiff.image.shape[2]), tiff.image.dtype)
images[indx] = tiff.image
if (self.debug_level>2):
print(str(indx)+": "+str(resource.getrusage(resource.RUSAGE_SELF)))
indx += 1
return images
def getHistogramDSI(
self,
list_rds,
disparity_bins = 1000,
strength_bins = 100,
disparity_min_drop = -0.1,
disparity_min_clip = -0.1,
disparity_max_drop = 100.0,
disparity_max_clip = 100.0,
strength_min_drop = 0.1,
strength_min_clip = 0.1,
strength_max_drop = 1.0,
strength_max_clip = 0.9,
max_main_offset = 0.0,
normalize = True,
# no_histogram = False
):
good_tiles_list=[]
for combo_rds in list_rds:
good_tiles = np.empty((combo_rds.shape[0], combo_rds.shape[1],combo_rds.shape[2]), dtype=bool)
for ids in range (combo_rds.shape[0]): #iterate over all scenes ds[2][rows][cols]
ds = combo_rds[ids]
disparity = ds[...,0]
strength = ds[...,1]
good_tiles[ids] = disparity >= disparity_min_drop
good_tiles[ids] &= disparity <= disparity_max_drop
good_tiles[ids] &= strength >= strength_min_drop
good_tiles[ids] &= strength <= strength_max_drop
if max_main_offset > 0.0:
disparity_main = ds[...,2]
good_tiles[ids] &= disparity_main <= (disparity + max_main_offset)
good_tiles[ids] &= disparity_main >= (disparity - max_main_offset)
disparity = np.nan_to_num(disparity, copy = False) # to be able to multiply by 0.0 in mask | copy=False, then out=disparity all done in-place
strength = np.nan_to_num(strength, copy = False) # likely should never happen
np.clip(disparity, disparity_min_clip, disparity_max_clip, out = disparity)
np.clip(strength, strength_min_clip, strength_max_clip, out = strength)
good_tiles_list.append(good_tiles)
combo_rds = np.concatenate(list_rds)
hist, xedges, yedges = np.histogram2d( # xedges, yedges - just for debugging
x = combo_rds[...,1].flatten(),
y = combo_rds[...,0].flatten(),
bins= (strength_bins, disparity_bins),
range= ((strength_min_clip,strength_max_clip),(disparity_min_clip,disparity_max_clip)),
normed= normalize,
weights= np.concatenate(good_tiles_list).flatten())
for i, combo_rds in enumerate(list_rds):
for ids in range (combo_rds.shape[0]): #iterate over all scenes ds[2][rows][cols]
combo_rds[ids][...,1]*= good_tiles_list[i][ids]
return hist, xedges, yedges
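# Note on the histogram above: good_tiles acts as a 0/1 weight, so np.histogram2d() only counts
# tiles that passed the drop filters (and, optionally, the main-camera offset check), binned by
# the clipped (strength, disparity). A minimal stand-alone equivalent for one scene
# (shapes assumed, for illustration only):
# w = good_tiles[0].astype(float).flatten()
# h, _, _ = np.histogram2d(strength.flatten(), disparity.flatten(),
# bins=(strength_bins, disparity_bins),
# range=((strength_min_clip, strength_max_clip),
# (disparity_min_clip, disparity_max_clip)),
# weights=w)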
def __init__(self,
topdir_train,
topdir_test,
ml_pattern,
latest_version_only,
max_main_offset = 2.0, # > 0.0 - do not use main camera tiles with offset more than this
debug_level = 0,
disparity_bins = 1000,
strength_bins = 100,
disparity_min_drop = -0.1,
disparity_min_clip = -0.1,
disparity_max_drop = 100.0,
disparity_max_clip = 100.0,
strength_min_drop = 0.1,
strength_min_clip = 0.1,
strength_max_drop = 1.0,
strength_max_clip = 0.9,
hist_sigma = 2.0, # Blur log histogram
hist_cutoff= 0.001 # of maximal
):
# file name
self.debug_level = debug_level
self.ml_pattern = ml_pattern
#self.testImageTiles()
self.max_main_offset = max_main_offset
self.disparity_bins = disparity_bins
self.strength_bins = strength_bins
self.disparity_min_drop = disparity_min_drop
self.disparity_min_clip = disparity_min_clip
self.disparity_max_drop = disparity_max_drop
self.disparity_max_clip = disparity_max_clip
self.strength_min_drop = strength_min_drop
self.strength_min_clip = strength_min_clip
self.strength_max_drop = strength_max_drop
self.strength_max_clip = strength_max_clip
self.hist_sigma = hist_sigma # Blur log histogram
self.hist_cutoff= hist_cutoff # of maximal
self.pre_log_offs = 0.001 # of histogram maximum
self.good_tiles = None
self.files_train = self.getComboList(topdir_train, latest_version_only)
self.files_test = self.getComboList(topdir_test, latest_version_only)
self.train_ds = self.loadComboFiles(self.files_train)
self.test_ds = self.loadComboFiles(self.files_test)
self.num_tiles = self.train_ds.shape[1]*self.train_ds.shape[2]
"""
Traceback (most recent call last):
File "explore_data5.py", line 1036, in <module>
hist_cutoff= 0.001) # of maximal
File "explore_data5.py", line 286, in __init__
self.num_tiles = self.train_ds.shape[1]*self.train_ds.shape[2]
AttributeError: 'list' object has no attribute 'shape'
"""
## self.hist, xedges, yedges = self.getHistogramDSI(
self.hist, _, _ = self.getHistogramDSI(
list_rds = [self.train_ds,self.test_ds], # combo_rds,
disparity_bins = self.disparity_bins,
strength_bins = self.strength_bins,
disparity_min_drop = self.disparity_min_drop,
disparity_min_clip = self.disparity_min_clip,
disparity_max_drop = self.disparity_max_drop,
disparity_max_clip = self.disparity_max_clip,
strength_min_drop = self.strength_min_drop,
strength_min_clip = self.strength_min_clip,
strength_max_drop = self.strength_max_drop,
strength_max_clip = self.strength_max_clip,
max_main_offset = self.max_main_offset,
normalize = True
# no_histogram = False
)
log_offset = self.pre_log_offs * self.hist.max()
h_cutoff = hist_cutoff * self.hist.max()
lhist = np.log(self.hist + log_offset)
blurred_lhist = gaussian_filter(lhist, sigma = self.hist_sigma)
self.blurred_hist = np.exp(blurred_lhist) - log_offset
self.good_tiles = self.blurred_hist >= h_cutoff
self.blurred_hist *= self.good_tiles # set bad ones to zero
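# Summary of the steps above: the raw 2D histogram is smoothed in log space
# (log -> gaussian_filter -> exp) so that a few heavily populated cells do not dominate the blur,
# then cells below hist_cutoff * max are marked bad and zeroed. self.blurred_hist and
# self.good_tiles are later used by assignBatchBins() to build roughly equal-population bins.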
def exploreNeibs(self,
data_ds, # disparity/strength data for all files (train or test)
radius, # how far to look from center each side ( 1- 3x3, 2 - 5x5)
disp_thesh = 5.0): # reduce effective variance for higher disparities
"""
For each tile calculate difference between max and min among neighbors and number of qualifying neighbors (bad center is not removed)
data_ds may maismatch with the correlation files - correlation filas have data in extrapolated areas and replaced for large difference with GT
"""
disp_min = np.empty_like(data_ds[...,0], dtype = np.float)
disp_max = np.empty_like(disp_min, dtype = np.float)
tile_neibs = np.zeros_like(disp_min, dtype = np.int)
dmin = data_ds[...,0].min()
dmax = data_ds[...,0].max()
good_tiles = self.getBB(data_ds) >= 0
side = 2 * radius + 1
for nf, ds in enumerate(data_ds):
disp = ds[...,0]
height = disp.shape[0]
width = disp.shape[1]
bad_max = np.ones((height+side, width+side), dtype=float) * dmax
bad_min = np.ones((height+side, width+side), dtype=float) * dmin
good = np.zeros((height+side, width+side), dtype=int)
#Assign centers of the array, replace bad tiles with max/min (so they will not change min/max)
bad_max[radius:height+radius,radius:width+radius] = np.select([good_tiles[nf]],[disp],default = dmax)
bad_min[radius:height+radius,radius:width+radius] = np.select([good_tiles[nf]],[disp],default = dmin)
good [radius:height+radius,radius:width+radius] = good_tiles[nf]
disp_min [nf,...] = disp
disp_max [nf,...] = disp
tile_neibs[nf,...] = good_tiles[nf]
for offset_y in range(-radius, radius+1):
oy = offset_y+radius
for offset_x in range(-radius, radius+1):
ox = offset_x+radius
if offset_y or offset_x: # Skip center - already copied
np.minimum(disp_min[nf], bad_max[oy:oy+height, ox:ox+width], out=disp_min[nf])
np.maximum(disp_max[nf], bad_min[oy:oy+height, ox:ox+width], out=disp_max[nf])
tile_neibs[nf] += good[oy:oy+height, ox:ox+width]
pass
pass
pass
pass
#disp_thesh
disp_avar = disp_max - disp_min
disp_rvar = disp_avar * disp_thesh / np.maximum(disp_max, 0.001) # removing division by 0 error - those tiles will be anyway discarded
disp_var = np.select([disp_max >= disp_thesh, disp_max < disp_thesh],[disp_rvar,disp_avar])
return disp_var, tile_neibs
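# Usage sketch (as in the __main__ section below): per-tile disparity spread over the
# (2*radius+1)^2 neighborhood and the number of valid neighbors; for tiles with
# disp_max >= disp_thesh the spread is rescaled by disp_thesh/disp_max to reduce the effective
# variance of high-disparity (near) tiles:
# disp_var_train, num_neibs_train = ex_data.exploreNeibs(ex_data.train_ds, RADIUS, VARIANCE_SCALE_DISPARITY)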
def assignBatchBins(self,
disp_bins,
str_bins,
files_per_scene = 5, # not used here, will be used when generating batches
min_batch_choices=10, # not used here, will be used when generating batches
max_batch_files = 10): # not used here, will be used when generating batches
"""
for each disparity/strength combination (self.disparity_bins * self.strength_bins = 1000*100) provide number of "large"
variable-size disparity/strength bin, or -1 if this disparity/strength combination does not seem right
"""
self.files_per_scene = files_per_scene
self.min_batch_choices=min_batch_choices
self.max_batch_files = max_batch_files
hist_to_batch = np.zeros((self.blurred_hist.shape[0],self.blurred_hist.shape[1]),dtype=int) #zeros_like?
## hist_to_batch_multi = np.ones((self.blurred_hist.shape[0],self.blurred_hist.shape[1]),dtype=int) #zeros_like?
scale_hist= (disp_bins * str_bins)/self.blurred_hist.sum()
norm_b_hist = self.blurred_hist * scale_hist
## disp_list = [] # last disparity hist
# disp_multi = [] # number of disp rows to fit
disp_run_tot = 0.0
disp_batch = 0
disp=0
num_batch_bins = disp_bins * str_bins
disp_hist = np.linspace(0, num_batch_bins, disp_bins+1)
batch_index = 0
num_members = np.zeros((num_batch_bins,),int)
while disp_batch < disp_bins:
#disp_multi.append(1)
# while (disp < self.disparity_bins):
# disp_target_tot =disp_hist[disp_batch+1]
disp_run_tot_new = disp_run_tot
disp0 = disp # start disparity matching disp_run_tot
while (disp_run_tot_new < disp_hist[disp_batch+1]) and (disp < self.disparity_bins):
disp_run_tot_new += norm_b_hist[:,disp].sum()
disp+=1;
disp_multi = 1
while (disp_batch < (disp_bins - 1)) and (disp_run_tot_new >= disp_hist[disp_batch+2]):
disp_batch += 1 # only if large disp_bins and very high hist value
disp_multi += 1
# now disp_run_tot - before this batch disparity col
str_bins_corr = str_bins * disp_multi # if too narrow disparity column - multiply number of strength columns
str_bins_corr_last = str_bins_corr -1
str_hist = np.linspace(disp_run_tot, disp_run_tot_new, str_bins_corr + 1)
str_run_tot_new = disp_run_tot
# str_batch = 0
str_index=0
# wide_col = norm_b_hist[:,disp0:disp] #disp0 - first column, disp - last+ 1
#iterate in linescan along the column
for si in range(self.strength_bins):
for di in range(disp0, disp,1):
if norm_b_hist[si,di] > 0.0 :
str_run_tot_new += norm_b_hist[si,di]
# do not increment after last to avoid precision issues
if (batch_index < num_batch_bins) and (num_members[batch_index] > 0) and (str_index < str_bins_corr_last) and (str_run_tot_new > str_hist[str_index+1]):
batch_index += 1
str_index += 1
if batch_index < num_batch_bins :
hist_to_batch[si,di] = batch_index
num_members[batch_index] += 1
else:
pass
else:
hist_to_batch[si,di] = -1
batch_index += 1 # it was not incremented after the last one in the column to avoid rounding errors
disp_batch += 1
disp_run_tot = disp_run_tot_new
pass
self.hist_to_batch = hist_to_batch
return hist_to_batch
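# Usage sketch (matches the __main__ section below): map histogram cells to
# disp_bins * str_bins batch bins of roughly equal total population; cells with zero
# histogram value get -1 (not usable for batch generation):
# hist_to_batch = ex_data.assignBatchBins(disp_bins = BATCH_DISP_BINS, str_bins = BATCH_STR_BINS)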
def getBB(self, data_ds):
"""
for each file, each tile get histogram index (or -1 for bad tiles)
"""
## hist_to_batch = self.hist_to_batch
## files_batch_list = []
disp_step = ( self.disparity_max_clip - self.disparity_min_clip )/ self.disparity_bins
str_step = ( self.strength_max_clip - self.strength_min_clip )/ self.strength_bins
bb = np.empty_like(data_ds[...,0],dtype=int)
for findx in range(data_ds.shape[0]):
ds = data_ds[findx]
gt = ds[...,1] > 0.0 # OK
db = (((ds[...,0] - self.disparity_min_clip)/disp_step).astype(int))*gt
sb = (((ds[...,1] - self.strength_min_clip)/ str_step).astype(int))*gt
np.clip(db, 0, self.disparity_bins-1, out = db)
np.clip(sb, 0, self.strength_bins-1, out = sb)
bb[findx] = (self.hist_to_batch[sb.reshape(self.num_tiles),db.reshape(self.num_tiles)]) .reshape(db.shape[0],db.shape[1]) + (gt -1)
return bb
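# Note: getBB() quantizes each tile's (disparity, strength) into histogram cell indices and
# looks up the batch bin assigned by assignBatchBins(); tiles with zero strength have gt == 0,
# so the trailing "+ (gt - 1)" shifts them to a negative value that is treated as "bad" downstream.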
def makeBatchLists(self,
data_ds = None, # (disparity,strength) per scene, per tile
disp_var = None, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = None, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = None, # Minimal tile variance to include
max_var = None, # Maximal tile variance to include
## scale_disp = 5.0,
min_neibs = None):# Minimal number of valid tiles to include
if data_ds is None:
data_ds = self.train_ds
## hist_to_batch = self.hist_to_batch
num_batch_tiles = np.empty((data_ds.shape[0],self.hist_to_batch.max()+1),dtype = int)
bb = self.getBB(data_ds)
use_neibs = not ((disp_var is None) or (disp_neibs is None) or (min_var is None) or (max_var is None) or (min_neibs is None))
list_of_file_lists=[]
for findx in range(data_ds.shape[0]):
foffs = findx * self.num_tiles
lst = []
for i in range (self.hist_to_batch.max()+1):
lst.append([])
# bb1d = bb[findx].reshape(self.num_tiles)
if use_neibs:
disp_var_tiles = disp_var[findx].reshape(self.num_tiles)
disp_neibs_tiles = disp_neibs[findx].reshape(self.num_tiles)
for n, indx in enumerate(bb[findx].reshape(self.num_tiles)):
if indx >= 0:
if use_neibs:
# disp_var_tiles = disp_var[findx].reshape(self.num_tiles)
# disp_neibs_tiles = disp_neibs[findx].reshape(self.num_tiles)
if disp_neibs_tiles[n] < min_neibs:
continue # too few neighbors
if not disp_var_tiles[n] >= min_var:
continue #too small variance
if not disp_var_tiles[n] < max_var:
continue #too large variance
lst[indx].append(foffs + n)
lst_arr=[]
for i,l in enumerate(lst):
# lst_arr.append(np.array(l,dtype = int))
lst_arr.append(l)
num_batch_tiles[findx,i] = len(l)
list_of_file_lists.append(lst_arr)
self.list_of_file_lists= list_of_file_lists
self.num_batch_tiles = num_batch_tiles
return list_of_file_lists, num_batch_tiles
#todo: only use other files if there are not enough choices in the main file!
def augmentBatchFileIndices(self,
seed_index,
min_choices=None,
max_files = None,
set_ds = None
):
if min_choices is None:
min_choices = self.min_batch_choices
if max_files is None:
max_files = self.max_batch_files
if set_ds is None:
set_ds = self.train_ds
full_num_choices = self.num_batch_tiles[seed_index].copy()
flist = [seed_index]
all_choices = list(range(self.num_batch_tiles.shape[0]))
all_choices.remove(seed_index)
for _ in range (max_files-1):
if full_num_choices.min() >= min_choices:
break
findx = np.random.choice(all_choices)
flist.append(findx)
all_choices.remove(findx)
full_num_choices += self.num_batch_tiles[findx]
file_tiles_sparse = [[] for _ in set_ds] #list of empty lists for each train scene (will be sparse)
for nt in range(self.num_batch_tiles.shape[1]): #number of tiles per batch (not counting ml file variant) // radius2 - 40
tl = []
nchoices = 0
for findx in flist:
if (len(self.list_of_file_lists[findx][nt])):
tl.append(self.list_of_file_lists[findx][nt])
nchoices+= self.num_batch_tiles[findx][nt]
if nchoices >= min_choices: # use minimum of extra files
break;
while len(tl)==0:
print("** BUG! could not find a single candidate from files ",flist," for cell ",nt)
print("trying to use some other cell")
nt1 = np.random.randint(0,self.num_batch_tiles.shape[1])
for findx in flist:
if (len(self.list_of_file_lists[findx][nt1])):
tl.append(self.list_of_file_lists[findx][nt1])
nchoices+= self.num_batch_tiles[findx][nt1]
if nchoices >= min_choices: # use minimum of extra files
break;
tile = np.random.choice(np.concatenate(tl))
"""
Traceback (most recent call last):
File "explore_data2.py", line 1041, in <module>
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_train, files_list = ex_data.files_train, set_ds= ex_data.train_ds, radius = RADIUS)
File "explore_data2.py", line 761, in writeTFRewcordsEpoch
corr2d_batch, target_disparity_batch, gt_ds_batch = ex_data.prepareBatchData(ml_list, seed_index, min_choices=None, max_files = None, ml_num = None, set_ds = set_ds, radius = radius)
File "explore_data2.py", line 556, in prepareBatchData
flist,tiles = self.augmentBatchFileIndices(seed_index, min_choices, max_files, set_ds)
File "explore_data2.py", line 494, in augmentBatchFileIndices
tile = np.random.choice(np.concatenate(tl))
ValueError: need at least one array to concatenate
"""
# print (nt, tile, tile//self.num_tiles, tile % self.num_tiles)
if not type (tile) is np.int64:
print("tile=",tile)
file_tiles_sparse[tile//self.num_tiles].append(tile % self.num_tiles)
file_tiles = []
for findx in flist:
file_tiles.append(np.sort(np.array(file_tiles_sparse[findx],dtype=int)))
return flist, file_tiles # file indices, list of tile indices for each file
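# Note: augmentBatchFileIndices() starts from one seed scene and keeps adding randomly chosen
# extra scenes until every batch bin has at least min_choices candidate tiles (or max_files
# scenes are used), then draws one random tile per bin and returns the scene indices together
# with the sorted tile indices selected from each scene.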
def getMLList(self, ml_subdir, flist):
ml_list = []
for fn in flist:
# ml_patt = os.path.join(os.path.dirname(fn), ml_subdir, ExploreData.ML_PATTERN)
## if isinstance(ml_subdir,list)
ml_patt = os.path.join(os.path.dirname(fn), ml_subdir, self.ml_pattern)
ml_list.append(glob.glob(ml_patt))
## self.ml_list = ml_list
return ml_list
def getBatchData(
self,
flist,
## tiles,
ml_list,
ml_num = None ): # 0 - use all ml files for the scene, >0 select random number
if ml_num is None:
ml_num = self.files_per_scene
ml_all_files = []
for findx in flist:
mli = list(range(len(ml_list[findx])))
if (ml_num > 0) and (ml_num < len(mli)):
mli_left = mli
mli = []
for _ in range(ml_num):
ml = np.random.choice(mli_left)
mli.append(ml)
mli_left.remove(ml)
ml_files = []
for ml_index in mli:
ml_files.append(ml_list[findx][ml_index])
ml_all_files.append(ml_files)
return ml_all_files
def prepareBatchData(self,
ml_list,
seed_index,
min_choices=None,
max_files = None,
ml_num = None,
set_ds = None,
radius = 0):
"""
set_ds (from COMBO_DSI) is used to select tile clusters, exported values come from correlation files.
target_disparity for correlation files may be different than data_ds - replaced dureing ImageJ plugin
export if main camera and the rig (GT) converged on different objects fro the same tile
"""
if min_choices is None:
min_choices = self.min_batch_choices
if max_files is None:
max_files = self.max_batch_files
if ml_num is None:
ml_num = self.files_per_scene
if set_ds is None:
set_ds = self.train_ds
tiles_in_sample = (2 * radius + 1) * (2 * radius + 1)
height = set_ds.shape[1]
width = set_ds.shape[2]
width_m1 = width-1
height_m1 = height-1
# set_ds = [self.train_ds, self.test_ds][test_set]
corr_layers = ['hor-pairs', 'vert-pairs','diagm-pair', 'diago-pair']
flist,tiles = self.augmentBatchFileIndices(seed_index, min_choices, max_files, set_ds)
# ml_all_files = self.getBatchData(flist, tiles, ml_list, ml_num) # 0 - use all ml files for the scene, >0 select random number
ml_all_files = self.getBatchData(
flist,
## tiles,
ml_list,
0) # ml_num) # 0 - use all ml files for the scene, >0 select random number
if self.debug_level > 1:
print ("==============",seed_index, flist)
for i, _ in enumerate(flist):
print(i,"\n".join(ml_all_files[i]))
print(tiles[i])
total_tiles = 0
for i, t in enumerate(tiles):
## total_tiles += len(t)*len(ml_all_files[i]) # tiles per scene * offset files per scene
total_tiles += len(t) # tiles per scene (offset files per scene no longer multiply the count)
if self.debug_level > 1:
print("Tiles in the batch=",total_tiles)
corr2d_batch = None # np.empty((total_tiles, len(corr_layers),81))
gt_ds_batch = np.empty((total_tiles * tiles_in_sample, 2), dtype=float)
target_disparity_batch = np.empty((total_tiles * tiles_in_sample, ), dtype=float)
start_tile = 0
for nscene, scene_files in enumerate(ml_all_files):
'''
Create tiles list including neighbors
'''
full_tiles = np.empty([len(tiles[nscene]) * tiles_in_sample], dtype = int)
indx = 0;
for i, nt in enumerate(tiles[nscene]):
ty = nt // width
tx = nt % width
for dy in range (-radius, radius+1):
y = np.clip(ty+dy,0,height_m1)
for dx in range (-radius, radius+1):
x = np.clip(tx+dx,0,width_m1)
full_tiles[indx] = y * width + x
indx += 1
"""
Assign tiles to several correlation files
"""
file_tiles = []
file_indices = []
for _ in scene_files:
file_tiles.append([])
num_scene_files = len(scene_files)
for t in full_tiles:
fi = np.random.randint(0, num_scene_files) # a ValueError here usually means a wrong ml file pattern (no files matched)
file_tiles[fi].append(t)
file_indices.append(fi)
corr2d_list = []
target_disparity_list = []
gt_ds_list = []
for fi, path in enumerate (scene_files):
img = ijt.imagej_tiff(path, corr_layers, tile_list=file_tiles[fi])
corr2d_list.append (img.corr2d)
target_disparity_list.append(img.target_disparity)
gt_ds_list.append (img.gt_ds)
img_indices = [0] * len(scene_files)
for i, fi in enumerate(file_indices):
ti = img_indices[fi]
img_indices[fi] += 1
if corr2d_batch is None:
corr2d_batch = np.empty((total_tiles * tiles_in_sample, len(corr_layers), corr2d_list[fi].shape[-1]))
gt_ds_batch [start_tile] = gt_ds_list[fi][ti]
target_disparity_batch [start_tile] = target_disparity_list[fi][ti]
corr2d_batch [start_tile] = corr2d_list[fi][ti]
start_tile += 1
"""
Sometimes get bad tile in ML file that was not bad in COMBO-DSI
Need to recover
np.argwhere(np.isnan(target_disparity_batch))
"""
bad_tiles = np.argwhere(np.isnan(target_disparity_batch))
if (len(bad_tiles)>0):
print ("*** Got %d bad tiles in a batch, no code to replace :-("%(len(bad_tiles)))
# for now - just repeat some good tile
"""
for ibt in bad_tiles:
while np.isnan(target_disparity_batch[ibt]):
irt = np.random.randint(0,total_tiles)
if not np.isnan(target_disparity_batch[irt]):
target_disparity_batch[ibt] = target_disparity_batch[irt]
corr2d_batch[ibt] = corr2d_batch[irt]
gt_ds_batch[ibt] = gt_ds_batch[irt]
break
print (" done replacing")
"""
self.corr2d_batch = corr2d_batch
self.target_disparity_batch = target_disparity_batch
self.gt_ds_batch = gt_ds_batch
return corr2d_batch, target_disparity_batch, gt_ds_batch
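# Note: prepareBatchData() expands each selected center tile into its full (2*radius+1)^2
# cluster (clipped at the image borders), randomly spreads the cluster tiles over the ML
# correlation files available for the scene, and concatenates per-tile corr2d,
# target_disparity and gt_ds in cluster order into one batch.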
def writeTFRewcordsEpoch(self, tfr_filename, ml_list, files_list = None, set_ds= None, radius = 0, num_scenes = None): # test_set=False):
# train_filename = 'train.tfrecords' # address to save the TFRecords file
# open the TFRecords file
if not '.tfrecords' in tfr_filename:
tfr_filename += '.tfrecords'
tfr_filename=tfr_filename.replace(' ','_')
if files_list is None:
files_list = self.files_train
if set_ds is None:
set_ds = self.train_ds
try:
os.makedirs(os.path.dirname(tfr_filename))
print("Created directory "+os.path.dirname(tfr_filename))
except OSError:
print("Directory "+os.path.dirname(tfr_filename)+" already exists, using it")
pass
#skip writing if file exists - it will be possible to continue or run several instances
if os.path.exists(tfr_filename):
print(tfr_filename+" already exists, skipping generation. Please remove and re-run this program if you want to regenerate the file")
return
writer = tf.python_io.TFRecordWriter(tfr_filename)
#$ files_list = [self.files_train, self.files_test][test_set]
if num_scenes is None:
num_scenes = len(files_list)
if len(files_list) <= num_scenes:
seed_list = np.arange(num_scenes) % len(files_list)
np.random.shuffle(seed_list)
else:
seed_list = np.arange(len(files_list))
np.random.shuffle(seed_list)
seed_list = seed_list[:num_scenes]
# seed_list = np.arange(len(files_list))
np.random.shuffle(seed_list)
cluster_size = (2 * radius + 1) * (2 * radius + 1)
for nscene, seed_index in enumerate(seed_list):
corr2d_batch, target_disparity_batch, gt_ds_batch = self.prepareBatchData(
ml_list,
seed_index,
min_choices=None,
max_files = None,
ml_num = None,
set_ds = set_ds,
radius = radius)
#shuffles tiles in a batch
# tiles_in_batch = len(target_disparity_batch)
tiles_in_batch = corr2d_batch.shape[0]
clusters_in_batch = tiles_in_batch // cluster_size
# permut = np.random.permutation(tiles_in_batch)
permut = np.random.permutation(clusters_in_batch)
corr2d_clusters = corr2d_batch. reshape((clusters_in_batch,-1))
target_disparity_clusters = target_disparity_batch.reshape((clusters_in_batch,-1))
gt_ds_clusters = gt_ds_batch. reshape((clusters_in_batch,-1))
# corr2d_batch_shuffled = corr2d_batch[permut].reshape((corr2d_batch.shape[0], corr2d_batch.shape[1]*corr2d_batch.shape[2]))
# target_disparity_batch_shuffled = target_disparity_batch[permut].reshape((tiles_in_batch,1))
# gt_ds_batch_shuffled = gt_ds_batch[permut]
corr2d_batch_shuffled = corr2d_clusters[permut]. reshape((tiles_in_batch, -1))
target_disparity_batch_shuffled = target_disparity_clusters[permut].reshape((tiles_in_batch, -1))
gt_ds_batch_shuffled = gt_ds_clusters[permut]. reshape((tiles_in_batch, -1))
if nscene == 0:
dtype_feature_corr2d = _dtype_feature(corr2d_batch_shuffled)
dtype_target_disparity = _dtype_feature(target_disparity_batch_shuffled)
dtype_feature_gt_ds = _dtype_feature(gt_ds_batch_shuffled)
for i in range(tiles_in_batch):
x = corr2d_batch_shuffled[i].astype(np.float32)
y = target_disparity_batch_shuffled[i].astype(np.float32)
z = gt_ds_batch_shuffled[i].astype(np.float32)
d_feature = {'corr2d': dtype_feature_corr2d(x),
'target_disparity':dtype_target_disparity(y),
'gt_ds': dtype_feature_gt_ds(z)}
example = tf.train.Example(features=tf.train.Features(feature=d_feature))
writer.write(example.SerializeToString())
if (self.debug_level > 0):
print_time("Scene %d (%d) of %d -> %s"%(nscene, seed_index, len(seed_list), tfr_filename))
writer.close()
sys.stdout.flush()
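# Note: each written tf.train.Example holds one tile; shuffling is done per cluster (not per
# tile), so every consecutive group of cluster_size records belongs to the same 5x5 (or 3x3)
# neighborhood and can be regrouped by the training code.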
def showVariance(self,
rds_list, # list of disparity/strength files, such as training, testing
disp_var_list, # list of disparity variance files. Same shape(but last dim) as rds_list
num_neibs_list, # list of number of tile neibs files. Same shape(but last dim) as rds_list
variance_min = 0.0,
variance_max = 1.5,
neibs_min = 9,
#Same parameters as for the histogram
# disparity_bins = 1000,
# strength_bins = 100,
# disparity_min_drop = -0.1,
# disparity_min_clip = -0.1,
# disparity_max_drop = 100.0,
# disparity_max_clip = 100.0,
# strength_min_drop = 0.1,
# strength_min_clip = 0.1,
# strength_max_drop = 1.0,
# strength_max_clip = 0.9,
normalize = False): # True):
good_tiles_list=[]
for nf, combo_rds in enumerate(rds_list):
disp_var = disp_var_list[nf]
num_neibs = num_neibs_list[nf]
good_tiles = np.empty((combo_rds.shape[0], combo_rds.shape[1],combo_rds.shape[2]), dtype=bool)
for ids in range (combo_rds.shape[0]): #iterate over all scenes ds[2][rows][cols]
ds = combo_rds[ids]
disparity = ds[...,0]
strength = ds[...,1]
variance = disp_var[ids]
neibs = num_neibs[ids]
good_tiles[ids] = disparity >= self.disparity_min_drop
good_tiles[ids] &= disparity <= self.disparity_max_drop
good_tiles[ids] &= strength >= self.strength_min_drop
good_tiles[ids] &= strength <= self.strength_max_drop
good_tiles[ids] &= neibs >= neibs_min
good_tiles[ids] &= variance >= variance_min
good_tiles[ids] &= variance < variance_max
disparity = np.nan_to_num(disparity, copy = False) # to be able to multiply by 0.0 in mask | copy=False, then out=disparity all done in-place
strength = np.nan_to_num(strength, copy = False) # likely should never happen
# np.clip(disparity, self.disparity_min_clip, self.disparity_max_clip, out = disparity)
# np.clip(strength, self.strength_min_clip, self.strength_max_clip, out = strength)
good_tiles_list.append(good_tiles)
combo_rds = np.concatenate(rds_list)
# hist, xedges, yedges = np.histogram2d( # xedges, yedges - just for debugging
hist, _, _ = np.histogram2d( # xedges, yedges - just for debugging
x = combo_rds[...,1].flatten(),
y = combo_rds[...,0].flatten(),
bins= (self.strength_bins, self.disparity_bins),
range= ((self.strength_min_clip,self.strength_max_clip),(self.disparity_min_clip,self.disparity_max_clip)),
normed= normalize,
weights= np.concatenate(good_tiles_list).flatten())
mytitle = "Disparity_Strength variance histogram"
fig = plt.figure()
fig.canvas.set_window_title(mytitle)
fig.suptitle("Min variance = %f, max variance = %f, min neibs = %d"%(variance_min, variance_max, neibs_min))
# plt.imshow(hist, vmin=0, vmax=.1 * hist.max())#,vmin=-6,vmax=-2) # , vmin=0, vmax=.01)
plt.imshow(hist, vmin=0.0, vmax=300.0)#,vmin=-6,vmax=-2) # , vmin=0, vmax=.01)
plt.colorbar(orientation='horizontal') # location='bottom')
# for i, combo_rds in enumerate(rds_list):
# for ids in range (combo_rds.shape[0]): #iterate over all scenes ds[2][rows][cols]
# combo_rds[ids][...,1]*= good_tiles_list[i][ids]
# return hist, xedges, yedges
#MAIN
if __name__ == "__main__":
LATEST_VERSION_ONLY = True
try:
topdir_train = sys.argv[1]
except IndexError:
# topdir_train = "/mnt/dde6f983-d149-435e-b4a2-88749245cc6c/home/eyesis/x3d_data/data_sets/train"#test" #all/"
## topdir_train = "/data_ssd/data_sets/train_mlr32_18d"
## topdir_train = '/data_ssd/data_sets/test_only'# ''
topdir_train = '/data_ssd/data_sets/train_set2'# ''
# tf_data_5x5_main_10_heur
try:
topdir_test = sys.argv[2]
except IndexError:
# topdir_test = "/mnt/dde6f983-d149-435e-b4a2-88749245cc6c/home/eyesis/x3d_data/data_sets/test"#test" #all/"
# topdir_test = "/data_ssd/data_sets/test_mlr32_18d"
## topdir_test = '/data_ssd/data_sets/test_only'
topdir_test = '/data_ssd/data_sets/test_set2'
try:
pathTFR = sys.argv[3]
except IndexError:
# pathTFR = "/mnt/dde6f983-d149-435e-b4a2-88749245cc6c/home/eyesis/x3d_data/data_sets/tf_data_3x3b" #no trailing "/"
# pathTFR = "/home/eyesis/x3d_data/data_sets/tf_data_5x5" #no trailing "/"
pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_13_heur"
## pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_11_rnd"
## pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_12_rigrnd"
try:
ml_subdir = sys.argv[4]
except IndexError:
# ml_subdir = "ml"
# ml_subdir = "mlr32_18a"
# ml_subdir = "mlr32_18d"
# ml_subdir = "{ml32,mlr32_18d}"
ml_subdir = "ml*"
try:
ml_pattern = sys.argv[5]
except IndexError:
ml_pattern = "*-ML_DATA*MAIN.tiff" ## pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_10_heur"
## ml_pattern = "*-ML_DATA*MAIN_RND*.tiff" ## pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_11_rnd"
## ml_pattern = "*-ML_DATA*RIG_RND*.tiff" ## pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_12_rigrnd"
## ML_PATTERN = "*-ML_DATA*RIG_RND*.tiff"
#1527182801_296892-ML_DATA-32B-O-FZ0.05-MAIN_RND2.00000.tiff
# pathTFR = "/mnt/dde6f983-d149-435e-b4a2-88749245cc6c/home/eyesis/x3d_data/data_sets/tf_data_3x3b" #no trailing "/"
# test_corr = '/home/eyesis/x3d_data/models/var_main/www/html/x3domlet/models/all-clean/overlook/1527257933_150165/v04/mlr32_18a/1527257933_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff' # overlook
# test_corr = '/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527256816_150165/v02/mlr32_18a/1527256816_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff' # State Street
# test_corr = '/home/eyesis/x3d_data/models/dsi_combo_and_ml_all/state_street/1527256858_150165/v01/mlr32_18a/1527256858_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff' # State Street
"""
/data_ssd/models/plane_1527182801/1527182805_696892/v02/mlr32_18d/1527182805_696892-ML_DATA-32B-O-FZ0.05-RIG_RND2.00000.tiff
/data_ssd/models/plane_1527182801/1527182805_696892/v02/mlr32_18d/1527182805_696892-ML_DATA-32B-O-FZ0.05-MAIN_RND2.00000.tiff
/data_ssd/models/plane_1527182801/1527182805_696892/v02/mlr32_18d/1527182805_696892-ML_DATA-32B-O-FZ0.05-MAIN.tiff
test_corrs = [
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527257933_150165/v04/mlr32_18a/1527257933_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # overlook
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527256816_150165/v02/mlr32_18a/1527256816_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # State Street
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527256858_150165/v01/mlr32_18a/1527256858_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # State Street
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527182802_096892/v02/mlr32_18a/1527182802_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # near plane"
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527182805_096892/v02/mlr32_18a/1527182805_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # medium plane"
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527182810_096892/v02/mlr32_18a/1527182810_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # far plane
]
test_corrs = [
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527257933_150165/v04/mlr32_18c/1527257933_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # overlook
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527256816_150165/v02/mlr32_18c/1527256816_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # State Street
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527256858_150165/v01/mlr32_18c/1527256858_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # State Street
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527182802_096892/v02/mlr32_18c/1527182802_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # near plane"
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527182805_096892/v02/mlr32_18c/1527182805_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # medium plane"
'/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527182810_096892/v02/mlr32_18c/1527182810_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # far plane
]
test_corrs = [
'/data_ssd/data_sets/test_mlr32_18d/1527257933_150165/v04/mlr32_18d/1527257933_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # overlook
'/data_ssd/data_sets/test_mlr32_18d/1527256816_150165/v02/mlr32_18d/1527256816_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # State Street
'/data_ssd/data_sets/test_mlr32_18d/1527256858_150165/v01/mlr32_18d/1527256858_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # State Street
'/data_ssd/data_sets/test_mlr32_18d/1527182802_096892/v02/mlr32_18d/1527182802_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # near plane"
'/data_ssd/data_sets/test_mlr32_18d/1527182805_096892/v02/mlr32_18d/1527182805_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # medium plane"
'/data_ssd/data_sets/test_mlr32_18d/1527182810_096892/v02/mlr32_18d/1527182810_096892-ML_DATA-32B-O-FZ0.05-MAIN.tiff', # far plane
]
test_corrs = [
'/data_ssd/data_sets/test_mlr32_18d/1527257933_150165/v04/mlr32_18d/1527257933_150165-ML_DATA-32B-O-FZ0.05-MAIN_RND2.00000.tiff', # overlook
'/data_ssd/data_sets/test_mlr32_18d/1527256816_150165/v02/mlr32_18d/1527256816_150165-ML_DATA-32B-O-FZ0.05-MAIN_RND2.00000.tiff', # State Street
'/data_ssd/data_sets/test_mlr32_18d/1527256858_150165/v01/mlr32_18d/1527256858_150165-ML_DATA-32B-O-FZ0.05-MAIN_RND2.00000.tiff', # State Street
'/data_ssd/data_sets/test_mlr32_18d/1527182802_096892/v02/mlr32_18d/1527182802_096892-ML_DATA-32B-O-FZ0.05-MAIN_RND2.00000.tiff', # near plane"
'/data_ssd/data_sets/test_mlr32_18d/1527182805_096892/v02/mlr32_18d/1527182805_096892-ML_DATA-32B-O-FZ0.05-MAIN_RND2.00000.tiff', # medium plane"
'/data_ssd/data_sets/test_mlr32_18d/1527182810_096892/v02/mlr32_18d/1527182810_096892-ML_DATA-32B-O-FZ0.05-MAIN_RND2.00000.tiff', # far plane
]
test_corrs = [
'/data_ssd/data_sets/test_mlr32_18d/1527257933_150165/v04/mlr32_18d/1527257933_150165-ML_DATA-32B-O-FZ0.05-RIG_RND2.00000.tiff', # overlook
'/data_ssd/data_sets/test_mlr32_18d/1527256816_150165/v02/mlr32_18d/1527256816_150165-ML_DATA-32B-O-FZ0.05-RIG_RND2.00000.tiff', # State Street
'/data_ssd/data_sets/test_mlr32_18d/1527256858_150165/v01/mlr32_18d/1527256858_150165-ML_DATA-32B-O-FZ0.05-RIG_RND2.00000.tiff', # State Street
'/data_ssd/data_sets/test_mlr32_18d/1527182802_096892/v02/mlr32_18d/1527182802_096892-ML_DATA-32B-O-FZ0.05-RIG_RND2.00000.tiff', # near plane"
'/data_ssd/data_sets/test_mlr32_18d/1527182805_096892/v02/mlr32_18d/1527182805_096892-ML_DATA-32B-O-FZ0.05-RIG_RND2.00000.tiff', # medium plane"
'/data_ssd/data_sets/test_mlr32_18d/1527182810_096892/v02/mlr32_18d/1527182810_096892-ML_DATA-32B-O-FZ0.05-RIG_RND2.00000.tiff', # far plane
]
"""
# These images are made with large random offset
'''
test_corrs = [
'/data_ssd/data_sets/test_only/1527258897_071435/v02/ml32/1527258897_071435-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257894_750165/v02/ml32/1527257894_750165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257406_950165/v02/ml32/1527257406_950165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257757_950165/v02/ml32/1527257757_950165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257370_950165/v02/ml32/1527257370_950165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257235_950165/v02/ml32/1527257235_950165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257235_350165/v02/ml32/1527257235_350165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527259003_271435/v02/ml32/1527259003_271435-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257787_950165/v02/ml32/1527257787_950165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257235_150165/v02/ml32/1527257235_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257235_750165/v02/ml32/1527257235_750165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527258936_671435/v02/ml32/1527258936_671435-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257244_350165/v02/ml32/1527257244_350165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
'/data_ssd/data_sets/test_only/1527257235_550165/v02/ml32/1527257235_550165-ML_DATA-32B-O-FZ0.05-MAIN.tiff',
]
'''
test_corrs = []
#1527257933_150165-ML_DATA-32B-O-FZ0.05-MAIN-RND2.00000.tiff
#/home/eyesis/x3d_data/data_sets/test_mlr32_18a/1527257933_150165/v04/mlr32_18c/1527257933_150165-ML_DATA-32B-O-FZ0.05-MAIN.tiff
#Parameters to generate neighbors data. Set radius to 0 to generate single-tile
TEST_SAME_LENGTH_AS_TRAIN = False # True # make the test set have the same number of entries as the train one
FIXED_TEST_LENGTH = None # number of test scenes to output (used when making test data from just a few or a single test file)
RADIUS = 2 # 5x5
MIN_NEIBS = (2 * RADIUS + 1) * (2 * RADIUS + 1) # all tiles valid (9 for radius 1, 25 for radius 2)
VARIANCE_THRESHOLD = 0.4 # 1.5
VARIANCE_SCALE_DISPARITY = 5.0 #Scale variance if average is above this
NUM_TRAIN_SETS = 32 # 8
if not topdir_train:
NUM_TRAIN_SETS = 0
if RADIUS == 0:
BATCH_DISP_BINS = 50 # 1000 * 1
BATCH_STR_BINS = 20 # 10
elif RADIUS == 1:
BATCH_DISP_BINS = 15 # 120 * 9
BATCH_STR_BINS = 8
else: # RADIUS = 2
BATCH_DISP_BINS = 10 # 40 * 25
BATCH_STR_BINS = 4
train_filenameTFR = pathTFR+"/train"
test_filenameTFR = pathTFR+"/test"
# disp_bins = 20,
# str_bins=10)
# corr2d, target_disparity, gt_ds = readTFRewcordsEpoch(train_filenameTFR)
# print_time("Read %d tiles"%(corr2d.shape[0]))
# exit (0)
ex_data = ExploreData(
topdir_train = topdir_train,
topdir_test = topdir_test,
ml_pattern = ml_pattern,
latest_version_only = LATEST_VERSION_ONLY,
debug_level = 1, #3, ##0, #3,
disparity_bins = 200, #1000,
strength_bins = 100,
disparity_min_drop = -0.1,
disparity_min_clip = -0.1,
disparity_max_drop = 20.0, #100.0,
disparity_max_clip = 20.0, #100.0,
strength_min_drop = 0.1,
strength_min_clip = 0.1,
strength_max_drop = 1.0,
strength_max_clip = 0.9,
hist_sigma = 2.0, # Blur log histogram
hist_cutoff= 0.001) # of maximal
mytitle = "Disparity_Strength histogram"
fig = plt.figure()
fig.canvas.set_window_title(mytitle)
fig.suptitle(mytitle)
# plt.imshow(lhist,vmin=-6,vmax=-2) # , vmin=0, vmax=.01)
plt.imshow(ex_data.blurred_hist, vmin=0, vmax=.1 * ex_data.blurred_hist.max())#,vmin=-6,vmax=-2) # , vmin=0, vmax=.01)
plt.colorbar(orientation='horizontal') # location='bottom')
hist_to_batch = ex_data.assignBatchBins(
disp_bins = BATCH_DISP_BINS,
str_bins = BATCH_STR_BINS)
bb_display = hist_to_batch.copy()
bb_display = ( 1+ (bb_display % 2) + 2 * ((bb_display % 20)//10)) * (hist_to_batch > 0) #).astype(float)
fig2 = plt.figure()
fig2.canvas.set_window_title("Batch indices")
fig2.suptitle("Batch index for each disparity/strength cell")
plt.imshow(bb_display) #, vmin=0, vmax=.1 * ex_data.blurred_hist.max())#,vmin=-6,vmax=-2) # , vmin=0, vmax=.01)
""" prepare test dataset """
for test_corr in test_corrs:
scene = os.path.basename(test_corr)[:17]
scene_version= os.path.basename(os.path.dirname(os.path.dirname(test_corr)))
fname =scene+'-'+scene_version
img_filenameTFR = os.path.join(pathTFR,'img',fname)
print_time("Saving test image %s as tiles..."%(img_filenameTFR),end = " ")
writeTFRewcordsImageTiles(test_corr, img_filenameTFR)
print_time("Done")
pass
if (RADIUS > 0):
disp_var_test, num_neibs_test = ex_data.exploreNeibs(ex_data.test_ds, RADIUS, VARIANCE_SCALE_DISPARITY)
disp_var_train, num_neibs_train = ex_data.exploreNeibs(ex_data.train_ds, RADIUS, VARIANCE_SCALE_DISPARITY)
# show variance histogram
# for var_thresh in [0.1, 1.0, 1.5, 2.0, 5.0]:
for var_thresh in [1.5]:
ex_data.showVariance(
rds_list = [ex_data.train_ds, ex_data.test_ds], # list of disparity/strength files, such as training, testing
disp_var_list = [disp_var_train, disp_var_test], # list of disparity variance files. Same shape(but last dim) as rds_list
num_neibs_list = [num_neibs_train, num_neibs_test], # list of number of tile neibs files. Same shape(but last dim) as rds_list
variance_min = 0.0,
variance_max = var_thresh,
neibs_min = MIN_NEIBS)
ex_data.showVariance(
rds_list = [ex_data.train_ds, ex_data.test_ds], # list of disparity/strength files, such as training, testing
disp_var_list = [disp_var_train, disp_var_test], # list of disparity variance files. Same shape(but last dim) as rds_list
num_neibs_list = [num_neibs_train, num_neibs_test], # list of number of tile neibs files. Same shape(but last dim) as rds_list
variance_min = var_thresh,
variance_max = 1000.0,
neibs_min = MIN_NEIBS)
pass
pass
else:
disp_var_test, num_neibs_test = None, None
disp_var_train, num_neibs_train = None, None
ml_list_train=ex_data.getMLList(ml_subdir, ex_data.files_train)
ml_list_test= ex_data.getMLList(ml_subdir, ex_data.files_test)
if FIXED_TEST_LENGTH is None:
num_test_scenes = len([ex_data.files_test, ex_data.files_train][TEST_SAME_LENGTH_AS_TRAIN])
else:
num_test_scenes = FIXED_TEST_LENGTH
if RADIUS == 0 :
list_of_file_lists_train, num_batch_tiles_train = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.train_ds,
disp_var = disp_var_train, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = num_neibs_train, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
## scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS) # Minimal number of valid tiles to include
pass
# ex_data.makeBatchLists(data_ds = ex_data.train_ds)
for train_var in range (NUM_TRAIN_SETS):
fpath = train_filenameTFR+("%03d"%(train_var,))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_train, files_list = ex_data.files_train, set_ds= ex_data.train_ds)
list_of_file_lists_test, num_batch_tiles_test = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.test_ds,
disp_var = disp_var_test, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = num_neibs_test, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
## scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS) # Minimal number of valid tiles to include
fpath = test_filenameTFR # +("-%03d"%(train_var,))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_test, files_list = ex_data.files_test, set_ds= ex_data.test_ds, num_scenes = num_test_scenes)
pass
else: # RADIUS > 0
# test
list_of_file_lists_test, num_batch_tiles_test = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.test_ds,
disp_var = disp_var_test, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = num_neibs_test, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
## scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS) # Minimal number of valid tiles to include
num_le_test = num_batch_tiles_test.sum()
print("Number of <= %f disparity variance tiles: %d (est)"%(VARIANCE_THRESHOLD, num_le_test))
fpath = test_filenameTFR +("TEST_R%d_LE%4.1f"%(RADIUS,VARIANCE_THRESHOLD))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_test, files_list = ex_data.files_test, set_ds= ex_data.test_ds, radius = RADIUS, num_scenes = num_test_scenes)
list_of_file_lists_test, num_batch_tiles_test = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.test_ds,
disp_var = disp_var_test, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = num_neibs_test, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = VARIANCE_THRESHOLD, # Minimal tile variance to include
max_var = 1000.0, # Maximal tile variance to include
## scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS) # Minimal number of valid tiles to include
num_gt_test = num_batch_tiles_test.sum()
high_fract_test = 1.0 * num_gt_test / (num_le_test + num_gt_test)
print("Number of > %f disparity variance tiles: %d, fraction = %f (test)"%(VARIANCE_THRESHOLD, num_gt_test, high_fract_test))
fpath = test_filenameTFR +("TEST_R%d_GT%4.1f"%(RADIUS,VARIANCE_THRESHOLD))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_test, files_list = ex_data.files_test, set_ds= ex_data.test_ds, radius = RADIUS, num_scenes = num_test_scenes)
#fake
if NUM_TRAIN_SETS > 0:
list_of_file_lists_fake, num_batch_tiles_fake = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.train_ds,
disp_var = disp_var_train, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = num_neibs_train, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
## scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS) # Minimal number of valid tiles to include
num_le_fake = num_batch_tiles_fake.sum()
print("Number of <= %f disparity variance tiles: %d (test)"%(VARIANCE_THRESHOLD, num_le_fake))
fpath = test_filenameTFR +("FAKE_R%d_LE%4.1f"%(RADIUS,VARIANCE_THRESHOLD))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_train, files_list = ex_data.files_train, set_ds= ex_data.train_ds, radius = RADIUS, num_scenes = num_test_scenes)
list_of_file_lists_fake, num_batch_tiles_fake = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.train_ds,
disp_var = disp_var_train, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = num_neibs_train, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = VARIANCE_THRESHOLD, # Minimal tile variance to include
max_var = 1000.0, # Maximal tile variance to include
## scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS) # Minimal number of valid tiles to include
num_gt_fake = num_batch_tiles_fake.sum()
high_fract_fake = 1.0 * num_gt_fake / (num_le_fake + num_gt_fake)
print("Number of > %f disparity variance tiles: %d, fraction = %f (test)"%(VARIANCE_THRESHOLD, num_gt_fake, high_fract_fake))
fpath = test_filenameTFR +("FAKE_R%d_GT%4.1f"%(RADIUS,VARIANCE_THRESHOLD))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_train, files_list = ex_data.files_train, set_ds= ex_data.train_ds, radius = RADIUS, num_scenes = num_test_scenes)
# train
for train_var in range (NUM_TRAIN_SETS): # Recalculate list for each file - slower, but will alternate lvar/hvar
list_of_file_lists_train, num_batch_tiles_train = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.train_ds,
disp_var = disp_var_train, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = num_neibs_train, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
## scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS) # Minimal number of valid tiles to include
num_le_train = num_batch_tiles_train.sum()
print("Number of <= %f disparity variance tiles: %d (train)"%(VARIANCE_THRESHOLD, num_le_train))
# for train_var in range (NUM_TRAIN_SETS):
fpath = train_filenameTFR+("%03d_R%d_LE%4.1f"%(train_var,RADIUS,VARIANCE_THRESHOLD))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_train, files_list = ex_data.files_train, set_ds= ex_data.train_ds, radius = RADIUS)
list_of_file_lists_train, num_batch_tiles_train = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.train_ds,
disp_var = disp_var_train, # difference between maximal and minimal disparity for each scene, each tile
disp_neibs = num_neibs_train, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximum is 9)
min_var = VARIANCE_THRESHOLD, # Minimal tile variance to include
max_var = 1000.0, # Maximal tile variance to include
## scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS) # Minimal number of valid tiles to include
num_gt_train = num_batch_tiles_train.sum()
high_fract_train = 1.0 * num_gt_train / (num_le_train + num_gt_train)
print("Number of > %f disparity variance tiles: %d, fraction = %f (train)"%(VARIANCE_THRESHOLD, num_gt_train, high_fract_train))
# for train_var in range (NUM_TRAIN_SETS):
fpath = (train_filenameTFR+("%03d_R%d_GT%4.1f"%(train_var,RADIUS,VARIANCE_THRESHOLD)))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_train, files_list = ex_data.files_train, set_ds= ex_data.train_ds, radius = RADIUS)
## if train_var < 1: # make test files immediately after the train ones
plt.show()
"""
scene = os.path.basename(test_corr)[:17]
scene_version= os.path.basename(os.path.dirname(os.path.dirname(test_corr)))
fname =scene+'-'+scene_version
img_filenameTFR = os.path.join(pathTFR,'img',fname)
print_time("Saving test image %s as tiles..."%(img_filenameTFR),end = " ")
writeTFRewcordsImageTiles(test_corr, img_filenameTFR)
print_time("Done")
pass
"""
pass
@@ -102,67 +102,67 @@ def imagej_metadata_tags(metadata, byteorder):
#def save(path,images,force_stack=False,force_hyperstack=False):
def save(path,images,labels=None,label_prefix="Label "):
'''
labels a list or None
'''
'''
Expecting:
(h,w),
(n,h,w) - just create a simple stack
'''
# Got images, analyze shape:
# - possible formats (c == depth):
# -- (t,z,h,w,c)
# -- (t,h,w,c), t or z does not matter
# -- (h,w,c)
# -- (h,w)
# 0 or 1 images.shapes are not handled
#
# (h,w)
if len(images.shape)==2:
images = images[np.newaxis,...]
# now the shape length is 3
if len(images.shape)==3:
# tifffile treats shape[0] as channel, need to expand to get labels displayed
#images = images[images.shape[0],np.newaxis,images.shape[1],images.shape[2]]
images = np.reshape(images,(images.shape[0],1,images.shape[1],images.shape[2]))
labels_list = []
if labels is None:
for i in range(images.shape[0]):
labels_list.append(label_prefix+str(i+1))
else:
labels_list = labels
ijtags = imagej_metadata_tags({'Labels':labels_list}, '<')
with tifffile.TiffWriter(path, bigtiff=False,imagej=True) as tif:
for i in range(images.shape[0]):
tif.save(images[i], metadata={'version':'1.11a','loop': False}, extratags=ijtags)
# Testing
if __name__ == "__main__":
def hamming_window(x,N):
y = 0.54 - 0.46*math.cos(2*math.pi*x/(N-1))
return y
hw = hamming_window
NT = 5
NX = 512
NY = 512
images = np.empty((NT,NY,NX),np.float32)
import time
print(str(time.time())+": Generating test images")
for t in range(NT):
images[t,:,:] = np.array([[(255-t*25)*hw(i,512)*hw(j,512) for i in range(NX)] for j in range(NY)],np.float32)
print(str(time.time())+": Test images generated")
print("Images shape: "+str(images.shape))
v = save("tiffwriter_test.tiff",images)
@@ -13,11 +13,14 @@ import sys
#import numpy as np
import imagej_tiffwriter
import time
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import qcstereo_functions as qsf
import numpy as np
#import xml.etree.ElementTree as ET
@@ -132,28 +135,144 @@ fig_params = get_fig_params(dbg_parameters['disparity_ranges'])
pass
#temporary:
TIFF_ONLY = False # True
#max_bad = 2.5 # excludes only direct bad
max_bad = 2.5 #2.5 # 1.5 # excludes only direct bad
max_diff = 1.5 # 2.0 # 5.0 # maximal max-min difference
max_target_err = 1.0 # 0.5 # maximal max-min difference
max_disp = 5.0
min_strength = 0.18 #ignore tiles below
min_neibs = 1
max_log_to_mm = 0.5 # difference between center average and center should be under this fraction of max-min (0 - disables feature)
#num_bins = 256 # number of histogram bins
num_bins = 15 # 50 # number of histogram bins
use_gt_weights = True # False # True
index_gt = 2
index_gt_weight = 3
index_heur_err = 7
index_nn_err = 6
index_mm = 8 # max-min
index_log = 9
index_bad = 10
index_num_neibs = 11
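# The index_* constants above address layers of the array returned by
# qsf.result_npy_prepare(..., insert_deltas=3) after it is transposed to (layers, h, w)
# further below, e.g. (a hedged reading of the code that follows):
#     data[index_nn_err]   -> network disparity error layer (squared into err_nn2)
#     data[index_heur_err] -> heuristic disparity error layer (squared into err_heur2)
#     data[index_mm]       -> 9-tile max-min spread, used as the histogram axis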
"""
Debugging high 9-tile variations: remove the error for all tiles whose max-min spread is below min_diff
"""
#min_diff = 0.25 # remove all flat tiles with spread less than this (do not show on heuristic/network disparity errors subplots
min_diff = 0 # remove all flat tiles with spread less than this
max_target_err2 = max_target_err * max_target_err
if not 'show' in FIGS_SAVESHOW:
plt.ioff()
for mode in ['train','infer']:
#for mode in ['train','infer']:
for mode in ['infer']:
figs = []
ffiles = [] # no ext
def setlimsxy(lim_xy):
if not lim_xy is None:
plt.xlim(min(lim_xy[:2]),max(lim_xy[:2]))
plt.ylim(max(lim_xy[2:]),min(lim_xy[2:]))
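# ylim is given as (max, min) on purpose, so the y axis is inverted (larger values at
# the bottom), presumably to match image/row coordinates.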
cumul_weights = None
for nfile, fpars in enumerate(fig_params):
if not fpars is None:
img_file = files['result'][nfile]
if mode == 'infer':
img_file = img_file.replace('.npy','-infer.npy')
"""
try:
data,_ = qsf.result_npy_prepare(img_file, ABSOLUTE_DISPARITY, fix_nan=True, insert_deltas=True)
# data,_ = qsf.result_npy_prepare(img_file, ABSOLUTE_DISPARITY, fix_nan=True, insert_deltas=True)
# data,_ = qsf.result_npy_prepare(img_file, ABSOLUTE_DISPARITY, fix_nan=True, insert_deltas=3)
data,labels = qsf.result_npy_prepare(img_file, ABSOLUTE_DISPARITY, fix_nan=True, insert_deltas=3)
except:
print ("Image file does not exist:", img_file)
continue
"""
pass
data,labels = qsf.result_npy_prepare(img_file, ABSOLUTE_DISPARITY, fix_nan=True, insert_deltas=3)
if True: #TIFF_ONLY:
tiff_path = img_file.replace('.npy','-test.tiff')
data = data.transpose(2,0,1)
print("Saving results to TIFF: "+tiff_path)
imagej_tiffwriter.save(tiff_path,data,labels=labels)
"""
Calculate histograms
"""
err_heur2 = data[index_heur_err]*data[index_heur_err]
err_nn2 = data[index_nn_err]* data[index_nn_err]
diff_log2 = data[index_log]* data[index_log]
weights = (
(data[index_gt] < max_disp) &
(err_heur2 < max_target_err2) &
(data[index_bad] < max_bad) &
(data[index_gt_weight] >= min_strength) &
(data[index_num_neibs] >= min_neibs)&
#max_log_to_mm = 0.5 # difference between center average and center should be under this fraction of max-min (0 - disables feature)
(data[index_log] < max_log_to_mm * np.sqrt(data[index_mm]) )
).astype(data.dtype) # boolean mask converted to 0.0 / 1.0
#max_disp
#max_target_err
if use_gt_weights:
weights *= data[index_gt_weight]
mm = data[index_mm]
weh = np.nan_to_num(weights*err_heur2)
wen = np.nan_to_num(weights*err_nn2)
wel = np.nan_to_num(weights*diff_log2)
hist_weights,bin_vals = np.histogram(a=mm, bins = num_bins, range = (0.0, max_diff), weights = weights, density = False)
hist_err_heur2,_ = np.histogram(a=mm, bins = num_bins, range = (0.0, max_diff), weights = weh, density = False)
hist_err_nn2,_ = np.histogram(a=mm, bins = num_bins, range = (0.0, max_diff), weights = wen, density = False)
hist_diff_log2,_ = np.histogram(a=mm, bins = num_bins, range = (0.0, max_diff), weights = wel, density = False)
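# The normalization below turns these histograms into per-bin weighted means:
# np.histogram(mm, weights=weights*err2) sums weight*err^2 over each max-min bin and
# np.histogram(mm, weights=weights) sums the weights, so their ratio is the weighted MSE
# per bin and its sqrt (printed below) is the per-bin RMSE. A toy check (illustration only):
#     mm_t = np.array([0.1, 0.1, 1.2]); w_t = np.array([1.0, 3.0, 1.0]); e2_t = np.array([4.0, 0.0, 1.0])
#     num,_ = np.histogram(mm_t, bins=2, range=(0.0, 2.0), weights=w_t*e2_t)
#     den,_ = np.histogram(mm_t, bins=2, range=(0.0, 2.0), weights=w_t)
#     # num/den -> [1.0, 1.0]   ((1*4 + 3*0)/4 and (1*1)/1)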
if cumul_weights is None:
cumul_weights = hist_weights
cumul_err_heur2 = hist_err_heur2
cumul_err_nn2 = hist_err_nn2
cumul_diff_log2 = hist_diff_log2
else:
cumul_weights += hist_weights
cumul_err_heur2 += hist_err_heur2
cumul_err_nn2 += hist_err_nn2
cumul_diff_log2 += hist_diff_log2
hist_err_heur2 = np.nan_to_num(hist_err_heur2/hist_weights)
hist_err_nn2 = np.nan_to_num(hist_err_nn2/hist_weights)
hist_gain2 = np.nan_to_num(hist_err_heur2/hist_err_nn2)
hist_gain = np.sqrt(hist_gain2)
hist_diff_log2 = np.nan_to_num(hist_diff_log2/hist_weights)
print("hist_err_heur2", end = " ")
print(np.sqrt(hist_err_heur2))
print("hist_err_nn2", end = " ")
print(np.sqrt(hist_err_nn2))
print("hist_gain", end = " ")
print(hist_gain)
print("hist_diff_log2", end = " ")
print(np.sqrt(hist_diff_log2))
if min_diff> 0.0:
pass
good = (mm > min_diff).astype(mm.dtype)
good /= good # good -> 1.0, bad -> 0/0 = nan
data[index_heur_err] *= good
data[index_nn_err] *= good
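# Note on the good /= good trick above: dividing the 0.0/1.0 mask by itself maps
# 1.0 -> 1.0 and 0.0 -> 0.0/0.0 = nan (e.g. np.array([1., 0.]) / np.array([1., 0.])
# gives [1., nan]), so multiplying the error layers by it blanks the flat tiles.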
data = data.transpose(1,2,0)
if TIFF_ONLY:
continue
for subindex, rng in enumerate(fpars['ranges']):
lim_val = rng['lim_val']
lim_xy = rng['lim_xy']
@@ -214,7 +333,68 @@ for mode in ['train','infer']:
fb_noext+="-"+str(subindex)
ffiles.append(fb_noext)
pass
if True:
cumul_err_heur2 = np.nan_to_num(cumul_err_heur2/cumul_weights)
cumul_err_nn2 = np.nan_to_num(cumul_err_nn2/cumul_weights)
cumul_gain2 = np.nan_to_num(cumul_err_heur2/cumul_err_nn2)
cumul_gain = np.sqrt(cumul_gain2)
cumul_diff_log2 = np.nan_to_num(cumul_diff_log2/cumul_weights)
print("cumul_weights", end = " ")
print(cumul_weights)
print("cumul_err_heur", end = " ")
print(np.sqrt(cumul_err_heur2))
print("cumul_err_nn", end = " ")
print(np.sqrt(cumul_err_nn2))
print("cumul_gain", end = " ")
print(cumul_gain)
print("cumul_diff_log2", end = " ")
print(np.sqrt(cumul_diff_log2))
fig, ax1 = plt.subplots()
ax1.set_xlabel('3x3 tiles ground truth disparity max-min (pix)')
ax1.set_ylabel('RMSE\n(pix)', color='black', rotation='horizontal')
ax1.yaxis.set_label_coords(-0.045,0.92)
ax1.plot(bin_vals[0:-1], np.sqrt(cumul_err_nn2), 'tab:red',label="network disparity RMSE")
ax1.plot(bin_vals[0:-1], np.sqrt(cumul_err_heur2), 'tab:green',label="heuristic disparity RMSE")
ax1.plot(bin_vals[0:-1], np.sqrt(cumul_diff_log2), 'tab:cyan',label="ground truth LoG")
ax1.tick_params(axis='y', labelcolor='black')
ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis
ax2.set_ylabel('weight', color='black', rotation='horizontal') # we already handled the x-label with ax1
ax2.yaxis.set_label_coords(1.06,1.0)
ax2.plot(bin_vals[0:-1], cumul_weights,color='grey',dashes=[6, 2],label='weights = n_tiles * gt_confidence')
ax1.legend(loc="upper left", bbox_to_anchor=(0.2,1.0))
ax2.legend(loc="lower right", bbox_to_anchor=(1.0,0.1))
"""
fig = plt.figure(figsize=FIGSIZE)
fig.canvas.set_window_title('Cumulative')
fig.suptitle('Difference to GT')
# ax_conf=plt.subplot(322)
ax_conf=plt.subplot(211)
ax_conf.set_title("RMS vs max9-min9")
plt.plot(bin_vals[0:-1], np.sqrt(cumul_err_heur2),'red',
bin_vals[0:-1], np.sqrt(cumul_err_nn2),'green',
bin_vals[0:-1], np.sqrt(cumul_diff_log2),'blue')
figs.append(fig)
ffiles.append('cumulative')
ax_conf=plt.subplot(212)
ax_conf.set_title("weights vs max9-min9")
plt.plot(bin_vals[0:-1], cumul_weights,'black')
"""
figs.append(fig)
ffiles.append('cumulative')
pass
#bin_vals[0:-1]
# fig.suptitle("Ground truth confidence")
#
# how to allow adjustment before applying tight_layout?
pass
for fig in figs:
@@ -229,9 +409,9 @@ for mode in ['train','infer']:
pp=None
if 'pdf' in FIGS_EXTENSIONS:
if mode == 'infer':
pdf_path = os.path.join(dirs['figures'],"figures-infer.pdf")
pdf_path = os.path.join(dirs['figures'],"figures-infer%s.pdf"%str(min_diff))
else:
pdf_path = os.path.join(dirs['figures'],"figures-train.pdf")
pdf_path = os.path.join(dirs['figures'],"figures-train%s.pdf"%str(min_diff))
pp= PdfPages(pdf_path)
for fb_noext, fig in zip(ffiles,figs):
@@ -259,8 +439,8 @@ if 'show' in FIGS_SAVESHOW:
#FIGS_ESXTENSIONS
#qsf.evaluateAllResults(result_files = files['result'],
# absolute_disparity = ABSOLUTE_DISPARITY,
# cluster_radius = CLUSTER_RADIUS)
qsf.evaluateAllResults(result_files = files['result'],
absolute_disparity = ABSOLUTE_DISPARITY,
cluster_radius = CLUSTER_RADIUS)
print("All done")
exit (0)
@@ -31,14 +31,14 @@ def print_time(txt="",end="\n"):
TIME_LAST = t
DEFAULT_TITLES = [
['test_lvar','Test_flat_heuristic'],
['test_hvar','Test_edge_heuristic'],
['test_lvar1','Test_flat_random'],
['test_hvar1','Test_edge_random'],
['fake_lvar','Fake_flat_heuristic'],
['fake_hvar','Fake_edge_heuristic'],
['fake_lvar1','Fake_flat_random'],
['fake_hvar1','Fake_edge_random']]
['test_lvar', 'Test_flat_heuristic'],
['test_hvar', 'Test_edge_heuristic'],
['test_lvar1', 'Test_flat_random'],
['test_hvar1', 'Test_edge_random'],
['fake_lvar', 'Fake_flat_heuristic'],
['fake_hvar', 'Fake_edge_heuristic'],
['fake_lvar1', 'Fake_flat_random'],
['fake_hvar1', 'Fake_edge_random']]
def parseXmlConfig(conf_file, root_dir):
tree = ET.parse(conf_file)
@@ -517,6 +517,7 @@ def result_npy_prepare(npy_path, absolute, fix_nan, insert_deltas=True,labels=No
data will be written as 4-layer tiff, extension '.npy' replaced with '.tiff'
@param absolute - True - the first layer contains absolute disparity, False - difference from target_disparity
@param fix_nan - replace nan in target_disparity with 0 to apply offset, target_disparity will still contain nan
@param insert_deltas: +1 - add delta layers, +2 - add variance (max - min of this and 8 neighbors)
"""
data = np.load(npy_path) #(324,242,4) [nn_disp, target_disp,gt_disp, gt_conf]
if labels is None:
@@ -525,13 +526,18 @@ def result_npy_prepare(npy_path, absolute, fix_nan, insert_deltas=True,labels=No
nn_out = 0
# target_disparity = 1
gt_disparity = 2
gt_strength = 3
heur_err = 7
min_heur_err = 0.001
height = data.shape[0]
width = data.shape[1]
nocenter9 = np.array([[[1,1,1,1,np.nan,1,1,1,1]]], dtype = data.dtype)
if not absolute:
if fix_nan:
data[...,nn_out] += np.nan_to_num(data[...,1], copy=True)
else:
data[...,nn_out] += data[...,1]
if insert_deltas:
if (insert_deltas & 1):
np.nan_to_num(data[...,gt_strength], copy=False)
data = np.concatenate([data[...,0:4],data[...,0:2],data[...,0:2],data[...,4:]], axis = 2) # data[...,4:] may be empty
labels = labels[:4]+["nn_out","hier_out","nn_err","hier_err"]+labels[4:]
@@ -543,6 +549,69 @@ def result_npy_prepare(npy_path, absolute, fix_nan, insert_deltas=True,labels=No
# All other layers - mask them too
for l in range(8,data.shape[2]):
data[...,l] = np.select([data[...,gt_strength]==0.0, data[...,gt_strength]>0.0], [np.nan,data[...,l]])
"""
Calculate bad tiles where gt was used as a master, to remove them from the results (later versions add a random error)
"""
bad1 = abs(data[...,heur_err]) < min_heur_err
bad1_ext = np.concatenate([bad1 [0:1,:], bad1 [0:1,:], bad1[:,:], bad1 [-1:height,:], bad1 [-1:height,:]],axis = 0)
bad1_ext = np.concatenate([bad1_ext[:,0:1], bad1_ext[:,0:1], bad1_ext[:,:], bad1_ext[:,-1:width], bad1_ext[:,-1:width]], axis = 1)
bad25 = np.empty(shape=[height, width, 25], dtype=bad1.dtype)
bm25=np.array([[[1,1,1,1,1, 1,1,1,1,1, 1,1,1,1,1, 1,1,1,1,1, 1,1,1,1,1]]]) # full 5x5 neighborhood
bm09=np.array([[[0,0,0,0,0, 0,1,1,1,0, 0,1,1,1,0, 0,1,1,1,0, 0,0,0,0,0]]]) # inner 3x3 neighborhood
bm01=np.array([[[0,0,0,0,0, 0,0,0,0,0, 0,0,1,0,0, 0,0,0,0,0, 0,0,0,0,0]]]) # center tile only
for row in range(5):
for col in range(5):
pass
bad25 [...,row*5+col]= bad1_ext[row:height+row, col:width+col]
bad_num1=(np.sum(bad25*bm25,axis=2) > 0).astype(data.dtype)
bad_num2=(np.sum(bad25*bm09,axis=2) > 0).astype(data.dtype)
bad_num3=(np.sum(bad25*bm01,axis=2) > 0).astype(data.dtype)
bad_num = bad_num1 + bad_num2 + bad_num3
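# bad_num is therefore a 0..3 score per tile: +1 if any bad tile falls anywhere in the 5x5
# window, +1 if one falls in the inner 3x3, and +1 if the center tile itself is bad; it is
# stored below as the "badness" layer (data[...,-2]).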
if (insert_deltas & 2):
wo = 0.7 # ortho
wc = 0.5 #corner
w8=np.array([wc,wo,wc,wo,0.0,wo,wc,wo,wc], dtype=data.dtype)
w8/=np.sum(w8) #normalize
gt_ext = np.concatenate([data[0:1,:,gt_disparity],data[:,:,gt_disparity],data[-1:height,:,gt_disparity]],axis = 0)
gt_ext = np.concatenate([gt_ext[:,0:1], gt_ext[:,:], gt_ext[:,-1:width]],axis = 1)
gs_ext = np.concatenate([data[0:1,:,gt_strength], data[:,:,gt_strength], data[-1:height,:,gt_strength]],axis = 0)
gs_ext = np.concatenate([gs_ext[:,0:1], gs_ext[:,:], gs_ext[:,-1:width]],axis = 1)
data9 = np.empty(shape=[height, width, 9], dtype=data.dtype)
weight9 = np.empty(shape=[height, width, 9], dtype=data.dtype)
for row in range(3):
for col in range(3):
pass
data9 [...,row*3+col]= gt_ext[row:height+row, col:width+col]
weight9[...,row*3+col]= gs_ext[row:height+row, col:width+col]
data9 *= weight9/weight9 # make data = nan where weight is 0
# data = np.concatenate([data[...],np.empty_like(data[...,-1])], axis = 2) # data[...,4:] may be empty
data = np.concatenate([data[...],np.empty(shape=[height,width,4],dtype=data.dtype)], axis = 2) # data[...,4:] may be empty
data[...,-4] = np.nanmax(data9*nocenter9, axis=2)-np.nanmin(data9*nocenter9,axis=2)# will ignore nan
np.nan_to_num(data9,copy=False) # replace all nan in data9 with 0.
weight9 *= w8
w_center = np.sum(weight9, axis=2)
dw_center = np.sum(data9*weight9, axis=2)
dw_center /= w_center # now dw_center - weighted average in the center
data[...,-3] = np.abs(data[...,gt_disparity]- dw_center)
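# data[...,-3] ("abs-center") is the absolute difference between the center GT disparity
# and the weighted average of its 8 neighbors - the LoG-like measure read through
# index_log and plotted as "ground truth LoG" in the histogram script above.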
# data[...,-2] = data[...,gt_disparity]- dw_center
#data[...,-3] *= (data[...,-4] < 1.0) # just temporary
#data[...,-3] *= (data[...,gt_disparity] < 5) #just temporary
data[...,-2] =bad_num.astype(data.dtype)
data [...,-1]= np.sum(np.nan_to_num(weight9/weight9),axis=2).astype(data.dtype)
# data[...,-1] = dw_center
labels +=["max-min","abs-center","badness","neibs"]
#neib = np.concatenate([gt_ext[:height,:width,:],],axis = )
pass
return data, labels
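# A hedged usage sketch (illustration only, not called from this module): load one result
# file, add the delta and 9-tile variance layers, and report the layer order. The file
# name below is a placeholder, not a path used elsewhere.
def _example_result_npy_prepare(npy_path="example-result.npy"):
    # insert_deltas is a bit mask: +1 adds the nn/heuristic delta layers,
    # +2 adds the max-min / abs-center / badness / neibs layers appended to 'labels'
    data, labels = result_npy_prepare(npy_path, absolute=True, fix_nan=True, insert_deltas=3)
    print(data.shape, labels)
    return data, labels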
def result_npy_to_tiff(npy_path,
......