Commit b1111bca authored by Andrey Filippov

Trying versions and working on new evaluation criteria for FG/BG features
parent 2c44046d
#!/usr/bin/env python3
#from numpy import float64
from tensorflow.contrib.image.ops.gen_distort_image_ops import adjust_hsv_in_yiq
#from tensorflow.contrib.image.ops.gen_distort_image_ops import adjust_hsv_in_yiq
__copyright__ = "Copyright 2018, Elphel, Inc."
__license__ = "GPL-3.0+"
......@@ -161,7 +161,7 @@ def writeTFRecordsFromImageSet(
s_gt, # wght, # will be modified in place
w_diag = 0.7,
w_reduce = 0.7,
num_pass = 10,
num_pass = 50,
eps = 1E-6)
if debug > 1:
......@@ -281,7 +281,8 @@ def fillGapsByLaplacian(
w_diag = 0.7,
w_reduce = 0.7,
num_pass = 10,
eps = 1E-6):
eps = 1E-6,
debug_level = 0):
dirs = ((-1,0), (-1,1), (0,1), (1,1), (1,0), (1,-1), (0,-1), (-1,-1))
wneib = ( 1.0, w_diag, 1.0, w_diag, 1.0, w_diag, 1.0, w_diag)
gap_tiles = []
......@@ -304,7 +305,7 @@ def fillGapsByLaplacian(
valn = np.copy(val)
wghtn = np.copy(wght)
achange = eps * np.max(wght)
for _ in range (num_pass):
for npass in range (num_pass):
num_new = 1
max_diff = 0.0;
for tile, neibs in zip (gap_tiles, gap_neibs):
......@@ -326,6 +327,8 @@ def fillGapsByLaplacian(
max_diff = max(max_diff, wdiff)
np.copyto(val, valn)
np.copyto(wght, wghtn)
if (debug_level > 3):
print("Pass %d, max_diff = %f"%(npass, max_diff))
if (num_new == 0) and (max_diff < achange):
break
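# A minimal usage sketch for fillGapsByLaplacian, mirroring the calls above
# (the arrays and the gap location are hypothetical; val and wght are modified
# in place, and zero-weight tiles are presumably the gaps to be filled from a
# weighted 8-neighbor average):
#
#   import numpy as np
#   val  = np.random.rand(15, 20).astype(np.float32)   # e.g. disparity
#   wght = np.ones((15, 20), dtype=np.float32)          # e.g. strength
#   val [5:8, 7:11] = 0.0
#   wght[5:8, 7:11] = 0.0                                # a rectangular gap
#   fillGapsByLaplacian(val, wght,
#                       w_diag = 0.7, w_reduce = 0.7,
#                       num_pass = 50, eps = 1E-6,
#                       debug_level = 0)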
......@@ -536,13 +539,23 @@ class ExploreData:
ds_pair[:,:,:,2] = gtaux[:,:,:, ijt.IJFGBG.AUX_DISP] # 8
for nf in range (ds_pair.shape[0]):
if (self.debug_level > 3):
print ("---- nf=%d"%(nf,))
fillGapsByLaplacian(
ds_pair[nf,:,:,0], # val, # will be modified in place
ds_pair[nf,:,:,1], # wght, # will be modified in place
w_diag = 0.7,
w_reduce = 0.7,
num_pass = 20,
eps = 1E-6)
eps = 1E-6,
debug_level = self.debug_level)
if (self.debug_level > 0):
print ("---- nf=%d min = %f mean = %f max = %f"%(
nf,
ds_pair[nf,:,:,0].min(),
ds_pair[nf,:,:,0].mean(),
ds_pair[nf,:,:,0].max()))
print("zero strength",np.nonzero(ds_pair[nf,:,:,1]==0.0))
return ds_pair
......@@ -847,7 +860,11 @@ class ExploreData:
min_neibs = None, # Minimal number of valid tiles to include
use_split = False, # Select by single/multi-plane tiles (center only)
keep_split = False):# When use_split, keep only multi-plane tiles (false - only single-plane)
#for file names:
self.min_neibs = min_neibs
self.use_split = use_split
self.keep_split = keep_split
if data_ds is None:
data_ds = self.train_ds
num_batch_tiles = np.empty((data_ds.shape[0],self.hist_to_batch.max()+1),dtype = int)
......@@ -903,6 +920,7 @@ class ExploreData:
'''
def augmentBatchFileIndices(self,
seed_index,
seed_list = None,
min_choices=None,
max_files = None,
set_ds = None
......@@ -915,14 +933,21 @@ class ExploreData:
set_ds = self.train_ds
full_num_choices = self.num_batch_tiles[seed_index].copy()
flist = [seed_index]
all_choices = list(range(self.num_batch_tiles.shape[0]))
all_choices.remove(seed_index)
if seed_list is None:
seed_list = list(range(self.num_batch_tiles.shape[0]))
all_choices = list(seed_list) # a copy of seed list
all_choices.remove(seed_index) # seed_list made unique by the caller
### list(filter(lambda a: a != seed_index, all_choices)) # remove all instances of seed_index
for _ in range (max_files-1):
if full_num_choices.min() >= min_choices:
break
if len(all_choices) == 0:
print ("Nothing left in all_choices!")
break
findx = np.random.choice(all_choices)
flist.append(findx)
all_choices.remove(findx)
all_choices.remove(findx) # seed_list made unique by the caller
### list(filter(lambda a: a != findx, all_choices)) # remove all instances of findx
full_num_choices += self.num_batch_tiles[findx]
file_tiles_sparse = [[] for _ in set_ds] #list of empty lists for each train scene (will be sparse)
......@@ -1012,6 +1037,7 @@ ValueError: need at least one array to concatenate
def prepareBatchData(self,
ml_list,
seed_index,
seed_list,
min_choices=None,
max_files = None,
ml_num = None,
......@@ -1036,7 +1062,12 @@ ValueError: need at least one array to concatenate
width_m1 = width-1
height_m1 = height-1
corr_layers = ['hor-pairs', 'vert-pairs','diagm-pair', 'diago-pair']
flist,tiles = self.augmentBatchFileIndices(seed_index, min_choices, max_files, set_ds)
flist,tiles = self.augmentBatchFileIndices(
seed_index,
seed_list,
min_choices,
max_files,
set_ds)
ml_all_files = self.getBatchData(
flist,
ml_list,
......@@ -1137,6 +1168,7 @@ ValueError: need at least one array to concatenate
writer = tf.io.TFRecordWriter(tfr_filename)
if num_scenes is None:
num_scenes = len(files_list)
'''
if len(files_list) <= num_scenes:
#create and shuffle repetitive list of files of num_scenes.length
seed_list = np.arange(num_scenes) % len(files_list)
......@@ -1147,11 +1179,25 @@ ValueError: need at least one array to concatenate
np.random.shuffle(seed_list)
seed_list = seed_list[:num_scenes]
np.random.shuffle(seed_list)
'''
augment_list = []
for seed_indx in np.arange(len(files_list)):
if self.num_batch_tiles[seed_indx].sum() >0:
augment_list.append(seed_indx)
seed_list = list(augment_list) # seed_list will be modified, while augment_list keeps the unique/full list of suitable files
while len(seed_list) < num_scenes:
seed_list.append(np.random.choice(seed_list))
np.random.shuffle(seed_list)
if len(seed_list) >= num_scenes:
seed_list = seed_list[:num_scenes]
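# Standalone sketch of the seed-list construction above (hypothetical counts,
# NumPy only): keep the scenes that produced batch tiles, pad by resampling,
# then shuffle so exactly num_scenes seeds are used per epoch.
#
#   import numpy as np
#   num_batch_tiles = np.array([[3, 0], [0, 0], [1, 2]])   # per-scene tile counts
#   num_scenes = 4
#   augment_list = [i for i, nbt in enumerate(num_batch_tiles) if nbt.sum() > 0]
#   seed_list = list(augment_list)                          # -> [0, 2]
#   while len(seed_list) < num_scenes:
#       seed_list.append(np.random.choice(seed_list))       # pad with repeats
#   np.random.shuffle(seed_list)
#   seed_list = seed_list[:num_scenes]                      # e.g. [2, 0, 0, 2]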
cluster_size = (2 * radius + 1) * (2 * radius + 1)
for nscene, seed_index in enumerate(seed_list):
corr2d_batch, target_disparity_batch, gt_ds_batch = ex_data.prepareBatchData( #'hor-pairs' is not in list
ml_list,
seed_index,
augment_list,
min_choices=None,
max_files = None,
ml_num = None,
......@@ -1195,6 +1241,7 @@ ValueError: need at least one array to concatenate
sweep_files,
sweep_disparities,
seed_index,
seed_list,
min_choices=None,
max_files = None,
set_ds = None,
......@@ -1210,8 +1257,6 @@ ValueError: need at least one array to concatenate
min_choices = self.min_batch_choices #10
if max_files is None:
max_files = self.max_batch_files #10
### if ml_num is None:
### ml_num = self.files_per_scene #5 ????
if set_ds is None:
set_ds = self.train_ds
tiles_in_sample = (2 * radius + 1) * (2 * radius + 1)
......@@ -1223,6 +1268,7 @@ ValueError: need at least one array to concatenate
flist0, tiles0 = self.augmentBatchFileIndices(
seed_index,
seed_list,
min_choices,
max_files,
set_ds)
......@@ -1306,6 +1352,13 @@ ValueError: need at least one array to concatenate
rnd_plate = 0.0):## disparity random for each plate (now 25 tiles)
# open the TFRecords file
fb = ""
if self.use_split:
fb = ["-FB1","-FB2"][self.keep_split] # single plane - FB1, split FG/BG planes - FB2
tfr_filename+="-RT%1.2f-RP%1.2f-M%d-NB%d%s"%(rnd_tile,rnd_plate,self.fgbg_mode,self.min_neibs, fb)
if not '.tfrecords' in tfr_filename:
tfr_filename += '.tfrecords'
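# Example of the resulting file name suffix (hypothetical parameter values):
#   rnd_tile = 0.5, rnd_plate = 0.5, fgbg_mode = 1, min_neibs = 5,
#   use_split = True, keep_split = False
#   -> tfr_filename ends with "-RT0.50-RP0.50-M1-NB5-FB1.tfrecords"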
......@@ -1324,10 +1377,11 @@ ValueError: need at least one array to concatenate
#skip writing if file exists - it will be possible to continue or run several instances
if os.path.exists(tfr_filename):
print(tfr_filename+" already exists, skipping generation. Please remove and re-run this program if you want to regenerate the file")
# return # Temporary disable
return # Temporary disable
writer = tf.io.TFRecordWriter(tfr_filename)
if num_scenes is None:
num_scenes = len(files_list)
'''
if len(files_list) <= num_scenes:
#create and shuffle repetitive list of files of num_scenes.length
seed_list = np.arange(num_scenes) % len(files_list)
......@@ -1337,7 +1391,19 @@ ValueError: need at least one array to concatenate
seed_list = np.arange(len(files_list))
np.random.shuffle(seed_list)
seed_list = seed_list[:num_scenes]
'''
augment_list = []
for seed_indx in np.arange(len(files_list)):
if self.num_batch_tiles[seed_indx].sum() >0:
augment_list.append(seed_indx)
seed_list = list(augment_list) # seed_list will be modified, while augment_list keeps the unique/full list of suitable files
while len(seed_list) < num_scenes:
seed_list.append(np.random.choice(seed_list))
np.random.shuffle(seed_list)
if len(seed_list) >= num_scenes:
seed_list = seed_list[:num_scenes]
cluster_size = (2 * radius + 1) * (2 * radius + 1)
for nscene, seed_index in enumerate(seed_list):
corr2d_batch, target_disparity_batch, gt_ds_batch = ex_data.prepareBatchDataLwir( #'hor-pairs' is not in list
......@@ -1345,9 +1411,9 @@ ValueError: need at least one array to concatenate
sweep_files = sweep_files,
sweep_disparities = sweep_disparities,
seed_index = seed_index,
seed_list = augment_list,
min_choices = None,
max_files = None,
### ml_num = None,
set_ds = set_ds, #DS data from all GT_AX files scanned
radius = radius,
rnd_tile = rnd_tile, ## disparity random for each tile
......@@ -1463,7 +1529,7 @@ if __name__ == "__main__":
## topdir_train = "/data_ssd/data_sets/train_mlr32_18d"
## topdir_train = '/data_ssd/data_sets/test_only'# ''
### topdir_train = '/data_ssd/data_sets/train_set2'# ''
topdir_train = '/data_ssd/lwir_sets/lwir_train1'# ''
topdir_train = '/data_ssd/lwir_sets/lwir_train2'# ''
# tf_data_5x5_main_10_heur
try:
......@@ -1472,8 +1538,8 @@ if __name__ == "__main__":
# topdir_test = "/mnt/dde6f983-d149-435e-b4a2-88749245cc6c/home/eyesis/x3d_data/data_sets/test"#test" #all/"
# topdir_test = "/data_ssd/data_sets/test_mlr32_18d"
## topdir_test = '/data_ssd/data_sets/test_only'
### topdir_test = '/data_ssd/data_sets/test_set2'
topdir_test = '/data_ssd/lwir_sets/lwir_test1'
### topdir_test = '/data_ssd/data_sets/test_set21'
topdir_test = '/data_ssd/lwir_sets/lwir_test2'
try:
......@@ -1482,7 +1548,7 @@ if __name__ == "__main__":
# pathTFR = "/mnt/dde6f983-d149-435e-b4a2-88749245cc6c/home/eyesis/x3d_data/data_sets/tf_data_3x3b" #no trailing "/"
# pathTFR = "/home/eyesis/x3d_data/data_sets/tf_data_5x5" #no trailing "/"
### pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_13_heur"
pathTFR = '/data_ssd/lwir_sets/tf_data_5x5_01'
pathTFR = '/data_ssd/lwir_sets/tf_data_5x5_2'
## pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_11_rnd"
## pathTFR = "/data_ssd/data_sets/tf_data_5x5_main_12_rigrnd"
......@@ -1589,7 +1655,7 @@ if __name__ == "__main__":
FRAC_NEIBS_VALID = 0.55# 8 #LWIR new
MIN_NEIBS = (2 * RADIUS + 1) * (2 * RADIUS + 1) # All tiles valid == 9
MIN_NEIBS = round (MIN_NEIBS * FRAC_NEIBS_VALID)
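# Worked example (per the "== 9" note above, i.e. RADIUS = 1):
# MIN_NEIBS = round(9 * 0.55) = 5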
VARIANCE_THRESHOLD = 0.4 # 1.5
VARIANCE_THRESHOLD = 0.8 # 0.4 # 1.5
VARIANCE_SCALE_DISPARITY = 5.0 #Scale variance if average is above this
NUM_TRAIN_SETS = 32 # 8
......@@ -1600,6 +1666,9 @@ if __name__ == "__main__":
RND_AMPLIUDE_TRAIN_PLATE = 0.5 # train with corr2d rendered +/- this far from the GT - common for each (5x5) plate component
MAX_MAIN_OFFSET = 2.5 # do not use tile for training if MAIN camera (AUX for LWIR) disparity differs from GT by more than this
MODEL_ML_DIR = "ml32" # subdirectory with the ML disparity sweep files
USE_SPLIT = False # True, # Select by single/multi-plane tiles (center only)
KEEP_SPLIT = False # When use_split, keep only multi-plane tiles (false - only single-plane)
if not topdir_train:
NUM_TRAIN_SETS = 0
......@@ -1618,14 +1687,14 @@ if __name__ == "__main__":
''' Prepare full image for testing '''
"""
for model_ml_path in test_sets:
writeTFRecordsFromImageSet(
model_ml_path, # model/version/ml_dir
FGBGMODE_TEST, # 0, # export_mode, # 0 - GT average, 1 - GT FG, 2 - GT BG, 3 - AUX disparity
RND_AMPLIUDE_TEST, # random_offset, # for modes 0..2 - add random offset of -random_offset to +random_offset, in mode 3 add random to GT average if no AUX data
pathTFR) # TFR directory
"""
# disp_bins = 20,
......@@ -1641,13 +1710,13 @@ if __name__ == "__main__":
ml_pattern = ml_pattern,
max_main_offset = MAX_MAIN_OFFSET,
latest_version_only = LATEST_VERSION_ONLY,
debug_level = 3, #1, #3, ##0, #3,
debug_level = 1, #3, #1, #3, ##0, #3,
disparity_bins = 50, #100 #200, #1000,
strength_bins = 50, #100
disparity_min_drop = -0.1,
disparity_min_clip = -0.1,
disparity_max_drop = 5.0, #100.0,
disparity_max_clip = 5.0, #100.0,
disparity_max_drop = 8.0, #100.0,
disparity_max_clip = 8.0, #100.0,
strength_min_drop = 0.02, # 0.1,
strength_min_clip = 0.02, # 0.1,
strength_max_drop = 0.3, # 1.0,
......@@ -1743,13 +1812,21 @@ if __name__ == "__main__":
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
scale_disp = VARIANCE_SCALE_DISPARITY,
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = True, # Select by single/multi-plane tiles (center only)
keep_split = False) # When use_split, keep only multi-plane tiles (false - only single-plane)
use_split = USE_SPLIT, # Select by single/multi-plane tiles (center only)
keep_split = KEEP_SPLIT) # When use_split, keep only multi-plane tiles (false - only single-plane)
pass
for train_var in range (NUM_TRAIN_SETS):
fpath = train_filenameTFR+("%03d"%(train_var,))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_train, files_list = ex_data.files_train, set_ds= ex_data.train_ds)
ex_data.writeTFRewcordsEpochLwir(
fpath,
sweep_files = ex_data.train_sweep_files,
sweep_disparities = ex_data.train_sweep_disparities,
files_list = ex_data.files_train,
set_ds = ex_data.train_ds,
radius = ex_data.radius,
rnd_tile = ex_data.rnd_tile,
rnd_plate = ex_data.rnd_plate)
list_of_file_lists_test, num_batch_tiles_test = ex_data.makeBatchLists( # results are also saved to self.*
data_ds = ex_data.test_ds,
......@@ -1759,12 +1836,21 @@ if __name__ == "__main__":
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = True, # Select by single/multi-plane tiles (center only)
keep_split = False) # When use_split, keep only multi-plane tiles (false - only single-plane)
use_split = USE_SPLIT, # Select by single/multi-plane tiles (center only)
keep_split = KEEP_SPLIT) # When use_split, keep only multi-plane tiles (false - only single-plane)
fpath = test_filenameTFR # +("-%03d"%(train_var,))
ex_data.writeTFRewcordsEpoch(fpath, ml_list = ml_list_test, files_list = ex_data.files_test, set_ds= ex_data.test_ds, num_scenes = num_test_scenes)
pass
ex_data.writeTFRewcordsEpochLwir(
fpath,
sweep_files = ex_data.test_sweep_files,
sweep_disparities = ex_data.test_sweep_disparities,
files_list = ex_data.files_test,
set_ds = ex_data.test_ds,
radius = ex_data.radius,
num_scenes = num_test_scenes,
rnd_tile = ex_data.rnd_tile,
rnd_plate = ex_data.rnd_plate)
else: # RADIUS > 0
# test
list_of_file_lists_test, num_batch_tiles_test = ex_data.makeBatchLists( # results are also saved to self.*
......@@ -1775,8 +1861,8 @@ if __name__ == "__main__":
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = True, # Select by single/multi-plane tiles (center only)
keep_split = False) # When use_split, keep only multi-plane tiles (false - only single-plane)
use_split = USE_SPLIT, # Select by single/multi-plane tiles (center only)
keep_split = KEEP_SPLIT) # When use_split, keep only multi-plane tiles (false - only single-plane)
num_le_test = num_batch_tiles_test.sum()
print("Number of <= %f disparity variance tiles: %d (est)"%(VARIANCE_THRESHOLD, num_le_test))
......@@ -1802,8 +1888,8 @@ if __name__ == "__main__":
min_var = VARIANCE_THRESHOLD, # Minimal tile variance to include
max_var = 1000.0, # Maximal tile variance to include
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = True, # Select by single/multi-plane tiles (center only)
keep_split = False) # When use_split, keep only multi-plane tiles (false - only single-plane)
use_split = USE_SPLIT, # Select by single/multi-plane tiles (center only)
keep_split = KEEP_SPLIT) # When use_split, keep only multi-plane tiles (false - only single-plane)
num_gt_test = num_batch_tiles_test.sum()
high_fract_test = 1.0 * num_gt_test / (num_le_test + num_gt_test)
......@@ -1831,8 +1917,8 @@ if __name__ == "__main__":
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = True, # Select by single/multi-plane tiles (center only)
keep_split = False) # When use_split, keep only multi-plane tiles (false - only single-plane)
use_split = USE_SPLIT, # Select by single/multi-plane tiles (center only)
keep_split = KEEP_SPLIT) # When use_split, keep only multi-plane tiles (false - only single-plane)
num_le_fake = num_batch_tiles_fake.sum()
print("Number of <= %f disparity variance tiles: %d (test)"%(VARIANCE_THRESHOLD, num_le_fake))
......@@ -1858,8 +1944,8 @@ if __name__ == "__main__":
min_var = VARIANCE_THRESHOLD, # Minimal tile variance to include
max_var = 1000.0, # Maximal tile variance to include
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = True, # Select by single/multi-plane tiles (center only)
keep_split = False) # When use_split, keep only multi-plane tiles (false - only single-plane)
use_split = USE_SPLIT, # Select by single/multi-plane tiles (center only)
keep_split = KEEP_SPLIT) # When use_split, keep only multi-plane tiles (false - only single-plane)
num_gt_fake = num_batch_tiles_fake.sum()
high_fract_fake = 1.0 * num_gt_fake / (num_le_fake + num_gt_fake)
......@@ -1888,8 +1974,8 @@ if __name__ == "__main__":
min_var = 0.0, # Minimal tile variance to include
max_var = VARIANCE_THRESHOLD, # Maximal tile variance to include
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = True, # Select by single/multi-plane tiles (center only)
keep_split = False) # When use_split, keep only multi-plane tiles (false - only single-plane)
use_split = USE_SPLIT, # Select by single/multi-plane tiles (center only)
keep_split = KEEP_SPLIT) # When use_split, keep only multi-plane tiles (false - only single-plane)
num_le_train = num_batch_tiles_train.sum()
print("Number of <= %f disparity variance tiles: %d (train)"%(VARIANCE_THRESHOLD, num_le_train))
......@@ -1913,9 +1999,9 @@ if __name__ == "__main__":
disp_neibs = num_neibs_train, # number of valid tiles around each center tile (for 3x3 (radius = 1) the maximal is 9)
min_var = VARIANCE_THRESHOLD, # Minimal tile variance to include
max_var = 1000.0, # Maximal tile variance to include
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = True, # Select by single/multi-plane tiles (center only)
keep_split = False) # When use_split, keep only multi-plane tiles (false - only single-plane)
min_neibs = MIN_NEIBS, # Minimal number of valid tiles to include
use_split = USE_SPLIT, # Select by single/multi-plane tiles (center only)
keep_split = KEEP_SPLIT) # When use_split, keep only multi-plane tiles (false - only single-plane)
num_gt_train = num_batch_tiles_train.sum()
high_fract_train = 1.0 * num_gt_train / (num_le_train + num_gt_train)
......
#!/usr/bin/env python3
'''
/**
* @file imagej_tiffwriter.py
* @brief save tiffs for imagej (1.52d+) - with stacks and hyperstacks
* @par <b>License</b>:
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
'''
__copyright__ = "Copyright 2018, Elphel, Inc."
__license__ = "GPL-3.0+"
__email__ = "oleg@elphel.com"
'''
Usage example:
import imagej_tiffwriter
import numpy as np
Images are numpy arrays of np.float32 with shape:
- (h,w)
- (n,h,w)
Labels can be provided as a list: ['label1','label2', etc.]
The list length is not checked against the number of images.
imagej_tiffwriter.save(path,images,labels)
'''
import numpy as np
import struct
import tifffile
import math
# from here: https://stackoverflow.com/questions/50258287/how-to-specify-colormap-when-saving-tiff-stack
def imagej_metadata_tags(metadata, byteorder):
"""Return IJMetadata and IJMetadataByteCounts tags from metadata dict.
The tags can be passed to the TiffWriter.save function as extratags.
"""
header = [{'>': b'IJIJ', '<': b'JIJI'}[byteorder]]
bytecounts = [0]
body = []
def writestring(data, byteorder):
return data.encode('utf-16' + {'>': 'be', '<': 'le'}[byteorder])
def writedoubles(data, byteorder):
return struct.pack(byteorder+('d' * len(data)), *data)
def writebytes(data, byteorder):
return data.tobytes()
metadata_types = (
('Info', b'info', 1, writestring),
('Labels', b'labl', None, writestring),
('Ranges', b'rang', 1, writedoubles),
('LUTs', b'luts', None, writebytes),
('Plot', b'plot', 1, writebytes),
('ROI', b'roi ', 1, writebytes),
('Overlays', b'over', None, writebytes))
for key, mtype, count, func in metadata_types:
if key not in metadata:
continue
if byteorder == '<':
mtype = mtype[::-1]
values = metadata[key]
if count is None:
count = len(values)
else:
values = [values]
header.append(mtype + struct.pack(byteorder+'I', count))
for value in values:
data = func(value, byteorder)
body.append(data)
bytecounts.append(len(data))
body = b''.join(body)
header = b''.join(header)
data = header + body
bytecounts[0] = len(header)
bytecounts = struct.pack(byteorder+('I' * len(bytecounts)), *bytecounts)
return ((50839, 'B', len(data), data, True),
(50838, 'I', len(bytecounts)//4, bytecounts, True))
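# Sketch of how the returned extratags are consumed (mirrors save() below;
# 'labeled.tiff' and the zero image are hypothetical):
#   ijtags = imagej_metadata_tags({'Labels': ['a', 'b', 'c']}, '<')
#   with tifffile.TiffWriter('labeled.tiff', imagej=True) as tif:
#       tif.save(np.zeros((32, 32), np.float32), extratags=ijtags)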
#def save(path,images,force_stack=False,force_hyperstack=False):
def save(path,images,labels=None,label_prefix="Label "):
'''
labels: a list or None
Expecting images of shape:
(h,w),
(n,h,w) - just create a simple stack
'''
# Got images, analyze shape:
# - possible formats (c == depth):
# -- (t,z,h,w,c)
# -- (t,h,w,c), t or z does not matter
# -- (h,w,c)
# -- (h,w)
# images with 0- or 1-dimensional shapes are not handled
#
# (h,w)
if len(images.shape)==2:
images = images[np.newaxis,...]
# now the shape length is 3
if len(images.shape)==3:
# tifffile treats shape[0] as channel, need to expand to get labels displayed
#images = images[images.shape[0],np.newaxis,images.shape[1],images.shape[2]]
images = np.reshape(images,(images.shape[0],1,images.shape[1],images.shape[2]))
labels_list = []
if labels is None:
for i in range(images.shape[0]):
labels_list.append(label_prefix+str(i+1))
else:
labels_list = labels
ijtags = imagej_metadata_tags({'Labels':labels_list}, '<')
with tifffile.TiffWriter(path, bigtiff=False,imagej=True) as tif:
for i in range(images.shape[0]):
tif.save(images[i], metadata={'version':'1.11a','loop': False}, extratags=ijtags)
# Testing
if __name__ == "__main__":
def hamming_window(x,N):
y = 0.54 - 0.46*math.cos(2*math.pi*x/(N-1))
return y
hw = hamming_window
NT = 5
NX = 512
NY = 512
images = np.empty((NT,NY,NX),np.float32)
import time
print(str(time.time())+": Generating test images")
for t in range(NT):
images[t,:,:] = np.array([[(255-t*25)*hw(i,512)*hw(j,512) for i in range(NX)] for j in range(NY)],np.float32)
print(str(time.time())+": Test images generated")
print("Images shape: "+str(images.shape))
v = save("tiffwriter_test.tiff",images)
#!/usr/bin/env python3
__copyright__ = "Copyright 2018-2019, Elphel, Inc."
__license__ = "GPL-3.0+"
__email__ = "andrey@elphel.com"
#python3 nn_ds_neibs30.py /data_ssd/lwir_sets/conf/qcstereo_lwir01.xml /data_ssd/lwir_sets/
#tensorboard --logdir="nn_ds_neibs30_graph13-9RNSWLAM0.5SLAM0.1SCLP0.2_nG_nI_HF_CP0.3_S0.03" --port=7001
import os
import sys
import numpy as np
import time
import shutil
from threading import Thread
import qcstereo_network
import qcstereo_losses
import qcstereo_functions as qsf
import tensorflow as tf
qsf.TIME_START = time.time()
qsf.TIME_LAST = qsf.TIME_START
IMG_WIDTH = 20 # 324 # tiles per image row
DEBUG_LEVEL= 1
try:
conf_file = sys.argv[1]
except IndexError:
print("Configuration path is required as a first argument. Optional second argument specifies root directory for data files")
exit(1)
try:
root_dir = sys.argv[2]
except IndexError:
root_dir = os.path.dirname(conf_file)
print ("Configuration file: " + conf_file)
parameters, dirs, files, _ = qsf.parseXmlConfig(conf_file, root_dir)
"""
Temporarily for backward compatibility
"""
if not "SLOSS_CLIP" in parameters:
parameters['SLOSS_CLIP'] = 0.5
print ("Old config, setting SLOSS_CLIP=", parameters['SLOSS_CLIP'])
"""
Defined in config file
"""
TILE_SIDE, TILE_LAYERS, TWO_TRAINS, NET_ARCH1, NET_ARCH2 = [None]*5
ABSOLUTE_DISPARITY,SYM8_SUB, WLOSS_LAMBDA, SLOSS_LAMBDA, SLOSS_CLIP = [None]*5
SPREAD_CONVERGENCE, INTER_CONVERGENCE, HOR_FLIP, DISP_DIFF_CAP, DISP_DIFF_SLOPE = [None]*5
CLUSTER_RADIUS = None
PARTIALS_WEIGHTS, MAX_IMGS_IN_MEM, MAX_FILES_PER_GROUP, BATCH_WEIGHTS, ONLY_TILE = [None] * 5
USE_CONFIDENCE, WBORDERS_ZERO, EPOCHS_TO_RUN, FILE_UPDATE_EPOCHS = [None] * 4
LR600,LR400,LR200,LR100,LR = [None]*5
SHUFFLE_FILES, EPOCHS_FULL_TEST, SAVE_TIFFS = [None] * 3
CHECKPOINT_PERIOD = None
TRAIN_BUFFER_GPU, TRAIN_BUFFER_CPU = [None]*2
TEST_TITLES = None
LOGFILE="results.txt"
"""
Next gets globals from the config file
"""
globals().update(parameters)
TRAIN_BUFFER_SIZE = TRAIN_BUFFER_GPU * TRAIN_BUFFER_CPU # in merged (quad) batches
#exit(0)
WIDTH = 20 # 324
HEIGHT = 15 # 242
TILE_SIZE = TILE_SIDE* TILE_SIDE # == 81
FEATURES_PER_TILE = TILE_LAYERS * TILE_SIZE# == 324
BATCH_SIZE = ([1,2][TWO_TRAINS])*2*1000//25 # == 80 Each batch of tiles has balanced D/S tiles, shuffled batches but not inside batches
SUFFIX=(str(NET_ARCH1)+'-'+str(NET_ARCH2)+
(["R","A"][ABSOLUTE_DISPARITY]) +
(["NS","S8"][SYM8_SUB])+
"WLAM"+str(WLOSS_LAMBDA)+
"SLAM"+str(SLOSS_LAMBDA)+
"SCLP"+str(SLOSS_CLIP)+
(['_nG','_G'][SPREAD_CONVERGENCE])+
(['_nI','_I'][INTER_CONVERGENCE]) +
(['_nHF',"_HF"][HOR_FLIP]) +
('_CP'+str(DISP_DIFF_CAP)) +
('_S'+str(DISP_DIFF_SLOPE))
)
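# Worked example: the graph suffix in the tensorboard command above,
# "13-9RNSWLAM0.5SLAM0.1SCLP0.2_nG_nI_HF_CP0.3_S0.03", decodes as
#   NET_ARCH1 = 13, NET_ARCH2 = 9,
#   ABSOLUTE_DISPARITY = False -> "R", SYM8_SUB = False -> "NS",
#   WLOSS_LAMBDA = 0.5, SLOSS_LAMBDA = 0.1, SLOSS_CLIP = 0.2,
#   SPREAD_CONVERGENCE = False -> "_nG", INTER_CONVERGENCE = False -> "_nI",
#   HOR_FLIP = True -> "_HF", DISP_DIFF_CAP = 0.3, DISP_DIFF_SLOPE = 0.03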
NN_LAYOUT1 = qcstereo_network.NN_LAYOUTS[NET_ARCH1]
NN_LAYOUT2 = qcstereo_network.NN_LAYOUTS[NET_ARCH2]
USE_PARTIALS = not PARTIALS_WEIGHTS is None # False - just a single Siamese net, True - partial outputs that use concentric squares of the first level subnets
# Tiff export slice labels
SLICE_LABELS = ["nn_out_ext","hier_out_ext","gt_disparity","gt_strength",
"cutcorn_cost_nw","cutcorn_cost",
"gt-avg_dist","avg8_disp","gt_disp","out-avg"]
##############################################################################
cluster_size = (2 * CLUSTER_RADIUS + 1) * (2 * CLUSTER_RADIUS + 1)
center_tile_index = 2 * CLUSTER_RADIUS * (CLUSTER_RADIUS + 1)
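# Worked example: for CLUSTER_RADIUS = 2 the cluster is 5x5, so cluster_size = 25
# and center_tile_index = 2*2*(2+1) = 12, the middle tile of the flattened
# 5x5 cluster (0-based). For CLUSTER_RADIUS = 1: cluster_size = 9, center = 4.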
qsf.prepareFiles(dirs, files, suffix = SUFFIX)
#copy config to results directory
print ("Copying config files to results directory:\n ('%s' -> '%s')"%(conf_file,dirs['result']))
try:
os.makedirs(dirs['result'])
except:
pass
shutil.copy2(conf_file,dirs['result'])
LOGPATH = os.path.join(dirs['result'],LOGFILE)
if TEST_TITLES is None:
TEST_TITLES = qsf.defaultTestTitles(files)
partials = None
partials = qsf.concentricSquares(CLUSTER_RADIUS)
PARTIALS_WEIGHTS = [1.0*pw/sum(PARTIALS_WEIGHTS) for pw in PARTIALS_WEIGHTS]
if not USE_PARTIALS:
partials = partials[0:1]
PARTIALS_WEIGHTS = [1.0]
qsf.evaluateAllResults(result_files = files['result'],
absolute_disparity = ABSOLUTE_DISPARITY,
cluster_radius = CLUSTER_RADIUS,
labels = SLICE_LABELS,
logpath= LOGPATH)
image_data = qsf.initImageData(
files = files,
max_imgs = MAX_IMGS_IN_MEM,
cluster_radius = CLUSTER_RADIUS,
tile_layers = TILE_LAYERS,
tile_side = TILE_SIDE,
width = IMG_WIDTH,
replace_nans = True)
corr2d_len, target_disparity_len, gtds_len, _ = qsf.get_lengths(CLUSTER_RADIUS, TILE_LAYERS, TILE_SIDE)
train_next, dataset_train, datasets_test= qsf.initTrainTestData(
files = files,
cluster_radius = CLUSTER_RADIUS,
buffer_size = TRAIN_BUFFER_SIZE * BATCH_SIZE, # number of clusters per train
test_titles = TEST_TITLES)
corr2d_train_placeholder = tf.compat.v1.placeholder(dataset_train.dtype, (None,FEATURES_PER_TILE * cluster_size)) # corr2d_train.shape)
target_disparity_train_placeholder = tf.compat.v1.placeholder(dataset_train.dtype, (None,1 * cluster_size)) #target_disparity_train.shape)
gt_ds_train_placeholder = tf.compat.v1.placeholder(dataset_train.dtype, (None,2 * cluster_size)) #gt_ds_train.shape)
dataset_tt = tf.data.Dataset.from_tensor_slices({
"corr2d": corr2d_train_placeholder,
"target_disparity": target_disparity_train_placeholder,
"gt_ds": gt_ds_train_placeholder})
tf_batch_weights = tf.compat.v1.placeholder(shape=(None,), dtype=tf.float32, name = "batch_weights") # way to increase importance of the high variance clusters
feed_batch_weights = np.array(BATCH_WEIGHTS*(BATCH_SIZE//len(BATCH_WEIGHTS)), dtype=np.float32)
feed_batch_weight_1 = np.array([1.0], dtype=np.float32)
dataset_test_size = len(datasets_test[0])
dataset_test_size //= BATCH_SIZE
dataset_img_size = len(image_data[0]['corr2d'])
dataset_img_size //= BATCH_SIZE
dataset_tt = dataset_tt.batch(BATCH_SIZE)
dataset_tt = dataset_tt.prefetch(BATCH_SIZE)
iterator_tt = dataset_tt.make_initializable_iterator()
next_element_tt = iterator_tt.get_next()
result_dir = './attic/result_neibs_'+ SUFFIX+'/'
checkpoint_dir = './attic/result_neibs_'+ SUFFIX+'/'
save_freq = 500
def debug_gt_variance(
indx, # This tile index (0..8)
center_indx, # center tile index
gt_ds_batch # [?:9:2]
):
with tf.name_scope("Debug_GT_Variance"):
d_gt_this = tf.reshape(gt_ds_batch[:,2 * indx],[-1], name = "d_this")
d_gt_center = tf.reshape(gt_ds_batch[:,2 * center_indx],[-1], name = "d_center")
d_gt_diff = tf.subtract(d_gt_this, d_gt_center, name = "d_diff")
d_gt_diff2 = tf.multiply(d_gt_diff, d_gt_diff, name = "d_diff2")
d_gt_var = tf.reduce_mean(d_gt_diff2, name = "d_gt_var")
return d_gt_var
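# NumPy equivalent of the graph ops above (reference sketch only; gt_ds_batch
# interleaves disparity/strength pairs, so 2*indx addresses the disparity):
#   d_this   = gt_ds_batch[:, 2 * indx]         # disparity of tile `indx`
#   d_center = gt_ds_batch[:, 2 * center_indx]  # disparity of the center tile
#   d_gt_var = np.mean((d_this - d_center) ** 2)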
target_disparity_cluster = tf.reshape(next_element_tt['target_disparity'], [-1,cluster_size, 1], name="targdisp_cluster")
corr2d_Nx325 = tf.concat([tf.reshape(next_element_tt['corr2d'],[-1,cluster_size,FEATURES_PER_TILE], name="coor2d_cluster"),
target_disparity_cluster], axis=2, name = "corr2d_Nx325")
if SPREAD_CONVERGENCE:
outs, inp_weights = qcstereo_network.networks_siam(
input = corr2d_Nx325,
input_global = target_disparity_cluster,
layout1 = NN_LAYOUT1,
layout2 = NN_LAYOUT2,
inter_convergence = INTER_CONVERGENCE,
sym8 = SYM8_SUB,
only_tile = ONLY_TILE, #Remove/put None for normal operation
partials = partials,
use_confidence= USE_CONFIDENCE,
cluster_radius = CLUSTER_RADIUS)
else:
outs, inp_weights = qcstereo_network.networks_siam(
input_tensor= corr2d_Nx325,
input_global = None,
layout1 = NN_LAYOUT1,
layout2 = NN_LAYOUT2,
inter_convergence = False,
sym8 = SYM8_SUB,
only_tile = ONLY_TILE, #Remove/put None for normal operation
partials = partials,
use_confidence= USE_CONFIDENCE,
cluster_radius = CLUSTER_RADIUS)
tf_partial_weights = tf.constant(PARTIALS_WEIGHTS,dtype=tf.float32,name="partial_weights")
G_losses = [0.0]*len(partials)
target_disparity_batch= next_element_tt['target_disparity'][:,center_tile_index:center_tile_index+1]
gt_ds_batch_clust = next_element_tt['gt_ds']
gt_ds_batch = gt_ds_batch_clust[:,2 * center_tile_index: 2 * (center_tile_index +1)]
G_losses[0], _disp_slice, _d_gt_slice, _out_diff, _out_diff2, _w_norm, _out_wdiff2, _cost1 = qcstereo_losses.batchLoss(
out_batch = outs[0], # [batch_size,(1..2)] tf_result
target_disparity_batch= target_disparity_batch, # next_element_tt['target_disparity'][:,center_tile_index:center_tile_index+1], # target_disparity_batch_center, # next_element_tt['target_disparity'], # target_disparity, ### target_d, # [batch_size] tf placeholder
gt_ds_batch = gt_ds_batch, # next_element_tt['gt_ds'][:,2 * center_tile_index: 2 * (center_tile_index +1)], # gt_ds_batch_center, ## next_element_tt['gt_ds'], # gt_ds, ### gt, # [batch_size,2] tf placeholder
batch_weights = tf_batch_weights,
disp_diff_cap = DISP_DIFF_CAP,
disp_diff_slope= DISP_DIFF_SLOPE,
absolute_disparity = ABSOLUTE_DISPARITY,
use_confidence = USE_CONFIDENCE, # True,
lambda_conf_avg = 0.01,
## lambda_conf_pwr = 0.1,
conf_pwr = 2.0,
gt_conf_offset = 0.08,
gt_conf_pwr = 2.0,
error2_offset = 0, # 0.0025, # (0.05^2)
disp_wmin = 1.0, # minimal disparity to apply weight boosting for small disparities
disp_wmax = 8.0, # maximal disparity to apply weight boosting for small disparities
use_out = False) # use calculated disparity for disparity weight boosting (False - use target disparity)
G_loss = G_losses[0]
for n in range (1,len(partials)):
G_losses[n], _, _, _, _, _, _, _ = qcstereo_losses.batchLoss(
out_batch = outs[n], # [batch_size,(1..2)] tf_result
target_disparity_batch= target_disparity_batch, #next_element_tt['target_disparity'][:,center_tile_index:center_tile_index+1], # target_disparity_batch_center, # next_element_tt['target_disparity'], # target_disparity, ### target_d, # [batch_size] tf placeholder
gt_ds_batch = gt_ds_batch, # next_element_tt['gt_ds'][:,2 * center_tile_index: 2 * (center_tile_index +1)], # gt_ds_batch_center, ## next_element_tt['gt_ds'], # gt_ds, ### gt, # [batch_size,2] tf placeholder
batch_weights = tf_batch_weights,
disp_diff_cap = DISP_DIFF_CAP,
disp_diff_slope= DISP_DIFF_SLOPE,
absolute_disparity = ABSOLUTE_DISPARITY,
use_confidence = USE_CONFIDENCE, # True,
lambda_conf_avg = 0.01,
# lambda_conf_pwr = 0.1,
conf_pwr = 2.0,
gt_conf_offset = 0.08,
gt_conf_pwr = 2.0,
error2_offset = 0, # 0.0025, # (0.05^2)
disp_wmin = 1.0, # minimal disparity to apply weight boosting for small disparities
disp_wmax = 8.0, # maximal disparity to apply weight boosting for small disparities
use_out = False) # use calculated disparity for disparity weight boosting (False - use target disparity)
tf_wlosses = tf.multiply(G_losses, tf_partial_weights, name = "tf_wlosses")
G_losses_sum = tf.reduce_sum(tf_wlosses, name = "G_losses_sum")
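# Equivalent plain-Python sketch of the two lines above: each partial loss is
# scaled by its (normalized) weight from PARTIALS_WEIGHTS and the results are
# summed into a single scalar:
#   G_losses_sum = sum(w * g for w, g in zip(PARTIALS_WEIGHTS, G_losses))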
if SLOSS_LAMBDA > 0:
S_loss, rslt_cost_nw, rslt_cost_w, rslt_d , rslt_avg_disparity, rslt_gt_disparity, rslt_offs = qcstereo_losses.smoothLoss(
out_batch = outs[0], # [batch_size,(1..2)] tf_result
target_disparity_batch = target_disparity_batch, # [batch_size] tf placeholder
gt_ds_batch_clust = gt_ds_batch_clust, # [batch_size,25,2] tf placeholder
clip = SLOSS_CLIP,
absolute_disparity = ABSOLUTE_DISPARITY, #when false there should be no activation on disparity output !
cluster_radius = CLUSTER_RADIUS)
GS_loss = tf.add(G_losses_sum, SLOSS_LAMBDA * S_loss, name = "GS_loss")
else:
S_loss = tf.constant(0.0, dtype=tf.float32,name = "S_loss")
GS_loss = G_losses_sum # G_loss
if WLOSS_LAMBDA > 0.0:
W_loss = qcstereo_losses.weightsLoss(
inp_weights = inp_weights[0], # inp_weights - list of tensors, currently - just [0]
tile_layers= TILE_LAYERS, # 4
tile_side = TILE_SIDE, # 9
wborders_zero = WBORDERS_ZERO)
GW_loss = tf.add(GS_loss, WLOSS_LAMBDA * W_loss, name = "GW_loss")
else:
GW_loss = GS_loss # G_loss
W_loss = tf.constant(0.0, dtype=tf.float32,name = "W_loss")
GT_variance = debug_gt_variance(indx = 0, # This tile index (0..8)
center_indx = 4, # center tile index
gt_ds_batch = next_element_tt['gt_ds'])# [?:18]
tf_ph_G_loss = tf.compat.v1.placeholder(tf.float32,shape=None,name='G_loss_avg')
tf_ph_G_losses = tf.compat.v1.placeholder(tf.float32,shape=[len(partials)],name='G_losses_avg')
tf_ph_S_loss = tf.compat.v1.placeholder(tf.float32,shape=None,name='S_loss_avg')
tf_ph_W_loss = tf.compat.v1.placeholder(tf.float32,shape=None,name='W_loss_avg')
tf_ph_GW_loss = tf.compat.v1.placeholder(tf.float32,shape=None,name='GW_loss_avg')
tf_ph_sq_diff = tf.compat.v1.placeholder(tf.float32,shape=None,name='sq_diff_avg')
tf_gtvar_diff = tf.compat.v1.placeholder(tf.float32,shape=None,name='gtvar_diff')
tf_img_test0 = tf.compat.v1.placeholder(tf.float32,shape=None,name='img_test0')
tf_img_test9 = tf.compat.v1.placeholder(tf.float32,shape=None,name='img_test9')
"""
with tf.name_scope('sample'):
tf.compat.v1.summary.scalar("GW_loss", GW_loss)
tf.compat.v1.summary.scalar("G_loss", G_loss)
tf.compat.v1.summary.scalar("S_loss", S_loss)
tf.compat.v1.summary.scalar("W_loss", W_loss)
tf.compat.v1.summary.scalar("sq_diff", _cost1)
tf.compat.v1.summary.scalar("gtvar_diff", GT_variance)
"""
with tf.name_scope('epoch_average'):
for i in range(tf_ph_G_losses.shape[0]):
tf.compat.v1.summary.scalar("G_loss_epoch_"+str(i), tf_ph_G_losses[i])
tf.compat.v1.summary.scalar("GW_loss_epoch", tf_ph_GW_loss)
tf.compat.v1.summary.scalar("G_loss_epoch", tf_ph_G_loss)
tf.compat.v1.summary.scalar("S_loss_epoch", tf_ph_S_loss)
tf.compat.v1.summary.scalar("W_loss_epoch", tf_ph_W_loss)
tf.compat.v1.summary.scalar("sq_diff_epoch", tf_ph_sq_diff)
tf.compat.v1.summary.scalar("gtvar_diff", tf_gtvar_diff)
tf.compat.v1.summary.scalar("img_test0", tf_img_test0)
tf.compat.v1.summary.scalar("img_test9", tf_img_test9)
trainable_vars= tf.trainable_variables()
lr= tf.compat.v1.placeholder(tf.float32)
G_opt= tf.compat.v1.train.AdamOptimizer(learning_rate=lr).minimize(GW_loss)
ROOT_PATH = './attic/nn_ds_neibs30_graph'+SUFFIX+"/" # for tensorboard
TT_SUBDIRS = ['train'] #,'test0','test1','test2','test3']
for i,_ in enumerate(datasets_test):
# TT_SUBDIRS.append('test%d'%(i))
TT_SUBDIRS.append(TEST_TITLES[i].replace(' ','_'))
TT_PATHS = [ROOT_PATH + p for p in TT_SUBDIRS]
# CLEAN OLD STUFF
shutil.rmtree(ROOT_PATH, ignore_errors=True)
#for p in TT_PATHS:
# shutil.rmtree(p, ignore_errors=True)
#seems that runs use directory creation time to order graphs
#for p in TT_PATHS:
# os.makedirs(p)
# time.sleep(1.5) # reduce later
num_train_subs = len(train_next) # number of (different type) merged training sets
dataset_train_size = TRAIN_BUFFER_GPU * num_train_subs # TRAIN_BUFFER_SIZE
tt_summaries = [0.0 for e in TT_SUBDIRS]
tt2_avg = [0.0 for e in TT_SUBDIRS]
tt_gw_avg = [0.0 for e in TT_SUBDIRS]
tt_g_avgs = [[0.0]*len(partials) for e in TT_SUBDIRS]
tt_w_avg = [0.0 for e in TT_SUBDIRS]
tt_s_avg = [0.0 for e in TT_SUBDIRS]
tt_gtvar_avg = [0.0 for e in TT_SUBDIRS]
saver=tf.compat.v1.train.Saver(trainable_vars)
saver_def = saver.as_saver_def()
# The name of the tensor you must feed with a filename when saving/restoring.
print ('saver_def.filename_tensor_name=',saver_def.filename_tensor_name)
# The name of the target operation you must run when restoring.
print ('saver_def.restore_op_name=',saver_def.restore_op_name)
# The name of the target operation you must run when saving.
print ('saver_def.save_tensor_name=',saver_def.save_tensor_name)
try:
os.makedirs(os.path.dirname(files['checkpoints']))
print ("Created directory ",os.path.dirname(files['checkpoints']))
# os.makedirs(files['checkpoints'])
except:
pass
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
merged = tf.summary.merge_all()
tt_writers = []
for p in TT_PATHS:
tt_writers.append(tf.summary.FileWriter(p, sess.graph))
print ("Adding delay to make directory creation time different: "+p)
time.sleep(2.0) # reduce later
loss_gw_train_hist= np.empty(dataset_train_size, dtype=np.float32)
loss_g_train_hists= [np.empty(dataset_train_size, dtype=np.float32) for p in partials]
loss_s_train_hist= np.empty(dataset_train_size, dtype=np.float32)
loss_w_train_hist= np.empty(dataset_train_size, dtype=np.float32)
loss_gw_test_hist= np.empty(dataset_test_size, dtype=np.float32)
loss_g_test_hists= [np.empty(dataset_test_size, dtype=np.float32) for p in partials]
loss_s_test_hist= np.empty(dataset_test_size, dtype=np.float32)
loss_w_test_hist= np.empty(dataset_test_size, dtype=np.float32)
loss2_train_hist= np.empty(dataset_train_size, dtype=np.float32)
loss2_test_hist= np.empty(dataset_test_size, dtype=np.float32)
gtvar_train_hist= np.empty(dataset_train_size, dtype=np.float32)
gtvar_test_hist= np.empty(dataset_test_size, dtype=np.float32)
gtvar_train = 0.0
gtvar_test = 0.0
img_gain_test0 = 1.0
img_gain_test9 = 1.0
thr=None
thr_result = None
trains_to_update = [train_next[n_train]['more_files'] for n_train in range(len(train_next))]
for epoch in range (EPOCHS_TO_RUN):
"""
update files after each epoch, all 4.
Convert to threads after testing
"""
if (FILE_UPDATE_EPOCHS > 0) and (epoch % FILE_UPDATE_EPOCHS == 0):
if not thr is None:
if thr.is_alive():
qsf.print_time("***WAITING*** until tfrecord gets loaded", end=" ")
else:
qsf.print_time("tfrecord is ***ALREADY LOADED*** ", end=" ")
thr.join()
qsf.print_time("Done")
qsf.print_time("Inserting new data", end=" ")
for n_train in range(len(trains_to_update)):
if trains_to_update[n_train]:
qsf.add_file_to_dataset(dataset = dataset_train,
new_dataset = thr_result[n_train],
train_next = train_next[n_train])
qsf.print_time("Done")
thr_result = []
fpaths = []
for n_train in range(len(trains_to_update)):
if trains_to_update[n_train]:
fpaths.append(files['train'][n_train][train_next[n_train]['file']])
qsf.print_time("Will read in background: "+fpaths[-1])
thr = Thread(target=qsf.getMoreFiles, args=(fpaths,thr_result, CLUSTER_RADIUS, HOR_FLIP, TILE_LAYERS, TILE_SIDE))
thr.start()
train_buf_index = epoch % TRAIN_BUFFER_CPU # GPU memory from CPU memory (now 4)
if epoch >=600:
learning_rate = LR600
elif epoch >=400:
learning_rate = LR400
elif epoch >=200:
learning_rate = LR200
elif epoch >=100:
learning_rate = LR100
else:
learning_rate = LR
if (train_buf_index == 0) and SHUFFLE_FILES:
qsf.print_time("Shuffling how datasets datasets_train_lvar and datasets_train_hvar are zipped together", end="")
qsf.shuffle_in_place(
dataset_data = dataset_train, #alternating clusters from 4 sources.each cluster has all needed data (concatenated)
period = num_train_subs)
qsf.print_time(" Done")
sti = train_buf_index * dataset_train_size * BATCH_SIZE # TRAIN_BUFFER_GPU * num_train_subs
eti = sti+ dataset_train_size * BATCH_SIZE# (train_buf_index +1) * TRAIN_BUFFER_GPU * num_train_subs
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_train[sti:eti,:corr2d_len],
target_disparity_train_placeholder: dataset_train[sti:eti,corr2d_len:corr2d_len+target_disparity_len],
gt_ds_train_placeholder: dataset_train[sti:eti,corr2d_len+target_disparity_len:corr2d_len+target_disparity_len+gtds_len] })
for i in range(dataset_train_size):
# try:
_, GW_loss_trained, G_losses_trained, S_loss_trained, W_loss_trained, output, disp_slice, d_gt_slice, out_diff, out_diff2, w_norm, out_wdiff2, out_cost1, gt_variance = sess.run(
[ G_opt,
GW_loss,
G_losses,
S_loss,
W_loss,
outs[0],
_disp_slice,
_d_gt_slice,
_out_diff,
_out_diff2,
_w_norm,
_out_wdiff2,
_cost1,
GT_variance
],
feed_dict={tf_batch_weights: feed_batch_weights,
lr: learning_rate
}) # previous value of *_avg #Fetch argument 0.0 has invalid type <class 'float'>, must be a string or Tensor. (Can not convert a float into a Tensor or Operation.)
loss_gw_train_hist[i] = GW_loss_trained
for nn, gl in enumerate(G_losses_trained):
loss_g_train_hists[nn][i] = gl
loss_s_train_hist[i] = S_loss_trained
loss_w_train_hist[i] = W_loss_trained
loss2_train_hist[i] = out_cost1
gtvar_train_hist[i] = gt_variance
# except tf.errors.OutOfRangeError:
# print("****** NO MORE DATA! train done at step %d"%(i))
# break
tt_gw_avg[0] = np.average(loss_gw_train_hist).astype(np.float32)
for nn, lgth in enumerate(loss_g_train_hists):
tt_g_avgs[0][nn] = np.average(lgth).astype(np.float32)
tt_s_avg[0] = np.average(loss_s_train_hist).astype(np.float32)
tt_w_avg[0] = np.average(loss_w_train_hist).astype(np.float32)
tt2_avg[0] = np.average(loss2_train_hist).astype(np.float32)
tt_gtvar_avg[0] = np.average(gtvar_train_hist).astype(np.float32)
for ntest,dataset_test in enumerate(datasets_test):
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_test[:, :corr2d_len], #['corr2d'],
target_disparity_train_placeholder: dataset_test[:, corr2d_len:corr2d_len+target_disparity_len], # ['target_disparity'],
gt_ds_train_placeholder: dataset_test[:, corr2d_len+target_disparity_len:] }) # ['gt_ds']})
"""
TODO: Make it possible to have different length dataset_test arrays to mix different length test files
"""
for i in range(dataset_test_size):
# for i in range(dataset_test.shape[0]):
# try:
GW_loss_tested, G_losses_tested, S_loss_tested, W_loss_tested, output, disp_slice, d_gt_slice, out_diff, out_diff2, w_norm, out_wdiff2, out_cost1, gt_variance = sess.run(
[GW_loss,
G_losses,
S_loss,
W_loss,
outs[0],
_disp_slice,
_d_gt_slice,
_out_diff,
_out_diff2,
_w_norm,
_out_wdiff2,
_cost1,
GT_variance
],
feed_dict={tf_batch_weights: feed_batch_weight_1 , # feed_batch_weights,
lr: learning_rate
}) # previous value of *_avg
loss_gw_test_hist[i] = GW_loss_tested
for nn, gl in enumerate(G_losses_tested):
loss_g_test_hists[nn][i] = gl
loss_s_test_hist[i] = S_loss_tested
loss_w_test_hist[i] = W_loss_tested
loss2_test_hist[i] = out_cost1
gtvar_test_hist[i] = gt_variance
# except tf.errors.OutOfRangeError:
# print("test done at step %d"%(i))
# break
tt_gw_avg[ntest+1] = np.average(loss_gw_test_hist).astype(np.float32)
for nn, lgth in enumerate(loss_g_test_hists):
tt_g_avgs[ntest+1][nn] = np.average(lgth).astype(np.float32)
tt_s_avg[ntest+1] = np.average(loss_s_test_hist).astype(np.float32)
tt_w_avg[ntest+1] = np.average(loss_w_test_hist).astype(np.float32)
tt2_avg[ntest+1] = np.average(loss2_test_hist).astype(np.float32)
tt_gtvar_avg[ntest+1] = np.average(gtvar_test_hist).astype(np.float32)
if (((epoch + 1) == EPOCHS_TO_RUN) or (((epoch + 1) % EPOCHS_FULL_TEST) == 0)) and (len(image_data) > 0) :
lf = None
if (epoch + 1) == EPOCHS_TO_RUN: # last
print("Last epoch, removing train/test datasets to reduce memory footprint")
del(dataset_train)
del(dataset_test)
if LOGPATH:
lf=open(LOGPATH,"w") # overwrite previous (or make it "a"?)
last_epoch = (epoch + 1) == EPOCHS_TO_RUN
ind_img = [0]
if last_epoch:
ind_img = [i for i in range(len(image_data))]
###################################################
# Read the full image
###################################################
## test_summaries_img = [0.0]*len(ind_img) # datasets_img)
disp_out= np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_cost_nw= np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_cost_w= np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_d= np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_avg_disparity = np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_gt_disparity = np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_offs = np.empty((WIDTH*HEIGHT), dtype=np.float32)
for ntest in ind_img: # datasets_img):
dataset_img = qsf.readImageData(
image_data = image_data,
files = files,
indx = ntest,
cluster_radius = CLUSTER_RADIUS,
tile_layers = TILE_LAYERS,
tile_side = TILE_SIDE,
width = IMG_WIDTH,
replace_nans = True)
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_img['corr2d'],
target_disparity_train_placeholder: dataset_img['target_disparity'],
gt_ds_train_placeholder: dataset_img['gt_ds']})
for start_offs in range(0,disp_out.shape[0],BATCH_SIZE):
end_offs = min(start_offs+BATCH_SIZE,disp_out.shape[0])
# try:
output, cost_nw, cost_w, dd, avg_disparity, gt_disparity, offs = sess.run(
[outs[0], # {?,1]
rslt_cost_nw, #[?,]
rslt_cost_w, #[?,]
rslt_d, #[?,]
rslt_avg_disparity,
rslt_gt_disparity,
rslt_offs
],
feed_dict={
tf_batch_weights: feed_batch_weight_1 # feed_batch_weights,
}) # previous value of *_avg
# except tf.errors.OutOfRangeError:
# print("test done at step %d"%(i))
# break
# try:
disp_out[start_offs:end_offs] = output.flatten()
dbg_cost_nw[start_offs:end_offs] = cost_nw.flatten()
dbg_cost_w [start_offs:end_offs] = cost_w.flatten()
dbg_d[start_offs:end_offs] = dd.flatten()
dbg_avg_disparity[start_offs:end_offs] = avg_disparity.flatten()
dbg_gt_disparity[start_offs:end_offs] = gt_disparity.flatten()
dbg_offs[start_offs:end_offs] = offs.flatten()
# except ValueError:
# print("dataset_img_size= %d, i=%d, output.shape[0]=%d "%(dataset_img_size, i, output.shape[0]))
# break;
pass
result_file = files['result'][ntest] # result_files[ntest]
try:
os.makedirs(os.path.dirname(result_file))
except:
pass
rslt = np.concatenate(
[disp_out.reshape(-1,1),
dataset_img['t_disps'], #t_disps[ntest],
dataset_img['gtruths'], # gtruths[ntest],
dbg_cost_nw.reshape(-1,1),
dbg_cost_w.reshape(-1,1),
dbg_d.reshape(-1,1),
dbg_avg_disparity.reshape(-1,1),
dbg_gt_disparity.reshape(-1,1),
dbg_offs.reshape(-1,1)],1)
np.save(result_file, rslt.reshape(HEIGHT,WIDTH,-1))
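# Note (inferred): the concatenated slices appear to line up with SLICE_LABELS
# defined above - disp_out, t_disps, gtruths (disparity + strength), then the
# six debug layers (cost_nw, cost_w, d, avg_disparity, gt_disparity, offs),
# 10 slices total.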
rslt = qsf.eval_results(result_file, ABSOLUTE_DISPARITY, radius=CLUSTER_RADIUS, logfile=lf)
img_gain_test0 = rslt[0][0]/rslt[0][1]
img_gain_test9 = rslt[9][0]/rslt[9][1]
if SAVE_TIFFS:
qsf.result_npy_to_tiff(result_file, ABSOLUTE_DISPARITY, fix_nan = True,labels=SLICE_LABELS, logfile=lf)
"""
Remove dataset_img (if it is not [0]) to reduce memory footprint
"""
if ntest > 0:
image_data[ntest] = None
if lf:
lf.close()
# tensorboard scalars
tt_summaries[0] = sess.run([merged],
feed_dict={ tf_ph_GW_loss: tt_gw_avg[0],
tf_ph_G_loss: tt_g_avgs[0][0], #train_g_avg,
tf_ph_G_losses: tt_g_avgs[0],
tf_ph_S_loss: tt_s_avg[0],
tf_ph_W_loss: tt_w_avg[0],
tf_ph_sq_diff: tt2_avg[0], # train2_avg,
tf_gtvar_diff: tt_gtvar_avg[0],
tf_img_test0: img_gain_test0,
tf_img_test9: img_gain_test9}) # previous value of *_avg #Fetch argument 0.0 has invalid type <class 'float'>, must be a string or Tensor. (Can not convert a float into a Tensor or Operation.)
for ntest, _ in enumerate(datasets_test):
tt_summaries[ntest+1] = sess.run([merged],
feed_dict={
tf_ph_GW_loss: tt_gw_avg[ntest+1],
tf_ph_G_loss: tt_g_avgs[ntest+1][0],
tf_ph_G_losses: tt_g_avgs[ntest+1], # train_g_avgs, # temporary, there is no data for test
tf_ph_S_loss: tt_s_avg[ntest+1],
tf_ph_W_loss: tt_w_avg[ntest+1],
tf_ph_sq_diff: tt2_avg[ntest+1], #test2_avg,
tf_gtvar_diff: tt_gtvar_avg[ntest+1],
tf_img_test0: img_gain_test0,
tf_img_test9: img_gain_test9})
for n,tt_writer in enumerate(tt_writers):
## tt_writer.add_summary(tt_summaries[n],epoch)
tt_writer.add_summary(tt_summaries[n][0],epoch)
# if epoch ==0 :
# print ("adding delay to make directory creation time different")
# time.sleep(2.0) # reduce later
qsf.print_time("==== %04d:%03d -> %.4f %.4f %.4f %.4f %.4f (%.4f %.4f %.4f %.4f %.4f) ===="%(
epoch,i,
tt_gw_avg[0], tt_gw_avg[1], tt_gw_avg[2], tt_gw_avg[3], tt_gw_avg[4],
tt2_avg[0], tt2_avg[1], tt2_avg[2], tt2_avg[3], tt2_avg[4]))
if (not CHECKPOINT_PERIOD is None) and (((epoch + 1) % CHECKPOINT_PERIOD) == 0):
print("Saving periodic checkpoint (trained variables only) to %s, global_step = %d"%(os.path.dirname(files['checkpoints']), epoch),end=" => ")
print(saver.save(sess, files['checkpoints'], global_step=epoch, write_meta_graph=False))
# Close writers
for tt_writer in tt_writers:
try:
tt_writer.close()
except:
print ("Could not close tt_writer: ",tt_writer)
print("Saving final checkpoint (trained variables only) to %s"%(files['checkpoints']),end=" => ")
print(saver.save(sess, files["checkpoints"]))
print("All done")
exit (0)
"""
Traceback (most recent call last):
File "nn_ds_neibs30.py", line 721, in <module>
tt2_avg[0], tt2_avg[1], tt2_avg[2], tt2_avg[3], tt2_avg[4]))
ValueError: unsupported format character ' ' (0x20) at index 20
"""
\ No newline at end of file
#!/usr/bin/env python3
__copyright__ = "Copyright 2018-2019, Elphel, Inc."
__license__ = "GPL-3.0+"
__email__ = "andrey@elphel.com"
#python3 nn_ds_neibs31.py /data_ssd/lwir_sets/conf/qcstereo_lwir05.xml /data_ssd/lwir_sets/
#tensorboard --logdir="nn_ds_neibs30_graph13-9RNSWLAM0.5SLAM0.1SCLP0.2_nG_nI_HF_CP0.3_S0.03" --port=7001
import os
import sys
import numpy as np
import time
import shutil
from threading import Thread
import qcstereo_network
import qcstereo_losses
import qcstereo_functions as qsf
import tensorflow as tf
qsf.TIME_START = time.time()
qsf.TIME_LAST = qsf.TIME_START
IMG_WIDTH = 20 # 324 # tiles per image row
DEBUG_LEVEL= 1
try:
conf_file = sys.argv[1]
except IndexError:
print("Configuration path is required as a first argument. Optional second argument specifies root directory for data files")
exit(1)
try:
root_dir = sys.argv[2]
except IndexError:
root_dir = os.path.dirname(conf_file)
print ("Configuration file: " + conf_file)
parameters, dirs, files, _ = qsf.parseXmlConfig(conf_file, root_dir)
"""
Temporarily for backward compatibility
"""
if not "SLOSS_CLIP" in parameters:
parameters['SLOSS_CLIP'] = 0.5
print ("Old config, setting SLOSS_CLIP=", parameters['SLOSS_CLIP'])
"""
Defined in config file
"""
TILE_SIDE, TILE_LAYERS, TWO_TRAINS, NET_ARCH1, NET_ARCH2 = [None]*5
ABSOLUTE_DISPARITY,SYM8_SUB, WLOSS_LAMBDA, SLOSS_LAMBDA, SLOSS_CLIP = [None]*5
SPREAD_CONVERGENCE, INTER_CONVERGENCE, HOR_FLIP, DISP_DIFF_CAP, DISP_DIFF_SLOPE = [None]*5
CLUSTER_RADIUS = None
PARTIALS_WEIGHTS, MAX_IMGS_IN_MEM, MAX_FILES_PER_GROUP, BATCH_WEIGHTS, ONLY_TILE = [None] * 5
USE_CONFIDENCE, WBORDERS_ZERO, EPOCHS_TO_RUN, FILE_UPDATE_EPOCHS = [None] * 4
LR600,LR400,LR200,LR100,LR = [None]*5
SHUFFLE_FILES, EPOCHS_FULL_TEST, SAVE_TIFFS = [None] * 3
CHECKPOINT_PERIOD = None
TRAIN_BUFFER_GPU, TRAIN_BUFFER_CPU = [None]*2
TEST_TITLES = None
LOGFILE="results.txt"
"""
Next gets globals from the config file
"""
globals().update(parameters)
TRAIN_BUFFER_SIZE = TRAIN_BUFFER_GPU * TRAIN_BUFFER_CPU # in merged (quad) batches
#exit(0)
WIDTH = 20 # 324
HEIGHT = 15 # 242
TILE_SIZE = TILE_SIDE* TILE_SIDE # == 81
FEATURES_PER_TILE = TILE_LAYERS * TILE_SIZE# == 324
BATCH_SIZE = ([1,2][TWO_TRAINS])*2*1000//25 # == 80 Each batch of tiles has balanced D/S tiles, shuffled batches but not inside batches
SUFFIX=(str(NET_ARCH1)+'-'+str(NET_ARCH2)+
(["R","A"][ABSOLUTE_DISPARITY]) +
(["NS","S8"][SYM8_SUB])+
"WLAM"+str(WLOSS_LAMBDA)+
"SLAM"+str(SLOSS_LAMBDA)+
"SCLP"+str(SLOSS_CLIP)+
(['_nG','_G'][SPREAD_CONVERGENCE])+
(['_nI','_I'][INTER_CONVERGENCE]) +
(['_nHF',"_HF"][HOR_FLIP]) +
('_CP'+str(DISP_DIFF_CAP)) +
('_S'+str(DISP_DIFF_SLOPE))
)
NN_LAYOUT1 = qcstereo_network.NN_LAYOUTS[NET_ARCH1]
NN_LAYOUT2 = qcstereo_network.NN_LAYOUTS[NET_ARCH2]
USE_PARTIALS = not PARTIALS_WEIGHTS is None # False - just a single Siamese net, True - partial outputs that use concentric squares of the first level subnets
# Tiff export slice labels
SLICE_LABELS = ["nn_out_ext","hier_out_ext","gt_disparity","gt_strength",
"cutcorn_cost_nw","cutcorn_cost",
"gt-avg_dist","avg8_disp","gt_disp","out-avg"]
##############################################################################
cluster_size = (2 * CLUSTER_RADIUS + 1) * (2 * CLUSTER_RADIUS + 1)
center_tile_index = 2 * CLUSTER_RADIUS * (CLUSTER_RADIUS + 1)
qsf.prepareFiles(dirs, files, suffix = SUFFIX)
#copy config to results directory
print ("Copying config files to results directory:\n ('%s' -> '%s')"%(conf_file,dirs['result']))
try:
os.makedirs(dirs['result'])
except:
pass
shutil.copy2(conf_file,dirs['result'])
LOGPATH = os.path.join(dirs['result'],LOGFILE)
if TEST_TITLES is None:
TEST_TITLES = qsf.defaultTestTitles(files)
partials = None
partials = qsf.concentricSquares(CLUSTER_RADIUS)
PARTIALS_WEIGHTS = [1.0*pw/sum(PARTIALS_WEIGHTS) for pw in PARTIALS_WEIGHTS]
if not USE_PARTIALS:
partials = partials[0:1]
PARTIALS_WEIGHTS = [1.0]
qsf.evaluateAllResults(result_files = files['result'],
absolute_disparity = ABSOLUTE_DISPARITY,
cluster_radius = CLUSTER_RADIUS,
labels = SLICE_LABELS,
logpath= LOGPATH)
image_data = qsf.initImageData(
files = files,
max_imgs = MAX_IMGS_IN_MEM,
cluster_radius = CLUSTER_RADIUS,
tile_layers = TILE_LAYERS,
tile_side = TILE_SIDE,
width = IMG_WIDTH,
replace_nans = True)
corr2d_len, target_disparity_len, gtds_len, _ = qsf.get_lengths(CLUSTER_RADIUS, TILE_LAYERS, TILE_SIDE)
train_next, dataset_train, datasets_test= qsf.initTrainTestData(
files = files,
cluster_radius = CLUSTER_RADIUS,
buffer_size = TRAIN_BUFFER_SIZE * BATCH_SIZE, # number of clusters per train
test_titles = TEST_TITLES)
corr2d_train_placeholder = tf.compat.v1.placeholder(dataset_train.dtype, (None,FEATURES_PER_TILE * cluster_size)) # corr2d_train.shape)
target_disparity_train_placeholder = tf.compat.v1.placeholder(dataset_train.dtype, (None,1 * cluster_size)) #target_disparity_train.shape)
gt_ds_train_placeholder = tf.compat.v1.placeholder(dataset_train.dtype, (None,2 * cluster_size)) #gt_ds_train.shape)
dataset_tt = tf.data.Dataset.from_tensor_slices({
"corr2d": corr2d_train_placeholder,
"target_disparity": target_disparity_train_placeholder,
"gt_ds": gt_ds_train_placeholder})
tf_batch_weights = tf.compat.v1.placeholder(shape=(None,), dtype=tf.float32, name = "batch_weights") # way to increase importance of the high variance clusters
feed_batch_weights = np.array(BATCH_WEIGHTS*(BATCH_SIZE//len(BATCH_WEIGHTS)), dtype=np.float32)
feed_batch_weight_1 = np.array([1.0], dtype=np.float32)
dataset_test_size = len(datasets_test[0])
dataset_test_size //= BATCH_SIZE
dataset_img_size = len(image_data[0]['corr2d'])
dataset_img_size //= BATCH_SIZE
dataset_tt = dataset_tt.batch(BATCH_SIZE)
dataset_tt = dataset_tt.prefetch(BATCH_SIZE)
iterator_tt = dataset_tt.make_initializable_iterator()
next_element_tt = iterator_tt.get_next()
result_dir = './attic/result_neibs_'+ SUFFIX+'/'
checkpoint_dir = './attic/result_neibs_'+ SUFFIX+'/'
save_freq = 500
def debug_gt_variance(
indx, # This tile index (0..8)
center_indx, # center tile index
gt_ds_batch # [?:9:2]
):
with tf.name_scope("Debug_GT_Variance"):
d_gt_this = tf.reshape(gt_ds_batch[:,2 * indx],[-1], name = "d_this")
d_gt_center = tf.reshape(gt_ds_batch[:,2 * center_indx],[-1], name = "d_center")
d_gt_diff = tf.subtract(d_gt_this, d_gt_center, name = "d_diff")
d_gt_diff2 = tf.multiply(d_gt_diff, d_gt_diff, name = "d_diff2")
d_gt_var = tf.reduce_mean(d_gt_diff2, name = "d_gt_var")
return d_gt_var
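# debug_gt_variance() returns the batch-mean squared difference between the GT disparity of
# tile `indx` and that of the center tile; it is used only as a TensorBoard diagnostic
# (the "gtvar_diff" scalar) below.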
target_disparity_cluster = tf.reshape(next_element_tt['target_disparity'], [-1,cluster_size, 1], name="targdisp_cluster")
corr2d_Nx325 = tf.concat([tf.reshape(next_element_tt['corr2d'],[-1,cluster_size,FEATURES_PER_TILE], name="coor2d_cluster"),
target_disparity_cluster], axis=2, name = "corr2d_Nx325")
if SPREAD_CONVERGENCE:
outs, inp_weights = qcstereo_network.networks_siam(
input = corr2d_Nx325,
input_global = target_disparity_cluster,
layout1 = NN_LAYOUT1,
layout2 = NN_LAYOUT2,
inter_convergence = INTER_CONVERGENCE,
sym8 = SYM8_SUB,
only_tile = ONLY_TILE, #Remove/put None for normal operation
partials = partials,
use_confidence= USE_CONFIDENCE,
cluster_radius = CLUSTER_RADIUS)
else:
outs, inp_weights = qcstereo_network.networks_siam(
input_tensor= corr2d_Nx325,
input_global = None,
layout1 = NN_LAYOUT1,
layout2 = NN_LAYOUT2,
inter_convergence = False,
sym8 = SYM8_SUB,
only_tile = ONLY_TILE, #Remove/put None for normal operation
partials = partials,
use_confidence= USE_CONFIDENCE,
cluster_radius = CLUSTER_RADIUS)
tf_partial_weights = tf.constant(PARTIALS_WEIGHTS,dtype=tf.float32,name="partial_weights")
G_losses = [0.0]*len(partials)
target_disparity_batch= next_element_tt['target_disparity'][:,center_tile_index:center_tile_index+1]
gt_ds_batch_clust = next_element_tt['gt_ds']
gt_ds_batch = gt_ds_batch_clust[:,2 * center_tile_index: 2 * (center_tile_index +1)]
G_losses[0], _disp_slice, _d_gt_slice, _out_diff, _out_diff2, _w_norm, _out_wdiff2, _cost1 = qcstereo_losses.batchLoss(
out_batch = outs[0], # [batch_size,(1..2)] tf_result
target_disparity_batch= target_disparity_batch, # next_element_tt['target_disparity'][:,center_tile_index:center_tile_index+1], # target_disparity_batch_center, # next_element_tt['target_disparity'], # target_disparity, ### target_d, # [batch_size] tf placeholder
gt_ds_batch = gt_ds_batch, # next_element_tt['gt_ds'][:,2 * center_tile_index: 2 * (center_tile_index +1)], # gt_ds_batch_center, ## next_element_tt['gt_ds'], # gt_ds, ### gt, # [batch_size,2] tf placeholder
batch_weights = tf_batch_weights,
disp_diff_cap = DISP_DIFF_CAP,
disp_diff_slope= DISP_DIFF_SLOPE,
absolute_disparity = ABSOLUTE_DISPARITY,
use_confidence = USE_CONFIDENCE, # True,
lambda_conf_avg = 0.01,
## lambda_conf_pwr = 0.1,
conf_pwr = 2.0,
gt_conf_offset = 0.08,
gt_conf_pwr = 2.0,
error2_offset = 0, # 0.0025, # (0.05^2)
disp_wmin = 1.0, # minimal disparity to apply weight boosting for small disparities
disp_wmax = 8.0, # maximal disparity to apply weight boosting for small disparities
use_out = False) # use calculated disparity for disparity weight boosting (False - use target disparity)
G_loss = G_losses[0]
for n in range (1,len(partials)):
G_losses[n], _, _, _, _, _, _, _ = qcstereo_losses.batchLoss(
out_batch = outs[n], # [batch_size,(1..2)] tf_result
target_disparity_batch= target_disparity_batch, #next_element_tt['target_disparity'][:,center_tile_index:center_tile_index+1], # target_disparity_batch_center, # next_element_tt['target_disparity'], # target_disparity, ### target_d, # [batch_size] tf placeholder
gt_ds_batch = gt_ds_batch, # next_element_tt['gt_ds'][:,2 * center_tile_index: 2 * (center_tile_index +1)], # gt_ds_batch_center, ## next_element_tt['gt_ds'], # gt_ds, ### gt, # [batch_size,2] tf placeholder
batch_weights = tf_batch_weights,
disp_diff_cap = DISP_DIFF_CAP,
disp_diff_slope= DISP_DIFF_SLOPE,
absolute_disparity = ABSOLUTE_DISPARITY,
use_confidence = USE_CONFIDENCE, # True,
lambda_conf_avg = 0.01,
# lambda_conf_pwr = 0.1,
conf_pwr = 2.0,
gt_conf_offset = 0.08,
gt_conf_pwr = 2.0,
error2_offset = 0, # 0.0025, # (0.05^2)
disp_wmin = 1.0, # minimal disparity to apply weight boosting for small disparities
disp_wmax = 8.0, # maximal disparity to apply weight boosting for small disparities
use_out = False) # use calculated disparity for disparity weight boosting (False - use target disparity)
tf_wlosses = tf.multiply(G_losses, tf_partial_weights, name = "tf_wlosses")
G_losses_sum = tf.reduce_sum(tf_wlosses, name = "G_losses_sum")
if SLOSS_LAMBDA > 0:
S_loss, rslt_cost_nw, rslt_cost_w, rslt_d , rslt_avg_disparity, rslt_gt_disparity, rslt_offs = qcstereo_losses.smoothLoss(
out_batch = outs[0], # [batch_size,(1..2)] tf_result
target_disparity_batch = target_disparity_batch, # [batch_size] tf placeholder
gt_ds_batch_clust = gt_ds_batch_clust, # [batch_size,25,2] tf placeholder
clip = SLOSS_CLIP,
absolute_disparity = ABSOLUTE_DISPARITY, #when false there should be no activation on disparity output !
cluster_radius = CLUSTER_RADIUS)
GS_loss = tf.add(G_losses_sum, SLOSS_LAMBDA * S_loss, name = "GS_loss")
else:
S_loss = tf.constant(0.0, dtype=tf.float32,name = "S_loss")
GS_loss = G_losses_sum # G_loss
if WLOSS_LAMBDA > 0.0:
W_loss = qcstereo_losses.weightsLoss(
inp_weights = inp_weights[0], # inp_weights - list of tensors, currently - just [0]
tile_layers= TILE_LAYERS, # 4
tile_side = TILE_SIDE, # 9
wborders_zero = WBORDERS_ZERO)
GW_loss = tf.add(GS_loss, WLOSS_LAMBDA * W_loss, name = "GW_loss")
else:
GW_loss = GS_loss # G_loss
W_loss = tf.constant(0.0, dtype=tf.float32,name = "W_loss")
GT_variance = debug_gt_variance(indx = 0, # This tile index (0..8)
center_indx = 4, # center tile index
gt_ds_batch = next_element_tt['gt_ds'])# [?:18]
tf_ph_G_loss = tf.compat.v1.placeholder(tf.float32,shape=None,name='G_loss_avg')
tf_ph_G_losses = tf.compat.v1.placeholder(tf.float32,shape=[len(partials)],name='G_losses_avg')
tf_ph_S_loss = tf.compat.v1.placeholder(tf.float32,shape=None,name='S_loss_avg')
tf_ph_W_loss = tf.compat.v1.placeholder(tf.float32,shape=None,name='W_loss_avg')
tf_ph_GW_loss = tf.compat.v1.placeholder(tf.float32,shape=None,name='GW_loss_avg')
tf_ph_sq_diff = tf.compat.v1.placeholder(tf.float32,shape=None,name='sq_diff_avg')
tf_gtvar_diff = tf.compat.v1.placeholder(tf.float32,shape=None,name='gtvar_diff')
tf_img_test0 = tf.compat.v1.placeholder(tf.float32,shape=None,name='img_test0')
tf_img_test9 = tf.compat.v1.placeholder(tf.float32,shape=None,name='img_test9')
"""
with tf.name_scope('sample'):
tf.compat.v1.summary.scalar("GW_loss", GW_loss)
tf.compat.v1.summary.scalar("G_loss", G_loss)
tf.compat.v1.summary.scalar("S_loss", S_loss)
tf.compat.v1.summary.scalar("W_loss", W_loss)
tf.compat.v1.summary.scalar("sq_diff", _cost1)
tf.compat.v1.summary.scalar("gtvar_diff", GT_variance)
"""
with tf.name_scope('epoch_average'):
for i in range(tf_ph_G_losses.shape[0]):
tf.compat.v1.summary.scalar("G_loss_epoch_"+str(i), tf_ph_G_losses[i])
tf.compat.v1.summary.scalar("GW_loss_epoch", tf_ph_GW_loss)
tf.compat.v1.summary.scalar("G_loss_epoch", tf_ph_G_loss)
tf.compat.v1.summary.scalar("S_loss_epoch", tf_ph_S_loss)
tf.compat.v1.summary.scalar("W_loss_epoch", tf_ph_W_loss)
tf.compat.v1.summary.scalar("sq_diff_epoch", tf_ph_sq_diff)
tf.compat.v1.summary.scalar("gtvar_diff", tf_gtvar_diff)
tf.compat.v1.summary.scalar("img_test0", tf_img_test0)
tf.compat.v1.summary.scalar("img_test9", tf_img_test9)
trainable_vars= tf.trainable_variables()
lr= tf.compat.v1.placeholder(tf.float32)
G_opt= tf.compat.v1.train.AdamOptimizer(learning_rate=lr).minimize(GW_loss)
ROOT_PATH = './attic/nn_ds_neibs30_graph'+SUFFIX+"/" # for tensorboard
TT_SUBDIRS = ['train'] #,'test0','test1','test2','test3']
for i,_ in enumerate(datasets_test):
# TT_SUBDIRS.append('test%d'%(i))
TT_SUBDIRS.append(TEST_TITLES[i].replace(' ','_'))
TT_PATHS = [ROOT_PATH + p for p in TT_SUBDIRS]
# CLEAN OLD STUFF
shutil.rmtree(ROOT_PATH, ignore_errors=True)
#for p in TT_PATHS:
# shutil.rmtree(p, ignore_errors=True)
#seems that runs use directory creation time to order graphs
#for p in TT_PATHS:
# os.makedirs(p)
# time.sleep(1.5) # reduce later
num_train_subs = len(train_next) # number of (different type) merged training sets
dataset_train_size = TRAIN_BUFFER_GPU * num_train_subs # TRAIN_BUFFER_SIZE
tt_summaries = [0.0 for e in TT_SUBDIRS]
tt2_avg = [0.0 for e in TT_SUBDIRS]
tt_gw_avg = [0.0 for e in TT_SUBDIRS]
tt_g_avgs = [[0.0]*len(partials) for e in TT_SUBDIRS]
tt_w_avg = [0.0 for e in TT_SUBDIRS]
tt_s_avg = [0.0 for e in TT_SUBDIRS]
tt_gtvar_avg = [0.0 for e in TT_SUBDIRS]
saver=tf.compat.v1.train.Saver(trainable_vars)
saver_def = saver.as_saver_def()
# The name of the tensor you must feed with a filename when saving/restoring.
print ('saver_def.filename_tensor_name=',saver_def.filename_tensor_name)
# The name of the target operation you must run when restoring.
print ('saver_def.restore_op_name=',saver_def.restore_op_name)
# The name of the target operation you must run when saving.
print ('saver_def.save_tensor_name=',saver_def.save_tensor_name)
try:
os.makedirs(os.path.dirname(files['checkpoints']))
print ("Created directory ",os.path.dirname(files['checkpoints']))
# os.makedirs(files['checkpoints'])
except:
pass
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
merged = tf.summary.merge_all()
tt_writers = []
for p in TT_PATHS:
tt_writers.append(tf.summary.FileWriter(p, sess.graph))
print ("Adding delay to make directory creation time different: "+p)
time.sleep(2.0) # reduce later
loss_gw_train_hist= np.empty(dataset_train_size, dtype=np.float32)
loss_g_train_hists= [np.empty(dataset_train_size, dtype=np.float32) for p in partials]
loss_s_train_hist= np.empty(dataset_train_size, dtype=np.float32)
loss_w_train_hist= np.empty(dataset_train_size, dtype=np.float32)
loss_gw_test_hist= np.empty(dataset_test_size, dtype=np.float32)
loss_g_test_hists= [np.empty(dataset_test_size, dtype=np.float32) for p in partials]
loss_s_test_hist= np.empty(dataset_test_size, dtype=np.float32)
loss_w_test_hist= np.empty(dataset_test_size, dtype=np.float32)
loss2_train_hist= np.empty(dataset_train_size, dtype=np.float32)
loss2_test_hist= np.empty(dataset_test_size, dtype=np.float32)
gtvar_train_hist= np.empty(dataset_train_size, dtype=np.float32)
gtvar_test_hist= np.empty(dataset_test_size, dtype=np.float32)
gtvar_train = 0.0
gtvar_test = 0.0
img_gain_test0 = 1.0
img_gain_test9 = 1.0
thr=None
thr_result = None
trains_to_update = [train_next[n_train]['more_files'] for n_train in range(len(train_next))]
for epoch in range (EPOCHS_TO_RUN):
"""
update files after each epoch, all 4.
Convert to threads after testing
"""
if (FILE_UPDATE_EPOCHS > 0) and (epoch % FILE_UPDATE_EPOCHS == 0):
if not thr is None:
if thr.is_alive():
qsf.print_time("***WAITING*** until tfrecord gets loaded", end=" ")
else:
qsf.print_time("tfrecord is ***ALREADY LOADED*** ", end=" ")
thr.join()
qsf.print_time("Done")
qsf.print_time("Inserting new data", end=" ")
for n_train in range(len(trains_to_update)):
if trains_to_update[n_train]:
qsf.add_file_to_dataset(dataset = dataset_train,
new_dataset = thr_result[n_train],
train_next = train_next[n_train])
qsf.print_time("Done")
thr_result = []
fpaths = []
for n_train in range(len(trains_to_update)):
if trains_to_update[n_train]:
fpaths.append(files['train'][n_train][train_next[n_train]['file']])
qsf.print_time("Will read in background: "+fpaths[-1])
thr = Thread(target=qsf.getMoreFiles, args=(fpaths,thr_result, CLUSTER_RADIUS, HOR_FLIP, TILE_LAYERS, TILE_SIDE))
thr.start()
train_buf_index = epoch % TRAIN_BUFFER_CPU # GPU memory from CPU memory (now 4)
if epoch >=600:
learning_rate = LR600
elif epoch >=400:
learning_rate = LR400
elif epoch >=200:
learning_rate = LR200
elif epoch >=100:
learning_rate = LR100
else:
learning_rate = LR
if (train_buf_index == 0) and SHUFFLE_FILES:
qsf.print_time("Shuffling how datasets datasets_train_lvar and datasets_train_hvar are zipped together", end="")
qsf.shuffle_in_place(
                dataset_data = dataset_train, # alternating clusters from 4 sources. Each cluster has all needed data (concatenated)
period = num_train_subs)
qsf.print_time(" Done")
sti = train_buf_index * dataset_train_size * BATCH_SIZE # TRAIN_BUFFER_GPU * num_train_subs
eti = sti+ dataset_train_size * BATCH_SIZE# (train_buf_index +1) * TRAIN_BUFFER_GPU * num_train_subs
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_train[sti:eti,:corr2d_len],
target_disparity_train_placeholder: dataset_train[sti:eti,corr2d_len:corr2d_len+target_disparity_len],
gt_ds_train_placeholder: dataset_train[sti:eti,corr2d_len+target_disparity_len:corr2d_len+target_disparity_len+gtds_len] })
for i in range(dataset_train_size):
# try:
_, GW_loss_trained, G_losses_trained, S_loss_trained, W_loss_trained, output, disp_slice, d_gt_slice, out_diff, out_diff2, w_norm, out_wdiff2, out_cost1, gt_variance = sess.run(
[ G_opt,
GW_loss,
G_losses,
S_loss,
W_loss,
outs[0],
_disp_slice,
_d_gt_slice,
_out_diff,
_out_diff2,
_w_norm,
_out_wdiff2,
_cost1,
GT_variance
],
feed_dict={tf_batch_weights: feed_batch_weights,
lr: learning_rate
}) # previous value of *_avg #Fetch argument 0.0 has invalid type <class 'float'>, must be a string or Tensor. (Can not convert a float into a Tensor or Operation.)
loss_gw_train_hist[i] = GW_loss_trained
for nn, gl in enumerate(G_losses_trained):
loss_g_train_hists[nn][i] = gl
loss_s_train_hist[i] = S_loss_trained
loss_w_train_hist[i] = W_loss_trained
loss2_train_hist[i] = out_cost1
gtvar_train_hist[i] = gt_variance
# except tf.errors.OutOfRangeError:
# print("****** NO MORE DATA! train done at step %d"%(i))
# break
tt_gw_avg[0] = np.average(loss_gw_train_hist).astype(np.float32)
for nn, lgth in enumerate(loss_g_train_hists):
tt_g_avgs[0][nn] = np.average(lgth).astype(np.float32)
tt_s_avg[0] = np.average(loss_s_train_hist).astype(np.float32)
tt_w_avg[0] = np.average(loss_w_train_hist).astype(np.float32)
tt2_avg[0] = np.average(loss2_train_hist).astype(np.float32)
tt_gtvar_avg[0] = np.average(gtvar_train_hist).astype(np.float32)
for ntest,dataset_test in enumerate(datasets_test):
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_test[:, :corr2d_len], #['corr2d'],
target_disparity_train_placeholder: dataset_test[:, corr2d_len:corr2d_len+target_disparity_len], # ['target_disparity'],
gt_ds_train_placeholder: dataset_test[:, corr2d_len+target_disparity_len:] }) # ['gt_ds']})
"""
TODO: Make it possible to have different length dataset_test arrays to mix different length test files
"""
for i in range(dataset_test_size):
# for i in range(dataset_test.shape[0]):
# try:
GW_loss_tested, G_losses_tested, S_loss_tested, W_loss_tested, output, disp_slice, d_gt_slice, out_diff, out_diff2, w_norm, out_wdiff2, out_cost1, gt_variance = sess.run(
[GW_loss,
G_losses,
S_loss,
W_loss,
outs[0],
_disp_slice,
_d_gt_slice,
_out_diff,
_out_diff2,
_w_norm,
_out_wdiff2,
_cost1,
GT_variance
],
feed_dict={tf_batch_weights: feed_batch_weight_1 , # feed_batch_weights,
lr: learning_rate
}) # previous value of *_avg
loss_gw_test_hist[i] = GW_loss_tested
for nn, gl in enumerate(G_losses_tested):
loss_g_test_hists[nn][i] = gl
loss_s_test_hist[i] = S_loss_tested
loss_w_test_hist[i] = W_loss_tested
loss2_test_hist[i] = out_cost1
gtvar_test_hist[i] = gt_variance
# except tf.errors.OutOfRangeError:
# print("test done at step %d"%(i))
# break
tt_gw_avg[ntest+1] = np.average(loss_gw_test_hist).astype(np.float32)
for nn, lgth in enumerate(loss_g_test_hists):
tt_g_avgs[ntest+1][nn] = np.average(lgth).astype(np.float32)
tt_s_avg[ntest+1] = np.average(loss_s_test_hist).astype(np.float32)
tt_w_avg[ntest+1] = np.average(loss_w_test_hist).astype(np.float32)
tt2_avg[ntest+1] = np.average(loss2_test_hist).astype(np.float32)
tt_gtvar_avg[ntest+1] = np.average(gtvar_test_hist).astype(np.float32)
if (((epoch + 1) == EPOCHS_TO_RUN) or (((epoch + 1) % EPOCHS_FULL_TEST) == 0)) and (len(image_data) > 0) :
lf = None
if (epoch + 1) == EPOCHS_TO_RUN: # last
print("Last epoch, removing train/test datasets to reduce memory footprint")
del(dataset_train)
del(dataset_test)
if LOGPATH:
lf=open(LOGPATH,"w") #overwrite previous (or make it "a"?
last_epoch = (epoch + 1) == EPOCHS_TO_RUN
ind_img = [0]
if last_epoch:
ind_img = [i for i in range(len(image_data))]
###################################################
# Read the full image
###################################################
## test_summaries_img = [0.0]*len(ind_img) # datasets_img)
disp_out= np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_cost_nw= np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_cost_w= np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_d= np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_avg_disparity = np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_gt_disparity = np.empty((WIDTH*HEIGHT), dtype=np.float32)
dbg_offs = np.empty((WIDTH*HEIGHT), dtype=np.float32)
for ntest in ind_img: # datasets_img):
dataset_img = qsf.readImageData(
image_data = image_data,
files = files,
indx = ntest,
cluster_radius = CLUSTER_RADIUS,
tile_layers = TILE_LAYERS,
tile_side = TILE_SIDE,
width = IMG_WIDTH,
replace_nans = True)
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_img['corr2d'],
target_disparity_train_placeholder: dataset_img['target_disparity'],
gt_ds_train_placeholder: dataset_img['gt_ds']})
for start_offs in range(0,disp_out.shape[0],BATCH_SIZE):
end_offs = min(start_offs+BATCH_SIZE,disp_out.shape[0])
# try:
output, cost_nw, cost_w, dd, avg_disparity, gt_disparity, offs = sess.run(
[outs[0], # {?,1]
rslt_cost_nw, #[?,]
rslt_cost_w, #[?,]
rslt_d, #[?,]
rslt_avg_disparity,
rslt_gt_disparity,
rslt_offs
],
feed_dict={
tf_batch_weights: feed_batch_weight_1 # feed_batch_weights,
}) # previous value of *_avg
# except tf.errors.OutOfRangeError:
# print("test done at step %d"%(i))
# break
# try:
disp_out[start_offs:end_offs] = output.flatten()
dbg_cost_nw[start_offs:end_offs] = cost_nw.flatten()
dbg_cost_w [start_offs:end_offs] = cost_w.flatten()
dbg_d[start_offs:end_offs] = dd.flatten()
dbg_avg_disparity[start_offs:end_offs] = avg_disparity.flatten()
dbg_gt_disparity[start_offs:end_offs] = gt_disparity.flatten()
dbg_offs[start_offs:end_offs] = offs.flatten()
# except ValueError:
# print("dataset_img_size= %d, i=%d, output.shape[0]=%d "%(dataset_img_size, i, output.shape[0]))
# break;
pass
result_file = files['result'][ntest] # result_files[ntest]
try:
os.makedirs(os.path.dirname(result_file))
except:
pass
rslt = np.concatenate(
[disp_out.reshape(-1,1),
dataset_img['t_disps'], #t_disps[ntest],
dataset_img['gtruths'], # gtruths[ntest],
dbg_cost_nw.reshape(-1,1),
dbg_cost_w.reshape(-1,1),
dbg_d.reshape(-1,1),
dbg_avg_disparity.reshape(-1,1),
dbg_gt_disparity.reshape(-1,1),
dbg_offs.reshape(-1,1)],1)
np.save(result_file, rslt.reshape(HEIGHT,WIDTH,-1))
rslt = qsf.eval_results(result_file, ABSOLUTE_DISPARITY, radius=CLUSTER_RADIUS, logfile=lf)
img_gain_test0 = rslt[0][0]/rslt[0][1]
img_gain_test9 = rslt[9][0]/rslt[9][1]
if SAVE_TIFFS:
qsf.result_npy_to_tiff(result_file, ABSOLUTE_DISPARITY, fix_nan = True,labels=SLICE_LABELS, logfile=lf)
"""
                Remove dataset_img (if it is not [0]) to reduce memory footprint
"""
if ntest > 0:
image_data[ntest] = None
if lf:
lf.close()
# tensorboard scalars
tt_summaries[0] = sess.run([merged],
feed_dict={ tf_ph_GW_loss: tt_gw_avg[0],
tf_ph_G_loss: tt_g_avgs[0][0], #train_g_avg,
tf_ph_G_losses: tt_g_avgs[0],
tf_ph_S_loss: tt_s_avg[0],
tf_ph_W_loss: tt_w_avg[0],
tf_ph_sq_diff: tt2_avg[0], # train2_avg,
tf_gtvar_diff: tt_gtvar_avg[0],
tf_img_test0: img_gain_test0,
tf_img_test9: img_gain_test9}) # previous value of *_avg #Fetch argument 0.0 has invalid type <class 'float'>, must be a string or Tensor. (Can not convert a float into a Tensor or Operation.)
for ntest, _ in enumerate(datasets_test):
tt_summaries[ntest+1] = sess.run([merged],
feed_dict={
tf_ph_GW_loss: tt_gw_avg[ntest+1],
tf_ph_G_loss: tt_g_avgs[ntest+1][0],
                    tf_ph_G_losses: tt_g_avgs[ntest+1], # train_g_avgs, # temporary, there is no data for test
tf_ph_S_loss: tt_s_avg[ntest+1],
tf_ph_W_loss: tt_w_avg[ntest+1],
tf_ph_sq_diff: tt2_avg[ntest+1], #test2_avg,
tf_gtvar_diff: tt_gtvar_avg[ntest+1],
tf_img_test0: img_gain_test0,
tf_img_test9: img_gain_test9})
for n,tt_writer in enumerate(tt_writers):
## tt_writer.add_summary(tt_summaries[n],epoch)
tt_writer.add_summary(tt_summaries[n][0],epoch)
# if epoch ==0 :
# print ("adding delay to make directory creation time different")
# time.sleep(2.0) # reduce later
qsf.print_time("==== %04d:%03d -> %.4f %.4f %.4f %.4f %.4f (%.4f %.4f %.4f %.4f %.4f) ===="%(
epoch,i,
tt_gw_avg[0], tt_gw_avg[1], tt_gw_avg[2], tt_gw_avg[3], tt_gw_avg[4],
tt2_avg[0], tt2_avg[1], tt2_avg[2], tt2_avg[3], tt2_avg[4]))
if (not CHECKPOINT_PERIOD is None) and (((epoch + 1) % CHECKPOINT_PERIOD) == 0):
print("Saving periodic checkpoint (trained variables only) to %s, global_step = %d"%(os.path.dirname(files['checkpoints']), epoch),end=" => ")
print(saver.save(sess, files['checkpoints'], global_step=epoch, write_meta_graph=False))
# Close writers
for tt_writer in tt_writers:
try:
tt_writer.close()
except:
print ("Could not close tt_writer: ",tt_writer)
print("Saving final checkpoint (trained variables only) to %s"%(files['checkpoints']),end=" => ")
print(saver.save(sess, files["checkpoints"]))
print("All done")
exit (0)
"""
Traceback (most recent call last):
File "nn_ds_neibs30.py", line 721, in <module>
tt2_avg[0], tt2_avg[1], tt2_avg[2], tt2_avg[3], tt2_avg[4]))
ValueError: unsupported format character ' ' (0x20) at index 20
"""
\ No newline at end of file
#!/usr/bin/env python3
__copyright__ = "Copyright 2018, Elphel, Inc."
__license__ = "GPL-3.0+"
__email__ = "andrey@elphel.com"
import os
import numpy as np
import tensorflow as tf
import xml.etree.ElementTree as ET
import time
import imagej_tiffwriter
TIME_LAST = 0
TIME_START = 0
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[38;5;214m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
BOLDWHITE = '\033[1;37m'
UNDERLINE = '\033[4m'
def print_time(txt="",end="\n"):
global TIME_LAST
t = time.time()
if txt:
txt +=" "
print(("%s"+bcolors.BOLDWHITE+"at %.4fs (+%.4fs)"+bcolors.ENDC)%(txt,t-TIME_START,t-TIME_LAST), end = end, flush=True)
TIME_LAST = t
DEFAULT_TITLES = [
['test_lvar', 'Test_flat_heuristic'],
['test_hvar', 'Test_edge_heuristic'],
['test_lvar1', 'Test_flat_random'],
['test_hvar1', 'Test_edge_random'],
['fake_lvar', 'Fake_flat_heuristic'],
['fake_hvar', 'Fake_edge_heuristic'],
['fake_lvar1', 'Fake_flat_random'],
['fake_hvar1', 'Fake_edge_random']]
def parseXmlConfig(conf_file, root_dir):
tree = ET.parse(conf_file)
root = tree.getroot()
parameters = {}
for p in root.find('parameters'):
## print ("p.tag=%s, p.text.stri[p()=%s"%(p.tag,p.text.strip()))
parameters[p.tag]=eval(p.text.strip())
# globals
dirs={}
for p in root.find('directories'):
dirs[p.tag]=eval(p.text.strip())
if not os.path.isabs(dirs[p.tag]):
dirs[p.tag] = os.path.join(root_dir, dirs[p.tag])
files={}
for p in root.find('files'):
files[p.tag]=eval(p.text.strip())
dbg_parameters = {}
for p in root.find('dbg_parameters'):
dbg_parameters[p.tag]=eval(p.text.strip())
return parameters, dirs, files, dbg_parameters
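# Expected layout of the XML config (a minimal sketch: the section names are the ones looked
# up above, while the root tag, child tags and values here are only illustrative; every tag
# text is eval()-ed as a Python literal and relative directory paths are joined with root_dir):
#   <config>
#     <parameters>     <CLUSTER_RADIUS> 2 </CLUSTER_RADIUS> ... </parameters>
#     <directories>    <train_lvar> "tf_data/train_lvar" </train_lvar> ... </directories>
#     <files>          <train_lvar> ["train000.tfrecords"] </train_lvar> ... </files>
#     <dbg_parameters> ... </dbg_parameters>
#   </config>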
def defaultTestTitles(files):
test_titles = []
for f, n in DEFAULT_TITLES:
if f in files:
test_titles.append(n)
else:
test_titles.append(None)
return test_titles
def prepareFiles(dirs, files, suffix):
#MAX_FILES_PER_GROUP
for i, path in enumerate(files['train_lvar']):
files['train_lvar'][i]=os.path.join(dirs['train_lvar'], path)
for i, path in enumerate(files['train_hvar']):
files['train_hvar'][i]=os.path.join(dirs['train_hvar'], path)
for i, path in enumerate(files['train_lvar1']):
files['train_lvar1'][i]=os.path.join(dirs['train_lvar1'], path)
for i, path in enumerate(files['train_hvar1']):
files['train_hvar1'][i]=os.path.join(dirs['train_hvar1'], path)
for i, path in enumerate(files['test_lvar']):
files['test_lvar'][i]=os.path.join(dirs['test_lvar'], path)
for i, path in enumerate(files['test_hvar']):
files['test_hvar'][i]=os.path.join(dirs['test_hvar'], path)
if ('test_lvar1' in files) and ('test_lvar1' in dirs):
for i, path in enumerate(files['test_lvar1']):
files['test_lvar1'][i]=os.path.join(dirs['test_lvar1'], path)
if ('test_hvar1' in files) and ('test_hvar1' in dirs):
for i, path in enumerate(files['test_hvar1']):
files['test_hvar1'][i]=os.path.join(dirs['test_hvar1'], path)
if ('fake_lvar' in files) and ('fake_lvar' in dirs):
for i, path in enumerate(files['fake_lvar']):
files['fake_lvar'][i]=os.path.join(dirs['fake_lvar'], path)
if ('fake_hvar' in files) and ('fake_hvar' in dirs):
for i, path in enumerate(files['fake_hvar']):
files['fake_hvar'][i]=os.path.join(dirs['fake_hvar'], path)
if ('fake_lvar1' in files) and ('fake_lvar1' in dirs):
for i, path in enumerate(files['fake_lvar1']):
files['fake_lvar1'][i]=os.path.join(dirs['fake_lvar1'], path)
    if ('fake_hvar1' in files) and ('fake_hvar1' in dirs):
for i, path in enumerate(files['fake_hvar1']):
files['fake_hvar1'][i]=os.path.join(dirs['fake_hvar1'], path)
result_files=[]
for i, path in enumerate(files['images']):
result_files.append(os.path.join(dirs['result'], path+"_"+suffix+'.npy'))
files['result'] = result_files
if not 'checkpoints' in files:
files['checkpoints'] = 'checkpoints'
if not 'checkpoints' in dirs:
dirs['checkpoints'] = dirs['result']
files['checkpoints'] = os.path.join(dirs['checkpoints'], files['checkpoints'])
if not 'inference' in files:
files['inference'] = 'inference'
if not 'inference' in dirs:
dirs['inference'] = dirs['result']
files['inference'] = os.path.join(dirs['inference'], files['inference'])
if not 'exportdir' in files:
files['exportdir'] = 'exportdir'
if not 'exportdir' in dirs:
dirs['exportdir'] = dirs['result']
files['exportdir'] = os.path.join(dirs['exportdir'], files['exportdir'])
if not 'figures' in dirs:
dirs['figures'] = os.path.join(dirs['result'],"figs")
files['train'] = [files['train_lvar'],files['train_hvar'], files['train_lvar1'], files['train_hvar1']]
files['test'] = [files['test_lvar'], files['test_hvar']]
if 'test_lvar1' in files:
files['test'].append(files['test_lvar1'])
if 'test_hvar1' in files:
files['test'].append(files['test_hvar1'])
# should be after result files
for i, path in enumerate(files['images']):
files['images'][i] = os.path.join(dirs['images'], path+'.tfrecords')
def readTFRewcordsEpoch(train_filename, cluster_radius):
if not '.tfrecords' in train_filename:
train_filename += '.tfrecords'
npy_dir_name = "npy"
dirname = os.path.dirname(train_filename)
npy_dir = os.path.join(dirname, npy_dir_name)
filebasename, _ = os.path.splitext(train_filename)
filebasename = os.path.basename(filebasename)
file_all = os.path.join(npy_dir,filebasename + '.npy')
if os.path.exists(file_all):
data = np.load (file_all)
else:
record_iterator = tf.python_io.tf_record_iterator(path=train_filename)
corr2d_list=[]
target_disparity_list=[]
gt_ds_list = []
extra_list = []
for string_record in record_iterator:
example = tf.train.Example()
example.ParseFromString(string_record)
corr2d_list.append (np.array(example.features.feature['corr2d'].float_list.value, dtype=np.float32))
target_disparity_list.append (np.array(example.features.feature['target_disparity'].float_list.value, dtype=np.float32))
gt_ds_list.append (np.array(example.features.feature['gt_ds'].float_list.value, dtype= np.float32))
try:
extra_list.append (np.array(example.features.feature['extra'].float_list.value, dtype= np.float32))
except:
pass
corr2d= np.array(corr2d_list)
target_disparity = np.array(target_disparity_list)
gt_ds = np.array(gt_ds_list)
if len(extra_list):
extra = np.array(extra_list)
else:
extra = None
try:
os.makedirs(os.path.dirname(file_all))
except:
pass
if cluster_radius > 0:
reformat_to_clusters(
corr2d,
target_disparity,
gt_ds,
extra,
cluster_radius)
if not extra is None:
data = np.concatenate(
[corr2d, target_disparity, gt_ds, extra],
axis = 1)
else:
data = np.concatenate(
[corr2d, target_disparity, gt_ds],
axis = 1)
np.save(file_all, data)
return data
def getMoreFiles(fpaths,rslt, cluster_radius, hor_flip, tile_layers, tile_side):
for fpath in fpaths:
dataset = readTFRewcordsEpoch(fpath, cluster_radius)
if hor_flip:
if np.random.randint(2):
print_time("Performing horizontal flip", end=" ")
flip_horizontal(dataset, cluster_radius, tile_layers, tile_side)
print_time("Done")
rslt.append(dataset)
#from http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
def read_and_decode(filename_queue, features_per_tile):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
            'corr2d': tf.FixedLenFeature([features_per_tile],tf.float32), #string),
'target_disparity': tf.FixedLenFeature([1], tf.float32), #.string),
'gt_ds': tf.FixedLenFeature([2], tf.float32) #.string)
})
corr2d = features['corr2d'] # tf.decode_raw(features['corr2d'], tf.float32)
target_disparity = features['target_disparity'] # tf.decode_raw(features['target_disparity'], tf.float32)
gt_ds = tf.cast(features['gt_ds'], tf.float32) # tf.decode_raw(features['gt_ds'], tf.float32)
in_features = tf.concat([corr2d,target_disparity],0)
corr2d_out, target_disparity_out, gt_ds_out = tf.train.shuffle_batch( [in_features, target_disparity, gt_ds],
batch_size=1000, # 2,
capacity=30,
num_threads=2,
min_after_dequeue=10)
return corr2d_out, target_disparity_out, gt_ds_out
def add_margins(npa,radius, val = np.nan):
npa_ext = np.empty((npa.shape[0]+2*radius, npa.shape[1]+2*radius, npa.shape[2]), dtype = npa.dtype)
npa_ext[radius:radius + npa.shape[0],radius:radius + npa.shape[1]] = npa
npa_ext[0:radius,:,:] = val
npa_ext[radius + npa.shape[0]:,:,:] = val
npa_ext[:,0:radius,:] = val
npa_ext[:, radius + npa.shape[1]:,:] = val
return npa_ext
def add_neibs(npa_ext,radius):
height = npa_ext.shape[0]-2*radius
width = npa_ext.shape[1]-2*radius
side = 2 * radius + 1
# size = side * side
npa_neib = np.empty((height, width, side, side, npa_ext.shape[2]), dtype = npa_ext.dtype)
for dy in range (side):
for dx in range (side):
npa_neib[:,:,dy, dx,:]= npa_ext[dy:dy+height, dx:dx+width]
return npa_neib.reshape(height, width, -1)
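# Shape summary for the two helpers above: add_margins() pads an (H, W, C) tile array with
# `radius` rows/columns of `val` (NaN) on each side, giving (H+2r, W+2r, C); add_neibs() then
# gathers, for every tile, its full (2r+1) x (2r+1) neighborhood and returns
# (H, W, (2r+1)*(2r+1)*C) - e.g. for radius = 2 each tile row grows by a factor of 25.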
def extend_img_to_clusters(datasets_img,radius, width): # = 324):
# side = 2 * radius + 1
# size = side * side
if len(datasets_img) ==0:
return
num_tiles = datasets_img[0]['corr2d'].shape[0]
height = num_tiles // width
for rec in datasets_img:
if not rec is None:
rec['corr2d'] = add_neibs(add_margins(rec['corr2d'].reshape((height,width,-1)), radius, np.nan), radius).reshape((num_tiles,-1))
rec['target_disparity'] = add_neibs(add_margins(rec['target_disparity'].reshape((height,width,-1)), radius, np.nan), radius).reshape((num_tiles,-1))
rec['gt_ds'] = add_neibs(add_margins(rec['gt_ds'].reshape((height,width,-1)), radius, np.nan), radius).reshape((num_tiles,-1))
try:
rec['extra'] = add_neibs(add_margins(rec['extra'].reshape((height,width,-1)), radius, np.nan), radius).reshape((num_tiles,-1))
except:
pass
pass
def reformat_to_clusters_rec(datasets_data, cluster_radius):
cluster_size = (2 * cluster_radius + 1) * (2 * cluster_radius + 1)
# Reformat input data
for rec in datasets_data:
rec['corr2d'] = rec['corr2d'].reshape( (rec['corr2d'].shape[0]//cluster_size, rec['corr2d'].shape[1] * cluster_size))
rec['target_disparity'] = rec['target_disparity'].reshape((rec['target_disparity'].shape[0]//cluster_size, rec['target_disparity'].shape[1] * cluster_size))
rec['gt_ds'] = rec['gt_ds'].reshape( (rec['gt_ds'].shape[0]//cluster_size, rec['gt_ds'].shape[1] * cluster_size))
try:
rec['extra'] = rec['extra'].reshape( (rec['extra'].shape[0]//cluster_size, rec['extra'].shape[1] * cluster_size))
except:
pass
def reformat_to_clusters(
corr2d,
target_disparity,
gt_ds,
extra, # may be None
cluster_radius):
cluster_size = (2 * cluster_radius + 1) * (2 * cluster_radius + 1)
# Reformat input data
corr2d.shape = ((corr2d.shape[0]//cluster_size, corr2d.shape[1] * cluster_size))
target_disparity.shape = ((target_disparity.shape[0]//cluster_size, target_disparity.shape[1] * cluster_size))
gt_ds.shape = ((gt_ds.shape[0]//cluster_size, gt_ds.shape[1] * cluster_size))
if not extra is None:
extra.shape = ((extra.shape[0]//cluster_size, extra.shape[1] * cluster_size))
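# Example of the in-place reshape above (the arrays are reshaped via .shape assignment):
# with cluster_radius = 2 (cluster_size = 25) and 324 corr2d features per tile (as implied by
# the corr2d_Nx325 tensor name), corr2d goes from (N*25, 324) to (N, 8100), target_disparity
# from (N*25, 1) to (N, 25) and gt_ds from (N*25, 2) to (N, 50), so one row now holds a
# complete 5x5 cluster.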
def get_lengths(
cluster_radius,
tile_layers,
tile_side):
cluster_side = 2 * cluster_radius + 1
cl = cluster_side * cluster_side * tile_layers * tile_side * tile_side
tl = cluster_side * cluster_side
    gl = cluster_side * cluster_side * 2 # disparity+strength, rest goes to extra
return cl, tl, gl, cluster_side
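# Worked example for get_lengths(), using the tile geometry referenced in the training-code
# comments (tile_layers = 4, tile_side = 9) and cluster_radius = 2: cluster_side = 5,
# cl = 25*4*9*9 = 8100 correlation values, tl = 25 target disparities, gl = 25*2 = 50
# GT disparity/strength values, so a full cluster row is 8100 + 25 + 50 = 8175 floats
# (plus the 'extra' channels when present).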
def flip_horizontal(dataset, cluster_radius, tile_layers, tile_side):
cl, tl, gl, cluster_side = get_lengths(cluster_radius, tile_layers, tile_side)
corr2d = dataset[:,:cl] .reshape([dataset.shape[0], cluster_side, cluster_side, tile_layers, tile_side, tile_side])
target_disparity = dataset[:,cl:cl+tl].reshape([dataset.shape[0], cluster_side, cluster_side, -1])
gt_ds = dataset[:,cl+tl:cl+tl+gl] .reshape([dataset.shape[0], cluster_side, cluster_side, -1])
# no extra here !
"""
Horizontal flip of tiles
"""
corr2d = corr2d[:,:,::-1,...]
target_disparity = target_disparity[:,:,::-1,...]
gt_ds = gt_ds[:,:,::-1,...]
corr2d[:,:,:,0,:,:] = corr2d[:,:,:,0,::-1,:] # flip vertical layer0 (hor)
corr2d[:,:,:,1,:,:] = corr2d[:,:,:,1,:,::-1] # flip horizontal layer1 (vert)
corr2d_2 = corr2d[:,:,:,3,::-1,:].copy() # flip vertical layer3 (diago)
corr2d[:,:,:,3,:,:] = corr2d[:,:,:,2,::-1,:] # flip vertical layer2 (diago)
corr2d[:,:,:,2,:,:] = corr2d_2
"""
pack back into a single (input)array
"""
dataset[:,:cl] = corr2d.reshape((corr2d.shape[0],-1))
dataset[:,cl:cl+tl] = target_disparity.reshape((target_disparity.shape[0],-1))
dataset[:,cl+tl:] = gt_ds.reshape((gt_ds.shape[0],-1))
def replace_nan(datasets_data): # , cluster_radius):
# Reformat input data
for rec in datasets_data:
if not rec is None:
np.nan_to_num(rec['corr2d'], copy = False)
np.nan_to_num(rec['target_disparity'], copy = False)
if 'gt_ds' in rec:
np.nan_to_num(rec['gt_ds'], copy = False)
if 'extra' in rec:
np.nan_to_num(rec['extra'], copy = False)
def permute_to_swaps(perm):
pairs = []
for i in range(len(perm)):
w = np.where(perm == i)[0][0]
if w != i:
pairs.append([i,w])
perm[w] = perm[i]
perm[i] = i
return pairs
def shuffle_in_place(dataset_data, # alternating clusters from 4 sources. Each cluster has all needed data (concatenated)
period):
for i in range (period):
np.random.shuffle(dataset_data[i::period])
def add_file_to_dataset(dataset, new_dataset, train_next):
train_next['file'] = (train_next['file']+1)%train_next['files']
l = new_dataset.shape[0] * train_next['step']
if (train_next['entry'] + l) < (train_next['entries']+train_next['step']):
dataset[train_next['entry']:train_next['entry']+l:train_next['step']] = new_dataset
train_next['entry'] += l
if (train_next['entry'] >= train_next['entries']):
train_next['entry'] -= train_next['entries']
return True
else:
return False
else: # split it two parts
l = (train_next['entries'] - train_next['entry'] + (train_next['step']-1)) // train_next['step']
dataset[train_next['entry']::train_next['step']] = new_dataset[:l]
train_next['entry'] = (train_next['entry'] + l * train_next['step']) % train_next['entries'] #0,1,2,3
l1 = new_dataset.shape[0] - l # remainder
ln = train_next['entry'] + l1 * train_next['step']
dataset[train_next['entry']:ln:train_next['step']] = new_dataset[l:]
train_next['entry'] = ln
return True
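# Layout of the merged train buffer (see initTrainTestData() below): with num_trains sources,
# source n owns rows n, n + num_trains, n + 2*num_trains, ... of dataset_train_merged
# ('entry' starts at n, 'step' = num_trains). add_file_to_dataset() writes a newly read file
# into that stride, wrapping around at the end of the buffer, and returns True once the wrap
# (rollover) happens, i.e. when this source's share of the buffer has been filled.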
"""
train_next[n_train]
Read as many files as needed, possibly repeating, until each buffer is full.
"""
def initTrainTestData(
files,
cluster_radius,
buffer_size, # number of clusters per train
test_titles = None
):
"""
Generates a single np array for training with concatenated cluster of corr2d,
cluster of target_disparity, and cluster of gt_ds for convenient shuffling
"""
num_trains = len(files['train'])
num_entries = num_trains * buffer_size
dataset_train_merged = None
train_next = [None]*num_trains
for n_train, f_train in enumerate(files['train']):
train_next[n_train] = {'file':0, 'entry':n_train, 'files':len(f_train), 'entries': num_entries, 'step':num_trains, 'more_files':False}
buffer_full = False
while not buffer_full:
for fpath in f_train:
print_time("Importing train data "+(["low variance","high variance", "low variance1","high variance1"][n_train]) +" from "+fpath, end="")
new_dataset = readTFRewcordsEpoch(fpath, cluster_radius)
if dataset_train_merged is None:
dataset_train_merged = np.empty([num_entries,new_dataset.shape[1]], dtype =new_dataset.dtype)
## print("\nbefore add_file_to_dataset: train_next["+str(n_train)+"]=",train_next[n_train])
rollover = add_file_to_dataset(
dataset = dataset_train_merged,
new_dataset = new_dataset,
train_next = train_next[n_train])
## print("after add_file_to_dataset: train_next["+str(n_train)+"]=",train_next[n_train])
print_time(" Done")
if rollover:
buffer_full = True
train_next[n_train][ 'more_files'] = train_next[n_train][ 'file'] < train_next[n_train][ 'files'] # Not all files used, need to load during training
break
if test_titles is None:
test_titles = defaultTestTitles(files)
datasets_test = []
for t,v in zip(test_titles,DEFAULT_TITLES):
if not t is None:
grp = v[0]
for fpath in files[grp]:
print_time("Importing test data ("+grp+") from "+fpath, end="")
new_dataset = readTFRewcordsEpoch(fpath, cluster_radius)
datasets_test.append(new_dataset)
print_time(" Done")
"""
for grp in ['test_lvar','test_hvar','test_lvar1','test_hvar1']:
if grp in files:
for fpath in files[grp]:
print_time("Importing test data ("+grp+") from "+fpath, end="")
new_dataset = readTFRewcordsEpoch(fpath, cluster_radius)
datasets_test.append(new_dataset)
print_time(" Done")
"""
return train_next, dataset_train_merged, datasets_test
def get_full_tile_indices2d(height,width):
a = np.empty([height,width,2], dtype=np.int32)
a[...,0] = np.arange(height).reshape([height,1])
a[...,1] = np.arange(width)
return a.reshape(-1,2)
def get_full_tile_indices(height,width):
return np.arange(height*width).reshape(-1,1)
def readImageData(image_data,
files,
indx,
cluster_radius,
tile_layers,
tile_side,
width,
replace_nans,
infer = False,
keep_gt = False):
cl, tl, gl, _ = get_lengths(0, tile_layers, tile_side)
if image_data[indx] is None:
dataset = readTFRewcordsEpoch(
files['images'][indx],
cluster_radius = 0)
corr2d = dataset[:,:cl]
target_disparity = dataset[:,cl:cl+tl]
if infer:
image_data[indx] = {
'corr2d': corr2d,
'target_disparity': target_disparity,
'xy': get_full_tile_indices2d(corr2d.shape[0]//width, width),
'ntile': get_full_tile_indices(corr2d.shape[0]//width, width)}
if keep_gt:
gt_ds = dataset[:,cl+tl:cl+tl+gl]
image_data[indx]["gt_ds"] = gt_ds
image_data[indx]["gtruths"]= gt_ds.copy()
image_data[indx]["t_disps"]= target_disparity.reshape([-1,1]).copy()
else:
gt_ds = dataset[:,cl+tl:cl+tl+gl]
extra = dataset[:,cl+tl+gl:]
image_data[indx] = {
'corr2d': corr2d,
'target_disparity': target_disparity,
"gt_ds": gt_ds,
"gtruths": gt_ds.copy(), # never used?
"t_disps": target_disparity.reshape([-1,1]).copy(),
"extra": extra, # will be increased by 25
"t_extra": extra.copy() } #will still be (ntiles,3)
if cluster_radius > 0:
extend_img_to_clusters(
[image_data[indx]],
cluster_radius,
width)
if replace_nans:
replace_nan([image_data[indx]])
return image_data[indx]
def initImageData(files,
max_imgs,
cluster_radius,
tile_layers,
tile_side,
width,
replace_nans,
infer = False,
keep_gt = False):
# no_train = False):
num_imgs = len(files['images'])
img_data = [None] * num_imgs
for nfile in range(min(num_imgs, max_imgs)):
print_time("Importing test image data from "+ files['images'][nfile], end="")
readImageData(img_data,
files,
nfile,
cluster_radius,
tile_layers,
tile_side,
width,
replace_nans,
infer = infer,
keep_gt = keep_gt)
print_time(" Done")
return img_data
def evaluateAllResults(result_files, absolute_disparity, cluster_radius, labels=None, logpath=None):
if logpath:
lf=open(logpath,"w")
else:
lf = None
for result_file in result_files:
try:
print_time("Reading resuts from "+result_file, end=" ")
eval_results(result_file, absolute_disparity, radius=cluster_radius, logfile=lf)
except:
print_time(" - does not exist")
continue
print_time("Done")
print_time("Saving resuts to tiff", end=" ")
result_npy_to_tiff(result_file, absolute_disparity, fix_nan = True, labels=labels)
print_time("Done")
if lf:
lf.close()
def result_npy_prepare(npy_path, absolute, fix_nan, insert_deltas=True,labels=None):
"""
@param npy_path full path to the npy file with 4-layer data (242,324,4) - nn_disparity(offset), target_disparity, gt disparity, gt strength
data will be written as 4-layer tiff, extension '.npy' replaced with '.tiff'
@param absolute - True - the first layer contains absolute disparity, False - difference from target_disparity
@param fix_nan - replace nan in target_disparity with 0 to apply offset, target_disparity will still contain nan
    @param insert_deltas: +1 - add delta layers, +2 - add variance (max - min of this and 8 neighbors)
"""
data = np.load(npy_path) #(324,242,4) [nn_disp, target_disp,gt_disp, gt_conf]
if labels is None:
labels = ["chn%d"%(i) for i in range(data.shape[2])]
# labels = ["nn_out","hier_out","gt_disparity","gt_strength"]
nn_out = 0
# target_disparity = 1
gt_disparity = 2
gt_strength = 3
heur_err = 7
min_heur_err = 0.001
height = data.shape[0]
width = data.shape[1]
nocenter9 = np.array([[[1,1,1,1,np.nan,1,1,1,1]]], dtype = data.dtype)
if not absolute:
if fix_nan:
data[...,nn_out] += np.nan_to_num(data[...,1], copy=True)
else:
data[...,nn_out] += data[...,1]
if (insert_deltas & 1):
np.nan_to_num(data[...,gt_strength], copy=False)
data = np.concatenate([data[...,0:4],data[...,0:2],data[...,0:2],data[...,4:]], axis = 2) # data[...,4:] may be empty
labels = labels[:4]+["nn_out","hier_out","nn_err","hier_err"]+labels[4:]
data[...,6] -= data[...,gt_disparity]
data[...,7] -= data[...,gt_disparity]
for l in [2, 4, 5, 6, 7]:
if l < data.shape[2]:
data[...,l] = np.select([data[...,gt_strength]==0.0, data[...,gt_strength]>0.0], [np.nan,data[...,l]])
        # All other layers - mask them too
for l in range(8,data.shape[2]):
data[...,l] = np.select([data[...,gt_strength]==0.0, data[...,gt_strength]>0.0], [np.nan,data[...,l]])
"""
        Calculate bad tiles where gt was used as a master, to remove them from the results (later versions add random error)
"""
bad1 = abs(data[...,heur_err]) < min_heur_err
bad1_ext = np.concatenate([bad1 [0:1,:], bad1 [0:1,:], bad1[:,:], bad1 [-1:height,:], bad1 [-1:height,:]],axis = 0)
bad1_ext = np.concatenate([bad1_ext[:,0:1], bad1_ext[:,0:1], bad1_ext[:,:], bad1_ext[:,-1:width], bad1_ext[:,-1:width]], axis = 1)
bad25 = np.empty(shape=[height, width, 25], dtype=bad1.dtype)
bm25=np.array([[[1,1,1,1,1, 1,1,1,1,1, 1,1,1,1,1, 1,1,1,1,1, 1,1,1,1,1]]])
bm09=np.array([[[0,0,0,0,0, 0,1,1,1,0, 0,1,1,1,0, 0,1,1,1,0, 0,0,0,0,0]]])
bm01=np.array([[[0,0,0,0,0, 0,0,0,0,0, 0,0,1,0,0, 0,0,0,0,0, 0,0,0,0,0]]])
for row in range(5):
for col in range(5):
pass
bad25 [...,row*5+col]= bad1_ext[row:height+row, col:width+col]
bad_num1=(np.sum(bad25*bm25,axis=2) > 0).astype(data.dtype)
bad_num2=(np.sum(bad25*bm09,axis=2) > 0).astype(data.dtype)
bad_num3=(np.sum(bad25*bm01,axis=2) > 0).astype(data.dtype)
bad_num = bad_num1 + bad_num2 + bad_num3
if (insert_deltas & 2):
wo = 0.7 # ortho
wc = 0.5 #corner
w8=np.array([wc,wo,wc,wo,0.0,wo,wc,wo,wc], dtype=data.dtype)
w8/=np.sum(w8) #normalize
gt_ext = np.concatenate([data[0:1,:,gt_disparity],data[:,:,gt_disparity],data[-1:height,:,gt_disparity]],axis = 0)
gt_ext = np.concatenate([gt_ext[:,0:1], gt_ext[:,:], gt_ext[:,-1:width]],axis = 1)
gs_ext = np.concatenate([data[0:1,:,gt_strength], data[:,:,gt_strength], data[-1:height,:,gt_strength]],axis = 0)
gs_ext = np.concatenate([gs_ext[:,0:1], gs_ext[:,:], gs_ext[:,-1:width]],axis = 1)
data9 = np.empty(shape=[height, width, 9], dtype=data.dtype)
weight9 = np.empty(shape=[height, width, 9], dtype=data.dtype)
for row in range(3):
for col in range(3):
pass
data9 [...,row*3+col]= gt_ext[row:height+row, col:width+col]
weight9[...,row*3+col]= gs_ext[row:height+row, col:width+col]
        data9 *= weight9/weight9 # make data = nan where weight is 0
# data = np.concatenate([data[...],np.empty_like(data[...,-1])], axis = 2) # data[...,4:] may be empty
data = np.concatenate([data[...],np.empty(shape=[height,width,4],dtype=data.dtype)], axis = 2) # data[...,4:] may be empty
data[...,-4] = np.nanmax(data9*nocenter9, axis=2)-np.nanmin(data9*nocenter9,axis=2)# will ignore nan
np.nan_to_num(data9,copy=False) # replace all nan in data9 with 0.
weight9 *= w8
w_center = np.sum(weight9, axis=2)
dw_center = np.sum(data9*weight9, axis=2)
dw_center /= w_center # now dw_center - weighted average in the center
data[...,-3] = np.abs(data[...,gt_disparity]- dw_center)
# data[...,-2] = data[...,gt_disparity]- dw_center
#data[...,-3] *= (data[...,-4] < 1.0) # just temporary
#data[...,-3] *= (data[...,gt_disparity] < 5) #just temporary
data[...,-2] =bad_num.astype(data.dtype)
data [...,-1]= np.sum(np.nan_to_num(weight9/weight9),axis=2).astype(data.dtype)
# data[...,-1] = dw_center
labels +=["max-min","abs-center","badness","neibs"]
#neib = np.concatenate([gt_ext[:height,:width,:],],axis = )
pass
return data, labels
def result_npy_to_tiff(npy_path,
absolute,
fix_nan,
insert_deltas=True,
labels = None,
logfile = None):
"""
@param npy_path full path to the npy file with 4-layer data (242,324,4) - nn_disparity(offset), target_disparity, gt disparity, gt strength
data will be written as 4-layer tiff, extension '.npy' replaced with '.tiff'
@param absolute - True - the first layer contains absolute disparity, False - difference from target_disparity
@param fix_nan - replace nan in target_disparity with 0 to apply offset, target_disparity will still contain nan
"""
data,labels = result_npy_prepare(npy_path, absolute, fix_nan, insert_deltas, labels=labels)
tiff_path = npy_path.replace('.npy','.tiff')
data = data.transpose(2,0,1)
print("Saving results to TIFF: "+tiff_path)
if (logfile):
print("Saving results to TIFF: "+tiff_path,file=logfile)
imagej_tiffwriter.save(tiff_path,data,labels=labels)
def eval_results(rslt_path, absolute,
min_disp = -0.1, #minimal GT disparity
max_disp = 20.0, # maximal GT disparity
max_ofst_target = 1.0,
max_ofst_result = 1.0,
str_pow = 2.0,
radius = 0,
logfile = None):
variants = [[ -0.1, 5.0, 0.5, 0.5, 1.0],
[ -0.1, 5.0, 0.5, 0.5, 2.0],
[ -0.1, 5.0, 0.2, 0.2, 1.0],
[ -0.1, 5.0, 0.2, 0.2, 2.0],
[ -0.1, 20.0, 0.5, 0.5, 1.0],
[ -0.1, 20.0, 0.5, 0.5, 2.0],
[ -0.1, 20.0, 0.2, 0.2, 1.0],
[ -0.1, 20.0, 0.2, 0.2, 2.0],
[ -0.1, 20.0, 1.0, 1.0, 1.0],
[min_disp, max_disp, max_ofst_target, max_ofst_result, str_pow]]
rslt = np.load(rslt_path)
not_nan = ~np.isnan(rslt[...,0])
not_nan &= ~np.isnan(rslt[...,1])
not_nan &= ~np.isnan(rslt[...,2])
not_nan &= ~np.isnan(rslt[...,3])
not_nan_ext = np.zeros((rslt.shape[0] + 2*radius,rslt.shape[1] + 2 * radius),dtype=np.bool)
not_nan_ext[radius:-radius,radius:-radius] = not_nan
for dy in range(2*radius+1):
for dx in range(2*radius+1):
not_nan_ext[dy:dy+not_nan.shape[0], dx:dx+not_nan.shape[1]] &= not_nan
not_nan = not_nan_ext[radius:-radius,radius:-radius]
if not absolute:
rslt[...,0] += rslt[...,1]
nn_disparity = np.nan_to_num(rslt[...,0], copy = False)
target_disparity = np.nan_to_num(rslt[...,1], copy = False)
gt_disparity = np.nan_to_num(rslt[...,2], copy = False)
gt_strength = np.nan_to_num(rslt[...,3], copy = False)
rslt = []
print ("--------------- %s ---------------"%(rslt_path))
if logfile:
print ("--------------- %s ---------------"%(rslt_path), file=logfile)
for min_disparity, max_disparity, max_offset_target, max_offset_result, strength_pow in variants:
good_tiles = not_nan.copy();
good_tiles &= (gt_disparity >= min_disparity)
good_tiles &= (gt_disparity <= max_disparity)
good_tiles &= (target_disparity != gt_disparity)
good_tiles &= (np.abs(target_disparity - gt_disparity) <= max_offset_target)
good_tiles &= (np.abs(target_disparity - nn_disparity) <= max_offset_result)
gt_w = gt_strength * good_tiles
gt_w = np.power(gt_w,strength_pow)
sw = gt_w.sum()
diff0 = target_disparity - gt_disparity
diff1 = nn_disparity - gt_disparity
diff0_2w = gt_w*diff0*diff0
diff1_2w = gt_w*diff1*diff1
rms0 = np.sqrt(diff0_2w.sum()/sw)
rms1 = np.sqrt(diff1_2w.sum()/sw)
print ("%7.3f<disp<%7.3f, offs_tgt<%5.2f, offs_rslt<%5.2f pwr=%05.3f, rms0=%7.4f, rms1=%7.4f (gain=%7.4f) num good tiles = %5d"%(
min_disparity, max_disparity, max_offset_target, max_offset_result, strength_pow, rms0, rms1, rms0/rms1, good_tiles.sum() ))
if logfile:
print ("%7.3f<disp<%7.3f, offs_tgt<%5.2f, offs_rslt<%5.2f pwr=%05.3f, rms0=%7.4f, rms1=%7.4f (gain=%7.4f) num good tiles = %5d"%(
min_disparity, max_disparity, max_offset_target, max_offset_result, strength_pow, rms0, rms1, rms0/rms1, good_tiles.sum() ),file=logfile)
rslt.append([rms0,rms1])
return rslt
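# In the printout above rms0 is the strength-weighted RMS error of the heuristic target
# disparity vs. GT, rms1 is the same for the network output, and gain = rms0/rms1,
# so gain > 1 means the network improved on the heuristic for that disparity/offset range.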
def concentricSquares(radius):
side = 2 * radius + 1
return [[((i // side) >= var) and
((i // side) < (side - var)) and
((i % side) >= var) and
((i % side) < (side - var)) for i in range (side*side) ] for var in range(radius+1)]
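# Example: concentricSquares(1) returns two flattened 3x3 masks - [True]*9 (the whole cluster)
# and a mask that keeps only the center tile; concentricSquares(2) returns three 5x5 masks:
# all 25 tiles, the inner 3x3, and the center tile only. These are the tile selections used
# for the partial (concentric) outputs of the Siamese network.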
#!/usr/bin/env python3
__copyright__ = "Copyright 2018, Elphel, Inc."
__license__ = "GPL-3.0+"
__email__ = "andrey@elphel.com"
#from numpy import float64
#import numpy as np
import tensorflow as tf
def smoothLoss(out_batch, # [batch_size,(1..2)] tf_result
target_disparity_batch, # [batch_size] tf placeholder
gt_ds_batch_clust, # [batch_size,25,2] tf placeholder
clip, # limit punishment for cutting corners (disparity pix)
absolute_disparity = False, #when false there should be no activation on disparity output !
cluster_radius = 2):
with tf.name_scope("SmoothLoss"):
center_tile_index = 2 * cluster_radius * (cluster_radius + 1)
cluster_side = 2 * cluster_radius + 1
cluster_size = cluster_side * cluster_side
w_corner = 0.7
w8 = [w_corner,1.0,w_corner,1.0,1.0,w_corner,1.0,w_corner]
w8 = [w/sum(w8) for w in w8]
tf_w8=tf.reshape(tf.constant(w8, dtype=tf.float32, name="w8_"), shape=[1,-1], name="w8")
i8 = []
for dy in [-1,0,1]:
for dx in [-1,0,1]:
if (dy != 0) or (dx != 0):
i8.append(center_tile_index+(dy*cluster_side)+dx)
tf_clip = tf.constant(clip, dtype=tf.float32, name = "clip")
tf_gt_ds_all = tf.reshape(gt_ds_batch_clust,[-1,cluster_size,gt_ds_batch_clust.shape[1]//cluster_size], name = "gt_ds_all")
tf_neibs8 = tf.gather(tf_gt_ds_all, indices = i8, axis = 1, name = "neibs8")
tf_gt_disparity8 = tf.reshape(tf_neibs8[:,:,0], [-1,8], name = "gt8_disparity") # (?,8)
tf_gt_strength8 = tf.reshape(tf_neibs8[:,:,1], [-1,8], name = "gt8_strength") # (?,8)
tf_w = tf.multiply(tf_gt_strength8, tf_w8, name = "w")
tf_dw = tf.multiply(tf_gt_disparity8, tf_w, name = "dw")
tf_sum_w = tf.reduce_sum(tf_w, axis = 1, name = "sum_w")
tf_sum_dw = tf.reduce_sum(tf_dw, axis = 1, name = "sum_dw")
tf_avg_disparity = tf.divide(tf_sum_dw, tf_sum_w, name = "avg_disparity") # (?,)
tf_gt_disparity = tf.reshape(tf_gt_ds_all[:,center_tile_index,0], [-1], name = "gt_disparity") # (?,)
"""
        It would be good to limit tf_gt_disparity by min/max (+margin) of tf.reduce_min(tf_gt_disparity8, axis=1, ...), but there could be zeros caused by undefined GT for the tile
"""
tf_gt_strength = tf.reshape(tf_gt_ds_all[:,center_tile_index,1], [-1], name = "gt_strength") # (?,)
tf_d0 = tf.abs(tf_gt_disparity - tf_avg_disparity, name = "tf_d0")
tf_d = tf.maximum(tf_d0, 0.001, name = "tf_d")
## tf_d2 = tf.multiply(tf_d, tf_d, name = "tf_d2")
tf_out = tf.reshape(out_batch[:,0],[-1], name = "tf_out")
if absolute_disparity:
tf_out_disparity = tf_out
else:
tf_out_disparity = tf.add(tf_out, tf.reshape(target_disparity_batch,[-1]),name = "out_disparity")
tf_offs = tf.subtract(tf_out_disparity, tf_avg_disparity, name = "offs")
tf_offs2 = tf.multiply(tf_offs, tf_offs, name = "offs2")
tf_offs2_d = tf.divide(tf_offs2, tf_d, name = "offs2_d")
tf_cost0 = tf.maximum(tf_d - tf_offs2_d, 0.0, name = "cost0")
tf_cost_nw = tf.minimum(tf_cost0, tf_clip, name = "cost_nw")
## tf_cost_nw = tf.maximum(tf_d2 - tf_offs2, 0.0, name = "cost_nw")
tf_cost_w = tf.multiply(tf_cost_nw, tf_gt_strength, name = "cost_w")
tf_sum_wc = tf.reduce_sum(tf_gt_strength, name = "sum_wc")
tf_sum_costw = tf.reduce_sum(tf_cost_w, name = "sum_costw")
tf_cost = tf.divide(tf_sum_costw, tf_sum_wc, name = "cost")
return tf_cost, tf_cost_nw, tf_cost_w, tf_d , tf_avg_disparity, tf_gt_disparity, tf_offs
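# A minimal per-tile NumPy sketch of the cost computed by smoothLoss() above, kept only as
# documentation of the math; it is not called anywhere, and the function name, argument names
# and the default clip value are illustrative.
def smooth_cost_sketch(out_disparity,   # NN disparity for the center tile (already absolute)
                       gt_center_disp,  # GT disparity of the center tile
                       gt_center_str,   # GT strength of the center tile
                       gt_disp8,        # GT disparities of the 8 neighbors, ordered (-1,-1),(-1,0),(-1,1),(0,-1),(0,1),(1,-1),(1,0),(1,1)
                       gt_str8,         # GT strengths of the same 8 neighbors
                       clip = 0.5):     # plays the role of SLOSS_CLIP (illustrative default)
    import numpy as np
    w_corner = 0.7
    w8 = np.array([w_corner, 1.0, w_corner, 1.0, 1.0, w_corner, 1.0, w_corner])
    w8 /= w8.sum()
    w = np.asarray(gt_str8) * w8                                # strength-weighted neighbor weights
    avg_disparity = (np.asarray(gt_disp8) * w).sum() / w.sum()  # weighted average of the neighbors' GT
    d = max(abs(gt_center_disp - avg_disparity), 0.001)         # distance of the center GT from that average
    offs = out_disparity - avg_disparity                        # distance of the NN output from that average
    cost_nw = min(max(d - offs * offs / d, 0.0), clip)          # penalize outputs that "cut the corner", clipped
    return cost_nw * gt_center_str                              # per-tile contribution (strength-averaged over the batch in smoothLoss)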
def batchLoss(out_batch, # [batch_size,(1..2)] tf_result
target_disparity_batch, # [batch_size] tf placeholder
gt_ds_batch, # [batch_size,2] tf placeholder
batch_weights, # [batch_size] now batch index % 4 - different sources, even - low variance, odd - high variance
disp_diff_cap = 10.0, # cap disparity difference to this value (give up on large errors)
disp_diff_slope= 0.0, #allow squared error to grow above disp_diff_cap
absolute_disparity = False, #when false there should be no activation on disparity output !
use_confidence = False,
lambda_conf_avg = 0.01,
## lambda_conf_pwr = 0.1,
conf_pwr = 2.0,
gt_conf_offset = 0.08,
gt_conf_pwr = 1.0,
error2_offset = 0.0025, # 0.0, # 0.0025, # (0.05^2) ~= coring
disp_wmin = 1.0, # minimal disparity to apply weight boosting for small disparities
disp_wmax = 8.0, # maximal disparity to apply weight boosting for small disparities
use_out = False): # use calculated disparity for disparity weight boosting (False - use target disparity)
with tf.name_scope("BatchLoss"):
"""
Here confidence should be after relU. Disparity - may be also if absolute, but no activation if output is residual disparity
"""
tf_lambda_conf_avg = tf.constant(lambda_conf_avg, dtype=tf.float32, name="tf_lambda_conf_avg")
## tf_lambda_conf_pwr = tf.constant(lambda_conf_pwr, dtype=tf.float32, name="tf_lambda_conf_pwr")
## tf_conf_pwr = tf.constant(conf_pwr, dtype=tf.float32, name="tf_conf_pwr")
tf_gt_conf_offset = tf.constant(gt_conf_offset, dtype=tf.float32, name="tf_gt_conf_offset")
tf_gt_conf_pwr = tf.constant(gt_conf_pwr, dtype=tf.float32, name="tf_gt_conf_pwr")
tf_num_tiles = tf.shape(gt_ds_batch)[0]
tf_0f = tf.constant(0.0, dtype=tf.float32, name="tf_0f")
tf_1f = tf.constant(1.0, dtype=tf.float32, name="tf_1f")
## tf_maxw = tf.constant(1.0, dtype=tf.float32, name="tf_maxw")
tf_disp_diff_cap2= tf.constant(disp_diff_cap*disp_diff_cap, dtype=tf.float32, name="disp_diff_cap2")
tf_disp_diff_slope= tf.constant(disp_diff_slope, dtype=tf.float32, name="disp_diff_slope")
if gt_conf_pwr == 0:
w = tf.ones((out_batch.shape[0]), dtype=tf.float32,name="w_ones")
else:
w_slice = tf.reshape(gt_ds_batch[:,1],[-1], name = "w_gt_slice")
w_sub = tf.subtract (w_slice, tf_gt_conf_offset, name = "w_sub")
w_clip = tf.maximum(w_sub, tf_0f, name = "w_clip")
if gt_conf_pwr == 1.0:
w = w_clip
else:
w=tf.pow(w_clip, tf_gt_conf_pwr, name = "w_pow")
if use_confidence:
tf_num_tilesf = tf.cast(tf_num_tiles, dtype=tf.float32, name="tf_num_tilesf")
conf_slice = tf.reshape(out_batch[:,1],[-1], name = "conf_slice")
conf_sum = tf.reduce_sum(conf_slice, name = "conf_sum")
conf_avg = tf.divide(conf_sum, tf_num_tilesf, name = "conf_avg")
conf_avg1 = tf.subtract(conf_avg, tf_1f, name = "conf_avg1")
conf_avg2 = tf.square(conf_avg1, name = "conf_avg2")
cost2 = tf.multiply (conf_avg2, tf_lambda_conf_avg, name = "cost2")
iconf_avg = tf.divide(tf_1f, conf_avg, name = "iconf_avg")
nconf = tf.multiply (conf_slice, iconf_avg, name = "nconf") #normalized confidence
nconf_pwr = tf.pow(nconf, conf_pwr, name = "nconf_pwr")
nconf_pwr_sum = tf.reduce_sum(nconf_pwr, name = "nconf_pwr_sum")
nconf_pwr_offs = tf.subtract(nconf_pwr_sum, tf_1f, name = "nconf_pwr_offs")
cost3 = tf.multiply (conf_avg2, nconf_pwr_offs, name = "cost3")
w_all = tf.multiply (w, nconf, name = "w_all")
else:
w_all = w
# cost2 = 0.0
# cost3 = 0.0
# normalize weights
w_sum = tf.reduce_sum(w_all, name = "w_sum")
iw_sum = tf.divide(tf_1f, w_sum, name = "iw_sum")
w_norm = tf.multiply (w_all, iw_sum, name = "w_norm")
disp_slice = tf.reshape(out_batch[:,0],[-1], name = "disp_slice")
d_gt_slice = tf.reshape(gt_ds_batch[:,0],[-1], name = "d_gt_slice")
td_flat = tf.reshape(target_disparity_batch,[-1], name = "td_flat")
if absolute_disparity:
adisp = disp_slice
else:
adisp = tf.add(disp_slice, td_flat, name = "adisp")
out_diff = tf.subtract(adisp, d_gt_slice, name = "out_diff")
out_diff2 = tf.square(out_diff, name = "out_diff2")
pre_cap0 = tf.abs(out_diff, name = "pre_cap0")
pre_cap = tf.multiply(pre_cap0, tf_disp_diff_slope, name = "pre_cap")
diff_cap = tf.add(pre_cap, tf_disp_diff_cap2, name = "diff_cap")
out_diff2_capped = tf.minimum(out_diff2, diff_cap, name = "out_diff2_capped")
out_wdiff2 = tf.multiply (out_diff2_capped, w_norm, name = "out_wdiff2")
cost1 = tf.reduce_sum(out_wdiff2, name = "cost1")
out_diff2_offset = tf.subtract(out_diff2, error2_offset, name = "out_diff2_offset")
out_diff2_biased = tf.maximum(out_diff2_offset, 0.0, name = "out_diff2_biased")
# calculate disparity-based weight boost
if use_out:
dispw = tf.clip_by_value(adisp, disp_wmin, disp_wmax, name = "dispw")
else:
dispw = tf.clip_by_value(td_flat, disp_wmin, disp_wmax, name = "dispw")
dispw_boost = tf.divide(disp_wmax, dispw, name = "dispw_boost")
dispw_comp = tf.multiply (dispw_boost, w_norm, name = "dispw_comp") # combine disparity-based boost with the normalized per-tile weights
if batch_weights.shape[0] > 1:
dispw_batch = tf.multiply (dispw_comp, batch_weights, name = "dispw_batch")# apply weights for high/low variance and sources
else:
dispw_batch = tf.multiply (dispw_comp, tf_1f, name = "dispw_batch")# apply weights for high/low variance and sources
dispw_sum = tf.reduce_sum(dispw_batch, name = "dispw_sum")
idispw_sum = tf.divide(tf_1f, dispw_sum, name = "idispw_sum")
dispw_norm = tf.multiply (dispw_batch, idispw_sum, name = "dispw_norm")
out_diff2_wbiased = tf.multiply(out_diff2_biased, dispw_norm, name = "out_diff2_wbiased")
# out_diff2_wbiased = tf.multiply(out_diff2_biased, w_norm, name = "out_diff2_wbiased")
cost1b = tf.reduce_sum(out_diff2_wbiased, name = "cost1b")
if use_confidence:
cost12 = tf.add(cost1b, cost2, name = "cost12")
cost123 = tf.add(cost12, cost3, name = "cost123")
return cost123, disp_slice, d_gt_slice, out_diff,out_diff2, w_norm, out_wdiff2, cost1
else:
return cost1b, disp_slice, d_gt_slice, out_diff,out_diff2, w_norm, out_wdiff2, cost1
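# A minimal, hedged sketch (not part of the training graph above): the cap limits each
# squared disparity error to |diff| * slope + cap**2, so a few gross outliers cannot
# dominate cost1. The default parameter values below are illustrative assumptions only.
def _capped_sq_error_sketch(diff, disp_diff_cap=0.3, disp_diff_slope=0.03):
    import numpy as np  # local import keeps the sketch self-contained
    cap = np.abs(diff) * disp_diff_slope + disp_diff_cap * disp_diff_cap
    return np.minimum(diff * diff, cap)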
def weightsLoss(inp_weights,    # first-layer weights [input_features, num_outs]; only the first tile_layers * tile_side * tile_side rows are used
                tile_layers,    # number of 2d correlation layers in a tile
                tile_side,      # side of a square correlation tile
                wborders_zero): # True - treat values outside the tile as zero, False - replicate edge values
#               weights_lambdas): # single lambda or same length as inp_weights.shape[1]
"""
Enforce 'smooth' weights for the input 2d correlation tiles.
@return mean squared difference between each weight and the weighted average of its 8 neighbors, divided by the mean squared weight (scalar non-smoothness cost)
"""
weight_ortho = 1.0
weight_diag = 0.7
sw = 4.0 * (weight_ortho + weight_diag)
weight_ortho /= sw
weight_diag /= sw
# w_neib = tf.const([[weight_diag, weight_ortho, weight_diag],
# [weight_ortho, -1.0, weight_ortho],
# [weight_diag, weight_ortho, weight_diag]])
#WBORDERS_ZERO
with tf.name_scope("WeightsLoss"):
# Adding 1 tile border
# tf_inp = tf.reshape(inp_weights[:TILE_LAYERS * TILE_SIZE,:], [TILE_LAYERS, FILE_TILE_SIDE, FILE_TILE_SIDE, inp_weights.shape[1]], name = "tf_inp")
tf_inp = tf.reshape(inp_weights[:tile_layers * tile_side * tile_side,:], [tile_layers, tile_side, tile_side, inp_weights.shape[1]], name = "tf_inp")
if wborders_zero:
tf_zero_col = tf.constant(0.0, dtype=tf.float32, shape=[tf_inp.shape[0], tf_inp.shape[1], 1, tf_inp.shape[3]], name = "tf_zero_col")
tf_zero_row = tf.constant(0.0, dtype=tf.float32, shape=[tf_inp.shape[0], 1 , tf_inp.shape[2] + 2, tf_inp.shape[3]], name = "tf_zero_row")
tf_inp_ext_h = tf.concat([tf_zero_col, tf_inp, tf_zero_col ], axis = 2, name ="tf_inp_ext_h")
tf_inp_ext = tf.concat([tf_zero_row, tf_inp_ext_h, tf_zero_row ], axis = 1, name ="tf_inp_ext")
else:
tf_inp_ext_h = tf.concat([tf_inp [:, :, :1, :], tf_inp, tf_inp [:, :, -1:, :]], axis = 2, name ="tf_inp_ext_h")
tf_inp_ext = tf.concat([tf_inp_ext_h [:, :1, :, :], tf_inp_ext_h, tf_inp_ext_h[:, -1:, :, :]], axis = 1, name ="tf_inp_ext")
s_ortho = tf_inp_ext[:,1:-1,:-2,:] + tf_inp_ext[:,1:-1, 2:,:] + tf_inp_ext[:, :-2,1:-1,:] + tf_inp_ext[:,2:, 1:-1, :] # left + right + up + down neighbors
s_corn = tf_inp_ext[:, :-2,:-2,:] + tf_inp_ext[:, :-2, 2:,:] + tf_inp_ext[:,2:, :-2,:] + tf_inp_ext[:,2: , 2:, :]
w_diff = tf.subtract(tf_inp, s_ortho * weight_ortho + s_corn * weight_diag, name="w_diff")
w_diff2 = tf.multiply(w_diff, w_diff, name="w_diff2")
w_var = tf.reduce_mean(w_diff2, name="w_var")
w2_mean = tf.reduce_mean(inp_weights * inp_weights, name="w2_mean")
w_rel = tf.divide(w_var, w2_mean, name= "w_rel")
return w_rel # scalar, cost for weights non-smoothness in 2d
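# Hedged NumPy sketch of the same smoothness measure (illustration only, single 2d tile,
# replicated borders as in the wborders_zero == False branch): each weight is compared to
# the weighted average of its 8 neighbors and the mean squared difference is normalized
# by the mean squared weight.
def _weights_smoothness_sketch(w2d, weight_ortho=1.0, weight_diag=0.7):
    import numpy as np  # local import keeps the sketch self-contained
    sw = 4.0 * (weight_ortho + weight_diag)
    wo, wd = weight_ortho / sw, weight_diag / sw
    p = np.pad(w2d, 1, mode='edge')  # replicate border values
    s_ortho = p[1:-1, :-2] + p[1:-1, 2:] + p[:-2, 1:-1] + p[2:, 1:-1]
    s_corn  = p[:-2, :-2]  + p[:-2, 2:]  + p[2:, :-2]   + p[2:, 2:]
    diff = w2d - (s_ortho * wo + s_corn * wd)
    return np.mean(diff * diff) / np.mean(w2d * w2d)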
#!/usr/bin/env python3
__copyright__ = "Copyright 2018, Elphel, Inc."
__license__ = "GPL-3.0+"
__email__ = "andrey@elphel.com"
#from numpy import float64
#import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
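# Candidate layer widths for the fully connected sub-networks; each list gives the number
# of outputs per layer, and a 0 entry means that layer is skipped (see network_sub below).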
NN_LAYOUTS = {0:[0, 0, 0, 32, 20, 16],
1:[0, 0, 0, 256, 128, 64],
2:[0, 128, 32, 32, 32, 16],
3:[0, 0, 40, 32, 20, 16],
4:[0, 0, 0, 0, 16, 16],
5:[0, 0, 64, 32, 32, 16],
6:[0, 0, 32, 16, 16, 16],
7:[0, 0, 64, 16, 16, 16],
8:[0, 0, 0, 64, 20, 16],
9:[0, 0, 256, 64, 32, 16],
10:[0, 256, 128, 64, 32, 16],
11:[0, 0, 0, 0, 64, 32],
12:[0, 0, 256, 128, 64, 32],
13:[0, 0, 0, 256, 128, 32],
}
def lrelu(x):
return tf.maximum(x*0.2,x)
# return tf.nn.relu(x)
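# lrelu(x) == max(0.2*x, x): negative inputs are scaled by 0.2 instead of being clipped
# to 0, which keeps a gradient for inactive units (leaky ReLU).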
def sym_inputs8(inp, cluster_radius = 2):
"""
Take an input vector [?, 4*tile_side*tile_side + 1] (e.g. 4*9*9 + 1, the last element being target_disparity)
and reorder it for horizontal flip, vertical flip and transpose (8 variants: mode +1 - horizontal, +2 - vertical, +4 - transpose).
@return a list of 8 tensors of the same length, reordered
"""
tile_side = 2 * cluster_radius + 1
with tf.name_scope("sym_inputs8"):
td = inp[:,-1:] # tf.reshape(inp,[-1], name = "td")[-1]
inp_corr = tf.reshape(inp[:,:-1],[-1,4,tile_side,tile_side], name = "inp_corr")
inp_corr_h = tf.stack([-inp_corr [:,0,:,-1::-1], inp_corr [:,1,:,-1::-1], -inp_corr [:,3,:,-1::-1], -inp_corr [:,2,:,-1::-1]], axis=1, name = "inp_corr_h")
inp_corr_v = tf.stack([ inp_corr [:,0,-1::-1,:],-inp_corr [:,1,-1::-1,:], inp_corr [:,3,-1::-1,:], inp_corr [:,2,-1::-1,:]], axis=1, name = "inp_corr_v")
inp_corr_hv = tf.stack([ inp_corr_h[:,0,-1::-1,:],-inp_corr_h[:,1,-1::-1,:], inp_corr_h[:,3,-1::-1,:], inp_corr_h[:,2,-1::-1,:]], axis=1, name = "inp_corr_hv")
inp_corr_t = tf.stack([tf.transpose(inp_corr [:,1], perm=[0,2,1]),
tf.transpose(inp_corr [:,0], perm=[0,2,1]),
tf.transpose(inp_corr [:,2], perm=[0,2,1]),
-tf.transpose(inp_corr [:,3], perm=[0,2,1])], axis=1, name = "inp_corr_t")
inp_corr_ht = tf.stack([tf.transpose(inp_corr_h [:,1], perm=[0,2,1]),
tf.transpose(inp_corr_h [:,0], perm=[0,2,1]),
tf.transpose(inp_corr_h [:,2], perm=[0,2,1]),
-tf.transpose(inp_corr_h [:,3], perm=[0,2,1])], axis=1, name = "inp_corr_ht")
inp_corr_vt = tf.stack([tf.transpose(inp_corr_v [:,1], perm=[0,2,1]),
tf.transpose(inp_corr_v [:,0], perm=[0,2,1]),
tf.transpose(inp_corr_v [:,2], perm=[0,2,1]),
-tf.transpose(inp_corr_v [:,3], perm=[0,2,1])], axis=1, name = "inp_corr_vt")
inp_corr_hvt = tf.stack([tf.transpose(inp_corr_hv[:,1], perm=[0,2,1]),
tf.transpose(inp_corr_hv[:,0], perm=[0,2,1]),
tf.transpose(inp_corr_hv[:,2], perm=[0,2,1]),
-tf.transpose(inp_corr_hv[:,3], perm=[0,2,1])], axis=1, name = "inp_corr_hvt")
# return td, [inp_corr, inp_corr_h, inp_corr_v, inp_corr_hv, inp_corr_t, inp_corr_ht, inp_corr_vt, inp_corr_hvt]
"""
return [tf.concat([tf.reshape(inp_corr, [inp_corr.shape[0],-1]),td], axis=1,name = "out_corr"),
tf.concat([tf.reshape(inp_corr_h, [inp_corr.shape[0],-1]),td], axis=1,name = "out_corr_h"),
tf.concat([tf.reshape(inp_corr_v, [inp_corr.shape[0],-1]),td], axis=1,name = "out_corr_v"),
tf.concat([tf.reshape(inp_corr_hv, [inp_corr.shape[0],-1]),td], axis=1,name = "out_corr_hv"),
tf.concat([tf.reshape(inp_corr_t, [inp_corr.shape[0],-1]),td], axis=1,name = "out_corr_t"),
tf.concat([tf.reshape(inp_corr_ht, [inp_corr.shape[0],-1]),td], axis=1,name = "out_corr_ht"),
tf.concat([tf.reshape(inp_corr_vt, [inp_corr.shape[0],-1]),td], axis=1,name = "out_corr_vt"),
tf.concat([tf.reshape(inp_corr_hvt,[inp_corr.shape[0],-1]),td], axis=1,name = "out_corr_hvt")]
"""
cl = 4 * tile_side * tile_side
return [tf.concat([tf.reshape(inp_corr, [-1,cl]),td], axis=1,name = "out_corr"),
tf.concat([tf.reshape(inp_corr_h, [-1,cl]),td], axis=1,name = "out_corr_h"),
tf.concat([tf.reshape(inp_corr_v, [-1,cl]),td], axis=1,name = "out_corr_v"),
tf.concat([tf.reshape(inp_corr_hv, [-1,cl]),td], axis=1,name = "out_corr_hv"),
tf.concat([tf.reshape(inp_corr_t, [-1,cl]),td], axis=1,name = "out_corr_t"),
tf.concat([tf.reshape(inp_corr_ht, [-1,cl]),td], axis=1,name = "out_corr_ht"),
tf.concat([tf.reshape(inp_corr_vt, [-1,cl]),td], axis=1,name = "out_corr_vt"),
tf.concat([tf.reshape(inp_corr_hvt,[-1,cl]),td], axis=1,name = "out_corr_hvt")]
# inp_corr_h, inp_corr_v, inp_corr_hv, inp_corr_t, inp_corr_ht, inp_corr_vt, inp_corr_hvt]
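# Hedged usage sketch (shapes are assumptions for illustration): every returned variant
# keeps the [batch, 4*tile_side*tile_side + 1] layout of the input, so all 8 can share
# one set of fully connected weights in network_sub() below.
def _sym_inputs8_demo(cluster_radius = 2):
    tile_side = 2 * cluster_radius + 1
    inp = tf.compat.v1.placeholder(tf.float32, [None, 4 * tile_side * tile_side + 1], name = "demo_sym8_inp")
    return sym_inputs8(inp, cluster_radius)  # list of 8 reordered variants of inp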
def network_sub(input_tensor,
input_global, #add to all layers (but first) if not None
layout,
reuse,
sym8 = False,
cluster_radius = 2):
# last_indx = None;
fc = []
inp_weights = []
for i, num_outs in enumerate (layout):
if num_outs:
if fc:
if input_global is None:
inp = fc[-1]
else:
inp = tf.concat([fc[-1], input_global], axis = 1)
fc.append(slim.fully_connected(inp, num_outs, activation_fn=lrelu, scope='g_fc_sub'+str(i), reuse = reuse))
else:
inp = input_tensor
if sym8:
inp8 = sym_inputs8(inp, cluster_radius)
num_non_sum = num_outs % len(inp8) # if number of first layer outputs is not multiple of 8
num_sym8 = num_outs // len(inp8) # number of symmetrical groups
fc_sym = []
for j in range (len(inp8)): # ==8
reuse_this = reuse | (j > 0)
scp = 'g_fc_sub'+str(i)
fc_sym.append(slim.fully_connected(inp8[j], num_sym8, activation_fn=lrelu, scope= scp, reuse = reuse_this))
if not reuse_this:
with tf.compat.v1.variable_scope(scp,reuse=True) : # tf.AUTO_REUSE):
inp_weights.append(tf.get_variable('weights')) # ,shape=[inp.shape[1],num_outs]))
if num_non_sum > 0:
reuse_this = reuse
scp = 'g_fc_sub'+str(i)+"r"
fc_sym.append(slim.fully_connected(inp, num_non_sum, activation_fn=lrelu, scope=scp, reuse = reuse_this))
if not reuse_this:
with tf.compat.v1.variable_scope(scp,reuse=True) : # tf.AUTO_REUSE):
inp_weights.append(tf.get_variable('weights')) # ,shape=[inp.shape[1],num_outs]))
fc.append(tf.concat(fc_sym, 1, name='sym_input_layer'))
else:
scp = 'g_fc_sub'+str(i)
fc.append(slim.fully_connected(inp, num_outs, activation_fn=lrelu, scope= scp, reuse = reuse))
if not reuse:
with tf.compat.v1.variable_scope(scp, reuse=True) : # tf.AUTO_REUSE):
inp_weights.append(tf.get_variable('weights')) # ,shape=[inp.shape[1],num_outs]))
return fc[-1], inp_weights
def network_inter(input_tensor,
input_global, #add to all layers (but first) if not None
layout,
reuse=False,
use_confidence=False):
#last_indx = None;
fc = []
for i, num_outs in enumerate (layout):
if num_outs:
if fc:
if input_global is None:
inp = fc[-1]
else:
inp = tf.concat([fc[-1], input_global], axis = 1)
else:
inp = input_tensor
fc.append(slim.fully_connected(inp, num_outs, activation_fn=lrelu, scope='g_fc_inter'+str(i), reuse = reuse))
if use_confidence:
fc_out = slim.fully_connected(fc[-1], 2, activation_fn=lrelu, scope='g_fc_inter_out', reuse = reuse)
else:
fc_out = slim.fully_connected(fc[-1], 1, activation_fn=None, scope='g_fc_inter_out', reuse = reuse)
#If using residual disparity, split last layer into 2 or remove activation and add rectifier to confidence only
return fc_out
def networks_siam(input_tensor, # now [?,9,325]-> [?,25,325]
input_global, # add to all layers (but first) if not None
layout1,
layout2,
inter_convergence,
sym8 = False,
only_tile = None, # just for debugging - feed only data from the center sub-network
partials = None,
use_confidence=False,
cluster_radius = 2):
center_index = (input_tensor.shape[1] - 1) // 2
with tf.name_scope("Siam_net"):
inp_weights = []
num_legs = input_tensor.shape[1] # == 25
if partials is None:
partials = [[True] * num_legs]
inter_lists = [[] for _ in partials]
reuse = False
for i in range (num_legs):
if ((only_tile is None) or (i == only_tile)) and any([p[i] for p in partials]) :
if input_global is None:
ig = None
else:
ig =input_global[:,i,:]
ns, ns_weights = network_sub(input_tensor[:,i,:],
ig, # input_global[:,i,:],
layout= layout1,
reuse= reuse,
sym8 = sym8,
cluster_radius = cluster_radius)
for n, partial in enumerate(partials):
if partial[i]:
inter_lists[n].append(ns)
else:
inter_lists[n].append(tf.zeros_like(ns))
inp_weights += ns_weights
reuse = True
outs = []
for n, _ in enumerate(partials):
if input_global is None:
ig = None
else:
ig =input_global[:,center_index,:]
outs.append(network_inter (input_tensor = tf.concat(inter_lists[n],
axis=1,
name='inter_tensor'+str(n)),
input_global = [None, ig][inter_convergence], # optionally feed all convergence values (from each tile of a cluster)
layout = layout2,
reuse = (n > 0),
use_confidence = use_confidence))
return outs, inp_weights
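# Hedged construction sketch (all sizes are illustrative assumptions, not values taken
# from the training scripts): a 5x5 cluster of tiles, each flattened to 4*9*9 + 1 = 325
# values (four correlation layers of 9x9 plus target disparity), passed through the
# shared per-tile sub-network and a single inter-tile network.
def _networks_siam_demo():
    num_legs = 25                   # 5x5 tile cluster
    tile_len = 4 * 9 * 9 + 1        # per-tile input vector length
    corr2d = tf.compat.v1.placeholder(tf.float32, [None, num_legs, tile_len], name = "demo_corr2d")
    outs, inp_weights = networks_siam(corr2d,
                                      input_global      = None,
                                      layout1           = NN_LAYOUTS[0],
                                      layout2           = NN_LAYOUTS[0],
                                      inter_convergence = False,
                                      sym8              = False,  # keep the sketch independent of the sym8 tile geometry
                                      use_confidence    = False)
    return outs, inp_weights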