Commit 67764921 authored by Andrey Filippov

refactoring

parent 3519f5ec
@@ -7,31 +7,17 @@ __copyright__ = "Copyright 2018, Elphel, Inc."
__license__ = "GPL-3.0+"
__email__ = "andrey@elphel.com"
##from PIL import Image
#python3 nn_ds_neibs17.py /home/eyesis/x3d_data/data_sets/conf/qcstereo_conf13.xml /home/eyesis/x3d_data/data_sets
import os
import sys
##import glob
import numpy as np
##import itertools
import time
##import matplotlib.pyplot as plt
import shutil
from threading import Thread
#import imagej_tiffwriter
import qcstereo_network
import qcstereo_losses
import qcstereo_functions as qsf
#import xml.etree.ElementTree as ET
qsf.TIME_START = time.time()
qsf.TIME_LAST = qsf.TIME_START
@@ -69,11 +55,15 @@ USE_CONFIDENCE, WBORDERS_ZERO, EPOCHS_TO_RUN, FILE_UPDATE_EPOCHS = [None] * 4
LR600,LR400,LR200,LR100,LR = [None]*5
SHUFFLE_FILES, EPOCHS_FULL_TEST, SAVE_TIFFS = [None] * 3
TRAIN_BUFFER_GPU, TRAIN_BUFFER_CPU = [None]*2
"""
Next gets globals from the config file
"""
globals().update(parameters)
TRAIN_BUFFER_SIZE = TRAIN_BUFFER_GPU * TRAIN_BUFFER_CPU # in merged (quad) batches
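# A minimal sketch (not the project's actual config loader) of the pattern used
# above: every parameter parsed from the XML config is promoted to a module-level
# name via globals().update(). The two values are the ones set in the config below.
_example_parameters = {"TRAIN_BUFFER_GPU": 79, "TRAIN_BUFFER_CPU": 4}
globals().update(_example_parameters)              # keys become plain global names
assert TRAIN_BUFFER_GPU * TRAIN_BUFFER_CPU == 316  # 316 merged (quad) batches buffered on the CPU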
@@ -111,15 +101,6 @@ NN_LAYOUTS = {0:[0, 0, 0, 32, 20, 16],
NN_LAYOUT1 = NN_LAYOUTS[NET_ARCH1]
NN_LAYOUT2 = NN_LAYOUTS[NET_ARCH2]
USE_PARTIALS = not PARTIALS_WEIGHTS is None # False - just a single Siamese net, True - partial outputs that use concentric squares of the first level subnets
#http://stackoverflow.com/questions/287871/print-in-terminal-with-colors-using-python
#reading to memory (testing)
train_next = [{'file':0, 'slot':0, 'files':0, 'slots':0},
{'file':0, 'slot':0, 'files':0, 'slots':0}]
if TWO_TRAINS:
train_next += [{'file':0, 'slot':0, 'files':0, 'slots':0},
{'file':0, 'slot':0, 'files':0, 'slots':0}]
##############################################################################
cluster_size = (2 * CLUSTER_RADIUS + 1) * (2 * CLUSTER_RADIUS + 1)
center_tile_index = 2 * CLUSTER_RADIUS * (CLUSTER_RADIUS + 1)
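# Worked example of the two derived constants above, assuming CLUSTER_RADIUS = 2
# (the value set in the XML config below), i.e. a 5x5 cluster of tiles.
_r = 2
assert (2 * _r + 1) * (2 * _r + 1) == 25  # cluster_size: 25 tiles per cluster
assert 2 * _r * (_r + 1) == 12            # center_tile_index: row 2, column 2 of the flattened 5x5 grid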
@@ -141,37 +122,44 @@ qsf.evaluateAllResults(result_files = files['result'],
cluster_radius = CLUSTER_RADIUS)
image_data = qsf.initImageData(
files = files,
max_imgs = MAX_IMGS_IN_MEM,
cluster_radius = CLUSTER_RADIUS,
width = IMG_WIDTH,
replace_nans = True)
files = files,
max_imgs = MAX_IMGS_IN_MEM,
cluster_radius = CLUSTER_RADIUS,
tile_layers = TILE_LAYERS,
tile_side = TILE_SIDE,
width = IMG_WIDTH,
replace_nans = True)
# return train_next, dataset_train_all, datasets_test
corr2d_len, target_disparity_len, _ = qsf.get_lengths(CLUSTER_RADIUS, TILE_LAYERS, TILE_SIDE)
train_next, dataset_train, datasets_test= qsf.initTrainTestData(
files = files,
cluster_radius = CLUSTER_RADIUS,
buffer_size = TRAIN_BUFFER_SIZE * BATCH_SIZE) # number of clusters per train
## return corr2d_len, target_disparity_len, train_next, dataset_train_merged, datasets_test
datasets_train, datasets_test, num_train_sets= qsf.initTrainTestData(
files = files,
cluster_radius = CLUSTER_RADIUS,
max_files_per_group = MAX_FILES_PER_GROUP, # shuffling buffer for files
two_trains = TWO_TRAINS,
train_next = train_next)
corr2d_train_placeholder = tf.placeholder(datasets_train[0]['corr2d'].dtype, (None,FEATURES_PER_TILE * cluster_size)) # corr2d_train.shape)
target_disparity_train_placeholder = tf.placeholder(datasets_train[0]['target_disparity'].dtype, (None,1 * cluster_size)) #target_disparity_train.shape)
gt_ds_train_placeholder = tf.placeholder(datasets_train[0]['gt_ds'].dtype, (None,2 * cluster_size)) #gt_ds_train.shape)
corr2d_train_placeholder = tf.placeholder(dataset_train.dtype, (None,FEATURES_PER_TILE * cluster_size)) # corr2d_train.shape)
target_disparity_train_placeholder = tf.placeholder(dataset_train.dtype, (None,1 * cluster_size)) #target_disparity_train.shape)
gt_ds_train_placeholder = tf.placeholder(dataset_train.dtype, (None,2 * cluster_size)) #gt_ds_train.shape)
dataset_tt = tf.data.Dataset.from_tensor_slices({
"corr2d":corr2d_train_placeholder,
"corr2d": corr2d_train_placeholder,
"target_disparity": target_disparity_train_placeholder,
"gt_ds": gt_ds_train_placeholder})
"gt_ds": gt_ds_train_placeholder})
tf_batch_weights = tf.placeholder(shape=(None,), dtype=tf.float32, name = "batch_weights") # way to increase importance of the high variance clusters
feed_batch_weights = np.array(BATCH_WEIGHTS*(BATCH_SIZE//len(BATCH_WEIGHTS)), dtype=np.float32)
feed_batch_weight_1 = np.array([1.0], dtype=np.float32)
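# Hedged illustration of the batch-weight tiling above: BATCH_WEIGHTS is
# [0.9, 1.0, 0.9, 1.0] in the config below (lvar, hvar, lvar1, hvar1); the batch
# size of 8 is hypothetical, used only to show the repetition pattern.
_batch_weights, _batch_size = [0.9, 1.0, 0.9, 1.0], 8
_feed = np.array(_batch_weights * (_batch_size // len(_batch_weights)), dtype=np.float32)
# _feed == [0.9, 1.0, 0.9, 1.0, 0.9, 1.0, 0.9, 1.0]: every 4th cluster in a batch comes from
# the same source, and the two high-variance sources get slightly higher weight.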
dataset_train_size = len(datasets_train[0]['corr2d'])
dataset_train_size //= BATCH_SIZE
dataset_test_size = len(datasets_test[0]['corr2d'])
##dataset_train_size = len(datasets_train[0]['corr2d'])
##dataset_train_size //= BATCH_SIZE
#dataset_train_size = TRAIN_BUFFER_GPU * num_train_subs # TRAIN_BUFFER_SIZE
#dataset_test_size = len(datasets_test[0]['corr2d'])
dataset_test_size = len(datasets_test[0])
dataset_test_size //= BATCH_SIZE
#dataset_img_size = len(datasets_img[0]['corr2d'])
dataset_img_size = len(image_data[0]['corr2d'])
@@ -351,7 +339,7 @@ lr= tf.placeholder(tf.float32)
G_opt= tf.train.AdamOptimizer(learning_rate=lr).minimize(GW_loss)
ROOT_PATH = './attic/nn_ds_neibs16_graph'+SUFFIX+"/"
ROOT_PATH = './attic/nn_ds_neibs17_graph'+SUFFIX+"/"
TRAIN_PATH = ROOT_PATH + 'train'
TEST_PATH = ROOT_PATH + 'test'
TEST_PATH1 = ROOT_PATH + 'test1'
@@ -364,6 +352,9 @@ shutil.rmtree(TEST_PATH1, ignore_errors=True)
WIDTH=324
HEIGHT=242
num_train_subs = len(train_next) # number of (different type) merged training sets
dataset_train_size = TRAIN_BUFFER_GPU * num_train_subs # TRAIN_BUFFER_SIZE
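# Sketch of the CPU-to-GPU buffer arithmetic the training loop below relies on
# (the sti/eti window fed to iterator_tt each epoch). TRAIN_BUFFER_GPU = 79 and
# TRAIN_BUFFER_CPU = 4 come from the config; 4 merged train sets and BATCH_SIZE = 40
# are assumptions for illustration only.
_gpu_bufs, _cpu_bufs, _subs, _bs = 79, 4, 4, 40
_train_size = _gpu_bufs * _subs                    # batches per epoch (dataset_train_size above)
_total_rows = _gpu_bufs * _cpu_bufs * _subs * _bs  # cluster rows held in the merged CPU array
for _buf in range(_cpu_bufs):                      # train_buf_index = epoch % TRAIN_BUFFER_CPU
    _sti = _buf * _train_size * _bs                # first row of this epoch's GPU window
    _eti = _sti + _train_size * _bs                # one past the last row
assert _eti == _total_rows                         # the 4 windows tile the whole CPU buffer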
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
@@ -415,10 +406,9 @@ with tf.Session() as sess:
img_gain_test0 = 1.0
img_gain_test9 = 1.0
num_train_variants = len(datasets_train)
thr=None
thr_result = None
trains_to_update = [train_next[n_train]['files'] > train_next[n_train]['slots'] for n_train in range(len(train_next))]
trains_to_update = [train_next[n_train]['more_files'] for n_train in range(len(train_next))]
for epoch in range (EPOCHS_TO_RUN):
"""
update files after each epoch, all 4.
@@ -436,23 +426,19 @@ with tf.Session() as sess:
qsf.print_time("Inserting new data", end=" ")
for n_train in range(len(trains_to_update)):
if trains_to_update[n_train]:
# print("n_train= %d, len(thr_result)=%d"%(n_train,len(thr_result)))
qsf.replaceNextDataset(datasets_train,
thr_result[n_train],
train_next= train_next[n_train],
nset=n_train,
period=len(train_next))
qsf._nextFileSlot(train_next[n_train])
qsf.add_file_to_dataset(dataset = dataset_train,
new_dataset = thr_result[n_train],
train_next = train_next[n_train])
qsf.print_time("Done")
thr_result = []
fpaths = []
for n_train in range(len(train_next)):
if train_next[n_train]['files'] > train_next[n_train]['slots']:
for n_train in range(len(trains_to_update)):
if trains_to_update[n_train]:
fpaths.append(files['train'][n_train][train_next[n_train]['file']])
qsf.print_time("Will read in background: "+fpaths[-1])
thr = Thread(target=qsf.getMoreFiles, args=(fpaths,thr_result, CLUSTER_RADIUS, HOR_FLIP, TILE_LAYERS, TILE_SIDE))
thr.start()
file_index = epoch % num_train_variants
train_buf_index = epoch % TRAIN_BUFFER_CPU # GPU memory from CPU memory (now 4)
if epoch >=600:
learning_rate = LR600
elif epoch >=400:
@@ -463,20 +449,20 @@ with tf.Session() as sess:
learning_rate = LR100
else:
learning_rate = LR
# print ("sr1",file=sys.stderr,end=" ")
if (file_index == 0) and SHUFFLE_FILES:
num_train_sets # num_sets = len(datasets_train_all)
if (train_buf_index == 0) and SHUFFLE_FILES:
qsf.print_time("Shuffling how datasets datasets_train_lvar and datasets_train_hvar are zipped together", end="")
for i in range(num_train_sets):
qsf.shuffle_in_place (datasets_train, i, num_train_sets)
qsf.print_time(" Done")
qsf.print_time("Shuffling tile chunks ", end="")
qsf.shuffle_chunks_in_place (datasets_train, 1)
qsf.shuffle_in_place(
dataset_data = dataset_train, #alternating clusters from 4 sources.each cluster has all needed data (concatenated)
period = num_train_subs)
qsf.print_time(" Done")
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: datasets_train[file_index]['corr2d'],
target_disparity_train_placeholder: datasets_train[file_index]['target_disparity'],
gt_ds_train_placeholder: datasets_train[file_index]['gt_ds']})
sti = train_buf_index * dataset_train_size * BATCH_SIZE # TRAIN_BUFFER_GPU * num_train_subs
eti = sti+ dataset_train_size * BATCH_SIZE# (train_buf_index +1) * TRAIN_BUFFER_GPU * num_train_subs
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_train[sti:eti,:corr2d_len],
target_disparity_train_placeholder: dataset_train[sti:eti,corr2d_len:corr2d_len+target_disparity_len],
gt_ds_train_placeholder: dataset_train[sti:eti,corr2d_len+target_disparity_len:] })
for i in range(dataset_train_size):
try:
# train_summary,_, GW_loss_trained, G_loss_trained, W_loss_trained, output, disp_slice, d_gt_slice, out_diff, out_diff2, w_norm, out_wdiff2, out_cost1, gt_variance = sess.run(
@@ -511,7 +497,6 @@ with tf.Session() as sess:
tf_img_test9: img_gain_test9}) # previous value of *_avg #Fetch argument 0.0 has invalid type <class 'float'>, must be a string or Tensor. (Can not convert a float into a Tensor or Operation.)
loss_gw_train_hist[i] = GW_loss_trained
# loss_g_train_hist[i] = G_loss_trained
for nn, gl in enumerate(G_losses_trained):
loss_g_train_hists[nn][i] = gl
loss_s_train_hist[i] = S_loss_trained
@@ -519,8 +504,9 @@ with tf.Session() as sess:
loss2_train_hist[i] = out_cost1
gtvar_train_hist[i] = gt_variance
except tf.errors.OutOfRangeError:
print("train done at step %d"%(i))
print("****** NO MORE DATA! train done at step %d"%(i))
break
# print ("==== i=%d, GW_loss_trained=%f loss_gw_train_hist[%d]=%f ===="%(i,GW_loss_trained,i,loss_gw_train_hist[i]))
train_gw_avg = np.average(loss_gw_train_hist).astype(np.float32)
train_g_avg = np.average(loss_g_train_hist).astype(np.float32)
@@ -536,9 +522,10 @@ with tf.Session() as sess:
tst_avg = [0.0]*len(datasets_test)
tst2_avg = [0.0]*len(datasets_test)
for ntest,dataset_test in enumerate(datasets_test):
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_test['corr2d'],
target_disparity_train_placeholder: dataset_test['target_disparity'],
gt_ds_train_placeholder: dataset_test['gt_ds']})
sess.run(iterator_tt.initializer, feed_dict={corr2d_train_placeholder: dataset_test[:, :corr2d_len], #['corr2d'],
target_disparity_train_placeholder: dataset_test[:, corr2d_len:corr2d_len+target_disparity_len], # ['target_disparity'],
gt_ds_train_placeholder: dataset_test[:, corr2d_len+target_disparity_len:] }) # ['gt_ds']})
for i in range(dataset_test_size):
try:
test_summaries[ntest], GW_loss_tested, G_losses_tested, S_loss_tested, W_loss_tested, output, disp_slice, d_gt_slice, out_diff, out_diff2, w_norm, out_wdiff2, out_cost1, gt_variance = sess.run(
@@ -597,8 +584,12 @@ with tf.Session() as sess:
test_writer.add_summary(test_summaries[0], epoch)
test_writer1.add_summary(test_summaries[1], epoch)
qsf.print_time("%d:%d -> %f %f %f (%f %f %f) dbg:%f %f"%(epoch,i,train_gw_avg, tst_avg[0], tst_avg[1], train2_avg, tst2_avg[0], tst2_avg[1], gtvar_train_avg, gtvar_test_avg))
qsf.print_time("==== %d:%d -> %f %f %f (%f %f %f) dbg:%f %f ===="%(epoch,i,train_gw_avg, tst_avg[0], tst_avg[1], train2_avg, tst2_avg[0], tst2_avg[1], gtvar_train_avg, gtvar_test_avg))
if (((epoch + 1) == EPOCHS_TO_RUN) or (((epoch + 1) % EPOCHS_FULL_TEST) == 0)) and (len(image_data) > 0) :
if (epoch + 1) == EPOCHS_TO_RUN: # last
print("Last epoch, removing train/test datasets to reduce memory footprint")
del(dataset_train)
del(dataset_test)
last_epoch = (epoch + 1) == EPOCHS_TO_RUN
ind_img = [0]
if last_epoch:
@@ -622,6 +613,8 @@ with tf.Session() as sess:
files = files,
indx = ntest,
cluster_radius = CLUSTER_RADIUS,
tile_layers = TILE_LAYERS,
tile_side = TILE_SIDE,
width = IMG_WIDTH,
replace_nans = True)
......
@@ -2,7 +2,7 @@
<properties>
<parameters>
<EPOCHS_TO_RUN> 650 </EPOCHS_TO_RUN> <!-- 752# 3000#0 #0 -->
<NET_ARCH1> 0 </NET_ARCH1> <!--1-st stage network -->
<NET_ARCH1> 1 </NET_ARCH1> <!--1-st stage network -->
<NET_ARCH2> 9 </NET_ARCH2> <!-- 2-nd stage network -->
<SYM8_SUB> False </SYM8_SUB> <!-- enforce inputs from 2d correlation have symmetrical ones (groups of 8) -->
<SPREAD_CONVERGENCE> False </SPREAD_CONVERGENCE><!-- Input target disparity to all nodes of the 1-st stage -->
@@ -22,7 +22,7 @@
<ONLY_TILE> None </ONLY_TILE> <!-- (remove all but center tile data), put None here for normal operation) -->
<CLUSTER_RADIUS> 2 </CLUSTER_RADIUS> <!-- 1 # 1 - 3x3, 2 - 5x5 tiles -->
<SHUFFLE_FILES> True </SHUFFLE_FILES>
<WLOSS_LAMBDA> 3.0 </WLOSS_LAMBDA> <!-- fraction of the W_loss (input layers weight non-uniformity) added to G_loss -->
<WLOSS_LAMBDA> 0.1 </WLOSS_LAMBDA> <!-- fraction of the W_loss (input layers weight non-uniformity) added to G_loss -->
<SLOSS_LAMBDA> 0.1 </SLOSS_LAMBDA> <!-- weight of loss for smooth fg/bg transitions -->
<SLOSS_CLIP> 0.2 </SLOSS_CLIP> <!-- limit punishment for cutting corners (disparity pix) -->
<WBORDERS_ZERO> True </WBORDERS_ZERO> <!-- Border conditions for first layer weights: False - free, True - tied to 0 -->
@@ -35,7 +35,20 @@
<BATCH_WEIGHTS> [0.9, 1.0, 0.9, 1.0]</BATCH_WEIGHTS> <!-- lvar, hvar, lvar1, hvar1 (increase importance of non-flat clusters -->
<DISP_DIFF_CAP> 0.3 </DISP_DIFF_CAP><!-- cap disparity difference (do not increase loss above)-->
<DISP_DIFF_SLOPE> 0.03 </DISP_DIFF_SLOPE><!-- allow squared error to grow above DISP_DIFF_CAP -->
<TRAIN_BUFFER_GPU> 79 </TRAIN_BUFFER_GPU> <!-- in batches merged (now quad) batches-->
<TRAIN_BUFFER_CPU> 4 </TRAIN_BUFFER_CPU> <!-- in TRAIN_BUFFER_GPU-s -->
</parameters>
<dbg_parameters>
<disparity_ranges>
[[[0.0, 0.6,[140,230,135,60]], "Overlook"],
[[0.0, 1.0,[120,180,125,80]],[2.0, 4.0, [50,130,125, 70]], "State Street1"],
[[0.0, 1.0,[130,210,135,95]],[0.5, 2.5, [50,150,150, 75]], "State Street2"],
[ [1.0, 2.5, [90,170, 50, 0]], "B737 near"],
[ [0.75, 1.5, [125,150, 90, 70]], "B737 midrange"],
[ [0.4, 0.8, [135,150,102,112]], "B737 far"]]
</disparity_ranges>
</dbg_parameters>
<directories>
<train_lvar>
"tf_data_5x5_main_1"
@@ -211,8 +224,8 @@
</test_hvar>
<images>
["1527256858_150165-v01", <!-- State Street -->
"1527257933_150165-v04", <!-- overlook -->
["1527257933_150165-v04", <!-- overlook -->
"1527256858_150165-v01", <!-- State Street -->
"1527256816_150165-v02", <!-- State Street -->
"1527182802_096892-v02", <!-- plane near plane -->
"1527182805_096892-v02", <!-- plane midrange used up to -49 plane -->
......
@@ -83,25 +83,20 @@ def prepareFiles(dirs, files, suffix):
for i, path in enumerate(files['images']):
files['images'][i] = os.path.join(dirs['images'], path+'.tfrecords')
def readTFRewcordsEpoch(train_filename):
def readTFRewcordsEpoch(train_filename, cluster_radius):
if not '.tfrecords' in train_filename:
train_filename += '.tfrecords'
npy_dir_name = "npy"
dirname = os.path.dirname(train_filename)
npy_dir = os.path.join(dirname, npy_dir_name)
# filebasename, file_extension = os.path.splitext(train_filename)
filebasename, _ = os.path.splitext(train_filename)
filebasename = os.path.basename(filebasename)
file_corr2d = os.path.join(npy_dir,filebasename + '_corr2d.npy')
file_target_disparity = os.path.join(npy_dir,filebasename + '_target_disparity.npy')
file_gt_ds = os.path.join(npy_dir,filebasename + '_gt_ds.npy')
if (os.path.exists(file_corr2d) and
os.path.exists(file_target_disparity) and
os.path.exists(file_gt_ds)):
corr2d= np.load (file_corr2d)
target_disparity = np.load(file_target_disparity)
gt_ds = np.load(file_gt_ds)
pass
file_all = os.path.join(npy_dir,filebasename + '.npy')
if os.path.exists(file_all):
data = np.load (file_all)
# corr2d= np.load (file_corr2d)
# target_disparity = np.load(file_target_disparity)
# gt_ds = np.load(file_gt_ds)
else:
record_iterator = tf.python_io.tf_record_iterator(path=train_filename)
corr2d_list=[]
@@ -118,30 +113,25 @@ def readTFRewcordsEpoch(train_filename):
target_disparity = np.array(target_disparity_list)
gt_ds = np.array(gt_ds_list)
try:
os.makedirs(os.path.dirname(file_corr2d))
os.makedirs(os.path.dirname(file_all))
except:
pass
pass
if cluster_radius > 0:
reformat_to_clusters(corr2d, target_disparity, gt_ds, cluster_radius)
data = np.concatenate([corr2d, target_disparity, gt_ds],axis = 1)
np.save(file_all, data)
return data
np.save(file_corr2d, corr2d)
np.save(file_target_disparity, target_disparity)
np.save(file_gt_ds, gt_ds)
return corr2d, target_disparity, gt_ds
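# Small illustration of the .npy cache path built above, using a hypothetical
# tfrecords file name: parsed records are concatenated into one array and saved
# once, so later epochs can np.load() a single file instead of re-parsing tfrecords.
import os
_train_filename = "/data/tf_data_5x5_main_1/train000.tfrecords"  # hypothetical path
_npy_dir = os.path.join(os.path.dirname(_train_filename), "npy")
_base, _ = os.path.splitext(os.path.basename(_train_filename))
_file_all = os.path.join(_npy_dir, _base + ".npy")
# _file_all -> "/data/tf_data_5x5_main_1/npy/train000.npy" (on a POSIX path)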
def getMoreFiles(fpaths,rslt, cluster_radius, hor_flip, tile_layers, tile_side):
for fpath in fpaths:
corr2d, target_disparity, gt_ds = readTFRewcordsEpoch(fpath)
dataset = {"corr2d": corr2d,
"target_disparity": target_disparity,
"gt_ds": gt_ds}
"""
if FILE_TILE_SIDE > TILE_SIDE:
reduce_tile_size([dataset], TILE_LAYERS, TILE_SIDE)
"""
reformat_to_clusters([dataset], cluster_radius)
dataset = readTFRewcordsEpoch(fpath, cluster_radius)
if hor_flip:
if np.random.randint(2):
print_time("Performing horizontal flip", end=" ")
flip_horizontal([dataset], cluster_radius, tile_layers, tile_side)
flip_horizontal(dataset, cluster_radius, tile_layers, tile_side)
print_time("Done")
rslt.append(dataset)
@@ -205,47 +195,53 @@ def extend_img_to_clusters(datasets_img,radius, width): # = 324):
rec['target_disparity'] = add_neibs(add_margins(rec['target_disparity'].reshape((height,width,-1)), radius, np.nan), radius).reshape((num_tiles,-1))
rec['gt_ds'] = add_neibs(add_margins(rec['gt_ds'].reshape((height,width,-1)), radius, np.nan), radius).reshape((num_tiles,-1))
pass
def reformat_to_clusters(datasets_data, cluster_radius):
def reformat_to_clusters_rec(datasets_data, cluster_radius):
cluster_size = (2 * cluster_radius + 1) * (2 * cluster_radius + 1)
# Reformat input data
for rec in datasets_data:
rec['corr2d'] = rec['corr2d'].reshape( (rec['corr2d'].shape[0]//cluster_size, rec['corr2d'].shape[1] * cluster_size))
rec['target_disparity'] = rec['target_disparity'].reshape((rec['target_disparity'].shape[0]//cluster_size, rec['target_disparity'].shape[1] * cluster_size))
rec['gt_ds'] = rec['gt_ds'].reshape( (rec['gt_ds'].shape[0]//cluster_size, rec['gt_ds'].shape[1] * cluster_size))
def reformat_to_clusters(corr2d, target_disparity, gt_ds, cluster_radius):
cluster_size = (2 * cluster_radius + 1) * (2 * cluster_radius + 1)
# Reformat input data
corr2d.shape = ((corr2d.shape[0]//cluster_size, corr2d.shape[1] * cluster_size))
target_disparity.shape = ((target_disparity.shape[0]//cluster_size, target_disparity.shape[1] * cluster_size))
gt_ds.shape = ((gt_ds.shape[0]//cluster_size, gt_ds.shape[1] * cluster_size))
def flip_horizontal(datasets_data, cluster_radius, tile_layers, tile_side):
def get_lengths(cluster_radius, tile_layers, tile_side):
cluster_side = 2 * cluster_radius + 1
# cluster_size = cluster_side * cluster_side
cl = cluster_side * cluster_side * tile_layers * tile_side * tile_side
tl = cluster_side * cluster_side
return cl, tl,cluster_side
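# Worked example of get_lengths(), assuming CLUSTER_RADIUS = 2, TILE_LAYERS = 4,
# TILE_SIDE = 9 (the values quoted elsewhere in this commit). Each row of a merged
# dataset is the concatenation [corr2d | target_disparity | gt_ds], sliced with these lengths.
_cl, _tl, _side = get_lengths(2, 4, 9)
assert (_cl, _tl, _side) == (25 * 4 * 81, 25, 5)  # 8100 correlation values, 25 tiles, 5x5 cluster
# row[:_cl]         -> 2d correlations for the whole 5x5 cluster
# row[_cl:_cl+_tl]  -> one target disparity per tile
# row[_cl+_tl:]     -> ground-truth disparity/strength, 2 values per tile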
def flip_horizontal(dataset, cluster_radius, tile_layers, tile_side):
cl,tl,cluster_side = get_lengths(cluster_radius, tile_layers, tile_side)
corr2d = dataset[:,:cl] .reshape([dataset.shape[0], cluster_side, cluster_side, tile_layers, tile_side, tile_side])
target_disparity = dataset[:,cl:cl+tl].reshape([dataset.shape[0], cluster_side, cluster_side, -1])
gt_ds = dataset[:,cl+tl:] .reshape([dataset.shape[0], cluster_side, cluster_side, -1])
"""
TILE_LAYERS = 4
TILE_SIDE = 9 # 7
TILE_SIZE = TILE_SIDE* TILE_SIDE # == 81
Horizontal flip of tiles
"""
for rec in datasets_data:
corr2d = rec['corr2d'].reshape( (rec['corr2d'].shape[0], cluster_side, cluster_side, tile_layers, tile_side, tile_side))
target_disparity = rec['target_disparity'].reshape((rec['corr2d'].shape[0], cluster_side, cluster_side, -1))
gt_ds = rec['gt_ds'].reshape( (rec['corr2d'].shape[0], cluster_side, cluster_side, -1))
"""
Horizontal flip of tiles
"""
corr2d = corr2d[:,:,::-1,...]
target_disparity = target_disparity[:,:,::-1,...]
gt_ds = gt_ds[:,:,::-1,...]
corr2d[:,:,:,0,:,:] = corr2d[:,:,:,0,::-1,:] # flip vertical layer0 (hor)
corr2d[:,:,:,1,:,:] = corr2d[:,:,:,1,:,::-1] # flip horizontal layer1 (vert)
corr2d_2 = corr2d[:,:,:,3,::-1,:].copy() # flip vertical layer3 (diago)
corr2d[:,:,:,3,:,:] = corr2d[:,:,:,2,::-1,:] # flip vertical layer2 (diago)
corr2d[:,:,:,2,:,:] = corr2d_2
rec['corr2d'] = corr2d.reshape((corr2d.shape[0],-1))
rec['target_disparity'] = target_disparity.reshape((target_disparity.shape[0],-1))
rec['gt_ds'] = gt_ds.reshape((gt_ds.shape[0],-1))
corr2d = corr2d[:,:,::-1,...]
target_disparity = target_disparity[:,:,::-1,...]
gt_ds = gt_ds[:,:,::-1,...]
corr2d[:,:,:,0,:,:] = corr2d[:,:,:,0,::-1,:] # flip vertical layer0 (hor)
corr2d[:,:,:,1,:,:] = corr2d[:,:,:,1,:,::-1] # flip horizontal layer1 (vert)
corr2d_2 = corr2d[:,:,:,3,::-1,:].copy() # flip vertical layer3 (diago)
corr2d[:,:,:,3,:,:] = corr2d[:,:,:,2,::-1,:] # flip vertical layer2 (diago)
corr2d[:,:,:,2,:,:] = corr2d_2
"""
pack back into a single (input)array
"""
dataset[:,:cl] = corr2d.reshape((corr2d.shape[0],-1))
dataset[:,cl:cl+tl] = target_disparity.reshape((target_disparity.shape[0],-1))
dataset[:,cl+tl:] = gt_ds.reshape((gt_ds.shape[0],-1))
def replace_nan(datasets_data): # , cluster_radius):
# cluster_size = (2 * cluster_radius + 1) * (2 * cluster_radius + 1)
# Reformat input data
for rec in datasets_data:
if not rec is None:
@@ -263,185 +259,112 @@ def permute_to_swaps(perm):
perm[i] = i
return pairs
def shuffle_in_place(datasets_data, indx, period):
swaps = permute_to_swaps(np.random.permutation(len(datasets_data)))
# num_entries = datasets_data[0]['corr2d'].shape[0] // period
for swp in swaps:
ds0 = datasets_data[swp[0]]
ds1 = datasets_data[swp[1]]
tmp = ds0['corr2d'][indx::period].copy()
ds0['corr2d'][indx::period] = ds1['corr2d'][indx::period]
ds1['corr2d'][indx::period] = tmp
tmp = ds0['target_disparity'][indx::period].copy()
ds0['target_disparity'][indx::period] = ds1['target_disparity'][indx::period]
ds1['target_disparity'][indx::period] = tmp
tmp = ds0['gt_ds'][indx::period].copy()
ds0['gt_ds'][indx::period] = ds1['gt_ds'][indx::period]
ds1['gt_ds'][indx::period] = tmp
def shuffle_chunks_in_place(datasets_data, tiles_groups_per_chunk):
"""
Improve shuffling by preserving indices inside batches (0 <->0, ... 39 <->39 for 40 tile group batches)
"""
# num_files = len(datasets_data)
#chunks_per_file = datasets_data[0]['target_disparity']
# for nf, ds in enumerate(datasets_data):
for ds in datasets_data:
groups_per_file = ds['corr2d'].shape[0]
chunks_per_file = groups_per_file//tiles_groups_per_chunk
permut = np.random.permutation(chunks_per_file)
ds['corr2d'] = ds['corr2d']. reshape((chunks_per_file,-1))[permut].reshape((groups_per_file,-1))
ds['target_disparity'] = ds['target_disparity'].reshape((chunks_per_file,-1))[permut].reshape((groups_per_file,-1))
ds['gt_ds'] = ds['gt_ds']. reshape((chunks_per_file,-1))[permut].reshape((groups_per_file,-1))
def _setFileSlot(train_next, files, max_files_per_group):
train_next['files'] = files
train_next['slots'] = min(train_next['files'], max_files_per_group)
def _nextFileSlot(train_next):
train_next['file'] = (train_next['file'] + 1) % train_next['files']
train_next['slot'] = (train_next['slot'] + 1) % train_next['slots']
def replaceNextDataset(datasets_data, new_dataset, train_next, nset,period):
replaceDataset(datasets_data, new_dataset, nset, period, findx = train_next['slot'])
# _nextFileSlot(train_next[nset])
def shuffle_in_place(dataset_data, #alternating clusters from 4 sources.each cluster has all needed data (concatenated)
period):
for i in range (period):
np.random.shuffle(dataset_data[i::period])
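# Minimal check of the interleaved shuffle above: the merged buffer alternates rows
# from `period` sources (row k comes from source k % period); shuffling each strided
# slice permutes rows within a source but never mixes sources. Toy data only.
import numpy as np
_period = 4
_toy = np.array([[k % _period, k // _period] for k in range(20)], dtype=np.float32)
shuffle_in_place(_toy, _period)
assert all((_toy[i::_period, 0] == i).all() for i in range(_period))  # source ids stay in their slots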
def add_file_to_dataset(dataset, new_dataset, train_next):
l = new_dataset.shape[0] * train_next['step']
rollover = False
if (train_next['entry'] + l) < (train_next['entries']+train_next['step']):
dataset[train_next['entry']:train_next['entry']+l:train_next['step']] = new_dataset
else: # split it two parts
rollover = True
l = (train_next['entries'] - train_next['entry']) // train_next['step']
dataset[train_next['entry']::train_next['step']] = new_dataset[:l]
def replaceDataset(datasets_data, new_dataset, nset, period, findx):
"""
Replace one file in the dataset
"""
datasets_data[findx]['corr2d'] [nset::period] = new_dataset['corr2d']
datasets_data[findx]['target_disparity'][nset::period] = new_dataset['target_disparity']
datasets_data[findx]['gt_ds'] [nset::period] = new_dataset['gt_ds']
train_next['entry'] = (train_next['entry'] + l * train_next['step']) % train_next['entries']
l1 = new_dataset.shape[0] - l # remainder
ln = train_next['entry'] + l1 * train_next['step']
dataset[train_next['entry']:ln:train_next['step']] = new_dataset[l:]
train_next['entry'] += new_dataset.shape[0] * train_next['step']
train_next['file'] = (train_next['file']+1)%train_next['files']
if (train_next['entry'] >= train_next['entries']):
train_next['entry'] -= train_next['entries']
return True
return rollover
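# Sketch of the strided slot layout add_file_to_dataset() writes into (toy sizes,
# not the real buffer dimensions): source n owns rows n, n+step, n+2*step, ... of
# the merged array, so a newly loaded file replaces only its own source's slots and
# the interleaving expected by shuffle_in_place is preserved.
import numpy as np
_step, _clusters_per_file = 4, 3                  # like train_next[n]['step'] = num_trains
_buffer = np.zeros((_step * _clusters_per_file, 2), dtype=np.float32)
for _src in range(_step):                         # train_next[n]['entry'] starts at n
    _new_file = np.full((_clusters_per_file, 2), _src, dtype=np.float32)
    _buffer[_src::_step] = _new_file              # strided write into this source's slots
assert (_buffer[:, 0] == np.tile(np.arange(_step), _clusters_per_file)).all()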
def zip_lvar_hvar(datasets_all_data, del_src = True):
# cluster_size = (2 * CLUSTER_RADIUS + 1) * (2 * CLUSTER_RADIUS + 1)
# Reformat input data
num_sets_to_combine = len(datasets_all_data)
datasets_data = []
if num_sets_to_combine:
for nrec in range(len(datasets_all_data[0])):
recs = [[] for _ in range(num_sets_to_combine)]
for nset, datasets in enumerate(datasets_all_data):
recs[nset] = datasets[nrec]
rec = {'corr2d': np.empty((recs[0]['corr2d'].shape[0]*num_sets_to_combine, recs[0]['corr2d'].shape[1]),dtype=np.float32),
'target_disparity': np.empty((recs[0]['target_disparity'].shape[0]*num_sets_to_combine,recs[0]['target_disparity'].shape[1]),dtype=np.float32),
'gt_ds': np.empty((recs[0]['gt_ds'].shape[0]*num_sets_to_combine, recs[0]['gt_ds'].shape[1]),dtype=np.float32)}
# for nset, reci in enumerate(recs):
for nset, _ in enumerate(recs):
rec['corr2d'] [nset::num_sets_to_combine] = recs[nset]['corr2d']
rec['target_disparity'][nset::num_sets_to_combine] = recs[nset]['target_disparity']
rec['gt_ds'] [nset::num_sets_to_combine] = recs[nset]['gt_ds']
if del_src:
for nset in range(num_sets_to_combine):
datasets_all_data[nset][nrec] = None
datasets_data.append(rec)
return datasets_data
"""
train_next[n_train]
Read as many files as needed, possibly repeating, until each buffer is full
"""
# list of dictionaries
def reduce_tile_size(datasets_data, num_tile_layers, reduced_tile_side):
if (not datasets_data is None) and (len (datasets_data) > 0):
tsz = (datasets_data[0]['corr2d'].shape[1])// num_tile_layers # 81 # list index out of range
tss = int(np.sqrt(tsz)+0.5)
offs = (tss - reduced_tile_side) // 2
for rec in datasets_data:
rec['corr2d'] = (rec['corr2d'].reshape((-1, num_tile_layers, tss, tss))
[..., offs:offs+reduced_tile_side, offs:offs+reduced_tile_side].
reshape(-1,num_tile_layers*reduced_tile_side*reduced_tile_side))
def initTrainTestData(
files,
cluster_radius,
max_files_per_group, # shuffling buffer for files
two_trains,
train_next):
# datasets_train_lvar = []
# datasets_train_hvar = []
# datasets_train_lvar1 = []
# datasets_train_hvar1 = []
datasets_train_all = [[],[],[],[]]
buffer_size, # number of clusters per train
):
"""
Generates a single np array for training with concatenated cluster of corr2d,
cluster of target_disparity, and cluster of gt_ds for convenient shuffling
"""
num_trains = len(files['train'])
num_entries = num_trains * buffer_size
# dataset_train_all = None
dataset_train_merged = None
train_next = [None]*num_trains
for n_train, f_train in enumerate(files['train']):
if len(f_train) and ((n_train<2) or two_trains):
_setFileSlot(train_next[n_train], len(f_train), max_files_per_group)
for i, fpath in enumerate(f_train):
if i >= max_files_per_group:
break
train_next[n_train] = {'file':0, 'entry':n_train, 'files':len(f_train), 'entries': num_entries, 'step':num_trains, 'more_files':False}
buffer_full = False
while not buffer_full:
for fpath in f_train:
print_time("Importing train data "+(["low variance","high variance", "low variance1","high variance1"][n_train]) +" from "+fpath, end="")
corr2d, target_disparity, gt_ds = readTFRewcordsEpoch(fpath)
datasets_train_all[n_train].append({"corr2d":corr2d,
"target_disparity":target_disparity,
"gt_ds":gt_ds})
_nextFileSlot(train_next[n_train])
new_dataset = readTFRewcordsEpoch(fpath, cluster_radius)
if dataset_train_merged is None:
dataset_train_merged = np.empty([num_entries,new_dataset.shape[1]], dtype =new_dataset.dtype)
rollover = add_file_to_dataset(
dataset = dataset_train_merged,
new_dataset = new_dataset,
train_next = train_next[n_train])
print_time(" Done")
if rollover:
buffer_full = True
train_next[n_train][ 'more_files'] = train_next[n_train][ 'file'] < train_next[n_train][ 'files'] # Not all files used, need to load during training
break
datasets_test_lvar = []
for fpath in files['test_lvar']:
print_time("Importing test data (low variance) from "+fpath, end="")
corr2d, target_disparity, gt_ds = readTFRewcordsEpoch(fpath)
datasets_test_lvar.append({"corr2d":corr2d,
"target_disparity":target_disparity,
"gt_ds":gt_ds})
new_dataset = readTFRewcordsEpoch(fpath, cluster_radius)
datasets_test_lvar.append(new_dataset)
print_time(" Done")
datasets_test_hvar = []
for fpath in files['test_hvar']:
print_time("Importing test data (high variance) from "+fpath, end="")
corr2d, target_disparity, gt_ds = readTFRewcordsEpoch(fpath)
datasets_test_hvar.append({"corr2d":corr2d,
"target_disparity":target_disparity,
"gt_ds":gt_ds})
new_dataset = readTFRewcordsEpoch(fpath, cluster_radius)
datasets_test_hvar.append(new_dataset)
print_time(" Done")
# Reformat to 1/9/25 tile clusters
for n_train, d_train in enumerate(datasets_train_all):
print_time("Reshaping train data ("+(["low variance","high variance", "low variance1","high variance1"][n_train])+") ", end="")
reformat_to_clusters(d_train, cluster_radius)
print_time(" Done")
print_time("Reshaping test data (low variance)", end="")
reformat_to_clusters(datasets_test_lvar, cluster_radius)
print_time(" Done")
print_time("Reshaping test data (high variance)", end="")
reformat_to_clusters(datasets_test_hvar, cluster_radius)
print_time(" Done")
pass
"""
datasets_train_lvar & datasets_train_hvar ( that will increase batch size and placeholders twice
test has to have even original, batches will not zip - just use two batches for one big one
"""
print_time("Zipping together datasets datasets_train_lvar and datasets_train_hvar", end="")
datasets_train = zip_lvar_hvar(datasets_train_all, del_src = True) # no shuffle, delete src
print_time(" Done")
datasets_test = []
for dataset_test_lvar in datasets_test_lvar:
datasets_test.append(dataset_test_lvar)
for dataset_test_hvar in datasets_test_hvar:
datasets_test.append(dataset_test_hvar)
return datasets_train, datasets_test, len(datasets_train_all) # 4
return train_next, dataset_train_merged, datasets_test
def readImageData(image_data,
files,
indx,
cluster_radius,
tile_layers,
tile_side,
width,
replace_nans):
cl,tl,_ = get_lengths(0, tile_layers, tile_side)
if image_data[indx] is None:
corr2d, target_disparity, gt_ds = readTFRewcordsEpoch(files['images'][indx])
dataset = readTFRewcordsEpoch(files['images'][indx], cluster_radius = 0)
corr2d = dataset[:,:cl]
target_disparity = dataset[:,cl:cl+tl]
gt_ds = dataset[:,cl+tl:]
image_data[indx] = {
'corr2d': corr2d,
'target_disparity': target_disparity,
@@ -453,23 +376,31 @@ def readImageData(image_data,
cluster_radius,
width)
if replace_nans:
# replace_nan([image_data[indx]], cluster_radius)
replace_nan([image_data[indx]])
return image_data[indx]
def initImageData(files,
max_imgs,
cluster_radius,
tile_layers,
tile_side,
width,
replace_nans):
num_imgs = len(files['images'])
img_data = [None] * num_imgs
for nfile in range(min(num_imgs, max_imgs)):
print_time("Importing test image data from "+ files['images'][nfile], end="")
readImageData(img_data,files, nfile, cluster_radius, width, replace_nans)
readImageData(img_data,
files,
nfile,
cluster_radius,
tile_layers,
tile_side,
width,
replace_nans)
print_time(" Done")
return img_data
return img_data
def evaluateAllResults(result_files, absolute_disparity, cluster_radius):
for result_file in result_files:
......