Updating textures and related constants

2bfa77d0 · Andrey Filippov · 6a2b0886 · 2bfa77d0 · 2bfa77d0 · 2bfa77d0
Commit 2bfa77d0 authored Nov 19, 2022 by Andrey Filippov
Show whitespace changes
Inline Side-by-side

Showing with 18 additions and 15 deletions

TileProcessor.cuh src/TileProcessor.cuh +13 -10

test_tp.cu src/test_tp.cu +2 -2

tp_defines.h src/tp_defines.h +3 -3

No files found.
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
@@ -2749,8 +2749,10 @@ __global__ void gen_texture_list(
 #endif // DEBUG12
 //	*(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
 	// keep only 8 LSBs of task, use higher 24 for task number
-	*(gpu_texture_indices + buf_offset) = (task & ((1 << CORR_NTILE_SHIFT) -1)) | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
+//	*(gpu_texture_indices + buf_offset) = (task & ((1 << TEXT_NTILE_SHIFT) -1)) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
-	//CORR_NTILE_SHIFT
+	// keep only 4 lower task bits
+	*(gpu_texture_indices + buf_offset) = (task & TASK_TEXTURE_BITS) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
+	//CORR_NTILE_SHIFT // TASK_TEXTURE_BITS
 }
 //inline __device__ int get_task_size(int num_cams){
 //	return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams);
@@ -2822,7 +2824,7 @@ extern "C" __global__ void create_nonoverlap_list(
 	}
 ///	int cxy = gpu_tasks[num_tile].txy;
 	int cxy = get_task_txy(num_tile, gpu_ftasks, num_cams);
-	int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
+	int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
 //	if (gpu_tasks[num_tile].task != 0) {
 	if (task_task != 0) {
 		nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code;
@@ -3401,10 +3403,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
 	}
 	// get number of tile
 	int tile_code = gpu_texture_indices[tile_indx + gpu_texture_indices_offset]; // Added for Java, no DP
-	if ((tile_code & (1 << CORR_TEXTURE_BIT)) == 0){
+	if ((tile_code & (1 << LIST_TEXTURE_BIT)) == 0){
 		return; // nothing to do
 	}
-	int tile_num = tile_code >> CORR_NTILE_SHIFT;
+	int tile_num = tile_code >> TEXT_NTILE_SHIFT;
 #ifdef DEBUG7A
 	__syncthreads();// __syncwarp();
 	if ((tile_num == DBG_TILE)  && (threadIdx.x == 0) && (threadIdx.y == 0)){
@@ -3784,6 +3786,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
 #endif // DEBUG12
 		int alpha_mode = alphaIndex[tile_code]; // only 4 lowest bits
 		if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
+			// TODO: calculate per-color average and add with (1.0-alphaFade) for colors only, no
 			for (int pass = 0; pass < 8; pass ++) {
 				int row = pass * 2 + (threadIdx.y >> 1);
 				int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
@@ -3843,10 +3846,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
 			}
 			__syncthreads();// __syncwarp();
 #endif // DEBUG12
-			///			if (!border_tile ||
-			///					((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILES-Y)) && (g_col < (DTT_SIZE * TILES-X)))){
-			///					((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILES-X)))){
 			// always copy 3 (1) colors + alpha
 			if (colors == 3){
 #pragma unroll
@@ -3871,7 +3870,11 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
 						int gi = row * DTT_SIZE2  + col;
 						float * mclt_dst_i = mclt_dst_ncam +  i;
 						for (int ncol = 0; ncol < colors; ncol++) {
-							*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
+//							*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
+							float a = alphaFade[alpha_mode][gi];
+							float v = a * (*(mclt_dst_i + ncol * (MCLT_UNION_LEN))) +
+									  (1 - a) * ports_rgb_shared[ncol * num_cams + ncam];
+							*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) = v; // see if ports_rgb_shared[] meeds scaling
 						}
 					}
 				}

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -1140,9 +1140,9 @@ int main(int argc, char **argv)
    	for (int tx = 0; tx < TILESX; tx++){
    		int nt = ty * TILESX + tx;
            float *tp = ftask_data + task_size * nt;
-    		int cm = (*(int *) tp) & TASK_TEXTURE_BITS;
+    		int cm = (*(int *) tp) & TASK_TEXTURE_BITS; // non-zero any of 4 lower task bits
    		if (cm){
-    			texture_indices[num_textures++] = (nt << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
+    			texture_indices[num_textures++] = (nt << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT); // setting  0x80 in texture indices
    		}
    	}
    }

--- a/src/tp_defines.h
+++ b/src/tp_defines.h
@@ -62,15 +62,15 @@
 #define CORR_NTILE_SHIFT               8 // higher bits - number of a pair, other bits tile number
 // only lower bit will be used to request correlations, correlation mask will be common for all the scene
 //#define CORR_PAIRS_MASK             0x3f// lower bits used to address correlation pair for the selected tile
-#define CORR_TEXTURE_BIT               7 // bit 7 used to request texture for the tile
+//#define CORR_TEXTURE_BIT               7 // bit 7 used to request texture for the tile
 #define TASK_CORR_BITS                 4
 #define TASK_TEXTURE_N_BIT             0 // Texture with North neighbor
 #define TASK_TEXTURE_E_BIT             1 // Texture with East  neighbor
 #define TASK_TEXTURE_S_BIT             2 // Texture with South neighbor
 #define TASK_TEXTURE_W_BIT             3 // Texture with West  neighbor
-//#define TASK_TEXTURE_BIT               3 // bit to request texture calculation int task field of struct tp_task
 #define LIST_TEXTURE_BIT               7 // bit to request texture calculation
-//#define CORR_OUT_RAD                 7 // full tile (15x15), was 4 (9x9)
+#define TEXT_NTILE_SHIFT               8 // tile number shift for texture calculation (will be different from CORR_NTILE_SHIFT!)
 #define FAT_ZERO_WEIGHT                0.0001 // add to port weights to avoid nan
 #define THREADS_DYNAMIC_BITS           5 // treads in block for CDP creation of the texture list