adding generation of data for macroblocks

fdc9840a · Andrey Filippov · ee71f035 · fdc9840a · fdc9840a · fdc9840a
Commit fdc9840a authored Apr 17, 2020 by Andrey Filippov
Show whitespace changes
Inline Side-by-side

Showing with 23 additions and 13 deletions

TileProcessor.cuh src/TileProcessor.cuh +11 -8

TileProcessor.h src/TileProcessor.h +3 -1

test_tp.cu src/test_tp.cu +9 -4

No files found.
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
@@ -1196,7 +1196,8 @@ __global__ void generate_RBGA(
 			int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
 			int               keep_weights,       // return channel weights after A in RGBA (was removed)
 			const size_t      texture_rbga_stride,     // in floats
-			float           * gpu_texture_tiles)  // (number of colors +1 + ?)*16*16 rgba texture tiles
+			float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles
+			float           * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
 {
 // TODO use atomic_add to increment	num_texture_tiles
 // TODO calculate woi
@@ -1329,7 +1330,9 @@ __global__ void generate_RBGA(
 						texture_rbga_stride,             // size_t      texture_rbg_stride, // in floats
 						gpu_texture_tiles,               // float           * gpu_texture_rbg,     // (number of colors +1 + ?)*16*16 rgba texture tiles
 			    		0,                               // size_t      texture_stride,     // in floats (now 256*4 = 1024)
-						gpu_texture_tiles); // (float *) 0 );                   // float           * gpu_texture_tiles);  // (number of colors +1 + ?)*16*16 rgba texture tiles
+						gpu_texture_tiles,               // float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles (alt: (float *) 0)
+						gpu_diff_rgb_combo);             // float           * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
+
 				cudaDeviceSynchronize(); // not needed yet, just for testing
 				/* */
 			}
@@ -1788,8 +1791,8 @@ __global__ void textures_accumulate(
 		size_t            texture_rbg_stride, // in floats
 		float           * gpu_texture_rbg,    // (number of colors +1 + ?)*16*16 rgba texture tiles
 		size_t            texture_stride,     // in floats (now 256*4 = 1024)
-		float           * gpu_texture_tiles)  // (number of colors +1 + ?)*16*16 rgba texture tiles
-
+		float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles
+		float           * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
 {
 	//						(float *) gpu_geometry_correction ->pXY0,
 //	float weights[3] = {weight0, weight1, weight2};
@@ -1997,8 +2000,8 @@ __global__ void textures_accumulate(
 			(float*) shr.mclt_debayer, // float * mclt_tile,     // debayer // has gaps to align with union !
 			(float*) mclt_tiles,       // float * rbg_tile,      // if not null - original (not-debayered) rbg tile to use for the output
 			(float *) shr1.rgbaw,      // float * rgba,          // result
-			(float * ) 0,              // float * ports_rgb,     // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
-			(float * ) 0,              // float * max_diff,      // maximal (weighted) deviation of each channel from the average /null
+			(float * ) ports_rgb,      // float * ports_rgb,     // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
+			(float * ) max_diff,       // float * max_diff,      // maximal (weighted) deviation of each channel from the average /null
 			(float *) port_offsets,    // float * port_offsets,  // [port]{x_off, y_off} - just to scale pixel value differences
 			diff_sigma,                // float   diff_sigma,     // pixel value/pixel change
 			diff_threshold,            // float   diff_threshold, // pixel value/pixel change
@@ -2013,8 +2016,8 @@ __global__ void textures_accumulate(
 			(float*) shr.mclt_debayer, // float * mclt_tile,     // debayer // has gaps to align with union !
 			(float*) mclt_tiles,       // float * rbg_tile,      // if not null - original (not-debayered) rbg tile to use for the output
 			(float *) shr1.rgbaw,      // float * rgba,          // result
-			(float * ) 0,              // float * ports_rgb,     // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
-			(float * ) 0,              // float * max_diff,      // maximal (weighted) deviation of each channel from the average /null
+			(float * ) ports_rgb,      // float * ports_rgb,     // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
+			(float * ) max_diff,       // float * max_diff,      // maximal (weighted) deviation of each channel from the average /null
 			(float *) port_offsets,    // float * port_offsets,  // [port]{x_off, y_off} - just to scale pixel value differences
 			diff_sigma,                // float   diff_sigma,     // pixel value/pixel change
 			diff_threshold,            // float   diff_threshold, // pixel value/pixel change

--- a/src/TileProcessor.h
+++ b/src/TileProcessor.h
@@ -96,7 +96,9 @@ extern "C" __global__ void textures_accumulate(
 		size_t            texture_rbg_stride, // in floats
 		float           * gpu_texture_rbg,    // (number of colors +1 + ?)*16*16 rgba texture tiles
 		size_t            texture_stride,     // in floats (now 256*4 = 1024)
-		float           * gpu_texture_tiles);  // (number of colors +1 + ?)*16*16 rgba texture tiles
+		float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles
+		float           * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
+

 extern "C"
 __global__ void imclt_rbg_all(

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -341,6 +341,7 @@ int main(int argc, char **argv)
    int              * gpu_corr_indices;

    float            * gpu_textures;
+    float            * gpu_diff_rgb_combo;
    float            * gpu_textures_rbga;
    int              * gpu_texture_indices;
    int              * gpu_woi;
@@ -587,7 +588,8 @@ int main(int argc, char **argv)
    		&dstride_textures_rbga,              // in bytes ! for one rgba/ya 16x16 tile
 			rgba_width,              // int width (floats),
 			rgba_height * rbga_slices);               // int height);
-
+//    checkCudaErrors(cudaMalloc((void **)&gpu_diff_rgb_combo,  TILESX * TILESY * NUM_CAMS * (NUM_COLS+1)* sizeof(float)));
+    checkCudaErrors(cudaMalloc((void **)&gpu_diff_rgb_combo,  TILESX * TILESY * NUM_CAMS * (NUM_COLORS + 1) * sizeof(float)));

    // Now copy arrays of per-camera pointers to GPU memory to GPU itself

@@ -1094,7 +1096,9 @@ int main(int argc, char **argv)
    			0, // const size_t      texture_rbg_stride, // in floats
    			(float *) 0, // float           * gpu_texture_rbg,     // (number of colors +1 + ?)*16*16 rgba texture tiles
 				dstride_textures/sizeof(float), // const size_t      texture_stride,     // in floats (now 256*4 = 1024)
-				gpu_textures);    // float           * gpu_texture_tiles);  // 4*16*16 rgba texture tiles
+				gpu_textures,    // float           * gpu_texture_tiles);  // 4*16*16 rgba texture tiles
+				gpu_diff_rgb_combo);             // float           * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
+
    	getLastCudaError("Kernel failure");
    	checkCudaErrors(cudaDeviceSynchronize());
    	printf("test pass: %d\n",i);
@@ -1271,7 +1275,8 @@ int main(int argc, char **argv)
 	            1,                     // int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
 	            0,                     // int               keep_weights,       // return channel weights after A in RGBA
 				dstride_textures_rbga/sizeof(float), // 	const size_t      texture_rbga_stride,     // in floats
-				gpu_textures_rbga);    // 	float           * gpu_texture_tiles)    // (number of colors +1 + ?)*16*16 rgba texture tiles
+				gpu_textures_rbga,     // 	float           * gpu_texture_tiles)    // (number of colors +1 + ?)*16*16 rgba texture tiles
+				gpu_diff_rgb_combo);   // float           * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]

    	getLastCudaError("Kernel failure");
    	checkCudaErrors(cudaDeviceSynchronize());
@@ -1362,9 +1367,9 @@ int main(int argc, char **argv)
 	checkCudaErrors(cudaFree(gpu_color_weights));
 	checkCudaErrors(cudaFree(gpu_textures));
 	checkCudaErrors(cudaFree(gpu_textures_rbga));
+	checkCudaErrors(cudaFree(gpu_diff_rgb_combo));
 	checkCudaErrors(cudaFree(gpu_woi));
 	checkCudaErrors(cudaFree(gpu_num_texture_tiles));
-
 	checkCudaErrors(cudaFree(gpu_geometry_correction));
    checkCudaErrors(cudaFree(gpu_correction_vector));
    checkCudaErrors(cudaFree(gpu_rByRDist));