More code reimplemented as methods

6f9c7399 · Andrey Filippov · 14d7689b · 6f9c7399 · 6f9c7399 · 6f9c7399
Commit 6f9c7399 authored Apr 09, 2025 by Andrey Filippov
Hide whitespace changes
Inline Side-by-side

Showing with 74 additions and 17 deletions

TpHostGpu.cu src/TpHostGpu.cu +68 -12

TpHostGpu.h src/TpHostGpu.h +4 -2

test_tp.cu src/test_tp.cu +2 -3

No files found.
--- a/src/TpHostGpu.cu
+++ b/src/TpHostGpu.cu
@@ -556,13 +556,13 @@ void TpHostGpu::saveClt(
 	if (!paths)	return;
    int rslt_size = (m_tpParams.tilesy * m_tpParams.tilesx * m_tpParams.num_colors * 4 * m_tpParams.dtt_size * m_tpParams.dtt_size);
    float * cpu_clt = (float *)malloc(rslt_size*sizeof(float));
-    for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) if (paths[ncam]){
+    for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) if (paths[ncam] && m_gpu_clt_h[ncam]){
    	checkCudaErrors(cudaMemcpy( // segfault
    			cpu_clt,
 				m_gpu_clt_h[ncam],
 				rslt_size * sizeof(float),
    			cudaMemcpyDeviceToHost));
-        printf("Writing CLT data to %s\n",  paths[ncam]);
+        printf("Writing %s to %s\n",  prompt, paths[ncam]);
    	writeFloatsToFile(cpu_clt, // float *       data, // allocated array
    			rslt_size, // int           size, // length in elements
 				paths[ncam]); // 			   const char *  path) // file path
@@ -570,28 +570,84 @@ void TpHostGpu::saveClt(
    hfree(cpu_clt);
 }

+/**
+ * Copy each camera's result image from the GPU to the host and write it to a file.
+ *
+ * Every image is transferred as num_colors * (img_height + dtt_size) rows of
+ * (img_width + dtt_size) floats. Device rows are dstride_rslt bytes apart, host
+ * rows are packed back to back, so the written file includes the dtt_size margins.
+ * Cameras with a null path or a null device pointer are skipped.
+ *
+ * @param paths             per-camera output file paths; nothing is done if null
+ * @param prompt            description of the data, used in the progress message
+ * @param gpu_corr_images_h host-side array of per-camera device image pointers;
+ *                          nothing is done if null
+ * @throws std::runtime_error if the host staging buffer cannot be allocated
+ */
+void TpHostGpu::saveRgb(
+		const char ** paths,  // m_tpPaths.result_rbg_file
+		const char * prompt,  // "RBG data"
+		float **     gpu_corr_images_h){
+	// Both arrays are indexed per camera below, so neither may be null.
+	if (!paths || !gpu_corr_images_h) return;
+	// Packed host row length in bytes and the number of rows (all colors stacked vertically).
+	const size_t row_bytes = (m_tpParams.img_width + m_tpParams.dtt_size) * sizeof(float);
+	const size_t num_rows =  m_tpParams.num_colors * (m_tpParams.img_height + m_tpParams.dtt_size);
+	int rslt_img_size =       m_tpParams.num_colors * (m_tpParams.img_height + m_tpParams.dtt_size) * (m_tpParams.img_width + m_tpParams.dtt_size);
+	// One staging buffer is reused for all cameras.
+	float * cpu_corr_image = (float *)malloc(rslt_img_size * sizeof(float));
+	if (!cpu_corr_image){
+		throw std::runtime_error("Error: failed to allocate host buffer in saveRgb");
+	}
+	for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
+		if (!paths[ncam] || !gpu_corr_images_h[ncam]) continue;
+		// Pitched device image -> packed host buffer.
+		checkCudaErrors(cudaMemcpy2D(
+				cpu_corr_image,          // dst (host)
+				row_bytes,               // dst pitch, bytes
+				gpu_corr_images_h[ncam], // src (device)
+				dstride_rslt,            // src pitch, bytes
+				row_bytes,               // width of each row to copy, bytes
+				num_rows,                // number of rows
+				cudaMemcpyDeviceToHost));
+		printf("Writing %s to %s\n",  prompt, paths[ncam]);
+		writeFloatsToFile( // will have margins
+				cpu_corr_image, // float *       data, // allocated array
+				rslt_img_size,  // int           size, // length in elements
+				paths[ncam]);   // const char *  path) // file path
+	}
+	free(cpu_corr_image);
+}

-
+/*
+// not implemented
 void TpHostGpu::testImclt               (int num_runs){ // 682
 	int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
 	int i0 =            m_tpParams.debug_tile ? 0 : -1;
 	if (!m_gpu_geometry_correction){
 		throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
 	}
-	/*
-
-	 */
 }
-
+*/
 void TpHostGpu::testImcltRbgAll         (int num_runs){ // 701
 	int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
 	int i0 =            m_tpParams.debug_tile ? 0 : -1;
-	if (!m_gpu_geometry_correction){
-		throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
+	// Launches imclt_rbg_all for passes i0..numIterations-1, prints the average
+	// time per timed pass and saves the resulting images through saveRgb().
+	// Outside debug-tile mode the loop starts at i == -1, giving one extra pass
+	// before the timer is started at i == 0.
+	// Throws std::runtime_error if m_gpu_clt or m_gpu_corr_images is not set.
+	if (!m_gpu_clt){
+		throw std::runtime_error("Error: m_gpu_clt is not initialized ");
 	}
-	/*
-
-	 */
+	if (!m_gpu_corr_images){
+		throw std::runtime_error("Error: m_gpu_corr_images is not initialized ");
+	}
+    StopWatchInterface *timerIMCLT = 0;
+    sdkCreateTimer(&timerIMCLT);
+    for (int i = i0; i < numIterations; i++) {
+    	if (i == 0) {
+    		// Finish any pending GPU work so it is not counted in the measurement.
+    		checkCudaErrors(cudaDeviceSynchronize());
+    		sdkResetTimer(&timerIMCLT);
+    		sdkStartTimer(&timerIMCLT);
+    	}
+        // The kernel is launched with a single thread in a single block.
+        dim3 threads_imclt_all(1, 1, 1);
+		dim3 grid_imclt_all(1, 1, 1);
+        // dim3 components are unsigned, so print them with %u.
+        printf("threads_imclt_all=(%u, %u, %u)\n",threads_imclt_all.x,threads_imclt_all.y,threads_imclt_all.z);
+        printf("grid_imclt_all=   (%u, %u, %u)\n",grid_imclt_all.x,   grid_imclt_all.y,   grid_imclt_all.z);
+        imclt_rbg_all<<<grid_imclt_all,threads_imclt_all>>>(
+        		m_tpParams.num_cams,                    // int                num_cams,
+        		m_gpu_clt,                     // float           ** gpu_clt,            // [num_cams][TILESY][TILESX][num_colors][DTT_SIZE*DTT_SIZE]
+				m_gpu_corr_images,             // float           ** gpu_corr_images,    // [num_cams][WIDTH, 3 * HEIGHT]
+				1,                           // int               apply_lpf,
+				m_tpParams.num_colors,                  // int               colors,               // defines lpf filter
+				m_tpParams.tilesx,      // TILESX,                      // int               woi_twidth,
+				m_tpParams.tilesy,      // TILESY,                      // int               woi_theight,
+				dstride_rslt/sizeof(float)); // const size_t      dstride);            // in floats (pixels)
+    	getLastCudaError("Kernel failure");
+    	checkCudaErrors(cudaDeviceSynchronize());
+    	printf("test pass: %d\n",i);
+    }
+    // TODO: *** Stop here for initial testing ***
+    sdkStopTimer(&timerIMCLT);
+    // With num_runs <= 0 there are no timed passes; report 0 instead of dividing by zero
+    // (or by a negative count).
+    float avgTimeIMCLT = (numIterations > 0) ? ((float)sdkGetTimerValue(&timerIMCLT) / (float)numIterations) : 0.0f;
+    sdkDeleteTimer(&timerIMCLT);
+    printf("Average imclt_rbg_all run time =%f ms\n",  avgTimeIMCLT);
+    saveRgb(
+    		m_tpPaths.result_rbg_file, // const char ** paths,  // m_tpPaths.result_rbg_file
+    		"RBG data",                // const char * prompt,  // "RBG data"
+    		m_gpu_corr_images_h); // float **     gpu_corr_images_h){
 }

 void TpHostGpu::testCorrelate2DIntra(int num_runs){

--- a/src/TpHostGpu.h
+++ b/src/TpHostGpu.h
@@ -126,14 +126,16 @@ public:
 	void testReverseDistortions  (int num_runs); // 468
 	void testGeomCorrect         (int num_runs); // 534
 	void testConvertDirect       (int num_runs); // 608
-	void testImclt               (int num_runs); // 682
+//	void testImclt               (int num_runs); // 682 // not implemented
 	void testImcltRbgAll         (int num_runs); // 701

-
 	void testCorrelate2DIntra    (int num_runs);
 	void testCorrelate2DInterSelf(int num_runs);

 	void saveClt(const char ** paths,  const char * prompt, float **     gpu_clt_h);
+	void saveRgb(const char ** paths,  const char * prompt, float **     gpu_corr_images_h);
+
+
 	// for both intra and inter!
 	void saveIntraCorrFile(const char * path, const char * prompt, int num_corrs, int num_corr_indices, float * gpu_corrs, int * gpu_corr_indices, int num_sel_sensors);
 	void saveInterCorrFile(const char * path, const char * prompt, int num_corrs, int num_corr_indices, float  * gpu_corrs_td,  int * gpu_corr_indices, int num_sel_sensors);

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -678,8 +678,8 @@ int main(int argc, char **argv)
 				tpPaths.ports_clt_file[ncam]); // 			   const char *  path) // file path
    }
 #endif
-
 #ifdef TEST_IMCLT
+    // test_imclt does not exist
     {
    	// testing imclt
    	dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
@@ -700,7 +700,6 @@ int main(int argc, char **argv)

    StopWatchInterface *timerIMCLT = 0;
    sdkCreateTimer(&timerIMCLT);
-
    for (int i = i0; i < numIterations; i++)
    {
    	if (i == 0)
@@ -732,7 +731,7 @@ int main(int argc, char **argv)
    sdkStopTimer(&timerIMCLT);
    float avgTimeIMCLT = (float)sdkGetTimerValue(&timerIMCLT) / (float)numIterations;
    sdkDeleteTimer(&timerIMCLT);
-    printf("Average IMCLT run time =%f ms\n",  avgTimeIMCLT);
+    printf("Average imclt_rbg_all run time =%f ms\n",  avgTimeIMCLT);

    int rslt_img_size =       tpParams.num_colors * (IMG_HEIGHT + DTT_SIZE) * (IMG_WIDTH + DTT_SIZE);
    float * cpu_corr_image = (float *)malloc(rslt_img_size * sizeof(float));