Commit 6f9c7399 authored by Andrey Filippov

More code reimplemented as methods

parent 14d7689b
......@@ -556,13 +556,13 @@ void TpHostGpu::saveClt(
if (!paths) return;
int rslt_size = (m_tpParams.tilesy * m_tpParams.tilesx * m_tpParams.num_colors * 4 * m_tpParams.dtt_size * m_tpParams.dtt_size);
float * cpu_clt = (float *)malloc(rslt_size*sizeof(float));
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) if (paths[ncam]){
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) if (paths[ncam] && m_gpu_clt_h[ncam]){
checkCudaErrors(cudaMemcpy( // segfault
cpu_clt,
m_gpu_clt_h[ncam],
rslt_size * sizeof(float),
cudaMemcpyDeviceToHost));
printf("Writing CLT data to %s\n", paths[ncam]);
printf("Writing %s to %s\n", prompt, paths[ncam]);
writeFloatsToFile(cpu_clt, // float * data, // allocated array
rslt_size, // int size, // length in elements
paths[ncam]); // const char * path) // file path
......@@ -570,28 +570,84 @@ void TpHostGpu::saveClt(
hfree(cpu_clt);
}
/**
 * Download the per-camera result images from the GPU and write each one to a file.
 *
 * Every image is copied as num_colors * (img_height + dtt_size) rows of
 * (img_width + dtt_size) floats, so the saved data includes the margins.
 *
 * @param paths             per-camera output paths; a NULL array makes this a no-op,
 *                          a NULL entry skips that camera
 * @param prompt            data label used in the "Writing ... to ..." message (e.g. "RBG data")
 * @param gpu_corr_images_h per-camera device image pointers, indexed on the host;
 *                          a NULL array makes this a no-op, a NULL entry skips that camera
 * @throws std::runtime_error if the host staging buffer cannot be allocated
 */
void TpHostGpu::saveRgb(
		const char ** paths,             // m_tpPaths.result_rbg_file
		const char *  prompt,            // "RBG data"
		float **      gpu_corr_images_h){
	if (!paths || !gpu_corr_images_h) return; // nowhere to write to, or nothing to read from
	const int    row_floats    = m_tpParams.img_width + m_tpParams.dtt_size;
	const int    num_rows      = m_tpParams.num_colors * (m_tpParams.img_height + m_tpParams.dtt_size);
	const int    rslt_img_size = num_rows * row_floats; // total floats per camera image
	const size_t row_bytes     = row_floats * sizeof(float);
	// One host staging buffer is reused for all cameras.
	float * cpu_corr_image = (float *)malloc((size_t) rslt_img_size * sizeof(float));
	if (!cpu_corr_image){
		throw std::runtime_error("Error: failed to allocate host buffer for result images");
	}
	for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
		if (!paths[ncam] || !gpu_corr_images_h[ncam]) continue;
		checkCudaErrors(cudaMemcpy2D(
				cpu_corr_image,          // destination (host), rows tightly packed
				row_bytes,               // destination pitch in bytes
				gpu_corr_images_h[ncam], // source (device)
				dstride_rslt,            // source pitch in bytes
				row_bytes,               // bytes to copy per row
				num_rows,                // number of rows
				cudaMemcpyDeviceToHost));
		printf("Writing %s to %s\n", prompt, paths[ncam]);
		writeFloatsToFile( // will have margins
				cpu_corr_image, // float * data, // allocated array
				rslt_img_size,  // int size, // length in elements
				paths[ncam]);   // const char * path) // file path
	}
	free(cpu_corr_image);
}
/*
// not implemented
void TpHostGpu::testImclt (int num_runs){ // 682
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
}
/*
*/
}
*/
/**
 * Run the imclt_rbg_all kernel for all cameras, print the average run time and
 * save the resulting images with saveRgb().
 *
 * When m_tpParams.debug_tile is set, a single timed pass is executed. Otherwise
 * one untimed pass (i == -1) is followed by num_runs timed passes; the timer is
 * (re)started when i reaches 0.
 *
 * @param num_runs number of timed passes to average over (ignored when debug_tile is set)
 * @throws std::runtime_error if m_gpu_clt or m_gpu_corr_images is not initialized
 */
void TpHostGpu::testImcltRbgAll (int num_runs){ // 701
	int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
	int i0 =            m_tpParams.debug_tile ? 0 : -1;
	// Only the buffers actually passed to the kernel are required here.
	if (!m_gpu_clt){
		throw std::runtime_error("Error: m_gpu_clt is not initialized ");
	}
	if (!m_gpu_corr_images){
		throw std::runtime_error("Error: m_gpu_corr_images is not initialized ");
	}
	StopWatchInterface *timerIMCLT = 0;
	sdkCreateTimer(&timerIMCLT);
	for (int i = i0; i < numIterations; i++) {
		if (i == 0) {
			// Drain any pending GPU work so only the timed passes are measured.
			checkCudaErrors(cudaDeviceSynchronize());
			sdkResetTimer(&timerIMCLT);
			sdkStartTimer(&timerIMCLT);
		}
		// Single-thread launch; NOTE(review): presumably imclt_rbg_all spawns the
		// per-camera/per-color work itself - confirm against the kernel source.
		dim3 threads_imclt_all(1, 1, 1);
		dim3 grid_imclt_all(1, 1, 1);
		printf("threads_imclt_all=(%d, %d, %d)\n",threads_imclt_all.x,threads_imclt_all.y,threads_imclt_all.z);
		printf("grid_imclt_all=   (%d, %d, %d)\n",grid_imclt_all.x,   grid_imclt_all.y,   grid_imclt_all.z);
		imclt_rbg_all<<<grid_imclt_all,threads_imclt_all>>>(
				m_tpParams.num_cams,          // int                num_cams,
				m_gpu_clt,                    // float           ** gpu_clt,            // [num_cams][TILESY][TILESX][num_colors][DTT_SIZE*DTT_SIZE]
				m_gpu_corr_images,            // float           ** gpu_corr_images,    // [num_cams][WIDTH, 3 * HEIGHT]
				1,                            // int                apply_lpf,
				m_tpParams.num_colors,        // int                colors,               // defines lpf filter
				m_tpParams.tilesx,            // TILESX, // int     woi_twidth,
				m_tpParams.tilesy,            // TILESY, // int     woi_theight,
				dstride_rslt/sizeof(float));  // const size_t       dstride);            // in floats (pixels)
		getLastCudaError("Kernel failure");
		checkCudaErrors(cudaDeviceSynchronize());
		printf("test pass: %d\n",i);
	}
	// TODO: *** Stop here for initial testing ***
	sdkStopTimer(&timerIMCLT);
	// With num_runs <= 0 no timed pass ran; report 0 instead of dividing by zero.
	float avgTimeIMCLT = (numIterations > 0) ?
			((float)sdkGetTimerValue(&timerIMCLT) / (float)numIterations) : 0.0f;
	sdkDeleteTimer(&timerIMCLT);
	printf("Average imclt_rbg_all run time =%f ms\n", avgTimeIMCLT);
	saveRgb(
			m_tpPaths.result_rbg_file, // const char ** paths, // m_tpPaths.result_rbg_file
			"RBG data",                // const char *  prompt, // "RBG data"
			m_gpu_corr_images_h);      // float ** gpu_corr_images_h){
}
void TpHostGpu::testCorrelate2DIntra(int num_runs){
......
......@@ -126,14 +126,16 @@ public:
void testReverseDistortions (int num_runs); // 468
void testGeomCorrect (int num_runs); // 534
void testConvertDirect (int num_runs); // 608
void testImclt (int num_runs); // 682
// void testImclt (int num_runs); // 682 // not implemented
void testImcltRbgAll (int num_runs); // 701
void testCorrelate2DIntra (int num_runs);
void testCorrelate2DInterSelf(int num_runs);
void saveClt(const char ** paths, const char * prompt, float ** gpu_clt_h);
void saveRgb(const char ** paths, const char * prompt, float ** gpu_corr_images_h);
// for both intra and inter!
void saveIntraCorrFile(const char * path, const char * prompt, int num_corrs, int num_corr_indices, float * gpu_corrs, int * gpu_corr_indices, int num_sel_sensors);
void saveInterCorrFile(const char * path, const char * prompt, int num_corrs, int num_corr_indices, float * gpu_corrs_td, int * gpu_corr_indices, int num_sel_sensors);
......
......@@ -678,8 +678,8 @@ int main(int argc, char **argv)
tpPaths.ports_clt_file[ncam]); // const char * path) // file path
}
#endif
#ifdef TEST_IMCLT
// test_imclt does not exist
{
// testing imclt
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
......@@ -700,7 +700,6 @@ int main(int argc, char **argv)
StopWatchInterface *timerIMCLT = 0;
sdkCreateTimer(&timerIMCLT);
for (int i = i0; i < numIterations; i++)
{
if (i == 0)
......@@ -732,7 +731,7 @@ int main(int argc, char **argv)
sdkStopTimer(&timerIMCLT);
float avgTimeIMCLT = (float)sdkGetTimerValue(&timerIMCLT) / (float)numIterations;
sdkDeleteTimer(&timerIMCLT);
printf("Average IMCLT run time =%f ms\n", avgTimeIMCLT);
printf("Average imclt_rbg_all run time =%f ms\n", avgTimeIMCLT);
int rslt_img_size = tpParams.num_colors * (IMG_HEIGHT + DTT_SIZE) * (IMG_WIDTH + DTT_SIZE);
float * cpu_corr_image = (float *)malloc(rslt_img_size * sizeof(float));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment