Commit 3610b7a6 authored by Andrey Filippov

testCorrelate2DIntraTD

parent 6f9c7399
This diff is collapsed.
...@@ -129,8 +129,11 @@ public: ...@@ -129,8 +129,11 @@ public:
// void testImclt (int num_runs); // 682 // not implemented // void testImclt (int num_runs); // 682 // not implemented
void testImcltRbgAll (int num_runs); // 701 void testImcltRbgAll (int num_runs); // 701
void testCorrelate2DIntra (int num_runs); void testCorrelate2DIntra (int num_runs); // 762 - 885
void testCorrelate2DInterSelf(int num_runs); void testCorrelate2DInterSelf(int num_runs); // 1136 - 1411
void testCorrelate2DIntraTD (int num_runs, int quad_combine); // 886 - 1123
void saveClt(const char ** paths, const char * prompt, float ** gpu_clt_h); void saveClt(const char ** paths, const char * prompt, float ** gpu_clt_h);
void saveRgb(const char ** paths, const char * prompt, float ** gpu_corr_images_h); void saveRgb(const char ** paths, const char * prompt, float ** gpu_corr_images_h);
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
// all of the next 5 were disabled // all of the next 5 were disabled
//#define NOCORR //#define NOCORR
#define NOCORR_TD //#define NOCORR_TD
#define NOTEXTURES // #define NOTEXTURES //
#define NOTEXTURE_RGBA // #define NOTEXTURE_RGBA //
//#define NOTEXTURE_RGBAXXX // //#define NOTEXTURE_RGBAXXX //
...@@ -883,10 +883,8 @@ int main(int argc, char **argv) ...@@ -883,10 +883,8 @@ int main(int argc, char **argv)
free (corr_img); free (corr_img);
#endif // ifndef NOCORR #endif // ifndef NOCORR
#ifndef NOCORR_TD #ifndef NOCORR_TD
//#define QUAD_COMBINE
// cudaProfilerStart(); // cudaProfilerStart();
// testing corr // testing corr
StopWatchInterface *timerCORRTD = 0; StopWatchInterface *timerCORRTD = 0;
...@@ -903,10 +901,10 @@ int main(int argc, char **argv) ...@@ -903,10 +901,10 @@ int main(int argc, char **argv)
// FIXME: provide sel_pairs // FIXME: provide sel_pairs
correlate2D<<<1,1>>>( // output TD tiles, no normalization correlate2D<<<1,1>>>( // output TD tiles, no normalization
tpParams.num_cams, // int num_cams, tpParams.num_cams, // int num_cams,
TpParams.sel_pairs[0], // int sel_pairs0 // unused bits should be 0 tpParams.sel_pairs[0], // int sel_pairs0 // unused bits should be 0
TpParams.sel_pairs[1], // int sel_pairs1, // unused bits should be 0 tpParams.sel_pairs[1], // int sel_pairs1, // unused bits should be 0
TpParams.sel_pairs[2], // int sel_pairs2, // unused bits should be 0 tpParams.sel_pairs[2], // int sel_pairs2, // unused bits should be 0
TpParams.sel_pairs[3], // int sel_pairs3, // unused bits should be 0 tpParams.sel_pairs[3], // int sel_pairs3, // unused bits should be 0
gpu_clt, // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] gpu_clt, // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
tpParams.num_colors, // int colors, // number of colors (3/1) tpParams.num_colors, // int colors, // number of colors (3/1)
tpParams.color_weights[0], // 0.25, // float scale0, // scale for R tpParams.color_weights[0], // 0.25, // float scale0, // scale for R
...@@ -1019,20 +1017,18 @@ int main(int argc, char **argv) ...@@ -1019,20 +1017,18 @@ int main(int argc, char **argv)
// int rslt_corr_size = num_corrs * corr_size * corr_size; // int rslt_corr_size = num_corrs * corr_size * corr_size;
// float * cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float)); // float * cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float));
rslt_corr_size = num_corrs * corr_length * corr_length; rslt_corr_size = num_corrs * tpParams.corr_length; // corr_size * corr_size;
corr_img_size = num_corr_indices * 16*16; // NAN corr_img_size = num_corr_indices * 16*16; // NAN
corr_img = (float *)malloc(corr_img_size * sizeof(float)); corr_img = (float *)malloc(corr_img_size * sizeof(float));
cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float)); cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float));
cpu_corr_indices = (int *) malloc(num_corr_indices * sizeof(int)); cpu_corr_indices = (int *) malloc(num_corr_indices * sizeof(int));
checkCudaErrors(cudaMemcpy2D( checkCudaErrors(cudaMemcpy2D(
cpu_corr, cpu_corr,
(corr_length * corr_length) * sizeof(float), tpParams.corr_length * sizeof(float),
gpu_corrs, gpu_corrs,
dstride_corr, dstride_corr,
(corr_length * corr_length) * sizeof(float), tpParams.corr_length * sizeof(float),
num_corrs, num_corrs,
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
// checkCudaErrors (cudaMalloc((void **)&gpu_corr_indices, num_pairs * TILESX * TILESY*sizeof(int))); // checkCudaErrors (cudaMalloc((void **)&gpu_corr_indices, num_pairs * TILESX * TILESY*sizeof(int)));
...@@ -1056,13 +1052,13 @@ int main(int argc, char **argv) ...@@ -1056,13 +1052,13 @@ int main(int argc, char **argv)
int ty = ctt / TILESX; int ty = ctt / TILESX;
int tx = ctt % TILESX; int tx = ctt % TILESX;
// int src_offs0 = ict * tpParams.num_pairs * corr_size * corr_size; // int src_offs0 = ict * tpParams.num_pairs * corr_size * corr_size;
int src_offs0 = ict * corr_length * corr_length; int src_offs0 = ict * tpParams.corr_length;
int dst_offs0 = cpair * (num_tiles * 16 * 16) + (ty * 16 * TILESX * 16) + (tx * 16); int dst_offs0 = cpair * (num_tiles * 16 * 16) + (ty * 16 * TILESX * 16) + (tx * 16);
for (int iy = 0; iy < corr_length; iy++){ for (int iy = 0; iy < tpParams.corr_size; iy++){
int src_offs = src_offs0 + iy * corr_length; // ict * tpParams.num_pairs * corr_size * corr_size; int src_offs = src_offs0 + iy * tpParams.corr_size; // ict * tpParams.num_pairs * corr_size * corr_size;
int dst_offs = dst_offs0 + iy * (TILESX * 16); int dst_offs = dst_offs0 + iy * (TILESX * 16);
for (int ix = 0; ix < corr_length; ix++){ for (int ix = 0; ix < tpParams.corr_size; ix++){
corr_img[dst_offs++] = cpu_corr[src_offs++]; corr_img[dst_offs++] = cpu_corr[src_offs++];
} }
} }
...@@ -1116,15 +1112,9 @@ int main(int argc, char **argv) ...@@ -1116,15 +1112,9 @@ int main(int argc, char **argv)
"clt/aux_intrascene-TD.raw"); // const char * path) // file path "clt/aux_intrascene-TD.raw"); // const char * path) // file path
#endif #endif
free (cpu_corr_td); free (cpu_corr_td);
#endif // if 1 #endif // if 1
// reuse image, export TD data // reuse image, export TD data
free (cpu_corr); free (cpu_corr);
free (cpu_corr_indices); free (cpu_corr_indices);
free (corr_img); free (corr_img);
...@@ -1135,6 +1125,9 @@ int main(int argc, char **argv) ...@@ -1135,6 +1125,9 @@ int main(int argc, char **argv)
// Testing "interframe" correlation with itself, assuming direct convert already ran // Testing "interframe" correlation with itself, assuming direct convert already ran
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment