Commit 3610b7a6 authored by Andrey Filippov

testCorrelate2DIntraTD

parent 6f9c7399
This diff is collapsed.
......@@ -129,8 +129,11 @@ public:
// void testImclt (int num_runs); // 682 // not implemented
void testImcltRbgAll (int num_runs); // 701
void testCorrelate2DIntra (int num_runs);
void testCorrelate2DInterSelf(int num_runs);
void testCorrelate2DIntra (int num_runs); // 762 - 885
void testCorrelate2DInterSelf(int num_runs); // 1136 - 1411
void testCorrelate2DIntraTD (int num_runs, int quad_combine); // 886 - 1123
void saveClt(const char ** paths, const char * prompt, float ** gpu_clt_h);
void saveRgb(const char ** paths, const char * prompt, float ** gpu_corr_images_h);
......
......@@ -32,7 +32,7 @@
// all of the next 5 were disabled
//#define NOCORR
#define NOCORR_TD
//#define NOCORR_TD
#define NOTEXTURES //
#define NOTEXTURE_RGBA //
//#define NOTEXTURE_RGBAXXX //
......@@ -883,10 +883,8 @@ int main(int argc, char **argv)
free (corr_img);
#endif // ifndef NOCORR
#ifndef NOCORR_TD
//#define QUAD_COMBINE
// cudaProfilerStart();
// testing corr
StopWatchInterface *timerCORRTD = 0;
......@@ -903,10 +901,10 @@ int main(int argc, char **argv)
// FIXME: provide sel_pairs
correlate2D<<<1,1>>>( // output TD tiles, no normalization
tpParams.num_cams, // int num_cams,
TpParams.sel_pairs[0], // int sel_pairs0 // unused bits should be 0
TpParams.sel_pairs[1], // int sel_pairs1, // unused bits should be 0
TpParams.sel_pairs[2], // int sel_pairs2, // unused bits should be 0
TpParams.sel_pairs[3], // int sel_pairs3, // unused bits should be 0
tpParams.sel_pairs[0], // int sel_pairs0 // unused bits should be 0
tpParams.sel_pairs[1], // int sel_pairs1, // unused bits should be 0
tpParams.sel_pairs[2], // int sel_pairs2, // unused bits should be 0
tpParams.sel_pairs[3], // int sel_pairs3, // unused bits should be 0
gpu_clt, // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
tpParams.num_colors, // int colors, // number of colors (3/1)
tpParams.color_weights[0], // 0.25, // float scale0, // scale for R
......@@ -1019,20 +1017,18 @@ int main(int argc, char **argv)
// int rslt_corr_size = num_corrs * corr_size * corr_size;
// float * cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float));
rslt_corr_size = num_corrs * corr_length * corr_length;
rslt_corr_size = num_corrs * tpParams.corr_length; // corr_size * corr_size;
corr_img_size = num_corr_indices * 16*16; // NAN
corr_img = (float *)malloc(corr_img_size * sizeof(float));
cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float));
cpu_corr_indices = (int *) malloc(num_corr_indices * sizeof(int));
checkCudaErrors(cudaMemcpy2D(
cpu_corr,
(corr_length * corr_length) * sizeof(float),
tpParams.corr_length * sizeof(float),
gpu_corrs,
dstride_corr,
(corr_length * corr_length) * sizeof(float),
tpParams.corr_length * sizeof(float),
num_corrs,
cudaMemcpyDeviceToHost));
// checkCudaErrors (cudaMalloc((void **)&gpu_corr_indices, num_pairs * TILESX * TILESY*sizeof(int)));
......@@ -1056,13 +1052,13 @@ int main(int argc, char **argv)
int ty = ctt / TILESX;
int tx = ctt % TILESX;
// int src_offs0 = ict * tpParams.num_pairs * corr_size * corr_size;
int src_offs0 = ict * corr_length * corr_length;
int src_offs0 = ict * tpParams.corr_length;
int dst_offs0 = cpair * (num_tiles * 16 * 16) + (ty * 16 * TILESX * 16) + (tx * 16);
for (int iy = 0; iy < corr_length; iy++){
int src_offs = src_offs0 + iy * corr_length; // ict * tpParams.num_pairs * corr_size * corr_size;
for (int iy = 0; iy < tpParams.corr_size; iy++){
int src_offs = src_offs0 + iy * tpParams.corr_size; // ict * tpParams.num_pairs * corr_size * corr_size;
int dst_offs = dst_offs0 + iy * (TILESX * 16);
for (int ix = 0; ix < corr_length; ix++){
for (int ix = 0; ix < tpParams.corr_size; ix++){
corr_img[dst_offs++] = cpu_corr[src_offs++];
}
}
......@@ -1116,15 +1112,9 @@ int main(int argc, char **argv)
"clt/aux_intrascene-TD.raw"); // const char * path) // file path
#endif
free (cpu_corr_td);
#endif // if 1
// reuse image, export TD data
free (cpu_corr);
free (cpu_corr_indices);
free (corr_img);
......@@ -1135,6 +1125,9 @@ int main(int argc, char **argv)
// Testing "interframe" correlation with itself, assuming direct convert already ran
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment