Commit 32ed475a authored by Andrey Filippov's avatar Andrey Filippov

testing refactored

parent b469db50
...@@ -16,6 +16,12 @@ ...@@ -16,6 +16,12 @@
#include "GenerateRgbaHost.h" #include "GenerateRgbaHost.h"
#include "TpHostGpu.h" #include "TpHostGpu.h"
#define MY_EXCEPTION(aMessage) \
{ \
std::ostringstream stream; \
stream << "ERROR in " << __func__ << ": "<< aMessage << ", file " <<__FILE__ << " line " << __LINE__; \
throw std::runtime_error(stream.str()); \
}
TpHostGpu::~TpHostGpu(){ TpHostGpu::~TpHostGpu(){
hfree(m_host_kern_buf); hfree(m_host_kern_buf);
...@@ -31,18 +37,18 @@ TpHostGpu::~TpHostGpu(){ ...@@ -31,18 +37,18 @@ TpHostGpu::~TpHostGpu(){
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) { for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
// Free used host memory, initialized as arrays of pointers // Free used host memory, initialized as arrays of pointers
hfree(m_gpu_kernels_h[ncam]); gfree(m_gpu_kernels_h[ncam]);
hfree(m_gpu_kernel_offsets_h[ncam]); gfree(m_gpu_kernel_offsets_h[ncam]);
hfree(m_gpu_images_h[ncam]); gfree(m_gpu_images_h[ncam]);
hfree(m_gpu_clt_h[ncam]); gfree(m_gpu_clt_h[ncam]);
hfree(m_gpu_corr_images_h[ncam]); gfree(m_gpu_corr_images_h[ncam]);
// Free used GPU memory initialized as ** // Free used GPU memory initialized as **
if (m_gpu_kernels) gfree(m_gpu_kernels[ncam]);
if (m_gpu_kernel_offsets) gfree(m_gpu_kernel_offsets[ncam]);
if (m_gpu_images) gfree(m_gpu_images[ncam]);
if (m_gpu_clt) gfree(m_gpu_clt[ncam]);
if (m_gpu_corr_images) gfree(m_gpu_corr_images[ncam]);
} }
if (m_gpu_kernels) gfree(m_gpu_kernels);
if (m_gpu_kernel_offsets) gfree(m_gpu_kernel_offsets);
if (m_gpu_images) gfree(m_gpu_images);
if (m_gpu_clt) gfree(m_gpu_clt);
if (m_gpu_corr_images) gfree(m_gpu_corr_images);
gfree(m_gpu_corrs); gfree(m_gpu_corrs);
gfree(m_gpu_corrs_td); gfree(m_gpu_corrs_td);
gfree(m_gpu_corrs_combo); gfree(m_gpu_corrs_combo);
...@@ -66,7 +72,74 @@ TpHostGpu::~TpHostGpu(){ ...@@ -66,7 +72,74 @@ TpHostGpu::~TpHostGpu(){
return; return;
}; };
void TpHostGpu::setImageKernels(){
void TpHostGpu::allTests(
int num_runs,
int image_dx,
int image_dy,
const float target_disparity,
const float scale,
int quad_combine,
int use_dp,
int debug){
int is_bayer = m_tpParams.num_colors > 1;
// Prepere all but setImgBuffersShifted() // Is it important?
setImageKernels(); // 233-258 (overlap)
setCltBuffers(); // 246
setCorrImgBuffers(); // 252
setImgBuffers(); // 283-292
//setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
setGeometryCorrectionBuffers(); // 207-231
setCorrelationBuffers(); // 260-281 , 332-333
setTasks (target_disparity,scale); // 129, 302-325
setTextures(); // 337-348, ??
setRGBA(); // 377 - 390
testRotMatrices (num_runs); // 420
testReverseDistortions (num_runs); // 468
testGeomCorrect (num_runs); // 534
testConvertDirect (num_runs); // 608
testImcltRbgAll (num_runs); // 701
testCorrelate2DIntra (num_runs); // 762 - 885
testCorrelate2DIntraTD (num_runs, quad_combine); // 886 - 1123
setImgBuffersShifted(is_bayer, image_dx, image_dy); // 1171-1188
testCorrelate2DInterSelf(num_runs); // 1136 - 1411
testTextures (num_runs, use_dp, debug); // 1422-1664
testTexturesRGBA (num_runs, use_dp, debug); // 1669-1810
return;
/*
void setImageKernels(); // 233-258 (overlap)
void setCltBuffers(); // 246
void setCorrImgBuffers(); // 252
void setImgBuffers(); // 283-292
void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
void setGeometryCorrectionBuffers(); // 207-231
void setCorrelationBuffers(); // 260-281 , 332-333
void setTasks(const float target_disparity, const float scale); // 129, 302-325
void setTextures(); // 337-348, ??
void setRGBA(); // 377 - 390
trot_deriv testRotMatrices (int num_runs); // 420
void testReverseDistortions (int num_runs); // 468
void testGeomCorrect (int num_runs); // 534
void testConvertDirect (int num_runs); // 608
// void testImclt (int num_runs); // 682 // not implemented
void testImcltRbgAll (int num_runs); // 701
void testCorrelate2DIntra (int num_runs); // 762 - 885
void testCorrelate2DIntraTD (int num_runs, int quad_combine); // 886 - 1123
//void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
void testCorrelate2DInterSelf(int num_runs); // 1136 - 1411
void testTextures (int num_runs, int use_dp, int debug); // 1422-1664
void testTexturesRGBA (int num_runs, int use_dp, int debug); // 1669-1810
*/
}
void TpHostGpu::setImageKernels(){ // 233-258 (overlap)
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) { for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
readFloatsFromFile( readFloatsFromFile(
m_host_kern_buf, // float * data, // allocated array m_host_kern_buf, // float * data, // allocated array
...@@ -82,7 +155,7 @@ void TpHostGpu::setImageKernels(){ ...@@ -82,7 +155,7 @@ void TpHostGpu::setImageKernels(){
m_gpu_kernels = copyalloc_pointers_gpu (m_gpu_kernels_h, m_tpParams.num_cams); // NUM_CAMS); m_gpu_kernels = copyalloc_pointers_gpu (m_gpu_kernels_h, m_tpParams.num_cams); // NUM_CAMS);
m_gpu_kernel_offsets = (struct CltExtra **) copyalloc_pointers_gpu ((float **) m_gpu_kernel_offsets_h, m_tpParams.num_cams); // NUM_CAMS); m_gpu_kernel_offsets = (struct CltExtra **) copyalloc_pointers_gpu ((float **) m_gpu_kernel_offsets_h, m_tpParams.num_cams); // NUM_CAMS);
} }
void TpHostGpu::setCltBuffers(){ void TpHostGpu::setCltBuffers(){ // 246
const int slice_size {m_tpParams.tilesy * m_tpParams.tilesx * m_tpParams.num_colors * 4 * m_tpParams.dtt_size * m_tpParams.dtt_size}; const int slice_size {m_tpParams.tilesy * m_tpParams.tilesx * m_tpParams.num_colors * 4 * m_tpParams.dtt_size * m_tpParams.dtt_size};
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) { for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
// will get results back // will get results back
...@@ -91,9 +164,13 @@ void TpHostGpu::setCltBuffers(){ ...@@ -91,9 +164,13 @@ void TpHostGpu::setCltBuffers(){
printf("Allocating GPU memory, 0x%x floats\n", slice_size) ; printf("Allocating GPU memory, 0x%x floats\n", slice_size) ;
} }
m_gpu_clt = copyalloc_pointers_gpu (m_gpu_clt_h, m_tpParams.num_cams); // NUM_CAMS); m_gpu_clt = copyalloc_pointers_gpu (m_gpu_clt_h, m_tpParams.num_cams); // NUM_CAMS);
checkCudaErrors (cudaMalloc((void **)&m_gpu_active_tiles, m_tpParams.tilesx * m_tpParams.tilesy * sizeof(int)));
checkCudaErrors (cudaMalloc((void **)&m_gpu_num_active, sizeof(int)));
} }
void TpHostGpu::setCorrImgBuffers(){ void TpHostGpu::setCorrImgBuffers(){ // 252
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) { for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
// allocate result images (3x height to accommodate 3 colors // allocate result images (3x height to accommodate 3 colors
// Image is extended by 4 pixels each side to avoid checking (mclt tiles extend by 4) // Image is extended by 4 pixels each side to avoid checking (mclt tiles extend by 4)
...@@ -106,7 +183,7 @@ void TpHostGpu::setCorrImgBuffers(){ ...@@ -106,7 +183,7 @@ void TpHostGpu::setCorrImgBuffers(){
m_gpu_corr_images = copyalloc_pointers_gpu (m_gpu_corr_images_h, m_tpParams.num_cams); // NUM_CAMS); m_gpu_corr_images = copyalloc_pointers_gpu (m_gpu_corr_images_h, m_tpParams.num_cams); // NUM_CAMS);
} }
void TpHostGpu::setImgBuffers(){ void TpHostGpu::setImgBuffers(){ // 283-292
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) { for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
readFloatsFromFile( readFloatsFromFile(
m_host_kern_buf, // float * data, // allocated array m_host_kern_buf, // float * data, // allocated array
...@@ -120,7 +197,10 @@ void TpHostGpu::setImgBuffers(){ ...@@ -120,7 +197,10 @@ void TpHostGpu::setImgBuffers(){
m_gpu_images = copyalloc_pointers_gpu (m_gpu_images_h, m_tpParams.num_cams); // NUM_CAMS); m_gpu_images = copyalloc_pointers_gpu (m_gpu_images_h, m_tpParams.num_cams); // NUM_CAMS);
} }
void TpHostGpu::setImgBuffersShifted(int is_bayer, int image_dx, int image_dy) { void TpHostGpu::setImgBuffersShifted( // 1171-1188
int is_bayer, // 0
int image_dx, // 2
int image_dy) { // 0
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) { for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
readFloatsFromFile( readFloatsFromFile(
m_host_kern_buf, // float * data, // allocated array m_host_kern_buf, // float * data, // allocated array
...@@ -149,7 +229,7 @@ void TpHostGpu::setImgBuffersShifted(int is_bayer, int image_dx, int image_dy) { ...@@ -149,7 +229,7 @@ void TpHostGpu::setImgBuffersShifted(int is_bayer, int image_dx, int image_dy) {
void TpHostGpu::setGeometryCorrectionBuffers() { void TpHostGpu::setGeometryCorrectionBuffers() { // 207-231
readFloatsFromFile( readFloatsFromFile(
(float *) &m_fgeometry_correction, // float * data, // allocated array, no need to free (float *) &m_fgeometry_correction, // float * data, // allocated array, no need to free
m_tpPaths.geometry_correction_file); // char * path) // file path m_tpPaths.geometry_correction_file); // char * path) // file path
...@@ -169,8 +249,9 @@ void TpHostGpu::setGeometryCorrectionBuffers() { ...@@ -169,8 +249,9 @@ void TpHostGpu::setGeometryCorrectionBuffers() {
m_rByRDist, m_rByRDist,
m_rByRDist_length); m_rByRDist_length);
checkCudaErrors(cudaMalloc((void **)&m_gpu_rot_deriv, sizeof(trot_deriv))); checkCudaErrors(cudaMalloc((void **)&m_gpu_rot_deriv, sizeof(trot_deriv)));
m_ftask_data1 = (float *) malloc( m_tpParams.tilesx * m_tpParams.tilesy * m_tpParams.task_size * sizeof(float));
} }
void TpHostGpu::setCorrelationBuffers(){ void TpHostGpu::setCorrelationBuffers(){ // 260-281 , 332-333
// allocates one correlation kernel per line (15x15 floats), number of rows - number of tiles * number of pairs // allocates one correlation kernel per line (15x15 floats), number of rows - number of tiles * number of pairs
m_gpu_corrs = alloc_image_gpu( m_gpu_corrs = alloc_image_gpu(
&dstride_corr, // in bytes ! for one 2d phase correlation (padded 15x15x4 bytes) &dstride_corr, // in bytes ! for one 2d phase correlation (padded 15x15x4 bytes)
...@@ -195,12 +276,21 @@ void TpHostGpu::setCorrelationBuffers(){ ...@@ -195,12 +276,21 @@ void TpHostGpu::setCorrelationBuffers(){
// just allocate // just allocate
checkCudaErrors (cudaMalloc((void **)&m_gpu_corr_indices, m_tpParams.num_pairs * m_tpParams.tilesx * m_tpParams.tilesy * sizeof(int))); checkCudaErrors (cudaMalloc((void **)&m_gpu_corr_indices, m_tpParams.num_pairs * m_tpParams.tilesx * m_tpParams.tilesy * sizeof(int)));
checkCudaErrors (cudaMalloc((void **)&m_gpu_corrs_combo_indices, m_tpParams.tilesx * m_tpParams.tilesy * sizeof(int))); checkCudaErrors (cudaMalloc((void **)&m_gpu_corrs_combo_indices, m_tpParams.tilesx * m_tpParams.tilesy * sizeof(int)));
checkCudaErrors (cudaMalloc((void **)&m_gpu_num_corr_tiles, sizeof(int)));
} }
void TpHostGpu::setTasks(const float target_disparity, const float scale){ void TpHostGpu::setTasks( // 129, 302-325
const float target_disparity,
const float scale){
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
readFloatsFromFile(
(float *) &m_tile_coords_h[ncam],
m_tpPaths.ports_offs_xy_file[ncam]); // char * path) // file path
}
// allocate m_ftask_data // allocate m_ftask_data
m_ftask_data = (float *) malloc( m_tpParams.tilesx * m_tpParams.tilesy * m_tpParams.task_size * sizeof(float)); m_ftask_data = (float *) malloc( m_tpParams.tilesx * m_tpParams.tilesy * m_tpParams.task_size * sizeof(float));
// float * ftask_data1 = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float)); // float * ftask_data1 = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float));
// tasks for all tiles // tasks for all tiles
for (int ty = 0; ty < m_tpParams.tilesy ; ty++){ for (int ty = 0; ty < m_tpParams.tilesy ; ty++){
...@@ -224,9 +314,9 @@ void TpHostGpu::setTasks(const float target_disparity, const float scale){ ...@@ -224,9 +314,9 @@ void TpHostGpu::setTasks(const float target_disparity, const float scale){
} }
m_gpu_ftasks = (float *) copyalloc_kernel_gpu(m_ftask_data, m_tpParams.tp_tasks_size * m_tpParams.task_size); // (sizeof(struct tp_task)/sizeof(float))); m_gpu_ftasks = (float *) copyalloc_kernel_gpu(m_ftask_data, m_tpParams.tp_tasks_size * m_tpParams.task_size); // (sizeof(struct tp_task)/sizeof(float)));
} }
void TpHostGpu::setTextures(){ void TpHostGpu::setTextures(){ // 337-348, ??
if (!m_ftask_data) { // ftask_data is not initialized if (!m_ftask_data) { // ftask_data is not initialized
throw std::runtime_error("Error: m_ftask_data is not initialized "); MY_EXCEPTION("m_ftask_data is not initialized ");
} }
m_tile_texture_layers = (m_tpParams.texture_colors + 1 + (m_tpParams.keep_texture_weights? (m_tpParams.num_cams + m_tpParams.texture_colors + 1): 0)); m_tile_texture_layers = (m_tpParams.texture_colors + 1 + (m_tpParams.keep_texture_weights? (m_tpParams.num_cams + m_tpParams.texture_colors + 1): 0));
m_tile_texture_size = m_tile_texture_layers *256; m_tile_texture_size = m_tile_texture_layers *256;
...@@ -236,7 +326,7 @@ void TpHostGpu::setTextures(){ ...@@ -236,7 +326,7 @@ void TpHostGpu::setTextures(){
for (int tx = 0; tx < m_tpParams.tilesx; tx++){ for (int tx = 0; tx < m_tpParams.tilesx; tx++){
int nt = ty * m_tpParams.tilesx + tx; int nt = ty * m_tpParams.tilesx + tx;
float *tp = m_ftask_data + m_tpParams.task_size * nt; float *tp = m_ftask_data + m_tpParams.task_size * nt;
int cm = (*(int *) tp) & m_tpParams.task_texture_bits; // non-zero any of 4 lower task bits int cm = (*(int *) tp) & (m_tpParams.task_texture_bits | (1 << m_tpParams.task_text_en)); // non-zero any of 8 lower task bits or bit 8 (TASK_TEXT_EN)
if (cm){ if (cm){
m_texture_indices[m_num_textures++] = (nt << m_tpParams.text_ntile_shift) | (1 << m_tpParams.list_texture_bit); // setting 0x80 in texture indices m_texture_indices[m_num_textures++] = (nt << m_tpParams.text_ntile_shift) | (1 << m_tpParams.list_texture_bit); // setting 0x80 in texture indices
} }
...@@ -254,7 +344,7 @@ void TpHostGpu::setTextures(){ ...@@ -254,7 +344,7 @@ void TpHostGpu::setTextures(){
m_tpParams.tilesx * m_tpParams.tilesy); // int height); m_tpParams.tilesx * m_tpParams.tilesy); // int height);
} }
void TpHostGpu::setRGBA(){ void TpHostGpu::setRGBA(){ // 377 - 390
/* /*
// appears in setTextures // appears in setTextures
m_tile_texture_layers = (m_tpParams.texture_colors + 1 + (m_tpParams.keep_texture_weights? (m_tpParams.num_cams + m_tpParams.texture_colors + 1): 0)); m_tile_texture_layers = (m_tpParams.texture_colors + 1 + (m_tpParams.keep_texture_weights? (m_tpParams.num_cams + m_tpParams.texture_colors + 1): 0));
...@@ -281,14 +371,14 @@ void TpHostGpu::setRGBA(){ ...@@ -281,14 +371,14 @@ void TpHostGpu::setRGBA(){
(m_tpParams.num_colors + 1) * sizeof(float))); (m_tpParams.num_colors + 1) * sizeof(float)));
} }
trot_deriv TpHostGpu::testRotMatrices (int num_runs){ // 424 trot_deriv TpHostGpu::testRotMatrices (int num_runs){ // 420
int numIterations = m_tpParams.debug_tile ? 1 : num_runs; int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_correction_vector){ if (!m_gpu_correction_vector){
throw std::runtime_error("Error: m_gpu_correction_vector is not initialized "); MY_EXCEPTION("m_gpu_correction_vector is not initialized ");
} }
if (!m_gpu_rot_deriv){ if (!m_gpu_rot_deriv){
throw std::runtime_error("Error: m_gpu_rot_deriv is not initialized "); MY_EXCEPTION("m_gpu_rot_deriv is not initialized ");
} }
dim3 threads_rot (3,3,3); dim3 threads_rot (3,3,3);
dim3 grid_rot (m_tpParams.num_cams, 1, 1); dim3 grid_rot (m_tpParams.num_cams, 1, 1);
...@@ -328,13 +418,13 @@ void TpHostGpu::testReverseDistortions (int num_runs){ // 468 ...@@ -328,13 +418,13 @@ void TpHostGpu::testReverseDistortions (int num_runs){ // 468
int numIterations = m_tpParams.debug_tile ? 1 : num_runs; int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_geometry_correction){ if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized "); MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
} }
if (!m_gpu_rByRDist){ if (!m_gpu_rByRDist){
throw std::runtime_error("Error: m_gpu_rByRDist is not initialized "); MY_EXCEPTION("m_gpu_rByRDist is not initialized ");
} }
if (!m_rByRDist){ if (!m_rByRDist){
throw std::runtime_error("Error: m_rByRDist is not initialized "); MY_EXCEPTION("m_rByRDist is not initialized ");
} }
dim3 threads_rd(3,3,3); dim3 threads_rd(3,3,3);
dim3 grid_rd (NUM_CAMS, 1, 1); // can get rid of NUM_CAMS dim3 grid_rd (NUM_CAMS, 1, 1); // can get rid of NUM_CAMS
...@@ -389,25 +479,25 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534 ...@@ -389,25 +479,25 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534
int numIterations = m_tpParams.debug_tile ? 1 : num_runs; int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_ftasks){ if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized "); MY_EXCEPTION("m_gpu_ftasks is not initialized ");
} }
if (!m_gpu_geometry_correction){ if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized "); MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
} }
if (!m_gpu_correction_vector){ if (!m_gpu_correction_vector){
throw std::runtime_error("Error: m_gpu_correction_vector is not initialized "); MY_EXCEPTION("m_gpu_correction_vector is not initialized ");
} }
if (!m_gpu_rByRDist){ if (!m_gpu_rByRDist){
throw std::runtime_error("Error: m_gpu_rByRDist is not initialized "); MY_EXCEPTION("m_gpu_rByRDist is not initialized ");
} }
if (!m_gpu_rot_deriv){ if (!m_gpu_rot_deriv){
throw std::runtime_error("Error: m_gpu_rot_deriv is not initialized "); MY_EXCEPTION("m_gpu_rot_deriv is not initialized ");
} }
if (!m_ftask_data){ if (!m_ftask_data){
throw std::runtime_error("Error: m_ftask_data is not initialized "); MY_EXCEPTION("m_ftask_data is not initialized ");
} }
if (!m_ftask_data1){ if (!m_ftask_data1){
throw std::runtime_error("Error: m_ftask_data1 is not initialized "); MY_EXCEPTION("m_ftask_data1 is not initialized");
} }
dim3 threads_geom(m_tpParams.num_cams, m_tpParams.tiles_per_block_geom, 1); dim3 threads_geom(m_tpParams.num_cams, m_tpParams.tiles_per_block_geom, 1);
dim3 grid_geom ((m_tpParams.tp_tasks_size + m_tpParams.tiles_per_block_geom-1)/m_tpParams.tiles_per_block_geom, 1, 1); dim3 grid_geom ((m_tpParams.tp_tasks_size + m_tpParams.tiles_per_block_geom-1)/m_tpParams.tiles_per_block_geom, 1, 1);
...@@ -440,8 +530,8 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534 ...@@ -440,8 +530,8 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534
sdkStopTimer(&timerGEOM); sdkStopTimer(&timerGEOM);
float avgTimeGEOM = (float)sdkGetTimerValue(&timerGEOM) / (float)numIterations; float avgTimeGEOM = (float)sdkGetTimerValue(&timerGEOM) / (float)numIterations;
sdkDeleteTimer(&timerGEOM); sdkDeleteTimer(&timerGEOM);
printf("Average TextureList run time =%f ms\n", avgTimeGEOM); // printf("Average TextureList run time =%f ms\n", avgTimeGEOM);
printf("Average calculate_tiles_offsets run time =%f ms\n", avgTimeGEOM);
checkCudaErrors(cudaMemcpy( // copy modified/calculated tasks checkCudaErrors(cudaMemcpy( // copy modified/calculated tasks
m_ftask_data1, m_ftask_data1,
m_gpu_ftasks, m_gpu_ftasks,
...@@ -453,7 +543,7 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534 ...@@ -453,7 +543,7 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534
struct tp_task * new_task = &task_data1[DBG_TILE]; struct tp_task * new_task = &task_data1[DBG_TILE];
#endif #endif
if( m_tpParams.debug_tile) { if( m_tpParams.debug_tile) {
printf("old_task txy = 0x%x\n", *(int *) (m_ftask_data + m_tpParams.tp_tasks_size * m_tpParams.dbg_tile + 1)) ; // task_data [DBG_TILE].txy); printf("old_task txy = 0x%x\n", *(int *) (m_ftask_data + m_tpParams.task_size * m_tpParams.dbg_tile + 1)) ; // task_data [DBG_TILE].txy);
printf("new_task txy = 0x%x\n", *(int *) (m_ftask_data1 + m_tpParams.task_size * m_tpParams.dbg_tile + 1)) ; // task_data1[DBG_TILE].txy); printf("new_task txy = 0x%x\n", *(int *) (m_ftask_data1 + m_tpParams.task_size * m_tpParams.dbg_tile + 1)) ; // task_data1[DBG_TILE].txy);
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++){ for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++){
printf("camera %d pX old %f new %f diff = %f\n", ncam, printf("camera %d pX old %f new %f diff = %f\n", ncam,
...@@ -474,25 +564,25 @@ void TpHostGpu::testConvertDirect (int num_runs){ // 608 ...@@ -474,25 +564,25 @@ void TpHostGpu::testConvertDirect (int num_runs){ // 608
int numIterations = m_tpParams.debug_tile ? 1 : num_runs; int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_kernel_offsets){ if (!m_gpu_kernel_offsets){
throw std::runtime_error("Error: m_gpu_kernel_offsets is not initialized "); MY_EXCEPTION("m_gpu_kernel_offsets is not initialized ");
} }
if (!m_gpu_kernels){ if (!m_gpu_kernels){
throw std::runtime_error("Error: m_gpu_kernels is not initialized "); MY_EXCEPTION("m_gpu_kernels is not initialized ");
} }
if (!m_gpu_images){ if (!m_gpu_images){
throw std::runtime_error("Error: m_gpu_images is not initialized "); MY_EXCEPTION("m_gpu_images is not initialized ");
} }
if (!m_gpu_ftasks){ if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized "); MY_EXCEPTION("m_gpu_ftasks is not initialized ");
} }
if (!m_gpu_clt){ if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized "); MY_EXCEPTION("m_gpu_clt is not initialized ");
} }
if (!m_gpu_active_tiles){ if (!m_gpu_active_tiles){
throw std::runtime_error("Error: m_gpu_active_tiles is not initialized "); MY_EXCEPTION("m_gpu_active_tiles is not initialized "); //*
} }
if (!m_gpu_num_active){ if (!m_gpu_num_active){
throw std::runtime_error("Error: m_gpu_num_active is not initialized "); MY_EXCEPTION("m_gpu_num_active is not initialized ");
} }
//create and start CUDA timer //create and start CUDA timer
StopWatchInterface *timerTP = 0; StopWatchInterface *timerTP = 0;
...@@ -524,7 +614,7 @@ void TpHostGpu::testConvertDirect (int num_runs){ // 608 ...@@ -524,7 +614,7 @@ void TpHostGpu::testConvertDirect (int num_runs){ // 608
m_tpParams.img_height, // IMG_HEIGHT, // int woi_height, m_tpParams.img_height, // IMG_HEIGHT, // int woi_height,
0, // m_tpParams.kernels_hor, // KERNELS_HOR, // int kernels_hor, 0, // m_tpParams.kernels_hor, // KERNELS_HOR, // int kernels_hor,
m_tpParams.kernels_hor, // KERNELS_VERT, // int kernels_vert); m_tpParams.kernels_hor, // KERNELS_VERT, // int kernels_vert);
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
m_gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks m_gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
m_tpParams.tilesx); // TILESX); // int tilesx) m_tpParams.tilesx); // TILESX); // int tilesx)
printf("HOST: convert_direct() done\n"); printf("HOST: convert_direct() done\n");
...@@ -557,7 +647,7 @@ void TpHostGpu::testImclt (int num_runs){ // 682 ...@@ -557,7 +647,7 @@ void TpHostGpu::testImclt (int num_runs){ // 682
int numIterations = m_tpParams.debug_tile ? 1 : num_runs; int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_geometry_correction){ if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized "); MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
} }
} }
*/ */
...@@ -565,10 +655,10 @@ void TpHostGpu::testImcltRbgAll (int num_runs){ // 701 ...@@ -565,10 +655,10 @@ void TpHostGpu::testImcltRbgAll (int num_runs){ // 701
int numIterations = m_tpParams.debug_tile ? 1 : num_runs; int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_clt){ if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized "); MY_EXCEPTION("m_gpu_clt is not initialized ");
} }
if (!m_gpu_corr_images){ if (!m_gpu_corr_images){
throw std::runtime_error("Error: m_gpu_corr_images is not initialized "); MY_EXCEPTION("m_gpu_corr_images is not initialized ");
} }
StopWatchInterface *timerIMCLT = 0; StopWatchInterface *timerIMCLT = 0;
sdkCreateTimer(&timerIMCLT); sdkCreateTimer(&timerIMCLT);
...@@ -611,13 +701,13 @@ void TpHostGpu::testCorrelate2DIntra(int num_runs){ ...@@ -611,13 +701,13 @@ void TpHostGpu::testCorrelate2DIntra(int num_runs){
int numIterations = m_tpParams.debug_tile ? 1 : num_runs; int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_clt){ if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized "); MY_EXCEPTION("m_gpu_clt is not initialized ");
} }
if (!m_gpu_ftasks) { if (!m_gpu_ftasks) {
throw std::runtime_error("Error: m_gpu_ftasks is not initialized "); MY_EXCEPTION("m_gpu_ftasks is not initialized ");
} }
if (!m_gpu_corrs) { if (!m_gpu_corrs) {
throw std::runtime_error("Error: m_gpu_corrs is not initialized "); MY_EXCEPTION("m_gpu_corrs is not initialized ");
} }
StopWatchInterface *timerCORR = 0; StopWatchInterface *timerCORR = 0;
sdkCreateTimer(&timerCORR); sdkCreateTimer(&timerCORR);
...@@ -679,28 +769,28 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886 ...@@ -679,28 +769,28 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names // check/replace names
if (!m_gpu_clt){ if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized "); MY_EXCEPTION("m_gpu_clt is not initialized ");
} }
if (!m_gpu_corrs_td){ if (!m_gpu_corrs_td){
throw std::runtime_error("Error: m_gpu_corrs_td is not initialized "); MY_EXCEPTION("m_gpu_corrs_td is not initialized ");
} }
if (!m_gpu_ftasks){ if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized "); MY_EXCEPTION("m_gpu_ftasks is not initialized ");
} }
if (!m_gpu_corr_indices){ if (!m_gpu_corr_indices){
throw std::runtime_error("Error: m_gpu_corr_indices is not initialized "); MY_EXCEPTION("m_gpu_corr_indices is not initialized ");
} }
if (!m_gpu_num_corr_tiles){ if (!m_gpu_num_corr_tiles){
throw std::runtime_error("Error: m_gpu_num_corr_tiles is not initialized "); MY_EXCEPTION("m_gpu_num_corr_tiles is not initialized ");
} }
if (!m_gpu_corrs_combo_td){ if (!m_gpu_corrs_combo_td){
throw std::runtime_error("Error: m_gpu_corrs_combo_td is not initialized "); MY_EXCEPTION("m_gpu_corrs_combo_td is not initialized ");
} }
if (!m_gpu_corrs_combo_indices){ if (!m_gpu_corrs_combo_indices){
throw std::runtime_error("Error: m_gpu_corrs_combo_indices is not initialized "); MY_EXCEPTION("m_gpu_corrs_combo_indices is not initialized ");
} }
if (!m_gpu_corrs_combo){ if (!m_gpu_corrs_combo){
throw std::runtime_error("Error: m_gpu_corrs_combo is not initialized "); MY_EXCEPTION("m_gpu_corrs_combo is not initialized ");
} }
//m_gpu_corrs_combo //m_gpu_corrs_combo
// testing corr // testing corr
...@@ -871,7 +961,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886 ...@@ -871,7 +961,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
corr_img_size, // int corr_img_size, corr_img_size, // int corr_img_size,
num_corr_indices, //int num_corr_indices, num_corr_indices, //int num_corr_indices,
cpu_corr_indices, // int * cpu_corr_indices, cpu_corr_indices, // int * cpu_corr_indices,
m_gpu_corrs_td, // float * cpu_corr, cpu_corr_td, // float * cpu_corr,
16); // num_sel_sensors); // int num_sel_sensors) // // Will not be used 16); // num_sel_sensors); // int num_sel_sensors) // // Will not be used
printf("Writing intrascene phase correlation TD data tp %s\n", m_tpPaths.result_intrascene_td); printf("Writing intrascene phase correlation TD data tp %s\n", m_tpPaths.result_intrascene_td);
...@@ -892,13 +982,13 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889 ...@@ -892,13 +982,13 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names // check/replace names
if (!m_gpu_clt){ if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized "); MY_EXCEPTION("m_gpu_clt is not initialized ");
} }
if (!m_gpu_ftasks) { if (!m_gpu_ftasks) {
throw std::runtime_error("Error: m_gpu_ftasks is not initialized "); MY_EXCEPTION("m_gpu_ftasks is not initialized ");
} }
if (!m_gpu_corrs) { if (!m_gpu_corrs) {
throw std::runtime_error("Error: m_gpu_corrs is not initialized "); MY_EXCEPTION("m_gpu_corrs is not initialized ");
} }
int sel_sensors = 0xffff; // 0x7fff; // 0xffff; int sel_sensors = 0xffff; // 0x7fff; // 0xffff;
int num_sel_sensors = 16; // 15; // 16; int num_sel_sensors = 16; // 15; // 16;
...@@ -932,7 +1022,7 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889 ...@@ -932,7 +1022,7 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
m_tpParams.img_height, // int woi_height, m_tpParams.img_height, // int woi_height,
m_tpParams.kernels_hor, // int kernels_hor, m_tpParams.kernels_hor, // int kernels_hor,
m_tpParams.kernels_vert, //, // int kernels_vert); m_tpParams.kernels_vert, //, // int kernels_vert);
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
m_gpu_num_active, // int * pnum_active_tiles); // indices to gpu_tasks m_gpu_num_active, // int * pnum_active_tiles); // indices to gpu_tasks
m_tpParams.tilesx); // int tilesx) m_tpParams.tilesx); // int tilesx)
getLastCudaError("Kernel execution failed"); getLastCudaError("Kernel execution failed");
...@@ -957,7 +1047,7 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889 ...@@ -957,7 +1047,7 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
m_tpParams.img_height, // int woi_height, m_tpParams.img_height, // int woi_height,
m_tpParams.kernels_hor, // int kernels_hor, m_tpParams.kernels_hor, // int kernels_hor,
m_tpParams.kernels_vert, //, // int kernels_vert); m_tpParams.kernels_vert, //, // int kernels_vert);
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
m_gpu_num_active, // int * pnum_active_tiles); // indices to gpu_tasks m_gpu_num_active, // int * pnum_active_tiles); // indices to gpu_tasks
m_tpParams.tilesx); // int tilesx) m_tpParams.tilesx); // int tilesx)
getLastCudaError("Kernel execution failed"); getLastCudaError("Kernel execution failed");
...@@ -1041,25 +1131,25 @@ void TpHostGpu::testTextures ( ...@@ -1041,25 +1131,25 @@ void TpHostGpu::testTextures (
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names // check/replace names
if (!m_gpu_ftasks){ if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized "); MY_EXCEPTION("m_gpu_ftasks is not initialized ");
} }
if (!m_gpu_texture_indices){ if (!m_gpu_texture_indices){
throw std::runtime_error("Error: m_gpu_texture_indices is not initialized "); MY_EXCEPTION("m_gpu_texture_indices is not initialized ");
} }
if (!m_gpu_clt){ if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized "); MY_EXCEPTION("m_gpu_clt is not initialized ");
} }
if (!m_gpu_geometry_correction){ if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized "); MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
} }
if (!m_gpu_textures){ if (!m_gpu_textures){
throw std::runtime_error("Error: m_gpu_textures is not initialized "); MY_EXCEPTION("m_gpu_textures is not initialized ");
} }
if (!m_gpu_diff_rgb_combo){ if (!m_gpu_diff_rgb_combo){
throw std::runtime_error("Error: m_gpu_diff_rgb_combo is not initialized "); MY_EXCEPTION("m_gpu_diff_rgb_combo is not initialized ");
} }
if (!m_gpu_generate_RBGA_params){ if (!m_gpu_generate_RBGA_params){
throw std::runtime_error("Error: m_gpu_generate_RBGA_params is not initialized "); MY_EXCEPTION("m_gpu_generate_RBGA_params is not initialized ");
} }
dim3 threads0(m_tpParams.convert_direct_indexing_threads, 1, 1); dim3 threads0(m_tpParams.convert_direct_indexing_threads, 1, 1);
dim3 blocks0 ((m_tpParams.tp_tasks_size + m_tpParams.convert_direct_indexing_threads -1) >> m_tpParams.convert_direct_indexing_threads_log2,1, 1); dim3 blocks0 ((m_tpParams.tp_tasks_size + m_tpParams.convert_direct_indexing_threads -1) >> m_tpParams.convert_direct_indexing_threads_log2,1, 1);
...@@ -1293,31 +1383,31 @@ void TpHostGpu::testTexturesRGBA ( ...@@ -1293,31 +1383,31 @@ void TpHostGpu::testTexturesRGBA (
int i0 = m_tpParams.debug_tile ? 0 : -1; int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names // check/replace names
if (!m_gpu_ftasks){ if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized "); MY_EXCEPTION("m_gpu_ftasks is not initialized ");
} }
if (!m_gpu_texture_indices){ if (!m_gpu_texture_indices){
throw std::runtime_error("Error: m_gpu_texture_indices is not initialized "); MY_EXCEPTION("m_gpu_texture_indices is not initialized ");
} }
if (!m_gpu_clt){ if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized "); MY_EXCEPTION("m_gpu_clt is not initialized ");
} }
if (!m_gpu_geometry_correction){ if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized "); MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
} }
if (!m_gpu_num_texture_tiles){ if (!m_gpu_num_texture_tiles){
throw std::runtime_error("Error: m_gpu_num_texture_tiles is not initialized "); MY_EXCEPTION("m_gpu_num_texture_tiles is not initialized ");
} }
if (!m_gpu_textures_rbga){ if (!m_gpu_textures_rbga){
throw std::runtime_error("Error: m_gpu_textures_rbga is not initialized "); MY_EXCEPTION("m_gpu_textures_rbga is not initialized ");
} }
if (!m_gpu_generate_RBGA_params){ if (!m_gpu_generate_RBGA_params){
throw std::runtime_error("Error: m_gpu_generate_RBGA_params is not initialized "); MY_EXCEPTION("m_gpu_generate_RBGA_params is not initialized ");
} }
if (!m_gpu_twh){ if (!m_gpu_twh){
throw std::runtime_error("Error: m_gpu_twh is not initialized "); MY_EXCEPTION("m_gpu_twh is not initialized ");
} }
if (!m_gpu_woi){ if (!m_gpu_woi){
throw std::runtime_error("Error: m_gpu_woi is not initialized "); MY_EXCEPTION("m_gpu_woi is not initialized ");
} }
dim3 threads_rgba(1, 1, 1); dim3 threads_rgba(1, 1, 1);
dim3 grid_rgba(1,1,1); dim3 grid_rgba(1,1,1);
...@@ -1597,7 +1687,7 @@ void TpHostGpu::saveInterCorrFile( ...@@ -1597,7 +1687,7 @@ void TpHostGpu::saveInterCorrFile(
corr_img_size, // int corr_img_size, corr_img_size, // int corr_img_size,
num_corr_indices, //int num_corr_indices, num_corr_indices, //int num_corr_indices,
cpu_corr_indices, // int * cpu_corr_indices, cpu_corr_indices, // int * cpu_corr_indices,
gpu_corrs_td, // float * cpu_corr, cpu_corr_td, // float * cpu_corr,
num_sel_sensors); // int num_sel_sensors) num_sel_sensors); // int num_sel_sensors)
printf("Writing %s TD data to %s\n", prompt, path); printf("Writing %s TD data to %s\n", prompt, path);
writeFloatsToFile( writeFloatsToFile(
...@@ -1705,7 +1795,7 @@ float * TpHostGpu::getCorrTdImg( ...@@ -1705,7 +1795,7 @@ float * TpHostGpu::getCorrTdImg(
int src_offs = src_offs0 + iy * m_tpParams.dtt_size + iquad * m_tpParams.dtt_size * m_tpParams.dtt_size; int src_offs = src_offs0 + iy * m_tpParams.dtt_size + iquad * m_tpParams.dtt_size * m_tpParams.dtt_size;
int dst_offs = dst_offs0 + (iy + m_tpParams.dtt_size * iqy)* (m_tpParams.tilesx * 16) + iqx * m_tpParams.dtt_size; int dst_offs = dst_offs0 + (iy + m_tpParams.dtt_size * iqy)* (m_tpParams.tilesx * 16) + iqx * m_tpParams.dtt_size;
for (int ix = 0; ix < m_tpParams.dtt_size; ix++){ for (int ix = 0; ix < m_tpParams.dtt_size; ix++){
corr_img[dst_offs++] = cpu_corr_td[src_offs++]; corr_img[dst_offs++] = cpu_corr_td[src_offs++]; // segmentation
} }
} }
} }
...@@ -1994,5 +2084,5 @@ void TpHostGpu::gfree(struct gc * p) {if (p) checkCudaErrors(cudaFree(p ...@@ -1994,5 +2084,5 @@ void TpHostGpu::gfree(struct gc * p) {if (p) checkCudaErrors(cudaFree(p
void TpHostGpu::gfree(struct corr_vector * p) {if (p) checkCudaErrors(cudaFree(p));p = {};} void TpHostGpu::gfree(struct corr_vector * p) {if (p) checkCudaErrors(cudaFree(p));p = {};}
void TpHostGpu::gfree(struct trot_deriv * p) {if (p) checkCudaErrors(cudaFree(p));p = {};} void TpHostGpu::gfree(struct trot_deriv * p) {if (p) checkCudaErrors(cudaFree(p));p = {};}
void TpHostGpu::gfree(float ** p) {if (p) checkCudaErrors(cudaFree(p));p = {};} void TpHostGpu::gfree(float ** p) {if (p) checkCudaErrors(cudaFree(p));p = {};}
void TpHostGpu::gfree(struct CltExtra ** p) {if (p) checkCudaErrors(cudaFree(p));p = {};}
//CltExtra
...@@ -111,18 +111,27 @@ public: ...@@ -111,18 +111,27 @@ public:
,m_host_kern_buf{(float *) malloc(tpParams.kern_size * sizeof(float))} ,m_host_kern_buf{(float *) malloc(tpParams.kern_size * sizeof(float))}
{}; {};
~TpHostGpu(); ~TpHostGpu();
void setImageKernels(); void allTests(
void setCltBuffers(); int num_runs,
void setCorrImgBuffers(); int image_dx, // 2
void setImgBuffers(); int image_dy, // 0
void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); const float target_disparity, // DBG_DISPARITY == 0.0
void setGeometryCorrectionBuffers(); const float scale, // 0.0
void setCorrelationBuffers(); int quad_combine,
void setTasks(const float target_disparity, const float scale); int use_dp,
void setTextures(); int debug);
void setRGBA(); void setImageKernels(); // 233-258 (overlap)
void setCltBuffers(); // 246
trot_deriv testRotMatrices (int num_runs); // 424 void setCorrImgBuffers(); // 252
void setImgBuffers(); // 283-292
void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188 SHOULD be called before testCorrelate2DInterSelf
void setGeometryCorrectionBuffers(); // 207-231
void setCorrelationBuffers(); // 260-281 , 332-333
void setTasks(const float target_disparity, const float scale); // 129, 302-325
void setTextures(); // 337-348, ??
void setRGBA(); // 377 - 390
trot_deriv testRotMatrices (int num_runs); // 420
void testReverseDistortions (int num_runs); // 468 void testReverseDistortions (int num_runs); // 468
void testGeomCorrect (int num_runs); // 534 void testGeomCorrect (int num_runs); // 534
void testConvertDirect (int num_runs); // 608 void testConvertDirect (int num_runs); // 608
...@@ -131,8 +140,8 @@ public: ...@@ -131,8 +140,8 @@ public:
void testCorrelate2DIntra (int num_runs); // 762 - 885 void testCorrelate2DIntra (int num_runs); // 762 - 885
void testCorrelate2DIntraTD (int num_runs, int quad_combine); // 886 - 1123 void testCorrelate2DIntraTD (int num_runs, int quad_combine); // 886 - 1123
//void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
void testCorrelate2DInterSelf(int num_runs); // 1136 - 1411 void testCorrelate2DInterSelf(int num_runs); // 1136 - 1411
void testTextures (int num_runs, int use_dp, int debug); // 1422-1664 void testTextures (int num_runs, int use_dp, int debug); // 1422-1664
void testTexturesRGBA (int num_runs, int use_dp, int debug); // 1669-1810 void testTexturesRGBA (int num_runs, int use_dp, int debug); // 1669-1810
...@@ -149,7 +158,7 @@ private: ...@@ -149,7 +158,7 @@ private:
float * getCorrImg (int corr_img_size, int num_corr_indices, int * cpu_corr_indices, float * cpu_corr, int num_sel_sensors); float * getCorrImg (int corr_img_size, int num_corr_indices, int * cpu_corr_indices, float * cpu_corr, int num_sel_sensors);
float * getCorrTdImg(int corr_img_size, int num_corr_indices, int * cpu_corr_indices, float * cpu_corr_td, int num_sel_sensors); float * getCorrTdImg(int corr_img_size, int num_corr_indices, int * cpu_corr_indices, float * cpu_corr_td, int num_sel_sensors);
void generate_RBGA_host( void generate_RBGA_host( // not a member
int num_cams, // number of cameras used int num_cams, // number of cameras used
// Parameters to generate texture tasks // Parameters to generate texture tasks
float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16p// struct tp_task * gpu_tasks, float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16p// struct tp_task * gpu_tasks,
...@@ -183,6 +192,7 @@ private: ...@@ -183,6 +192,7 @@ private:
void gfree(struct corr_vector * p); void gfree(struct corr_vector * p);
void gfree(struct trot_deriv * p); void gfree(struct trot_deriv * p);
void gfree(float ** p); void gfree(float ** p);
void gfree(struct CltExtra ** p);
}; };
......
...@@ -97,8 +97,6 @@ ...@@ -97,8 +97,6 @@
* *
* \return Status code * \return Status code
*/ */
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
// //
...@@ -106,41 +104,40 @@ int main(int argc, char **argv) ...@@ -106,41 +104,40 @@ int main(int argc, char **argv)
// //
printf("%s Starting...\n\n", argv[0]); printf("%s Starting...\n\n", argv[0]);
printf("sizeof(float*)=%d\n",(int)sizeof(float*)); printf("sizeof(float*)=%d\n",(int)sizeof(float*));
//initialize CUDA //initialize CUDA
findCudaDevice(argc, (const char **)argv); findCudaDevice(argc, (const char **)argv);
float fat_zero = 1000.0f; // 300.0f; // 30.0; float fat_zero = 1000.0f; // 300.0f; // 30.0;
int is_bayer = 0; // from 1136
int image_dx = 2;
int image_dy = 0;
#if TEST_LWIR #if TEST_LWIR
int use_lwir= 1; int use_lwir= 1;
#else #else
int use_lwir= 0; int use_lwir= 0;
#endif #endif
class YourClass
{
std::vector<int> myVector;
// ...
};
TpParams tpParams(use_lwir); TpParams tpParams(use_lwir);
TpPaths tpPaths(use_lwir); TpPaths tpPaths(use_lwir);
TpHostGpu tpHostGpu(tpParams,tpPaths); TpHostGpu tpHostGpu(tpParams,tpPaths);
/* */
GenerateRgbaHost generateRgbaHost{}; // = new GenerateRgbaHost(); tpHostGpu.allTests(
10, // int num_runs,
2, // int image_dx, // 2
0, // int image_dy, // 0
0.0, // const float target_disparity, // DBG_DISPARITY == 0.0
0.0, // const float scale, // 0.0
0, // int quad_combine,
0, // int use_dp,
0); // int debug);
return 0;
/* */
GenerateRgbaHost generateRgbaHost{}; // = new GenerateRgbaHost(); Remove files, use tpHostGpu::
// return 0; // return 0;
float * host_kern_buf = (float *) malloc(tpParams.kern_size * sizeof(float)); float * host_kern_buf = (float *) malloc(tpParams.kern_size * sizeof(float));
float * ftask_data = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float)); float * ftask_data = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float));
float * ftask_data1 = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float)); float * ftask_data1 = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float));
trot_deriv rot_deriv; trot_deriv rot_deriv;
int texture_indices [TILESX*TILESYA]; int texture_indices [TILESX*TILESYA];
int cpu_woi [4]; int cpu_woi [4];
// host array of pointers to GPU memory // host array of pointers to GPU memory
float * gpu_kernels_h [tpParams.num_cams]; float * gpu_kernels_h [tpParams.num_cams];
struct CltExtra * gpu_kernel_offsets_h [tpParams.num_cams]; struct CltExtra * gpu_kernel_offsets_h [tpParams.num_cams];
...@@ -148,14 +145,12 @@ int main(int argc, char **argv) ...@@ -148,14 +145,12 @@ int main(int argc, char **argv)
float tile_coords_h [tpParams.num_cams][TILESX * TILESY][2]; float tile_coords_h [tpParams.num_cams][TILESX * TILESY][2];
float * gpu_clt_h [tpParams.num_cams]; float * gpu_clt_h [tpParams.num_cams];
float * gpu_corr_images_h [tpParams.num_cams]; float * gpu_corr_images_h [tpParams.num_cams];
float * gpu_corrs; // correlation tiles (per tile, per pair) in pixel domain float * gpu_corrs; // correlation tiles (per tile, per pair) in pixel domain
float * gpu_corrs_td; // correlation tiles (per tile, per pair) in transform domain float * gpu_corrs_td; // correlation tiles (per tile, per pair) in transform domain
int * gpu_corr_indices; // shared by gpu_corrs gpu_corrs_td int * gpu_corr_indices; // shared by gpu_corrs gpu_corrs_td
float * gpu_corrs_combo; // correlation tiles combined (1 per tile), pixel domain float * gpu_corrs_combo; // correlation tiles combined (1 per tile), pixel domain
float * gpu_corrs_combo_td; // correlation tiles combined (1 per tile), transform domain float * gpu_corrs_combo_td; // correlation tiles combined (1 per tile), transform domain
int * gpu_corrs_combo_indices; // shared by gpu_corrs_combo and gpu_corrs_combo_td int * gpu_corrs_combo_indices; // shared by gpu_corrs_combo and gpu_corrs_combo_td
float * gpu_textures; float * gpu_textures;
float * gpu_diff_rgb_combo; float * gpu_diff_rgb_combo;
float * gpu_textures_rbga; float * gpu_textures_rbga;
...@@ -176,7 +171,6 @@ int main(int argc, char **argv) ...@@ -176,7 +171,6 @@ int main(int argc, char **argv)
float ** gpu_corr_images; // [NUM_CAMS]; float ** gpu_corr_images; // [NUM_CAMS];
// GPU pointers to GPU memory // GPU pointers to GPU memory
float * gpu_ftasks; // TODO: ***** allocate ! **** DONE float * gpu_ftasks; // TODO: ***** allocate ! **** DONE
int * gpu_active_tiles; int * gpu_active_tiles;
...@@ -344,7 +338,7 @@ int main(int argc, char **argv) ...@@ -344,7 +338,7 @@ int main(int argc, char **argv)
for (int tx = 0; tx < TILESX; tx++){ for (int tx = 0; tx < TILESX; tx++){
int nt = ty * TILESX + tx; int nt = ty * TILESX + tx;
float *tp = ftask_data + tpParams.task_size * nt; float *tp = ftask_data + tpParams.task_size * nt;
int cm = (*(int *) tp) & TASK_TEXTURE_BITS; // non-zero any of 4 lower task bits int cm = (*(int *) tp) & (TASK_TEXTURE_BITS | (1 << TASK_TEXT_EN)); // non-zero any of 8 lower task bits or bit 8 (TASK_TEXT_EN)
if (cm){ if (cm){
texture_indices[num_textures++] = (nt << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT); // setting 0x80 in texture indices texture_indices[num_textures++] = (nt << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT); // setting 0x80 in texture indices
} }
...@@ -640,7 +634,7 @@ int main(int argc, char **argv) ...@@ -640,7 +634,7 @@ int main(int argc, char **argv)
IMG_HEIGHT, // int woi_height, IMG_HEIGHT, // int woi_height,
0, // KERNELS_HOR, // int kernels_hor, 0, // KERNELS_HOR, // int kernels_hor,
KERNELS_VERT, // int kernels_vert); KERNELS_VERT, // int kernels_vert);
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx) TILESX); // int tilesx)
printf("HOST: convert_direct() done\n"); printf("HOST: convert_direct() done\n");
...@@ -1138,9 +1132,9 @@ int main(int argc, char **argv) ...@@ -1138,9 +1132,9 @@ int main(int argc, char **argv)
int num_sel_sensors = 16; // 15; // 16; int num_sel_sensors = 16; // 15; // 16;
int num_pairs_inter = num_sel_sensors+1; int num_pairs_inter = num_sel_sensors+1;
num_corr_indices = num_pairs_inter * num_tiles; num_corr_indices = num_pairs_inter * num_tiles;
int is_bayer = 0; // int is_bayer = 0;
int image_dx = 2; // int image_dx = 2;
int image_dy = 0; // int image_dy = 0;
float * gpu_clt_ref_h [tpParams.num_cams]; float * gpu_clt_ref_h [tpParams.num_cams];
float ** gpu_clt_ref; // [NUM_CAMS]; float ** gpu_clt_ref; // [NUM_CAMS];
...@@ -1166,7 +1160,7 @@ int main(int argc, char **argv) ...@@ -1166,7 +1160,7 @@ int main(int argc, char **argv)
IMG_HEIGHT, // int woi_height, IMG_HEIGHT, // int woi_height,
KERNELS_HOR, // int kernels_hor, KERNELS_HOR, // int kernels_hor,
KERNELS_VERT, // int kernels_vert); KERNELS_VERT, // int kernels_vert);
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx) TILESX); // int tilesx)
...@@ -1208,7 +1202,7 @@ int main(int argc, char **argv) ...@@ -1208,7 +1202,7 @@ int main(int argc, char **argv)
IMG_HEIGHT, // int woi_height, IMG_HEIGHT, // int woi_height,
KERNELS_HOR, // int kernels_hor, KERNELS_HOR, // int kernels_hor,
KERNELS_VERT, // int kernels_vert); KERNELS_VERT, // int kernels_vert);
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx) TILESX); // int tilesx)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment