Commit 32ed475a authored by Andrey Filippov's avatar Andrey Filippov

testing refactored

parent b469db50
......@@ -16,6 +16,12 @@
#include "GenerateRgbaHost.h"
#include "TpHostGpu.h"
#define MY_EXCEPTION(aMessage) \
{ \
std::ostringstream stream; \
stream << "ERROR in " << __func__ << ": "<< aMessage << ", file " <<__FILE__ << " line " << __LINE__; \
throw std::runtime_error(stream.str()); \
}
TpHostGpu::~TpHostGpu(){
hfree(m_host_kern_buf);
......@@ -31,18 +37,18 @@ TpHostGpu::~TpHostGpu(){
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
// Free used host memory, initialized as arrays of pointers
hfree(m_gpu_kernels_h[ncam]);
hfree(m_gpu_kernel_offsets_h[ncam]);
hfree(m_gpu_images_h[ncam]);
hfree(m_gpu_clt_h[ncam]);
hfree(m_gpu_corr_images_h[ncam]);
gfree(m_gpu_kernels_h[ncam]);
gfree(m_gpu_kernel_offsets_h[ncam]);
gfree(m_gpu_images_h[ncam]);
gfree(m_gpu_clt_h[ncam]);
gfree(m_gpu_corr_images_h[ncam]);
// Free used GPU memory initialized as **
if (m_gpu_kernels) gfree(m_gpu_kernels[ncam]);
if (m_gpu_kernel_offsets) gfree(m_gpu_kernel_offsets[ncam]);
if (m_gpu_images) gfree(m_gpu_images[ncam]);
if (m_gpu_clt) gfree(m_gpu_clt[ncam]);
if (m_gpu_corr_images) gfree(m_gpu_corr_images[ncam]);
}
if (m_gpu_kernels) gfree(m_gpu_kernels);
if (m_gpu_kernel_offsets) gfree(m_gpu_kernel_offsets);
if (m_gpu_images) gfree(m_gpu_images);
if (m_gpu_clt) gfree(m_gpu_clt);
if (m_gpu_corr_images) gfree(m_gpu_corr_images);
gfree(m_gpu_corrs);
gfree(m_gpu_corrs_td);
gfree(m_gpu_corrs_combo);
......@@ -66,7 +72,74 @@ TpHostGpu::~TpHostGpu(){
return;
};
void TpHostGpu::setImageKernels(){
void TpHostGpu::allTests(
int num_runs,
int image_dx,
int image_dy,
const float target_disparity,
const float scale,
int quad_combine,
int use_dp,
int debug){
int is_bayer = m_tpParams.num_colors > 1;
// Prepere all but setImgBuffersShifted() // Is it important?
setImageKernels(); // 233-258 (overlap)
setCltBuffers(); // 246
setCorrImgBuffers(); // 252
setImgBuffers(); // 283-292
//setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
setGeometryCorrectionBuffers(); // 207-231
setCorrelationBuffers(); // 260-281 , 332-333
setTasks (target_disparity,scale); // 129, 302-325
setTextures(); // 337-348, ??
setRGBA(); // 377 - 390
testRotMatrices (num_runs); // 420
testReverseDistortions (num_runs); // 468
testGeomCorrect (num_runs); // 534
testConvertDirect (num_runs); // 608
testImcltRbgAll (num_runs); // 701
testCorrelate2DIntra (num_runs); // 762 - 885
testCorrelate2DIntraTD (num_runs, quad_combine); // 886 - 1123
setImgBuffersShifted(is_bayer, image_dx, image_dy); // 1171-1188
testCorrelate2DInterSelf(num_runs); // 1136 - 1411
testTextures (num_runs, use_dp, debug); // 1422-1664
testTexturesRGBA (num_runs, use_dp, debug); // 1669-1810
return;
/*
void setImageKernels(); // 233-258 (overlap)
void setCltBuffers(); // 246
void setCorrImgBuffers(); // 252
void setImgBuffers(); // 283-292
void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
void setGeometryCorrectionBuffers(); // 207-231
void setCorrelationBuffers(); // 260-281 , 332-333
void setTasks(const float target_disparity, const float scale); // 129, 302-325
void setTextures(); // 337-348, ??
void setRGBA(); // 377 - 390
trot_deriv testRotMatrices (int num_runs); // 420
void testReverseDistortions (int num_runs); // 468
void testGeomCorrect (int num_runs); // 534
void testConvertDirect (int num_runs); // 608
// void testImclt (int num_runs); // 682 // not implemented
void testImcltRbgAll (int num_runs); // 701
void testCorrelate2DIntra (int num_runs); // 762 - 885
void testCorrelate2DIntraTD (int num_runs, int quad_combine); // 886 - 1123
//void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
void testCorrelate2DInterSelf(int num_runs); // 1136 - 1411
void testTextures (int num_runs, int use_dp, int debug); // 1422-1664
void testTexturesRGBA (int num_runs, int use_dp, int debug); // 1669-1810
*/
}
void TpHostGpu::setImageKernels(){ // 233-258 (overlap)
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
readFloatsFromFile(
m_host_kern_buf, // float * data, // allocated array
......@@ -82,7 +155,7 @@ void TpHostGpu::setImageKernels(){
m_gpu_kernels = copyalloc_pointers_gpu (m_gpu_kernels_h, m_tpParams.num_cams); // NUM_CAMS);
m_gpu_kernel_offsets = (struct CltExtra **) copyalloc_pointers_gpu ((float **) m_gpu_kernel_offsets_h, m_tpParams.num_cams); // NUM_CAMS);
}
void TpHostGpu::setCltBuffers(){
void TpHostGpu::setCltBuffers(){ // 246
const int slice_size {m_tpParams.tilesy * m_tpParams.tilesx * m_tpParams.num_colors * 4 * m_tpParams.dtt_size * m_tpParams.dtt_size};
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
// will get results back
......@@ -91,9 +164,13 @@ void TpHostGpu::setCltBuffers(){
printf("Allocating GPU memory, 0x%x floats\n", slice_size) ;
}
m_gpu_clt = copyalloc_pointers_gpu (m_gpu_clt_h, m_tpParams.num_cams); // NUM_CAMS);
checkCudaErrors (cudaMalloc((void **)&m_gpu_active_tiles, m_tpParams.tilesx * m_tpParams.tilesy * sizeof(int)));
checkCudaErrors (cudaMalloc((void **)&m_gpu_num_active, sizeof(int)));
}
void TpHostGpu::setCorrImgBuffers(){
void TpHostGpu::setCorrImgBuffers(){ // 252
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
// allocate result images (3x height to accommodate 3 colors
// Image is extended by 4 pixels each side to avoid checking (mclt tiles extend by 4)
......@@ -106,7 +183,7 @@ void TpHostGpu::setCorrImgBuffers(){
m_gpu_corr_images = copyalloc_pointers_gpu (m_gpu_corr_images_h, m_tpParams.num_cams); // NUM_CAMS);
}
void TpHostGpu::setImgBuffers(){
void TpHostGpu::setImgBuffers(){ // 283-292
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
readFloatsFromFile(
m_host_kern_buf, // float * data, // allocated array
......@@ -120,7 +197,10 @@ void TpHostGpu::setImgBuffers(){
m_gpu_images = copyalloc_pointers_gpu (m_gpu_images_h, m_tpParams.num_cams); // NUM_CAMS);
}
void TpHostGpu::setImgBuffersShifted(int is_bayer, int image_dx, int image_dy) {
void TpHostGpu::setImgBuffersShifted( // 1171-1188
int is_bayer, // 0
int image_dx, // 2
int image_dy) { // 0
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
readFloatsFromFile(
m_host_kern_buf, // float * data, // allocated array
......@@ -149,7 +229,7 @@ void TpHostGpu::setImgBuffersShifted(int is_bayer, int image_dx, int image_dy) {
void TpHostGpu::setGeometryCorrectionBuffers() {
void TpHostGpu::setGeometryCorrectionBuffers() { // 207-231
readFloatsFromFile(
(float *) &m_fgeometry_correction, // float * data, // allocated array, no need to free
m_tpPaths.geometry_correction_file); // char * path) // file path
......@@ -169,8 +249,9 @@ void TpHostGpu::setGeometryCorrectionBuffers() {
m_rByRDist,
m_rByRDist_length);
checkCudaErrors(cudaMalloc((void **)&m_gpu_rot_deriv, sizeof(trot_deriv)));
m_ftask_data1 = (float *) malloc( m_tpParams.tilesx * m_tpParams.tilesy * m_tpParams.task_size * sizeof(float));
}
void TpHostGpu::setCorrelationBuffers(){
void TpHostGpu::setCorrelationBuffers(){ // 260-281 , 332-333
// allocates one correlation kernel per line (15x15 floats), number of rows - number of tiles * number of pairs
m_gpu_corrs = alloc_image_gpu(
&dstride_corr, // in bytes ! for one 2d phase correlation (padded 15x15x4 bytes)
......@@ -195,12 +276,21 @@ void TpHostGpu::setCorrelationBuffers(){
// just allocate
checkCudaErrors (cudaMalloc((void **)&m_gpu_corr_indices, m_tpParams.num_pairs * m_tpParams.tilesx * m_tpParams.tilesy * sizeof(int)));
checkCudaErrors (cudaMalloc((void **)&m_gpu_corrs_combo_indices, m_tpParams.tilesx * m_tpParams.tilesy * sizeof(int)));
checkCudaErrors (cudaMalloc((void **)&m_gpu_num_corr_tiles, sizeof(int)));
}
void TpHostGpu::setTasks(const float target_disparity, const float scale){
void TpHostGpu::setTasks( // 129, 302-325
const float target_disparity,
const float scale){
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
readFloatsFromFile(
(float *) &m_tile_coords_h[ncam],
m_tpPaths.ports_offs_xy_file[ncam]); // char * path) // file path
}
// allocate m_ftask_data
m_ftask_data = (float *) malloc( m_tpParams.tilesx * m_tpParams.tilesy * m_tpParams.task_size * sizeof(float));
// float * ftask_data1 = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float));
// float * ftask_data1 = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float));
// tasks for all tiles
for (int ty = 0; ty < m_tpParams.tilesy ; ty++){
......@@ -224,9 +314,9 @@ void TpHostGpu::setTasks(const float target_disparity, const float scale){
}
m_gpu_ftasks = (float *) copyalloc_kernel_gpu(m_ftask_data, m_tpParams.tp_tasks_size * m_tpParams.task_size); // (sizeof(struct tp_task)/sizeof(float)));
}
void TpHostGpu::setTextures(){
void TpHostGpu::setTextures(){ // 337-348, ??
if (!m_ftask_data) { // ftask_data is not initialized
throw std::runtime_error("Error: m_ftask_data is not initialized ");
MY_EXCEPTION("m_ftask_data is not initialized ");
}
m_tile_texture_layers = (m_tpParams.texture_colors + 1 + (m_tpParams.keep_texture_weights? (m_tpParams.num_cams + m_tpParams.texture_colors + 1): 0));
m_tile_texture_size = m_tile_texture_layers *256;
......@@ -236,7 +326,7 @@ void TpHostGpu::setTextures(){
for (int tx = 0; tx < m_tpParams.tilesx; tx++){
int nt = ty * m_tpParams.tilesx + tx;
float *tp = m_ftask_data + m_tpParams.task_size * nt;
int cm = (*(int *) tp) & m_tpParams.task_texture_bits; // non-zero any of 4 lower task bits
int cm = (*(int *) tp) & (m_tpParams.task_texture_bits | (1 << m_tpParams.task_text_en)); // non-zero any of 8 lower task bits or bit 8 (TASK_TEXT_EN)
if (cm){
m_texture_indices[m_num_textures++] = (nt << m_tpParams.text_ntile_shift) | (1 << m_tpParams.list_texture_bit); // setting 0x80 in texture indices
}
......@@ -254,7 +344,7 @@ void TpHostGpu::setTextures(){
m_tpParams.tilesx * m_tpParams.tilesy); // int height);
}
void TpHostGpu::setRGBA(){
void TpHostGpu::setRGBA(){ // 377 - 390
/*
// appears in setTextures
m_tile_texture_layers = (m_tpParams.texture_colors + 1 + (m_tpParams.keep_texture_weights? (m_tpParams.num_cams + m_tpParams.texture_colors + 1): 0));
......@@ -281,14 +371,14 @@ void TpHostGpu::setRGBA(){
(m_tpParams.num_colors + 1) * sizeof(float)));
}
trot_deriv TpHostGpu::testRotMatrices (int num_runs){ // 424
trot_deriv TpHostGpu::testRotMatrices (int num_runs){ // 420
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_correction_vector){
throw std::runtime_error("Error: m_gpu_correction_vector is not initialized ");
MY_EXCEPTION("m_gpu_correction_vector is not initialized ");
}
if (!m_gpu_rot_deriv){
throw std::runtime_error("Error: m_gpu_rot_deriv is not initialized ");
MY_EXCEPTION("m_gpu_rot_deriv is not initialized ");
}
dim3 threads_rot (3,3,3);
dim3 grid_rot (m_tpParams.num_cams, 1, 1);
......@@ -328,13 +418,13 @@ void TpHostGpu::testReverseDistortions (int num_runs){ // 468
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
}
if (!m_gpu_rByRDist){
throw std::runtime_error("Error: m_gpu_rByRDist is not initialized ");
MY_EXCEPTION("m_gpu_rByRDist is not initialized ");
}
if (!m_rByRDist){
throw std::runtime_error("Error: m_rByRDist is not initialized ");
MY_EXCEPTION("m_rByRDist is not initialized ");
}
dim3 threads_rd(3,3,3);
dim3 grid_rd (NUM_CAMS, 1, 1); // can get rid of NUM_CAMS
......@@ -389,25 +479,25 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized ");
MY_EXCEPTION("m_gpu_ftasks is not initialized ");
}
if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
}
if (!m_gpu_correction_vector){
throw std::runtime_error("Error: m_gpu_correction_vector is not initialized ");
MY_EXCEPTION("m_gpu_correction_vector is not initialized ");
}
if (!m_gpu_rByRDist){
throw std::runtime_error("Error: m_gpu_rByRDist is not initialized ");
MY_EXCEPTION("m_gpu_rByRDist is not initialized ");
}
if (!m_gpu_rot_deriv){
throw std::runtime_error("Error: m_gpu_rot_deriv is not initialized ");
MY_EXCEPTION("m_gpu_rot_deriv is not initialized ");
}
if (!m_ftask_data){
throw std::runtime_error("Error: m_ftask_data is not initialized ");
MY_EXCEPTION("m_ftask_data is not initialized ");
}
if (!m_ftask_data1){
throw std::runtime_error("Error: m_ftask_data1 is not initialized ");
MY_EXCEPTION("m_ftask_data1 is not initialized");
}
dim3 threads_geom(m_tpParams.num_cams, m_tpParams.tiles_per_block_geom, 1);
dim3 grid_geom ((m_tpParams.tp_tasks_size + m_tpParams.tiles_per_block_geom-1)/m_tpParams.tiles_per_block_geom, 1, 1);
......@@ -440,8 +530,8 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534
sdkStopTimer(&timerGEOM);
float avgTimeGEOM = (float)sdkGetTimerValue(&timerGEOM) / (float)numIterations;
sdkDeleteTimer(&timerGEOM);
printf("Average TextureList run time =%f ms\n", avgTimeGEOM);
// printf("Average TextureList run time =%f ms\n", avgTimeGEOM);
printf("Average calculate_tiles_offsets run time =%f ms\n", avgTimeGEOM);
checkCudaErrors(cudaMemcpy( // copy modified/calculated tasks
m_ftask_data1,
m_gpu_ftasks,
......@@ -453,7 +543,7 @@ void TpHostGpu::testGeomCorrect (int num_runs){ // 534
struct tp_task * new_task = &task_data1[DBG_TILE];
#endif
if( m_tpParams.debug_tile) {
printf("old_task txy = 0x%x\n", *(int *) (m_ftask_data + m_tpParams.tp_tasks_size * m_tpParams.dbg_tile + 1)) ; // task_data [DBG_TILE].txy);
printf("old_task txy = 0x%x\n", *(int *) (m_ftask_data + m_tpParams.task_size * m_tpParams.dbg_tile + 1)) ; // task_data [DBG_TILE].txy);
printf("new_task txy = 0x%x\n", *(int *) (m_ftask_data1 + m_tpParams.task_size * m_tpParams.dbg_tile + 1)) ; // task_data1[DBG_TILE].txy);
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++){
printf("camera %d pX old %f new %f diff = %f\n", ncam,
......@@ -474,25 +564,25 @@ void TpHostGpu::testConvertDirect (int num_runs){ // 608
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_kernel_offsets){
throw std::runtime_error("Error: m_gpu_kernel_offsets is not initialized ");
MY_EXCEPTION("m_gpu_kernel_offsets is not initialized ");
}
if (!m_gpu_kernels){
throw std::runtime_error("Error: m_gpu_kernels is not initialized ");
MY_EXCEPTION("m_gpu_kernels is not initialized ");
}
if (!m_gpu_images){
throw std::runtime_error("Error: m_gpu_images is not initialized ");
MY_EXCEPTION("m_gpu_images is not initialized ");
}
if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized ");
MY_EXCEPTION("m_gpu_ftasks is not initialized ");
}
if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized ");
MY_EXCEPTION("m_gpu_clt is not initialized ");
}
if (!m_gpu_active_tiles){
throw std::runtime_error("Error: m_gpu_active_tiles is not initialized ");
MY_EXCEPTION("m_gpu_active_tiles is not initialized "); //*
}
if (!m_gpu_num_active){
throw std::runtime_error("Error: m_gpu_num_active is not initialized ");
MY_EXCEPTION("m_gpu_num_active is not initialized ");
}
//create and start CUDA timer
StopWatchInterface *timerTP = 0;
......@@ -524,7 +614,7 @@ void TpHostGpu::testConvertDirect (int num_runs){ // 608
m_tpParams.img_height, // IMG_HEIGHT, // int woi_height,
0, // m_tpParams.kernels_hor, // KERNELS_HOR, // int kernels_hor,
m_tpParams.kernels_hor, // KERNELS_VERT, // int kernels_vert);
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
m_gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
m_tpParams.tilesx); // TILESX); // int tilesx)
printf("HOST: convert_direct() done\n");
......@@ -557,7 +647,7 @@ void TpHostGpu::testImclt (int num_runs){ // 682
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
}
}
*/
......@@ -565,10 +655,10 @@ void TpHostGpu::testImcltRbgAll (int num_runs){ // 701
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized ");
MY_EXCEPTION("m_gpu_clt is not initialized ");
}
if (!m_gpu_corr_images){
throw std::runtime_error("Error: m_gpu_corr_images is not initialized ");
MY_EXCEPTION("m_gpu_corr_images is not initialized ");
}
StopWatchInterface *timerIMCLT = 0;
sdkCreateTimer(&timerIMCLT);
......@@ -611,13 +701,13 @@ void TpHostGpu::testCorrelate2DIntra(int num_runs){
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized ");
MY_EXCEPTION("m_gpu_clt is not initialized ");
}
if (!m_gpu_ftasks) {
throw std::runtime_error("Error: m_gpu_ftasks is not initialized ");
MY_EXCEPTION("m_gpu_ftasks is not initialized ");
}
if (!m_gpu_corrs) {
throw std::runtime_error("Error: m_gpu_corrs is not initialized ");
MY_EXCEPTION("m_gpu_corrs is not initialized ");
}
StopWatchInterface *timerCORR = 0;
sdkCreateTimer(&timerCORR);
......@@ -679,28 +769,28 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names
if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized ");
MY_EXCEPTION("m_gpu_clt is not initialized ");
}
if (!m_gpu_corrs_td){
throw std::runtime_error("Error: m_gpu_corrs_td is not initialized ");
MY_EXCEPTION("m_gpu_corrs_td is not initialized ");
}
if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized ");
MY_EXCEPTION("m_gpu_ftasks is not initialized ");
}
if (!m_gpu_corr_indices){
throw std::runtime_error("Error: m_gpu_corr_indices is not initialized ");
MY_EXCEPTION("m_gpu_corr_indices is not initialized ");
}
if (!m_gpu_num_corr_tiles){
throw std::runtime_error("Error: m_gpu_num_corr_tiles is not initialized ");
MY_EXCEPTION("m_gpu_num_corr_tiles is not initialized ");
}
if (!m_gpu_corrs_combo_td){
throw std::runtime_error("Error: m_gpu_corrs_combo_td is not initialized ");
MY_EXCEPTION("m_gpu_corrs_combo_td is not initialized ");
}
if (!m_gpu_corrs_combo_indices){
throw std::runtime_error("Error: m_gpu_corrs_combo_indices is not initialized ");
MY_EXCEPTION("m_gpu_corrs_combo_indices is not initialized ");
}
if (!m_gpu_corrs_combo){
throw std::runtime_error("Error: m_gpu_corrs_combo is not initialized ");
MY_EXCEPTION("m_gpu_corrs_combo is not initialized ");
}
//m_gpu_corrs_combo
// testing corr
......@@ -871,7 +961,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
corr_img_size, // int corr_img_size,
num_corr_indices, //int num_corr_indices,
cpu_corr_indices, // int * cpu_corr_indices,
m_gpu_corrs_td, // float * cpu_corr,
cpu_corr_td, // float * cpu_corr,
16); // num_sel_sensors); // int num_sel_sensors) // // Will not be used
printf("Writing intrascene phase correlation TD data tp %s\n", m_tpPaths.result_intrascene_td);
......@@ -892,13 +982,13 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names
if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized ");
MY_EXCEPTION("m_gpu_clt is not initialized ");
}
if (!m_gpu_ftasks) {
throw std::runtime_error("Error: m_gpu_ftasks is not initialized ");
MY_EXCEPTION("m_gpu_ftasks is not initialized ");
}
if (!m_gpu_corrs) {
throw std::runtime_error("Error: m_gpu_corrs is not initialized ");
MY_EXCEPTION("m_gpu_corrs is not initialized ");
}
int sel_sensors = 0xffff; // 0x7fff; // 0xffff;
int num_sel_sensors = 16; // 15; // 16;
......@@ -932,7 +1022,7 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
m_tpParams.img_height, // int woi_height,
m_tpParams.kernels_hor, // int kernels_hor,
m_tpParams.kernels_vert, //, // int kernels_vert);
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
m_gpu_num_active, // int * pnum_active_tiles); // indices to gpu_tasks
m_tpParams.tilesx); // int tilesx)
getLastCudaError("Kernel execution failed");
......@@ -957,7 +1047,7 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
m_tpParams.img_height, // int woi_height,
m_tpParams.kernels_hor, // int kernels_hor,
m_tpParams.kernels_vert, //, // int kernels_vert);
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
m_gpu_num_active, // int * pnum_active_tiles); // indices to gpu_tasks
m_tpParams.tilesx); // int tilesx)
getLastCudaError("Kernel execution failed");
......@@ -1041,25 +1131,25 @@ void TpHostGpu::testTextures (
int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names
if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized ");
MY_EXCEPTION("m_gpu_ftasks is not initialized ");
}
if (!m_gpu_texture_indices){
throw std::runtime_error("Error: m_gpu_texture_indices is not initialized ");
MY_EXCEPTION("m_gpu_texture_indices is not initialized ");
}
if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized ");
MY_EXCEPTION("m_gpu_clt is not initialized ");
}
if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
}
if (!m_gpu_textures){
throw std::runtime_error("Error: m_gpu_textures is not initialized ");
MY_EXCEPTION("m_gpu_textures is not initialized ");
}
if (!m_gpu_diff_rgb_combo){
throw std::runtime_error("Error: m_gpu_diff_rgb_combo is not initialized ");
MY_EXCEPTION("m_gpu_diff_rgb_combo is not initialized ");
}
if (!m_gpu_generate_RBGA_params){
throw std::runtime_error("Error: m_gpu_generate_RBGA_params is not initialized ");
MY_EXCEPTION("m_gpu_generate_RBGA_params is not initialized ");
}
dim3 threads0(m_tpParams.convert_direct_indexing_threads, 1, 1);
dim3 blocks0 ((m_tpParams.tp_tasks_size + m_tpParams.convert_direct_indexing_threads -1) >> m_tpParams.convert_direct_indexing_threads_log2,1, 1);
......@@ -1293,31 +1383,31 @@ void TpHostGpu::testTexturesRGBA (
int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names
if (!m_gpu_ftasks){
throw std::runtime_error("Error: m_gpu_ftasks is not initialized ");
MY_EXCEPTION("m_gpu_ftasks is not initialized ");
}
if (!m_gpu_texture_indices){
throw std::runtime_error("Error: m_gpu_texture_indices is not initialized ");
MY_EXCEPTION("m_gpu_texture_indices is not initialized ");
}
if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized ");
MY_EXCEPTION("m_gpu_clt is not initialized ");
}
if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
MY_EXCEPTION("m_gpu_geometry_correction is not initialized ");
}
if (!m_gpu_num_texture_tiles){
throw std::runtime_error("Error: m_gpu_num_texture_tiles is not initialized ");
MY_EXCEPTION("m_gpu_num_texture_tiles is not initialized ");
}
if (!m_gpu_textures_rbga){
throw std::runtime_error("Error: m_gpu_textures_rbga is not initialized ");
MY_EXCEPTION("m_gpu_textures_rbga is not initialized ");
}
if (!m_gpu_generate_RBGA_params){
throw std::runtime_error("Error: m_gpu_generate_RBGA_params is not initialized ");
MY_EXCEPTION("m_gpu_generate_RBGA_params is not initialized ");
}
if (!m_gpu_twh){
throw std::runtime_error("Error: m_gpu_twh is not initialized ");
MY_EXCEPTION("m_gpu_twh is not initialized ");
}
if (!m_gpu_woi){
throw std::runtime_error("Error: m_gpu_woi is not initialized ");
MY_EXCEPTION("m_gpu_woi is not initialized ");
}
dim3 threads_rgba(1, 1, 1);
dim3 grid_rgba(1,1,1);
......@@ -1597,7 +1687,7 @@ void TpHostGpu::saveInterCorrFile(
corr_img_size, // int corr_img_size,
num_corr_indices, //int num_corr_indices,
cpu_corr_indices, // int * cpu_corr_indices,
gpu_corrs_td, // float * cpu_corr,
cpu_corr_td, // float * cpu_corr,
num_sel_sensors); // int num_sel_sensors)
printf("Writing %s TD data to %s\n", prompt, path);
writeFloatsToFile(
......@@ -1705,7 +1795,7 @@ float * TpHostGpu::getCorrTdImg(
int src_offs = src_offs0 + iy * m_tpParams.dtt_size + iquad * m_tpParams.dtt_size * m_tpParams.dtt_size;
int dst_offs = dst_offs0 + (iy + m_tpParams.dtt_size * iqy)* (m_tpParams.tilesx * 16) + iqx * m_tpParams.dtt_size;
for (int ix = 0; ix < m_tpParams.dtt_size; ix++){
corr_img[dst_offs++] = cpu_corr_td[src_offs++];
corr_img[dst_offs++] = cpu_corr_td[src_offs++]; // segmentation
}
}
}
......@@ -1994,5 +2084,5 @@ void TpHostGpu::gfree(struct gc * p) {if (p) checkCudaErrors(cudaFree(p
void TpHostGpu::gfree(struct corr_vector * p) {if (p) checkCudaErrors(cudaFree(p));p = {};}
void TpHostGpu::gfree(struct trot_deriv * p) {if (p) checkCudaErrors(cudaFree(p));p = {};}
void TpHostGpu::gfree(float ** p) {if (p) checkCudaErrors(cudaFree(p));p = {};}
void TpHostGpu::gfree(struct CltExtra ** p) {if (p) checkCudaErrors(cudaFree(p));p = {};}
//CltExtra
......@@ -111,18 +111,27 @@ public:
,m_host_kern_buf{(float *) malloc(tpParams.kern_size * sizeof(float))}
{};
~TpHostGpu();
void setImageKernels();
void setCltBuffers();
void setCorrImgBuffers();
void setImgBuffers();
void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy);
void setGeometryCorrectionBuffers();
void setCorrelationBuffers();
void setTasks(const float target_disparity, const float scale);
void setTextures();
void setRGBA();
trot_deriv testRotMatrices (int num_runs); // 424
void allTests(
int num_runs,
int image_dx, // 2
int image_dy, // 0
const float target_disparity, // DBG_DISPARITY == 0.0
const float scale, // 0.0
int quad_combine,
int use_dp,
int debug);
void setImageKernels(); // 233-258 (overlap)
void setCltBuffers(); // 246
void setCorrImgBuffers(); // 252
void setImgBuffers(); // 283-292
void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188 SHOULD be called before testCorrelate2DInterSelf
void setGeometryCorrectionBuffers(); // 207-231
void setCorrelationBuffers(); // 260-281 , 332-333
void setTasks(const float target_disparity, const float scale); // 129, 302-325
void setTextures(); // 337-348, ??
void setRGBA(); // 377 - 390
trot_deriv testRotMatrices (int num_runs); // 420
void testReverseDistortions (int num_runs); // 468
void testGeomCorrect (int num_runs); // 534
void testConvertDirect (int num_runs); // 608
......@@ -131,8 +140,8 @@ public:
void testCorrelate2DIntra (int num_runs); // 762 - 885
void testCorrelate2DIntraTD (int num_runs, int quad_combine); // 886 - 1123
//void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
void testCorrelate2DInterSelf(int num_runs); // 1136 - 1411
void testTextures (int num_runs, int use_dp, int debug); // 1422-1664
void testTexturesRGBA (int num_runs, int use_dp, int debug); // 1669-1810
......@@ -149,7 +158,7 @@ private:
float * getCorrImg (int corr_img_size, int num_corr_indices, int * cpu_corr_indices, float * cpu_corr, int num_sel_sensors);
float * getCorrTdImg(int corr_img_size, int num_corr_indices, int * cpu_corr_indices, float * cpu_corr_td, int num_sel_sensors);
void generate_RBGA_host(
void generate_RBGA_host( // not a member
int num_cams, // number of cameras used
// Parameters to generate texture tasks
float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16p// struct tp_task * gpu_tasks,
......@@ -183,6 +192,7 @@ private:
void gfree(struct corr_vector * p);
void gfree(struct trot_deriv * p);
void gfree(float ** p);
void gfree(struct CltExtra ** p);
};
......
......@@ -97,8 +97,6 @@
*
* \return Status code
*/
int main(int argc, char **argv)
{
//
......@@ -106,41 +104,40 @@ int main(int argc, char **argv)
//
printf("%s Starting...\n\n", argv[0]);
printf("sizeof(float*)=%d\n",(int)sizeof(float*));
//initialize CUDA
findCudaDevice(argc, (const char **)argv);
float fat_zero = 1000.0f; // 300.0f; // 30.0;
int is_bayer = 0; // from 1136
int image_dx = 2;
int image_dy = 0;
#if TEST_LWIR
int use_lwir= 1;
#else
int use_lwir= 0;
#endif
class YourClass
{
std::vector<int> myVector;
// ...
};
TpParams tpParams(use_lwir);
TpPaths tpPaths(use_lwir);
TpHostGpu tpHostGpu(tpParams,tpPaths);
GenerateRgbaHost generateRgbaHost{}; // = new GenerateRgbaHost();
/* */
tpHostGpu.allTests(
10, // int num_runs,
2, // int image_dx, // 2
0, // int image_dy, // 0
0.0, // const float target_disparity, // DBG_DISPARITY == 0.0
0.0, // const float scale, // 0.0
0, // int quad_combine,
0, // int use_dp,
0); // int debug);
return 0;
/* */
GenerateRgbaHost generateRgbaHost{}; // = new GenerateRgbaHost(); Remove files, use tpHostGpu::
// return 0;
float * host_kern_buf = (float *) malloc(tpParams.kern_size * sizeof(float));
float * ftask_data = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float));
float * ftask_data1 = (float *) malloc(TILESX * TILESY * tpParams.task_size * sizeof(float));
trot_deriv rot_deriv;
int texture_indices [TILESX*TILESYA];
int cpu_woi [4];
// host array of pointers to GPU memory
float * gpu_kernels_h [tpParams.num_cams];
struct CltExtra * gpu_kernel_offsets_h [tpParams.num_cams];
......@@ -148,14 +145,12 @@ int main(int argc, char **argv)
float tile_coords_h [tpParams.num_cams][TILESX * TILESY][2];
float * gpu_clt_h [tpParams.num_cams];
float * gpu_corr_images_h [tpParams.num_cams];
float * gpu_corrs; // correlation tiles (per tile, per pair) in pixel domain
float * gpu_corrs_td; // correlation tiles (per tile, per pair) in transform domain
int * gpu_corr_indices; // shared by gpu_corrs gpu_corrs_td
float * gpu_corrs_combo; // correlation tiles combined (1 per tile), pixel domain
float * gpu_corrs_combo_td; // correlation tiles combined (1 per tile), transform domain
int * gpu_corrs_combo_indices; // shared by gpu_corrs_combo and gpu_corrs_combo_td
float * gpu_textures;
float * gpu_diff_rgb_combo;
float * gpu_textures_rbga;
......@@ -176,7 +171,6 @@ int main(int argc, char **argv)
float ** gpu_corr_images; // [NUM_CAMS];
// GPU pointers to GPU memory
float * gpu_ftasks; // TODO: ***** allocate ! **** DONE
int * gpu_active_tiles;
......@@ -344,7 +338,7 @@ int main(int argc, char **argv)
for (int tx = 0; tx < TILESX; tx++){
int nt = ty * TILESX + tx;
float *tp = ftask_data + tpParams.task_size * nt;
int cm = (*(int *) tp) & TASK_TEXTURE_BITS; // non-zero any of 4 lower task bits
int cm = (*(int *) tp) & (TASK_TEXTURE_BITS | (1 << TASK_TEXT_EN)); // non-zero any of 8 lower task bits or bit 8 (TASK_TEXT_EN)
if (cm){
texture_indices[num_textures++] = (nt << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT); // setting 0x80 in texture indices
}
......@@ -640,7 +634,7 @@ int main(int argc, char **argv)
IMG_HEIGHT, // int woi_height,
0, // KERNELS_HOR, // int kernels_hor,
KERNELS_VERT, // int kernels_vert);
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx)
printf("HOST: convert_direct() done\n");
......@@ -1138,9 +1132,9 @@ int main(int argc, char **argv)
int num_sel_sensors = 16; // 15; // 16;
int num_pairs_inter = num_sel_sensors+1;
num_corr_indices = num_pairs_inter * num_tiles;
int is_bayer = 0;
int image_dx = 2;
int image_dy = 0;
// int is_bayer = 0;
// int image_dx = 2;
// int image_dy = 0;
float * gpu_clt_ref_h [tpParams.num_cams];
float ** gpu_clt_ref; // [NUM_CAMS];
......@@ -1166,7 +1160,7 @@ int main(int argc, char **argv)
IMG_HEIGHT, // int woi_height,
KERNELS_HOR, // int kernels_hor,
KERNELS_VERT, // int kernels_vert);
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx)
......@@ -1208,7 +1202,7 @@ int main(int argc, char **argv)
IMG_HEIGHT, // int woi_height,
KERNELS_HOR, // int kernels_hor,
KERNELS_VERT, // int kernels_vert);
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated list of tiles
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment