Commit de3c497a authored by Andrey Filippov's avatar Andrey Filippov

refactoring for CDP2

parent f8839287
This diff is collapsed.
......@@ -40,6 +40,9 @@
#include "tp_defines.h"
#include "dtt8x8.h"
#include "geometry_correction.h"
// #include "TileProcessor.h"
#endif // #ifndef JCUDA
#ifndef get_task_size
......@@ -337,9 +340,6 @@ extern "C" __global__ void calculate_tiles_offsets(
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
}
// __syncthreads();// __syncwarp();
// cudaDeviceSynchronize();
// cudaDeviceSynchronize();
}
......
......@@ -33,9 +33,9 @@
// all of the next 5 were disabled
#define NOCORR
#define NOCORR_TD
#define NOTEXTURES
#define NOTEXTURE_RGBA
#define NOTEXTURE_RGBAXXX
//#define NOTEXTURES
//#define NOTEXTURE_RGBA
//#define NOTEXTURE_RGBAXXX
#define SAVE_CLT
......@@ -574,11 +574,23 @@ void generate_RBGA_host(
int border_tile = pass >> 2;
int ntt = *(cpu_num_texture_tiles + ((pass & 3) << 1) + border_tile);
int *pntt = gpu_num_texture_tiles + ((pass & 3) << 1) + border_tile;
dim3 grid_texture((ntt + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1); // TEXTURE_TILES_PER_BLOCK = 1
/* before CDP2
int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILES-X * (TILES-YA >> 2)); // 1/4
if (border_tile){
ti_offset += width * (tilesya >> 2) - ntt; // TILES-X * (TILES-YA >> 2) - ntt;
}
*/
// for CDP2
int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILES-X * (TILES-YA >> 2)); // 1/4
if (border_tile){
// ti_offset += width * (tilesya >> 2) - ntt; // TILES-X * (TILES-YA >> 2) - ntt;
ti_offset += width * (tilesya >> 2); // TILES-X * (TILES-YA >> 2) - ntt;
ti_offset = - ti_offset; // does not depend on results of the previous kernel, but is negative
}
#ifdef DEBUG8A
printf("\ngenerate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d\n",
pass, border_tile,ti_offset, ntt);
......@@ -604,7 +616,7 @@ void generate_RBGA_host(
num_cams, // int num_cams, // number of cameras used
gpu_woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
ntt, // size_t num_texture_tiles, // number of texture tiles to process
pntt, // ntt, // int * num_texture_tiles, // number of texture tiles to process
ti_offset, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
......@@ -1451,10 +1463,11 @@ int main(int argc, char **argv)
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx)
printf("HOST: convert_direct() done\n");
getLastCudaError("Kernel execution failed");
printf("HOST: convert_direct() done - 1\n");
checkCudaErrors(cudaDeviceSynchronize());
printf("HOST: convert_direct() done - 2\n");
// printf("%d\n",i);
}
sdkStopTimer(&timerTP);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment