Commit f134cfa4 authored by Andrey Filippov

4 images with CDP

parent bd04c118
......@@ -2055,7 +2055,34 @@ __global__ void imclt_rbg_all(
int woi_theight,
const size_t dstride) // in floats (pixels)
{
    // Device-side dispatcher: one thread launches the imclt_rbg child kernel for
    // every camera, every color and each of the four (v_offs, h_offs) combinations.
    // The guard below lets only threadIdx.x == 0 issue launches; the host code in this
    // change launches this kernel with a (1,1,1) grid and a (1,1,1) block.
    if (threadIdx.x == 0) { // anyway 1,1,1
        // Block shape used for every child launch; only the launching thread needs it.
        dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
        for (int ncam = 0; ncam < NUM_CAMS; ncam++) {
            for (int color = 0; color < colors; color++) {
                for (int v_offs = 0; v_offs < 2; v_offs++) {
                    for (int h_offs = 0; h_offs < 2; h_offs++) {
                        // Half of the tile count in each direction: rounded up when the
                        // offset is 0 and rounded down when the offset is 1.
                        int tilesy_half = (woi_theight + (v_offs ^ 1)) >> 1;
                        int tilesx_half = (woi_twidth + (h_offs ^ 1)) >> 1;
                        int tiles_in_pass = tilesy_half * tilesx_half;
                        // A pass with no tiles would request a zero-sized grid, which is
                        // an invalid launch configuration - skip it instead.
                        if (tiles_in_pass <= 0) {
                            continue;
                        }
                        // Ceiling division: enough blocks to cover all tiles of this pass.
                        dim3 grid_imclt((tiles_in_pass + IMCLT_TILES_PER_BLOCK - 1) / IMCLT_TILES_PER_BLOCK, 1, 1);
                        // printf("grid_imclt= (%d, %d, %d)\n",grid_imclt.x, grid_imclt.y, grid_imclt.z);
                        imclt_rbg<<<grid_imclt, threads_imclt>>>(
                                gpu_clt[ncam],         // float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
                                gpu_corr_images[ncam], // float * gpu_rbg, // WIDTH, 3 * HEIGHT
                                apply_lpf,             // int apply_lpf, // forwarded from the caller (was hard-coded to 1)
                                colors,                // int colors, // defines lpf filter
                                color,                 // int color, // defines location of clt data
                                v_offs,                // int v_offset,
                                h_offs,                // int h_offset,
                                woi_twidth,            // int woi_twidth, // will increase by DTT_SIZE (todo - cut away?)
                                woi_theight,           // int woi_theight, // will increase by DTT_SIZE (todo - cut away?)
                                dstride);              // const size_t dstride); // in floats (pixels)
                        // Wait for this pass before issuing the next one.
                        // NOTE(review): device-side cudaDeviceSynchronize() is deprecated in
                        // recent CUDA toolkits - confirm the targeted toolkit/architecture
                        // before upgrading.
                        cudaDeviceSynchronize();
                    }
                }
            }
        }
    }
}
......
......@@ -109,6 +109,16 @@ extern "C" __global__ void textures_accumulate(
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
// Declaration of the imclt_rbg_all kernel. Unlike imclt_rbg (declared next), which takes a
// single CLT buffer and a single output image, this kernel takes arrays of per-camera
// pointers (float **) for both.
// NOTE(review): presumably intended to be launched as a single thread in a single block -
// confirm against the definition and the call sites.
extern "C"
__global__ void imclt_rbg_all(
float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_corr_images, // [NUM_CAMS][WIDTH, 3 * HEIGHT]
int apply_lpf, // presumably the same low-pass filter switch as imclt_rbg's apply_lpf - TODO confirm
int colors, // number of colors to process (presumably NUM_COLORS or fewer - TODO confirm)
int woi_twidth, // presumably window-of-interest width in tiles - TODO confirm
int woi_theight, // presumably window-of-interest height in tiles - TODO confirm
const size_t dstride); // in floats (pixels)
extern "C" __global__ void imclt_rbg(
float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float * gpu_rbg, // WIDTH, 3 * HEIGHT
......@@ -151,5 +161,3 @@ __global__ void generate_RBGA(
const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
......@@ -823,8 +823,8 @@ int main(int argc, char **argv)
// testing imclt
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
printf("threads_imclt=(%d, %d, %d)\n",threads_imclt.x,threads_imclt.y,threads_imclt.z);
// dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
// printf("threads_imclt=(%d, %d, %d)\n",threads_imclt.x,threads_imclt.y,threads_imclt.z);
StopWatchInterface *timerIMCLT = 0;
sdkCreateTimer(&timerIMCLT);
......@@ -836,7 +836,23 @@ int main(int argc, char **argv)
sdkResetTimer(&timerIMCLT);
sdkStartTimer(&timerIMCLT);
}
#define CDP1
#ifdef CDP1
dim3 threads_imclt_all(1, 1, 1);
dim3 grid_imclt_all(1, 1, 1);
printf("threads_imclt_all=(%d, %d, %d)\n",threads_imclt_all.x,threads_imclt_all.y,threads_imclt_all.z);
printf("grid_imclt_all= (%d, %d, %d)\n",grid_imclt_all.x, grid_imclt_all.y, grid_imclt_all.z);
imclt_rbg_all<<<grid_imclt_all,threads_imclt_all>>>(
gpu_clt, // float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
gpu_corr_images, // float ** gpu_corr_images, // [NUM_CAMS][WIDTH, 3 * HEIGHT]
1, // int apply_lpf,
NUM_COLORS, // int colors, // defines lpf filter
TILESX, // int woi_twidth,
TILESY, // int woi_theight,
dstride_rslt/sizeof(float)); // const size_t dstride); // in floats (pixels)
#else
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
printf("threads_imclt=(%d, %d, %d)\n",threads_imclt.x,threads_imclt.y,threads_imclt.z);
for (int ncam = 0; ncam < NUM_CAMS; ncam++) {
for (int color = 0; color < NUM_COLORS; color++) {
for (int v_offs = 0; v_offs < 2; v_offs++){
......@@ -861,6 +877,7 @@ int main(int argc, char **argv)
}
}
}
#endif
getLastCudaError("Kernel failure");
checkCudaErrors(cudaDeviceSynchronize());
printf("test pass: %d\n",i);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment