Commit 74a14618 authored by Andrey Filippov

getting rid of IMG_WIDTH,

parent d872b1ec
...@@ -1287,7 +1287,7 @@ extern "C" __global__ void generate_RBGA( ...@@ -1287,7 +1287,7 @@ extern "C" __global__ void generate_RBGA(
float diff_sigma = params[2]; // pixel value/pixel change float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
int tilesya = ((height +3) & (~3)); //#define TILESYA ((TILESY +3) & (~3))
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1); dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS; int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
dim3 blocks0 (blocks_x, height, 1); dim3 blocks0 (blocks_x, height, 1);
...@@ -1308,8 +1308,8 @@ extern "C" __global__ void generate_RBGA( ...@@ -1308,8 +1308,8 @@ extern "C" __global__ void generate_RBGA(
gpu_texture_indices); // packed tile + bits (now only (1 << 7) gpu_texture_indices); // packed tile + bits (now only (1 << 7)
cudaDeviceSynchronize(); cudaDeviceSynchronize();
// mark n/e/s/w used tiles from gpu_texture_indices memory to gpu_tasks lower 4 bits // mark n/e/s/w used tiles from gpu_texture_indices memory to gpu_tasks lower 4 bits
*(woi + 0) = TILESX; *(woi + 0) = width; // TILESX;
*(woi + 1) = TILESY; *(woi + 1) = height; // TILESY;
*(woi + 2) = 0; // maximal x *(woi + 2) = 0; // maximal x
*(woi + 3) = 0; // maximal y *(woi + 3) = 0; // maximal y
mark_texture_neighbor_tiles <<<blocks,threads>>>( mark_texture_neighbor_tiles <<<blocks,threads>>>(
...@@ -1365,9 +1365,9 @@ extern "C" __global__ void generate_RBGA( ...@@ -1365,9 +1365,9 @@ extern "C" __global__ void generate_RBGA(
int border_tile = pass >> 2; int border_tile = pass >> 2;
int ntt = *(num_texture_tiles + ((pass & 3) << 1) + border_tile); int ntt = *(num_texture_tiles + ((pass & 3) << 1) + border_tile);
dim3 grid_texture((ntt + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1); // TEXTURE_TILES_PER_BLOCK = 1 dim3 grid_texture((ntt + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1); // TEXTURE_TILES_PER_BLOCK = 1
int ti_offset = (pass & 3) * (TILESX * (TILESYA >> 2)); // 1/4 int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILESX * (TILESYA >> 2)); // 1/4
if (border_tile){ if (border_tile){
ti_offset += TILESX * (TILESYA >> 2) - ntt; ti_offset += width * (tilesya >> 2) - ntt;; // TILESX * (TILESYA >> 2) - ntt;
} }
#ifdef DEBUG12 #ifdef DEBUG12
printf("\ngenerate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d\n", printf("\ngenerate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d\n",
...@@ -1485,8 +1485,8 @@ __global__ void prepare_texture_list( ...@@ -1485,8 +1485,8 @@ __global__ void prepare_texture_list(
gpu_texture_indices); // packed tile + bits (now only (1 << 7) gpu_texture_indices); // packed tile + bits (now only (1 << 7)
cudaDeviceSynchronize(); cudaDeviceSynchronize();
// mark n/e/s/w used tiles from gpu_texture_indices memory to gpu_tasks lower 4 bits // mark n/e/s/w used tiles from gpu_texture_indices memory to gpu_tasks lower 4 bits
*(woi + 0) = TILESX; *(woi + 0) = width; // TILESX;
*(woi + 1) = TILESY; *(woi + 1) = height; // TILESY;
*(woi + 2) = 0; // maximal x *(woi + 2) = 0; // maximal x
*(woi + 3) = 0; // maximal y *(woi + 3) = 0; // maximal y
mark_texture_neighbor_tiles <<<blocks,threads>>>( mark_texture_neighbor_tiles <<<blocks,threads>>>(
...@@ -1538,7 +1538,7 @@ __global__ void clear_texture_list( ...@@ -1538,7 +1538,7 @@ __global__ void clear_texture_list(
if (col > width) { if (col > width) {
return; return;
} }
*(gpu_texture_indices + col + row * TILESX) = 0; *(gpu_texture_indices + col + row * width) = 0; // TILESX) = 0;
} }
/** /**
* Helper kernel for prepare_texture_list() (for generate_RBGA) - mark used tiles in * Helper kernel for prepare_texture_list() (for generate_RBGA) - mark used tiles in
...@@ -3173,12 +3173,16 @@ __device__ void convertCorrectTile( ...@@ -3173,12 +3173,16 @@ __device__ void convertCorrectTile(
int_topleft[1] = itly; int_topleft[1] = itly;
#ifdef DEBUG_OOB11 #ifdef DEBUG_OOB11
#ifdef IMG_WIDTH
#ifdef IMG_HEIGHT
if ((int_topleft[0] < 0) || (int_topleft[1] < 0) || (int_topleft[0] >= (IMG_WIDTH - DTT_SIZE)) || (int_topleft[1] >= IMG_HEIGHT - DTT_SIZE)){ if ((int_topleft[0] < 0) || (int_topleft[1] < 0) || (int_topleft[0] >= (IMG_WIDTH - DTT_SIZE)) || (int_topleft[1] >= IMG_HEIGHT - DTT_SIZE)){
printf("Source data OOB, left=%d, top=%d\n",int_topleft[0],int_topleft[1]); printf("Source data OOB, left=%d, top=%d\n",int_topleft[0],int_topleft[1]);
printf("\n"); printf("\n");
printf("\n"); printf("\n");
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
} }
#endif // IMG_HEIGHT
#endif // IMG_WIDTH
#endif // DEBUG_OOB11
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment