Commit 2bfa77d0 authored by Andrey Filippov's avatar Andrey Filippov

Updating textures and related constants

parent 6a2b0886
......@@ -2749,8 +2749,10 @@ __global__ void gen_texture_list(
#endif // DEBUG12
// *(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// keep only 8 LSBs of task, use higher 24 for task number
*(gpu_texture_indices + buf_offset) = (task & ((1 << CORR_NTILE_SHIFT) -1)) | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT
// *(gpu_texture_indices + buf_offset) = (task & ((1 << TEXT_NTILE_SHIFT) -1)) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// keep only 4 lower task bits
*(gpu_texture_indices + buf_offset) = (task & TASK_TEXTURE_BITS) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT // TASK_TEXTURE_BITS
}
//inline __device__ int get_task_size(int num_cams){
// return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams);
......@@ -2822,7 +2824,7 @@ extern "C" __global__ void create_nonoverlap_list(
}
/// int cxy = gpu_tasks[num_tile].txy;
int cxy = get_task_txy(num_tile, gpu_ftasks, num_cams);
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
// if (gpu_tasks[num_tile].task != 0) {
if (task_task != 0) {
nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code;
......@@ -3401,10 +3403,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
// get number of tile
int tile_code = gpu_texture_indices[tile_indx + gpu_texture_indices_offset]; // Added for Java, no DP
if ((tile_code & (1 << CORR_TEXTURE_BIT)) == 0){
if ((tile_code & (1 << LIST_TEXTURE_BIT)) == 0){
return; // nothing to do
}
int tile_num = tile_code >> CORR_NTILE_SHIFT;
int tile_num = tile_code >> TEXT_NTILE_SHIFT;
#ifdef DEBUG7A
__syncthreads();// __syncwarp();
if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){
......@@ -3784,6 +3786,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
#endif // DEBUG12
int alpha_mode = alphaIndex[tile_code]; // only 4 lowest bits
if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
// TODO: calculate per-color average and add with (1.0-alphaFade) for colors only, no
for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1);
int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
......@@ -3843,10 +3846,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
__syncthreads();// __syncwarp();
#endif // DEBUG12
/// if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILES-Y)) && (g_col < (DTT_SIZE * TILES-X)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILES-X)))){
// always copy 3 (1) colors + alpha
if (colors == 3){
#pragma unroll
......@@ -3871,7 +3870,11 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
int gi = row * DTT_SIZE2 + col;
float * mclt_dst_i = mclt_dst_ncam + i;
for (int ncol = 0; ncol < colors; ncol++) {
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
// *(mclt_dst_i + ncol * (MCLT_UNION_LEN)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
float a = alphaFade[alpha_mode][gi];
float v = a * (*(mclt_dst_i + ncol * (MCLT_UNION_LEN))) +
(1 - a) * ports_rgb_shared[ncol * num_cams + ncam];
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) = v; // see if ports_rgb_shared[] meeds scaling
}
}
}
......
......@@ -1140,9 +1140,9 @@ int main(int argc, char **argv)
for (int tx = 0; tx < TILESX; tx++){
int nt = ty * TILESX + tx;
float *tp = ftask_data + task_size * nt;
int cm = (*(int *) tp) & TASK_TEXTURE_BITS;
int cm = (*(int *) tp) & TASK_TEXTURE_BITS; // non-zero any of 4 lower task bits
if (cm){
texture_indices[num_textures++] = (nt << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
texture_indices[num_textures++] = (nt << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT); // setting 0x80 in texture indices
}
}
}
......
......@@ -62,15 +62,15 @@
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
// only lower bit will be used to request correlations, correlation mask will be common for all the scene
//#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
//#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define TASK_CORR_BITS 4
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
#define TASK_TEXTURE_E_BIT 1 // Texture with East neighbor
#define TASK_TEXTURE_S_BIT 2 // Texture with South neighbor
#define TASK_TEXTURE_W_BIT 3 // Texture with West neighbor
//#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
#define LIST_TEXTURE_BIT 7 // bit to request texture calculation
//#define CORR_OUT_RAD 7 // full tile (15x15), was 4 (9x9)
#define TEXT_NTILE_SHIFT 8 // tile number shift for texture calculation (will be different from CORR_NTILE_SHIFT!)
#define FAT_ZERO_WEIGHT 0.0001 // add to port weights to avoid nan
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment