Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
2bfa77d0
Commit
2bfa77d0
authored
Nov 19, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Updating textures and related constants
parent
6a2b0886
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
18 additions
and
15 deletions
+18
-15
TileProcessor.cuh
src/TileProcessor.cuh
+13
-10
test_tp.cu
src/test_tp.cu
+2
-2
tp_defines.h
src/tp_defines.h
+3
-3
No files found.
src/TileProcessor.cuh
View file @
2bfa77d0
...
@@ -2749,8 +2749,10 @@ __global__ void gen_texture_list(
...
@@ -2749,8 +2749,10 @@ __global__ void gen_texture_list(
#endif // DEBUG12
#endif // DEBUG12
// *(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// *(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// keep only 8 LSBs of task, use higher 24 for task number
// keep only 8 LSBs of task, use higher 24 for task number
*(gpu_texture_indices + buf_offset) = (task & ((1 << CORR_NTILE_SHIFT) -1)) | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// *(gpu_texture_indices + buf_offset) = (task & ((1 << TEXT_NTILE_SHIFT) -1)) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT
// keep only 4 lower task bits
*(gpu_texture_indices + buf_offset) = (task & TASK_TEXTURE_BITS) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT // TASK_TEXTURE_BITS
}
}
//inline __device__ int get_task_size(int num_cams){
//inline __device__ int get_task_size(int num_cams){
// return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams);
// return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams);
...
@@ -2822,7 +2824,7 @@ extern "C" __global__ void create_nonoverlap_list(
...
@@ -2822,7 +2824,7 @@ extern "C" __global__ void create_nonoverlap_list(
}
}
/// int cxy = gpu_tasks[num_tile].txy;
/// int cxy = gpu_tasks[num_tile].txy;
int cxy = get_task_txy(num_tile, gpu_ftasks, num_cams);
int cxy = get_task_txy(num_tile, gpu_ftasks, num_cams);
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) <<
CORR
_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) <<
TEXT
_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
// if (gpu_tasks[num_tile].task != 0) {
// if (gpu_tasks[num_tile].task != 0) {
if (task_task != 0) {
if (task_task != 0) {
nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code;
nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code;
...
@@ -3401,10 +3403,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3401,10 +3403,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
// get number of tile
// get number of tile
int tile_code = gpu_texture_indices[tile_indx + gpu_texture_indices_offset]; // Added for Java, no DP
int tile_code = gpu_texture_indices[tile_indx + gpu_texture_indices_offset]; // Added for Java, no DP
if ((tile_code & (1 <<
CORR
_TEXTURE_BIT)) == 0){
if ((tile_code & (1 <<
LIST
_TEXTURE_BIT)) == 0){
return; // nothing to do
return; // nothing to do
}
}
int tile_num = tile_code >>
CORR
_NTILE_SHIFT;
int tile_num = tile_code >>
TEXT
_NTILE_SHIFT;
#ifdef DEBUG7A
#ifdef DEBUG7A
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){
if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){
...
@@ -3784,6 +3786,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3784,6 +3786,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
#endif // DEBUG12
#endif // DEBUG12
int alpha_mode = alphaIndex[tile_code]; // only 4 lowest bits
int alpha_mode = alphaIndex[tile_code]; // only 4 lowest bits
if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
// TODO: calculate per-color average and add with (1.0-alphaFade) for colors only, no
for (int pass = 0; pass < 8; pass ++) {
for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1);
int row = pass * 2 + (threadIdx.y >> 1);
int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
...
@@ -3843,10 +3846,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3843,10 +3846,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
#endif // DEBUG12
#endif // DEBUG12
/// if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILES-Y)) && (g_col < (DTT_SIZE * TILES-X)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILES-X)))){
// always copy 3 (1) colors + alpha
// always copy 3 (1) colors + alpha
if (colors == 3){
if (colors == 3){
#pragma unroll
#pragma unroll
...
@@ -3871,7 +3870,11 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3871,7 +3870,11 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
int gi = row * DTT_SIZE2 + col;
int gi = row * DTT_SIZE2 + col;
float * mclt_dst_i = mclt_dst_ncam + i;
float * mclt_dst_i = mclt_dst_ncam + i;
for (int ncol = 0; ncol < colors; ncol++) {
for (int ncol = 0; ncol < colors; ncol++) {
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
// *(mclt_dst_i + ncol * (MCLT_UNION_LEN)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
float a = alphaFade[alpha_mode][gi];
float v = a * (*(mclt_dst_i + ncol * (MCLT_UNION_LEN))) +
(1 - a) * ports_rgb_shared[ncol * num_cams + ncam];
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) = v; // see if ports_rgb_shared[] meeds scaling
}
}
}
}
}
}
...
...
src/test_tp.cu
View file @
2bfa77d0
...
@@ -1140,9 +1140,9 @@ int main(int argc, char **argv)
...
@@ -1140,9 +1140,9 @@ int main(int argc, char **argv)
for (int tx = 0; tx < TILESX; tx++){
for (int tx = 0; tx < TILESX; tx++){
int nt = ty * TILESX + tx;
int nt = ty * TILESX + tx;
float *tp = ftask_data + task_size * nt;
float *tp = ftask_data + task_size * nt;
int cm = (*(int *) tp) & TASK_TEXTURE_BITS;
int cm = (*(int *) tp) & TASK_TEXTURE_BITS;
// non-zero any of 4 lower task bits
if (cm){
if (cm){
texture_indices[num_textures++] = (nt << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
texture_indices[num_textures++] = (nt << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// setting 0x80 in texture indices
}
}
}
}
}
}
...
...
src/tp_defines.h
View file @
2bfa77d0
...
@@ -62,15 +62,15 @@
...
@@ -62,15 +62,15 @@
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
// only lower bit will be used to request correlations, correlation mask will be common for all the scene
// only lower bit will be used to request correlations, correlation mask will be common for all the scene
//#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
//#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
//
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define TASK_CORR_BITS 4
#define TASK_CORR_BITS 4
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
#define TASK_TEXTURE_E_BIT 1 // Texture with East neighbor
#define TASK_TEXTURE_E_BIT 1 // Texture with East neighbor
#define TASK_TEXTURE_S_BIT 2 // Texture with South neighbor
#define TASK_TEXTURE_S_BIT 2 // Texture with South neighbor
#define TASK_TEXTURE_W_BIT 3 // Texture with West neighbor
#define TASK_TEXTURE_W_BIT 3 // Texture with West neighbor
//#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
#define LIST_TEXTURE_BIT 7 // bit to request texture calculation
#define LIST_TEXTURE_BIT 7 // bit to request texture calculation
//#define CORR_OUT_RAD 7 // full tile (15x15), was 4 (9x9)
#define TEXT_NTILE_SHIFT 8 // tile number shift for texture calculation (will be different from CORR_NTILE_SHIFT!)
#define FAT_ZERO_WEIGHT 0.0001 // add to port weights to avoid nan
#define FAT_ZERO_WEIGHT 0.0001 // add to port weights to avoid nan
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment