Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
bfa258e0
Commit
bfa258e0
authored
Dec 03, 2021
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
minor updates
parent
d85c42b5
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
13 additions
and
17 deletions
+13
-17
TileProcessor.cuh
src/TileProcessor.cuh
+4
-6
geometry_correction.h
src/geometry_correction.h
+0
-1
test_tp.cu
src/test_tp.cu
+4
-5
tp_defines.h
src/tp_defines.h
+5
-5
No files found.
src/TileProcessor.cuh
View file @
bfa258e0
...
@@ -1288,7 +1288,7 @@ extern "C" __global__ void correlate2D_inner(
...
@@ -1288,7 +1288,7 @@ extern "C" __global__ void correlate2D_inner(
// copy clt (frequency domain data)
// copy clt (frequency domain data)
float * clt_tile1 = ((float *) clt_tiles1) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
float * clt_tile1 = ((float *) clt_tiles1) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
float * clt_tile2 = ((float *) clt_tiles2) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
float * clt_tile2 = ((float *) clt_tiles2) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
int offs = (tile_num *
NUM_COLORS
+ color) * (4 * DTT_SIZE * DTT_SIZE) + threadIdx.x;
int offs = (tile_num *
colors
+ color) * (4 * DTT_SIZE * DTT_SIZE) + threadIdx.x;
float * gpu_tile1 = ((float *) gpu_clt[cam1]) + offs;
float * gpu_tile1 = ((float *) gpu_clt[cam1]) + offs;
float * gpu_tile2 = ((float *) gpu_clt[cam2]) + offs;
float * gpu_tile2 = ((float *) gpu_clt[cam2]) + offs;
float * clt_tile1i = clt_tile1 + threadIdx.x;
float * clt_tile1i = clt_tile1 + threadIdx.x;
...
@@ -1892,7 +1892,6 @@ extern "C" __global__ void corr2D_normalize_inner(
...
@@ -1892,7 +1892,6 @@ extern "C" __global__ void corr2D_normalize_inner(
*
*
* @param num_cams number of cameras
* @param num_cams number of cameras
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
//* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param num_texture_tiles allocated array - 8 integers (may be reduced to 4 later)
* @param num_texture_tiles allocated array - 8 integers (may be reduced to 4 later)
...
@@ -2126,7 +2125,6 @@ __global__ void clear_texture_rbga(
...
@@ -2126,7 +2125,6 @@ __global__ void clear_texture_rbga(
*
*
* @param num_cams number of cameras
* @param num_cams number of cameras
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
//* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param num_texture_tiles number of texture tiles to process (allocated 8-element integer array)
* @param num_texture_tiles number of texture tiles to process (allocated 8-element integer array)
...
@@ -2243,7 +2241,6 @@ __global__ void clear_texture_list(
...
@@ -2243,7 +2241,6 @@ __global__ void clear_texture_list(
*
*
* @param num_cams number of cameras <= NUM_CAMS
* @param num_cams number of cameras <= NUM_CAMS
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
//* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param width number of tiles in a row
* @param width number of tiles in a row
* @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param gpu_texture_indices allocated array - 1 integer per tile to process
...
@@ -2459,7 +2456,6 @@ __global__ void index_direct(
...
@@ -2459,7 +2456,6 @@ __global__ void index_direct(
*
*
* @param num_cams number of cameras <= NUM_CAMS
* @param num_cams number of cameras <= NUM_CAMS
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
//* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param num_tiles number of tiles in gpu_tasks array prepared for processing
* @param width number of tiles in a row
* @param width number of tiles in a row
* @param nonoverlap_list integer array to place the generated list
* @param nonoverlap_list integer array to place the generated list
...
@@ -3338,7 +3334,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3338,7 +3334,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
// always copy 3 (1) colors + alpha
// always copy 3 (1) colors + alpha
if (colors == 3){
if (colors == 3){
#pragma unroll
#pragma unroll
for (int ncol = 0; ncol <
NUM_COLORS
+ 1; ncol++) { // 4
for (int ncol = 0; ncol <
colors
+ 1; ncol++) { // 4
*(gpu_texture_rbg_gi + ncol * slice_stride) += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
*(gpu_texture_rbg_gi + ncol * slice_stride) += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
}
}
} else { // assuming colors = 1
} else { // assuming colors = 1
...
@@ -3970,7 +3966,9 @@ __device__ void convertCorrectTile(
...
@@ -3970,7 +3966,9 @@ __device__ void convertCorrectTile(
int kernels_vert,
int kernels_vert,
int tilesx)
int tilesx)
{
{
#ifdef DEBUG30
int dbg_tile = (num_colors & 16) != 0;
int dbg_tile = (num_colors & 16) != 0;
#endif
num_colors &= 7;
num_colors &= 7;
// int tilesx = TILES-X;
// int tilesx = TILES-X;
int is_mono = num_colors == 1;
int is_mono = num_colors == 1;
...
...
src/geometry_correction.h
View file @
bfa258e0
...
@@ -65,7 +65,6 @@ struct tp_task {
...
@@ -65,7 +65,6 @@ struct tp_task {
float
centerXY
[
2
];
// "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
float
centerXY
[
2
];
// "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
// if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
// if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
float
xy
[
NUM_CAMS
][
2
];
float
xy
[
NUM_CAMS
][
2
];
// float target_disparity;
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
};
};
...
...
src/test_tp.cu
View file @
bfa258e0
...
@@ -892,8 +892,8 @@ int main(int argc, char **argv)
...
@@ -892,8 +892,8 @@ int main(int argc, char **argv)
#define TEST_REVERSE_DISTORTIONS
#define TEST_REVERSE_DISTORTIONS
#ifdef TEST_REVERSE_DISTORTIONS
#ifdef TEST_REVERSE_DISTORTIONS
dim3 threads_rd(3,3,3);
dim3 threads_rd(3,3,3);
// dim3 grid_rd (NUM_CAMS, 1, 1);
dim3 grid_rd (NUM_CAMS, 1, 1); // can get rid of NUM_CAMS
dim3 grid_rd (num_cams, 1, 1);
//
dim3 grid_rd (num_cams, 1, 1);
printf("REVERSE DISTORTIONS: threads_list=(%d, %d, %d)\n",threads_rd.x,threads_rd.y,threads_rd.z);
printf("REVERSE DISTORTIONS: threads_list=(%d, %d, %d)\n",threads_rd.x,threads_rd.y,threads_rd.z);
printf("REVERSE DISTORTIONS: grid_list=(%d, %d, %d)\n",grid_rd.x,grid_rd.y,grid_rd.z);
printf("REVERSE DISTORTIONS: grid_list=(%d, %d, %d)\n",grid_rd.x,grid_rd.y,grid_rd.z);
...
@@ -1255,7 +1255,6 @@ int main(int argc, char **argv)
...
@@ -1255,7 +1255,6 @@ int main(int argc, char **argv)
color_weights[2], // 0.5, // float scale2, // scale for G
color_weights[2], // 0.5, // float scale2, // scale for G
30.0, // float fat_zero, // here - absolute
30.0, // float fat_zero, // here - absolute
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles) // number of tiles in task
tp_task_size, // int num_tiles) // number of tiles in task
TILESX, // int tilesx, // number of tile rows
TILESX, // int tilesx, // number of tile rows
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
...
...
src/tp_defines.h
View file @
bfa258e0
...
@@ -41,10 +41,10 @@
...
@@ -41,10 +41,10 @@
#ifndef JCUDA
#ifndef JCUDA
#include <stdio.h>
#include <stdio.h>
#define THREADSX (DTT_SIZE)
#define THREADSX (DTT_SIZE)
#define TEST_LWIR
1
#define TEST_LWIR
0
#define NUM_CAMS 16 // now maximal number of cameras
#define NUM_CAMS 16 // now maximal number of cameras
//#define NUM_PAIRS 6
//#define NUM_PAIRS 6
#define NUM_COLORS 1 //3
//
#define NUM_COLORS 1 //3
// kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64]
// kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64]
#if TEST_LWIR
#if TEST_LWIR
#define IMG_WIDTH 640
#define IMG_WIDTH 640
...
@@ -72,7 +72,7 @@
...
@@ -72,7 +72,7 @@
#define IMCLT_TILES_PER_BLOCK 4
#define IMCLT_TILES_PER_BLOCK 4
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
// only lower bit will be used to request correlations, correlation mask will be common for all the scene
// only lower bit will be used to request correlations, correlation mask will be common for all the scene
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
//
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define TASK_CORR_BITS 4
#define TASK_CORR_BITS 4
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
...
@@ -110,8 +110,8 @@
...
@@ -110,8 +110,8 @@
#define DBG_TILE_Y 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE_Y 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE (DBG_TILE_Y * 80 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 80 + DBG_TILE_X)
#else
#else
#define DBG_TILE_X 32 // 162 // 151 // 161 // 49
#define DBG_TILE_X
114 //
32 // 162 // 151 // 161 // 49
#define DBG_TILE_Y 88 // 121 // 69 // 111 // 66
#define DBG_TILE_Y
51 // 52 //
88 // 121 // 69 // 111 // 66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#endif
#endif
#undef DBG_MARK_DBG_TILE
#undef DBG_MARK_DBG_TILE
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment