Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
bb7792f8
Commit
bb7792f8
authored
Aug 07, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
removing TILESX
parent
45b35601
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
11 deletions
+20
-11
TileProcessor.cuh
src/TileProcessor.cuh
+16
-9
TileProcessor.h
src/TileProcessor.h
+2
-1
test_tp.cu
src/test_tp.cu
+2
-1
No files found.
src/TileProcessor.cuh
View file @
bb7792f8
...
@@ -928,7 +928,9 @@ extern "C" __global__ void textures_accumulate(
...
@@ -928,7 +928,9 @@ extern "C" __global__ void textures_accumulate(
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
size_t texture_stride, // in floats (now 256*4 = 1024)
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
float * gpu_diff_rgb_combo, //'); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
int tilesx)
;
// ====== end of local declarations ====
// ====== end of local declarations ====
...
@@ -1415,8 +1417,8 @@ extern "C" __global__ void generate_RBGA(
...
@@ -1415,8 +1417,8 @@ extern "C" __global__ void generate_RBGA(
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024)
0, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
(float *)0);//gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
(float *)0
, //
);//gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
width);
cudaDeviceSynchronize(); // not needed yet, just for testing
cudaDeviceSynchronize(); // not needed yet, just for testing
/* */
/* */
}
}
...
@@ -2036,9 +2038,11 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -2036,9 +2038,11 @@ extern "C" __global__ void textures_nonoverlap(
// combining both non-overlap and overlap (each calculated if pointer is not null )
// combining both non-overlap and overlap (each calculated if pointer is not null )
size_t texture_stride, // in floats (now 256*4 = 1024)
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
float * gpu_diff_rgb_combo, // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
int num_tilesx)
// num_tilesx in the end - worked, after num_tiles - did not compile with JIT in Eclipse
{
{
int num_tilesx = TILESX;
//
int num_tilesx = TILESX;
float min_shot = params[0]; // 10.0
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_sigma = params[2]; // pixel value/pixel change
...
@@ -2081,7 +2085,8 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -2081,7 +2085,8 @@ extern "C" __global__ void textures_nonoverlap(
(float *) 0, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
(float *) 0, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
texture_stride, // size_t texture_stride, // in floats (now 256*4 = 1024)
texture_stride, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
num_tilesx);
}
}
}
}
...
@@ -2137,8 +2142,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -2137,8 +2142,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
size_t texture_stride, // in floats (now 256*4 = 1024)
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
float * gpu_diff_rgb_combo, //) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
int tilesx)
{
{
// int tilesx = TILESX;
// (float *) gpu_geometry_correction ->pXY0,
// (float *) gpu_geometry_correction ->pXY0,
// float weights[3] = {weight0, weight1, weight2};
// float weights[3] = {weight0, weight1, weight2};
// will process exactly 4 cameras in one block (so this number is not adjustable here NUM_CAMS should be == 4 !
// will process exactly 4 cameras in one block (so this number is not adjustable here NUM_CAMS should be == 4 !
...
@@ -2476,8 +2483,8 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -2476,8 +2483,8 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
}
}
int slice_stride = texture_rbg_stride * (*(woi + 3) + 1) * DTT_SIZE; // offset to the next color
int slice_stride = texture_rbg_stride * (*(woi + 3) + 1) * DTT_SIZE; // offset to the next color
int tileY = tile_num / TILESX; // slow, but 1 per tile
int tileY = tile_num /
tilesx; //
TILESX; // slow, but 1 per tile
int tileX = tile_num - tileY * TILESX;
int tileX = tile_num - tileY *
tilesx; //
TILESX;
int tile_x0 = (tileX - *(woi + 0)) * DTT_SIZE; // - (DTT_SIZE/2); // may be negative == -4
int tile_x0 = (tileX - *(woi + 0)) * DTT_SIZE; // - (DTT_SIZE/2); // may be negative == -4
int tile_y0 = (tileY - *(woi + 1)) * DTT_SIZE; // - (DTT_SIZE/2); // may be negative == -4
int tile_y0 = (tileY - *(woi + 1)) * DTT_SIZE; // - (DTT_SIZE/2); // may be negative == -4
/// int height = *(woi + 3) << DTT_SIZE_LOG2;
/// int height = *(woi + 3) << DTT_SIZE_LOG2;
...
...
src/TileProcessor.h
View file @
bb7792f8
...
@@ -94,7 +94,8 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -94,7 +94,8 @@ extern "C" __global__ void textures_nonoverlap(
// combining both non-overlap and overlap (each calculated if pointer is not null )
// combining both non-overlap and overlap (each calculated if pointer is not null )
size_t
texture_stride
,
// in floats (now 256*4 = 1024) // may be 0 if not needed
size_t
texture_stride
,
// in floats (now 256*4 = 1024) // may be 0 if not needed
float
*
gpu_texture_tiles
,
// (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
float
*
gpu_texture_tiles
,
// (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
float
*
gpu_diff_rgb_combo
);
// diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
float
*
gpu_diff_rgb_combo
,
//); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
int
num_tilesx
);
extern
"C"
extern
"C"
__global__
void
imclt_rbg_all
(
__global__
void
imclt_rbg_all
(
...
...
src/test_tp.cu
View file @
bb7792f8
...
@@ -1106,7 +1106,8 @@ int main(int argc, char **argv)
...
@@ -1106,7 +1106,8 @@ int main(int argc, char **argv)
// combining both non-overlap and overlap (each calculated if pointer is not null )
// combining both non-overlap and overlap (each calculated if pointer is not null )
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
TILESX);
getLastCudaError("Kernel failure");
getLastCudaError("Kernel failure");
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(cudaDeviceSynchronize());
printf("test pass: %d\n",i);
printf("test pass: %d\n",i);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment