Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
f134cfa4
Commit
f134cfa4
authored
Apr 14, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
4 images with CDP
parent
bd04c118
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
58 additions
and
6 deletions
+58
-6
TileProcessor.cuh
src/TileProcessor.cuh
+28
-1
TileProcessor.h
src/TileProcessor.h
+10
-2
test_tp.cu
src/test_tp.cu
+20
-3
No files found.
src/TileProcessor.cuh
View file @
f134cfa4
...
@@ -2055,7 +2055,34 @@ __global__ void imclt_rbg_all(
...
@@ -2055,7 +2055,34 @@ __global__ void imclt_rbg_all(
int woi_theight,
int woi_theight,
const size_t dstride) // in floats (pixels)
const size_t dstride) // in floats (pixels)
{
{
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
if (threadIdx.x == 0) { // anyway 1,1,1
for (int ncam = 0; ncam < NUM_CAMS; ncam++) {
for (int color = 0; color < colors; color++) {
for (int v_offs = 0; v_offs < 2; v_offs++){
for (int h_offs = 0; h_offs < 2; h_offs++){
int tilesy_half = (woi_theight + (v_offs ^ 1)) >> 1;
int tilesx_half = (woi_twidth + (h_offs ^ 1)) >> 1;
int tiles_in_pass = tilesy_half * tilesx_half;
dim3 grid_imclt((tiles_in_pass + IMCLT_TILES_PER_BLOCK-1) / IMCLT_TILES_PER_BLOCK,1,1);
// printf("grid_imclt= (%d, %d, %d)\n",grid_imclt.x, grid_imclt.y, grid_imclt.z);
imclt_rbg<<<grid_imclt,threads_imclt>>>(
gpu_clt[ncam], // float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
gpu_corr_images[ncam], // float * gpu_rbg, // WIDTH, 3 * HEIGHT
1, // int apply_lpf,
colors, // int colors, // defines lpf filter
color, // int color, // defines location of clt data
v_offs, // int v_offset,
h_offs, // int h_offset,
woi_twidth, // int woi_twidth, // will increase by DTT_SIZE (todo - cut away?)
woi_theight, // int woi_theight, // will increase by DTT_SIZE (todo - cut away?)
dstride); // const size_t dstride); // in floats (pixels)
cudaDeviceSynchronize();
}
}
}
}
}
}
}
...
...
src/TileProcessor.h
View file @
f134cfa4
...
@@ -109,6 +109,16 @@ extern "C" __global__ void textures_accumulate(
...
@@ -109,6 +109,16 @@ extern "C" __global__ void textures_accumulate(
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
extern
"C"
__global__
void
imclt_rbg_all
(
float
**
gpu_clt
,
// [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_corr_images
,
// [NUM_CAMS][WIDTH, 3 * HEIGHT]
int
apply_lpf
,
int
colors
,
int
woi_twidth
,
int
woi_theight
,
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
__global__
void
imclt_rbg
(
extern
"C"
__global__
void
imclt_rbg
(
float
*
gpu_clt
,
// [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_clt
,
// [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
...
@@ -151,5 +161,3 @@ __global__ void generate_RBGA(
...
@@ -151,5 +161,3 @@ __global__ void generate_RBGA(
const
size_t
texture_rbga_stride
,
// in floats
const
size_t
texture_rbga_stride
,
// in floats
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
src/test_tp.cu
View file @
f134cfa4
...
@@ -823,8 +823,8 @@ int main(int argc, char **argv)
...
@@ -823,8 +823,8 @@ int main(int argc, char **argv)
// testing imclt
// testing imclt
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
//
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
printf("threads_imclt=(%d, %d, %d)\n",threads_imclt.x,threads_imclt.y,threads_imclt.z);
//
printf("threads_imclt=(%d, %d, %d)\n",threads_imclt.x,threads_imclt.y,threads_imclt.z);
StopWatchInterface *timerIMCLT = 0;
StopWatchInterface *timerIMCLT = 0;
sdkCreateTimer(&timerIMCLT);
sdkCreateTimer(&timerIMCLT);
...
@@ -836,7 +836,23 @@ int main(int argc, char **argv)
...
@@ -836,7 +836,23 @@ int main(int argc, char **argv)
sdkResetTimer(&timerIMCLT);
sdkResetTimer(&timerIMCLT);
sdkStartTimer(&timerIMCLT);
sdkStartTimer(&timerIMCLT);
}
}
#define CDP1
#ifdef CDP1
dim3 threads_imclt_all(1, 1, 1);
dim3 grid_imclt_all(1, 1, 1);
printf("threads_imclt_all=(%d, %d, %d)\n",threads_imclt_all.x,threads_imclt_all.y,threads_imclt_all.z);
printf("grid_imclt_all= (%d, %d, %d)\n",grid_imclt_all.x, grid_imclt_all.y, grid_imclt_all.z);
imclt_rbg_all<<<grid_imclt_all,threads_imclt_all>>>(
gpu_clt, // float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
gpu_corr_images, // float ** gpu_corr_images, // [NUM_CAMS][WIDTH, 3 * HEIGHT]
1, // int apply_lpf,
NUM_COLORS, // int colors, // defines lpf filter
TILESX, // int woi_twidth,
TILESY, // int woi_theight,
dstride_rslt/sizeof(float)); // const size_t dstride); // in floats (pixels)
#else
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
printf("threads_imclt=(%d, %d, %d)\n",threads_imclt.x,threads_imclt.y,threads_imclt.z);
for (int ncam = 0; ncam < NUM_CAMS; ncam++) {
for (int ncam = 0; ncam < NUM_CAMS; ncam++) {
for (int color = 0; color < NUM_COLORS; color++) {
for (int color = 0; color < NUM_COLORS; color++) {
for (int v_offs = 0; v_offs < 2; v_offs++){
for (int v_offs = 0; v_offs < 2; v_offs++){
...
@@ -861,6 +877,7 @@ int main(int argc, char **argv)
...
@@ -861,6 +877,7 @@ int main(int argc, char **argv)
}
}
}
}
}
}
#endif
getLastCudaError("Kernel failure");
getLastCudaError("Kernel failure");
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(cudaDeviceSynchronize());
printf("test pass: %d\n",i);
printf("test pass: %d\n",i);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment