Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
9619c4ac
Commit
9619c4ac
authored
Apr 02, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implementing prepare_texture_list with CDP
parent
891a0d82
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
291 additions
and
23 deletions
+291
-23
TileProcessor.cuh
src/TileProcessor.cuh
+177
-12
dtt8x8.cu
src/dtt8x8.cu
+114
-11
No files found.
src/TileProcessor.cuh
View file @
9619c4ac
This diff is collapsed.
Click to expand it.
src/dtt8x8.cu
View file @
9619c4ac
...
...
@@ -51,10 +51,11 @@
float * copyalloc_kernel_gpu(float * kernel_host,
int size) // size in floats
int size, // size in floats
int full_size)
{
float *kernel_gpu;
checkCudaErrors(cudaMalloc((void **)&kernel_gpu, size * sizeof(float)));
checkCudaErrors(cudaMalloc((void **)&kernel_gpu,
full_
size * sizeof(float)));
checkCudaErrors(cudaMemcpy( // segfault
kernel_gpu,
kernel_host,
...
...
@@ -62,6 +63,17 @@ float * copyalloc_kernel_gpu(float * kernel_host,
cudaMemcpyHostToDevice));
return kernel_gpu;
}
float * copyalloc_kernel_gpu(float * kernel_host,
int size)
{
return copyalloc_kernel_gpu(kernel_host,
size, // size in floats
size);
}
float * alloccopy_from_gpu(
float * gpu_data,
float * cpu_data, // if null, will allocate
...
...
@@ -294,7 +306,9 @@ int main(int argc, char **argv)
struct tp_task task_data [TILESX*TILESY]; // maximal length - each tile
int corr_indices [NUM_PAIRS*TILESX*TILESY];
int texture_indices [TILESX*TILESY];
// int texture_indices [TILESX*TILESY];
int texture_indices [TILESX*TILESYA];
int cpu_woi [4];
// host array of pointers to GPU memory
float * gpu_kernels_h [NUM_CAMS];
...
...
@@ -312,6 +326,8 @@ int main(int argc, char **argv)
float * gpu_textures;
int * gpu_texture_indices;
int * gpu_woi;
int * gpu_num_texture_tiles;
float * gpu_port_offsets;
int num_corrs;
int num_textures;
...
...
@@ -448,14 +464,19 @@ int main(int argc, char **argv)
}
// num_corrs now has the total number of correlations
// copy corr_indices to gpu
gpu_corr_indices = (int *) copyalloc_kernel_gpu((float * ) corr_indices, num_corrs);
// gpu_corr_indices = (int *) copyalloc_kernel_gpu((float * ) corr_indices, num_corrs);
gpu_corr_indices = (int *) copyalloc_kernel_gpu(
(float * ) corr_indices,
num_corrs,
NUM_PAIRS * TILESX * TILESY);
// build texture_indices
num_textures = 0;
for (int ty = 0; ty < TILESY; ty++){
for (int tx = 0; tx < TILESX; tx++){
int nt = ty * TILESX + tx;
int cm = (task_data[nt].task >> TASK_TEXTURE_BIT) & 1;
// int cm = (task_data[nt].task >> TASK_TEXTURE_BIT) & 1;
int cm = task_data[nt].task & TASK_TEXTURE_BITS;
if (cm){
texture_indices[num_textures++] = (nt << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
}
...
...
@@ -463,7 +484,15 @@ int main(int argc, char **argv)
}
// num_textures now has the total number of textures
// copy corr_indices to gpu
gpu_texture_indices = (int *) copyalloc_kernel_gpu((float * ) texture_indices, num_textures);
// gpu_texture_indices = (int *) copyalloc_kernel_gpu((float * ) texture_indices, num_textures);
gpu_texture_indices = (int *) copyalloc_kernel_gpu(
(float * ) texture_indices,
num_textures,
TILESX * TILESYA); // number of rows - multiple of 4
// just allocate
checkCudaErrors(cudaMalloc((void **)&gpu_woi, 4 * sizeof(float)));
checkCudaErrors(cudaMalloc((void **)&gpu_num_texture_tiles, 4 * sizeof(float)));
// copy port indices to gpu
gpu_port_offsets = (float *) copyalloc_kernel_gpu((float * ) port_offsets, num_ports * 2);
...
...
@@ -780,7 +809,6 @@ int main(int argc, char **argv)
3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
// int diff_gauss, // when averaging images, use gaussian around average as weight (false - sharp all/nothing)
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
0.294118, // float weight0, // scale for R
0.117647, // float weight1, // scale for B
...
...
@@ -828,6 +856,7 @@ int main(int argc, char **argv)
result_textures_file); // const char * path) // file path
//DBG_TILE
#ifdef DEBUG9
int texture_offset = DBG_TILE * tile_texture_size;
int chn = 0;
for (int i = 0; i < tile_texture_size; i++){
...
...
@@ -842,16 +871,88 @@ int main(int argc, char **argv)
}
}
// int tile_texture_size = (texture_colors + 1 + (keep_texture_weights? (NUM_CAMS + texture_colors + 1): 0)) *256;
#endif // DEBUG9
#endif
free(cpu_textures);
#endif // ifndef NOTEXTURES
#define GEN_TEXTURE_LIST
#ifdef GEN_TEXTURE_LIST
dim3 threads_list(1,1, 1); // TEXTURE_TILES_PER_BLOCK, 1);
dim3 grid_list (1,1,1);
printf("threads_list=(%d, %d, %d)\n",threads_list.x,threads_list.y,threads_list.z);
printf("grid_list=(%d, %d, %d)\n",grid_list.x,grid_list.y,grid_list.z);
StopWatchInterface *timerTEXTURELIST = 0;
sdkCreateTimer(&timerTEXTURELIST);
for (int i = i0; i < numIterations; i++)
{
if (i == 0)
{
checkCudaErrors(cudaDeviceSynchronize());
sdkResetTimer(&timerTEXTURELIST);
sdkStartTimer(&timerTEXTURELIST);
}
prepare_texture_list<<<grid_list,threads_list>>> (
gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_num_texture_tiles, // int * num_texture_tiles, // number of texture tiles to process (4 elements)
gpu_woi, // int * woi, // x,y,width,height of the woi
TILESX, // int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1)
TILESY); // int height); // <= TILESY, use for faster processing of LWIR images
getLastCudaError("Kernel failure");
checkCudaErrors(cudaDeviceSynchronize());
printf("test pass: %d\n",i);
#ifdef DEBUG4
break;
#endif
#ifdef DEBUG5
break;
#endif
}
/// cudaProfilerStop();
sdkStopTimer(&timerTEXTURELIST);
float avgTimeTEXTURESLIST = (float)sdkGetTimerValue(&timerTEXTURELIST) / (float)numIterations;
sdkDeleteTimer(&timerTEXTURELIST);
printf("Average TextureList run time =%f ms\n", avgTimeTEXTURESLIST);
int cpu_num_texture_tiles[4];
checkCudaErrors(cudaMemcpy(
cpu_woi,
gpu_woi,
4 * sizeof(float),
cudaMemcpyDeviceToHost));
printf("WOI x=%d, y=%d, width=%d, height=%d\n", cpu_woi[0], cpu_woi[1], cpu_woi[2], cpu_woi[3]);
checkCudaErrors(cudaMemcpy(
// &cpu_num_texture_tiles,
cpu_num_texture_tiles,
gpu_num_texture_tiles,
4 * sizeof(float), // 4 separate sequences
cudaMemcpyDeviceToHost));
printf("cpu_num_texture_tiles=(%d, %d, %d, %d)\n", cpu_num_texture_tiles[0],
cpu_num_texture_tiles[1], cpu_num_texture_tiles[2], cpu_num_texture_tiles[3]);
for (int q = 0; q < 4; q++) {
checkCudaErrors(cudaMemcpy(
texture_indices + q * TILESX * (TILESYA >> 2),
gpu_texture_indices + q * TILESX * (TILESYA >> 2),
cpu_num_texture_tiles[q] * sizeof(float), // change to cpu_num_texture_tiles when ready
cudaMemcpyDeviceToHost));
}
for (int q = 0; q < 4; q++) {
printf("%d: %3x:%3x %3x:%3x %3x:%3x %3x:%3x %3x:%3x %3x:%3x %3x:%3x %3x:%3x \n",q,
(texture_indices[q * TILESX * (TILESYA >> 2) + 0] >> 8) / TILESX, (texture_indices[q * TILESX * (TILESYA >> 2) + 0] >> 8) % TILESX,
(texture_indices[q * TILESX * (TILESYA >> 2) + 1] >> 8) / TILESX, (texture_indices[q * TILESX * (TILESYA >> 2) + 1] >> 8) % TILESX,
(texture_indices[q * TILESX * (TILESYA >> 2) + 2] >> 8) / TILESX, (texture_indices[q * TILESX * (TILESYA >> 2) + 2] >> 8) % TILESX,
(texture_indices[q * TILESX * (TILESYA >> 2) + 3] >> 8) / TILESX, (texture_indices[q * TILESX * (TILESYA >> 2) + 3] >> 8) % TILESX,
(texture_indices[q * TILESX * (TILESYA >> 2) + 4] >> 8) / TILESX, (texture_indices[q * TILESX * (TILESYA >> 2) + 4] >> 8) % TILESX,
(texture_indices[q * TILESX * (TILESYA >> 2) + 5] >> 8) / TILESX, (texture_indices[q * TILESX * (TILESYA >> 2) + 5] >> 8) % TILESX,
(texture_indices[q * TILESX * (TILESYA >> 2) + 6] >> 8) / TILESX, (texture_indices[q * TILESX * (TILESYA >> 2) + 6] >> 8) % TILESX,
(texture_indices[q * TILESX * (TILESYA >> 2) + 7] >> 8) / TILESX, (texture_indices[q * TILESX * (TILESYA >> 2) + 7] >> 8) % TILESX);
}
#endif //GEN_TEXTURE_LIST
#ifdef SAVE_CLT
free(cpu_clt);
...
...
@@ -879,6 +980,8 @@ int main(int argc, char **argv)
checkCudaErrors(cudaFree(gpu_texture_indices));
checkCudaErrors(cudaFree(gpu_port_offsets));
checkCudaErrors(cudaFree(gpu_textures));
checkCudaErrors(cudaFree(gpu_woi));
checkCudaErrors(cudaFree(gpu_num_texture_tiles));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment