Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
28e55dce
Commit
28e55dce
authored
May 15, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Trying interscene correlation
parent
0dcd2dee
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
565 additions
and
17 deletions
+565
-17
TileProcessor.cuh
src/TileProcessor.cuh
+407
-15
TileProcessor.h
src/TileProcessor.h
+19
-1
test_tp.cu
src/test_tp.cu
+139
-1
No files found.
src/TileProcessor.cuh
View file @
28e55dce
This diff is collapsed.
Click to expand it.
src/TileProcessor.h
View file @
28e55dce
...
...
@@ -82,11 +82,29 @@ extern "C" __global__ void correlate2D(
int
tilesx
,
// number of tile rows
int
*
gpu_corr_indices
,
// packed tile+pair
int
*
pnum_corr_tiles
,
// pointer to a number of correlation tiles to process
size_t
corr_stride
,
// in floats
size_t
corr_stride
,
// in floats
// int corr_stride, // in floats
int
corr_radius
,
// radius of the output correlation (7 for 15x15)
float
*
gpu_corrs
);
// correlation output data
extern
"C"
__global__
void
correlate2D_inter
(
// only results in TD
int
num_cams
,
int
sel_sensors
,
float
**
gpu_clt
,
// [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt_ref
,
// [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
int
colors
,
// number of colors (3/1)
float
scale0
,
// scale for R
float
scale1
,
// scale for B
float
scale2
,
// scale for G
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task
int
tilesx
,
// number of tile rows
int
*
gpu_corr_indices
,
// packed tile+pair
int
*
pnum_corr_tiles
,
// pointer to a number of correlation tiles to process
size_t
corr_stride
,
// in floats
float
*
gpu_corrs
);
// correlation output data
extern
"C"
__global__
void
corr2D_normalize
(
int
num_corr_tiles
,
// number of correlation tiles to process
const
size_t
corr_stride_td
,
// in floats
...
...
src/test_tp.cu
View file @
28e55dce
...
...
@@ -36,7 +36,7 @@
// #define NOTEXTURE_RGBA
#define SAVE_CLT
//#define NO_DP
#define CORR_INTER_SELF 1
#include <stdio.h>
...
...
@@ -685,6 +685,7 @@ int main(int argc, char **argv)
const char* result_corr_quad_file = "clt/aux_corr-quad.corr";
const char* result_corr_td_norm_file = "clt/aux_corr-td-norm.corr";
/// const char* result_corr_cross_file = "clt/aux_corr-cross.corr";
const char* result_inter_td_norm_file = "clt/aux_inter-td-norm.corr";
const char* result_textures_file = "clt/aux_texture_nodp.rgba";
const char* result_diff_rgb_combo_file ="clt/aux_diff_rgb_combo_nodp.drbg";
const char* result_textures_rgba_file = "clt/aux_texture_rgba_nodp.rgba";
...
...
@@ -752,6 +753,7 @@ int main(int argc, char **argv)
const char* result_corr_quad_file = "clt/main_corr-quad.corr";
const char* result_corr_td_norm_file = "clt/aux_corr-td-norm.corr";
/// const char* result_corr_cross_file = "clt/main_corr-cross.corr";
const char* result_inter_td_norm_file = "clt/aux_inter-td-norm.corr";
const char* result_textures_file = "clt/main_texture_nodp.rgba";
const char* result_diff_rgb_combo_file ="clt/main_diff_rgb_combo_nodp.drbg";
const char* result_textures_rgba_file = "clt/main_texture_rgba_nodp.rgba";
...
...
@@ -1790,6 +1792,142 @@ int main(int argc, char **argv)
#endif // ifndef NOCORR_TD
// Testing "interframe" correlation with itself, assuming direct convert already ran
#ifdef CORR_INTER_SELF
int sel_sensors = 0xffff;
int num_sel_senosrs = 16;
num_pairs = num_sel_senosrs+1;
num_corr_indices = num_pairs * num_tiles;
StopWatchInterface *timerINTERSELF = 0;
sdkCreateTimer(&timerINTERSELF);
// int num_corr_combo_inter;
for (int i = i0; i < numIterations; i++)
{
if (i == 0)
{
checkCudaErrors(cudaDeviceSynchronize());
sdkResetTimer(&timerINTERSELF);
sdkStartTimer(&timerINTERSELF);
}
correlate2D_inter<<<1,1>>>( // only results in TD
num_cams, // int num_cams,
sel_sensors, // int sel_sensors,
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
gpu_clt, // float ** gpu_clt_ref, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
num_colors, // int colors, // number of colors (3/1)
color_weights[0], // 0.25, // float scale0, // scale for R
color_weights[1], // 0.25, // float scale1, // scale for B
color_weights[2], // 0.5, // float scale2, // scale for G
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
tp_task_size, // int num_tiles) // number of tiles in task
TILESX, // int tilesx, // number of tile rows
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
gpu_num_corr_tiles, // int * pnum_corr_tiles, // pointer to a number of correlation tiles to process
dstride_corr_td/sizeof(float), // const size_t corr_stride, // in floats
gpu_corrs_td); // float * gpu_corrs); // correlation output data
getLastCudaError("Kernel failure:correlate2D_inter");
checkCudaErrors(cudaDeviceSynchronize());
printf("correlate2D_inter-TD pass: %d\n",i);
checkCudaErrors(cudaMemcpy(
&num_corrs,
gpu_num_corr_tiles,
sizeof(int),
cudaMemcpyDeviceToHost));
checkCudaErrors(cudaDeviceSynchronize());
corr2D_normalize<<<1,1>>>(
num_corrs, //tp_task_size, // int num_corr_tiles, // number of correlation tiles to process
dstride_corr_td/sizeof(float), // const size_t corr_stride_td, // in floats
gpu_corrs_td, // float * gpu_corrs_td, // correlation tiles in transform domain
(float *) 0, // corr_weights, // float * corr_weights, // null or per-tile weight (fat_zero2 will be divided by it)
dstride_corr/sizeof(float), // const size_t corr_stride, // in floats
gpu_corrs, // float * gpu_corrs, // correlation output data (pixel domain)
30.0 * 30.0, // float fat_zero2, // here - absolute
CORR_OUT_RAD); // int corr_radius); // radius of the output correlation (7 for 15x15)
getLastCudaError("Kernel failure:corr2D_normalize");
checkCudaErrors(cudaDeviceSynchronize());
printf("corr2D_normalize pass: %d\n",i);
}
sdkStopTimer(&timerINTERSELF);
float avgTimeINTERSELF = (float)sdkGetTimerValue(&timerINTERSELF) / (float)numIterations;
sdkDeleteTimer(&timerINTERSELF);
printf("Average CORR-TD and companions run time =%f ms, num cor tiles (old) = %d\n", avgTimeINTERSELF, num_corrs);
rslt_corr_size = num_corrs * corr_size * corr_size;
corr_img_size = num_corr_indices * 16*16; // NAN
corr_img = (float *)malloc(corr_img_size * sizeof(float));
cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float));
cpu_corr_indices = (int *) malloc(num_corr_indices * sizeof(int));
checkCudaErrors(cudaMemcpy2D(
cpu_corr,
(corr_size * corr_size) * sizeof(float),
gpu_corrs,
dstride_corr,
(corr_size * corr_size) * sizeof(float),
num_corrs,
cudaMemcpyDeviceToHost));
checkCudaErrors(cudaMemcpy(
cpu_corr_indices,
gpu_corr_indices,
num_corr_indices * sizeof(int),
cudaMemcpyDeviceToHost));
for (int i = 0; i < corr_img_size; i++){
corr_img[i] = NAN;
}
// int num_pairs = 120;
// int sel_sensors = 0xffff;
// int num_sel_senosrs = 16;
// int corr_size = 2 * CORR_OUT_RAD + 1; // 15
// int num_tiles = tp_task_size; // TILESX * TILESYA; //Was this on 01/22/2022
// int num_corr_indices = num_pairs * num_tiles;
for (int ict = 0; ict < num_corr_indices; ict++){
int ctt = ( cpu_corr_indices[ict] >> CORR_NTILE_SHIFT);
int cpair = cpu_corr_indices[ict] & ((1 << CORR_NTILE_SHIFT) - 1);
if (cpair == 0xff){
cpair = num_sel_senosrs;
}
int ty = ctt / TILESX;
int tx = ctt % TILESX;
int src_offs0 = ict * corr_size * corr_size;
int dst_offs0 = cpair * (num_tiles * 16 * 16) + (ty * 16 * TILESX * 16) + (tx * 16);
for (int iy = 0; iy < corr_size; iy++){
int src_offs = src_offs0 + iy * corr_size; // ict * num_pairs * corr_size * corr_size;
int dst_offs = dst_offs0 + iy * (TILESX * 16);
for (int ix = 0; ix < corr_size; ix++){
corr_img[dst_offs++] = cpu_corr[src_offs++];
}
}
}
#ifndef NSAVE_CORR
printf("Writing interscene phase correlation data to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n",
result_inter_td_norm_file, (TILESX*16),(TILESYA*16), num_pairs, (corr_img_size * sizeof(float)) ) ;
writeFloatsToFile(
corr_img, // float * data, // allocated array
corr_img_size, // int size, // length in elements
result_inter_td_norm_file); // const char * path) // file path
#endif
free (cpu_corr);
free (cpu_corr_indices);
free (corr_img);
#endif // #ifdef CORR_INTER_SELF
// -----------------
#ifndef NOTEXTURES
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment