Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
dc090454
Commit
dc090454
authored
Apr 07, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
More refactoring
parent
67816dbf
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
248 additions
and
32 deletions
+248
-32
TileProcessor.h
src/TileProcessor.h
+1
-1
TpHostGpu.cu
src/TpHostGpu.cu
+223
-22
TpHostGpu.h
src/TpHostGpu.h
+19
-6
TpParams.cu
src/TpParams.cu
+1
-1
TpParams.h
src/TpParams.h
+2
-2
test_tp.cu
src/test_tp.cu
+2
-0
No files found.
src/TileProcessor.h
View file @
dc090454
...
...
@@ -126,7 +126,7 @@ extern "C" __global__ void correlate2D_inter( // only results in TD
int
*
gpu_corr_indices
,
// packed tile+pair
int
*
pnum_corr_tiles
,
// pointer to a number of correlation tiles to process
size_t
corr_stride
,
// in floats
float
*
gpu_corrs
);
// correlation output data
float
*
gpu_corrs
);
// correlation output data
extern
"C"
__global__
void
corr2D_normalize
(
...
...
src/TpHostGpu.cu
View file @
dc090454
...
...
@@ -119,6 +119,35 @@ void TpHostGpu::setImgBuffers(){
m_gpu_images = copyalloc_pointers_gpu (m_gpu_images_h, m_tpParams.num_cams); // NUM_CAMS);
}
/**
 * Re-read every camera image from disk, shift it on the host and upload the
 * shifted copy into the device buffer that already exists for that camera.
 *
 * The existing per-camera buffers in m_gpu_images_h[] are reused on purpose:
 * allocating fresh ones here (as an earlier revision did right after the
 * update) would drop the only host-side reference to the previous device
 * allocation, so it could never be freed, and the upload just performed into
 * it would be duplicated for no benefit.
 *
 * @param is_bayer  forwarded unchanged to shift_image() as its "bayer" argument
 * @param image_dx  forwarded unchanged to shift_image() as its "dx" argument
 * @param image_dy  forwarded unchanged to shift_image() as its "dy" argument
 * @throws std::runtime_error if a camera has no device image buffer yet
 *         (presumably setImgBuffers() has to run first - confirm against callers)
 */
void TpHostGpu::setImgBuffersShifted(int is_bayer, int image_dx, int image_dy) {
	for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
		if (!m_gpu_images_h[ncam]) {
			throw std::runtime_error("Error: m_gpu_images_h is not initialized ");
		}
		// m_host_kern_buf is used as the host staging buffer for one image.
		readFloatsFromFile(
				m_host_kern_buf,              // float * data, // allocated array
				m_tpPaths.image_files[ncam]); // char *  path) // file path
		shift_image (
				m_host_kern_buf,       // float * image,
				m_tpParams.img_width,  // int     width,
				m_tpParams.img_height, // int     height,
				is_bayer,              // int     bayer,
				image_dx,              // int     dx,
				image_dy);             // int     dy);
		// NOTE(review): the callee's parameter comment says "in floats", while
		// the convert_direct launches elsewhere in this file pass
		// dstride/sizeof(float) - confirm which unit update_image_gpu() expects.
		update_image_gpu(
				m_host_kern_buf,        // float * image_host,
				m_gpu_images_h[ncam],   // float * image_gpu,
				dstride,                // size_t  dstride,
				m_tpParams.img_width,   // int     width,
				m_tpParams.img_height); // int     height);
	}
}
void TpHostGpu::setGeometryCorrectionBuffers() {
readFloatsFromFile(
(float *) &m_fgeometry_correction, // float * data, // allocated array, no need to free
...
...
@@ -251,7 +280,26 @@ void TpHostGpu::setRGBA(){
(m_tpParams.num_colors + 1) * sizeof(float)));
}
// The six test entry points below are empty placeholders: each accepts
// num_runs and currently does nothing.
// NOTE(review): the trailing number on each line looks like the line number of
// the corresponding test section in test_tp.cu (TEST_REVERSE_DISTORTIONS sits
// near line 468 there) - confirm before relying on it.

// Placeholder for the rotation matrices test; no work is performed yet.
void TpHostGpu::testRotMatrices (int num_runs){ // 424
}
// Placeholder for the reverse distortions test; no work is performed yet.
void TpHostGpu::testReverseDistortions (int num_runs){ // 468
}
// Placeholder for the geometry correction test; no work is performed yet.
void TpHostGpu::testGeomCorrect (int num_runs){ // 534
}
// Placeholder for the convert_direct test; no work is performed yet.
void TpHostGpu::testConvertDirect (int num_runs){ // 608
}
// Placeholder for the imclt test; no work is performed yet.
void TpHostGpu::testImclt (int num_runs){ // 682
}
// Placeholder for the imclt "RbgAll" test; no work is performed yet.
void TpHostGpu::testImcltRbgAll (int num_runs){ // 701
}
void TpHostGpu::testCorrelate2DIntra(int num_runs){
int num_corr_indices = m_tpParams.num_pairs * m_tpParams.num_tiles;
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_clt){
...
...
@@ -311,27 +359,174 @@ void TpHostGpu::testCorrelate2DIntra(int num_runs){
saveIntraCorrFile(
m_tpPaths.result_corr_file, // const char * path,
"phase correlation data", // const char * prompt,
num_corrs, // int num_corrs,
m_gpu_corrs, // float * gpu_corrs,
m_gpu_corr_indices, // int * gpu_corr_indices)
16); // //int num_sel_sensors) { // only for interscene
num_corrs, // int num_corrs,
num_corr_indices, // int num_corr_indices,
m_gpu_corrs, // float * gpu_corrs,
m_gpu_corr_indices, // int * gpu_corr_indices)
16); //int num_sel_sensors) { // only for interscene
}
void TpHostGpu::testCorrelate2DInterSelf(int num_runs){
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names
if (!m_gpu_clt){
throw std::runtime_error("Error: m_gpu_clt is not initialized ");
}
if (!m_gpu_ftasks) {
throw std::runtime_error("Error: m_gpu_ftasks is not initialized ");
}
if (!m_gpu_corrs) {
throw std::runtime_error("Error: m_gpu_corrs is not initialized ");
}
int sel_sensors = 0xffff; // 0x7fff; // 0xffff;
int num_sel_sensors = 16; // 15; // 16;
int num_pairs_inter = num_sel_sensors+1;
int num_corr_indices = num_pairs_inter * m_tpParams.num_tiles;
int is_bayer = 0;
int image_dx = 2;
int image_dy = 0;
float * gpu_clt_ref_h [m_tpParams.num_cams];
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
gpu_clt_ref_h[ncam] = alloc_kernel_gpu(m_tpParams.tilesy * m_tpParams.tilesx * m_tpParams.num_colors * 4 * m_tpParams.dtt_size * m_tpParams.dtt_size);
}
float ** gpu_clt_ref = copyalloc_pointers_gpu (gpu_clt_ref_h, m_tpParams.num_cams); // NUM_CAMS);
dim3 threads_tp(1, 1, 1);
dim3 grid_tp(1, 1, 1);
float ** fgpu_kernel_offsets = (float **) m_gpu_kernel_offsets; // [tpParams.num_cams] [NUM_CAMS];
// use gpu_images and convert to gpu_clt_ref
convert_direct<<<grid_tp,threads_tp>>>( // called with a single block, CONVERT_DIRECT_INDEXING_THREADS threads
m_tpParams.num_cams, // int num_cams, // actual number of cameras
m_tpParams.num_colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
fgpu_kernel_offsets, // struct CltExtra ** gpu_kernel_offsets,
m_gpu_kernels, // float ** gpu_kernels,
m_gpu_images, // float ** gpu_images,
m_gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
gpu_clt_ref, //****** // float ** gpu_clt, // [num_cams][TILESY][TILESX][num_colors][DTT_SIZE*DTT_SIZE]
dstride/sizeof(float), // size_t dstride, // for gpu_images
m_tpParams.tp_tasks_size,// int num_tiles) // number of tiles in task
0, // int lpf_mask) // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
m_tpParams.img_width, // int woi_width,
m_tpParams.img_height, // int woi_height,
m_tpParams.kernels_hor, // int kernels_hor,
m_tpParams.kernels_vert, //, // int kernels_vert);
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
m_gpu_num_active, // int * pnum_active_tiles); // indices to gpu_tasks
m_tpParams.tilesx); // int tilesx)
getLastCudaError("Kernel execution failed");
checkCudaErrors (cudaDeviceSynchronize());
// re-read same images. shift them, update gpu_images and convert to gpu_clt;
setImgBuffersShifted(
is_bayer, // int is_bayer,
image_dx, // int image_dx,
image_dy); // int image_dy)
convert_direct<<<grid_tp,threads_tp>>>( // called with a single block, CONVERT_DIRECT_INDEXING_THREADS threads
m_tpParams.num_cams, // int num_cams, // actual number of cameras
m_tpParams.num_colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
fgpu_kernel_offsets, // struct CltExtra ** gpu_kernel_offsets,
m_gpu_kernels, // float ** gpu_kernels,
m_gpu_images, // float ** gpu_images,
m_gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
m_gpu_clt, //****** // float ** gpu_clt, // [num_cams][TILESY][TILESX][num_colors][DTT_SIZE*DTT_SIZE]
dstride/sizeof(float), // size_t dstride, // for gpu_images
m_tpParams.tp_tasks_size, // int num_tiles) // number of tiles in task
0, // int lpf_mask) // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
m_tpParams.img_width, // int woi_width,
m_tpParams.img_height, // int woi_height,
m_tpParams.kernels_hor, // int kernels_hor,
m_tpParams.kernels_vert, //, // int kernels_vert);
m_gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
m_gpu_num_active, // int * pnum_active_tiles); // indices to gpu_tasks
m_tpParams.tilesx); // int tilesx)
getLastCudaError("Kernel execution failed");
checkCudaErrors(cudaDeviceSynchronize());
StopWatchInterface *timerINTERSELF = 0;
sdkCreateTimer(&timerINTERSELF);
int num_corrs{}; // will get data from the gpu memory
for (int i = i0; i < numIterations; i++) {
if (i == 0){
checkCudaErrors(cudaDeviceSynchronize());
sdkResetTimer(&timerINTERSELF);
sdkStartTimer(&timerINTERSELF);
}
correlate2D_inter<<<1,1>>>( // only results in TD
m_tpParams.num_cams, // int num_cams, // actual number of cameras
sel_sensors, // int sel_sensors,
m_gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
gpu_clt_ref, // ********* // float ** gpu_clt_ref, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
m_tpParams.num_colors, // int colors, // number of colors (3/1)
m_tpParams.color_weights[0], // 0.25, // float scale0, // scale for R
m_tpParams.color_weights[1], // 0.25, // float scale1, // scale for B
m_tpParams.color_weights[2], // 0.5, // float scale2, // scale for G
m_gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
m_tpParams.tp_tasks_size, // int num_tiles) // number of tiles in task
m_tpParams.tilesx, // int tilesx, // number of tile rows
m_gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
m_gpu_num_corr_tiles, // int * pnum_corr_tiles, // pointer to a number of correlation tiles to process
dstride_corr_td/sizeof(float), // const size_t corr_stride, // in floats
m_gpu_corrs_td); // float * gpu_corrs); // correlation output data
getLastCudaError("Kernel failure:correlate2D_inter");
checkCudaErrors(cudaDeviceSynchronize());
printf("correlate2D_inter-TD pass: %d\n",i);
checkCudaErrors(cudaMemcpy(
&num_corrs,
m_gpu_num_corr_tiles,
sizeof(int),
cudaMemcpyDeviceToHost));
checkCudaErrors(cudaDeviceSynchronize());
corr2D_normalize<<<1,1>>>(
num_corrs, //tp_task_size, // int num_corr_tiles, // number of correlation tiles to process
dstride_corr_td/sizeof(float), // const size_t corr_stride_td, // in floats
m_gpu_corrs_td, // float * gpu_corrs_td, // correlation tiles in transform domain
(float *) 0, // corr_weights, // float * corr_weights, // null or per-tile weight (fat_zero2 will be divided by it)
dstride_corr/sizeof(float), // const size_t corr_stride, // in floats
m_gpu_corrs, // float * gpu_corrs, // correlation output data (pixel domain)
m_tpParams.fat_zero * m_tpParams.fat_zero, // float fat_zero2, // here - absolute
m_tpParams.corr_out_rad); // int corr_radius); // radius of the output correlation (7 for 15x15)
getLastCudaError("Kernel failure:corr2D_normalize");
checkCudaErrors(cudaDeviceSynchronize());
printf("corr2D_normalize pass: %d\n",i);
}
sdkStopTimer(&timerINTERSELF);
float avgTimeINTERSELF = (float)sdkGetTimerValue(&timerINTERSELF) / (float)numIterations;
sdkDeleteTimer(&timerINTERSELF);
printf("Average CORR-TD and companions run time =%f ms, num cor tiles (old) = %d\n", avgTimeINTERSELF, num_corrs);
saveInterCorrFile(
m_tpPaths.result_interscene_td, // const char * path, // "clt/aux_interscene-TD.raw" m_tpPaths.result_interscene_td
"interscene phase correlation", // const char * prompt, // "interscene phase correlation"
num_corrs, // int num_corrs,
num_corr_indices, // int num_corr_indices,
m_gpu_corrs_td, // float * gpu_corrs_td,
m_gpu_corr_indices, // int * gpu_corr_indices,
num_sel_sensors); // int num_sel_sensors);
saveInterCorrIndicesFile(
m_tpPaths.result_interscene_indices, // const char * path, // "clt/aux_inter-indices.raw" m_tpPaths.result_interscene_indices
"interscene indices", // const char * prompt, // "interscene indices"
num_corr_indices, // int num_corr_indices,
m_gpu_corr_indices, // int * gpu_corr_indices,
num_sel_sensors); // int num_sel_sensors)
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
gfree(gpu_clt_ref_h[ncam]);
}
gfree(gpu_clt_ref);
}
void TpHostGpu::saveIntraCorrFile(
const char * path,
const char * prompt,
int num_corrs,
int num_corr_indices,
float * gpu_corrs,
int * gpu_corr_indices,
int num_sel_sensors) { // only for interscene
if (!path) return;
int rslt_corr_length = num_corrs * m_tpParams.corr_length;
int corr_img_size =
m_tpParams.
num_corr_indices * 16 * 16; // NAN
int corr_img_size =
num_corr_indices * 16 * 16; // NAN
// float * corr_img = (float *)malloc(corr_img_size * sizeof(float));
float * cpu_corr = (float *)malloc(rslt_corr_length * sizeof(float));
int * cpu_corr_indices = (int *) malloc(
m_tpParams.
num_corr_indices * sizeof(int));
int * cpu_corr_indices = (int *) malloc(num_corr_indices * sizeof(int));
checkCudaErrors(cudaMemcpy2D(
cpu_corr,
m_tpParams.corr_length * sizeof(float),
...
...
@@ -343,12 +538,13 @@ void TpHostGpu::saveIntraCorrFile(
checkCudaErrors(cudaMemcpy(
cpu_corr_indices,
gpu_corr_indices,
m_tpParams.
num_corr_indices * sizeof(int),
num_corr_indices * sizeof(int),
cudaMemcpyDeviceToHost));
float * corr_img = getCorrImg(
corr_img_size, // int corr_img_size,
corr_img_size, // int corr_img_size,
num_corr_indices, //int num_corr_indices,
cpu_corr_indices, // int * cpu_corr_indices,
cpu_corr, // float * cpu_corr,
cpu_corr,
// float * cpu_corr,
num_sel_sensors); // int num_sel_sensors)
printf("Writing %s to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n",
prompt, path, (m_tpParams.tilesx * 16),(m_tpParams.tilesya * 16), m_tpParams.num_pairs, (corr_img_size * sizeof(float)) ) ;
...
...
@@ -373,6 +569,7 @@ void TpHostGpu::saveIntraCorrFile(
float * TpHostGpu::getCorrImg(
int corr_img_size,
int num_corr_indices,
int * cpu_corr_indices,
float * cpu_corr,
int num_sel_sensors){
...
...
@@ -380,7 +577,7 @@ float * TpHostGpu::getCorrImg(
for (int i = 0; i < corr_img_size; i++){
corr_img[i] = NAN;
}
for (int ict = 0; ict <
m_tpParams.
num_corr_indices; ict++){
for (int ict = 0; ict < num_corr_indices; ict++){
int ctt = ( cpu_corr_indices[ict] >> m_tpParams.corr_ntile_shift); // CORR_NTILE_SHIFT);
int cpair = cpu_corr_indices[ict] & ((1 << m_tpParams.corr_ntile_shift) - 1);
if (cpair == 0xff){ // Was not here - only for interscene. Will it hurt?
...
...
@@ -403,6 +600,7 @@ float * TpHostGpu::getCorrImg(
float * TpHostGpu::getCorrTdImg(
int corr_img_size,
int num_corr_indices,
int * cpu_corr_indices,
float * cpu_corr_td,
int num_sel_sensors){
...
...
@@ -410,7 +608,7 @@ float * TpHostGpu::getCorrTdImg(
for (int i = 0; i < corr_img_size; i++){
corr_img[i] = NAN;
}
for (int ict = 0; ict <
m_tpParams.
num_corr_indices; ict++){
for (int ict = 0; ict < num_corr_indices; ict++){
int ctt = ( cpu_corr_indices[ict] >> m_tpParams.corr_ntile_shift); // CORR_NTILE_SHIFT);
int cpair = cpu_corr_indices[ict] & ((1 << m_tpParams.corr_ntile_shift) - 1);
if (cpair == 0xff){ // Was not here - only for interscene. Will it hurt?
...
...
@@ -442,15 +640,16 @@ void TpHostGpu::saveInterCorrFile(
const char * path, // "clt/aux_interscene-TD.raw"
const char * prompt, // "interscene phase correlation"
int num_corrs,
int num_corr_indices,
float * gpu_corrs_td,
int * gpu_corr_indices,
int num_sel_sensors){
if (!path) return;
int corr_img_size =
m_tpParams.
num_corr_indices * 16 * 16; // NAN
int corr_img_size = num_corr_indices * 16 * 16; // NAN
int rslt_corr_size_td = num_corrs * m_tpParams.dtt_size2 * m_tpParams.dtt_size2;
float * cpu_corr_td = (float *)malloc(rslt_corr_size_td * sizeof(float));
int dtile_bytes = (m_tpParams.dtt_size2*m_tpParams.dtt_size2) * sizeof(float);
int * cpu_corr_indices = (int *) malloc(
m_tpParams.
num_corr_indices * sizeof(int));
int * cpu_corr_indices = (int *) malloc(num_corr_indices * sizeof(int));
checkCudaErrors(cudaMemcpy2D(
cpu_corr_td,
...
...
@@ -463,11 +662,11 @@ void TpHostGpu::saveInterCorrFile(
checkCudaErrors(cudaMemcpy(
cpu_corr_indices,
gpu_corr_indices,
m_tpParams.
num_corr_indices * sizeof(int),
num_corr_indices * sizeof(int),
cudaMemcpyDeviceToHost));
float * corr_img = getCorrTdImg(
corr_img_size, // int corr_img_size,
num_corr_indices, //int num_corr_indices,
cpu_corr_indices, // int * cpu_corr_indices,
gpu_corrs_td, // float * cpu_corr,
num_sel_sensors); // int num_sel_sensors)
...
...
@@ -476,7 +675,7 @@ void TpHostGpu::saveInterCorrFile(
corr_img, // float * data, // allocated array
corr_img_size, // int size, // length in elements
path); // const char * path) // file path
free(cpu_corr_indices);
free
(cpu_corr_indices);
free (corr_img);
free (cpu_corr_td);
}
...
...
@@ -484,21 +683,22 @@ void TpHostGpu::saveInterCorrFile(
void TpHostGpu::saveInterCorrIndicesFile(
const char * path, // "clt/aux_inter-indices.raw"
const char * prompt, // "interscene indices"
int * gpu_corr_indices,
int num_sel_sensors){
int num_corr_indices,
int * gpu_corr_indices,
int num_sel_sensors){
if (!path) return;
int * cpu_corr_indices = (int *) malloc(
m_tpParams.
num_corr_indices * sizeof(int));
int * cpu_corr_indices = (int *) malloc(num_corr_indices * sizeof(int));
checkCudaErrors(cudaMemcpy(
cpu_corr_indices,
gpu_corr_indices,
m_tpParams.
num_corr_indices * sizeof(int),
num_corr_indices * sizeof(int),
cudaMemcpyDeviceToHost));
int corr_index_img_length = m_tpParams.tilesx * m_tpParams.tilesy * (num_sel_sensors+1) ;
float *corr_index_img = (float *)malloc(corr_index_img_length * sizeof(float));
for (int i = 0; i < corr_index_img_length; i++){
corr_index_img[i] = NAN;
}
for (int ict = 0; ict <
m_tpParams.
num_corr_indices; ict++){
for (int ict = 0; ict < num_corr_indices; ict++){
int ctt = ( cpu_corr_indices[ict] >> m_tpParams.corr_ntile_shift); // CORR_NTILE_SHIFT);
int cpair = cpu_corr_indices[ict] & ((1 << m_tpParams.corr_ntile_shift) - 1);
if (cpair == 0xff){
...
...
@@ -514,7 +714,7 @@ void TpHostGpu::saveInterCorrIndicesFile(
corr_index_img_length, // int size, // length in elements
"clt/aux_inter-indices.raw"); // const char * path) // file path
free (corr_index_img);
free(cpu_corr_indices);
free
(cpu_corr_indices);
}
...
...
@@ -527,5 +727,6 @@ void TpHostGpu::gfree(struct CltExtra * p) {if (p) checkCudaErrors(cudaFree(p
// Free a device allocation holding struct gc data; a null pointer is ignored.
void TpHostGpu::gfree(struct gc * p) {
	if (p != nullptr) {
		checkCudaErrors(cudaFree(p));
	}
	// NOTE(review): p is a by-value copy, so this reset never reaches the caller's pointer.
	p = nullptr;
}
// Free a device allocation holding struct corr_vector data; a null pointer is ignored.
void TpHostGpu::gfree(struct corr_vector * p) {
	if (p != nullptr) {
		checkCudaErrors(cudaFree(p));
	}
	// NOTE(review): p is a by-value copy, so this reset never reaches the caller's pointer.
	p = nullptr;
}
// Free a device allocation holding struct trot_deriv data; a null pointer is ignored.
void TpHostGpu::gfree(struct trot_deriv * p) {
	if (p != nullptr) {
		checkCudaErrors(cudaFree(p));
	}
	// NOTE(review): p is a by-value copy, so this reset never reaches the caller's pointer.
	p = nullptr;
}
// Free a device-side table of float pointers; a null pointer is ignored.
// Only the table itself is passed to cudaFree - the buffers it points to are not touched here.
void TpHostGpu::gfree(float ** p) {
	if (p != nullptr) {
		checkCudaErrors(cudaFree(p));
	}
	// NOTE(review): p is a by-value copy, so this reset never reaches the caller's pointer.
	p = nullptr;
}
src/TpHostGpu.h
View file @
dc090454
...
...
@@ -115,20 +115,32 @@ public:
void
setCltBuffers
();
void
setCorrImgBuffers
();
void
setImgBuffers
();
void
setImgBuffersShifted
(
int
is_bayer
,
int
image_dx
,
int
image_dy
);
void
setGeometryCorrectionBuffers
();
void
setCorrelationBuffers
();
void
setTasks
(
const
float
target_disparity
,
const
float
scale
);
void
setTextures
();
void
setRGBA
();
void
testCorrelate2DIntra
(
int
num_runs
);
void
testRotMatrices
(
int
num_runs
);
// 424
void
testReverseDistortions
(
int
num_runs
);
// 468
void
testGeomCorrect
(
int
num_runs
);
// 534
void
testConvertDirect
(
int
num_runs
);
// 608
void
testImclt
(
int
num_runs
);
// 682
void
testImcltRbgAll
(
int
num_runs
);
// 701
void
testCorrelate2DIntra
(
int
num_runs
);
void
testCorrelate2DInterSelf
(
int
num_runs
);
// for both intra and inter!
void
saveIntraCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
float
*
gpu_corrs
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
float
*
gpu_corrs_td
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrIndicesFile
(
const
char
*
path
,
const
char
*
prompt
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveIntraCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
int
num_corr_indices
,
float
*
gpu_corrs
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
int
num_corr_indices
,
float
*
gpu_corrs_td
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrIndicesFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corr_indices
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
private
:
float
*
getCorrImg
(
int
corr_img_size
,
int
*
cpu_corr_indices
,
float
*
cpu_corr
,
int
num_sel_sensors
);
float
*
getCorrTdImg
(
int
corr_img_size
,
int
*
cpu_corr_indices
,
float
*
cpu_corr_td
,
int
num_sel_sensors
);
float
*
getCorrImg
(
int
corr_img_size
,
int
num_corr_indices
,
int
*
cpu_corr_indices
,
float
*
cpu_corr
,
int
num_sel_sensors
);
float
*
getCorrTdImg
(
int
corr_img_size
,
int
num_corr_indices
,
int
*
cpu_corr_indices
,
float
*
cpu_corr_td
,
int
num_sel_sensors
);
void
hfree
(
float
*
p
);
// {if (p) free (p);}
void
hfree
(
struct
CltExtra
*
p
);
void
gfree
(
float
*
p
);
...
...
@@ -138,6 +150,7 @@ private:
void
gfree
(
struct
gc
*
p
);
void
gfree
(
struct
corr_vector
*
p
);
void
gfree
(
struct
trot_deriv
*
p
);
void
gfree
(
float
**
p
);
};
...
...
src/TpParams.cu
View file @
dc090454
...
...
@@ -39,7 +39,7 @@ TpParams::TpParams(int lwir){
corr_size = 2 * corr_out_rad + 1;
corr_length = corr_size * corr_size;
num_tiles = tp_tasks_size;
num_corr_indices = num_pairs * num_tiles;
//
num_corr_indices = num_pairs * num_tiles;
}
src/TpParams.h
View file @
dc090454
...
...
@@ -22,7 +22,7 @@ public:
static
constexpr
int
img_width
=
IMG_WIDTH
;
static
constexpr
int
img_height
=
IMG_HEIGHT
;
static
constexpr
int
kernels_hor
=
KERNELS_HOR
;
static
constexpr
int
kernel
_vert
=
KERNELS_VERT
;
static
constexpr
int
kernel
s_vert
=
KERNELS_VERT
;
static
constexpr
int
task_inter_en
=
TASK_INTER_EN
;
// 10 // Task bit to enable interscene correlation
static
constexpr
int
task_corr_en
=
TASK_CORR_EN
;
// 9 // Task bit to enable intrascene correlation (pairs defined separately)
...
...
@@ -90,7 +90,7 @@ public:
int
num_tiles
{};
int
corr_size
{};
int
corr_length
{};
int
num_corr_indices
{};
// int num_corr_indices{}; // removing - different length for intra/inter
// std::vector<float[2]> m_port_offsets;
...
...
src/test_tp.cu
View file @
dc090454
...
...
@@ -463,6 +463,8 @@ int main(int argc, char **argv)
#endif // TEST_ROT_MATRICES
#define TEST_REVERSE_DISTORTIONS
#ifdef TEST_REVERSE_DISTORTIONS
dim3 threads_rd(3,3,3);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment