Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
3610b7a6
Commit
3610b7a6
authored
Apr 10, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
testCorrelate2DIntraTD
parent
6f9c7399
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
352 additions
and
142 deletions
+352
-142
TpHostGpu.cu
src/TpHostGpu.cu
+331
-117
TpHostGpu.h
src/TpHostGpu.h
+5
-2
test_tp.cu
src/test_tp.cu
+16
-23
No files found.
src/TpHostGpu.cu
View file @
3610b7a6
This diff is collapsed.
Click to expand it.
src/TpHostGpu.h
View file @
3610b7a6
...
...
@@ -129,8 +129,11 @@ public:
// void testImclt (int num_runs); // 682 // not implemented
void
testImcltRbgAll
(
int
num_runs
);
// 701
void
testCorrelate2DIntra
(
int
num_runs
);
void
testCorrelate2DInterSelf
(
int
num_runs
);
void
testCorrelate2DIntra
(
int
num_runs
);
// 762 - 885
void
testCorrelate2DInterSelf
(
int
num_runs
);
// 1136 - 1411
void
testCorrelate2DIntraTD
(
int
num_runs
,
int
quad_combine
);
// 886 - 1123
void
saveClt
(
const
char
**
paths
,
const
char
*
prompt
,
float
**
gpu_clt_h
);
void
saveRgb
(
const
char
**
paths
,
const
char
*
prompt
,
float
**
gpu_corr_images_h
);
...
...
src/test_tp.cu
View file @
3610b7a6
...
...
@@ -32,7 +32,7 @@
// all of the next 5 were disabled
//#define NOCORR
#define NOCORR_TD
//
#define NOCORR_TD
#define NOTEXTURES //
#define NOTEXTURE_RGBA //
//#define NOTEXTURE_RGBAXXX //
...
...
@@ -883,10 +883,8 @@ int main(int argc, char **argv)
free (corr_img);
#endif // ifndef NOCORR
#ifndef NOCORR_TD
//#define QUAD_COMBINE
// cudaProfilerStart();
// testing corr
StopWatchInterface *timerCORRTD = 0;
...
...
@@ -903,10 +901,10 @@ int main(int argc, char **argv)
// FIXME: provide sel_pairs
correlate2D<<<1,1>>>( // output TD tiles, no normalization
tpParams.num_cams, // int num_cams,
T
pParams.sel_pairs[0], // int sel_pairs0 // unused bits should be 0
T
pParams.sel_pairs[1], // int sel_pairs1, // unused bits should be 0
T
pParams.sel_pairs[2], // int sel_pairs2, // unused bits should be 0
T
pParams.sel_pairs[3], // int sel_pairs3, // unused bits should be 0
t
pParams.sel_pairs[0], // int sel_pairs0 // unused bits should be 0
t
pParams.sel_pairs[1], // int sel_pairs1, // unused bits should be 0
t
pParams.sel_pairs[2], // int sel_pairs2, // unused bits should be 0
t
pParams.sel_pairs[3], // int sel_pairs3, // unused bits should be 0
gpu_clt, // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
tpParams.num_colors, // int colors, // number of colors (3/1)
tpParams.color_weights[0], // 0.25, // float scale0, // scale for R
...
...
@@ -1019,20 +1017,18 @@ int main(int argc, char **argv)
// int rslt_corr_size = num_corrs * corr_size * corr_size;
// float * cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float));
rslt_corr_size = num_corrs *
corr_length * corr_length
;
rslt_corr_size = num_corrs *
tpParams.corr_length; // corr_size * corr_size
;
corr_img_size = num_corr_indices * 16*16; // NAN
corr_img = (float *)malloc(corr_img_size * sizeof(float));
cpu_corr = (float *)malloc(rslt_corr_size * sizeof(float));
cpu_corr_indices = (int *) malloc(num_corr_indices * sizeof(int));
checkCudaErrors(cudaMemcpy2D(
cpu_corr,
(corr_length * corr_length)
* sizeof(float),
tpParams.corr_length
* sizeof(float),
gpu_corrs,
dstride_corr,
(corr_length * corr_length)
* sizeof(float),
tpParams.corr_length
* sizeof(float),
num_corrs,
cudaMemcpyDeviceToHost));
// checkCudaErrors (cudaMalloc((void **)&gpu_corr_indices, num_pairs * TILESX * TILESY*sizeof(int)));
...
...
@@ -1056,13 +1052,13 @@ int main(int argc, char **argv)
int ty = ctt / TILESX;
int tx = ctt % TILESX;
// int src_offs0 = ict * tpParams.num_pairs * corr_size * corr_size;
int src_offs0 = ict *
corr_length *
corr_length;
int src_offs0 = ict *
tpParams.
corr_length;
int dst_offs0 = cpair * (num_tiles * 16 * 16) + (ty * 16 * TILESX * 16) + (tx * 16);
for (int iy = 0; iy <
corr_length
; iy++){
int src_offs = src_offs0 + iy *
corr_length
; // ict * tpParams.num_pairs * corr_size * corr_size;
for (int iy = 0; iy <
tpParams.corr_size
; iy++){
int src_offs = src_offs0 + iy *
tpParams.corr_size
; // ict * tpParams.num_pairs * corr_size * corr_size;
int dst_offs = dst_offs0 + iy * (TILESX * 16);
for (int ix = 0; ix <
corr_length
; ix++){
for (int ix = 0; ix <
tpParams.corr_size
; ix++){
corr_img[dst_offs++] = cpu_corr[src_offs++];
}
}
...
...
@@ -1116,15 +1112,9 @@ int main(int argc, char **argv)
"clt/aux_intrascene-TD.raw"); // const char * path) // file path
#endif
free (cpu_corr_td);
#endif // if 1
// reuse image, export TD data
free (cpu_corr);
free (cpu_corr_indices);
free (corr_img);
...
...
@@ -1135,6 +1125,9 @@ int main(int argc, char **argv)
// Testing "interframe" correlation with itself, assuming direct convert already ran
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment