Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
6f9c7399
Commit
6f9c7399
authored
Apr 09, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
More code reimplemented as methods
parent
14d7689b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
74 additions
and
17 deletions
+74
-17
TpHostGpu.cu
src/TpHostGpu.cu
+68
-12
TpHostGpu.h
src/TpHostGpu.h
+4
-2
test_tp.cu
src/test_tp.cu
+2
-3
No files found.
src/TpHostGpu.cu
View file @
6f9c7399
...
...
@@ -556,13 +556,13 @@ void TpHostGpu::saveClt(
if (!paths) return;
int rslt_size = (m_tpParams.tilesy * m_tpParams.tilesx * m_tpParams.num_colors * 4 * m_tpParams.dtt_size * m_tpParams.dtt_size);
float * cpu_clt = (float *)malloc(rslt_size*sizeof(float));
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) if (paths[ncam]){
for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) if (paths[ncam]
&& m_gpu_clt_h[ncam]
){
checkCudaErrors(cudaMemcpy( // segfault
cpu_clt,
m_gpu_clt_h[ncam],
rslt_size * sizeof(float),
cudaMemcpyDeviceToHost));
printf("Writing
CLT data to %s\n",
paths[ncam]);
printf("Writing
%s to %s\n", prompt,
paths[ncam]);
writeFloatsToFile(cpu_clt, // float * data, // allocated array
rslt_size, // int size, // length in elements
paths[ncam]); // const char * path) // file path
...
...
@@ -570,28 +570,84 @@ void TpHostGpu::saveClt(
hfree(cpu_clt);
}
/**
 * Copy per-camera result images from the device and write each one to a file.
 *
 * For every camera index below m_tpParams.num_cams that has both a non-null
 * output path and a non-null image pointer, the image is copied into a host
 * buffer with cudaMemcpy2D (source pitch dstride_rslt, in bytes) and written
 * with writeFloatsToFile(). Each copied row holds (img_width + dtt_size) floats
 * and there are num_colors * (img_height + dtt_size) rows, so the saved data
 * includes the margins.
 *
 * @param paths             per-camera output file paths (m_tpPaths.result_rbg_file);
 *                          nothing is done when null, null entries are skipped
 * @param prompt            label printed in the "Writing ... to ..." message, e.g. "RBG data"
 * @param gpu_corr_images_h array indexed by camera on the host whose entries are used
 *                          as device-side copy sources; nothing is done when null,
 *                          null entries are skipped
 * @throws std::runtime_error if the host staging buffer cannot be allocated
 */
void TpHostGpu::saveRgb(
        const char ** paths,               // m_tpPaths.result_rbg_file
        const char *  prompt,              // "RBG data"
        float **      gpu_corr_images_h){
    // Guard both arrays: each of them is indexed by ncam below.
    if (!paths || !gpu_corr_images_h) return;

    // Geometry of one saved image (all colors stacked, margins included).
    const int    row_floats    = m_tpParams.img_width + m_tpParams.dtt_size;
    const int    num_rows      = m_tpParams.num_colors * (m_tpParams.img_height + m_tpParams.dtt_size);
    const int    rslt_img_size = num_rows * row_floats;       // length in floats
    const size_t row_bytes     = row_floats * sizeof(float);  // tightly packed host pitch

    // One host buffer is reused for all cameras.
    float * cpu_corr_image = (float *)malloc(rslt_img_size * sizeof(float));
    if (!cpu_corr_image) {
        throw std::runtime_error("Error: failed to allocate host buffer for result image");
    }

    for (int ncam = 0; ncam < m_tpParams.num_cams; ncam++) {
        if (!paths[ncam] || !gpu_corr_images_h[ncam]) continue; // nothing to save for this camera
        checkCudaErrors(cudaMemcpy2D(
                cpu_corr_image,           // dst (host)
                row_bytes,                // dst pitch, bytes
                gpu_corr_images_h[ncam],  // src (device)
                dstride_rslt,             // src pitch, bytes
                row_bytes,                // width of each copied row, bytes
                num_rows,                 // number of rows
                cudaMemcpyDeviceToHost));
        printf("Writing %s to %s\n", prompt, paths[ncam]);
        writeFloatsToFile( // will have margins
                cpu_corr_image, // float * data, // allocated array
                rslt_img_size,  // int size, // length in elements
                paths[ncam]);   // const char * path) // file path
    }
    free(cpu_corr_image);
}
/*
// not implemented
void TpHostGpu::testImclt (int num_runs){ // 682
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
if (!m_gpu_geometry_correction){
throw std::runtime_error("Error: m_gpu_geometry_correction is not initialized ");
}
/*
*/
}
*/
/**
 * Run the imclt_rbg_all kernel repeatedly, print the average run time and
 * save the resulting per-camera images.
 *
 * When m_tpParams.debug_tile is set, a single timed pass (i = 0) is executed.
 * Otherwise the loop starts at i = -1: that first pass runs before the timer is
 * reset/started at i == 0, so it is not included in the measured time, and
 * num_runs timed passes follow.
 *
 * The kernel is launched with a single thread in a single block.
 * NOTE(review): presumably imclt_rbg_all launches the per-tile work itself - confirm
 * against the kernel source.
 *
 * @param num_runs number of timed passes when debug_tile is not set
 * @throws std::runtime_error if m_gpu_clt or m_gpu_corr_images is not initialized
 */
void TpHostGpu::testImcltRbgAll (int num_runs){ // 701
    int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
    int i0 =            m_tpParams.debug_tile ? 0 : -1;
    // Both arrays are passed to the kernel below, so both must exist.
    if (!m_gpu_clt){
        throw std::runtime_error("Error: m_gpu_clt is not initialized ");
    }
    if (!m_gpu_corr_images){
        throw std::runtime_error("Error: m_gpu_corr_images is not initialized ");
    }
    StopWatchInterface *timerIMCLT = 0;
    sdkCreateTimer(&timerIMCLT);
    for (int i = i0; i < numIterations; i++) {
        if (i == 0) {
            // Start timing only at pass 0; an earlier pass (i == -1) is left untimed.
            checkCudaErrors(cudaDeviceSynchronize());
            sdkResetTimer(&timerIMCLT);
            sdkStartTimer(&timerIMCLT);
        }
        dim3 threads_imclt_all(1, 1, 1);
        dim3 grid_imclt_all(1, 1, 1);
        printf("threads_imclt_all=(%d, %d, %d)\n",threads_imclt_all.x,threads_imclt_all.y,threads_imclt_all.z);
        printf("grid_imclt_all= (%d, %d, %d)\n",grid_imclt_all.x, grid_imclt_all.y, grid_imclt_all.z);
        imclt_rbg_all<<<grid_imclt_all,threads_imclt_all>>>(
                m_tpParams.num_cams,          // int num_cams,
                m_gpu_clt,                    // float ** gpu_clt, // [num_cams][TILESY][TILESX][num_colors][DTT_SIZE*DTT_SIZE]
                m_gpu_corr_images,            // float ** gpu_corr_images, // [num_cams][WIDTH, 3 * HEIGHT]
                1,                            // int apply_lpf,
                m_tpParams.num_colors,        // int colors, // defines lpf filter
                m_tpParams.tilesx,            // TILESX, // int woi_twidth,
                m_tpParams.tilesy,            // TILESY, // int woi_theight,
                dstride_rslt/sizeof(float));  // const size_t dstride); // in floats (pixels)
        getLastCudaError("Kernel failure");
        // Wait for the kernel so that each pass is complete before the next one starts.
        checkCudaErrors(cudaDeviceSynchronize());
        printf("test pass: %d\n",i);
    }
    // TODO: *** Stop here for initial testing ***
    sdkStopTimer(&timerIMCLT);
    float avgTimeIMCLT = (float)sdkGetTimerValue(&timerIMCLT) / (float)numIterations;
    sdkDeleteTimer(&timerIMCLT);
    printf("Average imclt_rbg_all run time =%f ms\n", avgTimeIMCLT);
    saveRgb(
            m_tpPaths.result_rbg_file, // const char ** paths, // m_tpPaths.result_rbg_file
            "RBG data",                // const char * prompt, // "RBG data"
            m_gpu_corr_images_h);      // float ** gpu_corr_images_h){
}
void TpHostGpu::testCorrelate2DIntra(int num_runs){
...
...
src/TpHostGpu.h
View file @
6f9c7399
...
...
@@ -126,14 +126,16 @@ public:
void
testReverseDistortions
(
int
num_runs
);
// 468
void
testGeomCorrect
(
int
num_runs
);
// 534
void
testConvertDirect
(
int
num_runs
);
// 608
void
testImclt
(
int
num_runs
);
// 682
// void testImclt (int num_runs); // 682 // not implemented
void
testImcltRbgAll
(
int
num_runs
);
// 701
void
testCorrelate2DIntra
(
int
num_runs
);
void
testCorrelate2DInterSelf
(
int
num_runs
);
void
saveClt
(
const
char
**
paths
,
const
char
*
prompt
,
float
**
gpu_clt_h
);
void
saveRgb
(
const
char
**
paths
,
const
char
*
prompt
,
float
**
gpu_corr_images_h
);
// for both intra and inter!
void
saveIntraCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
int
num_corr_indices
,
float
*
gpu_corrs
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
int
num_corr_indices
,
float
*
gpu_corrs_td
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
...
...
src/test_tp.cu
View file @
6f9c7399
...
...
@@ -678,8 +678,8 @@ int main(int argc, char **argv)
tpPaths.ports_clt_file[ncam]); // const char * path) // file path
}
#endif
#ifdef TEST_IMCLT
// test_imclt does not exist
{
// testing imclt
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
...
...
@@ -700,7 +700,6 @@ int main(int argc, char **argv)
StopWatchInterface *timerIMCLT = 0;
sdkCreateTimer(&timerIMCLT);
for (int i = i0; i < numIterations; i++)
{
if (i == 0)
...
...
@@ -732,7 +731,7 @@ int main(int argc, char **argv)
sdkStopTimer(&timerIMCLT);
float avgTimeIMCLT = (float)sdkGetTimerValue(&timerIMCLT) / (float)numIterations;
sdkDeleteTimer(&timerIMCLT);
printf("Average
IMCLT
run time =%f ms\n", avgTimeIMCLT);
printf("Average
imclt_rbg_all
run time =%f ms\n", avgTimeIMCLT);
int rslt_img_size = tpParams.num_colors * (IMG_HEIGHT + DTT_SIZE) * (IMG_WIDTH + DTT_SIZE);
float * cpu_corr_image = (float *)malloc(rslt_img_size * sizeof(float));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment