Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
d8e9a454
Commit
d8e9a454
authored
Apr 15, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
debugged initially with jcuda
parent
414f6351
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
89 additions
and
1809 deletions
+89
-1809
TpHostGpu.cu
src/TpHostGpu.cu
+25
-15
TpHostGpu.h
src/TpHostGpu.h
+19
-19
TpParams.h
src/TpParams.h
+1
-1
geometry_correction.cu
src/geometry_correction.cu
+16
-5
geometry_correction.h
src/geometry_correction.h
+5
-0
test_tp.cu
src/test_tp.cu
+23
-1769
No files found.
src/TpHostGpu.cu
View file @
d8e9a454
...
@@ -5,15 +5,21 @@
...
@@ -5,15 +5,21 @@
* Author: elphel
* Author: elphel
*/
*/
#include <stdexcept>
#include <stdexcept>
#include <helper_cuda.h> // for checkCudaErrors
//#include <driver_types.h> // was not needed before, only for indexes - needs __DRIVER_TYPES_H__
#include <cuda_runtime.h> // cudaFree
#include <cstdlib>
#include <cstdio>
#include <cuda_runtime.h> // cudaFree
#include <helper_cuda.h> // for checkCudaErrors
#include <helper_functions.h> // timer functions
#include <helper_functions.h> // timer functions
//#include "TpParams.h" // TpHostGpu.h has it
//#include "TpParams.h" // TpHostGpu.h has it
#include "tp_paths.h"
#include "tp_paths.h"
#include "tp_files.h"
#include "tp_files.h"
#include "tp_utils.h" // for copyalloc_kernel_gpu
#include "tp_utils.h" // for copyalloc_kernel_gpu
#include "GenerateRgbaHost.h"
//
#include "GenerateRgbaHost.h"
#include "TpHostGpu.h"
#include "TpHostGpu.h"
#define MY_EXCEPTION(aMessage) \
#define MY_EXCEPTION(aMessage) \
...
@@ -79,6 +85,7 @@ void TpHostGpu::allTests(
...
@@ -79,6 +85,7 @@ void TpHostGpu::allTests(
int image_dy,
int image_dy,
const float target_disparity,
const float target_disparity,
const float scale,
const float scale,
const float fat_zero, // 1000.0
int quad_combine,
int quad_combine,
int use_dp,
int use_dp,
int debug){
int debug){
...
@@ -100,10 +107,10 @@ void TpHostGpu::allTests(
...
@@ -100,10 +107,10 @@ void TpHostGpu::allTests(
testConvertDirect (num_runs); // 608
testConvertDirect (num_runs); // 608
testImcltRbgAll (num_runs); // 701
testImcltRbgAll (num_runs); // 701
testCorrelate2DIntra (num_runs); // 762 - 885
testCorrelate2DIntra (num_runs
, fat_zero
); // 762 - 885
testCorrelate2DIntraTD (num_runs, quad_combine); // 886 - 1123
testCorrelate2DIntraTD (num_runs,
fat_zero,
quad_combine); // 886 - 1123
setImgBuffersShifted(is_bayer, image_dx, image_dy); // 1171-1188
setImgBuffersShifted(is_bayer, image_dx, image_dy); // 1171-1188
testCorrelate2DInterSelf(num_runs); // 1136 - 1411
testCorrelate2DInterSelf(num_runs
, fat_zero
); // 1136 - 1411
testTextures (num_runs, use_dp, debug); // 1422-1664
testTextures (num_runs, use_dp, debug); // 1422-1664
testTexturesRGBA (num_runs, use_dp, debug); // 1669-1810
testTexturesRGBA (num_runs, use_dp, debug); // 1669-1810
return;
return;
...
@@ -696,7 +703,7 @@ void TpHostGpu::testImcltRbgAll (int num_runs){ // 701
...
@@ -696,7 +703,7 @@ void TpHostGpu::testImcltRbgAll (int num_runs){ // 701
m_gpu_corr_images_h); // float ** gpu_corr_images_h){
m_gpu_corr_images_h); // float ** gpu_corr_images_h){
}
}
void TpHostGpu::testCorrelate2DIntra(int num_runs){
void TpHostGpu::testCorrelate2DIntra(int num_runs
, float fat_zero
){
int num_corr_indices = m_tpParams.num_pairs * m_tpParams.num_tiles;
int num_corr_indices = m_tpParams.num_pairs * m_tpParams.num_tiles;
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
int i0 = m_tpParams.debug_tile ? 0 : -1;
...
@@ -730,7 +737,7 @@ void TpHostGpu::testCorrelate2DIntra(int num_runs){
...
@@ -730,7 +737,7 @@ void TpHostGpu::testCorrelate2DIntra(int num_runs){
m_tpParams.color_weights[0], // 0.25, // float scale0, // scale for R
m_tpParams.color_weights[0], // 0.25, // float scale0, // scale for R
m_tpParams.color_weights[1], // 0.25, // float scale1, // scale for B
m_tpParams.color_weights[1], // 0.25, // float scale1, // scale for B
m_tpParams.color_weights[2], // 0.5, // float scale2, // scale for G
m_tpParams.color_weights[2], // 0.5, // float scale2, // scale for G
m_tpParams.fat_zero * m_tpParams.fat_zero,
// float fat_zero2, // here - absolute
fat_zero * fat_zero,
// float fat_zero2, // here - absolute
m_gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
m_gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
m_tpParams.tp_tasks_size, // int num_tiles) // number of tiles in task
m_tpParams.tp_tasks_size, // int num_tiles) // number of tiles in task
m_tpParams.tilesx, // int tilesx, // number of tile rows
m_tpParams.tilesx, // int tilesx, // number of tile rows
...
@@ -764,7 +771,7 @@ void TpHostGpu::testCorrelate2DIntra(int num_runs){
...
@@ -764,7 +771,7 @@ void TpHostGpu::testCorrelate2DIntra(int num_runs){
16); //int num_sel_sensors) { // only for interscene
16); //int num_sel_sensors) { // only for interscene
}
}
void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886 - 1123
void TpHostGpu::testCorrelate2DIntraTD (int num_runs,
float fat_zero,
int quad_combine){ // 886 - 1123
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names
// check/replace names
...
@@ -817,7 +824,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
...
@@ -817,7 +824,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
m_tpParams.color_weights[0], // 0.25, // float scale0, // scale for R
m_tpParams.color_weights[0], // 0.25, // float scale0, // scale for R
m_tpParams.color_weights[1], // 0.25, // float scale1, // scale for B
m_tpParams.color_weights[1], // 0.25, // float scale1, // scale for B
m_tpParams.color_weights[2], // 0.5, // float scale2, // scale for G
m_tpParams.color_weights[2], // 0.5, // float scale2, // scale for G
m_tpParams.fat_zero * m_tpParams.fat_zero,
// float fat_zero2, // here - absolute
fat_zero * fat_zero,
// float fat_zero2, // here - absolute
m_gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
m_gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
m_tpParams.tp_tasks_size, // int num_tiles) // number of tiles in task
m_tpParams.tp_tasks_size, // int num_tiles) // number of tiles in task
m_tpParams.tilesx, // int tilesx, // number of tile rows
m_tpParams.tilesx, // int tilesx, // number of tile rows
...
@@ -856,7 +863,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
...
@@ -856,7 +863,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
(float *) 0, // float * corr_weights, // null or per-tile weight (fat_zero2 will be divided by it)
(float *) 0, // float * corr_weights, // null or per-tile weight (fat_zero2 will be divided by it)
dstride_corr_combo/sizeof(float), // const size_t corr_stride, // in floats
dstride_corr_combo/sizeof(float), // const size_t corr_stride, // in floats
m_gpu_corrs_combo, // float * gpu_corrs, // correlation output data (pixel domain)
m_gpu_corrs_combo, // float * gpu_corrs, // correlation output data (pixel domain)
m_tpParams.fat_zero * m_tpParams.fat_zero,
// float fat_zero2, // here - absolute
fat_zero * fat_zero,
// float fat_zero2, // here - absolute
m_tpParams.corr_out_rad); // int corr_radius); // radius of the output correlation (7 for 15x15)
m_tpParams.corr_out_rad); // int corr_radius); // radius of the output correlation (7 for 15x15)
printf("corr2D_combine pass: %d\n",i);
printf("corr2D_combine pass: %d\n",i);
}else { // if (quad_combine) {
}else { // if (quad_combine) {
...
@@ -868,7 +875,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
...
@@ -868,7 +875,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
(float *) 0, // corr_weights, // float * corr_weights, // null or per-tile weight (fat_zero2 will be divided by it)
(float *) 0, // corr_weights, // float * corr_weights, // null or per-tile weight (fat_zero2 will be divided by it)
dstride_corr/sizeof(float), // const size_t corr_stride, // in floats
dstride_corr/sizeof(float), // const size_t corr_stride, // in floats
m_gpu_corrs, // float * gpu_corrs, // correlation output data (pixel domain)
m_gpu_corrs, // float * gpu_corrs, // correlation output data (pixel domain)
m_tpParams.fat_zero * m_tpParams.fat_zero,
// float fat_zero2, // here - absolute
fat_zero * fat_zero,
// float fat_zero2, // here - absolute
m_tpParams.corr_out_rad); // int corr_radius); // radius of the output correlation (7 for 15x15)
m_tpParams.corr_out_rad); // int corr_radius); // radius of the output correlation (7 for 15x15)
} // if (quad_combine) {
} // if (quad_combine) {
...
@@ -971,7 +978,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
...
@@ -971,7 +978,7 @@ void TpHostGpu::testCorrelate2DIntraTD (int num_runs, int quad_combine){ // 886
} // if (quad_combine) {
} // if (quad_combine) {
}
}
void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
void TpHostGpu::testCorrelate2DInterSelf(int num_runs
, float fat_zero
){ // 889
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int numIterations = m_tpParams.debug_tile ? 1 : num_runs;
int i0 = m_tpParams.debug_tile ? 0 : -1;
int i0 = m_tpParams.debug_tile ? 0 : -1;
// check/replace names
// check/replace names
...
@@ -1087,7 +1094,7 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
...
@@ -1087,7 +1094,7 @@ void TpHostGpu::testCorrelate2DInterSelf(int num_runs){ // 889
(float *) 0, // corr_weights, // float * corr_weights, // null or per-tile weight (fat_zero2 will be divided by it)
(float *) 0, // corr_weights, // float * corr_weights, // null or per-tile weight (fat_zero2 will be divided by it)
dstride_corr/sizeof(float), // const size_t corr_stride, // in floats
dstride_corr/sizeof(float), // const size_t corr_stride, // in floats
m_gpu_corrs, // float * gpu_corrs, // correlation output data (pixel domain)
m_gpu_corrs, // float * gpu_corrs, // correlation output data (pixel domain)
m_tpParams.fat_zero * m_tpParams.fat_zero,
// float fat_zero2, // here - absolute
fat_zero * fat_zero,
// float fat_zero2, // here - absolute
m_tpParams.corr_out_rad); // int corr_radius); // radius of the output correlation (7 for 15x15)
m_tpParams.corr_out_rad); // int corr_radius); // radius of the output correlation (7 for 15x15)
getLastCudaError("Kernel failure:corr2D_normalize");
getLastCudaError("Kernel failure:corr2D_normalize");
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(cudaDeviceSynchronize());
...
@@ -1440,6 +1447,8 @@ void TpHostGpu::testTexturesRGBA (
...
@@ -1440,6 +1447,8 @@ void TpHostGpu::testTexturesRGBA (
printf("\n1. shared_size=%d, num_cams=%d, colors=%d\n",shared_size, m_tpParams.num_cams, m_tpParams.texture_colors);
printf("\n1. shared_size=%d, num_cams=%d, colors=%d\n",shared_size, m_tpParams.num_cams, m_tpParams.texture_colors);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_size); // 60000); // 5536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_size); // 60000); // 5536); // for CC 7.5
// was not here - next line
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
generate_RBGA<<<1,1>>> (
generate_RBGA<<<1,1>>> (
m_tpParams.num_cams, // int num_cams, // number of cameras used
m_tpParams.num_cams, // int num_cams, // number of cameras used
// Parameters to generate texture tasks
// Parameters to generate texture tasks
...
@@ -1788,6 +1797,8 @@ float * TpHostGpu::getCorrTdImg(
...
@@ -1788,6 +1797,8 @@ float * TpHostGpu::getCorrTdImg(
return corr_img;
return corr_img;
}
}
//void TpHostGpu::generate_RBGA_host(
// static // https://stackoverflow.com/questions/15725922/static-function-a-storage-class-may-not-be-specified-here
void TpHostGpu::generate_RBGA_host(
void TpHostGpu::generate_RBGA_host(
int num_cams, // number of cameras used
int num_cams, // number of cameras used
// Parameters to generate texture tasks
// Parameters to generate texture tasks
...
@@ -2031,7 +2042,6 @@ void TpHostGpu::generate_RBGA_host(
...
@@ -2031,7 +2042,6 @@ void TpHostGpu::generate_RBGA_host(
pntt, // ntt, // int * num_texture_tiles, // number of texture tiles to process
pntt, // ntt, // int * num_texture_tiles, // number of texture tiles to process
ti_offset, // gpu_texture_indices_offset,// add to gpu_texture_indices
ti_offset, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_texture_indices, // + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
colors, // int colors, // number of colors (3/1)
colors, // int colors, // number of colors (3/1)
is_lwir, // int is_lwir, // do not perform shot correction
is_lwir, // int is_lwir, // do not perform shot correction
...
...
src/TpHostGpu.h
View file @
d8e9a454
...
@@ -117,6 +117,7 @@ public:
...
@@ -117,6 +117,7 @@ public:
int
image_dy
,
// 0
int
image_dy
,
// 0
const
float
target_disparity
,
// DBG_DISPARITY == 0.0
const
float
target_disparity
,
// DBG_DISPARITY == 0.0
const
float
scale
,
// 0.0
const
float
scale
,
// 0.0
const
float
fat_zero
,
// 1000.0
int
quad_combine
,
int
quad_combine
,
int
use_dp
,
int
use_dp
,
int
debug
);
int
debug
);
...
@@ -138,27 +139,13 @@ public:
...
@@ -138,27 +139,13 @@ public:
// void testImclt (int num_runs); // 682 // not implemented
// void testImclt (int num_runs); // 682 // not implemented
void
testImcltRbgAll
(
int
num_runs
);
// 701
void
testImcltRbgAll
(
int
num_runs
);
// 701
void
testCorrelate2DIntra
(
int
num_runs
);
// 762 - 885
void
testCorrelate2DIntra
(
int
num_runs
,
float
fat_zero
);
// 762 - 885
void
testCorrelate2DIntraTD
(
int
num_runs
,
int
quad_combine
);
// 886 - 1123
void
testCorrelate2DIntraTD
(
int
num_runs
,
float
fat_zero
,
int
quad_combine
);
// 886 - 1123
//void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
//void setImgBuffersShifted(int is_bayer, int image_dx, int image_dy); // 1171-1188
void
testCorrelate2DInterSelf
(
int
num_runs
);
// 1136 - 1411
void
testCorrelate2DInterSelf
(
int
num_runs
,
float
fat_zero
);
// 1136 - 1411
void
testTextures
(
int
num_runs
,
int
use_dp
,
int
debug
);
// 1422-1664
void
testTextures
(
int
num_runs
,
int
use_dp
,
int
debug
);
// 1422-1664
void
testTexturesRGBA
(
int
num_runs
,
int
use_dp
,
int
debug
);
// 1669-1810
void
testTexturesRGBA
(
int
num_runs
,
int
use_dp
,
int
debug
);
// 1669-1810
static
void
generate_RBGA_host
(
// not a member
private
:
void
saveClt
(
const
char
**
paths
,
const
char
*
prompt
,
float
**
gpu_clt_h
);
void
saveRgb
(
const
char
**
paths
,
const
char
*
prompt
,
float
**
gpu_corr_images_h
);
// for both intra and inter!
void
saveIntraCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
int
num_corr_indices
,
float
*
gpu_corrs
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
int
num_corr_indices
,
float
*
gpu_corrs_td
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrIndicesFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corr_indices
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
float
*
getCorrImg
(
int
corr_img_size
,
int
num_corr_indices
,
int
*
cpu_corr_indices
,
float
*
cpu_corr
,
int
num_sel_sensors
);
float
*
getCorrTdImg
(
int
corr_img_size
,
int
num_corr_indices
,
int
*
cpu_corr_indices
,
float
*
cpu_corr_td
,
int
num_sel_sensors
);
void
generate_RBGA_host
(
// not a member
int
num_cams
,
// number of cameras used
int
num_cams
,
// number of cameras used
// Parameters to generate texture tasks
// Parameters to generate texture tasks
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16p// struct tp_task * gpu_tasks,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16p// struct tp_task * gpu_tasks,
...
@@ -182,6 +169,19 @@ private:
...
@@ -182,6 +169,19 @@ private:
const
int
texture_rbga_stride
,
// in floats
const
int
texture_rbga_stride
,
// in floats
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
private
:
void
saveClt
(
const
char
**
paths
,
const
char
*
prompt
,
float
**
gpu_clt_h
);
void
saveRgb
(
const
char
**
paths
,
const
char
*
prompt
,
float
**
gpu_corr_images_h
);
// for both intra and inter!
void
saveIntraCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
int
num_corr_indices
,
float
*
gpu_corrs
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corrs
,
int
num_corr_indices
,
float
*
gpu_corrs_td
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
void
saveInterCorrIndicesFile
(
const
char
*
path
,
const
char
*
prompt
,
int
num_corr_indices
,
int
*
gpu_corr_indices
,
int
num_sel_sensors
);
float
*
getCorrImg
(
int
corr_img_size
,
int
num_corr_indices
,
int
*
cpu_corr_indices
,
float
*
cpu_corr
,
int
num_sel_sensors
);
float
*
getCorrTdImg
(
int
corr_img_size
,
int
num_corr_indices
,
int
*
cpu_corr_indices
,
float
*
cpu_corr_td
,
int
num_sel_sensors
);
void
hfree
(
float
*&
p
);
// {if (p) free (p);}
void
hfree
(
float
*&
p
);
// {if (p) free (p);}
void
hfree
(
struct
CltExtra
*&
p
);
void
hfree
(
struct
CltExtra
*&
p
);
void
gfree
(
float
*&
p
);
void
gfree
(
float
*&
p
);
...
@@ -193,9 +193,9 @@ private:
...
@@ -193,9 +193,9 @@ private:
void
gfree
(
struct
trot_deriv
*&
p
);
void
gfree
(
struct
trot_deriv
*&
p
);
void
gfree
(
float
**&
p
);
void
gfree
(
float
**&
p
);
void
gfree
(
struct
CltExtra
**&
p
);
void
gfree
(
struct
CltExtra
**&
p
);
};
};
#endif
/* SRC_TPHOSTGPU_H_ */
#endif
/* SRC_TPHOSTGPU_H_ */
src/TpParams.h
View file @
d8e9a454
...
@@ -49,7 +49,7 @@ public:
...
@@ -49,7 +49,7 @@ public:
static
constexpr
int
tp_task_centerxy_offset
=
TP_TASK_CENTERXY_OFFSET
;
// 3
static
constexpr
int
tp_task_centerxy_offset
=
TP_TASK_CENTERXY_OFFSET
;
// 3
static
constexpr
int
tp_task_scale_offset
=
TP_TASK_SCALE_OFFSET
;
// 5
static
constexpr
int
tp_task_scale_offset
=
TP_TASK_SCALE_OFFSET
;
// 5
static
constexpr
int
tp_task_xy_offset
=
TP_TASK_XY_OFFSET
;
// 6
static
constexpr
int
tp_task_xy_offset
=
TP_TASK_XY_OFFSET
;
// 6
static
constexpr
float
fat_zero
=
1000
.
0
f
;
// 300.0f; // 30.0;
//
static constexpr float fat_zero = 1000.0f; // 300.0f; // 30.0;
static
constexpr
int
convert_direct_indexing_threads
=
CONVERT_DIRECT_INDEXING_THREADS
;
//
static
constexpr
int
convert_direct_indexing_threads
=
CONVERT_DIRECT_INDEXING_THREADS
;
//
static
constexpr
int
convert_direct_indexing_threads_log2
=
CONVERT_DIRECT_INDEXING_THREADS_LOG2
;
//
static
constexpr
int
convert_direct_indexing_threads_log2
=
CONVERT_DIRECT_INDEXING_THREADS_LOG2
;
//
...
...
src/geometry_correction.cu
View file @
d8e9a454
...
@@ -112,12 +112,23 @@ __constant__ float ROTS_TEMPLATE[7][3][3][3] = {// ...{cos,sin,const}...
...
@@ -112,12 +112,23 @@ __constant__ float ROTS_TEMPLATE[7][3][3][3] = {// ...{cos,sin,const}...
{{ 0, 0,0},{0, 0,0},{ 0, 0,0}},
{{ 0, 0,0},{0, 0,0},{ 0, 0,0}},
}
}
};
};
// TODO: Make offsets calculate in compile time, to avoid NVRTC(in java): " error: dynamic initialization is not supported for a __constant__ variable"
__constant__ int angles_offsets [4] {15,0,30,30};
/*
__constant__ int angles_offsets [4] {
(int) (offsetof4(corr_vector, azimuth)),
(int) (offsetof4(corr_vector, tilt)),
(int) (offsetof4(corr_vector, roll)),
(int) (offsetof4(corr_vector, roll))};
*/
/*
__constant__ int angles_offsets [4] = {
(int) (offsetof(corr_vector, azimuth)/sizeof(float)),
(int) (offsetof(corr_vector, tilt) /sizeof(float)),
(int) (offsetof(corr_vector, roll) /sizeof(float)),
(int) (offsetof(corr_vector, roll) /sizeof(float))};
__constant__ int angles_offsets [4] = {
*/
offsetof(corr_vector, azimuth)/sizeof(float),
offsetof(corr_vector, tilt) /sizeof(float),
offsetof(corr_vector, roll) /sizeof(float),
offsetof(corr_vector, roll) /sizeof(float)};
__constant__ int mm_seq [3][3][3]={
__constant__ int mm_seq [3][3][3]={
{
{
{6,5,12}, // a_t * a_z -> tmp0
{6,5,12}, // a_t * a_z -> tmp0
...
...
src/geometry_correction.h
View file @
d8e9a454
...
@@ -51,6 +51,11 @@
...
@@ -51,6 +51,11 @@
((size_t)&(((st *)0)->m))
((size_t)&(((st *)0)->m))
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#endif
#ifndef offsetof4
#define offsetof4(st, m) \
(((size_t)&(((st *)0)->m))>>2)
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#define SCENE_UNITS_SCALE 0.001 // meters from mm
#define SCENE_UNITS_SCALE 0.001 // meters from mm
...
...
src/test_tp.cu
View file @
d8e9a454
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment