Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
0c53ff72
Commit
0c53ff72
authored
Apr 16, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'foliage-2504' into foliage-gpu
parents
6381665c
b4d8c441
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
507 additions
and
306 deletions
+507
-306
pom.xml
pom.xml
+11
-1
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+12
-10
GpuQuad.java
src/main/java/com/elphel/imagej/gpu/GpuQuad.java
+60
-15
TileProcessor.cu
src/main/resources/kernels/TileProcessor.cu
+291
-271
TileProcessor.h
src/main/resources/kernels/TileProcessor.h
+104
-1
geometry_correction.cu
src/main/resources/kernels/geometry_correction.cu
+24
-8
geometry_correction.h
src/main/resources/kernels/geometry_correction.h
+5
-0
No files found.
pom.xml
View file @
0c53ff72
...
@@ -58,7 +58,8 @@
...
@@ -58,7 +58,8 @@
<groupId>
org.jcuda
</groupId>
<groupId>
org.jcuda
</groupId>
<artifactId>
jcuda
</artifactId>
<artifactId>
jcuda
</artifactId>
<!-- <version>10.1.0</version> -->
<!-- <version>10.1.0</version> -->
<version>
11.2.0
</version>
<!--<version>11.2.0</version> -->
<version>
12.6.0
</version>
</dependency>
</dependency>
<!--
<!--
As of 2018/09/11 TF for GPU on Maven supports CUDA 9.0 (vs latest 9.2)
As of 2018/09/11 TF for GPU on Maven supports CUDA 9.0 (vs latest 9.2)
...
@@ -113,6 +114,15 @@
...
@@ -113,6 +114,15 @@
<artifactId>
loci_tools
</artifactId>
<artifactId>
loci_tools
</artifactId>
<version>
6.1.0
</version>
<version>
6.1.0
</version>
</dependency>
</dependency>
<!-- https://mvnrepository.com/artifact/ome/pom-bio-formats -->
<!-- Was source in attic for development -->
<dependency>
<groupId>
ome
</groupId>
<artifactId>
pom-bio-formats
</artifactId>
<version>
6.13.0
</version>
<type>
pom
</type>
</dependency>
<!--
<!--
<dependency>
<dependency>
<groupId>com.drewnoakes</groupId>
<groupId>com.drewnoakes</groupId>
...
...
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
0c53ff72
...
@@ -6,7 +6,7 @@ package com.elphel.imagej.gpu;
...
@@ -6,7 +6,7 @@ package com.elphel.imagej.gpu;
** GPU acceleration for the Tile Processor
** GPU acceleration for the Tile Processor
**
**
**
**
** Copyright (C) 2018 Elphel, Inc.
** Copyright (C) 2018
-2025
Elphel, Inc.
**
**
** -----------------------------------------------------------------------------**
** -----------------------------------------------------------------------------**
**
**
...
@@ -72,16 +72,18 @@ import jcuda.nvrtc.JNvrtc;
...
@@ -72,16 +72,18 @@ import jcuda.nvrtc.JNvrtc;
import
jcuda.nvrtc.nvrtcProgram
;
import
jcuda.nvrtc.nvrtcProgram
;
public
class
GPUTileProcessor
{
public
class
GPUTileProcessor
{
public
static
boolean
USE_DS_DP
=
false
;
// Use Dynamic Shared memory with Dynamic Parallelism (not implemented)
public
static
boolean
USE_DS_DP
=
true
;
//
false; // Use Dynamic Shared memory with Dynamic Parallelism (not implemented)
String
LIBRARY_PATH
=
"/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a"
;
// linux
String
LIBRARY_PATH
=
"/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a"
;
// linux
// Can be downloaded and twice extracted from
// Can be downloaded and twice extracted from
// https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cudart-dev-11-2_11.2.152-1_amd64.deb
// https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cudart-dev-11-2_11.2.152-1_amd64.deb
// First deb itself, then data.tar.xz, and it will have usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a inside
// First deb itself, then data.tar.xz, and it will have usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a inside
// Found "cuda-cudart-dev" on https://ubuntu.pkgs.org/
// Found "cuda-cudart-dev" on https://ubuntu.pkgs.org/
static
String
GPU_RESOURCE_DIR
=
"kernels"
;
static
String
GPU_RESOURCE_DIR
=
"kernels"
;
static
String
[]
GPU_KERNEL_FILES
=
{
"dtt8x8.cuh"
,
"TileProcessor.cuh"
};
// static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"}; // was never used and dtt8x8.cuh had incorrect name
// static String [] GPU_KERNEL_FILES = {"dtt8x8.cu","TileProcessor.cu"};
// "*" - generated defines, first index - separately compiled unit
// "*" - generated defines, first index - separately compiled unit
static
String
[][]
GPU_SRC_FILES
=
{{
"*"
,
"dtt8x8.h"
,
"dtt8x8.cu"
,
"geometry_correction.h"
,
"geometry_correction.cu"
,
"TileProcessor.h"
,
"TileProcessor.cuh"
}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
static
String
[][]
GPU_SRC_FILES
=
{{
"*"
,
"dtt8x8.h"
,
"dtt8x8.cu"
,
"geometry_correction.h"
,
"geometry_correction.cu"
,
"TileProcessor.h"
,
"TileProcessor.cu"
}};
static
String
GPU_CONVERT_DIRECT_NAME
=
"convert_direct"
;
// name in C code
static
String
GPU_CONVERT_DIRECT_NAME
=
"convert_direct"
;
// name in C code
static
String
GPU_IMCLT_ALL_NAME
=
"imclt_rbg_all"
;
static
String
GPU_IMCLT_ALL_NAME
=
"imclt_rbg_all"
;
static
String
GPU_CORRELATE2D_NAME
=
"correlate2D"
;
// name in C code
static
String
GPU_CORRELATE2D_NAME
=
"correlate2D"
;
// name in C code
...
@@ -89,7 +91,7 @@ public class GPUTileProcessor {
...
@@ -89,7 +91,7 @@ public class GPUTileProcessor {
static
String
GPU_CORR2D_COMBINE_NAME
=
"corr2D_combine"
;
// name in C code
static
String
GPU_CORR2D_COMBINE_NAME
=
"corr2D_combine"
;
// name in C code
static
String
GPU_CORR2D_NORMALIZE_NAME
=
"corr2D_normalize"
;
// name in C code
static
String
GPU_CORR2D_NORMALIZE_NAME
=
"corr2D_normalize"
;
// name in C code
static
String
GPU_TEXTURES_NAME
=
"textures_nonoverlap"
;
// name in C code
static
String
GPU_TEXTURES_NAME
=
"textures_nonoverlap"
;
// name in C code
static
String
GPU_RBGA_NAME
=
"generate_RBGA"
;
// name in C code
static
String
GPU_RBGA_NAME
=
"generate_RBGA"
;
// name in C code
//// *** Modified 2025 *** ////
static
String
GPU_ROT_DERIV
=
"calc_rot_deriv"
;
// calculate rotation matrices and derivatives
static
String
GPU_ROT_DERIV
=
"calc_rot_deriv"
;
// calculate rotation matrices and derivatives
static
String
GPU_SET_TILES_OFFSETS
=
"get_tiles_offsets"
;
// calculate pixel offsets and disparity distortions
static
String
GPU_SET_TILES_OFFSETS
=
"get_tiles_offsets"
;
// calculate pixel offsets and disparity distortions
static
String
GPU_CALCULATE_TILES_OFFSETS
=
"calculate_tiles_offsets"
;
// calculate pixel offsets and disparity distortions
static
String
GPU_CALCULATE_TILES_OFFSETS
=
"calculate_tiles_offsets"
;
// calculate pixel offsets and disparity distortions
...
@@ -100,7 +102,7 @@ public class GPUTileProcessor {
...
@@ -100,7 +102,7 @@ public class GPUTileProcessor {
static
String
GPU_MARK_TEXTURE_NEIGHBOR_NAME
=
"mark_texture_neighbor_tiles"
;
static
String
GPU_MARK_TEXTURE_NEIGHBOR_NAME
=
"mark_texture_neighbor_tiles"
;
static
String
GPU_GEN_TEXTURE_LIST_NAME
=
"gen_texture_list"
;
static
String
GPU_GEN_TEXTURE_LIST_NAME
=
"gen_texture_list"
;
static
String
GPU_CLEAR_TEXTURE_RBGA_NAME
=
"clear_texture_rbga"
;
static
String
GPU_CLEAR_TEXTURE_RBGA_NAME
=
"clear_texture_rbga"
;
static
String
GPU_TEXTURES_ACCUMULATE_NAME
=
"textures_accumulate"
;
static
String
GPU_TEXTURES_ACCUMULATE_NAME
=
"textures_accumulate"
;
//// *** Modified 2025 *** ////
static
String
GPU_CREATE_NONOVERLAP_LIST_NAME
=
"create_nonoverlap_list"
;
static
String
GPU_CREATE_NONOVERLAP_LIST_NAME
=
"create_nonoverlap_list"
;
static
String
GPU_ERASE_CLT_TILES_NAME
=
"erase_clt_tiles"
;
static
String
GPU_ERASE_CLT_TILES_NAME
=
"erase_clt_tiles"
;
...
@@ -298,7 +300,7 @@ public class GPUTileProcessor {
...
@@ -298,7 +300,7 @@ public class GPUTileProcessor {
ClassLoader
classLoader
=
getClass
().
getClassLoader
();
ClassLoader
classLoader
=
getClass
().
getClassLoader
();
String
[]
kernelSources
=
new
String
[
GPU_SRC_FILES
.
length
];
String
[]
kernelSources
=
new
String
[
GPU_SRC_FILES
.
length
];
boolean
show_source
=
false
;
// true;
boolean
show_source
=
true
;
//
false; // true;
for
(
int
cunit
=
0
;
cunit
<
kernelSources
.
length
;
cunit
++)
{
for
(
int
cunit
=
0
;
cunit
<
kernelSources
.
length
;
cunit
++)
{
kernelSources
[
cunit
]
=
""
;
// use StringBuffer?
kernelSources
[
cunit
]
=
""
;
// use StringBuffer?
for
(
String
src_file:
GPU_SRC_FILES
[
cunit
])
{
for
(
String
src_file:
GPU_SRC_FILES
[
cunit
])
{
...
@@ -370,7 +372,7 @@ public class GPUTileProcessor {
...
@@ -370,7 +372,7 @@ public class GPUTileProcessor {
GPU_CORR2D_COMBINE_kernel
=
functions
[
4
];
GPU_CORR2D_COMBINE_kernel
=
functions
[
4
];
GPU_CORR2D_NORMALIZE_kernel
=
functions
[
5
];
GPU_CORR2D_NORMALIZE_kernel
=
functions
[
5
];
GPU_TEXTURES_kernel
=
functions
[
6
];
GPU_TEXTURES_kernel
=
functions
[
6
];
GPU_RBGA_kernel
=
functions
[
7
];
GPU_RBGA_kernel
=
functions
[
7
];
//// *** Modified 2025 *** ////
GPU_ROT_DERIV_kernel
=
functions
[
8
];
GPU_ROT_DERIV_kernel
=
functions
[
8
];
GPU_CALCULATE_TILES_OFFSETS_kernel
=
functions
[
9
];
GPU_CALCULATE_TILES_OFFSETS_kernel
=
functions
[
9
];
GPU_CALC_REVERSE_DISTORTION_kernel
=
functions
[
10
];
GPU_CALC_REVERSE_DISTORTION_kernel
=
functions
[
10
];
...
@@ -380,7 +382,7 @@ public class GPUTileProcessor {
...
@@ -380,7 +382,7 @@ public class GPUTileProcessor {
GPU_MARK_TEXTURE_NEIGHBOR_kernel
=
functions
[
13
];
GPU_MARK_TEXTURE_NEIGHBOR_kernel
=
functions
[
13
];
GPU_GEN_TEXTURE_LIST_kernel
=
functions
[
14
];
GPU_GEN_TEXTURE_LIST_kernel
=
functions
[
14
];
GPU_CLEAR_TEXTURE_RBGA_kernel
=
functions
[
15
];
GPU_CLEAR_TEXTURE_RBGA_kernel
=
functions
[
15
];
GPU_TEXTURES_ACCUMULATE_kernel
=
functions
[
16
];
GPU_TEXTURES_ACCUMULATE_kernel
=
functions
[
16
];
//// *** Modified 2025 *** ////
GPU_CREATE_NONOVERLAP_LIST_kernel
=
functions
[
17
];
GPU_CREATE_NONOVERLAP_LIST_kernel
=
functions
[
17
];
GPU_ERASE_CLT_TILES_kernel
=
functions
[
18
];
GPU_ERASE_CLT_TILES_kernel
=
functions
[
18
];
...
@@ -504,7 +506,7 @@ public class GPUTileProcessor {
...
@@ -504,7 +506,7 @@ public class GPUTileProcessor {
// Use the NVRTC to create a program by compiling the source code
// Use the NVRTC to create a program by compiling the source code
nvrtcProgram
program
=
new
nvrtcProgram
();
nvrtcProgram
program
=
new
nvrtcProgram
();
nvrtcCreateProgram
(
program
,
sourceCode
,
null
,
0
,
null
,
null
);
nvrtcCreateProgram
(
program
,
sourceCode
,
null
,
0
,
null
,
null
);
String
options
[]
=
{
"--gpu-architecture=compute_"
+
capability
};
String
options
[]
=
{
"--gpu-architecture=compute_"
+
capability
,
"--extensible-whole-program"
};
try
{
try
{
nvrtcCompileProgram
(
program
,
options
.
length
,
options
);
nvrtcCompileProgram
(
program
,
options
.
length
,
options
);
...
...
src/main/java/com/elphel/imagej/gpu/GpuQuad.java
View file @
0c53ff72
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/TileProcessor.cu
h
→
src/main/resources/kernels/TileProcessor.cu
View file @
0c53ff72
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/TileProcessor.h
View file @
0c53ff72
...
@@ -37,10 +37,34 @@
...
@@ -37,10 +37,34 @@
*/
*/
#pragma once
#pragma once
#ifndef TILE_PROCESSOR_H_
#define TILE_PROCESSOR_H_
#ifndef NUM_CAMS
#ifndef NUM_CAMS
#include "tp_defines.h"
#include "tp_defines.h"
#endif
#endif
#define TASK_TEXTURE_BITS ((1 << TASK_TEXT_N_BIT) | (1 << TASK_TEXT_NE_BIT) | (1 << TASK_TEXT_E_BIT) | (1 << TASK_TEXT_SE_BIT)\
| (1 << TASK_TEXT_S_BIT) | (1 << TASK_TEXT_SW_BIT) | (1 << TASK_TEXT_W_BIT) | (1 << TASK_TEXT_NW_BIT))
#define CONVERT_DIRECT_INDEXING_THREADS_LOG2 5
#define CONVERT_DIRECT_INDEXING_THREADS (1 << CONVERT_DIRECT_INDEXING_THREADS_LOG2) // 32
#define MCLT_UNION_LEN (DTT_SIZE2 * (DTT_SIZE2 + 2))
struct
CltExtra
{
float
data_x
;
// kernel data is relative to this displacement X (0.5 pixel increments)
float
data_y
;
// kernel data is relative to this displacement Y (0.5 pixel increments)
float
center_x
;
// actual center X (use to find derivatives)
float
center_y
;
// actual center Y (use to find derivatives)
float
dxc_dx
;
// add this to data_x per each pixel X-shift relative to the kernel center location
float
dxc_dy
;
// same per each Y-shift pixel
float
dyc_dx
;
float
dyc_dy
;
};
extern
"C"
__global__
void
convert_direct
(
// called with a single block, single thread
extern
"C"
__global__
void
convert_direct
(
// called with a single block, single thread
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
...
@@ -102,7 +126,7 @@ extern "C" __global__ void correlate2D_inter( // only results in TD
...
@@ -102,7 +126,7 @@ extern "C" __global__ void correlate2D_inter( // only results in TD
int
*
gpu_corr_indices
,
// packed tile+pair
int
*
gpu_corr_indices
,
// packed tile+pair
int
*
pnum_corr_tiles
,
// pointer to a number of correlation tiles to process
int
*
pnum_corr_tiles
,
// pointer to a number of correlation tiles to process
size_t
corr_stride
,
// in floats
size_t
corr_stride
,
// in floats
float
*
gpu_corrs
);
// correlation output data
float
*
gpu_corrs
);
// correlation output data
extern
"C"
__global__
void
corr2D_normalize
(
extern
"C"
__global__
void
corr2D_normalize
(
...
@@ -216,5 +240,84 @@ extern "C" __global__ void generate_RBGA(
...
@@ -216,5 +240,84 @@ extern "C" __global__ void generate_RBGA(
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
int
keep_weights
,
// return channel weights after A in RGBA (was removed)
int
keep_weights
,
// return channel weights after A in RGBA (was removed)
const
size_t
texture_rbga_stride
,
// in floats
const
size_t
texture_rbga_stride
,
// in floats
float
*
gpu_texture_tiles
,
// (number of colors +1 + ?)*16*16 rgba texture tiles
int
*
twh
);
extern
"C"
__global__
void
textures_accumulate
(
// (8,4,1) (N,1,1)
int
num_cams
,
// number of cameras used
int
*
woi
,
// x, y, width,height
float
**
gpu_clt
,
// [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
/// size_t num_texture_tiles, // number of texture tiles to process
int
*
pnum_texture_tiles
,
// pointer to a number of texture tiles to process
int
gpu_texture_indices_offset
,
// add to gpu_texture_indices
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7))
// TODO: use geometry_correction rXY !
struct
gc
*
gpu_geometry_correction
,
int
colors
,
// number of colors (3/1)
int
is_lwir
,
// do not perform shot correction
float
min_shot
,
// 10.0
float
scale_shot
,
// 3.0
float
diff_sigma
,
// pixel value/pixel change
float
diff_threshold
,
// pixel value/pixel change
float
min_agree
,
// minimal number of channels to agree on a point (real number to work with fuzzy averages)
const
float
weights
[
3
],
// scale for R,B,G
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
int
keep_weights
,
// return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
size_t
texture_rbg_stride
,
// in floats
float
*
gpu_texture_rbg
,
// (number of colors +1 + ?)*16*16 rgba texture tiles
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
float
*
gpu_texture_tiles
,
// (number of colors +1 + ?)*16*16 rgba texture tiles
int
linescan_order
,
// if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
float
*
gpu_diff_rgb_combo
,
//) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
int
tilesx
);
extern
"C"
__global__
void
clear_texture_list
(
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7))
int
width
,
// <= TILES-X, use for faster processing of LWIR images
int
height
);
// <= TILES-Y, use for faster processing of LWIR images
extern
"C"
__global__
void
clear_texture_rbga
(
int
texture_width
,
int
texture_slice_height
,
const
size_t
texture_rbga_stride
,
// in floats 8*stride
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
extern
"C"
__global__
void
create_nonoverlap_list
(
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task
int
width
,
// number of tiles in a row
int
*
nonoverlap_list
,
// indices to gpu_tasks (output list of non-overlapping tiles)
int
*
pnonoverlap_length
);
// pointer to the calculated number of non-zero tiles, should be initialized to zero
extern
"C"
__global__
void
mark_texture_tiles
(
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task list
int
width
,
// number of tiles in a row
int
*
gpu_texture_indices
);
// packed tile + bits (now only (1 << 7))
extern
"C"
__global__
void
mark_texture_neighbor_tiles
(
// TODO: remove __global__?
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task list
int
width
,
// number of tiles in a row
int
height
,
// number of tile rows
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7))
int
*
woi
);
// x,y,width,height of the woi
extern
"C"
__global__
void
gen_texture_list
(
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task list
int
width
,
// number of tiles in a row
int
height
,
// number of tile rows
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7))
int
*
num_texture_tiles
,
// number of texture tiles to process
int
*
woi
);
// min_x, min_y, max_x, max_y input
#endif
src/main/resources/kernels/geometry_correction.cu
View file @
0c53ff72
...
@@ -40,6 +40,14 @@
...
@@ -40,6 +40,14 @@
#include "tp_defines.h"
#include "tp_defines.h"
#include "dtt8x8.h"
#include "dtt8x8.h"
#include "geometry_correction.h"
#include "geometry_correction.h"
// #include "TileProcessor.h"
#include <cuda_runtime.h>
// #include <helper_cuda.h>
// #include <helper_functions.h>
#endif // #ifndef JCUDA
#endif // #ifndef JCUDA
#ifndef get_task_size
#ifndef get_task_size
...
@@ -104,12 +112,23 @@ __constant__ float ROTS_TEMPLATE[7][3][3][3] = {// ...{cos,sin,const}...
...
@@ -104,12 +112,23 @@ __constant__ float ROTS_TEMPLATE[7][3][3][3] = {// ...{cos,sin,const}...
{{
0
,
0
,
0
},{
0
,
0
,
0
},{
0
,
0
,
0
}},
{{
0
,
0
,
0
},{
0
,
0
,
0
},{
0
,
0
,
0
}},
}
}
};
};
// TODO: Make the offsets calculated at compile time, to avoid the NVRTC (in Java) error: "dynamic initialization is not supported for a __constant__ variable"
__constant__
int
angles_offsets
[
4
]
{
15
,
0
,
30
,
30
};
/*
__constant__ int angles_offsets [4] {
(int) (offsetof4(corr_vector, azimuth)),
(int) (offsetof4(corr_vector, tilt)),
(int) (offsetof4(corr_vector, roll)),
(int) (offsetof4(corr_vector, roll))};
*/
/*
__constant__ int angles_offsets [4] = {
(int) (offsetof(corr_vector, azimuth)/sizeof(float)),
(int) (offsetof(corr_vector, tilt) /sizeof(float)),
(int) (offsetof(corr_vector, roll) /sizeof(float)),
(int) (offsetof(corr_vector, roll) /sizeof(float))};
__constant__
int
angles_offsets
[
4
]
=
{
*/
offsetof
(
corr_vector
,
azimuth
)
/
sizeof
(
float
),
offsetof
(
corr_vector
,
tilt
)
/
sizeof
(
float
),
offsetof
(
corr_vector
,
roll
)
/
sizeof
(
float
),
offsetof
(
corr_vector
,
roll
)
/
sizeof
(
float
)};
__constant__
int
mm_seq
[
3
][
3
][
3
]
=
{
__constant__
int
mm_seq
[
3
][
3
][
3
]
=
{
{
{
{
6
,
5
,
12
},
// a_t * a_z -> tmp0
{
6
,
5
,
12
},
// a_t * a_z -> tmp0
...
@@ -337,9 +356,6 @@ extern "C" __global__ void calculate_tiles_offsets(
...
@@ -337,9 +356,6 @@ extern "C" __global__ void calculate_tiles_offsets(
gpu_rot_deriv
);
// union trot_deriv * gpu_rot_deriv);
gpu_rot_deriv
);
// union trot_deriv * gpu_rot_deriv);
}
}
// __syncthreads();// __syncwarp();
// cudaDeviceSynchronize();
// cudaDeviceSynchronize();
}
}
...
...
src/main/resources/kernels/geometry_correction.h
View file @
0c53ff72
...
@@ -51,6 +51,11 @@
...
@@ -51,6 +51,11 @@
((size_t)&(((st *)0)->m))
((size_t)&(((st *)0)->m))
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#endif
#ifndef offsetof4
#define offsetof4(st, m) \
(((size_t)&(((st *)0)->m))>>2)
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#define SCENE_UNITS_SCALE 0.001 // meters from mm
#define SCENE_UNITS_SCALE 0.001 // meters from mm
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment