Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
b4d8c441
Commit
b4d8c441
authored
Apr 15, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
updated to jcuda 12.6, provided kernels
parent
2516bcc1
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
424 additions
and
280 deletions
+424
-280
TileProcessor.cu
src/main/resources/kernels/TileProcessor.cu
+291
-271
TileProcessor.h
src/main/resources/kernels/TileProcessor.h
+104
-1
geometry_correction.cu
src/main/resources/kernels/geometry_correction.cu
+24
-8
geometry_correction.h
src/main/resources/kernels/geometry_correction.h
+5
-0
No files found.
src/main/resources/kernels/TileProcessor.cu
h
→
src/main/resources/kernels/TileProcessor.cu
View file @
b4d8c441
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/TileProcessor.h
View file @
b4d8c441
...
@@ -37,10 +37,34 @@
...
@@ -37,10 +37,34 @@
*/
*/
#pragma once
#pragma once
#ifndef TILE_PROCESSOR_H_
#define TILE_PROCESSOR_H_
#ifndef NUM_CAMS
#ifndef NUM_CAMS
#include "tp_defines.h"
#include "tp_defines.h"
#endif
#endif
#define TASK_TEXTURE_BITS ((1 << TASK_TEXT_N_BIT) | (1 << TASK_TEXT_NE_BIT) | (1 << TASK_TEXT_E_BIT) | (1 << TASK_TEXT_SE_BIT)\
| (1 << TASK_TEXT_S_BIT) | (1 << TASK_TEXT_SW_BIT) | (1 << TASK_TEXT_W_BIT) | (1 << TASK_TEXT_NW_BIT))
#define CONVERT_DIRECT_INDEXING_THREADS_LOG2 5
#define CONVERT_DIRECT_INDEXING_THREADS (1 << CONVERT_DIRECT_INDEXING_THREADS_LOG2) // 32
#define MCLT_UNION_LEN (DTT_SIZE2 * (DTT_SIZE2 + 2))
struct
CltExtra
{
float
data_x
;
// kernel data is relative to this displacement X (0.5 pixel increments)
float
data_y
;
// kernel data is relative to this displacement Y (0.5 pixel increments)
float
center_x
;
// actual center X (use to find derivatives)
float
center_y
;
// actual center X (use to find derivatives)
float
dxc_dx
;
// add this to data_x per each pixel X-shift relative to the kernel center location
float
dxc_dy
;
// same per each Y-shift pixel
float
dyc_dx
;
float
dyc_dy
;
};
extern
"C"
__global__
void
convert_direct
(
// called with a single block, single thread
extern
"C"
__global__
void
convert_direct
(
// called with a single block, single thread
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
...
@@ -216,5 +240,84 @@ extern "C" __global__ void generate_RBGA(
...
@@ -216,5 +240,84 @@ extern "C" __global__ void generate_RBGA(
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
int
keep_weights
,
// return channel weights after A in RGBA (was removed)
int
keep_weights
,
// return channel weights after A in RGBA (was removed)
const
size_t
texture_rbga_stride
,
// in floats
const
size_t
texture_rbga_stride
,
// in floats
float
*
gpu_texture_tiles
,
// (number of colors +1 + ?)*16*16 rgba texture tiles
int
*
twh
);
extern
"C"
__global__
void
textures_accumulate
(
// (8,4,1) (N,1,1)
int
num_cams
,
// number of cameras used
int
*
woi
,
// x, y, width,height
float
**
gpu_clt
,
// [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
/// size_t num_texture_tiles, // number of texture tiles to process
int
*
pnum_texture_tiles
,
// pointer to a number of texture tiles to process
int
gpu_texture_indices_offset
,
// add to gpu_texture_indices
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
// TODO: use geometry_correction rXY !
struct
gc
*
gpu_geometry_correction
,
int
colors
,
// number of colors (3/1)
int
is_lwir
,
// do not perform shot correction
float
min_shot
,
// 10.0
float
scale_shot
,
// 3.0
float
diff_sigma
,
// pixel value/pixel change
float
diff_threshold
,
// pixel value/pixel change
float
min_agree
,
// minimal number of channels to agree on a point (real number to work with fuzzy averages)
const
float
weights
[
3
],
// scale for R,B,G
int
dust_remove
,
// Do not reduce average weight when only one image differs much from the average
int
keep_weights
,
// return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
size_t
texture_rbg_stride
,
// in floats
float
*
gpu_texture_rbg
,
// (number of colors +1 + ?)*16*16 rgba texture tiles
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
float
*
gpu_texture_tiles
,
// (number of colors +1 + ?)*16*16 rgba texture tiles
int
linescan_order
,
// if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
float
*
gpu_diff_rgb_combo
,
//) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
int
tilesx
);
extern
"C"
__global__
void
clear_texture_list
(
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
width
,
// <= TILES-X, use for faster processing of LWIR images
int
height
);
// <= TILES-Y, use for faster processing of LWIR images
extern
"C"
__global__
void
clear_texture_rbga
(
int
texture_width
,
int
texture_slice_height
,
const
size_t
texture_rbga_stride
,
// in floats 8*stride
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
extern
"C"
__global__
void
create_nonoverlap_list
(
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task
int
width
,
// number of tiles in a row
int
*
nonoverlap_list
,
// pointer to the calculated number of non-zero tiles
int
*
pnonoverlap_length
);
// indices to gpu_tasks // should be initialized to zero
extern
"C"
__global__
void
mark_texture_tiles
(
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task list
int
width
,
// number of tiles in a row
int
*
gpu_texture_indices
);
// packed tile + bits (now only (1 << 7)
extern
"C"
__global__
void
mark_texture_neighbor_tiles
(
// TODO: remove __global__?
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task list
int
width
,
// number of tiles in a row
int
height
,
// number of tiles rows
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
woi
);
// x,y,width,height of the woi
extern
"C"
__global__
void
gen_texture_list
(
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task list
int
width
,
// number of tiles in a row
int
height
,
// number of tiles rows
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
num_texture_tiles
,
// number of texture tiles to process
int
*
woi
);
// min_x, min_y, max_x, max_y input
#endif
src/main/resources/kernels/geometry_correction.cu
View file @
b4d8c441
...
@@ -40,6 +40,14 @@
...
@@ -40,6 +40,14 @@
#include "tp_defines.h"
#include "tp_defines.h"
#include "dtt8x8.h"
#include "dtt8x8.h"
#include "geometry_correction.h"
#include "geometry_correction.h"
// #include "TileProcessor.h"
#include <cuda_runtime.h>
// #include <helper_cuda.h>
// #include <helper_functions.h>
#endif // #ifndef JCUDA
#endif // #ifndef JCUDA
#ifndef get_task_size
#ifndef get_task_size
...
@@ -104,12 +112,23 @@ __constant__ float ROTS_TEMPLATE[7][3][3][3] = {// ...{cos,sin,const}...
...
@@ -104,12 +112,23 @@ __constant__ float ROTS_TEMPLATE[7][3][3][3] = {// ...{cos,sin,const}...
{{
0
,
0
,
0
},{
0
,
0
,
0
},{
0
,
0
,
0
}},
{{
0
,
0
,
0
},{
0
,
0
,
0
},{
0
,
0
,
0
}},
}
}
};
};
// TODO: Make offsets calculate in compile time, to avoid NVRTC(in java): " error: dynamic initialization is not supported for a __constant__ variable"
__constant__
int
angles_offsets
[
4
]
{
15
,
0
,
30
,
30
};
/*
__constant__ int angles_offsets [4] {
(int) (offsetof4(corr_vector, azimuth)),
(int) (offsetof4(corr_vector, tilt)),
(int) (offsetof4(corr_vector, roll)),
(int) (offsetof4(corr_vector, roll))};
*/
/*
__constant__ int angles_offsets [4] = {
(int) (offsetof(corr_vector, azimuth)/sizeof(float)),
(int) (offsetof(corr_vector, tilt) /sizeof(float)),
(int) (offsetof(corr_vector, roll) /sizeof(float)),
(int) (offsetof(corr_vector, roll) /sizeof(float))};
__constant__
int
angles_offsets
[
4
]
=
{
*/
offsetof
(
corr_vector
,
azimuth
)
/
sizeof
(
float
),
offsetof
(
corr_vector
,
tilt
)
/
sizeof
(
float
),
offsetof
(
corr_vector
,
roll
)
/
sizeof
(
float
),
offsetof
(
corr_vector
,
roll
)
/
sizeof
(
float
)};
__constant__
int
mm_seq
[
3
][
3
][
3
]
=
{
__constant__
int
mm_seq
[
3
][
3
][
3
]
=
{
{
{
{
6
,
5
,
12
},
// a_t * a_z -> tmp0
{
6
,
5
,
12
},
// a_t * a_z -> tmp0
...
@@ -337,9 +356,6 @@ extern "C" __global__ void calculate_tiles_offsets(
...
@@ -337,9 +356,6 @@ extern "C" __global__ void calculate_tiles_offsets(
gpu_rot_deriv
);
// union trot_deriv * gpu_rot_deriv);
gpu_rot_deriv
);
// union trot_deriv * gpu_rot_deriv);
}
}
// __syncthreads();// __syncwarp();
// cudaDeviceSynchronize();
// cudaDeviceSynchronize();
}
}
...
...
src/main/resources/kernels/geometry_correction.h
View file @
b4d8c441
...
@@ -51,6 +51,11 @@
...
@@ -51,6 +51,11 @@
((size_t)&(((st *)0)->m))
((size_t)&(((st *)0)->m))
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#endif
#ifndef offsetof4
#define offsetof4(st, m) \
(((size_t)&(((st *)0)->m))>>2)
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#define SCENE_UNITS_SCALE 0.001 // meters from mm
#define SCENE_UNITS_SCALE 0.001 // meters from mm
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment