Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
0bb31239
Commit
0bb31239
authored
Apr 14, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
4 images with CDP
parent
095bd8c2
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
154 additions
and
315 deletions
+154
-315
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+55
-6
TwoQuadCLT.java
...main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+2
-1
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+76
-302
TileProcessor.h
src/main/resources/kernels/TileProcessor.h
+18
-3
geometry_correction.h
src/main/resources/kernels/geometry_correction.h
+3
-3
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
0bb31239
...
@@ -107,6 +107,7 @@ public class GPUTileProcessor {
...
@@ -107,6 +107,7 @@ public class GPUTileProcessor {
static
String
GPU_RBGA_NAME
=
"generate_RBGA"
;
// name in C code
static
String
GPU_RBGA_NAME
=
"generate_RBGA"
;
// name in C code
static
String
GPU_ROT_DERIV
=
"calc_rot_deriv"
;
// calculate rotation matrices and derivatives
static
String
GPU_ROT_DERIV
=
"calc_rot_deriv"
;
// calculate rotation matrices and derivatives
static
String
SET_TILES_OFFSETS
=
"get_tiles_offsets"
;
// calculate pixel offsets and disparity distortions
static
String
SET_TILES_OFFSETS
=
"get_tiles_offsets"
;
// calculate pixel offsets and disparity distortions
static
String
GPU_IMCLT_ALL_NAME
=
"imclt_rbg_all"
;
// pass some defines to gpu source code with #ifdef JCUDA
// pass some defines to gpu source code with #ifdef JCUDA
...
@@ -168,6 +169,8 @@ public class GPUTileProcessor {
...
@@ -168,6 +169,8 @@ public class GPUTileProcessor {
private
CUfunction
GPU_RBGA_kernel
=
null
;
private
CUfunction
GPU_RBGA_kernel
=
null
;
private
CUfunction
GPU_ROT_DERIV_kernel
=
null
;
private
CUfunction
GPU_ROT_DERIV_kernel
=
null
;
private
CUfunction
SET_TILES_OFFSETS_kernel
=
null
;
private
CUfunction
SET_TILES_OFFSETS_kernel
=
null
;
private
CUfunction
GPU_IMCLT_ALL_kernel
=
null
;
// CPU arrays of pointers to GPU memory
// CPU arrays of pointers to GPU memory
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
...
@@ -186,6 +189,7 @@ public class GPUTileProcessor {
...
@@ -186,6 +189,7 @@ public class GPUTileProcessor {
private
CUdeviceptr
gpu_corrs
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER
private
CUdeviceptr
gpu_corrs
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER
private
CUdeviceptr
gpu_textures
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER
private
CUdeviceptr
gpu_textures
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER
private
CUdeviceptr
gpu_clt
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_clt
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_4_images
=
new
CUdeviceptr
();
private
CUdeviceptr
gpu_corr_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.POINTER
private
CUdeviceptr
gpu_corr_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.POINTER
private
CUdeviceptr
gpu_texture_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.POINTER
private
CUdeviceptr
gpu_texture_indices
=
new
CUdeviceptr
();
// allocate tilesX * tilesY * 6 * Sizeof.POINTER
private
CUdeviceptr
gpu_port_offsets
=
new
CUdeviceptr
();
// allocate Quad * 2 * Sizeof.POINTER
private
CUdeviceptr
gpu_port_offsets
=
new
CUdeviceptr
();
// allocate Quad * 2 * Sizeof.POINTER
...
@@ -466,7 +470,8 @@ public class GPUTileProcessor {
...
@@ -466,7 +470,8 @@ public class GPUTileProcessor {
GPU_TEXTURES_NAME
,
GPU_TEXTURES_NAME
,
GPU_RBGA_NAME
,
GPU_RBGA_NAME
,
GPU_ROT_DERIV
,
GPU_ROT_DERIV
,
SET_TILES_OFFSETS
SET_TILES_OFFSETS
,
GPU_IMCLT_ALL_NAME
};
};
CUfunction
[]
functions
=
createFunctions
(
kernelSources
,
CUfunction
[]
functions
=
createFunctions
(
kernelSources
,
func_names
,
func_names
,
...
@@ -479,7 +484,7 @@ public class GPUTileProcessor {
...
@@ -479,7 +484,7 @@ public class GPUTileProcessor {
GPU_RBGA_kernel
=
functions
[
4
];
GPU_RBGA_kernel
=
functions
[
4
];
GPU_ROT_DERIV_kernel
=
functions
[
5
];
GPU_ROT_DERIV_kernel
=
functions
[
5
];
SET_TILES_OFFSETS_kernel
=
functions
[
6
];
SET_TILES_OFFSETS_kernel
=
functions
[
6
];
GPU_IMCLT_ALL_kernel
=
functions
[
7
];
System
.
out
.
println
(
"GPU kernel functions initialized"
);
System
.
out
.
println
(
"GPU kernel functions initialized"
);
System
.
out
.
println
(
GPU_CONVERT_CORRECT_TILES_kernel
.
toString
());
System
.
out
.
println
(
GPU_CONVERT_CORRECT_TILES_kernel
.
toString
());
...
@@ -531,10 +536,13 @@ public class GPUTileProcessor {
...
@@ -531,10 +536,13 @@ public class GPUTileProcessor {
cuMemAlloc
(
gpu_kernel_offsets
,
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_kernel_offsets
,
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_bayer
,
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_bayer
,
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_clt
,
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_clt
,
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemAlloc
(
gpu_4_images
,
NUM_CAMS
*
Sizeof
.
POINTER
);
long
[]
gpu_kernels_l
=
new
long
[
NUM_CAMS
];
long
[]
gpu_kernels_l
=
new
long
[
NUM_CAMS
];
long
[]
gpu_kernel_offsets_l
=
new
long
[
NUM_CAMS
];
long
[]
gpu_kernel_offsets_l
=
new
long
[
NUM_CAMS
];
long
[]
gpu_bayer_l
=
new
long
[
NUM_CAMS
];
long
[]
gpu_bayer_l
=
new
long
[
NUM_CAMS
];
long
[]
gpu_clt_l
=
new
long
[
NUM_CAMS
];
long
[]
gpu_clt_l
=
new
long
[
NUM_CAMS
];
long
[]
gpu_4_images_l
=
new
long
[
NUM_CAMS
];
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++)
gpu_kernels_l
[
ncam
]
=
getPointerAddress
(
gpu_kernels_h
[
ncam
]);
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++)
gpu_kernels_l
[
ncam
]
=
getPointerAddress
(
gpu_kernels_h
[
ncam
]);
cuMemcpyHtoD
(
gpu_kernels
,
Pointer
.
to
(
gpu_kernels_l
),
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemcpyHtoD
(
gpu_kernels
,
Pointer
.
to
(
gpu_kernels_l
),
NUM_CAMS
*
Sizeof
.
POINTER
);
...
@@ -548,6 +556,9 @@ public class GPUTileProcessor {
...
@@ -548,6 +556,9 @@ public class GPUTileProcessor {
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++)
gpu_clt_l
[
ncam
]
=
getPointerAddress
(
gpu_clt_h
[
ncam
]);
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++)
gpu_clt_l
[
ncam
]
=
getPointerAddress
(
gpu_clt_h
[
ncam
]);
cuMemcpyHtoD
(
gpu_clt
,
Pointer
.
to
(
gpu_clt_l
),
NUM_CAMS
*
Sizeof
.
POINTER
);
cuMemcpyHtoD
(
gpu_clt
,
Pointer
.
to
(
gpu_clt_l
),
NUM_CAMS
*
Sizeof
.
POINTER
);
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++)
gpu_4_images_l
[
ncam
]
=
getPointerAddress
(
gpu_corr_images_h
[
ncam
]);
cuMemcpyHtoD
(
gpu_4_images
,
Pointer
.
to
(
gpu_4_images_l
),
NUM_CAMS
*
Sizeof
.
POINTER
);
// Set GeometryCorrection data
// Set GeometryCorrection data
cuMemAlloc
(
gpu_geometry_correction
,
GeometryCorrection
.
arrayLength
(
NUM_CAMS
)
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_geometry_correction
,
GeometryCorrection
.
arrayLength
(
NUM_CAMS
)
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_rByRDist
,
RBYRDIST_LEN
*
Sizeof
.
FLOAT
);
cuMemAlloc
(
gpu_rByRDist
,
RBYRDIST_LEN
*
Sizeof
.
FLOAT
);
...
@@ -1093,9 +1104,12 @@ public class GPUTileProcessor {
...
@@ -1093,9 +1104,12 @@ public class GPUTileProcessor {
Pointer
.
to
(
gpu_clt
),
Pointer
.
to
(
gpu_clt
),
Pointer
.
to
(
new
int
[]
{
mclt_stride
}),
Pointer
.
to
(
new
int
[]
{
mclt_stride
}),
Pointer
.
to
(
new
int
[]
{
num_task_tiles
}),
Pointer
.
to
(
new
int
[]
{
num_task_tiles
}),
// move lpf to 4-image generator kernel
// move lpf to 4-image generator kernel - DONE
// Pointer.to(new int[] { 7 }) // lpf_mask ??? (C-code has it 0)
Pointer
.
to
(
new
int
[]
{
0
}),
// lpf_mask
Pointer
.
to
(
new
int
[]
{
0
})
// lpf_mask ??? (C-code has it 0)
Pointer
.
to
(
new
int
[]
{
IMG_WIDTH
}),
// int woi_width,
Pointer
.
to
(
new
int
[]
{
IMG_HEIGHT
}),
// int woi_height,
Pointer
.
to
(
new
int
[]
{
KERNELS_HOR
}),
// int kernels_hor,
Pointer
.
to
(
new
int
[]
{
KERNELS_VERT
})
// int kernels_vert);
);
);
cuCtxSynchronize
();
cuCtxSynchronize
();
...
@@ -1132,10 +1146,12 @@ public class GPUTileProcessor {
...
@@ -1132,10 +1146,12 @@ public class GPUTileProcessor {
Pointer
.
to
(
gpu_clt_h
[
ncam
]),
Pointer
.
to
(
gpu_clt_h
[
ncam
]),
Pointer
.
to
(
gpu_corr_images_h
[
ncam
]),
Pointer
.
to
(
gpu_corr_images_h
[
ncam
]),
Pointer
.
to
(
new
int
[]
{
apply_lpf
}),
Pointer
.
to
(
new
int
[]
{
apply_lpf
}),
Pointer
.
to
(
new
int
[]
{
is_mono
?
1
:
0
}),
Pointer
.
to
(
new
int
[]
{
is_mono
?
1
:
NUM_COLORS
}),
// now - NUM_COLORS
Pointer
.
to
(
new
int
[]
{
color
}),
Pointer
.
to
(
new
int
[]
{
color
}),
Pointer
.
to
(
new
int
[]
{
v_offs
}),
Pointer
.
to
(
new
int
[]
{
v_offs
}),
Pointer
.
to
(
new
int
[]
{
h_offs
}),
Pointer
.
to
(
new
int
[]
{
h_offs
}),
Pointer
.
to
(
new
int
[]
{
tilesX
}),
Pointer
.
to
(
new
int
[]
{
tilesY
}),
Pointer
.
to
(
new
int
[]
{
imclt_stride
})
// lpf_mask
Pointer
.
to
(
new
int
[]
{
imclt_stride
})
// lpf_mask
);
);
cuCtxSynchronize
();
cuCtxSynchronize
();
...
@@ -1152,6 +1168,39 @@ public class GPUTileProcessor {
...
@@ -1152,6 +1168,39 @@ public class GPUTileProcessor {
cuCtxSynchronize
();
cuCtxSynchronize
();
}
}
public
void
execImcltRbgAll
(
boolean
is_mono
)
{
if
(
GPU_IMCLT_ALL_kernel
==
null
)
{
IJ
.
showMessage
(
"Error"
,
"No GPU kernel: GPU_IMCLT_ALL_kernel"
);
return
;
}
int
apply_lpf
=
1
;
int
tilesX
=
IMG_WIDTH
/
DTT_SIZE
;
int
tilesY
=
IMG_HEIGHT
/
DTT_SIZE
;
int
[]
ThreadsFullWarps
=
{
1
,
1
,
1
};
int
[]
GridFullWarps
=
{
1
,
1
,
1
};
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
.
to
(
gpu_clt
),
// float ** gpu_clt, // [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
Pointer
.
to
(
gpu_4_images
),
// float ** gpu_corr_images, // [NUM_CAMS][WIDTH, 3 * HEIGHT]
Pointer
.
to
(
new
int
[]
{
apply_lpf
}),
// int apply_lpf,
Pointer
.
to
(
new
int
[]
{
is_mono
?
1
:
NUM_COLORS
}),
// int colors,
Pointer
.
to
(
new
int
[]
{
tilesX
}),
// int woi_twidth,
Pointer
.
to
(
new
int
[]
{
tilesY
}),
// int woi_theight,
Pointer
.
to
(
new
int
[]
{
imclt_stride
})
// const size_t dstride); // in floats (pixels)
);
cuCtxSynchronize
();
// Call the kernel function
cuLaunchKernel
(
GPU_IMCLT_ALL_kernel
,
GridFullWarps
[
0
],
GridFullWarps
[
1
],
GridFullWarps
[
2
],
// Grid dimension
ThreadsFullWarps
[
0
],
ThreadsFullWarps
[
1
],
ThreadsFullWarps
[
2
],
// Block dimension
0
,
null
,
// Shared memory size and stream (shared - only dynamic, static is in code)
kernelParameters
,
null
);
// Kernel- and extra parameters
cuCtxSynchronize
();
}
public
void
execCorr2D
(
public
void
execCorr2D
(
double
[]
scales
,
double
[]
scales
,
double
fat_zero
,
double
fat_zero
,
...
...
src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
View file @
0bb31239
...
@@ -2119,7 +2119,8 @@ public class TwoQuadCLT {
...
@@ -2119,7 +2119,8 @@ public class TwoQuadCLT {
// run imclt;
// run imclt;
long
startIMCLT
=
System
.
nanoTime
();
long
startIMCLT
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execImcltRbg
(
quadCLT_main
.
isMonochrome
());
// gPUTileProcessor.execImcltRbg(quadCLT_main.isMonochrome());
gPUTileProcessor
.
execImcltRbgAll
(
quadCLT_main
.
isMonochrome
());
}
}
long
endImcltTime
=
System
.
nanoTime
();
long
endImcltTime
=
System
.
nanoTime
();
// run correlation
// run correlation
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
0bb31239
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/TileProcessor.h
View file @
0bb31239
...
@@ -51,7 +51,12 @@ __global__ void convert_correct_tiles(
...
@@ -51,7 +51,12 @@ __global__ void convert_correct_tiles(
float
**
gpu_clt
,
// [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
dstride
,
// in floats (pixels)
size_t
dstride
,
// in floats (pixels)
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
int
lpf_mask
);
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
int
lpf_mask
,
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
int
woi_width
,
int
woi_height
,
int
kernels_hor
,
int
kernels_vert
);
extern
"C"
__global__
void
clear_texture_list
(
extern
"C"
__global__
void
clear_texture_list
(
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
...
@@ -104,6 +109,16 @@ extern "C" __global__ void textures_accumulate(
...
@@ -104,6 +109,16 @@ extern "C" __global__ void textures_accumulate(
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
extern
"C"
__global__
void
imclt_rbg_all
(
float
**
gpu_clt
,
// [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_corr_images
,
// [NUM_CAMS][WIDTH, 3 * HEIGHT]
int
apply_lpf
,
int
colors
,
int
woi_twidth
,
int
woi_theight
,
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
__global__
void
imclt_rbg
(
extern
"C"
__global__
void
imclt_rbg
(
float
*
gpu_clt
,
// [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_clt
,
// [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
...
@@ -112,6 +127,8 @@ extern "C" __global__ void imclt_rbg(
...
@@ -112,6 +127,8 @@ extern "C" __global__ void imclt_rbg(
int
color
,
// defines location of clt data
int
color
,
// defines location of clt data
int
v_offset
,
int
v_offset
,
int
h_offset
,
int
h_offset
,
int
woi_twidth
,
int
woi_theight
,
const
size_t
dstride
);
// in floats (pixels)
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
extern
"C"
...
@@ -144,5 +161,3 @@ __global__ void generate_RBGA(
...
@@ -144,5 +161,3 @@ __global__ void generate_RBGA(
const
size_t
texture_rbga_stride
,
// in floats
const
size_t
texture_rbga_stride
,
// in floats
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
src/main/resources/kernels/geometry_correction.h
View file @
0bb31239
...
@@ -114,9 +114,9 @@ struct gc {
...
@@ -114,9 +114,9 @@ struct gc {
float
distortionA7
;
//r^7 (normalized to focal length or to sensor half width?)
float
distortionA7
;
//r^7 (normalized to focal length or to sensor half width?)
float
distortionA8
;
//r^8 (normalized to focal length or to sensor half width?)
float
distortionA8
;
//r^8 (normalized to focal length or to sensor half width?)
#ifndef NVRTC_BUG
#ifndef NVRTC_BUG
//
};
};
//
float rad_coeff [7];
float
rad_coeff
[
7
];
//
};
};
#endif
#endif
// parameters, common for all sensors
// parameters, common for all sensors
float
elevation
;
// degrees, up - positive;
float
elevation
;
// degrees, up - positive;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment