Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
3d5ddc28
Commit
3d5ddc28
authored
Apr 07, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
prepared for separate compilation, for now merged
parent
39e75987
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
2088 additions
and
1034 deletions
+2088
-1034
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+154
-66
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+166
-943
dtt8x8.cu
src/main/resources/kernels/dtt8x8.cu
+466
-25
dtt8x8.h
src/main/resources/kernels/dtt8x8.h
+111
-0
test_tp.cu
src/main/resources/kernels/test_tp.cu
+1097
-0
tp_defines.h
src/main/resources/kernels/tp_defines.h
+94
-0
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
3d5ddc28
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/TileProcessor.cuh
View file @
3d5ddc28
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/dtt8x8.cu
h
→
src/main/resources/kernels/dtt8x8.cu
View file @
3d5ddc28
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/dtt8x8.h
0 → 100644
View file @
3d5ddc28
/**
**
** dtt8x8.h
**
** Copyright (C) 2018 Elphel, Inc.
**
** -----------------------------------------------------------------------------**
**
** dtt8x8.cuh is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
** Additional permission under GNU GPL version 3 section 7
**
** If you modify this Program, or any covered work, by linking or
** combining it with NVIDIA Corporation's CUDA libraries from the
** NVIDIA CUDA Toolkit (or a modified version of those libraries),
** containing parts covered by the terms of NVIDIA CUDA Toolkit
** EULA, the licensors of this Program grant you additional
** permission to convey the resulting work.
** -----------------------------------------------------------------------------**
*/
/**
**************************************************************************
* \file dtt8x8.h
* \brief DCT-II, DST-II, DCT-IV and DST-IV for Complex Lapped Transform of 16x16 (stride 8)
* in GPU
* This file contains building blocks for the 16x16 stride 8 COmplex Lapped Transform (CLT)
* implementation. DTT-IV are used for forward and inverse 2D CLT, DTT-II - to convert correlation
* results from the frequency to pixel domain. DTT-III (inverse of DTT-II) is not implemented
* here it is used to convert convolution kernels and LPF to the frequency domain - done in
* software.
*
* This file is cpompatible with both runtime and driver API, runtime is used for development
* with Nvidia Nsight, driver API when calling these kernels from Java
*/
#ifndef JCUDA
#define DTT_SIZE_LOG2 3
#endif
#pragma once
#define DTT_SIZE (1 << DTT_SIZE_LOG2)
#define DTT_SIZE1 (DTT_SIZE + 1)
#define DTT_SIZE2 (2 * DTT_SIZE)
#define DTT_SIZE21 (DTT_SIZE2 + 1)
#define DTT_SIZE4 (4 * DTT_SIZE)
#define DTT_SIZE2M1 (DTT_SIZE2 - 1)
#define BAYER_RED 0
#define BAYER_BLUE 1
#define BAYER_GREEN 2
// assuming GR/BG as now
#define BAYER_RED_ROW 0
#define BAYER_RED_COL 1
#define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
//extern __constant__ float idct_signs[4][4][4];
//extern __constant__ int imclt_indx9[16];
//extern __constant__ float HWINDOW2[];
inline
__device__
void
dttii_shared_mem_nonortho
(
float
*
x0
,
int
inc
,
int
dst_not_dct
);
// does not scale by y[0] (y[7]) by 1/sqrt[0]
inline
__device__
void
dttii_shared_mem
(
float
*
x0
,
int
inc
,
int
dst_not_dct
);
// used in GPU_DTT24_DRV
inline
__device__
void
dttiv_shared_mem
(
float
*
x0
,
int
inc
,
int
dst_not_dct
);
// used in GPU_DTT24_DRV
inline
__device__
void
dttiv_nodiverg
(
float
*
x
,
int
inc
,
int
dst_not_dct
);
// not used
inline
__device__
void
dctiv_nodiverg
(
float
*
x0
,
int
inc
);
// used in TP
inline
__device__
void
dstiv_nodiverg
(
float
*
x0
,
int
inc
);
// used in TP
inline
__device__
void
dct_ii8
(
float
x
[
8
],
float
y
[
8
]);
// x,y point to 8-element arrays each // not used
inline
__device__
void
dct_iv8
(
float
x
[
8
],
float
y
[
8
]);
// x,y point to 8-element arrays each // not used
inline
__device__
void
dst_iv8
(
float
x
[
8
],
float
y
[
8
]);
// x,y point to 8-element arrays each // not used
inline
__device__
void
_dctii_nrecurs8
(
float
x
[
8
],
float
y
[
8
]);
// x,y point to 8-element arrays each // not used
inline
__device__
void
_dctiv_nrecurs8
(
float
x
[
8
],
float
y
[
8
]);
// x,y point to 8-element arrays each // not used
// kernels (not used so far)
#ifdef BBBB
extern
"C"
__global__
void
GPU_DTT24_DRV
(
float
*
dst
,
float
*
src
,
int
src_stride
,
int
dtt_mode
);
#endif// #ifdef BBBB
//=========================== 2D functions ===============
extern
__device__
void
corrUnfoldTile
(
int
corr_radius
,
float
*
qdata0
,
// [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data, rows extended to optimize shared ports
float
*
rslt
);
// [DTT_SIZE2M1][DTT_SIZE2M1]) // 15x15
extern
__device__
void
dttii_2d
(
float
*
clt_corr
);
// shared memory, [4][DTT_SIZE1][DTT_SIZE]
extern
__device__
void
dttiv_color_2d
(
float
*
clt_tile
,
int
color
);
extern
__device__
void
imclt
(
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
mclt_tile
);
extern
__device__
void
imclt8threads
(
int
do_acc
,
// 1 - add to previous value, 0 - overwrite
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
mclt_tile
,
// [2* DTT_SIZE][DTT_SIZE1+ DTT_SIZE], // +1 to alternate column ports[16][17]
int
debug
);
src/main/resources/kernels/test_tp.cu
0 → 100644
View file @
3d5ddc28
This diff is collapsed.
Click to expand it.
src/main/resources/kernels/tp_defines.h
0 → 100644
View file @
3d5ddc28
/**
**
** tp_defines.h
**
** Copyright (C) 2020 Elphel, Inc.
**
** -----------------------------------------------------------------------------**
**
** tp_defines.h is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
** Additional permission under GNU GPL version 3 section 7
**
** If you modify this Program, or any covered work, by linking or
** combining it with NVIDIA Corporation's CUDA libraries from the
** NVIDIA CUDA Toolkit (or a modified version of those libraries),
** containing parts covered by the terms of NVIDIA CUDA Toolkit
** EULA, the licensors of this Program grant you additional
** permission to convey the resulting work.
** -----------------------------------------------------------------------------**
*/
/**
**************************************************************************
* \file tp_defines.h
* \brief Defines for running in C++ environment, replaced when called from Java
*/
// Avoiding includes in jcuda, all source files will be merged
#pragma once
#ifndef JCUDA
#define THREADSX (DTT_SIZE)
#define NUM_CAMS 4
#define NUM_PAIRS 6
#define NUM_COLORS 3
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK 4
#define CORR_THREADS_PER_TILE 8
#define CORR_TILES_PER_BLOCK 4
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_TILES_PER_BLOCK 1
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define TASK_CORR_BITS 4
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
#define TASK_TEXTURE_E_BIT 1 // Texture with East neighbor
#define TASK_TEXTURE_S_BIT 2 // Texture with South neighbor
#define TASK_TEXTURE_W_BIT 3 // Texture with West neighbor
#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
#define LIST_TEXTURE_BIT 7 // bit to request texture calculation
#define CORR_OUT_RAD 4
#define FAT_ZERO_WEIGHT 0.0001 // add to port weights to avoid nan
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
//#undef HAS_PRINTF
#define HAS_PRINTF
//7
//#define DEBUG1 1
//#define DEBUG2 1
//#define DEBUG3 1
//#define DEBUG4 1
//#define DEBUG5 1
//#define DEBUG6 1
/*
#define DEBUG7 1
#define DEBUG8 1
#define DEBUG9 1
*/
#define DEBUG10 1
#define DEBUG11 1
#define DEBUG12 1
//#define USE_textures_gen
#define DEBUG_OOB1 1
#endif //#ifndef JCUDA
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment