moved inline declaration from .h

f15b9b74 · Andrey Filippov · 94aa6562 · f15b9b74 · f15b9b74 · 94aa6562
Commit f15b9b74 authored Apr 08, 2020 by Andrey Filippov
Hide whitespace changes
Inline Side-by-side

Showing with 13 additions and 106 deletions

dtt8x8.cu src/main/resources/kernels/dtt8x8.cu +13 -0

dtt8x8.h src/main/resources/kernels/dtt8x8.h +0 -12

tp_defines.h src/main/resources/kernels/tp_defines.h +0 -94

No files found.
--- a/src/main/resources/kernels/dtt8x8.cu
+++ b/src/main/resources/kernels/dtt8x8.cu
@@ -105,6 +105,19 @@ __constant__ float HWINDOW2[] =  {0.049009f, 0.145142f, 0.235698f, 0.317197f,
                                  0.386505f, 0.440961f, 0.478470f, 0.497592f};


+inline __device__ void dttii_shared_mem_nonortho(float * x0,  int inc, int dst_not_dct); // does not scale y[0] (y[7]) by 1/sqrt[0]
+inline __device__ void dttii_shared_mem(float * x0,  int inc, int dst_not_dct);   // used in GPU_DTT24_DRV
+inline __device__ void dttiv_shared_mem(float * x0,  int inc, int dst_not_dct);   // used in GPU_DTT24_DRV
+inline __device__ void dttiv_nodiverg  (float * x,   int inc, int dst_not_dct);   // not used
+inline __device__ void dctiv_nodiverg  (float * x0,  int inc);                    // used in TP
+inline __device__ void dstiv_nodiverg  (float * x0,  int inc);                    // used in TP
+
+inline __device__ void dct_ii8         ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
+inline __device__ void dct_iv8         ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
+inline __device__ void dst_iv8         ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
+inline __device__ void _dctii_nrecurs8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
+inline __device__ void _dctiv_nrecurs8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
+

 /**
 **************************************************************************

--- a/src/main/resources/kernels/dtt8x8.h
+++ b/src/main/resources/kernels/dtt8x8.h
@@ -70,18 +70,6 @@
 //extern __constant__ int imclt_indx9[16];
 //extern __constant__ float HWINDOW2[];

-inline __device__ void dttii_shared_mem_nonortho(float * x0,  int inc, int dst_not_dct); // does not scale by y[0] (y[7]) by 1/sqrt[0]
-inline __device__ void dttii_shared_mem(float * x0,  int inc, int dst_not_dct);   // used in GPU_DTT24_DRV
-inline __device__ void dttiv_shared_mem(float * x0,  int inc, int dst_not_dct);   // used in GPU_DTT24_DRV
-inline __device__ void dttiv_nodiverg  (float * x,   int inc, int dst_not_dct);   // not used
-inline __device__ void dctiv_nodiverg  (float * x0,  int inc);                    // used in TP
-inline __device__ void dstiv_nodiverg  (float * x0,  int inc);                    // used in TP
-
-inline __device__ void dct_ii8         ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
-inline __device__ void dct_iv8         ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
-inline __device__ void dst_iv8         ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
-inline __device__ void _dctii_nrecurs8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
-inline __device__ void _dctiv_nrecurs8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used

 // kernels (not used so far)
 #ifdef BBBB

--- a/src/main/resources/kernels/tp_defines.h
+++ b/src/main/resources/kernels/tp_defines.h
-/**
- **
- ** tp_defines.h
- **
- ** Copyright (C) 2020 Elphel, Inc.
- **
- ** -----------------------------------------------------------------------------**
- **
- **  tp_defines.h is free software: you can redistribute it and/or modify
- **  it under the terms of the GNU General Public License as published by
- **  the Free Software Foundation, either version 3 of the License, or
- **  (at your option) any later version.
- **
- **  This program is distributed in the hope that it will be useful,
- **  but WITHOUT ANY WARRANTY; without even the implied warranty of
- **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- **  GNU General Public License for more details.
- **
- **  You should have received a copy of the GNU General Public License
- **  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- **
- **  Additional permission under GNU GPL version 3 section 7
- **
- **  If you modify this Program, or any covered work, by linking or
- **  combining it with NVIDIA Corporation's CUDA libraries from the
- **  NVIDIA CUDA Toolkit (or a modified version of those libraries),
- **  containing parts covered by the terms of NVIDIA CUDA Toolkit
- **  EULA, the licensors of this Program grant you additional
- **  permission to convey the resulting work.
- ** -----------------------------------------------------------------------------**
- */
-
-/**
-**************************************************************************
-* \file tp_defines.h
-* \brief Defines for running in C++ environment, replaced when called from Java
-
-*/
-// Avoiding includes in jcuda, all source files will be merged
-#pragma once
-#ifndef JCUDA
-#define THREADSX         (DTT_SIZE)
-#define NUM_CAMS                  4
-#define NUM_PAIRS                 6
-#define NUM_COLORS                3
-#define IMG_WIDTH              2592
-#define IMG_HEIGHT             1936
-#define KERNELS_HOR             164
-#define KERNELS_VERT            123
-#define KERNELS_LSTEP             4
-#define THREADS_PER_TILE          8
-#define TILES_PER_BLOCK           4
-#define CORR_THREADS_PER_TILE     8
-#define CORR_TILES_PER_BLOCK      4
-#define TEXTURE_THREADS_PER_TILE  8
-#define TEXTURE_TILES_PER_BLOCK   1
-#define IMCLT_THREADS_PER_TILE   16
-#define IMCLT_TILES_PER_BLOCK     4
-#define CORR_NTILE_SHIFT          8 // higher bits - number of a pair, other bits tile number
-#define CORR_PAIRS_MASK        0x3f// lower bits used to address correlation pair for the selected tile
-#define CORR_TEXTURE_BIT          7 // bit 7 used to request texture for the tile
-#define TASK_CORR_BITS            4
-#define TASK_TEXTURE_N_BIT        0 // Texture with North neighbor
-#define TASK_TEXTURE_E_BIT        1 // Texture with East  neighbor
-#define TASK_TEXTURE_S_BIT        2 // Texture with South neighbor
-#define TASK_TEXTURE_W_BIT        3 // Texture with West  neighbor
-#define TASK_TEXTURE_BIT          3 // bit to request texture calculation in the task field of struct tp_task
-#define LIST_TEXTURE_BIT          7 // bit to request texture calculation
-#define CORR_OUT_RAD              4
-#define FAT_ZERO_WEIGHT           0.0001 // add to port weights to avoid nan
-
-#define THREADS_DYNAMIC_BITS      5 // threads in block for CDP creation of the texture list
-
-//#undef HAS_PRINTF
-#define HAS_PRINTF
-//7
-//#define DEBUG1 1
-//#define DEBUG2 1
-//#define DEBUG3 1
-//#define DEBUG4 1
-//#define DEBUG5 1
-//#define DEBUG6 1
-/*
-#define DEBUG7 1
-#define DEBUG8 1
-#define DEBUG9 1
-*/
-#define DEBUG10 1
-#define DEBUG11 1
-#define DEBUG12 1
-//#define USE_textures_gen
-#define DEBUG_OOB1 1
-#endif //#ifndef JCUDA
-