Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
2bed5def
Commit
2bed5def
authored
Apr 07, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
preparing for separate compilation in java3
parent
406eb708
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
97 additions
and
82 deletions
+97
-82
TileProcessor.cuh
src/TileProcessor.cuh
+3
-82
tp_defines.h
src/tp_defines.h
+94
-0
No files found.
src/TileProcessor.cuh
View file @
2bed5def
...
...
@@ -39,60 +39,10 @@
// Avoiding includes in jcuda, all source files will be merged
#pragma once
#ifndef JCUDA
#define THREADSX (DTT_SIZE)
#define NUM_CAMS 4
#define NUM_PAIRS 6
#define NUM_COLORS 3
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK 4
#define CORR_THREADS_PER_TILE 8
#define CORR_TILES_PER_BLOCK 4
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_TILES_PER_BLOCK 1
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define TASK_CORR_BITS 4
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
#define TASK_TEXTURE_E_BIT 1 // Texture with East neighbor
#define TASK_TEXTURE_S_BIT 2 // Texture with South neighbor
#define TASK_TEXTURE_W_BIT 3 // Texture with West neighbor
#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
#define LIST_TEXTURE_BIT 7 // bit to request texture calculation
#define CORR_OUT_RAD 4
#define FAT_ZERO_WEIGHT 0.0001 // add to port weights to avoid nan
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
//#undef HAS_PRINTF
#define HAS_PRINTF
//7
//#define DEBUG1 1
//#define DEBUG2 1
//#define DEBUG3 1
//#define DEBUG4 1
//#define DEBUG5 1
//#define DEBUG6 1
/*
#define DEBUG7 1
#define DEBUG8 1
#define DEBUG9 1
*/
#define DEBUG10 1
#define DEBUG11 1
#define DEBUG12 1
//#define USE_textures_gen
#define DEBUG_OOB1 1
#endif //#ifndef JCUDA
#include "tp_defines.h"
#endif // #ifndef JCUDA
#include "dtt8x8.h"
#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
//#define IMCLT14
...
...
@@ -426,35 +376,6 @@ __constant__ int fold_inc[]= {0x02feee12, 0x021eeef2};
//__constant__ int imclt_indx[16] = {0x24,0x2c,0x34,0x3c,0x3c,0x34,0x2c,0x24,0x1c,0x22,0x21,0x20,0x20,0x21,0x22,0x23};
//__constant__ int imclt_indx9[16] = {0x28,0x31,0x3a,0x43,0x43,0x3a,0x31,0x28,0x1f,0x16,0x0d,0x04,0x04,0x0d,0x16,0x1f};
#ifdef BBBB
__constant__ float HWINDOW2[] = {0.049009f, 0.145142f, 0.235698f, 0.317197f,
0.386505f, 0.440961f, 0.478470f, 0.497592f};
__constant__ int imclt_indx9[16] = {0x28,0x29,0x2a,0x2b,0x2b,0x2a,0x29,0x28,0x27,0x26,0x25,0x24,0x24,0x25,0x26,0x27};
// Hope that if 2 outer indices are known at compile time there will be no integer multiplications
__constant__ float idct_signs[4][4][4] ={
{ // quadrant 0, each elements corresponds to 4x4 pixel output, covering altogether 16x16
{ 1,-1,-1,-1},
{-1, 1, 1, 1},
{-1, 1, 1, 1},
{-1, 1, 1, 1}
},{ // quadrant 1, each elements corresponds to 4x4 pixel output, covering altogether 16x16
{ 1, 1, 1,-1},
{-1,-1,-1, 1},
{-1,-1,-1, 1},
{-1,-1,-1, 1}
},{ // quadrant 2, each elements corresponds to 4x4 pixel output, covering altogether 16x16
{ 1,-1,-1,-1},
{ 1,-1,-1,-1},
{ 1,-1,-1,-1},
{-1, 1, 1, 1}
},{ // quadrant 3, each elements corresponds to 4x4 pixel output, covering altogether 16x16
{ 1, 1, 1,-1},
{ 1, 1, 1,-1},
{ 1, 1, 1,-1},
{-1,-1,-1, 1}
}};
#endif
// LPF for sigma 0.9 each color (modify through cudaMemcpyToSymbol() or similar in Driver API
//#ifndef NOICLT
__constant__ float lpf_data[4][64]={
...
...
src/tp_defines.h
0 → 100644
View file @
2bed5def
/**
**
** tp_defines.h
**
** Copyright (C) 2020 Elphel, Inc.
**
** -----------------------------------------------------------------------------**
**
** tp_defines.h is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
** Additional permission under GNU GPL version 3 section 7
**
** If you modify this Program, or any covered work, by linking or
** combining it with NVIDIA Corporation's CUDA libraries from the
** NVIDIA CUDA Toolkit (or a modified version of those libraries),
** containing parts covered by the terms of NVIDIA CUDA Toolkit
** EULA, the licensors of this Program grant you additional
** permission to convey the resulting work.
** -----------------------------------------------------------------------------**
*/
/**
**************************************************************************
* \file tp_defines.h
* \brief Defines for running in C++ environment, replaced when called from Java
*/
// Avoiding includes in jcuda, all source files will be merged
#pragma once
#ifndef JCUDA
#define THREADSX (DTT_SIZE)
#define NUM_CAMS 4
#define NUM_PAIRS 6
#define NUM_COLORS 3
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK 4
#define CORR_THREADS_PER_TILE 8
#define CORR_TILES_PER_BLOCK 4
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_TILES_PER_BLOCK 1
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define TASK_CORR_BITS 4
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
#define TASK_TEXTURE_E_BIT 1 // Texture with East neighbor
#define TASK_TEXTURE_S_BIT 2 // Texture with South neighbor
#define TASK_TEXTURE_W_BIT 3 // Texture with West neighbor
#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
#define LIST_TEXTURE_BIT 7 // bit to request texture calculation
#define CORR_OUT_RAD 4
#define FAT_ZERO_WEIGHT 0.0001 // add to port weights to avoid nan
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
//#undef HAS_PRINTF
#define HAS_PRINTF
//7
//#define DEBUG1 1
//#define DEBUG2 1
//#define DEBUG3 1
//#define DEBUG4 1
//#define DEBUG5 1
//#define DEBUG6 1
/*
#define DEBUG7 1
#define DEBUG8 1
#define DEBUG9 1
*/
#define DEBUG10 1
#define DEBUG11 1
#define DEBUG12 1
//#define USE_textures_gen
#define DEBUG_OOB1 1
#endif //#ifndef JCUDA
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment