Elphel / tile_processor_gpu / Commits
Commit 46256d14 authored Apr 01, 2025 by Andrey Filippov
GenerateRgbaHost - from class instance
parent 1bf5e8bf
Showing 4 changed files with 356 additions and 18 deletions
src/GenerateRgbaHost.cu   +294  -0
src/GenerateRgbaHost.h    +32   -0
src/test_tp.cu            +11   -18
src/tp_defines.h          +19   -0
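In short, this commit moves the host-side RGBA texture generation into a new GenerateRgbaHost class: the free generate_RBGA_host() previously compiled into test_tp.cu (together with the generate_RGBA_host.inc include) is disabled under #if 0, and the test harness now calls the same routine through a class instance. A minimal sketch of the call-site change, condensed from the test_tp.cu hunks below (the argument list is unchanged and abbreviated here):

    // before: free function
    //     generate_RBGA_host (num_cams, gpu_ftasks, /* ... */ gpu_texture_tiles);
    // after: method on a stack-allocated instance
    GenerateRgbaHost generateRgbaHost{};
    generateRgbaHost.generate_RBGA_host (num_cams, gpu_ftasks, /* ... */ gpu_texture_tiles);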
src/GenerateRgbaHost.cu
new file (mode 100644)
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include <helper_functions.h>
#include "tp_defines.h" // was not here
#include "dtt8x8.h"
/*
#include "tp_defines.h" // was not here
#include "geometry_correction.h"
*/
#include "TileProcessor.h"
#include "tp_utils.h" // for host_get_textures_shared_size
#include "GenerateRgbaHost.h"
GenerateRgbaHost::GenerateRgbaHost(){
}
GenerateRgbaHost::~GenerateRgbaHost(){
}
void GenerateRgbaHost::generate_RBGA_host(
int num_cams, // number of cameras used
// Parameters to generate texture tasks
float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16p// struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
// declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * gpu_num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation)
int * gpu_woi, // x,y,width,height of the woi
int width, // <= TILES-X, use for faster processing of LWIR images (should be actual + 1)
int height, // <= TILES-Y, use for faster processing of LWIR images
// Parameters for the texture generation
float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction
const float cpu_params[5], // mitigating CUDA_ERROR_INVALID_PTX
const float weights[3], // scale for R,B,G should be host_array, not gpu
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
const int texture_rbga_stride, // in floats
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
{
int cpu_woi[4];
int cpu_num_texture_tiles[8];
float min_shot = cpu_params[0]; // 10.0
float scale_shot = cpu_params[1]; // 3.0
float diff_sigma = cpu_params[2]; // pixel value/pixel change
float diff_threshold = cpu_params[3]; // pixel value/pixel change
float min_agree = cpu_params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
int tilesya = ((height +3) & (~3)); //#define TILES-YA ((TILES-Y +3) & (~3))
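// Note: tilesya rounds the number of tile rows up to a multiple of 4; the texture index list is
// laid out as four quadrants of width * (tilesya >> 2) entries each, which is what the ti_offset
// arithmetic in the per-pass loop below relies on.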
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
dim3 blocks0 (blocks_x, height, 1);
clear_texture_list<<<blocks0,threads0>>>(
gpu_texture_indices,
width,
height);
checkCudaErrors(cudaDeviceSynchronize()); // not needed yet, just for testing
dim3 threads((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_t = (num_tiles + ((1 << THREADS_DYNAMIC_BITS)) -1) >> THREADS_DYNAMIC_BITS;//
dim3 blocks(blocks_t, 1, 1);
// mark used tiles in gpu_texture_indices memory
mark_texture_tiles <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
num_tiles, // number of tiles in task list
width, // number of tiles in a row
gpu_texture_indices); // packed tile + bits (now only (1 << 7)
checkCudaErrors(cudaDeviceSynchronize());
// mark n/e/s/w used tiles from gpu_texture_indices memory to gpu_tasks lower 4 bits
checkCudaErrors(cudaMemcpy(
(float * ) cpu_woi,
gpu_woi,
4 * sizeof(float),
cudaMemcpyDeviceToHost));
cpu_woi[0] = width;
cpu_woi[1] = height;
cpu_woi[2] = 0;
cpu_woi[3] = 0;
checkCudaErrors(cudaMemcpy(
gpu_woi,
cpu_woi,
4 * sizeof(float),
cudaMemcpyHostToDevice));
/*
*(woi + 0) = width; // TILES-X;
*(woi + 1) = height; // TILES-Y;
*(woi + 2) = 0; // maximal x
*(woi + 3) = 0; // maximal y
int * gpu_woi = (int *) copyalloc_kernel_gpu(
(float * ) woi,
4); // number of elements
*/
// TODO: create gpu_woi to pass (copy from woi)
// set lower 4 bits in each gpu_ftasks task
mark_texture_neighbor_tiles <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
num_tiles, // number of tiles in task list
width, // number of tiles in a row
height, // number of tiles rows
gpu_texture_indices, // packed tile + bits (now only (1 << 7)
gpu_woi); // min_x, min_y, max_x, max_y
checkCudaErrors(cudaDeviceSynchronize());
/*
checkCudaErrors(cudaMemcpy( //
(float * ) cpu_num_texture_tiles,
gpu_num_texture_tiles,
8 * sizeof(float),
cudaMemcpyDeviceToHost));
*/
// Generate tile indices list, upper 24 bits - tile index, lower 4 bits: n/e/s/w neighbors, bit 7 - set to 1
for (int i = 0; i <8; i++){
cpu_num_texture_tiles[i] = 0;
}
/*
*(num_texture_tiles+0) = 0;
*(num_texture_tiles+1) = 0;
*(num_texture_tiles+2) = 0;
*(num_texture_tiles+3) = 0;
*(num_texture_tiles+4) = 0;
*(num_texture_tiles+5) = 0;
*(num_texture_tiles+6) = 0;
*(num_texture_tiles+7) = 0;
*/
// copy zeroed num_texture_tiles
// int * gpu_num_texture_tiles = (int *) copyalloc_kernel_gpu(
// (float * ) num_texture_tiles,
// 8); // number of elements
checkCudaErrors(cudaMemcpy(
gpu_num_texture_tiles,
cpu_num_texture_tiles,
8 * sizeof(float),
cudaMemcpyHostToDevice));
gen_texture_list <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
num_tiles, // number of tiles in task list
width, // number of tiles in a row
height, // int height, // number of tiles rows
gpu_texture_indices, // packed tile + bits (now only (1 << 7)
gpu_num_texture_tiles, // number of texture tiles to process
gpu_woi); // x,y, here woi[2] = max_X, woi[3] - max-Y
checkCudaErrors(cudaDeviceSynchronize()); // not needed yet, just for testing
// copy gpu_woi back to host woi
checkCudaErrors(cudaMemcpy(
(float * ) cpu_woi,
gpu_woi,
4 * sizeof(float),
cudaMemcpyDeviceToHost));
// *(cpu_woi + 2) += 1 - *(cpu_woi + 0); // width (was min and max)
// *(cpu_woi + 3) += 1 - *(cpu_woi + 1); // height (was min and max)
cpu_woi[2] += 1 - cpu_woi[0]; // width (was min and max)
cpu_woi[3] += 1 - cpu_woi[1]; // height (was min and max)
// copy host-modified data back to GPU
checkCudaErrors(cudaMemcpy(
gpu_woi,
cpu_woi,
4 * sizeof(float),
cudaMemcpyHostToDevice));
// copy gpu_num_texture_tiles back to host num_texture_tiles
checkCudaErrors(cudaMemcpy(
(float * ) cpu_num_texture_tiles,
gpu_num_texture_tiles,
8 * sizeof(float),
cudaMemcpyDeviceToHost));
// Zero output textures. Trim
// texture_rbga_stride
// int texture_width = (*(cpu_woi + 2) + 1) * DTT_SIZE;
// int texture_tiles_height = (*(cpu_woi + 3) + 1) * DTT_SIZE;
int texture_width = (cpu_woi[2] + 1) * DTT_SIZE;
int texture_tiles_height = (cpu_woi[3] + 1) * DTT_SIZE;
int texture_slices = colors + 1;
if (keep_weights & 2){
texture_slices += colors * num_cams;
}
dim3 threads2((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x2 = (texture_width + ((1 << (THREADS_DYNAMIC_BITS + DTT_SIZE_LOG2 )) - 1)) >> (THREADS_DYNAMIC_BITS + DTT_SIZE_LOG2);
dim3 blocks2 (blocks_x2, texture_tiles_height * texture_slices, 1); // each thread - 8 vertical
#ifdef DEBUG8A
int cpu_texture_indices [TILESX*TILESYA];
checkCudaErrors(cudaMemcpy(
(float * ) cpu_texture_indices,
gpu_texture_indices,
TILESX*TILESYA * sizeof(float),
cudaMemcpyDeviceToHost));
for (int i = 0; i < 256; i++){
int indx = cpu_texture_indices[i];
printf("%02d %04x %03d %03d %x\n",i,indx, (indx>>8) / 80, (indx >> 8) % 80, indx&0xff);
}
#endif // #ifdef DEBUG8A
clear_texture_rbga<<<blocks2,threads2>>>( // illegal value error
texture_width,
texture_tiles_height * texture_slices, // int texture_slice_height,
texture_rbga_stride, // const size_t texture_rbga_stride, // in floats 8*stride
gpu_texture_tiles) ; // float * gpu_texture_tiles);
// Run 8 times - first 4 1-tile offsets inner tiles (w/o verifying margins), then - 4 times with verification and ignoring 4-pixel
// oversize (border 16x 16 tiles overhang by 4 pixels)
checkCudaErrors(cudaDeviceSynchronize()); // not needed yet, just for testing
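// Note: in the indexing below, (pass & 3) selects one of the four 1-tile offsets and
// (pass >> 2) selects inner (0) vs. border (1) tiles, so the tile count for a pass is
// cpu_num_texture_tiles[((pass & 3) << 1) + (pass >> 2)].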
for (int pass = 0; pass < 8; pass++){
int num_cams_per_thread = NUM_THREADS / TEXTURE_THREADS_PER_TILE; // 4 cameras parallel, then repeat
dim3 threads_texture(TEXTURE_THREADS_PER_TILE, num_cams_per_thread, 1); // TEXTURE_TILES_PER_BLOCK, 1);
int border_tile = pass >> 2;
int ntt = *(cpu_num_texture_tiles + ((pass & 3) << 1) + border_tile);
int *pntt = gpu_num_texture_tiles + ((pass & 3) << 1) + border_tile;
dim3 grid_texture((ntt + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1); // TEXTURE_TILES_PER_BLOCK = 1
/* before CDP2
int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILES-X * (TILES-YA >> 2)); // 1/4
if (border_tile){
ti_offset += width * (tilesya >> 2) - ntt; // TILES-X * (TILES-YA >> 2) - ntt;
}
*/
// for CDP2
int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILES-X * (TILES-YA >> 2)); // 1/4
if (border_tile){
// ti_offset += width * (tilesya >> 2) - ntt; // TILES-X * (TILES-YA >> 2) - ntt;
ti_offset += width * (tilesya >> 2); // TILES-X * (TILES-YA >> 2) - ntt;
ti_offset = - ti_offset; // does not depend on results of the previous kernel, but is negative
}
#ifdef DEBUG8A
printf("\ngenerate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d\n",
pass, border_tile,ti_offset, ntt);
printf("\ngenerate_RBGA() gpu_texture_indices= %p, gpu_texture_indices + ti_offset= %p\n",
(void *) gpu_texture_indices, (void *) (gpu_texture_indices + ti_offset));
printf("\ngenerate_RBGA() grid_texture={%d, %d, %d)\n",
grid_texture.x, grid_texture.y, grid_texture.z);
printf("\ngenerate_RBGA() threads_texture={%d, %d, %d)\n",
threads_texture.x, threads_texture.y, threads_texture.z);
printf("\n");
#endif
/* */
int shared_size = host_get_textures_shared_size( // in bytes
num_cams, // int num_cams, // actual number of cameras
colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
0); // int * offsets); // in floats
printf("\n2. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, colors);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_size); // 65536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
textures_accumulate <<<grid_texture,threads_texture, shared_size>>>(
num_cams, // int num_cams, // number of cameras used
gpu_woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
pntt, // ntt, // int * num_texture_tiles, // number of texture tiles to process
ti_offset, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
colors, // int colors, // number of colors (3/1)
is_lwir, // int is_lwir, // do not perform shot correction
min_shot, // float min_shot, // 10.0
scale_shot, // float scale_shot, // 3.0
diff_sigma, // float diff_sigma, // pixel value/pixel change
diff_threshold, // float diff_threshold, // pixel value/pixel change
min_agree, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
weights, // float weights[3], // scale for R,B,G
dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
keep_weights, // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
texture_rbga_stride, // size_t texture_rbg_stride, // in floats
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024)
(float *) 0, // gpu_texture_tiles, // float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // 1, // int linescan_order, // if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
(float *)0, //);//gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
width);
checkCudaErrors(cudaDeviceSynchronize()); // not needed yet, just for testing
/* */
}
// checkCudaErrors(cudaFree(gpu_woi));
// checkCudaErrors(cudaFree(gpu_num_texture_tiles));
// __syncthreads();
};
src/GenerateRgbaHost.h
new file (mode 100644)
#ifndef GENERATE_RGBA_HOST_H_
#define GENERATE_RGBA_HOST_H_

class GenerateRgbaHost {
public:
    GenerateRgbaHost();
    ~GenerateRgbaHost();
    void generate_RBGA_host(
            int num_cams,                  // number of cameras used
            // Parameters to generate texture tasks
            float * gpu_ftasks,            // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16p// struct tp_task * gpu_tasks,
            int num_tiles,                 // number of tiles in task list
            // declare arrays in device code?
            int * gpu_texture_indices,     // packed tile + bits (now only (1 << 7)
            int * gpu_num_texture_tiles,   // number of texture tiles to process (8 separate elements for accumulation)
            int * gpu_woi,                 // x,y,width,height of the woi
            int width,                     // <= TILES-X, use for faster processing of LWIR images (should be actual + 1)
            int height,                    // <= TILES-Y, use for faster processing of LWIR images
            // Parameters for the texture generation
            float ** gpu_clt,              // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
            // TODO: use geometry_correction rXY !
            struct gc * gpu_geometry_correction,
            int colors,                    // number of colors (3/1)
            int is_lwir,                   // do not perform shot correction
            const float cpu_params[5],     // mitigating CUDA_ERROR_INVALID_PTX
            const float weights[3],        // scale for R,B,G should be host_array, not gpu
            int dust_remove,               // Do not reduce average weight when only one image differs much from the average
            int keep_weights,              // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
            const int texture_rbga_stride, // in floats
            float * gpu_texture_tiles);    // (number of colors +1 + ?)*16*16 rgba texture tiles
};
#endif
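Note that, as the empty constructor and destructor in GenerateRgbaHost.cu above suggest, the class appears to carry no state of its own, so the single stack instance created in test_tp.cu below (GenerateRgbaHost generateRgbaHost{};) is sufficient; the commented-out // delete generateRgbaHost; in the cleanup code looks like a leftover reminder from a heap-allocated variant.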
src/test_tp.cu
@@ -65,7 +65,8 @@
 #include "tp_files.h"
 //#include "tp_paths.cuh"
 #include "tp_paths.h"
+#include "GenerateRgbaHost.h"
+/*
 #if TEST_LWIR
 #define IMG_WIDTH 640
 #define IMG_HEIGHT 512
@@ -83,9 +84,9 @@
 #define TILESX (IMG_WIDTH / DTT_SIZE)
 #define TILESY (IMG_HEIGHT / DTT_SIZE)
 #define TILESYA ((TILESY +3) & (~3))
+*/
-#include "generate_RGBA_host.inc"
+// #include "generate_RGBA_host.inc"
+#if 0
 void generate_RBGA_host(
 int num_cams, // number of cameras used
 // Parameters to generate texture tasks
@@ -359,7 +360,7 @@ void generate_RBGA_host(
 // checkCudaErrors(cudaFree(gpu_num_texture_tiles));
 // __syncthreads();
 }
+#endif // 0
 /**
@@ -392,6 +393,7 @@ int main(int argc, char **argv)
 #endif
 TpPaths tpPaths(use_lwir);
+GenerateRgbaHost generateRgbaHost{}; // = new GenerateRgbaHost();
 #if TEST_LWIR
@@ -474,22 +476,13 @@ int main(int argc, char **argv)
 int KERN_TILES = KERNELS_HOR * KERNELS_VERT * num_colors; // NUM_COLORS;
 int KERN_SIZE = KERN_TILES * 4 * 64;
 int CORR_SIZE = (2 * CORR_OUT_RAD + 1) * (2 * CORR_OUT_RAD + 1);
 float * host_kern_buf = (float *)malloc(KERN_SIZE * sizeof(float));
-// static - see https://stackoverflow.com/questions/20253267/segmentation-fault-before-main
-/// static struct tp_task task_data [TILESX*TILESY]; // maximal length - each tile
-/// static struct tp_task task_data1 [TILESX*TILESY]; // maximal length - each tile
 float * ftask_data = (float *) malloc(TILESX * TILESY * task_size * sizeof(float));
 float * ftask_data1 = (float *) malloc(TILESX * TILESY * task_size * sizeof(float));
 trot_deriv rot_deriv;
-/// int corr_indices [NUM_PAIRS*TILESX*TILESY];
 int texture_indices [TILESX*TILESYA];
 int cpu_woi [4];
@@ -521,7 +514,6 @@ int main(int argc, char **argv)
 float * gpu_generate_RBGA_params;
 int num_corrs;
 int num_textures;
-/// int num_ports = NUM_CAMS;
 // GPU pointers to GPU pointers to memory
 float ** gpu_kernels; // [NUM_CAMS];
 struct CltExtra ** gpu_kernel_offsets; // [NUM_CAMS];
@@ -532,7 +524,6 @@ int main(int argc, char **argv)
 // GPU pointers to GPU memory
-/// struct tp_task * gpu_tasks; // TODO: ***** remove ! **** DONE
 float * gpu_ftasks; // TODO: ***** allocate ! **** DONE
 int * gpu_active_tiles;
 int * gpu_num_active;
@@ -554,6 +545,7 @@ int main(int argc, char **argv)
 size_t dstride_textures_rbga; // in bytes ! for one rgba/ya 16x16 tile
 struct gc fgeometry_correction;
 float* correction_vector;
 int correction_vector_length;
 float * rByRDist;
@@ -2044,7 +2036,7 @@ int main(int argc, char **argv)
 }
 // FIXME: update to use new correlations and num_cams
 #ifdef NO_DP
-generate_RBGA_host (
+generateRgbaHost.generate_RBGA_host (
 num_cams, // int num_cams, // number of cameras used
 // Parameters to generate texture tasks
 gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
@@ -2222,6 +2214,7 @@ int main(int argc, char **argv)
 free (correction_vector);
 free (ftask_data);
 free (ftask_data1);
+// delete generateRgbaHost;
 exit(0);
 }
src/tp_defines.h
@@ -155,6 +155,25 @@
 #endif //#ifdef DEBUG_ANY
+#if TEST_LWIR
+#define IMG_WIDTH 640
+#define IMG_HEIGHT 512
+#define KERNELS_HOR 82 // 80+2
+#define KERNELS_VERT 66 // 64+2
+#else
+#define IMG_WIDTH 2592
+#define IMG_HEIGHT 1936
+#define KERNELS_HOR 164 // 2592 / 16 + 2
+#define KERNELS_VERT 123 // 1936 / 16 + 2
+#endif
+#define CORR_OUT_RAD 7 // full tile (15x15), was 4 (9x9)
+#define DBG_DISPARITY 0.0 // 56.0// 0.0 // 56.0 // disparity for which to calculate offsets (not needed in Java)
+// only used in C++ test
+#define TILESX (IMG_WIDTH / DTT_SIZE)
+#define TILESY (IMG_HEIGHT / DTT_SIZE)
+#define TILESYA ((TILESY +3) & (~3))
 #endif //#ifndef JCUDA
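For reference, the new tile macros expand as follows, assuming DTT_SIZE = 8 (the transform size implied by dtt8x8.h; it is not stated in this diff):

    // TEST_LWIR build (640 x 512):  TILESX = 640 / 8  = 80,   TILESY = 512 / 8  = 64,   TILESYA = (64 + 3) & ~3  = 64
    // EO build (2592 x 1936):       TILESX = 2592 / 8 = 324,  TILESY = 1936 / 8 = 242,  TILESYA = (242 + 3) & ~3 = 244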