Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
8612a64e
Commit
8612a64e
authored
Jul 22, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed inter-scene correlation
parent
d8e9a454
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
11 deletions
+7
-11
TileProcessor.cu
src/TileProcessor.cu
+6
-11
test_tp.cu
src/test_tp.cu
+1
-0
No files found.
src/TileProcessor.cu
View file @
8612a64e
...
...
@@ -947,11 +947,9 @@ extern "C" __global__ void correlate2D_inter( // only results in TD
float * gpu_corrs) // correlation output data
{
dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1);
dim3 blocks0 ((num_tiles
*num_tiles
+ CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
dim3 threads_corr(CORR_THREADS_PER_TILE, CORR_TILES_PER_BLOCK, 1);
// dim3 grid_corr((num_corr_tiles_wo_sum + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
dim3 grid_corr((num_cams + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
dim3 grid_combine((num_tiles + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
if (threadIdx.x == 0) { // only 1 thread, 1 block
...
...
@@ -959,6 +957,7 @@ extern "C" __global__ void correlate2D_inter( // only results in TD
if (num_sel_sensors > 0){
// try with null tp_tasks to use same sequence from GPU memory
*pnum_corr_tiles = 0;
dim3 grid_corr((num_tiles * num_sel_sensors + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
index_inter_correlate<<<blocks0,threads0, 0, cudaStreamFireAndForget>>>(
num_cams, // int num_cams,
sel_sensors, // int sel_sensors,
...
...
@@ -967,12 +966,8 @@ extern "C" __global__ void correlate2D_inter( // only results in TD
tilesx, // int width, // number of tiles in a row
gpu_corr_indices, // int * gpu_corr_indices, // array of correlation tasks
pnum_corr_tiles); // int * pnum_corr_tiles); // pointer to the length of correlation tasks array
/// cudaDeviceSynchronize();
/// __device__ int num_corr_tiles_with_sum = (*pnum_corr_tiles);
/// int num_corr_tiles_wo_sum = num_corr_tiles_with_sum * num_sel_sensors/ (num_sel_sensors + 1); // remove sum from count
/// dim3 threads_corr(CORR_THREADS_PER_TILE, CORR_TILES_PER_BLOCK, 1);
/// dim3 grid_corr((num_corr_tiles_wo_sum + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
correlate2D_inter_inner<<<grid_corr,threads_corr, 0, cudaStreamTailLaunch>>>( // will only process to TD, no normalisations and back conversion
correlate2D_inter_inner<<<grid_corr,threads_corr, 0, cudaStreamTailLaunch>>>( // will only process to TD, no normalizations and back conversion
num_cams, // int num_cams,
num_sel_sensors, // int num_sel_sensors, // number of sensors to correlate (not counting sum of all)
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
...
...
@@ -985,7 +980,7 @@ extern "C" __global__ void correlate2D_inter( // only results in TD
gpu_corr_indices, // int * gpu_corr_indices, // packed tile + sensor (0xff - sum)
corr_stride, // size_t corr_stride, // in floats
gpu_corrs); // float * gpu_corrs) // correlation output data (either pixel domain or transform domain)
/// dim3 grid_combine((num_tiles + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
combine_inter<<<grid_combine,threads_corr, 0, cudaStreamTailLaunch>>>( // combine per-sensor interscene correlations
num_sel_sensors, // int num_sel_sensors, // number of sensors to correlate (not counting sum of all)
pnum_corr_tiles, //num_corr_tiles_with_sum, // int num_corr_tiles, // number of correlation tiles to process (here it includes sum)
...
...
@@ -2771,7 +2766,7 @@ extern "C" __global__ void convert_direct( // called with a single block, singl
//__device__
*pnum_active_tiles = 0; // already _device_
int task_size = get_task_size(num_cams);
index_direct<<<blocks0,threads0, 0, cudaStreamFireAndForget>>>(
// cudaStreamFireAndForget>>>(
index_direct<<<blocks0,threads0, 0, cudaStreamFireAndForget>>>(
task_size, // int task_size, // flattened task size in 4-byte floats
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
num_tiles, //int num_tiles, // number of tiles in task
...
...
src/test_tp.cu
View file @
8612a64e
...
...
@@ -46,6 +46,7 @@
#include <cstdlib>
#include <cstdio>
#include <cuda_runtime.h> // cudaFree
//#define __CUDA_RUNTIME_H__ // turn on, then off to fix missing findCudaDevice()
#include <helper_cuda.h> // for checkCudaErrors
#include <helper_functions.h> // timer functions
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment