Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
18d8e56b
Commit
18d8e56b
authored
Nov 21, 2021
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
started updating
parent
94114c14
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
9 additions
and
9 deletions
+9
-9
TileProcessor.cuh
src/TileProcessor.cuh
+3
-3
test_tp.cu
src/test_tp.cu
+5
-5
tp_defines.h
src/tp_defines.h
+1
-1
No files found.
src/TileProcessor.cuh
View file @
18d8e56b
...
@@ -1318,7 +1318,7 @@ extern "C" __global__ void correlate2D_inner(
...
@@ -1318,7 +1318,7 @@ extern "C" __global__ void correlate2D_inner(
*
*
* @param num_tiles, // number of tiles to process (each with num_pairs)
* @param num_tiles, // number of tiles to process (each with num_pairs)
* @param num_pairs, // num pairs per tile (should be the same)
* @param num_pairs, // num pairs per tile (should be the same)
* @param init_output, // !=0 - reset output tiles to zero before accumulating
* @param init_output, // & 1 - reset output tiles to zero before accumulating, &2 no transpose
* @param pairs_mask, // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
* @param pairs_mask, // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
* @param gpu_corr_indices, // packed tile+pair
* @param gpu_corr_indices, // packed tile+pair
* @param gpu_combo_indices, // output if noty null: packed tile+pairs_mask (will point to the first used pair
* @param gpu_combo_indices, // output if noty null: packed tile+pairs_mask (will point to the first used pair
...
@@ -1411,7 +1411,7 @@ extern "C" __global__ void corr2D_combine_inner(
...
@@ -1411,7 +1411,7 @@ extern "C" __global__ void corr2D_combine_inner(
float *clt = clt_corr + threadIdx.x;
float *clt = clt_corr + threadIdx.x;
float *mem_corr = gpu_corrs_combo + corr_stride_combo * tile_index + threadIdx.x;
float *mem_corr = gpu_corrs_combo + corr_stride_combo * tile_index + threadIdx.x;
if (init_output
!= 0
){ // reset combo
if (init_output
& 1
){ // reset combo
#pragma unroll
#pragma unroll
for (int i = 0; i < DTT_SIZE4; i++){
for (int i = 0; i < DTT_SIZE4; i++){
(*clt) = 0.0f;
(*clt) = 0.0f;
...
@@ -1439,7 +1439,7 @@ extern "C" __global__ void corr2D_combine_inner(
...
@@ -1439,7 +1439,7 @@ extern "C" __global__ void corr2D_combine_inner(
// if (corr_pair > NUM_PAIRS){
// if (corr_pair > NUM_PAIRS){
// return; // BUG - should not happen
// return; // BUG - should not happen
// }
// }
if (
PAIRS_HOR_DIAG_MAIN & pair_bit
){ // just accumulate. This if-s will branch in all threads, no diversion
if (
(PAIRS_HOR_DIAG_MAIN & pair_bit) || (init_output & 2)
){ // just accumulate. This if-s will branch in all threads, no diversion
clt = clt_corr + threadIdx.x;
clt = clt_corr + threadIdx.x;
mem_corr = gpu_corrs + corr_stride_combo * corr_tile_index + threadIdx.x;
mem_corr = gpu_corrs + corr_stride_combo * corr_tile_index + threadIdx.x;
#pragma unroll
#pragma unroll
...
...
src/test_tp.cu
View file @
18d8e56b
...
@@ -274,7 +274,7 @@ int main(int argc, char **argv)
...
@@ -274,7 +274,7 @@ int main(int argc, char **argv)
"/home/eyesis/git/tile_processor_gpu/clt/main_chn2.portsxy",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn2.portsxy",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn3.portsxy"};
"/home/eyesis/git/tile_processor_gpu/clt/main_chn3.portsxy"};
#ifndef DBG_TILE
//#ifndef DBG_TILE
const char* ports_clt_file[] = { // never referenced
const char* ports_clt_file[] = { // never referenced
"/home/eyesis/git/tile_processor_gpu/clt/main_chn0.clt",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn0.clt",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.clt",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.clt",
...
@@ -285,7 +285,7 @@ int main(int argc, char **argv)
...
@@ -285,7 +285,7 @@ int main(int argc, char **argv)
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.rbg",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.rbg",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn2.rbg",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn2.rbg",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn3.rbg"};
"/home/eyesis/git/tile_processor_gpu/clt/main_chn3.rbg"};
#endif
//#endif
const char* result_corr_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr.corr";
const char* result_corr_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr.corr";
const char* result_corr_quad_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-quad.corr";
const char* result_corr_quad_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-quad.corr";
const char* result_corr_cross_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-cross.corr";
const char* result_corr_cross_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-cross.corr";
...
@@ -519,7 +519,7 @@ int main(int argc, char **argv)
...
@@ -519,7 +519,7 @@ int main(int argc, char **argv)
// segfault in the next
// segfault in the next
gpu_tasks = (struct tp_task *) copyalloc_kernel_gpu((float * ) &task_data, tp_task_size * (sizeof(struct tp_task)/sizeof(float)));
gpu_tasks = (struct tp_task *) copyalloc_kernel_gpu((float * ) &task_data, tp_task_size * (sizeof(struct tp_task)/sizeof(float)));
// build corr_indices - not needed anympore?
// build corr_indices - not needed anymore?
/*
/*
num_corrs = 0;
num_corrs = 0;
for (int ty = 0; ty < TILESY; ty++){
for (int ty = 0; ty < TILESY; ty++){
...
@@ -940,13 +940,13 @@ int main(int argc, char **argv)
...
@@ -940,13 +940,13 @@ int main(int argc, char **argv)
3* (IMG_HEIGHT + DTT_SIZE),
3* (IMG_HEIGHT + DTT_SIZE),
cudaMemcpyDeviceToHost));
cudaMemcpyDeviceToHost));
#ifndef DBG_TILE
///#ifndef DBG_TILE
printf("Writing RBG data to %s\n", result_rbg_file[ncam]);
printf("Writing RBG data to %s\n", result_rbg_file[ncam]);
writeFloatsToFile( // will have margins
writeFloatsToFile( // will have margins
cpu_corr_image, // float * data, // allocated array
cpu_corr_image, // float * data, // allocated array
rslt_img_size, // int size, // length in elements
rslt_img_size, // int size, // length in elements
result_rbg_file[ncam]); // const char * path) // file path
result_rbg_file[ncam]); // const char * path) // file path
#endif
///#endif
}
}
free(cpu_corr_image);
free(cpu_corr_image);
...
...
src/tp_defines.h
View file @
18d8e56b
...
@@ -99,7 +99,7 @@
...
@@ -99,7 +99,7 @@
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#undef DBG_MARK_DBG_TILE
#undef DBG_MARK_DBG_TILE
//#undef DBG_TILE
//#undef HAS_PRINTF
//#undef HAS_PRINTF
#define HAS_PRINTF
#define HAS_PRINTF
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment