Commit 821b753a authored by Andrey Filippov's avatar Andrey Filippov

Refactoring, moving functions to tp_utils, tp_files<.cu,.h>

parent de3c497a
...@@ -1146,7 +1146,7 @@ extern "C" __global__ void combine_inter( // combine per-senor interscene co ...@@ -1146,7 +1146,7 @@ extern "C" __global__ void combine_inter( // combine per-senor interscene co
int corr_in_block = threadIdx.y; int corr_in_block = threadIdx.y;
int itile = blockIdx.x * CORR_TILES_PER_BLOCK + corr_in_block; // correlation tile index int itile = blockIdx.x * CORR_TILES_PER_BLOCK + corr_in_block; // correlation tile index
int corr_offset = itile * (num_sel_sensors + 1); // index of the first correlation for this task; int corr_offset = itile * (num_sel_sensors + 1); // index of the first correlation for this task;
if (corr_offset >= (num_corr_tiles - num_sel_sensors)) { if (corr_offset >= (*num_corr_tiles - num_sel_sensors)) { // was if (corr_offset >= (num_corr_tiles - num_sel_sensors)) {
return; return;
} }
// __syncthreads();// __syncwarp(); // __syncthreads();// __syncwarp();
...@@ -2058,7 +2058,8 @@ extern "C" __global__ void generate_RBGA( ...@@ -2058,7 +2058,8 @@ extern "C" __global__ void generate_RBGA(
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
int * twh)
{ {
float min_shot = params[0]; // 10.0 float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0 float scale_shot = params[1]; // 3.0
...@@ -2166,12 +2167,11 @@ extern "C" __global__ void generate_RBGA( ...@@ -2166,12 +2167,11 @@ extern "C" __global__ void generate_RBGA(
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
*/ */
__device__ int twh[2]; /// __device__ int twh[2];
update_woi<<<1,1, 0, cudaStreamTailLaunch>>>( update_woi<<<1,1, 0, cudaStreamTailLaunch>>>(
texture_slices, // int texture_slices, texture_slices, // int texture_slices,
woi, // int * // min_x, min_y, max_x, max_y input, not modified, max_x - not used woi, // int * // min_x, min_y, max_x, max_y input, not modified, max_x - not used
twh); // int * twh) // 2-element in device global memory twh); // int * twh) // 2-element in device global memory
);
// next kernels will see woi as {x,y,width,height} // next kernels will see woi as {x,y,width,height}
dim3 threads2((1 << THREADS_DYNAMIC_BITS), 1, 1); dim3 threads2((1 << THREADS_DYNAMIC_BITS), 1, 1);
......
...@@ -216,5 +216,7 @@ extern "C" __global__ void generate_RBGA( ...@@ -216,5 +216,7 @@ extern "C" __global__ void generate_RBGA(
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
int * twh);
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
#define SAVE_CLT #define SAVE_CLT
#define NO_DP #define NO_DP
/// #define CORR_INTER_SELF 1 #define CORR_INTER_SELF 1
#include <stdio.h> #include <stdio.h>
...@@ -58,6 +58,8 @@ ...@@ -58,6 +58,8 @@
#include "dtt8x8.h" #include "dtt8x8.h"
#include "geometry_correction.h" #include "geometry_correction.h"
#include "TileProcessor.cuh" #include "TileProcessor.cuh"
#include "tp_utils.h"
#include "tp_files.h"
#if TEST_LWIR #if TEST_LWIR
#define IMG_WIDTH 640 #define IMG_WIDTH 640
...@@ -78,235 +80,6 @@ ...@@ -78,235 +80,6 @@
#define TILESYA ((TILESY +3) & (~3)) #define TILESYA ((TILESY +3) & (~3))
float * copyalloc_kernel_gpu(float * kernel_host,
int size, // size in floats
int full_size)
{
float *kernel_gpu;
checkCudaErrors(cudaMalloc((void **)&kernel_gpu, full_size * sizeof(float)));
checkCudaErrors(cudaMemcpy( // segfault
kernel_gpu,
kernel_host,
size * sizeof(float),
cudaMemcpyHostToDevice));
return kernel_gpu;
}
float * copyalloc_kernel_gpu(float * kernel_host,
int size)
{
return copyalloc_kernel_gpu(kernel_host,
size, // size in floats
size);
}
float * alloccopy_from_gpu(
float * gpu_data,
float * cpu_data, // if null, will allocate
int size)
{
if (!cpu_data) {
cpu_data = (float *)malloc(size*sizeof(float));
}
checkCudaErrors(cudaMemcpy( // segfault
cpu_data,
gpu_data,
size * sizeof(float),
cudaMemcpyDeviceToHost));
return cpu_data;
}
float * alloc_kernel_gpu(int size) // size in floats
{
float *kernel_gpu;
checkCudaErrors(cudaMalloc((void **)&kernel_gpu, size * sizeof(float)));
return kernel_gpu;
}
float ** copyalloc_pointers_gpu(float ** gpu_pointer,
int size) // number of entries (cameras)
{
float ** gpu_pointer_to_gpu_pointers;
checkCudaErrors(cudaMalloc((void **)&gpu_pointer_to_gpu_pointers, size * sizeof(float*)));
checkCudaErrors(cudaMemcpy(
gpu_pointer_to_gpu_pointers,
gpu_pointer,
size * sizeof(float*),
cudaMemcpyHostToDevice));
return gpu_pointer_to_gpu_pointers;
}
// shift image in-place, repeat lines/columns
void shift_image (
float * image,
int width,
int height,
int bayer,
int dx,
int dy)
{
int step = 1;
if (bayer){
step = 2;
dx &= -2;
dy &= -2;
}
// vertical shift dy>0 - down, dy < 0 - up
for (int m = 0; m < dy; m+= step) { // only if dy > 0 (down)
for (int y = height - 1; y >= step; y++){
float * dp = image + (y * width);
float * sp = dp - step * width;
for (int x = 0; x < width; x++){
(*dp++) = (*sp++);
}
}
}
// vertical shift dy < 0 - up
for (int m = 0; m > dy; m-= step) { // only if dy < 0 (up)
for (int y = 0; y < height - step; y++){
float * dp = image + (y * width);
float * sp = dp + step * width;
for (int x = 0; x < width; x++){
(*dp++) = (*sp++);
}
}
}
// horizontal shift dx > 0 - right, dx < 0 - left
for (int m = 0; m < dx; m+= step) { // only if dx > 0 (right)
for (int y = 0; y < height; y++){
float * dp = image + (y * width) + width - 1;
float * sp = dp - step;
for (int x = 0; x < (width - step); x++){
(*dp--) = (*sp--);
}
}
}
// horizontal shift dx < 0 - left
for (int m = 0; m > dx; m-= step) { // only if dx < 0 (left)
for (int y = 0; y < height; y++){
float * dp = image + (y * width);
float * sp = dp + step;
for (int x = 0; x < (width - step); x++){
(*dp++) = (*sp++);
}
}
}
}
void update_image_gpu(
float * image_host,
float * image_gpu,
size_t dstride, // in floats !
int width,
int height){
checkCudaErrors(cudaMemcpy2D(
image_gpu,
dstride, // * sizeof(float),
image_host,
width * sizeof(float), // make in 16*n?
width * sizeof(float),
height,
cudaMemcpyHostToDevice));
}
float * copyalloc_image_gpu(
float * image_host,
size_t* dstride, // in floats !
int width,
int height)
{
float *image_gpu;
checkCudaErrors(cudaMallocPitch((void **)&image_gpu, dstride, width * sizeof(float), height));
update_image_gpu(
image_host,
image_gpu,
*dstride, // in floats !
width,
height);
/*
checkCudaErrors(cudaMemcpy2D(
image_gpu,
*dstride, // * sizeof(float),
image_host,
width * sizeof(float), // make in 16*n?
width * sizeof(float),
height,
cudaMemcpyHostToDevice));
*/
return image_gpu;
}
float * alloc_image_gpu(size_t* dstride, // in bytes!!
int width,
int height)
{
float *image_gpu;
checkCudaErrors(cudaMallocPitch((void **)&image_gpu, dstride, width * sizeof(float), height));
return image_gpu;
}
int get_file_size(std::string filename) // path to file
{
FILE *p_file = NULL;
p_file = fopen(filename.c_str(),"rb");
fseek(p_file,0,SEEK_END);
int size = ftell(p_file);
fclose(p_file);
return size;
}
int readFloatsFromFile(float * data, // allocated array
const char * path) // file path
{
printf("readFloatsFromFile(%s)\n", path);
int fsize = get_file_size(path);
std::ifstream input(path, std::ios::binary );
// copies all data into buffer
std::vector<char> buffer((
std::istreambuf_iterator<char>(input)),
(std::istreambuf_iterator<char>()));
std::copy( buffer.begin(), buffer.end(), (char *) data);
printf("---- Bytes read: %d from %s\n", fsize, path);
return 0;
}
float * readAllFloatsFromFile(const char * path,
int * len_in_floats) //
{
int fsize = get_file_size(path);
float * data = (float *) malloc(fsize);
std::ifstream input(path, std::ios::binary );
std::vector<char> buffer((
std::istreambuf_iterator<char>(input)),
(std::istreambuf_iterator<char>()));
std::copy( buffer.begin(), buffer.end(), (char *) data);
printf("---- Bytes read: %d from %s\n", fsize, path);
* len_in_floats = fsize/sizeof(float);
return data;
}
int writeFloatsToFile(float * data, // allocated array
int size, // length in elements
const char * path) // file path
{
std::ofstream ofile(path, std::ios::binary);
ofile.write((char *) data, size * sizeof(float));
return 0;
}
// Prepare low pass filter (64 long) to be applied to each quadrant of the CLT data // Prepare low pass filter (64 long) to be applied to each quadrant of the CLT data
void set_clt_lpf( void set_clt_lpf(
float * lpf, // size*size array to be filled out float * lpf, // size*size array to be filled out
...@@ -978,6 +751,7 @@ int main(int argc, char **argv) ...@@ -978,6 +751,7 @@ int main(int argc, char **argv)
float * gpu_textures_rbga; float * gpu_textures_rbga;
int * gpu_texture_indices; int * gpu_texture_indices;
int * gpu_woi; int * gpu_woi;
int * gpu_twh;
int * gpu_num_texture_tiles; int * gpu_num_texture_tiles;
float * gpu_port_offsets; float * gpu_port_offsets;
float * gpu_color_weights; float * gpu_color_weights;
...@@ -1173,6 +947,8 @@ int main(int argc, char **argv) ...@@ -1173,6 +947,8 @@ int main(int argc, char **argv)
TILESX * TILESYA); // number of rows - multiple of 4 TILESX * TILESYA); // number of rows - multiple of 4
// just allocate // just allocate
checkCudaErrors(cudaMalloc((void **)&gpu_woi, 4 * sizeof(float))); checkCudaErrors(cudaMalloc((void **)&gpu_woi, 4 * sizeof(float)));
checkCudaErrors(cudaMalloc((void **)&gpu_twh, 2 * sizeof(float)));
checkCudaErrors(cudaMalloc((void **)&gpu_num_texture_tiles, 8 * sizeof(float))); // for each subsequence - number of non-border, checkCudaErrors(cudaMalloc((void **)&gpu_num_texture_tiles, 8 * sizeof(float))); // for each subsequence - number of non-border,
// number of border tiles // number of border tiles
...@@ -2312,7 +2088,7 @@ int main(int argc, char **argv) ...@@ -2312,7 +2088,7 @@ int main(int argc, char **argv)
num_cams, // int num_cams, // number of cameras used num_cams, // int num_cams, // number of cameras used
(int *) 0, // int * woi, // x, y, width,height (int *) 0, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE] gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
cpu_pnum_texture_tiles, // *pnum_texture_tiles, // size_t num_texture_tiles, // number of texture tiles to process gpu_pnum_texture_tiles, /// cpu_pnum_texture_tiles, // *pnum_texture_tiles, // size_t num_texture_tiles, // number of texture tiles to process
0, // gpu_texture_indices_offset,// add to gpu_texture_indices 0, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction, gpu_geometry_correction, // struct gc * gpu_geometry_correction,
...@@ -2561,7 +2337,8 @@ int main(int argc, char **argv) ...@@ -2561,7 +2337,8 @@ int main(int argc, char **argv)
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average 1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
keep_texture_weights, // int keep_weights, // return channel weights after A in RGBA keep_texture_weights, // int keep_weights, // return channel weights after A in RGBA
dstride_textures_rbga/sizeof(float), // const size_t texture_rbga_stride, // in floats dstride_textures_rbga/sizeof(float), // const size_t texture_rbga_stride, // in floats
gpu_textures_rbga); // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_textures_rbga, // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_twh); // int * twh)
getLastCudaError("Kernel failure"); getLastCudaError("Kernel failure");
checkCudaErrors(cudaDeviceSynchronize()); checkCudaErrors(cudaDeviceSynchronize());
...@@ -2673,6 +2450,7 @@ int main(int argc, char **argv) ...@@ -2673,6 +2450,7 @@ int main(int argc, char **argv)
checkCudaErrors(cudaFree(gpu_textures_rbga)); checkCudaErrors(cudaFree(gpu_textures_rbga));
checkCudaErrors(cudaFree(gpu_diff_rgb_combo)); checkCudaErrors(cudaFree(gpu_diff_rgb_combo));
checkCudaErrors(cudaFree(gpu_woi)); checkCudaErrors(cudaFree(gpu_woi));
checkCudaErrors(cudaFree(gpu_twh));
checkCudaErrors(cudaFree(gpu_num_texture_tiles)); checkCudaErrors(cudaFree(gpu_num_texture_tiles));
checkCudaErrors(cudaFree(gpu_geometry_correction)); checkCudaErrors(cudaFree(gpu_geometry_correction));
checkCudaErrors(cudaFree(gpu_correction_vector)); checkCudaErrors(cudaFree(gpu_correction_vector));
......
/*
* tp_files.cu
*
* Created on: Mar 25, 2025
* Author: elphel
*/
//#include <cuda_runtime.h>
//#include <helper_cuda.h>
//#include <stdio.h>
//#include <stdlib.h>
//#include <fstream>
//#include <iterator>
//#include <vector>
//#include "tp_utils.h"
#include "tp_files.h"
int get_file_size(std::string filename) // path to file
{
FILE *p_file = NULL;
p_file = fopen(filename.c_str(),"rb");
fseek(p_file,0,SEEK_END);
int size = ftell(p_file);
fclose(p_file);
return size;
}
int readFloatsFromFile(float * data, // allocated array
const char * path) // file path
{
printf("readFloatsFromFile(%s)\n", path);
int fsize = get_file_size(path);
std::ifstream input(path, std::ios::binary );
// copies all data into buffer
std::vector<char> buffer((
std::istreambuf_iterator<char>(input)),
(std::istreambuf_iterator<char>()));
std::copy( buffer.begin(), buffer.end(), (char *) data);
printf("---- Bytes read: %d from %s\n", fsize, path);
return 0;
}
float * readAllFloatsFromFile(const char * path,
int * len_in_floats) //
{
int fsize = get_file_size(path);
float * data = (float *) malloc(fsize);
std::ifstream input(path, std::ios::binary );
std::vector<char> buffer((
std::istreambuf_iterator<char>(input)),
(std::istreambuf_iterator<char>()));
std::copy( buffer.begin(), buffer.end(), (char *) data);
printf("---- Bytes read: %d from %s\n", fsize, path);
* len_in_floats = fsize/sizeof(float);
return data;
}
int writeFloatsToFile(float * data, // allocated array
int size, // length in elements
const char * path) // file path
{
std::ofstream ofile(path, std::ios::binary);
ofile.write((char *) data, size * sizeof(float));
return 0;
}
/*
* tp_files.h
*
* Created on: Mar 25, 2025
* Author: elphel
*/
#ifndef SRC_TP_FILES_H_
#define SRC_TP_FILES_H_
#include <fstream>
#include <iterator>
#include <vector>
int get_file_size(std::string filename); // path to file
int readFloatsFromFile(float * data, // allocated array
const char * path); // file path
float * readAllFloatsFromFile(const char * path,
int * len_in_floats);
int writeFloatsToFile(float * data, // allocated array
int size, // length in elements
const char * path); // file path
#endif /* SRC_TP_FILES_H_ */
/*
* utils.cu
*
* Created on: Mar 25, 2025
* Author: elphel
*/
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include "tp_utils.h"
float * copyalloc_kernel_gpu(float * kernel_host,
int size, // size in floats
int full_size)
{
float *kernel_gpu;
checkCudaErrors(cudaMalloc((void **)&kernel_gpu, full_size * sizeof(float)));
checkCudaErrors(cudaMemcpy( // segfault
kernel_gpu,
kernel_host,
size * sizeof(float),
cudaMemcpyHostToDevice));
return kernel_gpu;
}
float * copyalloc_kernel_gpu(float * kernel_host,
int size)
{
return copyalloc_kernel_gpu(kernel_host,
size, // size in floats
size);
}
float * alloccopy_from_gpu(
float * gpu_data,
float * cpu_data, // if null, will allocate
int size)
{
if (!cpu_data) {
cpu_data = (float *)malloc(size*sizeof(float));
}
checkCudaErrors(cudaMemcpy( // segfault
cpu_data,
gpu_data,
size * sizeof(float),
cudaMemcpyDeviceToHost));
return cpu_data;
}
float * alloc_kernel_gpu(int size) // size in floats
{
float *kernel_gpu;
checkCudaErrors(cudaMalloc((void **)&kernel_gpu, size * sizeof(float)));
return kernel_gpu;
}
float ** copyalloc_pointers_gpu(float ** gpu_pointer,
int size) // number of entries (cameras)
{
float ** gpu_pointer_to_gpu_pointers;
checkCudaErrors(cudaMalloc((void **)&gpu_pointer_to_gpu_pointers, size * sizeof(float*)));
checkCudaErrors(cudaMemcpy(
gpu_pointer_to_gpu_pointers,
gpu_pointer,
size * sizeof(float*),
cudaMemcpyHostToDevice));
return gpu_pointer_to_gpu_pointers;
}
// image-related
// shift image in-place, repeat lines/columns
void shift_image (
float * image,
int width,
int height,
int bayer,
int dx,
int dy)
{
int step = 1;
if (bayer){
step = 2;
dx &= -2;
dy &= -2;
}
// vertical shift dy>0 - down, dy < 0 - up
for (int m = 0; m < dy; m+= step) { // only if dy > 0 (down)
for (int y = height - 1; y >= step; y++){
float * dp = image + (y * width);
float * sp = dp - step * width;
for (int x = 0; x < width; x++){
(*dp++) = (*sp++);
}
}
}
// vertical shift dy < 0 - up
for (int m = 0; m > dy; m-= step) { // only if dy < 0 (up)
for (int y = 0; y < height - step; y++){
float * dp = image + (y * width);
float * sp = dp + step * width;
for (int x = 0; x < width; x++){
(*dp++) = (*sp++);
}
}
}
// horizontal shift dx > 0 - right, dx < 0 - left
for (int m = 0; m < dx; m+= step) { // only if dx > 0 (right)
for (int y = 0; y < height; y++){
float * dp = image + (y * width) + width - 1;
float * sp = dp - step;
for (int x = 0; x < (width - step); x++){
(*dp--) = (*sp--);
}
}
}
// horizontal shift dx < 0 - left
for (int m = 0; m > dx; m-= step) { // only if dx < 0 (left)
for (int y = 0; y < height; y++){
float * dp = image + (y * width);
float * sp = dp + step;
for (int x = 0; x < (width - step); x++){
(*dp++) = (*sp++);
}
}
}
}
void update_image_gpu(
float * image_host,
float * image_gpu,
size_t dstride, // in floats !
int width,
int height){
checkCudaErrors(cudaMemcpy2D(
image_gpu,
dstride, // * sizeof(float),
image_host,
width * sizeof(float), // make in 16*n?
width * sizeof(float),
height,
cudaMemcpyHostToDevice));
}
float * copyalloc_image_gpu(
float * image_host,
size_t* dstride, // in floats !
int width,
int height)
{
float *image_gpu;
checkCudaErrors(cudaMallocPitch((void **)&image_gpu, dstride, width * sizeof(float), height));
update_image_gpu(
image_host,
image_gpu,
*dstride, // in floats !
width,
height);
return image_gpu;
}
float * alloc_image_gpu(size_t* dstride, // in bytes!!
int width,
int height)
{
float *image_gpu;
checkCudaErrors(cudaMallocPitch((void **)&image_gpu, dstride, width * sizeof(float), height));
return image_gpu;
}
/*
* tp_utils.h
*
* Created on: Mar 25, 2025
* Author: elphel
*/
#ifndef SRC_TP_UTILS_H_
#define SRC_TP_UTILS_H_
float * copyalloc_kernel_gpu(float * kernel_host,
int size, // size in floats
int full_size);
float * copyalloc_kernel_gpu(float * kernel_host,
int size);
float * alloccopy_from_gpu(
float * gpu_data,
float * cpu_data, // if null, will allocate
int size);
float * alloc_kernel_gpu(int size); // size in floats
float ** copyalloc_pointers_gpu(float ** gpu_pointer,
int size); // number of entries (cameras)
// image-related
// shift image in-place, repeat lines/columns
void shift_image (
float * image,
int width,
int height,
int bayer,
int dx,
int dy);
void update_image_gpu(
float * image_host,
float * image_gpu,
size_t dstride, // in floats !
int width,
int height);
float * copyalloc_image_gpu(
float * image_host,
size_t* dstride, // in floats !
int width,
int height);
float * alloc_image_gpu(size_t* dstride, // in bytes!!
int width,
int height);
#endif /* SRC_TP_UTILS_H_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment