Refactoring, working state

1bf5e8bf · Andrey Filippov · 13f515b9 · 1bf5e8bf · 1bf5e8bf · 1bf5e8bf
Commit 1bf5e8bf authored Apr 01, 2025 by Andrey Filippov
8 changed files
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
--- a/src/TileProcessor.h
+++ b/src/TileProcessor.h
@@ -37,10 +37,34 @@
 */
 #pragma once
+#ifndef TILE_PROCESSOR_H_
+#define TILE_PROCESSOR_H_
 #ifndef NUM_CAMS
 #include "tp_defines.h"
 #endif
+#define TASK_TEXTURE_BITS ((1 << TASK_TEXT_N_BIT) | (1 << TASK_TEXT_NE_BIT) | (1 << TASK_TEXT_E_BIT) | (1 << TASK_TEXT_SE_BIT)\
+		| (1 << TASK_TEXT_S_BIT) | (1 << TASK_TEXT_SW_BIT) | (1 << TASK_TEXT_W_BIT) | (1 << TASK_TEXT_NW_BIT))
+#define CONVERT_DIRECT_INDEXING_THREADS_LOG2 5
+#define CONVERT_DIRECT_INDEXING_THREADS (1 << CONVERT_DIRECT_INDEXING_THREADS_LOG2) // 32
+#define MCLT_UNION_LEN   (DTT_SIZE2 * (DTT_SIZE2 + 2))
+struct CltExtra{ // extra data for a kernel: the displacement its data is relative to, its actual center, and four derivative terms
+	float data_x;   // kernel data is relative to this displacement X (0.5 pixel increments)
+	float data_y;   // kernel data is relative to this displacement Y (0.5 pixel increments)
+	float center_x; // actual center X (use to find derivatives)
+	float center_y; // actual center Y (use to find derivatives)
+	float dxc_dx;   // add this to data_x per each pixel X-shift relative to the kernel center location
+	float dxc_dy;   // same per each Y-shift pixel
+	float dyc_dx;   // NOTE(review): presumably the data_y counterpart of dxc_dx (per X-shift pixel) - confirm
+	float dyc_dy;   // NOTE(review): presumably the data_y counterpart of dxc_dy (per Y-shift pixel) - confirm
+};
 extern "C" __global__ void convert_direct( // called with a single block, single thread
 		//		struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
@@ -219,4 +243,81 @@ extern "C" __global__ void generate_RBGA(
 		float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles
 		int             * twh);
+extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) - NOTE(review): presumably block and grid dimensions, confirm at the launch site
+		int               num_cams,           // number of cameras used
+		int             * woi,                // x, y, width,height
+		float          ** gpu_clt,            // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
+///		size_t            num_texture_tiles,  // number of texture tiles to process
+		int             * pnum_texture_tiles,  // pointer to a number of texture tiles to process
+		int               gpu_texture_indices_offset,// add to gpu_texture_indices
+		int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7))
+		// TODO: use geometry_correction rXY !
+		struct gc       * gpu_geometry_correction,
+		int               colors,             // number of colors (3/1)
+		int               is_lwir,            // do not perform shot correction
+		float             min_shot,           // 10.0
+		float             scale_shot,         // 3.0
+		float             diff_sigma,         // pixel value/pixel change
+		float             diff_threshold,     // pixel value/pixel change
+		float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
+		const float       weights[3],         // scale for R,B,G
+		int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
+		int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
+// combining both non-overlap and overlap (each calculated if pointer is not null )
+		size_t            texture_rbg_stride, // in floats
+		float           * gpu_texture_rbg,    // (number of colors +1 + ?)*16*16 rgba texture tiles
+		size_t            texture_stride,     // in floats (now 256*4 = 1024)
+		float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles
+		int               linescan_order,     // if !=0 then output gpu_diff_rgb_combo in linescan order, else  - in gpu_texture_indices order
+		float           * gpu_diff_rgb_combo, // diff[num_cams], R[num_cams], B[num_cams], G[num_cams]
+		int               tilesx);
+extern "C" __global__ void clear_texture_list(
+		int              * gpu_texture_indices,// packed tile + bits (now only (1 << 7))
+		int                width,  // <= TILES-X, use for faster processing of LWIR images
+		int                height); // <= TILES-Y, use for faster processing of LWIR images
+extern "C" __global__ void clear_texture_rbga( // declaration only; the kernel body is not part of this header
+		int               texture_width,
+		int               texture_slice_height,
+		const size_t      texture_rbga_stride,     // in floats 8*stride - NOTE(review): meaning of "8*stride" is not evident from this declaration, confirm
+		float           * gpu_texture_tiles);  // (number of colors +1 + ?)*16*16 rgba texture tiles
+extern "C" __global__ void create_nonoverlap_list(
+		int                num_cams,
+		float            * gpu_ftasks ,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+		int                num_tiles,           // number of tiles in task
+		int                width,               // number of tiles in a row
+		int *              nonoverlap_list,     // NOTE(review): was "pointer to the calculated number of non-zero tiles" - looks swapped with the next parameter; presumably this receives the indices to gpu_tasks - confirm
+		int *              pnonoverlap_length); // NOTE(review): was "indices to gpu_tasks" - presumably the pointer to the calculated number of non-zero tiles; should be initialized to zero - confirm
+extern "C" __global__ void mark_texture_tiles(
+		int                num_cams,
+		float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+		int                num_tiles,           // number of tiles in task list
+		int                width,               // number of tiles in a row
+		int              * gpu_texture_indices);// packed tile + bits (now only (1 << 7))
+extern "C" __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
+		int                num_cams,
+		float            * gpu_ftasks,          // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+		int                num_tiles,           // number of tiles in task list
+		int                width,               // number of tiles in a row
+		int                height,              // number of tiles rows
+		int              * gpu_texture_indices, // packed tile + bits (now only (1 << 7))
+		int              * woi);                  // x,y,width,height of the woi
+extern "C" __global__ void gen_texture_list(
+		int                num_cams,
+		float            * gpu_ftasks,          // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+		int                num_tiles,            // number of tiles in task list
+		int                width,                // number of tiles in a row
+		int                height,               // number of tiles rows
+		int              * gpu_texture_indices,  // packed tile + bits (now only (1 << 7))
+		int              * num_texture_tiles,    // pointer to the number of texture tiles to process
+		int              * woi);                 // min_x, min_y, max_x, max_y input - NOTE(review): other kernels in this header describe woi as x,y,width,height; confirm which layout applies here
+#endif
--- a/src/geometry_correction.cu
+++ b/src/geometry_correction.cu
@@ -43,6 +43,11 @@
 //	#include "TileProcessor.h"
+	#include <cuda_runtime.h>
+//	#include <helper_cuda.h>
+//	#include <helper_functions.h>
 #endif // #ifndef JCUDA
 #ifndef get_task_size

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -33,9 +33,9 @@
 // all of the next 5 were disabled
 #define NOCORR
 #define NOCORR_TD
-//#define NOTEXTURES
+#define NOTEXTURES //
-//#define NOTEXTURE_RGBA
+#define NOTEXTURE_RGBA //
-//#define NOTEXTURE_RGBAXXX
+//#define NOTEXTURE_RGBAXXX //
 #define SAVE_CLT
@@ -55,11 +55,15 @@
 #include <iterator>
 #include <vector>
+#include "tp_defines.h"   // was not here
 #include "dtt8x8.h"
 #include "geometry_correction.h"
-#include "TileProcessor.cuh"
+//#include "TileProcessor.cuh"
+#include "TileProcessor.h"
 #include "tp_utils.h"
 #include "tp_files.h"
+//#include "tp_paths.cuh"
 #include "tp_paths.h"
 #if TEST_LWIR
@@ -2130,6 +2134,7 @@ int main(int argc, char **argv)
    		cudaMemcpyDeviceToHost));
 #ifndef NSAVE_TEXTURES
+    printf("Import raw, real, little-endian, 18 images 648x520\n");
 #ifdef NO_DP
    printf("Writing RBGA texture slices to %s\n",  tpPaths.result_textures_rgba_file);
    writeFloatsToFile(

--- a/src/tp_files.cu
+++ b/src/tp_files.cu
@@ -17,7 +17,7 @@
 #include "tp_files.h"
-int get_file_size(std::string filename) // path to file
+int get_file_size(const std::string filename) // path to file
 {
    FILE *p_file = NULL;
    p_file = fopen(filename.c_str(),"rb");
@@ -26,6 +26,7 @@ int get_file_size(std::string filename) // path to file
    fclose(p_file);
    return size;
 }
 int readFloatsFromFile(float *       data, // allocated array
 					   const char *  path) // file path
 {
@@ -42,6 +43,8 @@ int readFloatsFromFile(float *       data, // allocated array
 	return 0;
 }
 float * readAllFloatsFromFile(const char *  path,
 		int * len_in_floats) //
 {
@@ -55,7 +58,6 @@ float * readAllFloatsFromFile(const char *  path,
    printf("---- Bytes read: %d from %s\n", fsize, path);
    * len_in_floats = fsize/sizeof(float);
    return data;
 }
 int writeFloatsToFile(float *       data, // allocated array
@@ -68,5 +70,46 @@ int writeFloatsToFile(float *       data, // allocated array
 	return 0;
 }
+/*
+int readFloatsFromFile(float *           data, // allocated array
+					   const std::string path) // file path
+{
+    printf("readFloatsFromFile(%s)\n", path.c_str());
+    int fsize = get_file_size(path);
+    std::ifstream input(path, std::ios::binary );
+    // copies all data into buffer
+    std::vector<char> buffer((
+            std::istreambuf_iterator<char>(input)),
+            (std::istreambuf_iterator<char>()));
+    std::copy( buffer.begin(), buffer.end(), (char *) data);
+    printf("---- Bytes read: %d from %s\n", fsize, path.c_str());
+	return 0;
+}
+float * readAllFloatsFromFile(const std::string path,
+		int * len_in_floats) //
+{
+    int fsize = get_file_size(path);
+    float * data = (float *) malloc(fsize);
+    std::ifstream input(path, std::ios::binary );
+    std::vector<char> buffer((
+            std::istreambuf_iterator<char>(input)),
+            (std::istreambuf_iterator<char>()));
+    std::copy( buffer.begin(), buffer.end(), (char *) data);
+    printf("---- Bytes read: %d from %s\n", fsize, path.c_str());
+    * len_in_floats = fsize/sizeof(float);
+    return data;
+}
+int writeFloatsToFile(float *           data, // allocated array
+		               int              size, // length in elements
+					   const std::string path) // file path
+{
+	std::ofstream ofile(path, std::ios::binary);
+	ofile.write((char *) data, size * sizeof(float));
+	return 0;
+}
+*/
--- a/src/tp_files.h
+++ b/src/tp_files.h
@@ -11,7 +11,7 @@
 #include <iterator>
 #include <vector>
-int get_file_size(std::string filename); // path to file
+int get_file_size(const std::string filename); // path to file
 int readFloatsFromFile(float *       data, // allocated array
 					   const char *  path); // file path
 float * readAllFloatsFromFile(const char *  path,
@@ -19,4 +19,14 @@ float * readAllFloatsFromFile(const char *  path,
 int writeFloatsToFile(float *       data, // allocated array
 		               int           size, // length in elements
 					   const char *  path); //  file path
+/*
+int readFloatsFromFile(float *       data, // allocated array
+					   const std::string filename); // file path
+float * readAllFloatsFromFile(const std::string filename,
+		int * len_in_floats);
+int writeFloatsToFile(float *       data, // allocated array
+		               int           size, // length in elements
+					   const std::string filename); //  file path
+*/
 #endif /* SRC_TP_FILES_H_ */
--- a/src/tp_paths.cu
+++ b/src/tp_paths.cu
--- a/src/tp_paths.h
+++ b/src/tp_paths.h
@@ -11,7 +11,9 @@
 class TpPaths{
 public:
 	    TpPaths(int lwir);
-		const char ** kernel_file;
+	    int    m_lwir;
+	    const char ** kernel_file;
 		const char ** kernel_offs_file;
 		const char ** image_files;
 		const char ** ports_offs_xy_file;
@@ -31,7 +33,141 @@ public:
 		const char* rByRDist_file;
 		const char* correction_vector_file;
 		const char* geometry_correction_file;
-//		float * color_weights;// [3];
-//		float * generate_RBGA_params; // [5];
+private:
+	    const char * m_kernel_file_lwir[16] ={
+	    		"clt/aux_chn0_transposed.kernel",
+				"clt/aux_chn1_transposed.kernel",
+				"clt/aux_chn2_transposed.kernel",
+				"clt/aux_chn3_transposed.kernel",
+				"clt/aux_chn4_transposed.kernel",
+				"clt/aux_chn5_transposed.kernel",
+				"clt/aux_chn6_transposed.kernel",
+				"clt/aux_chn7_transposed.kernel",
+				"clt/aux_chn8_transposed.kernel",
+				"clt/aux_chn9_transposed.kernel",
+				"clt/aux_chn10_transposed.kernel",
+				"clt/aux_chn11_transposed.kernel",
+				"clt/aux_chn12_transposed.kernel",
+				"clt/aux_chn13_transposed.kernel",
+				"clt/aux_chn14_transposed.kernel",
+				"clt/aux_chn15_transposed.kernel"};
+	    const char * m_kernel_offs_file_lwir[16] = {
+	    		"clt/aux_chn0_transposed.kernel_offsets",
+				"clt/aux_chn1_transposed.kernel_offsets",
+				"clt/aux_chn2_transposed.kernel_offsets",
+				"clt/aux_chn3_transposed.kernel_offsets",
+				"clt/aux_chn4_transposed.kernel_offsets",
+				"clt/aux_chn5_transposed.kernel_offsets",
+				"clt/aux_chn6_transposed.kernel_offsets",
+				"clt/aux_chn7_transposed.kernel_offsets",
+				"clt/aux_chn8_transposed.kernel_offsets",
+				"clt/aux_chn9_transposed.kernel_offsets",
+				"clt/aux_chn10_transposed.kernel_offsets",
+				"clt/aux_chn11_transposed.kernel_offsets",
+				"clt/aux_chn12_transposed.kernel_offsets",
+				"clt/aux_chn13_transposed.kernel_offsets",
+				"clt/aux_chn14_transposed.kernel_offsets",
+				"clt/aux_chn15_transposed.kernel_offsets"};
+	    const char * m_image_files_lwir[16]= {
+	    		"clt/aux_chn0.bayer",
+				"clt/aux_chn1.bayer",
+				"clt/aux_chn2.bayer",
+				"clt/aux_chn3.bayer",
+				"clt/aux_chn4.bayer",
+				"clt/aux_chn5.bayer",
+				"clt/aux_chn6.bayer",
+				"clt/aux_chn7.bayer",
+				"clt/aux_chn8.bayer",
+				"clt/aux_chn9.bayer",
+				"clt/aux_chn10.bayer",
+				"clt/aux_chn11.bayer",
+				"clt/aux_chn12.bayer",
+				"clt/aux_chn13.bayer",
+				"clt/aux_chn14.bayer",
+				"clt/aux_chn15.bayer"};
+	    const char * m_ports_offs_xy_file_lwir[16]= {
+	    		"clt/aux_chn0.portsxy",
+				"clt/aux_chn1.portsxy",
+				"clt/aux_chn2.portsxy",
+				"clt/aux_chn3.portsxy",
+				"clt/aux_chn4.portsxy",
+				"clt/aux_chn5.portsxy",
+				"clt/aux_chn6.portsxy",
+				"clt/aux_chn7.portsxy",
+				"clt/aux_chn8.portsxy",
+				"clt/aux_chn9.portsxy",
+				"clt/aux_chn10.portsxy",
+				"clt/aux_chn11.portsxy",
+				"clt/aux_chn12.portsxy",
+				"clt/aux_chn13.portsxy",
+				"clt/aux_chn14.portsxy",
+				"clt/aux_chn15.portsxy"};
+	    const char * m_ports_clt_file_lwir[16]= { // never referenced
+	    		"clt/aux_chn0.clt",
+				"clt/aux_chn1.clt",
+				"clt/aux_chn2.clt",
+				"clt/aux_chn3.clt",
+				"clt/aux_chn4.clt",
+				"clt/aux_chn5.clt",
+				"clt/aux_chn6.clt",
+				"clt/aux_chn7.clt",
+				"clt/aux_chn8.clt",
+				"clt/aux_chn9.clt",
+				"clt/aux_chn10.clt",
+				"clt/aux_chn11.clt",
+				"clt/aux_chn12.clt",
+				"clt/aux_chn13.clt",
+				"clt/aux_chn14.clt",
+				"clt/aux_chn15.clt"};
+	    const char * m_result_rbg_file_lwir[16] = {
+	    		"clt/aux_chn0.rbg",
+				"clt/aux_chn1.rbg",
+				"clt/aux_chn2.rbg",
+				"clt/aux_chn3.rbg",
+				"clt/aux_chn4.rbg",
+				"clt/aux_chn5.rbg",
+				"clt/aux_chn6.rbg",
+				"clt/aux_chn7.rbg",
+				"clt/aux_chn8.rbg",
+				"clt/aux_chn9.rbg",
+				"clt/aux_chn10.rbg",
+				"clt/aux_chn11.rbg",
+				"clt/aux_chn12.rbg",
+				"clt/aux_chn13.rbg",
+				"clt/aux_chn14.rbg",
+				"clt/aux_chn15.rbg"};
+	    const char * m_kernel_file_rgb[4] = {
+	    		"clt/main_chn0_transposed.kernel",
+				"clt/main_chn1_transposed.kernel",
+				"clt/main_chn2_transposed.kernel",
+				"clt/main_chn3_transposed.kernel"};
+	    const char * m_kernel_offs_file_rgb[4] = {
+	    		"clt/main_chn0_transposed.kernel_offsets",
+				"clt/main_chn1_transposed.kernel_offsets",
+				"clt/main_chn2_transposed.kernel_offsets",
+				"clt/main_chn3_transposed.kernel_offsets"};
+	    const char * m_image_files_rgb[4] = {
+	    		"clt/main_chn0.bayer",
+				"clt/main_chn1.bayer",
+				"clt/main_chn2.bayer",
+				"clt/main_chn3.bayer"};
+	    const char * m_ports_offs_xy_file_rgb[4] = {
+	    		"clt/main_chn0.portsxy",
+				"clt/main_chn1.portsxy",
+				"clt/main_chn2.portsxy",
+				"clt/main_chn3.portsxy"};
+	    const char * m_ports_clt_file_rgb[4] = { // never referenced
+	    		"clt/main_chn0.clt",
+				"clt/main_chn1.clt",
+				"clt/main_chn2.clt",
+				"clt/main_chn3.clt"};
+	    const char * m_result_rbg_file_rgb[4] = {
+	    		"clt/main_chn0.rbg",
+				"clt/main_chn1.rbg",
+				"clt/main_chn2.rbg",
+				"clt/main_chn3.rbg"};
 };
 #endif /* SRC_TP_PATHS_H_ */