Commit e1bf6e5c authored by Andrey Filippov

Debugged monochrome direct/inverse mclt conversions

parent d26457f8
This diff is collapsed.
...@@ -951,6 +951,7 @@ __device__ void dttiv_color_2d( ...@@ -951,6 +951,7 @@ __device__ void dttiv_color_2d(
dctiv_nodiverg( // all colors dctiv_nodiverg( // all colors
clt_tile + (DTT_SIZE1 * threadIdx.x), // [0][threadIdx.x], // pointer to start of row clt_tile + (DTT_SIZE1 * threadIdx.x), // [0][threadIdx.x], // pointer to start of row
1); //int inc); 1); //int inc);
// __syncthreads();// worsened
if (color == BAYER_GREEN){ if (color == BAYER_GREEN){
dstiv_nodiverg( // all colors dstiv_nodiverg( // all colors
clt_tile + DTT_SIZE1 * threadIdx.x + DTT_SIZE1 * DTT_SIZE, // clt_tile[1][threadIdx.x], // pointer to start of row clt_tile + DTT_SIZE1 * threadIdx.x + DTT_SIZE1 * DTT_SIZE, // clt_tile[1][threadIdx.x], // pointer to start of row
...@@ -969,6 +970,7 @@ __device__ void dttiv_color_2d( ...@@ -969,6 +970,7 @@ __device__ void dttiv_color_2d(
dctiv_nodiverg( // all colors dctiv_nodiverg( // all colors
clt_tile + threadIdx.x, // &clt_tile[0][0][threadIdx.x], // pointer to start of column clt_tile + threadIdx.x, // &clt_tile[0][0][threadIdx.x], // pointer to start of column
DTT_SIZE1); // int inc, DTT_SIZE1); // int inc,
// __syncthreads();// worsened
if (color == BAYER_GREEN){ if (color == BAYER_GREEN){
dctiv_nodiverg( // all colors dctiv_nodiverg( // all colors
clt_tile + threadIdx.x + (DTT_SIZE1 * DTT_SIZE), // &clt_tile[1][0][threadIdx.x], // pointer to start of column clt_tile + threadIdx.x + (DTT_SIZE1 * DTT_SIZE), // &clt_tile[1][0][threadIdx.x], // pointer to start of column
...@@ -977,6 +979,50 @@ __device__ void dttiv_color_2d( ...@@ -977,6 +979,50 @@ __device__ void dttiv_color_2d(
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
} }
// Two-pass (horizontal, then vertical) 2D DTT-IV over the four quadrants of a
// monochrome CLT tile.
//
// clt_tile is addressed here as four consecutive quadrants, each of DTT_SIZE rows
// with a row pitch of DTT_SIZE1 floats: quadrant q starts at
// clt_tile + q * (DTT_SIZE1 * DTT_SIZE).
//
// Horizontal pass: thread threadIdx.x processes row threadIdx.x of every quadrant
// (stride 1), using dctiv_nodiverg for quadrants 0 and 2 and dstiv_nodiverg for
// quadrants 1 and 3.
// Vertical pass: thread threadIdx.x processes column threadIdx.x of every quadrant
// (stride DTT_SIZE1), using dctiv_nodiverg for quadrants 0 and 1 and dstiv_nodiverg
// for quadrants 2 and 3.
//
// The helpers receive only a pointer and a stride, so the results are presumably
// written back in place - confirm against dctiv_nodiverg / dstiv_nodiverg.
// NOTE(review): presumably launched with DTT_SIZE threads along x (one per
// row/column) - confirm against the callers.
//
// All threads of the block must call this function: it contains __syncthreads().
__device__ void dttiv_mono_2d(
		float * clt_tile)
{
	const int quadrant_size = DTT_SIZE1 * DTT_SIZE;          // floats per quadrant
	float * const row_start = clt_tile + (DTT_SIZE1 * threadIdx.x); // row threadIdx.x of quadrant 0
	float * const col_start = clt_tile + threadIdx.x;               // column threadIdx.x of quadrant 0

	// ---- Horizontal pass: one row per thread, element stride 1 ----
	dctiv_nodiverg(
			row_start + (0 * quadrant_size), // &clt_tile[0][threadIdx.x][0]
			1); // int inc
	dstiv_nodiverg(
			row_start + (1 * quadrant_size), // &clt_tile[1][threadIdx.x][0]
			1); // int inc
	dctiv_nodiverg(
			row_start + (2 * quadrant_size), // &clt_tile[2][threadIdx.x][0]
			1); // int inc
	dstiv_nodiverg(
			row_start + (3 * quadrant_size), // &clt_tile[3][threadIdx.x][0]
			1); // int inc
	// The vertical pass reads elements produced by other threads' rows.
	__syncthreads();// __syncwarp();
#ifdef DEBUG222
	if ((threadIdx.x) == 0){
		// This function has no color argument (the previous debug code referenced
		// an undeclared 'color' and could not compile with DEBUG222 defined).
		printf("\nDTT Tiles after horizontal pass (mono)\n");
		// Mask 0xf: one bit per quadrant, all four quadrants are used here.
		// NOTE(review): -1 is passed as a "no color" placeholder - confirm how
		// debug_print_clt1 treats its color argument.
		debug_print_clt1(clt_tile, -1, 0xf);
	}
	__syncthreads();// __syncwarp();
#endif
	// ---- Vertical pass: one column per thread, element stride DTT_SIZE1 ----
	dctiv_nodiverg( // CC
			col_start + (0 * quadrant_size), // &clt_tile[0][0][threadIdx.x]
			DTT_SIZE1); // int inc
	dctiv_nodiverg( // SC
			col_start + (1 * quadrant_size), // &clt_tile[1][0][threadIdx.x]
			DTT_SIZE1); // int inc
	dstiv_nodiverg( // CS
			col_start + (2 * quadrant_size), // &clt_tile[2][0][threadIdx.x]
			DTT_SIZE1); // int inc
	dstiv_nodiverg( // SS
			col_start + (3 * quadrant_size), // &clt_tile[3][0][threadIdx.x]
			DTT_SIZE1); // int inc
	// Make the finished tile visible to all threads before the caller continues.
	__syncthreads();// __syncwarp();
}
// //
// Uses 16 threads, gets 4*8*8 clt tiles, performs idtt-iv (swapping 1 and 2 quadrants) and then unfolds with window, // Uses 16 threads, gets 4*8*8 clt tiles, performs idtt-iv (swapping 1 and 2 quadrants) and then unfolds with window,
// adding to the output 16x16 tile (to use Read-modify-write with 4 passes over the frame. Should be zeroed before the // adding to the output 16x16 tile (to use Read-modify-write with 4 passes over the frame. Should be zeroed before the
......
...@@ -88,6 +88,8 @@ extern __device__ void dttii_2d( ...@@ -88,6 +88,8 @@ extern __device__ void dttii_2d(
extern __device__ void dttiv_color_2d( extern __device__ void dttiv_color_2d(
float * clt_tile, float * clt_tile,
int color); int color);
// Monochrome variant of dttiv_color_2d: takes only the tile pointer, no color argument.
// NOTE(review): clt_tile presumably uses the same [4][DTT_SIZE][DTT_SIZE1] layout as
// the clt_tile argument of imclt - confirm against the definition.
extern __device__ void dttiv_mono_2d(
float * clt_tile);
extern __device__ void imclt( extern __device__ void imclt(
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9] float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float * mclt_tile ); float * mclt_tile );
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#define NOCORR_TD #define NOCORR_TD
#define NOTEXTURES #define NOTEXTURES
#define NOTEXTURE_RGBA #define NOTEXTURE_RGBA
#define SAVE_CLT
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -330,7 +330,7 @@ int main(int argc, char **argv) ...@@ -330,7 +330,7 @@ int main(int argc, char **argv)
"/home/eyesis/git/tile_processor_gpu/clt/aux_chn15.portsxy"}; "/home/eyesis/git/tile_processor_gpu/clt/aux_chn15.portsxy"};
//#ifndef DBG_TILE //#ifndef DBG_TILE
/* #ifdef SAVE_CLT
const char* ports_clt_file[] = { // never referenced const char* ports_clt_file[] = { // never referenced
"/home/eyesis/git/tile_processor_gpu/clt/aux_chn0.clt", "/home/eyesis/git/tile_processor_gpu/clt/aux_chn0.clt",
"/home/eyesis/git/tile_processor_gpu/clt/aux_chn1.clt", "/home/eyesis/git/tile_processor_gpu/clt/aux_chn1.clt",
...@@ -348,7 +348,8 @@ int main(int argc, char **argv) ...@@ -348,7 +348,8 @@ int main(int argc, char **argv)
"/home/eyesis/git/tile_processor_gpu/clt/aux_chn13.clt", "/home/eyesis/git/tile_processor_gpu/clt/aux_chn13.clt",
"/home/eyesis/git/tile_processor_gpu/clt/aux_chn14.clt", "/home/eyesis/git/tile_processor_gpu/clt/aux_chn14.clt",
"/home/eyesis/git/tile_processor_gpu/clt/aux_chn15.clt"}; "/home/eyesis/git/tile_processor_gpu/clt/aux_chn15.clt"};
*/
#endif
const char* result_rbg_file[] = { const char* result_rbg_file[] = {
"/home/eyesis/git/tile_processor_gpu/clt/aux_chn0.rbg", "/home/eyesis/git/tile_processor_gpu/clt/aux_chn0.rbg",
"/home/eyesis/git/tile_processor_gpu/clt/aux_chn1.rbg", "/home/eyesis/git/tile_processor_gpu/clt/aux_chn1.rbg",
...@@ -401,15 +402,13 @@ int main(int argc, char **argv) ...@@ -401,15 +402,13 @@ int main(int argc, char **argv)
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.portsxy", "/home/eyesis/git/tile_processor_gpu/clt/main_chn1.portsxy",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn2.portsxy", "/home/eyesis/git/tile_processor_gpu/clt/main_chn2.portsxy",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn3.portsxy"}; "/home/eyesis/git/tile_processor_gpu/clt/main_chn3.portsxy"};
#ifdef SAVE_CLT
//#ifndef DBG_TILE
/*
const char* ports_clt_file[] = { // never referenced const char* ports_clt_file[] = { // never referenced
"/home/eyesis/git/tile_processor_gpu/clt/main_chn0.clt", "/home/eyesis/git/tile_processor_gpu/clt/main_chn0.clt",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.clt", "/home/eyesis/git/tile_processor_gpu/clt/main_chn1.clt",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn2.clt", "/home/eyesis/git/tile_processor_gpu/clt/main_chn2.clt",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn3.clt"}; "/home/eyesis/git/tile_processor_gpu/clt/main_chn3.clt"};
*/ #endif
const char* result_rbg_file[] = { const char* result_rbg_file[] = {
"/home/eyesis/git/tile_processor_gpu/clt/main_chn0.rbg", "/home/eyesis/git/tile_processor_gpu/clt/main_chn0.rbg",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.rbg", "/home/eyesis/git/tile_processor_gpu/clt/main_chn1.rbg",
...@@ -1133,12 +1132,12 @@ int main(int argc, char **argv) ...@@ -1133,12 +1132,12 @@ int main(int argc, char **argv)
gpu_clt_h[ncam], gpu_clt_h[ncam],
rslt_size * sizeof(float), rslt_size * sizeof(float),
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
#ifndef DBG_TILE //#ifndef DBG_TILE
printf("Writing CLT data to %s\n", ports_clt_file[ncam]); printf("Writing CLT data to %s\n", ports_clt_file[ncam]);
writeFloatsToFile(cpu_clt, // float * data, // allocated array writeFloatsToFile(cpu_clt, // float * data, // allocated array
rslt_size, // int size, // length in elements rslt_size, // int size, // length in elements
ports_clt_file[ncam]); // const char * path) // file path ports_clt_file[ncam]); // const char * path) // file path
#endif //#endif
} }
#endif #endif
......
...@@ -140,11 +140,12 @@ ...@@ -140,11 +140,12 @@
// geom // geom
//#define DEBUG20 1 //#define DEBUG20 1
#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0) #if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
#define DEBUG20 1 // Geometry Correction #define DEBUG20 1 // Geometry Correction
#define DEBUG21 1 // Geometry Correction #define DEBUG21 1 // Geometry Correction
//#define DEBUG210 1 //#define DEBUG210 1
#define DEBUG30 1
//#define DEBUG22 1 //#define DEBUG22 1
//#define DEBUG23 1 //#define DEBUG23 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment