Commit 0b00f0ae authored by Andrey Filippov's avatar Andrey Filippov

updated GPU code with new texture boundaries

parent 95e514a4
...@@ -1607,9 +1607,6 @@ public class TexturedModel { ...@@ -1607,9 +1607,6 @@ public class TexturedModel {
} }
// only trim if nothing obscures this and has some BG // only trim if nothing obscures this and has some BG
if (is_fg_tile[fnslice][tile] && has_bg_tile[fnslice][tile]) { if (is_fg_tile[fnslice][tile] && has_bg_tile[fnslice][tile]) {
if (dbg_is_fg != null) {
dbg_is_fg[cindx] = 1000.0;
}
if (vars_inter[cindx] > fg_max_inter) { if (vars_inter[cindx] > fg_max_inter) {
dirs_avg[cindx] = Double.NaN; dirs_avg[cindx] = Double.NaN;
} }
...@@ -2246,7 +2243,7 @@ public class TexturedModel { ...@@ -2246,7 +2243,7 @@ public class TexturedModel {
tilesX, // final int tilesX, tilesX, // final int tilesX,
slice_disparities, // final double [][] slice_disparities, slice_disparities, // final double [][] slice_disparities,
sensor_textures, // final double [][] sensor_texture, // per-sensor texture value sensor_textures, // final double [][] sensor_texture, // per-sensor texture value
combo_textures, // null, // final double [] combo_texture_in, // average texture value null, // combo_textures, // null, // final double [] combo_texture_in, // average texture value
ref_scene.getImageName()); // final String dbg_prefix); ref_scene.getImageName()); // final String dbg_prefix);
......
...@@ -53,7 +53,11 @@ nofast ...@@ -53,7 +53,11 @@ nofast
GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.090526999999998ms), corr2D: 30.623282999999997ms), textures: 231.154339ms, RGBA: 220.503017ms GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.090526999999998ms), corr2D: 30.623282999999997ms), textures: 231.154339ms, RGBA: 220.503017ms
*/ */
#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT)) //#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
#define TASK_TEXTURE_BITS ((1 << TASK_TEXT_N_BIT) | (1 << TASK_TEXT_NE_BIT) | (1 << TASK_TEXT_E_BIT) | (1 << TASK_TEXT_SE_BIT)\
| (1 << TASK_TEXT_S_BIT) | (1 << TASK_TEXT_SW_BIT) | (1 << TASK_TEXT_W_BIT) | (1 << TASK_TEXT_NW_BIT))
//#define IMCLT14 //#define IMCLT14
//#define NOICLT 1 //#define NOICLT 1
...@@ -244,83 +248,74 @@ def set_imclt_sa(stride=9): ...@@ -244,83 +248,74 @@ def set_imclt_sa(stride=9):
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import numpy as np # math import numpy as np # math
def printAlphaFade(transform_size): def printTextureBlend(transform_size):
ts2 = 2 * transform_size ts2 = 2 * transform_size
ts2m1 = ts2-1 ts2m1 = ts2-1
alphaFade = np.zeros(shape=(9,ts2*ts2), dtype=float) # double [][] alphaFade = new double[16][ts2*ts2]; alphabBlend = np.zeros(shape=(8,ts2*ts2), dtype=float) #
alphaIndex = np.zeros(shape=(16,), dtype=int) blend1d = np.zeros(shape=(ts2,), dtype=float)
fade1d = np.zeros(shape=(16,), dtype=float) # double [] fade1d = new double [ts2]; dirBlend = ((0,1),(-1,1),(-1,0),(-1,-1),(0,-1),(1,-1),(1,0),(1,1))
for i in range (ts2): for i in range (transform_size):
fade1d[i] = 0.5 * (1.0 - np.cos(np.pi * (i +0.5) /ts2)) blend1d[i] = 0.5 * (1.0 + np.cos(np.pi * (i +0.5) /transform_size))
for i in range (ts2): for i in range (ts2):
for j in range (ts2): for j in range (ts2):
indx = i * ts2 + j indx = i * ts2 + j
for m in range (16): for m, dir in enumerate(dirBlend):
# if m == 0: a = 1.0
# alphaFade[m][indx] = 0 if dir[0] > 0:
if m == 1: # 0 a *= blend1d[j]
alphaIndex[m] = 1 elif dir[0]<0:
alphaFade[alphaIndex[m]][indx] = fade1d[ts2m1 - i] a *= blend1d[ts2m1 - j]
elif m == 2: if dir[1] > 0:
alphaIndex[m] = 2 a *= blend1d[i]
alphaFade[alphaIndex[m]][indx] = fade1d[j] elif dir[1]<0:
elif m == 4: a *= blend1d[ts2m1 - i]
alphaIndex[m] = 3 alphabBlend[m][indx] = a;
alphaFade[alphaIndex[m]][indx] = fade1d[i]
elif m == 8: floats_in_line0=8 # 16 #8
alphaIndex[m] = 4 segment_len = transform_size*transform_size//2
alphaFade[alphaIndex[m]][indx] = fade1d[ts2m1 - j] print("__constant__ float textureBlend[8][%d] = {"%(segment_len)) #32
elif m == 3: # only for transform_size == 8
alphaIndex[m] = 5 for m, blend in enumerate (alphabBlend):
alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - i],fade1d[j])[j > ts2m1 - i]
elif m == 6: for i in range (segment_len):
alphaIndex[m] = 6 if m in (0,1):
alphaFade[alphaIndex[m]][indx] = (fade1d[i],fade1d[j])[j > i] x = 4 + (i % 8)
elif m == 9: y = 4 + (i // 8)
alphaIndex[m] = 7 elif m in (2,3):
alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - j],fade1d[ts2m1 - i])[j > i] x = 8 + (i % 4)
elif m == 12: y = 4 + (i // 4)
alphaIndex[m] = 8 elif m in (4,5):
alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - j],fade1d[i])[i > ts2m1 - j] x = 4 + (i % 8)
else: y = 8 + (i // 8)
alphaIndex[m] = 0 elif m in (6,7):
alphaFade[alphaIndex[m]][indx] = 1.0 x = 4 + (i % 4)
y = 4 + (i // 4)
floats_in_line=8 indx = x + 16 * y
print("__constant__ int alphaIndex[16] = {") floats_in_line = floats_in_line0
for m in range (16): if ((m >>1) & 1) !=0:
if ((m % floats_in_line) == 0): floats_in_line = floats_in_line0 // 2
print("\n ",end="")
else:
print(" ",end="")
print("%d"%(alphaIndex[m]), end ="")
if (m < (16-1)):
print(",",end="")
print("};")
print("__constant__ float alphaFade[9][%d] = {"%(ts2*ts2))
for m in range (9):
for i in range (ts2 * ts2):
if ((i % floats_in_line) == 0): if ((i % floats_in_line) == 0):
print(" ",end="") print(" ",end="")
if (i == 0) : if (i == 0) :
print("{",end="") print("{",end="")
else: else:
print(" ",end="") print(" ",end="")
print("%ff"%(alphaFade[m][i]), end ="") print("%ff"%(blend[indx]), end ="")
if (((i + 1) % floats_in_line) == 0): if (((i + 1) % floats_in_line) == 0):
if (i == (ts2 * ts2 -1)): if (i == (segment_len -1)):
print("}",end="") print("}",end="")
else: else:
print(",") print(",")
else: else:
print(", ",end="") print(", ",end="")
if (m == (9-1)): if (m == len(alphabBlend)-1):
print("};") print("};")
else: else:
print(",") print(",")
printAlphaFade(8)
printTextureBlend(8)
""" """
Set up correlation pairs - run: Set up correlation pairs - run:
setup_pairs(0,16) setup_pairs(0,16)
...@@ -461,306 +456,55 @@ __constant__ float LoG_corr[64]={ // modify if needed high-pass filter before co ...@@ -461,306 +456,55 @@ __constant__ float LoG_corr[64]={ // modify if needed high-pass filter before co
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f
}; };
/* __constant__ float textureBlend[8][32] = {
__constant__ int pairs[6][2]={ {0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f,
{0, 1}, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f,
{2, 3}, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f,
{0, 2}, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f},
{1, 3}, {0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.003867f, 0.033913f, 0.089431f, 0.161970f,
{0, 3}, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.002135f, 0.018725f, 0.049379f, 0.089431f,
{2, 1}}; 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000810f, 0.007101f, 0.018725f, 0.033913f,
*/ 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000092f, 0.000810f, 0.002135f, 0.003867f},
__constant__ int alphaIndex[16] = {0, 1, 2, 5, 3, 0, 6, 0, 4, 7, 0, 0, 8, 0, 0, 0}; {0.009607f, 0.084265f, 0.222215f, 0.402455f,
0.009607f, 0.084265f, 0.222215f, 0.402455f,
__constant__ float alphaFade[9][256] = { 0.009607f, 0.084265f, 0.222215f, 0.402455f,
{1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.009607f, 0.084265f, 0.222215f, 0.402455f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.009607f, 0.084265f, 0.222215f, 0.402455f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.009607f, 0.084265f, 0.222215f, 0.402455f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.009607f, 0.084265f, 0.222215f, 0.402455f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.009607f, 0.084265f, 0.222215f, 0.402455f},
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, {0.000000f, 0.000000f, 0.000000f, 0.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.000092f, 0.000810f, 0.002135f, 0.003867f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.000810f, 0.007101f, 0.018725f, 0.033913f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.002135f, 0.018725f, 0.049379f, 0.089431f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.003867f, 0.033913f, 0.089431f, 0.161970f},
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, {0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f},
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, {0.003867f, 0.002135f, 0.000810f, 0.000092f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.033913f, 0.018725f, 0.007101f, 0.000810f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.089431f, 0.049379f, 0.018725f, 0.002135f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.161970f, 0.089431f, 0.033913f, 0.003867f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, {0.402455f, 0.222215f, 0.084265f, 0.009607f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.402455f, 0.222215f, 0.084265f, 0.009607f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.402455f, 0.222215f, 0.084265f, 0.009607f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.402455f, 0.222215f, 0.084265f, 0.009607f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.402455f, 0.222215f, 0.084265f, 0.009607f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.402455f, 0.222215f, 0.084265f, 0.009607f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.402455f, 0.222215f, 0.084265f, 0.009607f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.402455f, 0.222215f, 0.084265f, 0.009607f},
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, {0.161970f, 0.089431f, 0.033913f, 0.003867f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 0.089431f, 0.049379f, 0.018725f, 0.002135f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}, 0.033913f, 0.018725f, 0.007101f, 0.000810f,
{0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.003867f, 0.002135f, 0.000810f, 0.000092f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}};
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f,
0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f,
0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f,
0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f,
0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f,
0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f},
{0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f},
{0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f,
0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f,
0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f,
0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f,
0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f,
0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f},
{0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f},
{0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.997592f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.978470f, 0.997592f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.059039f, 0.059039f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.021530f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f},
{0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.021530f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.059039f, 0.059039f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.978470f, 0.997592f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f},
{0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.997592f, 0.978470f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.059039f, 0.059039f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.021530f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f},
{0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.021530f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.059039f, 0.059039f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.997592f, 0.978470f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.997592f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f}};
__constant__ int pairs_offsets[]= {0, 0, 0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455, 560, 680}; __constant__ int pairs_offsets[]= {0, 0, 0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455, 560, 680};
// {pair_start, pair_end, pair_length} // {pair_start, pair_end, pair_length}
...@@ -2298,7 +2042,6 @@ extern "C" __global__ void generate_RBGA( ...@@ -2298,7 +2042,6 @@ extern "C" __global__ void generate_RBGA(
mark_texture_tiles <<<blocks,threads>>>( mark_texture_tiles <<<blocks,threads>>>(
num_cams, // int num_cams, num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16 gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
/// gpu_tasks,
num_tiles, // number of tiles in task list num_tiles, // number of tiles in task list
width, // number of tiles in a row width, // number of tiles in a row
gpu_texture_indices); // packed tile + bits (now only (1 << 7) gpu_texture_indices); // packed tile + bits (now only (1 << 7)
...@@ -2311,7 +2054,6 @@ extern "C" __global__ void generate_RBGA( ...@@ -2311,7 +2054,6 @@ extern "C" __global__ void generate_RBGA(
mark_texture_neighbor_tiles <<<blocks,threads>>>( mark_texture_neighbor_tiles <<<blocks,threads>>>(
num_cams, // int num_cams, num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16 gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks,
num_tiles, // number of tiles in task list num_tiles, // number of tiles in task list
width, // number of tiles in a row width, // number of tiles in a row
height, // number of tiles rows height, // number of tiles rows
...@@ -2332,7 +2074,6 @@ extern "C" __global__ void generate_RBGA( ...@@ -2332,7 +2074,6 @@ extern "C" __global__ void generate_RBGA(
gen_texture_list <<<blocks,threads>>>( gen_texture_list <<<blocks,threads>>>(
num_cams, // int num_cams, num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16 gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks,
num_tiles, // number of tiles in task list num_tiles, // number of tiles in task list
width, // number of tiles in a row width, // number of tiles in a row
height, // int height, // number of tiles rows height, // int height, // number of tiles rows
...@@ -2603,9 +2344,12 @@ __global__ void mark_texture_tiles( ...@@ -2603,9 +2344,12 @@ __global__ void mark_texture_tiles(
/// int task = gpu_tasks[task_num].task; /// int task = gpu_tasks[task_num].task;
int task = get_task_task(task_num, gpu_ftasks, num_cams); int task = get_task_task(task_num, gpu_ftasks, num_cams);
if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient // if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!(task & (1 << TASK_TEXT_EN))){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!task) {// temporary disabling
return; // NOP tile return; // NOP tile
} }
}
/// int cxy = gpu_tasks[task_num].txy; /// int cxy = gpu_tasks[task_num].txy;
int cxy = get_task_txy(task_num, gpu_ftasks, num_cams); int cxy = get_task_txy(task_num, gpu_ftasks, num_cams);
...@@ -2614,7 +2358,7 @@ __global__ void mark_texture_tiles( ...@@ -2614,7 +2358,7 @@ __global__ void mark_texture_tiles(
/** /**
* Helper kernel for prepare_texture_list() (for generate_RBGA) - calculate and save * Helper kernel for prepare_texture_list() (for generate_RBGA) - calculate and save
* bitmap of available neighbors in 4 directions (needed for alpha generation of * bitmap of available neighbors in 4->8 directions (needed for alpha generation of
* the result textures to fade along the border). * the result textures to fade along the border).
* *
* @param num_cams number of cameras * @param num_cams number of cameras
...@@ -2642,9 +2386,12 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__? ...@@ -2642,9 +2386,12 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
} }
int task = get_task_task(task_num, gpu_ftasks, num_cams); int task = get_task_task(task_num, gpu_ftasks, num_cams);
if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient // if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!(task & (1 << TASK_TEXT_EN))){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!task) {// temporary disabling
return; // NOP tile return; // NOP tile
} }
}
int cxy = get_task_txy(task_num, gpu_ftasks, num_cams); int cxy = get_task_txy(task_num, gpu_ftasks, num_cams);
int x = (cxy & 0xffff); int x = (cxy & 0xffff);
...@@ -2654,13 +2401,17 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__? ...@@ -2654,13 +2401,17 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
atomicMax(woi+2, x); atomicMax(woi+2, x);
atomicMax(woi+3, y); atomicMax(woi+3, y);
int d = 0; int d = 0;
if ((y > 0) && *(gpu_texture_indices + x + (y - 1) * width)) d |= (1 << TASK_TEXTURE_N_BIT); if ((y > 0) && *(gpu_texture_indices + x + (y - 1) * width)) d |= (1 << TASK_TEXT_N_BIT);
if ((x < (width - 1)) && *(gpu_texture_indices + (x + 1) + y * width)) d |= (1 << TASK_TEXTURE_E_BIT); if ((y > 0) && (x < (width - 1)) && *(gpu_texture_indices + (x + 1) + (y - 1) * width)) d |= (1 << TASK_TEXT_NE_BIT);
if ((y < (height - 1)) && *(gpu_texture_indices + x + (y + 1) * width)) d |= (1 << TASK_TEXTURE_S_BIT); if ( (x < (width - 1)) && *(gpu_texture_indices + (x + 1) + y * width)) d |= (1 << TASK_TEXT_E_BIT);
if ((x > 0) && *(gpu_texture_indices + (x - 1) + y * width)) d |= (1 << TASK_TEXTURE_W_BIT); if ((y < (height - 1)) && (x < (width - 1)) && *(gpu_texture_indices + (x + 1) + (y + 1) * width)) d |= (1 << TASK_TEXT_SE_BIT);
if ((y < (height - 1)) && *(gpu_texture_indices + x + (y + 1) * width)) d |= (1 << TASK_TEXT_S_BIT);
if ((y < (height - 1)) && (x > 0) && *(gpu_texture_indices + (x - 1) + (y + 1) * width)) d |= (1 << TASK_TEXT_SW_BIT);
if ( (x > 0) && *(gpu_texture_indices + (x - 1) + y * width)) d |= (1 << TASK_TEXT_W_BIT);
if ((y > 0) && (x > 0) && *(gpu_texture_indices + (x - 1) + (y - 1) * width)) d |= (1 << TASK_TEXT_NW_BIT);
// Set task texture bits in global gpu_ftasks array (lower 4 bits) // Set task texture bits in global gpu_ftasks array (lower 4 bits)
/// gpu_tasks[task_num].task = ((task ^ d) & TASK_TEXTURE_BITS) ^ task; /// gpu_tasks[task_num].task = ((task ^ d) & TASK_TEXTURE_BITS) ^ task;
*(int *) (gpu_ftasks + get_task_size(num_cams) * task_num) = ((task ^ d) & TASK_TEXTURE_BITS) ^ task; *(int *) (gpu_ftasks + get_task_size(num_cams) * task_num) = ((task ^ d) & TASK_TEXTURE_BITS) ^ task; // updates task bits???
} }
/** /**
...@@ -2697,7 +2448,7 @@ __global__ void gen_texture_list( ...@@ -2697,7 +2448,7 @@ __global__ void gen_texture_list(
/// int task = gpu_tasks[task_num].task & TASK_TEXTURE_BITS; /// int task = gpu_tasks[task_num].task & TASK_TEXTURE_BITS;
int task = get_task_task(task_num, gpu_ftasks, num_cams); int task = get_task_task(task_num, gpu_ftasks, num_cams);
if (!task){ // here any bit in TASK_TEXTURE_BITS is sufficient if (!task){ // here any bit in TASK_TEXTURE_BITS is sufficient
return; // NOP tile return; // NOP tile - any non-zero bit is sufficient
} }
// int cxy = gpu_tasks[task_num].txy; // int cxy = gpu_tasks[task_num].txy;
int cxy = get_task_txy(task_num, gpu_ftasks, num_cams); int cxy = get_task_txy(task_num, gpu_ftasks, num_cams);
...@@ -2749,8 +2500,10 @@ __global__ void gen_texture_list( ...@@ -2749,8 +2500,10 @@ __global__ void gen_texture_list(
#endif // DEBUG12 #endif // DEBUG12
// *(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT); // *(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// keep only 8 LSBs of task, use higher 24 for task number // keep only 8 LSBs of task, use higher 24 for task number
*(gpu_texture_indices + buf_offset) = (task & ((1 << CORR_NTILE_SHIFT) -1)) | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT); // *(gpu_texture_indices + buf_offset) = (task & ((1 << TEXT_NTILE_SHIFT) -1)) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT // keep only 4 lower task bits
*(gpu_texture_indices + buf_offset) = (task & TASK_TEXTURE_BITS) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT // TASK_TEXTURE_BITS
} }
//inline __device__ int get_task_size(int num_cams){ //inline __device__ int get_task_size(int num_cams){
// return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams); // return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams);
...@@ -2817,12 +2570,16 @@ extern "C" __global__ void create_nonoverlap_list( ...@@ -2817,12 +2570,16 @@ extern "C" __global__ void create_nonoverlap_list(
} }
int task_task = get_task_task(num_tile, gpu_ftasks, num_cams); int task_task = get_task_task(num_tile, gpu_ftasks, num_cams);
/// if ((gpu_tasks[num_tile].task & TASK_TEXTURE_BITS) == 0){ /// if ((gpu_tasks[num_tile].task & TASK_TEXTURE_BITS) == 0){
if ((task_task & TASK_TEXTURE_BITS) == 0){ // if ((task_task & TASK_TEXTURE_BITS) == 0){
return; // nothing to do if (!(task_task & (1 << TASK_TEXT_EN))){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!task_task) {// temporary disabling
return; // NOP tile
}
} }
/// int cxy = gpu_tasks[num_tile].txy; /// int cxy = gpu_tasks[num_tile].txy;
int cxy = get_task_txy(num_tile, gpu_ftasks, num_cams); int cxy = get_task_txy(num_tile, gpu_ftasks, num_cams);
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS; // all texture direction bits as it is non-overlapped list (bits probably unused)
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
// if (gpu_tasks[num_tile].task != 0) { // if (gpu_tasks[num_tile].task != 0) {
if (task_task != 0) { if (task_task != 0) {
nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code; nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code;
...@@ -2862,9 +2619,11 @@ __global__ void index_correlate( ...@@ -2862,9 +2619,11 @@ __global__ void index_correlate(
int sel_pairs[] = {sel_pairs0, sel_pairs1, sel_pairs2, sel_pairs3}; int sel_pairs[] = {sel_pairs0, sel_pairs1, sel_pairs2, sel_pairs3};
// int task_size = get_task_size(num_cams); // int task_size = get_task_size(num_cams);
int task_task =get_task_task(num_tile, gpu_ftasks, num_cams); int task_task =get_task_task(num_tile, gpu_ftasks, num_cams);
if (((task_task >> TASK_CORR_BITS) & 1) == 0){ // needs correlation. Maybe just check task_task != 0? if ((task_task & ((1 << TASK_CORR_EN) | (1 << TASK_INTER_EN))) == 0){ // needs correlation. Maybe just check task_task != 0? TASK_CORR_EN
if (!task_task) { // temporary disabling
return; return;
} }
}
int pair_list_start = pairs_offsets[num_cams]; int pair_list_start = pairs_offsets[num_cams];
int pair_list_len = pairs_offsets[num_cams+1] - pair_list_start; int pair_list_len = pairs_offsets[num_cams+1] - pair_list_start;
int num_mask_words = (pair_list_len + 31) >> 5; // ceil int num_mask_words = (pair_list_len + 31) >> 5; // ceil
...@@ -2920,9 +2679,11 @@ __global__ void index_inter_correlate( ...@@ -2920,9 +2679,11 @@ __global__ void index_inter_correlate(
} }
// int task_size = get_task_size(num_cams); // int task_size = get_task_size(num_cams);
int task_task =get_task_task(num_tile, gpu_ftasks, num_cams); int task_task =get_task_task(num_tile, gpu_ftasks, num_cams);
if (((task_task >> TASK_CORR_BITS) & 1) == 0){ // needs correlation. Maybe just check task_task != 0? if (((task_task >> TASK_INTER_EN) & 1) == 0){ // needs correlation. Maybe just check task_task != 0?
if (!task_task){ // temporary disabling
return; return;
} }
}
int nb = __popc (sel_sensors); // number of non-zero bits int nb = __popc (sel_sensors); // number of non-zero bits
if (nb > 0){ if (nb > 0){
int indx = atomicAdd(pnum_corr_tiles, nb+1); int indx = atomicAdd(pnum_corr_tiles, nb+1);
...@@ -3401,10 +3162,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3401,10 +3162,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
} }
// get number of tile // get number of tile
int tile_code = gpu_texture_indices[tile_indx + gpu_texture_indices_offset]; // Added for Java, no DP int tile_code = gpu_texture_indices[tile_indx + gpu_texture_indices_offset]; // Added for Java, no DP
if ((tile_code & (1 << CORR_TEXTURE_BIT)) == 0){ if ((tile_code & (1 << LIST_TEXTURE_BIT)) == 0){
return; // nothing to do return; // nothing to do
} }
int tile_num = tile_code >> CORR_NTILE_SHIFT; int tile_num = tile_code >> TEXT_NTILE_SHIFT;
#ifdef DEBUG7A #ifdef DEBUG7A
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){ if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){
...@@ -3445,7 +3206,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3445,7 +3206,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
float * max_diff_shared = &all_shared[offsets[5]] ; // [num_cams]; // 16 = 0x10 | 4 = 0x4 | return to system memory (optionally pass null to skip calculation) float * max_diff_shared = &all_shared[offsets[5]] ; // [num_cams]; // 16 = 0x10 | 4 = 0x4 | return to system memory (optionally pass null to skip calculation)
float * max_diff_tmp = &all_shared[offsets[6]] ; // [num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 8 = 0x80 | 4 * 8 = 0x20 | [4][8] float * max_diff_tmp = &all_shared[offsets[6]] ; // [num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 8 = 0x80 | 4 * 8 = 0x20 | [4][8]
float * ports_rgb_tmp = &all_shared[offsets[7]] ; // [colors][num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 1 * 8 = 0x80 | 4 * 3 * 8 = 0x60 | [4*3][8] float * ports_rgb_tmp = &all_shared[offsets[7]] ; // [colors][num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 1 * 8 = 0x80 | 4 * 3 * 8 = 0x60 | [4*3][8]
float * texture_averaging = max_diff_tmp; // [NUM_THREADS] reusing, needs 32 elements for texture averaging, shared
#ifdef DBG_TILE #ifdef DBG_TILE
#ifdef DEBUG7AXX #ifdef DEBUG7AXX
...@@ -3684,7 +3445,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3684,7 +3445,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
rgbaw, // (float *) shr1.rgbaw, // float * rgba, rgbaw, // (float *) shr1.rgbaw, // float * rgba,
// if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed // if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed
// to calculate max_diff_shared // to calculate max_diff_shared
calc_extra, // int calc_extra, // 1 - calcualate ports_rgb, max_diff calc_extra, // | (keep_weights & 2), // int calc_extra, // 1 - calcualate ports_rgb, max_diff
ports_rgb_shared,// float ports_rgb_shared [colors][num_cams], // return to system memory (optionally pass null to skip calculation) ports_rgb_shared,// float ports_rgb_shared [colors][num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_shared, // float max_diff_shared [num_cams], // return to system memory (optionally pass null to skip calculation) max_diff_shared, // float max_diff_shared [num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_tmp, // float max_diff_tmp [num_cams][TEXTURE_THREADS_PER_TILE], max_diff_tmp, // float max_diff_tmp [num_cams][TEXTURE_THREADS_PER_TILE],
...@@ -3695,7 +3456,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3695,7 +3456,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
min_agree, // float min_agree, NOT USED? // minimal number of channels to agree on a point (real number to work with fuzzy averages) min_agree, // float min_agree, NOT USED? // minimal number of channels to agree on a point (real number to work with fuzzy averages)
weights, // float * chn_weights, // color channel weights, sum == 1.0 weights, // float * chn_weights, // color channel weights, sum == 1.0
dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
(keep_weights & 1), // int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated) (keep_weights & 1), // | (keep_weights & 2), // int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated)
debug ); // int debug ); debug ); // int debug );
__syncthreads(); // _syncthreads();1 __syncthreads(); // _syncthreads();1
...@@ -3761,9 +3522,9 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3761,9 +3522,9 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
tile_code &= TASK_TEXTURE_BITS; tile_code &= TASK_TEXTURE_BITS;
if (!tile_code){ //// if (!tile_code){
return; // should not happen //// return; // should not happen
} //// }
// if no extra and no overlap -> nothing remains, return // if no extra and no overlap -> nothing remains, return
if (gpu_texture_rbg && (texture_rbg_stride != 0)) { // generate RGBA (overlapped) // keep_weights if (gpu_texture_rbg && (texture_rbg_stride != 0)) { // generate RGBA (overlapped) // keep_weights
#ifdef DEBUG7A #ifdef DEBUG7A
...@@ -3782,27 +3543,76 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3782,27 +3543,76 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
} }
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG12 #endif // DEBUG12
int alpha_mode = alphaIndex[tile_code]; // only 4 lowest bits int alpha_mode = tile_code & 0xff; // alphaIndex[tile_code]; // only 4 lowest bits
if (!alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ??? if (alpha_mode != 0xff){ // only add if needed, alpha_mode == 0xff (neighbors from all 8 directions) - keep as is. FIXME: alpha_mode ???
// Calculate average value per color, need 32 shared array
for (int ncol = 0; ncol < colors; ncol++) {
int sum_index = threadIdx.x + threadIdx.y * TEXTURE_THREADS_PER_TILE; // 0.. 31
texture_averaging[sum_index] = 0;
for (int pass = 0; pass < 8; pass ++) { for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1); int row = pass * 2 + (threadIdx.y >> 1);
int col = ((threadIdx.y & 1) << 3) + threadIdx.x; int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row * DTT_SIZE21 + col; int i = row * DTT_SIZE21 + col;
int gi = row * DTT_SIZE2 + col;
float * rgba_i = rgbaw + i; float * rgba_i = rgbaw + i;
// always copy 3 (1) colors + alpha texture_averaging[sum_index] += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
if (colors == 3){ }
__syncthreads();
if (threadIdx.y == 0){ // combine sums
#pragma unroll #pragma unroll
for (int ncol = 0; ncol < colors + 1; ncol++) { // 4 for (int i = 1; i < 4; i++) { // reduce sums to 8
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT texture_averaging[threadIdx.x] += texture_averaging[threadIdx.x + TEXTURE_THREADS_PER_TILE * i];
} }
} else { // assuming colors = 1 }
__syncthreads();
if ((threadIdx.y == 0) && (threadIdx.x == 0)){ // combine sums
#pragma unroll #pragma unroll
for (int ncol = 0; ncol < 1 + 1; ncol++) { // 2 for (int i = 1; i < TEXTURE_THREADS_PER_TILE; i++) { // reduce sums to 8
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT texture_averaging[0] += texture_averaging[i];
} }
texture_averaging[0] /= 64; // average value for uniform field
} }
__syncthreads();
float avg_val = texture_averaging[0];
// now add scale average value for each missing direction
for (int idir = 0; idir < 8; idir ++) if ((alpha_mode & (1 << idir)) == 0) { // no tile in this direction
/* */
int row, col;
switch (idir >> 1) {
case 0:
row = 4 + threadIdx.y;
col = 4 + threadIdx.x;
break;
case 1:
row = 4 + (threadIdx.x >> 2) + (threadIdx.y << 1);
col = 8 + (threadIdx.x & 3);
break;
case 2:
row = 8 + threadIdx.y;
col = 4 + threadIdx.x;
break;
case 3:
row = 4 + (threadIdx.x >> 2) + (threadIdx.y << 1);
col = 4 + (threadIdx.x & 3);
break;
} }
int i = row * DTT_SIZE21 + col;
float * rgba_i = rgbaw + i;
// always copy 3 (1) colors + alpha
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) += textureBlend[idir][(threadIdx.y <<3) + threadIdx.x] * avg_val;
/*
for (int pass = 0; pass < 8; pass ++) {
int row1 = pass * 2 + (threadIdx.y >> 1);
int col1 = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row1 * DTT_SIZE21 + col1;
int gi = row1 * DTT_SIZE2 + col1;
float * rgba_i = rgbaw + i;
// always copy 3 (1) colors + alpha
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) += textureBlend[idir][gi] * avg_val;
}
*/
}
}
// __syncthreads();
} }
int slice_stride = texture_rbg_stride * (*(woi + 3) + 1) * DTT_SIZE; // offset to the next color int slice_stride = texture_rbg_stride * (*(woi + 3) + 1) * DTT_SIZE; // offset to the next color
int tileY = tile_num / tilesx; // TILES-X; // slow, but 1 per tile int tileY = tile_num / tilesx; // TILES-X; // slow, but 1 per tile
...@@ -3826,6 +3636,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3826,6 +3636,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
} }
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG12 #endif // DEBUG12
// copy textures to global memory
for (int pass = 0; pass < 8; pass ++) { for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1); // row inside a tile (0..15) int row = pass * 2 + (threadIdx.y >> 1); // row inside a tile (0..15)
int col = ((threadIdx.y & 1) << 3) + threadIdx.x; // column inside a tile (0..15) int col = ((threadIdx.y & 1) << 3) + threadIdx.x; // column inside a tile (0..15)
...@@ -3843,10 +3654,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3843,10 +3654,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
} }
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG12 #endif // DEBUG12
/// if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILES-Y)) && (g_col < (DTT_SIZE * TILES-X)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILES-X)))){
// always copy 3 (1) colors + alpha // always copy 3 (1) colors + alpha
if (colors == 3){ if (colors == 3){
#pragma unroll #pragma unroll
...@@ -3860,21 +3667,74 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3860,21 +3667,74 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
} }
} }
} }
// generate and copy per-sensor texture
if (keep_weights & 2){ // copy individual sensors output if (keep_weights & 2){ // copy individual sensors output
for (int ncam = 0; ncam < num_cams; ncam++) { for (int ncam = 0; ncam < num_cams; ncam++) {
float * mclt_dst_ncam = mclt_debayer + (ncam * colors ) * (MCLT_UNION_LEN); float * mclt_dst_ncam = mclt_debayer + (ncam * colors ) * (MCLT_UNION_LEN);
if (!alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ??? //if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
if (alpha_mode != 0xff){
for (int ncol = 0; ncol < colors; ncol++) {
// calculate average value for blending
int sum_index = threadIdx.x + threadIdx.y * TEXTURE_THREADS_PER_TILE; // 0.. 31
texture_averaging[sum_index] = 0;
for (int pass = 0; pass < 8; pass ++) { for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1); int row = pass * 2 + (threadIdx.y >> 1);
int col = ((threadIdx.y & 1) << 3) + threadIdx.x; int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row * DTT_SIZE21 + col; int i = row * DTT_SIZE21 + col;
int gi = row * DTT_SIZE2 + col; float * rgba_i = rgbaw + i;
texture_averaging[sum_index] += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
}
__syncthreads();
if (threadIdx.y == 0){ // combine sums
#pragma unroll
for (int i = 1; i < 4; i++) { // reduce sums to 8
texture_averaging[threadIdx.x] += texture_averaging[threadIdx.x + TEXTURE_THREADS_PER_TILE * i];
}
}
__syncthreads();
if ((threadIdx.y == 0) && (threadIdx.x == 0)){ // combine sums
#pragma unroll
for (int i = 1; i < TEXTURE_THREADS_PER_TILE; i++) { // reduce sums to 8
texture_averaging[0] += texture_averaging[i];
}
texture_averaging[0] /= 64; // average value for uniform field
}
__syncthreads();
float avg_val = texture_averaging[0];
// Possible to re-use ports_rgb_shared[], if needed (change to (calc_extra | (keep_weights & 2) in tile_combine_rgba()).
// Now using averaging here (less noise if averaging sensor outside).
// float avg_val = ports_rgb_shared[ncol * num_cams + ncam]; // texture_averaging[0];
for (int idir = 0; idir < 8; idir ++) if ((alpha_mode & (1 << idir)) == 0) { // no tile in this direction
/* */
int row, col;
switch (idir >> 1) {
case 0:
row = 4 + threadIdx.y;
col = 4 + threadIdx.x;
break;
case 1:
row = 4 + (threadIdx.x >> 2) + (threadIdx.y << 1);
col = 8 + (threadIdx.x & 3);
break;
case 2:
row = 8 + threadIdx.y;
col = 4 + threadIdx.x;
break;
case 3:
row = 4 + (threadIdx.x >> 2) + (threadIdx.y << 1);
col = 4 + (threadIdx.x & 3);
break;
}
int i = row * DTT_SIZE21 + col;
float * mclt_dst_i = mclt_dst_ncam + i; float * mclt_dst_i = mclt_dst_ncam + i;
for (int ncol = 0; ncol < colors; ncol++) { int gi = (threadIdx.y <<3) + threadIdx.x;
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT *(mclt_dst_i + ncol * (MCLT_UNION_LEN)) += textureBlend[idir][gi] * avg_val;
} }
__syncthreads(); // needed?
} }
} }
for (int pass = 0; pass < 8; pass ++) { for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1); // row inside a tile (0..15) int row = pass * 2 + (threadIdx.y >> 1); // row inside a tile (0..15)
int col = ((threadIdx.y & 1) << 3) + threadIdx.x; // column inside a tile (0..15) int col = ((threadIdx.y & 1) << 3) + threadIdx.x; // column inside a tile (0..15)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment