Commit 0b00f0ae authored by Andrey Filippov

updated GPU code with new texture boundaries

parent 95e514a4
......@@ -1607,9 +1607,6 @@ public class TexturedModel {
}
// only trim if nothing obscures this and has some BG
if (is_fg_tile[fnslice][tile] && has_bg_tile[fnslice][tile]) {
if (dbg_is_fg != null) {
dbg_is_fg[cindx] = 1000.0;
}
if (vars_inter[cindx] > fg_max_inter) {
dirs_avg[cindx] = Double.NaN;
}
......@@ -2246,7 +2243,7 @@ public class TexturedModel {
tilesX, // final int tilesX,
slice_disparities, // final double [][] slice_disparities,
sensor_textures, // final double [][] sensor_texture, // per-sensor texture value
combo_textures, // null, // final double [] combo_texture_in, // average texture value
null, // combo_textures, // null, // final double [] combo_texture_in, // average texture value
ref_scene.getImageName()); // final String dbg_prefix);
......
......@@ -53,7 +53,11 @@ nofast
GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.090526999999998ms), corr2D: 30.623282999999997ms), textures: 231.154339ms, RGBA: 220.503017ms
*/
// Previous 4-direction (N, E, S, W) mask, superseded by the 8-direction mask below.
// Kept commented out for reference only: leaving it active would redefine
// TASK_TEXTURE_BITS with a different replacement list.
//#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
// Mask combining all eight per-direction texture bits: N, NE, E, SE, S, SW, W, NW.
#define TASK_TEXTURE_BITS ((1 << TASK_TEXT_N_BIT) | (1 << TASK_TEXT_NE_BIT) | (1 << TASK_TEXT_E_BIT) | (1 << TASK_TEXT_SE_BIT)\
| (1 << TASK_TEXT_S_BIT) | (1 << TASK_TEXT_SW_BIT) | (1 << TASK_TEXT_W_BIT) | (1 << TASK_TEXT_NW_BIT))
//#define IMCLT14
//#define NOICLT 1
......@@ -244,83 +248,74 @@ def set_imclt_sa(stride=9):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np # math
def printAlphaFade(transform_size):
    """Print the CUDA `alphaIndex[16]` and `alphaFade[9][ts2*ts2]` constant tables.

    transform_size -- half of the window side; the window is ts2 x ts2 with
                      ts2 = 2 * transform_size.

    Row 0 of alphaFade is all 1.0. Rows 1..4 fade along one axis of the window
    and rows 5..8 combine two of those fades along a diagonal split.
    alphaIndex maps a 16-valued mask to the row to use (unlisted masks -> row 0).
    Output goes to stdout as C source text.
    """
    ts2 = 2 * transform_size
    ts2m1 = ts2 - 1
    alphaFade = np.zeros(shape=(9, ts2 * ts2), dtype=float)
    alphaIndex = np.zeros(shape=(16,), dtype=int)
    # Sized by ts2 (was hard-coded to 16, which only fits transform_size <= 8).
    fade1d = np.zeros(shape=(ts2,), dtype=float)
    for i in range(ts2):
        # Half-cosine ramp rising from ~0 to ~1 across the full window.
        fade1d[i] = 0.5 * (1.0 - np.cos(np.pi * (i + 0.5) / ts2))
    for i in range(ts2):
        for j in range(ts2):
            indx = i * ts2 + j
            for m in range(16):
                if m == 1:
                    alphaIndex[m] = 1
                    alphaFade[alphaIndex[m]][indx] = fade1d[ts2m1 - i]
                elif m == 2:
                    alphaIndex[m] = 2
                    alphaFade[alphaIndex[m]][indx] = fade1d[j]
                elif m == 4:
                    alphaIndex[m] = 3
                    alphaFade[alphaIndex[m]][indx] = fade1d[i]
                elif m == 8:
                    alphaIndex[m] = 4
                    alphaFade[alphaIndex[m]][indx] = fade1d[ts2m1 - j]
                elif m == 3:
                    alphaIndex[m] = 5
                    # (a, b)[cond] selects b when cond is true, a otherwise.
                    alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - i], fade1d[j])[j > ts2m1 - i]
                elif m == 6:
                    alphaIndex[m] = 6
                    alphaFade[alphaIndex[m]][indx] = (fade1d[i], fade1d[j])[j > i]
                elif m == 9:
                    alphaIndex[m] = 7
                    alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - j], fade1d[ts2m1 - i])[j > i]
                elif m == 12:
                    alphaIndex[m] = 8
                    alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - j], fade1d[i])[i > ts2m1 - j]
                else:
                    # Every other mask value (including 0) shares the all-ones row.
                    alphaIndex[m] = 0
                    alphaFade[alphaIndex[m]][indx] = 1.0
    floats_in_line = 8
    print("__constant__ int alphaIndex[16] = {")
    for m in range(16):
        if (m % floats_in_line) == 0:
            print("\n ", end="")
        else:
            print(" ", end="")
        print("%d" % (alphaIndex[m]), end="")
        if m < (16 - 1):
            print(",", end="")
    print("};")
    print("__constant__ float alphaFade[9][%d] = {" % (ts2 * ts2))
    for m in range(9):
        for i in range(ts2 * ts2):
            if (i % floats_in_line) == 0:
                print(" ", end="")
                if i == 0:
                    print("{", end="")
                else:
                    print(" ", end="")
            print("%ff" % (alphaFade[m][i]), end="")
            if ((i + 1) % floats_in_line) == 0:
                if i == (ts2 * ts2 - 1):
                    print("}", end="")
                else:
                    print(",")
            else:
                print(", ", end="")
        if m == (9 - 1):
            print("};")
        else:
            print(",")


def printTextureBlend(transform_size):
    """Print the CUDA `textureBlend[8][transform_size*transform_size//2]` table.

    transform_size -- half of the window side; the window is ts2 x ts2 with
                      ts2 = 2 * transform_size.

    For each of the 8 entries of dirBlend a full ts2 x ts2 weight window is
    built as a product of one-dimensional half-cosine ramps, then only a
    rectangular segment of that window is printed:
      rows 0,1 : transform_size wide, transform_size/2 high, upper band
      rows 2,3 : transform_size/2 wide, transform_size high, right band
      rows 4,5 : transform_size wide, transform_size/2 high, lower band
      rows 6,7 : transform_size/2 wide, transform_size high, left band
    The script was written for transform_size == 8; offsets are derived from
    transform_size here and give identical output for 8.
    Output goes to stdout as C source text.
    """
    ts2 = 2 * transform_size
    ts2m1 = ts2 - 1
    half = transform_size // 2
    alphabBlend = np.zeros(shape=(8, ts2 * ts2), dtype=float)
    blend1d = np.zeros(shape=(ts2,), dtype=float)
    dirBlend = ((0, 1), (-1, 1), (-1, 0), (-1, -1), (0, -1), (1, -1), (1, 0), (1, 1))
    # Ramp falls from ~1 to ~0 over the first transform_size samples; the rest stays 0.
    for i in range(transform_size):
        blend1d[i] = 0.5 * (1.0 + np.cos(np.pi * (i + 0.5) / transform_size))
    for i in range(ts2):
        for j in range(ts2):
            indx = i * ts2 + j
            for m, direction in enumerate(dirBlend):
                a = 1.0
                # First component selects the ramp along j (columns).
                if direction[0] > 0:
                    a *= blend1d[j]
                elif direction[0] < 0:
                    a *= blend1d[ts2m1 - j]
                # Second component selects the ramp along i (rows).
                if direction[1] > 0:
                    a *= blend1d[i]
                elif direction[1] < 0:
                    a *= blend1d[ts2m1 - i]
                alphabBlend[m][indx] = a
    segment_len = transform_size * transform_size // 2
    print("__constant__ float textureBlend[8][%d] = {" % (segment_len))
    for m, blend in enumerate(alphabBlend):
        # Rows 2,3,6,7 are the narrow (half-width) segments, printed half as wide.
        if ((m >> 1) & 1) != 0:
            floats_in_line = half
        else:
            floats_in_line = transform_size
        for i in range(segment_len):
            if m in (0, 1):
                x = half + (i % transform_size)
                y = half + (i // transform_size)
            elif m in (2, 3):
                x = transform_size + (i % half)
                y = half + (i // half)
            elif m in (4, 5):
                x = half + (i % transform_size)
                y = transform_size + (i // transform_size)
            else:  # m in (6, 7)
                x = half + (i % half)
                y = half + (i // half)
            indx = x + ts2 * y
            if (i % floats_in_line) == 0:
                print(" ", end="")
                if i == 0:
                    print("{", end="")
                else:
                    print(" ", end="")
            print("%ff" % (blend[indx]), end="")
            if ((i + 1) % floats_in_line) == 0:
                if i == (segment_len - 1):
                    print("}", end="")
                else:
                    print(",")
            else:
                print(", ", end="")
        if m == len(alphabBlend) - 1:
            print("};")
        else:
            print(",")


printAlphaFade(8)
printTextureBlend(8)
"""
Set up correlation pairs - run:
setup_pairs(0,16)
......@@ -461,306 +456,55 @@ __constant__ float LoG_corr[64]={ // modify if needed high-pass filter before co
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f
};
/*
__constant__ int pairs[6][2]={
{0, 1},
{2, 3},
{0, 2},
{1, 3},
{0, 3},
{2, 1}};
*/
// Lookup from a 16-valued mask to the row of alphaFade[] to use, as produced by
// the printAlphaFade() script embedded in this file: masks 1, 2, 4, 8 select
// rows 1..4, masks 3, 6, 9, 12 select rows 5..8, and every other mask value
// selects row 0 (all 1.0).
// NOTE(review): the mask is presumably a 4-bit neighbor bitmap - confirm against the kernel that indexes it.
__constant__ int alphaIndex[16] = {0, 1, 2, 5, 3, 0, 6, 0, 4, 7, 0, 0, 8, 0, 0, 0};
__constant__ float alphaFade[9][256] = {
{1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f},
{0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f,
0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f,
0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f,
0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f,
0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f,
0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f},
{0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f},
{0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f,
0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f, 0.002408f,
0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f,
0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f, 0.021530f,
0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f,
0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f, 0.059039f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f},
{0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f},
{0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.997592f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.978470f, 0.997592f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.059039f, 0.059039f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.021530f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f},
{0.002408f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.021530f, 0.021530f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.059039f, 0.059039f, 0.059039f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.113495f, 0.113495f, 0.113495f, 0.113495f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.549009f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.940961f, 0.978470f, 0.997592f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.978470f, 0.997592f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f},
{0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.997592f, 0.978470f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.059039f, 0.059039f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.021530f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f},
{0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.002408f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.021530f, 0.021530f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.059039f, 0.059039f, 0.059039f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.113495f, 0.113495f, 0.113495f, 0.113495f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.182803f, 0.182803f, 0.182803f, 0.182803f, 0.182803f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f, 0.264302f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f, 0.354858f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f, 0.450991f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.549009f,
0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f, 0.549009f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.645142f, 0.645142f,
0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f, 0.645142f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.735698f, 0.735698f, 0.735698f,
0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f, 0.735698f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f, 0.817197f,
0.997592f, 0.978470f, 0.940961f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f, 0.886505f,
0.997592f, 0.978470f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f, 0.940961f,
0.997592f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f, 0.978470f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f,
0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f, 0.997592f}};
// Blend weights for 8 directions, generated by the printTextureBlend(8) script
// embedded in this file. Each row is a 32-element segment cut out of a 16x16
// weight window built as a product of one-dimensional half-cosine ramps:
//   rows 0,1 and 4,5 are stored as 4 lines of 8 values (8 wide, 4 high),
//   rows 2,3 and 6,7 are stored as 8 lines of 4 values (4 wide, 8 high).
// Row order follows the script's dirBlend tuple:
//   (0,1), (-1,1), (-1,0), (-1,-1), (0,-1), (1,-1), (1,0), (1,1).
// NOTE(review): the mapping of these rows to the N/NE/E/... task bits is not visible here - confirm in the consuming kernel.
__constant__ float textureBlend[8][32] = {
{0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f,
0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f,
0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f,
0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.003867f, 0.033913f, 0.089431f, 0.161970f,
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.002135f, 0.018725f, 0.049379f, 0.089431f,
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000810f, 0.007101f, 0.018725f, 0.033913f,
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000092f, 0.000810f, 0.002135f, 0.003867f},
{0.009607f, 0.084265f, 0.222215f, 0.402455f,
0.009607f, 0.084265f, 0.222215f, 0.402455f,
0.009607f, 0.084265f, 0.222215f, 0.402455f,
0.009607f, 0.084265f, 0.222215f, 0.402455f,
0.009607f, 0.084265f, 0.222215f, 0.402455f,
0.009607f, 0.084265f, 0.222215f, 0.402455f,
0.009607f, 0.084265f, 0.222215f, 0.402455f,
0.009607f, 0.084265f, 0.222215f, 0.402455f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.000092f, 0.000810f, 0.002135f, 0.003867f,
0.000810f, 0.007101f, 0.018725f, 0.033913f,
0.002135f, 0.018725f, 0.049379f, 0.089431f,
0.003867f, 0.033913f, 0.089431f, 0.161970f},
{0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f, 0.009607f,
0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f, 0.084265f,
0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f, 0.222215f,
0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f, 0.402455f},
{0.003867f, 0.002135f, 0.000810f, 0.000092f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.033913f, 0.018725f, 0.007101f, 0.000810f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.089431f, 0.049379f, 0.018725f, 0.002135f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.161970f, 0.089431f, 0.033913f, 0.003867f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.402455f, 0.222215f, 0.084265f, 0.009607f,
0.402455f, 0.222215f, 0.084265f, 0.009607f,
0.402455f, 0.222215f, 0.084265f, 0.009607f,
0.402455f, 0.222215f, 0.084265f, 0.009607f,
0.402455f, 0.222215f, 0.084265f, 0.009607f,
0.402455f, 0.222215f, 0.084265f, 0.009607f,
0.402455f, 0.222215f, 0.084265f, 0.009607f,
0.402455f, 0.222215f, 0.084265f, 0.009607f},
{0.161970f, 0.089431f, 0.033913f, 0.003867f,
0.089431f, 0.049379f, 0.018725f, 0.002135f,
0.033913f, 0.018725f, 0.007101f, 0.000810f,
0.003867f, 0.002135f, 0.000810f, 0.000092f,
0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.000000f, 0.000000f, 0.000000f, 0.000000f,
0.000000f, 0.000000f, 0.000000f, 0.000000f}};
// 18 entries, indices 0..17; entry n equals n*(n-1)*(n-2)/6, i.e. the running
// sum of k*(k-1)/2 for k < n.
// NOTE(review): presumably the start offset of the pair list for n sensors in a concatenated table - confirm against the code that indexes it.
__constant__ int pairs_offsets[]= {0, 0, 0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455, 560, 680};
// {pair_start, pair_end, pair_length}
......@@ -2298,7 +2042,6 @@ extern "C" __global__ void generate_RBGA(
mark_texture_tiles <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
/// gpu_tasks,
num_tiles, // number of tiles in task list
width, // number of tiles in a row
gpu_texture_indices); // packed tile + bits (now only (1 << 7)
......@@ -2311,7 +2054,6 @@ extern "C" __global__ void generate_RBGA(
mark_texture_neighbor_tiles <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks,
num_tiles, // number of tiles in task list
width, // number of tiles in a row
height, // number of tiles rows
......@@ -2332,7 +2074,6 @@ extern "C" __global__ void generate_RBGA(
gen_texture_list <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks,
num_tiles, // number of tiles in task list
width, // number of tiles in a row
height, // int height, // number of tiles rows
......@@ -2603,9 +2344,12 @@ __global__ void mark_texture_tiles(
/// int task = gpu_tasks[task_num].task;
int task = get_task_task(task_num, gpu_ftasks, num_cams);
if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient
// if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!(task & (1 << TASK_TEXT_EN))){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!task) {// temporary disabling
return; // NOP tile
}
}
/// int cxy = gpu_tasks[task_num].txy;
int cxy = get_task_txy(task_num, gpu_ftasks, num_cams);
......@@ -2614,7 +2358,7 @@ __global__ void mark_texture_tiles(
/**
* Helper kernel for prepare_texture_list() (for generate_RBGA) - calculate and save
* bitmap of available neighbors in 4 directions (needed for alpha generation of
* bitmap of available neighbors in 8 directions (previously 4), needed for alpha generation of
* the result textures to fade along the border.
*
* @param num_cams number of cameras
......@@ -2642,9 +2386,12 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
}
int task = get_task_task(task_num, gpu_ftasks, num_cams);
if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient
// if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!(task & (1 << TASK_TEXT_EN))){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!task) {// temporary disabling
return; // NOP tile
}
}
int cxy = get_task_txy(task_num, gpu_ftasks, num_cams);
int x = (cxy & 0xffff);
......@@ -2654,13 +2401,17 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
atomicMax(woi+2, x);
atomicMax(woi+3, y);
int d = 0;
if ((y > 0) && *(gpu_texture_indices + x + (y - 1) * width)) d |= (1 << TASK_TEXTURE_N_BIT);
if ((x < (width - 1)) && *(gpu_texture_indices + (x + 1) + y * width)) d |= (1 << TASK_TEXTURE_E_BIT);
if ((y < (height - 1)) && *(gpu_texture_indices + x + (y + 1) * width)) d |= (1 << TASK_TEXTURE_S_BIT);
if ((x > 0) && *(gpu_texture_indices + (x - 1) + y * width)) d |= (1 << TASK_TEXTURE_W_BIT);
if ((y > 0) && *(gpu_texture_indices + x + (y - 1) * width)) d |= (1 << TASK_TEXT_N_BIT);
if ((y > 0) && (x < (width - 1)) && *(gpu_texture_indices + (x + 1) + (y - 1) * width)) d |= (1 << TASK_TEXT_NE_BIT);
if ( (x < (width - 1)) && *(gpu_texture_indices + (x + 1) + y * width)) d |= (1 << TASK_TEXT_E_BIT);
if ((y < (height - 1)) && (x < (width - 1)) && *(gpu_texture_indices + (x + 1) + (y + 1) * width)) d |= (1 << TASK_TEXT_SE_BIT);
if ((y < (height - 1)) && *(gpu_texture_indices + x + (y + 1) * width)) d |= (1 << TASK_TEXT_S_BIT);
if ((y < (height - 1)) && (x > 0) && *(gpu_texture_indices + (x - 1) + (y + 1) * width)) d |= (1 << TASK_TEXT_SW_BIT);
if ( (x > 0) && *(gpu_texture_indices + (x - 1) + y * width)) d |= (1 << TASK_TEXT_W_BIT);
if ((y > 0) && (x > 0) && *(gpu_texture_indices + (x - 1) + (y - 1) * width)) d |= (1 << TASK_TEXT_NW_BIT);
// Set task texture bits in global gpu_ftasks array (the 8 neighbor-direction bits selected by TASK_TEXTURE_BITS)
/// gpu_tasks[task_num].task = ((task ^ d) & TASK_TEXTURE_BITS) ^ task;
*(int *) (gpu_ftasks + get_task_size(num_cams) * task_num) = ((task ^ d) & TASK_TEXTURE_BITS) ^ task;
*(int *) (gpu_ftasks + get_task_size(num_cams) * task_num) = ((task ^ d) & TASK_TEXTURE_BITS) ^ task; // updates task bits???
}
/**
......@@ -2697,7 +2448,7 @@ __global__ void gen_texture_list(
/// int task = gpu_tasks[task_num].task & TASK_TEXTURE_BITS;
int task = get_task_task(task_num, gpu_ftasks, num_cams);
if (!task){ // here any bit in TASK_TEXTURE_BITS is sufficient
return; // NOP tile
return; // NOP tile - any non-zero bit is sufficient
}
// int cxy = gpu_tasks[task_num].txy;
int cxy = get_task_txy(task_num, gpu_ftasks, num_cams);
......@@ -2749,8 +2500,10 @@ __global__ void gen_texture_list(
#endif // DEBUG12
// *(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// keep only 8 LSBs of task, use higher 24 for task number
*(gpu_texture_indices + buf_offset) = (task & ((1 << CORR_NTILE_SHIFT) -1)) | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT
// *(gpu_texture_indices + buf_offset) = (task & ((1 << TEXT_NTILE_SHIFT) -1)) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// keep only the task bits selected by TASK_TEXTURE_BITS (the 8 neighbor-direction bits)
*(gpu_texture_indices + buf_offset) = (task & TASK_TEXTURE_BITS) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT // TASK_TEXTURE_BITS
}
//inline __device__ int get_task_size(int num_cams){
// return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams);
......@@ -2817,12 +2570,16 @@ extern "C" __global__ void create_nonoverlap_list(
}
int task_task = get_task_task(num_tile, gpu_ftasks, num_cams);
/// if ((gpu_tasks[num_tile].task & TASK_TEXTURE_BITS) == 0){
if ((task_task & TASK_TEXTURE_BITS) == 0){
return; // nothing to do
// if ((task_task & TASK_TEXTURE_BITS) == 0){
if (!(task_task & (1 << TASK_TEXT_EN))){ // here any bit in TASK_TEXTURE_BITS is sufficient
if (!task_task) {// temporary disabling
return; // NOP tile
}
}
/// int cxy = gpu_tasks[num_tile].txy;
int cxy = get_task_txy(num_tile, gpu_ftasks, num_cams);
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
// all texture direction bits as it is non-overlapped list (bits probably unused)
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
// if (gpu_tasks[num_tile].task != 0) {
if (task_task != 0) {
nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code;
......@@ -2862,9 +2619,11 @@ __global__ void index_correlate(
int sel_pairs[] = {sel_pairs0, sel_pairs1, sel_pairs2, sel_pairs3};
// int task_size = get_task_size(num_cams);
int task_task =get_task_task(num_tile, gpu_ftasks, num_cams);
if (((task_task >> TASK_CORR_BITS) & 1) == 0){ // needs correlation. Maybe just check task_task != 0?
if ((task_task & ((1 << TASK_CORR_EN) | (1 << TASK_INTER_EN))) == 0){ // needs correlation. Maybe just check task_task != 0? TASK_CORR_EN
if (!task_task) { // temporary disabling
return;
}
}
int pair_list_start = pairs_offsets[num_cams];
int pair_list_len = pairs_offsets[num_cams+1] - pair_list_start;
int num_mask_words = (pair_list_len + 31) >> 5; // ceil
......@@ -2920,9 +2679,11 @@ __global__ void index_inter_correlate(
}
// int task_size = get_task_size(num_cams);
int task_task =get_task_task(num_tile, gpu_ftasks, num_cams);
if (((task_task >> TASK_CORR_BITS) & 1) == 0){ // needs correlation. Maybe just check task_task != 0?
if (((task_task >> TASK_INTER_EN) & 1) == 0){ // needs correlation. Maybe just check task_task != 0?
if (!task_task){ // temporary disabling
return;
}
}
int nb = __popc (sel_sensors); // number of non-zero bits
if (nb > 0){
int indx = atomicAdd(pnum_corr_tiles, nb+1);
......@@ -3401,10 +3162,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
// get number of tile
int tile_code = gpu_texture_indices[tile_indx + gpu_texture_indices_offset]; // Added for Java, no DP
if ((tile_code & (1 << CORR_TEXTURE_BIT)) == 0){
if ((tile_code & (1 << LIST_TEXTURE_BIT)) == 0){
return; // nothing to do
}
int tile_num = tile_code >> CORR_NTILE_SHIFT;
int tile_num = tile_code >> TEXT_NTILE_SHIFT;
#ifdef DEBUG7A
__syncthreads();// __syncwarp();
if ((tile_num == DBG_TILE) && (threadIdx.x == 0) && (threadIdx.y == 0)){
......@@ -3445,7 +3206,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
float * max_diff_shared = &all_shared[offsets[5]] ; // [num_cams]; // 16 = 0x10 | 4 = 0x4 | return to system memory (optionally pass null to skip calculation)
float * max_diff_tmp = &all_shared[offsets[6]] ; // [num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 8 = 0x80 | 4 * 8 = 0x20 | [4][8]
float * ports_rgb_tmp = &all_shared[offsets[7]] ; // [colors][num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 1 * 8 = 0x80 | 4 * 3 * 8 = 0x60 | [4*3][8]
float * texture_averaging = max_diff_tmp; // [NUM_THREADS] reusing, needs 32 elements for texture averaging, shared
#ifdef DBG_TILE
#ifdef DEBUG7AXX
......@@ -3684,7 +3445,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
rgbaw, // (float *) shr1.rgbaw, // float * rgba,
// if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed
// to calculate max_diff_shared
calc_extra, // int calc_extra, // 1 - calcualate ports_rgb, max_diff
calc_extra, // | (keep_weights & 2), // int calc_extra, // 1 - calcualate ports_rgb, max_diff
ports_rgb_shared,// float ports_rgb_shared [colors][num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_shared, // float max_diff_shared [num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_tmp, // float max_diff_tmp [num_cams][TEXTURE_THREADS_PER_TILE],
......@@ -3695,7 +3456,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
min_agree, // float min_agree, NOT USED? // minimal number of channels to agree on a point (real number to work with fuzzy averages)
weights, // float * chn_weights, // color channel weights, sum == 1.0
dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
(keep_weights & 1), // int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated)
(keep_weights & 1), // | (keep_weights & 2), // int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated)
debug ); // int debug );
__syncthreads(); // _syncthreads();1
......@@ -3761,9 +3522,9 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
tile_code &= TASK_TEXTURE_BITS;
if (!tile_code){
return; // should not happen
}
//// if (!tile_code){
//// return; // should not happen
//// }
// if no extra and no overlap -> nothing remains, return
if (gpu_texture_rbg && (texture_rbg_stride != 0)) { // generate RGBA (overlapped) // keep_weights
#ifdef DEBUG7A
......@@ -3782,27 +3543,76 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
__syncthreads();// __syncwarp();
#endif // DEBUG12
int alpha_mode = alphaIndex[tile_code]; // only 4 lowest bits
if (!alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
int alpha_mode = tile_code & 0xff; // alphaIndex[tile_code]; // only 4 lowest bits
if (alpha_mode != 0xff){ // only add if needed, alpha_mode == 0xff (neighbors from all 8 directions) - keep as is. FIXME: alpha_mode ???
// Calculate average value per color, need 32 shared array
for (int ncol = 0; ncol < colors; ncol++) {
int sum_index = threadIdx.x + threadIdx.y * TEXTURE_THREADS_PER_TILE; // 0.. 31
texture_averaging[sum_index] = 0;
for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1);
int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row * DTT_SIZE21 + col;
int gi = row * DTT_SIZE2 + col;
float * rgba_i = rgbaw + i;
// always copy 3 (1) colors + alpha
if (colors == 3){
texture_averaging[sum_index] += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
}
__syncthreads();
if (threadIdx.y == 0){ // combine sums
#pragma unroll
for (int ncol = 0; ncol < colors + 1; ncol++) { // 4
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
for (int i = 1; i < 4; i++) { // reduce sums to 8
texture_averaging[threadIdx.x] += texture_averaging[threadIdx.x + TEXTURE_THREADS_PER_TILE * i];
}
} else { // assuming colors = 1
}
__syncthreads();
if ((threadIdx.y == 0) && (threadIdx.x == 0)){ // combine sums
#pragma unroll
for (int ncol = 0; ncol < 1 + 1; ncol++) { // 2
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
for (int i = 1; i < TEXTURE_THREADS_PER_TILE; i++) { // reduce sums to 8
texture_averaging[0] += texture_averaging[i];
}
texture_averaging[0] /= 64; // average value for uniform field
}
__syncthreads();
float avg_val = texture_averaging[0];
// now add scale average value for each missing direction
for (int idir = 0; idir < 8; idir ++) if ((alpha_mode & (1 << idir)) == 0) { // no tile in this direction
/* */
int row, col;
switch (idir >> 1) {
case 0:
row = 4 + threadIdx.y;
col = 4 + threadIdx.x;
break;
case 1:
row = 4 + (threadIdx.x >> 2) + (threadIdx.y << 1);
col = 8 + (threadIdx.x & 3);
break;
case 2:
row = 8 + threadIdx.y;
col = 4 + threadIdx.x;
break;
case 3:
row = 4 + (threadIdx.x >> 2) + (threadIdx.y << 1);
col = 4 + (threadIdx.x & 3);
break;
}
int i = row * DTT_SIZE21 + col;
float * rgba_i = rgbaw + i;
// always copy 3 (1) colors + alpha
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) += textureBlend[idir][(threadIdx.y <<3) + threadIdx.x] * avg_val;
/*
for (int pass = 0; pass < 8; pass ++) {
int row1 = pass * 2 + (threadIdx.y >> 1);
int col1 = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row1 * DTT_SIZE21 + col1;
int gi = row1 * DTT_SIZE2 + col1;
float * rgba_i = rgbaw + i;
// always copy 3 (1) colors + alpha
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) += textureBlend[idir][gi] * avg_val;
}
*/
}
}
// __syncthreads();
}
int slice_stride = texture_rbg_stride * (*(woi + 3) + 1) * DTT_SIZE; // offset to the next color
int tileY = tile_num / tilesx; // TILES-X; // slow, but 1 per tile
......@@ -3826,6 +3636,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
__syncthreads();// __syncwarp();
#endif // DEBUG12
// copy textures to global memory
for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1); // row inside a tile (0..15)
int col = ((threadIdx.y & 1) << 3) + threadIdx.x; // column inside a tile (0..15)
......@@ -3843,10 +3654,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
__syncthreads();// __syncwarp();
#endif // DEBUG12
/// if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILES-Y)) && (g_col < (DTT_SIZE * TILES-X)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILES-X)))){
// always copy 3 (1) colors + alpha
if (colors == 3){
#pragma unroll
......@@ -3860,21 +3667,74 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
}
// generate and copy per-sensor texture
if (keep_weights & 2){ // copy individual sensors output
for (int ncam = 0; ncam < num_cams; ncam++) {
float * mclt_dst_ncam = mclt_debayer + (ncam * colors ) * (MCLT_UNION_LEN);
if (!alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
//if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
if (alpha_mode != 0xff){
for (int ncol = 0; ncol < colors; ncol++) {
// calculate average value for blending
int sum_index = threadIdx.x + threadIdx.y * TEXTURE_THREADS_PER_TILE; // 0.. 31
texture_averaging[sum_index] = 0;
for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1);
int col = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row * DTT_SIZE21 + col;
int gi = row * DTT_SIZE2 + col;
float * rgba_i = rgbaw + i;
texture_averaging[sum_index] += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
}
__syncthreads();
if (threadIdx.y == 0){ // combine sums
#pragma unroll
for (int i = 1; i < 4; i++) { // reduce sums to 8
texture_averaging[threadIdx.x] += texture_averaging[threadIdx.x + TEXTURE_THREADS_PER_TILE * i];
}
}
__syncthreads();
if ((threadIdx.y == 0) && (threadIdx.x == 0)){ // combine sums
#pragma unroll
for (int i = 1; i < TEXTURE_THREADS_PER_TILE; i++) { // reduce sums to 8
texture_averaging[0] += texture_averaging[i];
}
texture_averaging[0] /= 64; // average value for uniform field
}
__syncthreads();
float avg_val = texture_averaging[0];
// Possible to re-use ports_rgb_shared[], if needed (change to (calc_extra | (keep_weights & 2) in tile_combine_rgba()).
// Now using averaging here (less noise if averaging sensor outside).
// float avg_val = ports_rgb_shared[ncol * num_cams + ncam]; // texture_averaging[0];
for (int idir = 0; idir < 8; idir ++) if ((alpha_mode & (1 << idir)) == 0) { // no tile in this direction
/* */
int row, col;
switch (idir >> 1) {
case 0:
row = 4 + threadIdx.y;
col = 4 + threadIdx.x;
break;
case 1:
row = 4 + (threadIdx.x >> 2) + (threadIdx.y << 1);
col = 8 + (threadIdx.x & 3);
break;
case 2:
row = 8 + threadIdx.y;
col = 4 + threadIdx.x;
break;
case 3:
row = 4 + (threadIdx.x >> 2) + (threadIdx.y << 1);
col = 4 + (threadIdx.x & 3);
break;
}
int i = row * DTT_SIZE21 + col;
float * mclt_dst_i = mclt_dst_ncam + i;
for (int ncol = 0; ncol < colors; ncol++) {
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) *= alphaFade[alpha_mode][gi]; // reduce [tile_code] by LUT
int gi = (threadIdx.y <<3) + threadIdx.x;
*(mclt_dst_i + ncol * (MCLT_UNION_LEN)) += textureBlend[idir][gi] * avg_val;
}
__syncthreads(); // needed?
}
}
for (int pass = 0; pass < 8; pass ++) {
int row = pass * 2 + (threadIdx.y >> 1); // row inside a tile (0..15)
int col = ((threadIdx.y & 1) << 3) + threadIdx.x; // column inside a tile (0..15)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment