Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
0b00f0ae
Commit
0b00f0ae
authored
Nov 21, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
updated GPU code with new texture boundaries
parent
95e514a4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
281 additions
and
424 deletions
+281
-424
TexturedModel.java
...n/java/com/elphel/imagej/tileprocessor/TexturedModel.java
+1
-4
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+280
-420
No files found.
src/main/java/com/elphel/imagej/tileprocessor/TexturedModel.java
View file @
0b00f0ae
...
@@ -1607,9 +1607,6 @@ public class TexturedModel {
...
@@ -1607,9 +1607,6 @@ public class TexturedModel {
}
}
// only trim if nothing obscures this and has some BG
// only trim if nothing obscures this and has some BG
if
(
is_fg_tile
[
fnslice
][
tile
]
&&
has_bg_tile
[
fnslice
][
tile
])
{
if
(
is_fg_tile
[
fnslice
][
tile
]
&&
has_bg_tile
[
fnslice
][
tile
])
{
if
(
dbg_is_fg
!=
null
)
{
dbg_is_fg
[
cindx
]
=
1000.0
;
}
if
(
vars_inter
[
cindx
]
>
fg_max_inter
)
{
if
(
vars_inter
[
cindx
]
>
fg_max_inter
)
{
dirs_avg
[
cindx
]
=
Double
.
NaN
;
dirs_avg
[
cindx
]
=
Double
.
NaN
;
}
}
...
@@ -2246,7 +2243,7 @@ public class TexturedModel {
...
@@ -2246,7 +2243,7 @@ public class TexturedModel {
tilesX
,
// final int tilesX,
tilesX
,
// final int tilesX,
slice_disparities
,
// final double [][] slice_disparities,
slice_disparities
,
// final double [][] slice_disparities,
sensor_textures
,
// final double [][] sensor_texture, // per-sensor texture value
sensor_textures
,
// final double [][] sensor_texture, // per-sensor texture value
combo_textures
,
// null, // final double [] combo_texture_in, // average texture value
null
,
//
combo_textures, // null, // final double [] combo_texture_in, // average texture value
ref_scene
.
getImageName
());
// final String dbg_prefix);
ref_scene
.
getImageName
());
// final String dbg_prefix);
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
0b00f0ae
...
@@ -53,7 +53,11 @@ nofast
...
@@ -53,7 +53,11 @@ nofast
GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.090526999999998ms), corr2D: 30.623282999999997ms), textures: 231.154339ms, RGBA: 220.503017ms
GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.090526999999998ms), corr2D: 30.623282999999997ms), textures: 231.154339ms, RGBA: 220.503017ms
*/
*/
#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
//#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
#define TASK_TEXTURE_BITS ((1 << TASK_TEXT_N_BIT) | (1 << TASK_TEXT_NE_BIT) | (1 << TASK_TEXT_E_BIT) | (1 << TASK_TEXT_SE_BIT)\
| (1 << TASK_TEXT_S_BIT) | (1 << TASK_TEXT_SW_BIT) | (1 << TASK_TEXT_W_BIT) | (1 << TASK_TEXT_NW_BIT))
//#define IMCLT14
//#define IMCLT14
//#define NOICLT 1
//#define NOICLT 1
...
@@ -244,83 +248,74 @@ def set_imclt_sa(stride=9):
...
@@ -244,83 +248,74 @@ def set_imclt_sa(stride=9):
#!/usr/bin/env python3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import numpy as np # math
import numpy as np # math
def print
AlphaFade
(transform_size):
def print
TextureBlend
(transform_size):
ts2 = 2 * transform_size
ts2 = 2 * transform_size
ts2m1 = ts2-1
ts2m1 = ts2-1
alpha
Fade = np.zeros(shape=(9,ts2*ts2), dtype=float) # double [][] alphaFade = new double[16][ts2*ts2];
alpha
bBlend = np.zeros(shape=(8,ts2*ts2), dtype=float) #
alphaIndex = np.zeros(shape=(16,), dtype=in
t)
blend1d = np.zeros(shape=(ts2,), dtype=floa
t)
fade1d = np.zeros(shape=(16,), dtype=float) # double [] fade1d = new double [ts2];
dirBlend = ((0,1),(-1,1),(-1,0),(-1,-1),(0,-1),(1,-1),(1,0),(1,1))
for i in range (t
s2
):
for i in range (t
ransform_size
):
fade1d[i] = 0.5 * (1.0 - np.cos(np.pi * (i +0.5) /ts2
))
blend1d[i] = 0.5 * (1.0 + np.cos(np.pi * (i +0.5) /transform_size
))
for i in range (ts2):
for i in range (ts2):
for j in range (ts2):
for j in range (ts2):
indx = i * ts2 + j
indx = i * ts2 + j
for m in range (16):
for m, dir in enumerate(dirBlend):
# if m == 0:
a = 1.0
# alphaFade[m][indx] = 0
if dir[0] > 0:
if m == 1: # 0
a *= blend1d[j]
alphaIndex[m] = 1
elif dir[0]<0:
alphaFade[alphaIndex[m]][indx] = fade1d[ts2m1 - i]
a *= blend1d[ts2m1 - j]
elif m == 2:
if dir[1] > 0:
alphaIndex[m] = 2
a *= blend1d[i]
alphaFade[alphaIndex[m]][indx] = fade1d[j]
elif dir[1]<0:
elif m == 4:
a *= blend1d[ts2m1 - i]
alphaIndex[m] = 3
alphabBlend[m][indx] = a;
alphaFade[alphaIndex[m]][indx] = fade1d[i]
elif m == 8:
floats_in_line0=8 # 16 #8
alphaIndex[m] = 4
segment_len = transform_size*transform_size//2
alphaFade[alphaIndex[m]][indx] = fade1d[ts2m1 - j]
print("__constant__ float textureBlend[8][%d] = {"%(segment_len)) #32
elif m == 3:
# only for transform_size == 8
alphaIndex[m] = 5
for m, blend in enumerate (alphabBlend):
alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - i],fade1d[j])[j > ts2m1 - i]
elif m == 6:
for i in range (segment_len):
alphaIndex[m] = 6
if m in (0,1):
alphaFade[alphaIndex[m]][indx] = (fade1d[i],fade1d[j])[j > i]
x = 4 + (i % 8)
elif m == 9:
y = 4 + (i // 8)
alphaIndex[m] = 7
elif m in (2,3):
alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - j],fade1d[ts2m1 - i])[j > i]
x = 8 + (i % 4)
elif m == 12:
y = 4 + (i // 4)
alphaIndex[m] = 8
elif m in (4,5):
alphaFade[alphaIndex[m]][indx] = (fade1d[ts2m1 - j],fade1d[i])[i > ts2m1 - j]
x = 4 + (i % 8)
else:
y = 8 + (i // 8)
alphaIndex[m] = 0
elif m in (6,7):
alphaFade[alphaIndex[m]][indx] = 1.0
x = 4 + (i % 4)
y = 4 + (i // 4)
floats_in_line=8
indx = x + 16 * y
print("__constant__ int alphaIndex[16] = {")
floats_in_line = floats_in_line0
for m in range (16):
if ((m >>1) & 1) !=0:
if ((m % floats_in_line) == 0):
floats_in_line = floats_in_line0 // 2
print("\n ",end="")
else:
print(" ",end="")
print("%d"%(alphaIndex[m]), end ="")
if (m < (16-1)):
print(",",end="")
print("};")
print("__constant__ float alphaFade[9][%d] = {"%(ts2*ts2))
for m in range (9):
for i in range (ts2 * ts2):
if ((i % floats_in_line) == 0):
if ((i % floats_in_line) == 0):
print(" ",end="")
print(" ",end="")
if (i == 0) :
if (i == 0) :
print("{",end="")
print("{",end="")
else:
else:
print(" ",end="")
print(" ",end="")
print("%ff"%(
alphaFade[m][i
]), end ="")
print("%ff"%(
blend[indx
]), end ="")
if (((i + 1) % floats_in_line) == 0):
if (((i + 1) % floats_in_line) == 0):
if (i == (
ts2 * ts2
-1)):
if (i == (
segment_len
-1)):
print("}",end="")
print("}",end="")
else:
else:
print(",")
print(",")
else:
else:
print(", ",end="")
print(", ",end="")
if (m ==
(9-1)
):
if (m ==
len(alphabBlend)-1
):
print("};")
print("};")
else:
else:
print(",")
print(",")
printAlphaFade(8)
printTextureBlend(8)
"""
"""
Set up correlation pairs - run:
Set up correlation pairs - run:
setup_pairs(0,16)
setup_pairs(0,16)
...
@@ -458,309 +453,58 @@ __constant__ float LoG_corr[64]={ // modify if needed high-pass filter before co
...
@@ -458,309 +453,58 @@ __constant__ float LoG_corr[64]={ // modify if needed high-pass filter before co
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
,
1.00000000
f
};
};
/*
__constant__
float
textureBlend
[
8
][
32
]
=
{
__constant__ int pairs[6][2]={
{
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
{0, 1},
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
{2, 3},
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
{0, 2},
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
},
{1, 3},
{
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.003867
f
,
0.033913
f
,
0.089431
f
,
0.161970
f
,
{0, 3},
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.002135
f
,
0.018725
f
,
0.049379
f
,
0.089431
f
,
{2, 1}};
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000810
f
,
0.007101
f
,
0.018725
f
,
0.033913
f
,
*/
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000092
f
,
0.000810
f
,
0.002135
f
,
0.003867
f
},
__constant__
int
alphaIndex
[
16
]
=
{
0
,
1
,
2
,
5
,
3
,
0
,
6
,
0
,
4
,
7
,
0
,
0
,
8
,
0
,
0
,
0
};
{
0.009607
f
,
0.084265
f
,
0.222215
f
,
0.402455
f
,
0.009607
f
,
0.084265
f
,
0.222215
f
,
0.402455
f
,
__constant__
float
alphaFade
[
9
][
256
]
=
{
0.009607
f
,
0.084265
f
,
0.222215
f
,
0.402455
f
,
{
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.009607
f
,
0.084265
f
,
0.222215
f
,
0.402455
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.009607
f
,
0.084265
f
,
0.222215
f
,
0.402455
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.009607
f
,
0.084265
f
,
0.222215
f
,
0.402455
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.009607
f
,
0.084265
f
,
0.222215
f
,
0.402455
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.009607
f
,
0.084265
f
,
0.222215
f
,
0.402455
f
},
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
{
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.000092
f
,
0.000810
f
,
0.002135
f
,
0.003867
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.000810
f
,
0.007101
f
,
0.018725
f
,
0.033913
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.002135
f
,
0.018725
f
,
0.049379
f
,
0.089431
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.003867
f
,
0.033913
f
,
0.089431
f
,
0.161970
f
},
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
{
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
0.009607
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
0.084265
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
0.222215
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
,
0.402455
f
},
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
{
0.003867
f
,
0.002135
f
,
0.000810
f
,
0.000092
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.033913
f
,
0.018725
f
,
0.007101
f
,
0.000810
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.089431
f
,
0.049379
f
,
0.018725
f
,
0.002135
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.161970
f
,
0.089431
f
,
0.033913
f
,
0.003867
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
},
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
{
0.402455
f
,
0.222215
f
,
0.084265
f
,
0.009607
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.402455
f
,
0.222215
f
,
0.084265
f
,
0.009607
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.402455
f
,
0.222215
f
,
0.084265
f
,
0.009607
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.402455
f
,
0.222215
f
,
0.084265
f
,
0.009607
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.402455
f
,
0.222215
f
,
0.084265
f
,
0.009607
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.402455
f
,
0.222215
f
,
0.084265
f
,
0.009607
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.402455
f
,
0.222215
f
,
0.084265
f
,
0.009607
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.402455
f
,
0.222215
f
,
0.084265
f
,
0.009607
f
},
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
{
0.161970
f
,
0.089431
f
,
0.033913
f
,
0.003867
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
0.089431
f
,
0.049379
f
,
0.018725
f
,
0.002135
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
,
1.000000
f
},
0.033913
f
,
0.018725
f
,
0.007101
f
,
0.000810
f
,
{
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.003867
f
,
0.002135
f
,
0.000810
f
,
0.000092
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
,
0.000000
f
}};
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
},
{
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
},
{
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.002408
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.021530
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
},
{
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
},
{
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.997592
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.021530
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
},
{
0.002408
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.021530
f
,
0.021530
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.978470
f
,
0.997592
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
},
{
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.021530
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
},
{
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.002408
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.021530
f
,
0.021530
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.059039
f
,
0.059039
f
,
0.059039
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.113495
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.182803
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.264302
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.354858
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.450991
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.549009
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.645142
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.735698
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.817197
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.886505
f
,
0.997592
f
,
0.978470
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.940961
f
,
0.997592
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.978470
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
,
0.997592
f
}};
__constant__
int
pairs_offsets
[]
=
{
0
,
0
,
0
,
1
,
4
,
10
,
20
,
35
,
56
,
84
,
120
,
165
,
220
,
286
,
364
,
455
,
560
,
680
};
__constant__
int
pairs_offsets
[]
=
{
0
,
0
,
0
,
1
,
4
,
10
,
20
,
35
,
56
,
84
,
120
,
165
,
220
,
286
,
364
,
455
,
560
,
680
};
// {pair_start, pair_end, pair_length}
// {pair_start, pair_end, pair_length}
...
@@ -2298,7 +2042,6 @@ extern "C" __global__ void generate_RBGA(
...
@@ -2298,7 +2042,6 @@ extern "C" __global__ void generate_RBGA(
mark_texture_tiles
<<<
blocks
,
threads
>>>
(
mark_texture_tiles
<<<
blocks
,
threads
>>>
(
num_cams
,
// int num_cams,
num_cams
,
// int num_cams,
gpu_ftasks
,
// float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
gpu_ftasks
,
// float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
/// gpu_tasks,
num_tiles
,
// number of tiles in task list
num_tiles
,
// number of tiles in task list
width
,
// number of tiles in a row
width
,
// number of tiles in a row
gpu_texture_indices
);
// packed tile + bits (now only (1 << 7)
gpu_texture_indices
);
// packed tile + bits (now only (1 << 7)
...
@@ -2311,7 +2054,6 @@ extern "C" __global__ void generate_RBGA(
...
@@ -2311,7 +2054,6 @@ extern "C" __global__ void generate_RBGA(
mark_texture_neighbor_tiles
<<<
blocks
,
threads
>>>
(
mark_texture_neighbor_tiles
<<<
blocks
,
threads
>>>
(
num_cams
,
// int num_cams,
num_cams
,
// int num_cams,
gpu_ftasks
,
// float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
gpu_ftasks
,
// float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks,
num_tiles
,
// number of tiles in task list
num_tiles
,
// number of tiles in task list
width
,
// number of tiles in a row
width
,
// number of tiles in a row
height
,
// number of tiles rows
height
,
// number of tiles rows
...
@@ -2332,7 +2074,6 @@ extern "C" __global__ void generate_RBGA(
...
@@ -2332,7 +2074,6 @@ extern "C" __global__ void generate_RBGA(
gen_texture_list
<<<
blocks
,
threads
>>>
(
gen_texture_list
<<<
blocks
,
threads
>>>
(
num_cams
,
// int num_cams,
num_cams
,
// int num_cams,
gpu_ftasks
,
// float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
gpu_ftasks
,
// float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks,
num_tiles
,
// number of tiles in task list
num_tiles
,
// number of tiles in task list
width
,
// number of tiles in a row
width
,
// number of tiles in a row
height
,
// int height, // number of tiles rows
height
,
// int height, // number of tiles rows
...
@@ -2603,8 +2344,11 @@ __global__ void mark_texture_tiles(
...
@@ -2603,8 +2344,11 @@ __global__ void mark_texture_tiles(
/// int task = gpu_tasks[task_num].task;
/// int task = gpu_tasks[task_num].task;
int
task
=
get_task_task
(
task_num
,
gpu_ftasks
,
num_cams
);
int
task
=
get_task_task
(
task_num
,
gpu_ftasks
,
num_cams
);
if
(
!
(
task
&
TASK_TEXTURE_BITS
)){
// here any bit in TASK_TEXTURE_BITS is sufficient
// if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient
return
;
// NOP tile
if
(
!
(
task
&
(
1
<<
TASK_TEXT_EN
))){
// here any bit in TASK_TEXTURE_BITS is sufficient
if
(
!
task
)
{
// temporary disabling
return
;
// NOP tile
}
}
}
/// int cxy = gpu_tasks[task_num].txy;
/// int cxy = gpu_tasks[task_num].txy;
int
cxy
=
get_task_txy
(
task_num
,
gpu_ftasks
,
num_cams
);
int
cxy
=
get_task_txy
(
task_num
,
gpu_ftasks
,
num_cams
);
...
@@ -2614,7 +2358,7 @@ __global__ void mark_texture_tiles(
...
@@ -2614,7 +2358,7 @@ __global__ void mark_texture_tiles(
/**
/**
* Helper kernel for prepare_texture_list() (for generate_RBGA) - calculate and save
* Helper kernel for prepare_texture_list() (for generate_RBGA) - calculate and save
* bitmap of available neighbors in 4 directions (needed for alpha generation of
* bitmap of available neighbors in 4
->8
directions (needed for alpha generation of
* the result textures to fade along the border.
* the result textures to fade along the border.
*
*
* @param num_cams number of cameras
* @param num_cams number of cameras
...
@@ -2642,8 +2386,11 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
...
@@ -2642,8 +2386,11 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
}
}
int
task
=
get_task_task
(
task_num
,
gpu_ftasks
,
num_cams
);
int
task
=
get_task_task
(
task_num
,
gpu_ftasks
,
num_cams
);
if
(
!
(
task
&
TASK_TEXTURE_BITS
)){
// here any bit in TASK_TEXTURE_BITS is sufficient
// if (!(task & TASK_TEXTURE_BITS)){ // here any bit in TASK_TEXTURE_BITS is sufficient
return
;
// NOP tile
if
(
!
(
task
&
(
1
<<
TASK_TEXT_EN
))){
// here any bit in TASK_TEXTURE_BITS is sufficient
if
(
!
task
)
{
// temporary disabling
return
;
// NOP tile
}
}
}
int
cxy
=
get_task_txy
(
task_num
,
gpu_ftasks
,
num_cams
);
int
cxy
=
get_task_txy
(
task_num
,
gpu_ftasks
,
num_cams
);
...
@@ -2654,13 +2401,17 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
...
@@ -2654,13 +2401,17 @@ __global__ void mark_texture_neighbor_tiles( // TODO: remove __global__?
atomicMax
(
woi
+
2
,
x
);
atomicMax
(
woi
+
2
,
x
);
atomicMax
(
woi
+
3
,
y
);
atomicMax
(
woi
+
3
,
y
);
int
d
=
0
;
int
d
=
0
;
if
((
y
>
0
)
&&
*
(
gpu_texture_indices
+
x
+
(
y
-
1
)
*
width
))
d
|=
(
1
<<
TASK_TEXTURE_N_BIT
);
if
((
y
>
0
)
&&
*
(
gpu_texture_indices
+
x
+
(
y
-
1
)
*
width
))
d
|=
(
1
<<
TASK_TEXT_N_BIT
);
if
((
x
<
(
width
-
1
))
&&
*
(
gpu_texture_indices
+
(
x
+
1
)
+
y
*
width
))
d
|=
(
1
<<
TASK_TEXTURE_E_BIT
);
if
((
y
>
0
)
&&
(
x
<
(
width
-
1
))
&&
*
(
gpu_texture_indices
+
(
x
+
1
)
+
(
y
-
1
)
*
width
))
d
|=
(
1
<<
TASK_TEXT_NE_BIT
);
if
((
y
<
(
height
-
1
))
&&
*
(
gpu_texture_indices
+
x
+
(
y
+
1
)
*
width
))
d
|=
(
1
<<
TASK_TEXTURE_S_BIT
);
if
(
(
x
<
(
width
-
1
))
&&
*
(
gpu_texture_indices
+
(
x
+
1
)
+
y
*
width
))
d
|=
(
1
<<
TASK_TEXT_E_BIT
);
if
((
x
>
0
)
&&
*
(
gpu_texture_indices
+
(
x
-
1
)
+
y
*
width
))
d
|=
(
1
<<
TASK_TEXTURE_W_BIT
);
if
((
y
<
(
height
-
1
))
&&
(
x
<
(
width
-
1
))
&&
*
(
gpu_texture_indices
+
(
x
+
1
)
+
(
y
+
1
)
*
width
))
d
|=
(
1
<<
TASK_TEXT_SE_BIT
);
if
((
y
<
(
height
-
1
))
&&
*
(
gpu_texture_indices
+
x
+
(
y
+
1
)
*
width
))
d
|=
(
1
<<
TASK_TEXT_S_BIT
);
if
((
y
<
(
height
-
1
))
&&
(
x
>
0
)
&&
*
(
gpu_texture_indices
+
(
x
-
1
)
+
(
y
+
1
)
*
width
))
d
|=
(
1
<<
TASK_TEXT_SW_BIT
);
if
(
(
x
>
0
)
&&
*
(
gpu_texture_indices
+
(
x
-
1
)
+
y
*
width
))
d
|=
(
1
<<
TASK_TEXT_W_BIT
);
if
((
y
>
0
)
&&
(
x
>
0
)
&&
*
(
gpu_texture_indices
+
(
x
-
1
)
+
(
y
-
1
)
*
width
))
d
|=
(
1
<<
TASK_TEXT_NW_BIT
);
// Set task texture bits in global gpu_ftasks array (lower 4 bits)
// Set task texture bits in global gpu_ftasks array (lower 4 bits)
/// gpu_tasks[task_num].task = ((task ^ d) & TASK_TEXTURE_BITS) ^ task;
/// gpu_tasks[task_num].task = ((task ^ d) & TASK_TEXTURE_BITS) ^ task;
*
(
int
*
)
(
gpu_ftasks
+
get_task_size
(
num_cams
)
*
task_num
)
=
((
task
^
d
)
&
TASK_TEXTURE_BITS
)
^
task
;
*
(
int
*
)
(
gpu_ftasks
+
get_task_size
(
num_cams
)
*
task_num
)
=
((
task
^
d
)
&
TASK_TEXTURE_BITS
)
^
task
;
// updates task bits???
}
}
/**
/**
...
@@ -2697,7 +2448,7 @@ __global__ void gen_texture_list(
...
@@ -2697,7 +2448,7 @@ __global__ void gen_texture_list(
/// int task = gpu_tasks[task_num].task & TASK_TEXTURE_BITS;
/// int task = gpu_tasks[task_num].task & TASK_TEXTURE_BITS;
int
task
=
get_task_task
(
task_num
,
gpu_ftasks
,
num_cams
);
int
task
=
get_task_task
(
task_num
,
gpu_ftasks
,
num_cams
);
if
(
!
task
){
// here any bit in TASK_TEXTURE_BITS is sufficient
if
(
!
task
){
// here any bit in TASK_TEXTURE_BITS is sufficient
return
;
// NOP tile
return
;
// NOP tile
- any non-zero bit is sufficient
}
}
// int cxy = gpu_tasks[task_num].txy;
// int cxy = gpu_tasks[task_num].txy;
int
cxy
=
get_task_txy
(
task_num
,
gpu_ftasks
,
num_cams
);
int
cxy
=
get_task_txy
(
task_num
,
gpu_ftasks
,
num_cams
);
...
@@ -2749,8 +2500,10 @@ __global__ void gen_texture_list(
...
@@ -2749,8 +2500,10 @@ __global__ void gen_texture_list(
#endif // DEBUG12
#endif // DEBUG12
// *(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// *(gpu_texture_indices + buf_offset) = task | ((x + y * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
// keep only 8 LSBs of task, use higher 24 for task number
// keep only 8 LSBs of task, use higher 24 for task number
*
(
gpu_texture_indices
+
buf_offset
)
=
(
task
&
((
1
<<
CORR_NTILE_SHIFT
)
-
1
))
|
((
x
+
y
*
width
)
<<
CORR_NTILE_SHIFT
)
|
(
1
<<
LIST_TEXTURE_BIT
);
// *(gpu_texture_indices + buf_offset) = (task & ((1 << TEXT_NTILE_SHIFT) -1)) | ((x + y * width) << TEXT_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT);
//CORR_NTILE_SHIFT
// keep only 4 lower task bits
*
(
gpu_texture_indices
+
buf_offset
)
=
(
task
&
TASK_TEXTURE_BITS
)
|
((
x
+
y
*
width
)
<<
TEXT_NTILE_SHIFT
)
|
(
1
<<
LIST_TEXTURE_BIT
);
//CORR_NTILE_SHIFT // TASK_TEXTURE_BITS
}
}
//inline __device__ int get_task_size(int num_cams){
//inline __device__ int get_task_size(int num_cams){
// return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams);
// return sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - num_cams);
...
@@ -2817,12 +2570,16 @@ extern "C" __global__ void create_nonoverlap_list(
...
@@ -2817,12 +2570,16 @@ extern "C" __global__ void create_nonoverlap_list(
}
}
int
task_task
=
get_task_task
(
num_tile
,
gpu_ftasks
,
num_cams
);
int
task_task
=
get_task_task
(
num_tile
,
gpu_ftasks
,
num_cams
);
/// if ((gpu_tasks[num_tile].task & TASK_TEXTURE_BITS) == 0){
/// if ((gpu_tasks[num_tile].task & TASK_TEXTURE_BITS) == 0){
if
((
task_task
&
TASK_TEXTURE_BITS
)
==
0
){
// if ((task_task & TASK_TEXTURE_BITS) == 0){
return
;
// nothing to do
if
(
!
(
task_task
&
(
1
<<
TASK_TEXT_EN
))){
// here any bit in TASK_TEXTURE_BITS is sufficient
if
(
!
task_task
)
{
// temporary disabling
return
;
// NOP tile
}
}
}
/// int cxy = gpu_tasks[num_tile].txy;
/// int cxy = gpu_tasks[num_tile].txy;
int
cxy
=
get_task_txy
(
num_tile
,
gpu_ftasks
,
num_cams
);
int
cxy
=
get_task_txy
(
num_tile
,
gpu_ftasks
,
num_cams
);
int
texture_task_code
=
(((
cxy
&
0xffff
)
+
(
cxy
>>
16
)
*
width
)
<<
CORR_NTILE_SHIFT
)
|
(
1
<<
LIST_TEXTURE_BIT
)
|
TASK_TEXTURE_BITS
;
// all texture direction bits as it is non-overlapped list (bits probably unused)
int
texture_task_code
=
(((
cxy
&
0xffff
)
+
(
cxy
>>
16
)
*
width
)
<<
TEXT_NTILE_SHIFT
)
|
(
1
<<
LIST_TEXTURE_BIT
)
|
TASK_TEXTURE_BITS
;
// if (gpu_tasks[num_tile].task != 0) {
// if (gpu_tasks[num_tile].task != 0) {
if
(
task_task
!=
0
)
{
if
(
task_task
!=
0
)
{
nonoverlap_list
[
atomicAdd
(
pnonoverlap_length
,
1
)]
=
texture_task_code
;
nonoverlap_list
[
atomicAdd
(
pnonoverlap_length
,
1
)]
=
texture_task_code
;
...
@@ -2862,8 +2619,10 @@ __global__ void index_correlate(
...
@@ -2862,8 +2619,10 @@ __global__ void index_correlate(
int
sel_pairs
[]
=
{
sel_pairs0
,
sel_pairs1
,
sel_pairs2
,
sel_pairs3
};
int
sel_pairs
[]
=
{
sel_pairs0
,
sel_pairs1
,
sel_pairs2
,
sel_pairs3
};
// int task_size = get_task_size(num_cams);
// int task_size = get_task_size(num_cams);
int
task_task
=
get_task_task
(
num_tile
,
gpu_ftasks
,
num_cams
);
int
task_task
=
get_task_task
(
num_tile
,
gpu_ftasks
,
num_cams
);
if
(((
task_task
>>
TASK_CORR_BITS
)
&
1
)
==
0
){
// needs correlation. Maybe just check task_task != 0?
if
((
task_task
&
((
1
<<
TASK_CORR_EN
)
|
(
1
<<
TASK_INTER_EN
)))
==
0
){
// needs correlation. Maybe just check task_task != 0? TASK_CORR_EN
return
;
if
(
!
task_task
)
{
// temporary disabling
return
;
}
}
}
int
pair_list_start
=
pairs_offsets
[
num_cams
];
int
pair_list_start
=
pairs_offsets
[
num_cams
];
int
pair_list_len
=
pairs_offsets
[
num_cams
+
1
]
-
pair_list_start
;
int
pair_list_len
=
pairs_offsets
[
num_cams
+
1
]
-
pair_list_start
;
...
@@ -2920,8 +2679,10 @@ __global__ void index_inter_correlate(
...
@@ -2920,8 +2679,10 @@ __global__ void index_inter_correlate(
}
}
// int task_size = get_task_size(num_cams);
// int task_size = get_task_size(num_cams);
int
task_task
=
get_task_task
(
num_tile
,
gpu_ftasks
,
num_cams
);
int
task_task
=
get_task_task
(
num_tile
,
gpu_ftasks
,
num_cams
);
if
(((
task_task
>>
TASK_CORR_BITS
)
&
1
)
==
0
){
// needs correlation. Maybe just check task_task != 0?
if
(((
task_task
>>
TASK_INTER_EN
)
&
1
)
==
0
){
// needs correlation. Maybe just check task_task != 0?
return
;
if
(
!
task_task
){
// temporary disabling
return
;
}
}
}
int
nb
=
__popc
(
sel_sensors
);
// number of non-zero bits
int
nb
=
__popc
(
sel_sensors
);
// number of non-zero bits
if
(
nb
>
0
){
if
(
nb
>
0
){
...
@@ -3401,10 +3162,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3401,10 +3162,10 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
// get number of tile
// get number of tile
int
tile_code
=
gpu_texture_indices
[
tile_indx
+
gpu_texture_indices_offset
];
// Added for Java, no DP
int
tile_code
=
gpu_texture_indices
[
tile_indx
+
gpu_texture_indices_offset
];
// Added for Java, no DP
if
((
tile_code
&
(
1
<<
CORR
_TEXTURE_BIT
))
==
0
){
if
((
tile_code
&
(
1
<<
LIST
_TEXTURE_BIT
))
==
0
){
return
;
// nothing to do
return
;
// nothing to do
}
}
int
tile_num
=
tile_code
>>
CORR
_NTILE_SHIFT
;
int
tile_num
=
tile_code
>>
TEXT
_NTILE_SHIFT
;
#ifdef DEBUG7A
#ifdef DEBUG7A
__syncthreads
();
// __syncwarp();
__syncthreads
();
// __syncwarp();
if
((
tile_num
==
DBG_TILE
)
&&
(
threadIdx
.
x
==
0
)
&&
(
threadIdx
.
y
==
0
)){
if
((
tile_num
==
DBG_TILE
)
&&
(
threadIdx
.
x
==
0
)
&&
(
threadIdx
.
y
==
0
)){
...
@@ -3445,7 +3206,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3445,7 +3206,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
float
*
max_diff_shared
=
&
all_shared
[
offsets
[
5
]]
;
// [num_cams]; // 16 = 0x10 | 4 = 0x4 | return to system memory (optionally pass null to skip calculation)
float
*
max_diff_shared
=
&
all_shared
[
offsets
[
5
]]
;
// [num_cams]; // 16 = 0x10 | 4 = 0x4 | return to system memory (optionally pass null to skip calculation)
float
*
max_diff_tmp
=
&
all_shared
[
offsets
[
6
]]
;
// [num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 8 = 0x80 | 4 * 8 = 0x20 | [4][8]
float
*
max_diff_tmp
=
&
all_shared
[
offsets
[
6
]]
;
// [num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 8 = 0x80 | 4 * 8 = 0x20 | [4][8]
float
*
ports_rgb_tmp
=
&
all_shared
[
offsets
[
7
]]
;
// [colors][num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 1 * 8 = 0x80 | 4 * 3 * 8 = 0x60 | [4*3][8]
float
*
ports_rgb_tmp
=
&
all_shared
[
offsets
[
7
]]
;
// [colors][num_cams][TEXTURE_THREADS_PER_TILE]; // 16 * 1 * 8 = 0x80 | 4 * 3 * 8 = 0x60 | [4*3][8]
float
*
texture_averaging
=
max_diff_tmp
;
// [NUM_THREADS] reusing, needs 32 elements for texture averaging, shared
#ifdef DBG_TILE
#ifdef DBG_TILE
#ifdef DEBUG7AXX
#ifdef DEBUG7AXX
...
@@ -3684,7 +3445,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3684,7 +3445,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
rgbaw
,
// (float *) shr1.rgbaw, // float * rgba,
rgbaw
,
// (float *) shr1.rgbaw, // float * rgba,
// if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed
// if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed
// to calculate max_diff_shared
// to calculate max_diff_shared
calc_extra
,
// int calc_extra, // 1 - calcualate ports_rgb, max_diff
calc_extra
,
// | (keep_weights & 2),
// int calc_extra, // 1 - calcualate ports_rgb, max_diff
ports_rgb_shared
,
// float ports_rgb_shared [colors][num_cams], // return to system memory (optionally pass null to skip calculation)
ports_rgb_shared
,
// float ports_rgb_shared [colors][num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_shared
,
// float max_diff_shared [num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_shared
,
// float max_diff_shared [num_cams], // return to system memory (optionally pass null to skip calculation)
max_diff_tmp
,
// float max_diff_tmp [num_cams][TEXTURE_THREADS_PER_TILE],
max_diff_tmp
,
// float max_diff_tmp [num_cams][TEXTURE_THREADS_PER_TILE],
...
@@ -3695,7 +3456,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3695,7 +3456,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
min_agree
,
// float min_agree, NOT USED? // minimal number of channels to agree on a point (real number to work with fuzzy averages)
min_agree
,
// float min_agree, NOT USED? // minimal number of channels to agree on a point (real number to work with fuzzy averages)
weights
,
// float * chn_weights, // color channel weights, sum == 1.0
weights
,
// float * chn_weights, // color channel weights, sum == 1.0
dust_remove
,
// int dust_remove, // Do not reduce average weight when only one image differs much from the average
dust_remove
,
// int dust_remove, // Do not reduce average weight when only one image differs much from the average
(
keep_weights
&
1
),
// int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated)
(
keep_weights
&
1
),
// | (keep_weights & 2),
// int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated)
debug
);
// int debug );
debug
);
// int debug );
__syncthreads
();
// _syncthreads();1
__syncthreads
();
// _syncthreads();1
...
@@ -3761,9 +3522,9 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3761,9 +3522,9 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
tile_code
&=
TASK_TEXTURE_BITS
;
tile_code
&=
TASK_TEXTURE_BITS
;
if
(
!
tile_code
){
////
if (!tile_code){
return
;
// should not happen
////
return; // should not happen
}
////
}
// if no extra and no overlap -> nothing remains, return
// if no extra and no overlap -> nothing remains, return
if
(
gpu_texture_rbg
&&
(
texture_rbg_stride
!=
0
))
{
// generate RGBA (overlapped) // keep_weights
if
(
gpu_texture_rbg
&&
(
texture_rbg_stride
!=
0
))
{
// generate RGBA (overlapped) // keep_weights
#ifdef DEBUG7A
#ifdef DEBUG7A
...
@@ -3782,27 +3543,76 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3782,27 +3543,76 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
__syncthreads
();
// __syncwarp();
__syncthreads
();
// __syncwarp();
#endif // DEBUG12
#endif // DEBUG12
int
alpha_mode
=
alphaIndex
[
tile_code
];
// only 4 lowest bits
int
alpha_mode
=
tile_code
&
0xff
;
// alphaIndex[tile_code]; // only 4 lowest bits
if
(
!
alpha_mode
){
// only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
if
(
alpha_mode
!=
0xff
){
// only add if needed, alpha_mode == 0xff (neighbors from all 8 directions) - keep as is. FIXME: alpha_mode ???
for
(
int
pass
=
0
;
pass
<
8
;
pass
++
)
{
// Calculate average value per color, need 32 shared array
int
row
=
pass
*
2
+
(
threadIdx
.
y
>>
1
);
for
(
int
ncol
=
0
;
ncol
<
colors
;
ncol
++
)
{
int
col
=
((
threadIdx
.
y
&
1
)
<<
3
)
+
threadIdx
.
x
;
int
sum_index
=
threadIdx
.
x
+
threadIdx
.
y
*
TEXTURE_THREADS_PER_TILE
;
// 0.. 31
int
i
=
row
*
DTT_SIZE21
+
col
;
texture_averaging
[
sum_index
]
=
0
;
int
gi
=
row
*
DTT_SIZE2
+
col
;
for
(
int
pass
=
0
;
pass
<
8
;
pass
++
)
{
float
*
rgba_i
=
rgbaw
+
i
;
int
row
=
pass
*
2
+
(
threadIdx
.
y
>>
1
);
// always copy 3 (1) colors + alpha
int
col
=
((
threadIdx
.
y
&
1
)
<<
3
)
+
threadIdx
.
x
;
if
(
colors
==
3
){
int
i
=
row
*
DTT_SIZE21
+
col
;
float
*
rgba_i
=
rgbaw
+
i
;
texture_averaging
[
sum_index
]
+=
*
(
rgba_i
+
ncol
*
(
DTT_SIZE2
*
DTT_SIZE21
));
}
__syncthreads
();
if
(
threadIdx
.
y
==
0
){
// combine sums
#pragma unroll
#pragma unroll
for
(
int
ncol
=
0
;
ncol
<
colors
+
1
;
ncol
++
)
{
// 4
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
// reduce sums to 8
*
(
rgba_i
+
ncol
*
(
DTT_SIZE2
*
DTT_SIZE21
))
*=
alphaFade
[
alpha_mode
][
gi
];
// reduce [tile_code] by LUT
texture_averaging
[
threadIdx
.
x
]
+=
texture_averaging
[
threadIdx
.
x
+
TEXTURE_THREADS_PER_TILE
*
i
];
}
}
}
else
{
// assuming colors = 1
}
__syncthreads
();
if
((
threadIdx
.
y
==
0
)
&&
(
threadIdx
.
x
==
0
)){
// combine sums
#pragma unroll
#pragma unroll
for
(
int
ncol
=
0
;
ncol
<
1
+
1
;
ncol
++
)
{
// 2
for
(
int
i
=
1
;
i
<
TEXTURE_THREADS_PER_TILE
;
i
++
)
{
// reduce sums to 8
*
(
rgba_i
+
ncol
*
(
DTT_SIZE2
*
DTT_SIZE21
))
*=
alphaFade
[
alpha_mode
][
gi
];
// reduce [tile_code] by LUT
texture_averaging
[
0
]
+=
texture_averaging
[
i
];
}
}
texture_averaging
[
0
]
/=
64
;
// average value for uniform field
}
__syncthreads
();
float
avg_val
=
texture_averaging
[
0
];
// now add scale average value for each missing direction
for
(
int
idir
=
0
;
idir
<
8
;
idir
++
)
if
((
alpha_mode
&
(
1
<<
idir
))
==
0
)
{
// no tile in this direction
/* */
int
row
,
col
;
switch
(
idir
>>
1
)
{
case
0
:
row
=
4
+
threadIdx
.
y
;
col
=
4
+
threadIdx
.
x
;
break
;
case
1
:
row
=
4
+
(
threadIdx
.
x
>>
2
)
+
(
threadIdx
.
y
<<
1
);
col
=
8
+
(
threadIdx
.
x
&
3
);
break
;
case
2
:
row
=
8
+
threadIdx
.
y
;
col
=
4
+
threadIdx
.
x
;
break
;
case
3
:
row
=
4
+
(
threadIdx
.
x
>>
2
)
+
(
threadIdx
.
y
<<
1
);
col
=
4
+
(
threadIdx
.
x
&
3
);
break
;
}
int
i
=
row
*
DTT_SIZE21
+
col
;
float
*
rgba_i
=
rgbaw
+
i
;
// always copy 3 (1) colors + alpha
*
(
rgba_i
+
ncol
*
(
DTT_SIZE2
*
DTT_SIZE21
))
+=
textureBlend
[
idir
][(
threadIdx
.
y
<<
3
)
+
threadIdx
.
x
]
*
avg_val
;
/*
for (int pass = 0; pass < 8; pass ++) {
int row1 = pass * 2 + (threadIdx.y >> 1);
int col1 = ((threadIdx.y & 1) << 3) + threadIdx.x;
int i = row1 * DTT_SIZE21 + col1;
int gi = row1 * DTT_SIZE2 + col1;
float * rgba_i = rgbaw + i;
// always copy 3 (1) colors + alpha
*(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)) += textureBlend[idir][gi] * avg_val;
}
*/
}
}
}
}
// __syncthreads();
}
}
int
slice_stride
=
texture_rbg_stride
*
(
*
(
woi
+
3
)
+
1
)
*
DTT_SIZE
;
// offset to the next color
int
slice_stride
=
texture_rbg_stride
*
(
*
(
woi
+
3
)
+
1
)
*
DTT_SIZE
;
// offset to the next color
int
tileY
=
tile_num
/
tilesx
;
// TILES-X; // slow, but 1 per tile
int
tileY
=
tile_num
/
tilesx
;
// TILES-X; // slow, but 1 per tile
...
@@ -3826,6 +3636,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3826,6 +3636,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
__syncthreads
();
// __syncwarp();
__syncthreads
();
// __syncwarp();
#endif // DEBUG12
#endif // DEBUG12
// copy textures to global memory
for
(
int
pass
=
0
;
pass
<
8
;
pass
++
)
{
for
(
int
pass
=
0
;
pass
<
8
;
pass
++
)
{
int
row
=
pass
*
2
+
(
threadIdx
.
y
>>
1
);
// row inside a tile (0..15)
int
row
=
pass
*
2
+
(
threadIdx
.
y
>>
1
);
// row inside a tile (0..15)
int
col
=
((
threadIdx
.
y
&
1
)
<<
3
)
+
threadIdx
.
x
;
// column inside a tile (0..15)
int
col
=
((
threadIdx
.
y
&
1
)
<<
3
)
+
threadIdx
.
x
;
// column inside a tile (0..15)
...
@@ -3843,10 +3654,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3843,10 +3654,6 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
__syncthreads
();
// __syncwarp();
__syncthreads
();
// __syncwarp();
#endif // DEBUG12
#endif // DEBUG12
/// if (!border_tile ||
/// ((g_row >= 0) && (g_col >= 0) && (g_row < (DTT_SIZE * TILES-Y)) && (g_col < (DTT_SIZE * TILES-X)))){
/// ((g_row >= 0) && (g_col >= 0) && (g_row < height) && (g_col < (DTT_SIZE * TILES-X)))){
// always copy 3 (1) colors + alpha
// always copy 3 (1) colors + alpha
if
(
colors
==
3
){
if
(
colors
==
3
){
#pragma unroll
#pragma unroll
...
@@ -3860,21 +3667,74 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -3860,21 +3667,74 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
}
}
}
}
}
}
// generate and copy per-sensor texture
if
(
keep_weights
&
2
){
// copy individual sensors output
if
(
keep_weights
&
2
){
// copy individual sensors output
for
(
int
ncam
=
0
;
ncam
<
num_cams
;
ncam
++
)
{
for
(
int
ncam
=
0
;
ncam
<
num_cams
;
ncam
++
)
{
float
*
mclt_dst_ncam
=
mclt_debayer
+
(
ncam
*
colors
)
*
(
MCLT_UNION_LEN
);
float
*
mclt_dst_ncam
=
mclt_debayer
+
(
ncam
*
colors
)
*
(
MCLT_UNION_LEN
);
if
(
!
alpha_mode
){
// only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
//if (alpha_mode){ // only multiply if needed, alpha_mode == 0 - keep as is. FIXME: alpha_mode ???
for
(
int
pass
=
0
;
pass
<
8
;
pass
++
)
{
if
(
alpha_mode
!=
0xff
){
int
row
=
pass
*
2
+
(
threadIdx
.
y
>>
1
);
for
(
int
ncol
=
0
;
ncol
<
colors
;
ncol
++
)
{
int
col
=
((
threadIdx
.
y
&
1
)
<<
3
)
+
threadIdx
.
x
;
// calculate average value for blending
int
i
=
row
*
DTT_SIZE21
+
col
;
int
sum_index
=
threadIdx
.
x
+
threadIdx
.
y
*
TEXTURE_THREADS_PER_TILE
;
// 0.. 31
int
gi
=
row
*
DTT_SIZE2
+
col
;
texture_averaging
[
sum_index
]
=
0
;
float
*
mclt_dst_i
=
mclt_dst_ncam
+
i
;
for
(
int
pass
=
0
;
pass
<
8
;
pass
++
)
{
for
(
int
ncol
=
0
;
ncol
<
colors
;
ncol
++
)
{
int
row
=
pass
*
2
+
(
threadIdx
.
y
>>
1
);
*
(
mclt_dst_i
+
ncol
*
(
MCLT_UNION_LEN
))
*=
alphaFade
[
alpha_mode
][
gi
];
// reduce [tile_code] by LUT
int
col
=
((
threadIdx
.
y
&
1
)
<<
3
)
+
threadIdx
.
x
;
int
i
=
row
*
DTT_SIZE21
+
col
;
float
*
rgba_i
=
rgbaw
+
i
;
texture_averaging
[
sum_index
]
+=
*
(
rgba_i
+
ncol
*
(
DTT_SIZE2
*
DTT_SIZE21
));
}
__syncthreads
();
if
(
threadIdx
.
y
==
0
){
// combine sums
#pragma unroll
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
// reduce sums to 8
texture_averaging
[
threadIdx
.
x
]
+=
texture_averaging
[
threadIdx
.
x
+
TEXTURE_THREADS_PER_TILE
*
i
];
}
}
__syncthreads
();
if
((
threadIdx
.
y
==
0
)
&&
(
threadIdx
.
x
==
0
)){
// combine sums
#pragma unroll
for
(
int
i
=
1
;
i
<
TEXTURE_THREADS_PER_TILE
;
i
++
)
{
// reduce sums to 8
texture_averaging
[
0
]
+=
texture_averaging
[
i
];
}
texture_averaging
[
0
]
/=
64
;
// average value for uniform field
}
}
__syncthreads
();
float
avg_val
=
texture_averaging
[
0
];
// Possible to re-use ports_rgb_shared[], if needed (change to (calc_extra | (keep_weights & 2) in tile_combine_rgba()).
// Now using averaging here (less noise if averaging sensor outside).
// float avg_val = ports_rgb_shared[ncol * num_cams + ncam]; // texture_averaging[0];
for
(
int
idir
=
0
;
idir
<
8
;
idir
++
)
if
((
alpha_mode
&
(
1
<<
idir
))
==
0
)
{
// no tile in this direction
/* */
int
row
,
col
;
switch
(
idir
>>
1
)
{
case
0
:
row
=
4
+
threadIdx
.
y
;
col
=
4
+
threadIdx
.
x
;
break
;
case
1
:
row
=
4
+
(
threadIdx
.
x
>>
2
)
+
(
threadIdx
.
y
<<
1
);
col
=
8
+
(
threadIdx
.
x
&
3
);
break
;
case
2
:
row
=
8
+
threadIdx
.
y
;
col
=
4
+
threadIdx
.
x
;
break
;
case
3
:
row
=
4
+
(
threadIdx
.
x
>>
2
)
+
(
threadIdx
.
y
<<
1
);
col
=
4
+
(
threadIdx
.
x
&
3
);
break
;
}
int
i
=
row
*
DTT_SIZE21
+
col
;
float
*
mclt_dst_i
=
mclt_dst_ncam
+
i
;
int
gi
=
(
threadIdx
.
y
<<
3
)
+
threadIdx
.
x
;
*
(
mclt_dst_i
+
ncol
*
(
MCLT_UNION_LEN
))
+=
textureBlend
[
idir
][
gi
]
*
avg_val
;
}
__syncthreads
();
// needed?
}
}
}
}
for
(
int
pass
=
0
;
pass
<
8
;
pass
++
)
{
for
(
int
pass
=
0
;
pass
<
8
;
pass
++
)
{
int
row
=
pass
*
2
+
(
threadIdx
.
y
>>
1
);
// row inside a tile (0..15)
int
row
=
pass
*
2
+
(
threadIdx
.
y
>>
1
);
// row inside a tile (0..15)
int
col
=
((
threadIdx
.
y
&
1
)
<<
3
)
+
threadIdx
.
x
;
// column inside a tile (0..15)
int
col
=
((
threadIdx
.
y
&
1
)
<<
3
)
+
threadIdx
.
x
;
// column inside a tile (0..15)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment