Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tensorflow-feed-from-gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
tensorflow-feed-from-gpu
Commits
4e45d987
Commit
4e45d987
authored
Feb 20, 2020
by
Oleg Dzhimiev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cleanup
parent
e35a9afe
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
42 additions
and
38 deletions
+42
-38
array.cu
array.cu
+1
-1
main.cpp
main.cpp
+41
-37
No files found.
array.cu
View file @
4e45d987
...
@@ -70,7 +70,7 @@ int myCreateCUDAArray(uint8_t *tf_ptr){
...
@@ -70,7 +70,7 @@ int myCreateCUDAArray(uint8_t *tf_ptr){
// check if the GPU did the same as the CPU
// check if the GPU did the same as the CPU
bool workedCorrectly = true;
bool workedCorrectly = true;
printf("CUDA kernel incrementing test:\n");
printf("CUDA kernel
simple
incrementing test:\n");
for(int i=0;i<numberOfNumbers;i++)
for(int i=0;i<numberOfNumbers;i++)
{
{
if (numbers1[i] != numbers2[i])
if (numbers1[i] != numbers2[i])
...
...
main.cpp
View file @
4e45d987
...
@@ -32,27 +32,19 @@ using tensorflow::Status;
...
@@ -32,27 +32,19 @@ using tensorflow::Status;
using
tensorflow
::
Tensor
;
using
tensorflow
::
Tensor
;
Status
loadGraph
(
unique_ptr
<
tensorflow
::
Session
>
*
session
){
Status
createGraphAndSession
(
unique_ptr
<
tensorflow
::
Session
>
*
session
){
tensorflow
::
GraphDef
graph_def
;
tensorflow
::
GraphDef
graph_def
;
using
namespace
tensorflow
;
using
namespace
tensorflow
;
using
namespace
tensorflow
::
ops
;
auto
scope
=
Scope
::
NewRootScope
();
auto
scope
=
Scope
::
NewRootScope
();
// TF likes power of 2
// TF likes power of 2
tensorflow
::
TensorShape
shape
=
tensorflow
::
TensorShape
({
256
});
tensorflow
::
TensorShape
shape
=
tensorflow
::
TensorShape
({
256
});
auto
a
=
Placeholder
(
scope
.
WithOpName
(
"array_tensor_in"
),
DT_UINT8
,
Placeholder
::
Shape
(
shape
));
auto
a
=
ops
::
Placeholder
(
scope
.
WithOpName
(
"array_tensor_in"
),
DT_UINT8
,
ops
::
Placeholder
::
Shape
(
shape
));
auto
b
=
ops
::
Identity
(
scope
.
WithOpName
(
"array_tensor_out"
),
a
);
//auto c0 = Const(scope,{256});
//auto c1 = Const(scope, (uint8_t)1, {});
//auto c2 = Fill(scope, c0, c1);
auto
b
=
Identity
(
scope
.
WithOpName
(
"array_tensor_out"
),
a
);
//auto b = Identity(scope.WithOpName("array_tensor_out"), c2);
//auto b = Add(scope.WithOpName("array_tensor_out"),c2,a);
TF_CHECK_OK
(
scope
.
ToGraphDef
(
&
graph_def
));
TF_CHECK_OK
(
scope
.
ToGraphDef
(
&
graph_def
));
...
@@ -65,7 +57,7 @@ Status loadGraph(unique_ptr<tensorflow::Session> *session){
...
@@ -65,7 +57,7 @@ Status loadGraph(unique_ptr<tensorflow::Session> *session){
session
->
reset
(
tensorflow
::
NewSession
(
session_options
));
session
->
reset
(
tensorflow
::
NewSession
(
session_options
));
Status
session_create_status
=
(
*
session
)
->
Create
(
graph_def
);
Status
session_create_status
=
(
*
session
)
->
Create
(
graph_def
);
if
(
!
session_create_status
.
ok
()){
if
(
!
session_create_status
.
ok
()){
LOG
(
ERROR
)
<<
"
loadGraph
(): ERROR"
<<
session_create_status
;
LOG
(
ERROR
)
<<
"
createGraphAndSession
(): ERROR"
<<
session_create_status
;
}
}
return
Status
::
OK
();
return
Status
::
OK
();
}
}
...
@@ -109,7 +101,7 @@ int main(int, char**) {
...
@@ -109,7 +101,7 @@ int main(int, char**) {
LOG
(
INFO
)
<<
"Tensorflow built with CUDA, keep running"
<<
endl
;
LOG
(
INFO
)
<<
"Tensorflow built with CUDA, keep running"
<<
endl
;
}
}
// check and init CUDA drivers and libs
// check and init CUDA drivers and libs
?
void
*
hHandleDriver
=
nullptr
;
void
*
hHandleDriver
=
nullptr
;
CUresult
cuda_res
=
cuInit
(
0
,
__CUDA_API_VERSION
,
hHandleDriver
);
CUresult
cuda_res
=
cuInit
(
0
,
__CUDA_API_VERSION
,
hHandleDriver
);
if
(
cuda_res
!=
CUDA_SUCCESS
)
if
(
cuda_res
!=
CUDA_SUCCESS
)
...
@@ -123,14 +115,17 @@ int main(int, char**) {
...
@@ -123,14 +115,17 @@ int main(int, char**) {
}
}
LOG
(
INFO
)
<<
"
\033
[1;32m"
<<
"CUDA init: ok"
<<
"
\033
[0m"
;
LOG
(
INFO
)
<<
"
\033
[1;32m"
<<
"CUDA init: ok"
<<
"
\033
[0m"
;
// construct graph and create TF session
std
::
unique_ptr
<
tensorflow
::
Session
>
session
;
std
::
unique_ptr
<
tensorflow
::
Session
>
session
;
createGraphAndSession
(
&
session
);
loadGraph
(
&
session
);
// do the opts and allocate tensor in GPU
// NOTE: must match with graph names
const
string
inputLayer
=
"array_tensor_in:0"
;
const
string
inputLayer
=
"array_tensor_in:0"
;
const
string
outputLayer
=
"array_tensor_out:0"
;
const
string
outputLayer
=
"array_tensor_out:0"
;
// do the opts
CallableOptions
opts
;
CallableOptions
opts
;
Session
::
CallableHandle
feed_gpu_fetch_cpu
;
Session
::
CallableHandle
feed_gpu_fetch_cpu
;
...
@@ -150,7 +145,6 @@ int main(int, char**) {
...
@@ -150,7 +145,6 @@ int main(int, char**) {
// TF likes power of 2 and 256s
// TF likes power of 2 and 256s
tensorflow
::
TensorShape
shape
=
tensorflow
::
TensorShape
({
256
});
tensorflow
::
TensorShape
shape
=
tensorflow
::
TensorShape
({
256
});
// allocate tensor on the GPU
tensorflow
::
PlatformGpuId
platform_gpu_id
(
0
);
tensorflow
::
PlatformGpuId
platform_gpu_id
(
0
);
tensorflow
::
GPUMemAllocator
*
sub_allocator
=
tensorflow
::
GPUMemAllocator
*
sub_allocator
=
...
@@ -165,32 +159,42 @@ int main(int, char**) {
...
@@ -165,32 +159,42 @@ int main(int, char**) {
LOG
(
INFO
)
<<
"
\033
[1;37m"
<<
"Is CUDA Tensor? "
<<
(
IsCUDATensor
(
inputTensor
)
?
"
\033
[1;32myes"
:
"
\033
[1;31mno"
)
<<
"
\033
[0m"
;
LOG
(
INFO
)
<<
"
\033
[1;37m"
<<
"Is CUDA Tensor? "
<<
(
IsCUDATensor
(
inputTensor
)
?
"
\033
[1;32myes"
:
"
\033
[1;31mno"
)
<<
"
\033
[0m"
;
// pointer to tensor data
tensorflow
::
uint8
*
p
=
inputTensor
.
flat
<
tensorflow
::
uint8
>
().
data
();
tensorflow
::
uint8
*
p
=
inputTensor
.
flat
<
tensorflow
::
uint8
>
().
data
();
// CUDA kernel call
// run and check
myCreateCUDAArray
(
p
);
vector
<
Tensor
>
outputs
;
vector
<
Tensor
>
outputs
;
LOG
(
INFO
)
<<
"RunCallable()..."
;
runStatus
=
session
->
RunCallable
(
feed_gpu_fetch_cpu
,
{
inputTensor
},
&
outputs
,
nullptr
);
if
(
!
runStatus
.
ok
())
{
LOG
(
ERROR
)
<<
"Running model failed: "
<<
runStatus
;
return
-
1
;
}
LOG
(
INFO
)
<<
"RunCallable() output:"
;
LOG
(
INFO
)
<<
outputs
[
0
].
DebugString
();
auto
tmap
=
outputs
[
0
].
tensor
<
uint8_t
,
1
>
();
cout
<<
"
\033
[1;37m"
;
for
(
int
i
=
0
;
i
<
2
;
i
++
){
for
(
int
d
=
0
;
d
<
256
;
d
++
)
{
cout
<<
(
int
)
tmap
(
d
);
cout
<<
endl
;
if
(
d
!=
255
)
cout
<<
", "
;
if
(
i
==
0
){
LOG
(
INFO
)
<<
"
\033
[1;32m"
<<
"RunCallable()... No feeding (zeroes)"
<<
"
\033
[0m"
;
}
if
(
i
==
1
){
LOG
(
INFO
)
<<
"
\033
[1;32m"
<<
"RunCallable()... Feeding from CUDA kernel"
<<
"
\033
[0m"
;
// CUDA kernel call
// NOTE: do not allocate memory for p inside the kernel
myCreateCUDAArray
(
p
);
}
runStatus
=
session
->
RunCallable
(
feed_gpu_fetch_cpu
,
{
inputTensor
},
&
outputs
,
nullptr
);
if
(
!
runStatus
.
ok
()){
LOG
(
ERROR
)
<<
"Running model failed: "
<<
runStatus
;
return
-
1
;
}
LOG
(
INFO
)
<<
"RunCallable() output:"
;
LOG
(
INFO
)
<<
outputs
[
0
].
DebugString
();
auto
tmap
=
outputs
[
0
].
tensor
<
uint8_t
,
1
>
();
cout
<<
"
\033
[1;37m"
;
for
(
int
d
=
0
;
d
<
256
;
d
++
){
cout
<<
(
int
)
tmap
(
d
);
if
(
d
!=
255
)
cout
<<
", "
;
}
cout
<<
"
\033
[0m"
<<
endl
;
}
}
cout
<<
"
\033
[0m"
<<
endl
;
session
->
ReleaseCallable
(
feed_gpu_fetch_cpu
);
session
->
ReleaseCallable
(
feed_gpu_fetch_cpu
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment