Commit 4e45d987 authored by Oleg Dzhimiev

cleanup

parent e35a9afe
@@ -70,7 +70,7 @@ int myCreateCUDAArray(uint8_t *tf_ptr){
   // check if the GPU did the same as the CPU
   bool workedCorrectly = true;
-  printf("CUDA kernel incrementing test:\n");
+  printf("CUDA kernel simple incrementing test:\n");
   for(int i=0;i<numberOfNumbers;i++)
   {
     if (numbers1[i] != numbers2[i])
...
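A note for readers: numbers1, numbers2, and numberOfNumbers come from this hunk (CPU result vs. values copied back from the GPU). A minimal sketch of the full check, where the mismatch printout and the pass/fail summary are assumptions not shown in the diff:

```cpp
#include <cstdint>
#include <cstdio>

// Hedged reconstruction of the check above: numbers1 holds the CPU
// result, numbers2 the values copied back from the GPU; any mismatch
// fails the "simple incrementing test".
bool verifyIncrementTest(const uint8_t *numbers1, const uint8_t *numbers2,
                         int numberOfNumbers) {
  bool workedCorrectly = true;
  printf("CUDA kernel simple incrementing test:\n");
  for (int i = 0; i < numberOfNumbers; i++) {
    if (numbers1[i] != numbers2[i]) {
      printf("  mismatch at %d: CPU=%d GPU=%d\n", i, numbers1[i], numbers2[i]);
      workedCorrectly = false;
    }
  }
  printf("  %s\n", workedCorrectly ? "PASSED" : "FAILED");
  return workedCorrectly;
}
```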
@@ -32,27 +32,19 @@ using tensorflow::Status;
 using tensorflow::Tensor;
-Status loadGraph(unique_ptr<tensorflow::Session> *session){
+Status createGraphAndSession(unique_ptr<tensorflow::Session> *session){
   tensorflow::GraphDef graph_def;
   using namespace tensorflow;
-  using namespace tensorflow::ops;
   auto scope = Scope::NewRootScope();
   // TF likes power of 2
   tensorflow::TensorShape shape = tensorflow::TensorShape({256});
-  auto a = Placeholder(scope.WithOpName("array_tensor_in"), DT_UINT8, Placeholder::Shape(shape));
-  //auto c0 = Const(scope,{256});
-  //auto c1 = Const(scope, (uint8_t)1, {});
-  //auto c2 = Fill(scope, c0, c1);
-  auto b = Identity(scope.WithOpName("array_tensor_out"), a);
-  //auto b = Identity(scope.WithOpName("array_tensor_out"), c2);
-  //auto b = Add(scope.WithOpName("array_tensor_out"),c2,a);
+  auto a = ops::Placeholder(scope.WithOpName("array_tensor_in"), DT_UINT8, ops::Placeholder::Shape(shape));
+  auto b = ops::Identity(scope.WithOpName("array_tensor_out"), a);
   TF_CHECK_OK(scope.ToGraphDef(&graph_def));
@@ -65,7 +57,7 @@ Status loadGraph(unique_ptr<tensorflow::Session> *session){
   session->reset(tensorflow::NewSession(session_options));
   Status session_create_status = (*session)->Create(graph_def);
   if (!session_create_status.ok()){
-    LOG(ERROR) << "loadGraph(): ERROR" << session_create_status;
+    LOG(ERROR) << "createGraphAndSession(): ERROR" << session_create_status;
   }
   return Status::OK();
 }
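Taken together, the two hunks above reduce createGraphAndSession() to a Placeholder -> Identity graph plus a session created on it. A self-contained sketch assembled from exactly those pieces (only the includes and std:: qualifiers are added here; header paths follow the usual TF 1.x layout):

```cpp
#include <memory>
#include "tensorflow/cc/framework/scope.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/public/session.h"

// 256-element uint8 placeholder passed through Identity, then a
// session created on that graph, as in the hunks above.
tensorflow::Status createGraphAndSession(std::unique_ptr<tensorflow::Session> *session) {
  using namespace tensorflow;
  GraphDef graph_def;
  auto scope = Scope::NewRootScope();
  TensorShape shape({256});  // TF likes power of 2
  auto a = ops::Placeholder(scope.WithOpName("array_tensor_in"), DT_UINT8,
                            ops::Placeholder::Shape(shape));
  auto b = ops::Identity(scope.WithOpName("array_tensor_out"), a);
  TF_CHECK_OK(scope.ToGraphDef(&graph_def));
  SessionOptions session_options;
  session->reset(NewSession(session_options));
  return (*session)->Create(graph_def);
}
```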
@@ -109,7 +101,7 @@ int main(int, char**) {
     LOG(INFO) << "Tensorflow built with CUDA, keep running" << endl;
   }
-  // check and init CUDA drivers and libs
+  // check and init CUDA drivers and libs?
   void *hHandleDriver = nullptr;
   CUresult cuda_res = cuInit(0, __CUDA_API_VERSION, hHandleDriver);
   if (cuda_res != CUDA_SUCCESS)
...@@ -123,14 +115,17 @@ int main(int, char**) { ...@@ -123,14 +115,17 @@ int main(int, char**) {
} }
LOG(INFO) << "\033[1;32m" << "CUDA init: ok" << "\033[0m"; LOG(INFO) << "\033[1;32m" << "CUDA init: ok" << "\033[0m";
// construct graph and create TF session
std::unique_ptr<tensorflow::Session> session; std::unique_ptr<tensorflow::Session> session;
createGraphAndSession(&session);
loadGraph(&session);
// do the opts and allocate tensor in GPU
// NOTE: must match with graph names
const string inputLayer = "array_tensor_in:0"; const string inputLayer = "array_tensor_in:0";
const string outputLayer = "array_tensor_out:0"; const string outputLayer = "array_tensor_out:0";
// do the opts
CallableOptions opts; CallableOptions opts;
Session::CallableHandle feed_gpu_fetch_cpu; Session::CallableHandle feed_gpu_fetch_cpu;
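The CallableOptions / CallableHandle pair is what lets RunCallable() feed a tensor that already lives in GPU memory. The configuration itself is outside this hunk; a hedged sketch of the usual setup, where the device string and the fetch_skip_sync choice are assumptions:

```cpp
#include "tensorflow/core/public/session.h"

// Sketch: declare that inputLayer will be fed from GPU memory while
// the fetch is returned on the host. The device string is an assumption.
tensorflow::Status makeGpuFeedCallable(tensorflow::Session *session,
                                       const std::string &inputLayer,
                                       const std::string &outputLayer,
                                       tensorflow::Session::CallableHandle *handle) {
  tensorflow::CallableOptions opts;
  opts.add_feed(inputLayer);
  opts.add_fetch(outputLayer);
  const std::string gpu_device = "/job:localhost/replica:0/task:0/device:GPU:0";
  (*opts.mutable_feed_devices())[inputLayer] = gpu_device;
  opts.set_fetch_skip_sync(true);  // fed tensor is already on the device
  return session->MakeCallable(opts, handle);
}
```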
@@ -150,7 +145,6 @@ int main(int, char**) {
   // TF likes power of 2 and 256s
   tensorflow::TensorShape shape = tensorflow::TensorShape({256});
-  // allocate tensor on the GPU
   tensorflow::PlatformGpuId platform_gpu_id(0);
   tensorflow::GPUMemAllocator *sub_allocator =
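The hunk above is cut off mid-statement. The usual continuation of this TF 1.13-era internal-allocator pattern looks roughly like the sketch below; the constructor arguments and header paths are assumptions that shift between TF versions:

```cpp
#include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
#include "tensorflow/core/framework/tensor.h"

// Sketch: build a GPU sub-allocator, wrap it in a BFC allocator, and
// construct a Tensor whose buffer lives in GPU memory from the start.
tensorflow::Tensor makeGpuTensor(const tensorflow::TensorShape &shape) {
  tensorflow::PlatformGpuId platform_gpu_id(0);
  tensorflow::GPUMemAllocator *sub_allocator = new tensorflow::GPUMemAllocator(
      tensorflow::GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
      platform_gpu_id, /*use_unified_memory=*/false, {}, {});
  tensorflow::GPUBFCAllocator *allocator = new tensorflow::GPUBFCAllocator(
      sub_allocator, shape.num_elements() * sizeof(tensorflow::uint8), "GPU_0_bfc");
  // The tensor's buffer is device memory owned by this allocator.
  return tensorflow::Tensor(allocator, tensorflow::DT_UINT8, shape);
}
```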
@@ -165,32 +159,42 @@ int main(int, char**) {
   LOG(INFO) << "\033[1;37m" << "Is CUDA Tensor? " << (IsCUDATensor(inputTensor)?"\033[1;32myes":"\033[1;31mno") << "\033[0m";
+  // pointer to tensor data
   tensorflow::uint8 *p = inputTensor.flat<tensorflow::uint8>().data();
-  // CUDA kernel call
-  myCreateCUDAArray(p);
+  // run and check
   vector<Tensor> outputs;
-  LOG(INFO) << "RunCallable()...";
-  runStatus = session->RunCallable(feed_gpu_fetch_cpu, {inputTensor}, &outputs, nullptr);
-  if (!runStatus.ok())
-  {
-    LOG(ERROR) << "Running model failed: " << runStatus;
-    return -1;
-  }
-  LOG(INFO) << "RunCallable() output:";
-  LOG(INFO) << outputs[0].DebugString();
-  auto tmap = outputs[0].tensor<uint8_t, 1>();
-  cout << "\033[1;37m";
-  for (int d = 0; d < 256; d++) {
-    cout << (int) tmap(d);
-    if (d!=255) cout << ", ";
-  }
-  cout << "\033[0m" << endl;
+  for(int i=0;i<2;i++){
+    cout << endl;
+    if (i==0){
+      LOG(INFO) << "\033[1;32m" << "RunCallable()... No feeding (zeroes)" << "\033[0m";
+    }
+    if (i==1){
+      LOG(INFO) << "\033[1;32m" << "RunCallable()... Feeding from CUDA kernel"<< "\033[0m";
+      // CUDA kernel call
+      // NOTE: do not allocate memory for p inside the kernel
+      myCreateCUDAArray(p);
+    }
+    runStatus = session->RunCallable(feed_gpu_fetch_cpu, {inputTensor}, &outputs, nullptr);
+    if (!runStatus.ok()){
+      LOG(ERROR) << "Running model failed: " << runStatus;
+      return -1;
+    }
+    LOG(INFO) << "RunCallable() output:";
+    LOG(INFO) << outputs[0].DebugString();
+    auto tmap = outputs[0].tensor<uint8_t, 1>();
+    cout << "\033[1;37m";
+    for (int d = 0; d < 256; d++){
+      cout << (int) tmap(d);
+      if (d!=255) cout << ", ";
+    }
+    cout << "\033[0m" << endl;
+  }
   session->ReleaseCallable(feed_gpu_fetch_cpu);
...
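The new NOTE about not allocating memory for p matters because p already points at the tensor's device buffer; the kernel must write in place. Only the name myCreateCUDAArray and its uint8_t* argument appear in this commit, so the kernel body and launch shape below are assumptions:

```cpp
#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical kernel: writes a recognizable ramp into the buffer the
// TF tensor already owns; nothing is allocated on the device side.
__global__ void fillRamp(uint8_t *data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] = (uint8_t)i;
}

// Sketch of myCreateCUDAArray(): tf_ptr is assumed to be the device
// pointer from inputTensor.flat<tensorflow::uint8>().data().
int myCreateCUDAArray(uint8_t *tf_ptr) {
  const int n = 256;              // matches the tensor shape {256}
  fillRamp<<<1, n>>>(tf_ptr, n);  // write in place, no cudaMalloc here
  cudaError_t err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    printf("fillRamp failed: %s\n", cudaGetErrorString(err));
    return -1;
  }
  return 0;
}
```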