Commit 4e45d987 authored by Oleg Dzhimiev

cleanup

parent e35a9afe
@@ -70,7 +70,7 @@ int myCreateCUDAArray(uint8_t *tf_ptr){
     // check if the GPU did the same as the CPU
     bool workedCorrectly = true;
-    printf("CUDA kernel incrementing test:\n");
+    printf("CUDA kernel simple incrementing test:\n");
     for(int i=0;i<numberOfNumbers;i++)
     {
         if (numbers1[i] != numbers2[i])
@@ -32,27 +32,19 @@ using tensorflow::Status;
 using tensorflow::Tensor;
-Status loadGraph(unique_ptr<tensorflow::Session> *session){
+Status createGraphAndSession(unique_ptr<tensorflow::Session> *session){
     tensorflow::GraphDef graph_def;
     using namespace tensorflow;
-    using namespace tensorflow::ops;
     auto scope = Scope::NewRootScope();
     // TF likes power of 2
     tensorflow::TensorShape shape = tensorflow::TensorShape({256});
-    auto a = Placeholder(scope.WithOpName("array_tensor_in"), DT_UINT8, Placeholder::Shape(shape));
-    //auto c0 = Const(scope,{256});
-    //auto c1 = Const(scope, (uint8_t)1, {});
-    //auto c2 = Fill(scope, c0, c1);
-    auto b = Identity(scope.WithOpName("array_tensor_out"), a);
-    //auto b = Identity(scope.WithOpName("array_tensor_out"), c2);
-    //auto b = Add(scope.WithOpName("array_tensor_out"),c2,a);
+    auto a = ops::Placeholder(scope.WithOpName("array_tensor_in"), DT_UINT8, ops::Placeholder::Shape(shape));
+    auto b = ops::Identity(scope.WithOpName("array_tensor_out"), a);
     TF_CHECK_OK(scope.ToGraphDef(&graph_def));
@@ -65,7 +57,7 @@ Status loadGraph(unique_ptr<tensorflow::Session> *session){
     session->reset(tensorflow::NewSession(session_options));
     Status session_create_status = (*session)->Create(graph_def);
     if (!session_create_status.ok()){
-        LOG(ERROR) << "loadGraph(): ERROR" << session_create_status;
+        LOG(ERROR) << "createGraphAndSession(): ERROR" << session_create_status;
     }
     return Status::OK();
 }
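(Note: the hunks above do not show the includes this graph builder depends on. For reference only, with the TF 1.x C++ API the ops and session types used here typically come from the headers below; the exact include set in this repo is not visible in the diff.)

    #include "tensorflow/cc/framework/scope.h"      // Scope::NewRootScope()
    #include "tensorflow/cc/ops/standard_ops.h"     // ops::Placeholder, ops::Identity
    #include "tensorflow/core/public/session.h"     // tensorflow::Session, NewSession()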
@@ -109,7 +101,7 @@ int main(int, char**) {
         LOG(INFO) << "Tensorflow built with CUDA, keep running" << endl;
     }
-    // check and init CUDA drivers and libs
+    // check and init CUDA drivers and libs?
     void *hHandleDriver = nullptr;
     CUresult cuda_res = cuInit(0, __CUDA_API_VERSION, hHandleDriver);
     if (cuda_res != CUDA_SUCCESS)
......@@ -123,14 +115,17 @@ int main(int, char**) {
}
LOG(INFO) << "\033[1;32m" << "CUDA init: ok" << "\033[0m";
// construct graph and create TF session
std::unique_ptr<tensorflow::Session> session;
createGraphAndSession(&session);
loadGraph(&session);
// do the opts and allocate tensor in GPU
// NOTE: must match with graph names
const string inputLayer = "array_tensor_in:0";
const string outputLayer = "array_tensor_out:0";
// do the opts
CallableOptions opts;
Session::CallableHandle feed_gpu_fetch_cpu;
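(Note: the MakeCallable() setup itself falls outside the hunks shown. A minimal sketch of what "do the opts" typically amounts to with the TF 1.x C++ API follows; the device string is an assumption for a single-GPU machine, and this is an illustration, not this repo's exact code.)

    CallableOptions opts;
    opts.add_feed(inputLayer);    // "array_tensor_in:0"
    opts.add_fetch(outputLayer);  // "array_tensor_out:0"
    // declare that the fed tensor already lives in GPU memory, so TF takes it
    // in place instead of copying it from the host (hypothetical device name)
    opts.mutable_feed_devices()->insert({inputLayer, "/job:localhost/replica:0/task:0/device:GPU:0"});
    Session::CallableHandle feed_gpu_fetch_cpu;
    Status makeStatus = session->MakeCallable(opts, &feed_gpu_fetch_cpu);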
@@ -150,7 +145,6 @@ int main(int, char**) {
     // TF likes power of 2 and 256s
     tensorflow::TensorShape shape = tensorflow::TensorShape({256});
-    // allocate tensor on the GPU
     tensorflow::PlatformGpuId platform_gpu_id(0);
     tensorflow::GPUMemAllocator *sub_allocator =
@@ -165,32 +159,42 @@ int main(int, char**) {
     LOG(INFO) << "\033[1;37m" << "Is CUDA Tensor? " << (IsCUDATensor(inputTensor)?"\033[1;32myes":"\033[1;31mno") << "\033[0m";
     // pointer to tensor data
     tensorflow::uint8 *p = inputTensor.flat<tensorflow::uint8>().data();
-    // CUDA kernel call
-    myCreateCUDAArray(p);
-    // run and check
     vector<Tensor> outputs;
-    LOG(INFO) << "RunCallable()...";
-    runStatus = session->RunCallable(feed_gpu_fetch_cpu, {inputTensor}, &outputs, nullptr);
-    if (!runStatus.ok())
-    {
-        LOG(ERROR) << "Running model failed: " << runStatus;
-        return -1;
-    }
-    LOG(INFO) << "RunCallable() output:";
-    LOG(INFO) << outputs[0].DebugString();
-    auto tmap = outputs[0].tensor<uint8_t, 1>();
-    cout << "\033[1;37m";
-    for (int d = 0; d < 256; d++) {
-        cout << (int) tmap(d);
-        if (d!=255) cout << ", ";
-    }
-    cout << "\033[0m" << endl;
+    for(int i=0;i<2;i++){
+        cout << endl;
+        if (i==0){
+            LOG(INFO) << "\033[1;32m" << "RunCallable()... No feeding (zeroes)" << "\033[0m";
+        }
+        if (i==1){
+            LOG(INFO) << "\033[1;32m" << "RunCallable()... Feeding from CUDA kernel" << "\033[0m";
+            // CUDA kernel call
+            // NOTE: do not allocate memory for p inside the kernel
+            myCreateCUDAArray(p);
+        }
+        runStatus = session->RunCallable(feed_gpu_fetch_cpu, {inputTensor}, &outputs, nullptr);
+        if (!runStatus.ok()){
+            LOG(ERROR) << "Running model failed: " << runStatus;
+            return -1;
+        }
+        LOG(INFO) << "RunCallable() output:";
+        LOG(INFO) << outputs[0].DebugString();
+        auto tmap = outputs[0].tensor<uint8_t, 1>();
+        cout << "\033[1;37m";
+        for (int d = 0; d < 256; d++){
+            cout << (int) tmap(d);
+            if (d!=255) cout << ", ";
+        }
+        cout << "\033[0m" << endl;
+    }
     session->ReleaseCallable(feed_gpu_fetch_cpu);
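(Note on the "do not allocate memory for p inside the kernel" remark: p must keep pointing at the storage TF allocated for inputTensor; a cudaMalloc() inside myCreateCUDAArray() would fill a different buffer and the fetched tensor would stay zeroed. A stripped-down sketch of such an in-place fill follows; the kernel name, launch geometry, and ramp values are illustrative assumptions, not this repo's code.)

    #include <cstdint>
    #include <cuda_runtime.h>

    // write directly through the TF-owned device pointer; no allocation here
    __global__ void fillKernel(uint8_t *dst, int n){
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) dst[i] = (uint8_t) i;   // e.g. a 0..255 ramp
    }

    int myCreateCUDAArray_sketch(uint8_t *tf_ptr){
        const int n = 256;                 // matches TensorShape({256})
        fillKernel<<<1, n>>>(tf_ptr, n);   // fill the tensor's GPU memory in place
        return cudaDeviceSynchronize() == cudaSuccess ? 0 : -1;
    }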