Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tfhello
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Oleg Dzhimiev
tfhello
Commits
963e5a90
Commit
963e5a90
authored
Mar 24, 2020
by
Oleg Dzhimiev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
kind of works, need more testing
parent
0f343bbb
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
351 additions
and
43 deletions
+351
-43
tfhello.java
src/main/java/tfhello.java
+77
-27
Session.java
tf_jni/Session.java
+86
-2
Tensor.java
tf_jni/Tensor.java
+5
-4
session_jni.cc
tf_jni/session_jni.cc
+136
-0
session_jni.h
tf_jni/session_jni.h
+7
-1
tensor_jni.cc
tf_jni/tensor_jni.cc
+36
-6
tensor_jni.h
tf_jni/tensor_jni.h
+4
-3
No files found.
src/main/java/tfhello.java
View file @
963e5a90
...
...
@@ -40,7 +40,8 @@ import jcuda.runtime.JCuda;
import
jcuda.runtime.cudaError
;
import
java.nio.ByteBuffer
;
import
java.nio.ByteOrder
;
import
java.nio.FloatBuffer
;
import
java.lang.reflect.Field
;
import
java.lang.reflect.Modifier
;
...
...
@@ -86,10 +87,15 @@ public class tfhello{
int
cuSize
=
size
*
Sizeof
.
INT
;
int
[]
px_in
=
new
int
[
size
];
float
[]
px_in_float
=
new
float
[
size
];
byte
[]
px_in_byte
=
new
byte
[
size
];
int
[]
px_out
=
new
int
[
size
];
for
(
int
i
=
0
;
i
<
px_in
.
length
;
i
++)
{
px_in
[
i
]
=
i
+
1
;
px_in_float
[
i
]
=
i
+
1
;
//px_in_byte[i] = (i+1).byteValue();
}
JCudaDriver
.
setExceptionsEnabled
(
true
);
...
...
@@ -167,19 +173,31 @@ public class tfhello{
.
build
();
Operation
x
=
g
.
opBuilder
(
"Placeholder"
,
"array_tensor_in"
)
.
setAttr
(
"dtype"
,
DataType
.
INT32
)
//.setAttr("dtype",DataType.INT32)
.
setAttr
(
"dtype"
,
DataType
.
FLOAT
)
//.setAttr("dtype",DataType.UINT8)
//.setAttr("dtype",DataType.INT64)
.
build
();
Operation
z
=
g
.
opBuilder
(
"Identity"
,
"array_tensor_out"
)
.
addInput
(
x
.
output
(
0
))
.
build
();
Tensor
t
=
Tensor
.
create
(
px_in
);
System
.
out
.
println
(
"Is CUDA tensor? "
+
String
.
valueOf
(
t
.
elphel_isCUDATensor
()));
System
.
out
.
println
(
t
.
elphelTestCUDAPointer
());
// unit8
//Tensor t = Tensor.create(px_in_uint8);
// int
//Tensor t = Tensor.create(px_in);
// float
long
handle1
;
//session.makeCallable(handle1);
ByteBuffer
bbuf
=
ByteBuffer
.
allocateDirect
(
px_in_float
.
length
*
Float
.
BYTES
);
//4 bytes per float
bbuf
.
order
(
ByteOrder
.
nativeOrder
());
FloatBuffer
fbuf
=
bbuf
.
asFloatBuffer
();
fbuf
.
put
(
px_in_float
);
fbuf
.
position
(
0
);
Tensor
<
Float
>
t
=
Tensor
.
create
(
new
long
[]{
px_in_float
.
length
},
fbuf
);
//System.out.println("Is CUDA tensor? "+String.valueOf(t.elphel_isCUDATensor()));
//System.out.println(t.elphelTestCUDAPointer());
try
(
Session
s
=
new
Session
(
g
,
config
.
toByteArray
())
...
...
@@ -191,31 +209,27 @@ public class tfhello{
Tensor
output
=
s
.
runner
().
fetch
(
"array_tensor_out"
).
feed
(
"array_tensor_in"
,
t
).
run
().
get
(
0
);
System
.
out
.
println
(
output
.
numBytes
());
System
.
out
.
println
(
"Numbytes: "
+
output
.
numBytes
());
int
[]
obuf
=
new
int
[
output
.
numBytes
()/
Sizeof
.
INT
];
//int[] obuf = new int[output.numBytes()/Sizeof.INT];
float
[]
obuf
=
new
float
[
output
.
numBytes
()/
Sizeof
.
FLOAT
];
output
.
copyTo
(
obuf
);
System
.
out
.
println
(
"Output from the first run: "
);
System
.
out
.
println
(
Arrays
.
toString
(
obuf
));
// natively got GPU device name to insert into options
// it's the same all the time
String
gpuDeviceName
=
s
.
elphelGPUDeviceName
();
// that's for RunCallable() if it ever gets implemented
CallableOptions
callableOpts
=
CallableOptions
.
newBuilder
()
.
addFetch
(
"array_tensor_out:0"
)
.
addFeed
(
"array_tensor_in:0"
)
.
putFeedDevices
(
"array_tensor_in:0"
,
gpuDeviceName
)
.
build
();
System
.
out
.
println
(
callableOpts
);
// GPU allocation:
Tensor
t3
=
Tensor
.
elphelCreateGPUTensor
(
new
long
[]{
256
},
DataType
.
INT32
);
// GPU allocation: dims must be power of 2?
Tensor
t3
=
Tensor
.
elphelCreateGPUTensor
(
new
long
[]{
256
},
DataType
.
FLOAT
);
//System.out.println(t2.nativeRef);
// Let's check what happened
long
t3_gpuptr
=
t3
.
elphel_GetGPUTensorPointer
();
System
.
out
.
println
(
String
.
format
(
"0x%08x"
,
t3_gpuptr
));
// Print address
//System.out.println("Pointer address: "+String.format("0x%08x", t3_gpuptr));
CUdeviceptr
ptr3
=
longToCUdeviceptr
(
t3_gpuptr
);
...
...
@@ -228,13 +242,49 @@ public class tfhello{
System
.
out
.
println
(
"Not a CUDA device"
);
}
System
.
out
.
println
(
"cuda pointer attributes?! "
+
res
);
System
.
out
.
println
(
attrs
.
toString
());
if
(
attrs
.
device
==
0
)
{
Field
f
=
attrs
.
devicePointer
.
getClass
().
getSuperclass
().
getDeclaredField
(
"nativePointer"
);
f
.
setAccessible
(
true
);
long
addr
=
f
.
getLong
(
attrs
.
devicePointer
);
//System.out.println(String.format("0x%08x",addr));
if
(
addr
!=
0
)
{
System
.
err
.
println
(
"\nTensor is allocated in CUDA: "
+
attrs
.
toString
()+
"\n"
);
}
}
// initialize tensor with values
int
dsize
=
256
;
int
cuDsize
=
dsize
*
Sizeof
.
FLOAT
;
float
[]
din
=
new
float
[
dsize
];
float
[]
dout
=
new
float
[
dsize
];
for
(
int
i
=
0
;
i
<
din
.
length
;
i
++)
{
din
[
i
]
=
i
+
1
;
}
cuMemcpyHtoD
(
ptr3
,
Pointer
.
to
(
din
),
cuDsize
);
cuMemcpyDtoH
(
Pointer
.
to
(
dout
),
ptr3
,
cuDsize
);
System
.
out
.
println
(
Arrays
.
toString
(
dout
));
// that's for RunCallable() if it ever gets implemented
CallableOptions
callableOpts
=
CallableOptions
.
newBuilder
()
.
addFetch
(
"array_tensor_out:0"
)
.
addFeed
(
"array_tensor_in:0"
)
.
putFeedDevices
(
"array_tensor_in:0"
,
gpuDeviceName
)
.
build
();
System
.
out
.
println
(
callableOpts
);
// callable handle
long
feed_gpu_fetch_cpu
=
s
.
MakeCallable
(
callableOpts
.
toByteArray
());
Tensor
<?>
t3out
=
s
.
runner
().
fetch
(
"array_tensor_out"
).
feed
(
"array_tensor_in"
,
t3
).
runElphelCallable
(
feed_gpu_fetch_cpu
).
get
(
0
);
System
.
out
.
println
(
t3
);
System
.
out
.
println
(
t3out
);
cuMemcpyHtoD
(
ptr3
,
Pointer
.
to
(
px_in
),
cuSize
)
;
cuMemcpyDtoH
(
Pointer
.
to
(
px_out
),
ptr3
,
cuSize
);
System
.
out
.
println
(
Arrays
.
toString
(
px_out
)
);
// check if it is a GPU pointer
float
[]
obuf2
=
new
float
[
t3out
.
numBytes
()/
Sizeof
.
FLOAT
]
;
t3out
.
copyTo
(
obuf2
);
System
.
out
.
println
(
"Output from the second run: "
);
System
.
out
.
println
(
Arrays
.
toString
(
obuf2
));
}
...
...
tf_jni/Session.java
View file @
963e5a90
...
...
@@ -82,11 +82,16 @@ public final class Session implements AutoCloseable {
}
/**
 * Returns the GPU device name that the native layer reports for this session.
 *
 * @return the string produced by the native {@code elphelGetGPUDeviceName} call for this
 *     session's native handle
 */
public String elphelGPUDeviceName() {
  final long sessionHandle = this.nativeHandle;
  return elphelGetGPUDeviceName(sessionHandle);
}
public
native
String
elphelGetGPUDeviceName
(
long
handle
);
private
native
String
elphelGetGPUDeviceName
(
long
handle
);
/**
 * Creates a callable in the native session from serialized callable options.
 *
 * @param config serialized callable options, forwarded unchanged to the native layer
 * @return the value returned by the native {@code elphelMakeCallable} call (the handle of the new
 *     callable)
 */
public long MakeCallable(byte[] config) {
  final long sessionHandle = this.nativeHandle;
  final long callableHandle = elphelMakeCallable(sessionHandle, config);
  return callableHandle;
}
/**
 * Native counterpart of {@link #MakeCallable(byte[])}.
 *
 * @param nativeHandle native handle of the session the callable is created in
 * @param config serialized callable options
 * @return handle of the created callable
 */
private
native
long
elphelMakeCallable
(
long
nativeHandle
,
byte
[]
config
);
/**
* Release resources associated with the Session.
...
...
@@ -125,6 +130,7 @@ public final class Session implements AutoCloseable {
* #feed(String,int,Tensor)}.
*/
public
final
class
Runner
{
/**
* Avoid evaluating {@code operation} and substitute {@code t} for the value it produces.
*
...
...
@@ -136,6 +142,8 @@ public final class Session implements AutoCloseable {
* SavedModelBundle#metaGraphDef()}.
*/
public Runner feed(String operation, Tensor<?> t) {
  // Debug trace: report every feed that is registered by operation name.
  final String trace = "Adding feed to operation: " + operation;
  System.out.println(trace);
  // Resolve the name to an output and delegate to the overload that takes it.
  return feed(parseOutput(operation), t);
}
...
...
@@ -165,6 +173,14 @@ public final class Session implements AutoCloseable {
return
this
;
}
/**
 * Registers {@code t} as an input tensor without naming the operation it feeds.
 *
 * <p>Intended for {@link #runElphelCallable(long)}, which passes only the tensors to the native
 * layer.
 *
 * @param t tensor to append to this runner's input tensors
 * @return this runner, so calls can be chained
 */
public Runner feed(Tensor<?> t) {
  final Tensor<?> input = t;
  inputTensors.add(input);
  return this;
}
/**
* Make {@link #run()} return the output of {@code operation}.
*
...
...
@@ -295,6 +311,64 @@ public final class Session implements AutoCloseable {
return
runHelper
(
true
);
}
/**
 * Runs the callable identified by {@code handle} with the tensors fed to this runner.
 *
 * @param handle callable handle, as returned by {@code MakeCallable}
 * @return the output tensors of the run
 */
public List<Tensor<?>> runElphelCallable(long handle) {
  final Run result = runElphelCallableHelper(handle);
  return result.outputs;
}
/**
 * Runs a callable through the native layer and wraps the produced tensor handles.
 *
 * <p>The native call receives one handle per fed tensor and an output array with one slot per
 * requested fetch; the slots it fills are converted to {@link Tensor} objects afterwards.
 *
 * @param handle callable handle to execute
 * @return a {@code Run} holding the output tensors and whatever metadata the native call returned
 */
private Run runElphelCallableHelper(long handle) {
  final long[] feedHandles = new long[inputTensors.size()];
  final long[] fetchHandles = new long[outputs.size()];
  System.out.println("Number of input handles: " + inputTensors.size());
  System.out.println("Number of output handles: " + outputs.size());

  // Collect the native handle of every fed tensor, in feed order.
  int next = 0;
  for (Tensor<?> fed : inputTensors) {
    feedHandles[next] = fed.getNativeHandle();
    next++;
  }

  // NOTE(review): Reference presumably keeps the session alive for the duration of the native
  // call - confirm against its definition.
  Reference sessionRef = new Reference();
  byte[] runMetadata = null;
  try {
    System.out.println("About to run RunCallable\n");
    runMetadata = Session.elphelRunCallable(nativeHandle, handle, feedHandles, fetchHandles);
    System.out.println("Ready to process output\n");
  } finally {
    sessionRef.close();
  }
  System.out.println("Processing output\n");

  // Wrap each returned handle; if any wrap fails, close the tensors created so far and rethrow.
  List<Tensor<?>> fetched = new ArrayList<Tensor<?>>();
  try {
    for (long fetchHandle : fetchHandles) {
      fetched.add(Tensor.fromHandle(fetchHandle));
    }
  } catch (Exception e) {
    for (Tensor<?> created : fetched) {
      created.close();
    }
    fetched.clear();
    throw e;
  }

  Run result = new Run();
  result.outputs = fetched;
  result.metadata = runMetadata;
  return result;
}
private
Run
runHelper
(
boolean
wantMetadata
)
{
long
[]
inputTensorHandles
=
new
long
[
inputTensors
.
size
()];
long
[]
inputOpHandles
=
new
long
[
inputs
.
size
()];
...
...
@@ -495,4 +569,14 @@ public final class Session implements AutoCloseable {
long
[]
targetOpHandles
,
boolean
wantRunMetadata
,
long
[]
outputTensorHandles
);
/**
 * Native entry point that runs a previously created callable.
 *
 * @param sessionHandle native handle of the session
 * @param callableHandle handle of the callable to run
 * @param inputTensorHandles native handles of the tensors to feed
 * @param outputTensorHandles array the native side fills with the handles of the output tensors
 * @return run metadata bytes; the native implementation currently returns {@code null}
 */
private
static
native
byte
[]
elphelRunCallable
(
long
sessionHandle
,
long
callableHandle
,
long
[]
inputTensorHandles
,
long
[]
outputTensorHandles
);
}
tf_jni/Tensor.java
View file @
963e5a90
...
...
@@ -164,7 +164,7 @@ public final class Tensor<T> implements AutoCloseable {
long
nativeHandle
;
nativeHandle
=
elphelAllocateGPUTensor
(
t
.
shapeCopy
,
t
.
dtype
.
c
());
t
.
nativeRef
=
new
NativeReference
(
nativeHandle
);
System
.
out
.
println
(
t
.
nativeRef
);
//
System.out.println(t.nativeRef);
return
t
;
}
...
...
@@ -544,11 +544,12 @@ public final class Tensor<T> implements AutoCloseable {
return
String
.
format
(
"%s tensor with shape %s"
,
dtype
.
toString
(),
Arrays
.
toString
(
shape
()));
}
/*
public int elphel_isCUDATensor() {
int result = elphelIsCUDATensor(getNativeHandle());
return result;
}
*/
public
long
elphel_GetGPUTensorPointer
(){
return
elphelGetGPUTensorPointer
(
getNativeHandle
());
...
...
@@ -862,9 +863,9 @@ public final class Tensor<T> implements AutoCloseable {
private
static
native
void
readNDArray
(
long
handle
,
Object
value
);
private
static
native
int
elphelIsCUDATensor
(
long
handle
);
//
private static native int elphelIsCUDATensor(long handle);
public
static
native
int
elphelTestCUDAPointer
();
//
public static native int elphelTestCUDAPointer();
static
{
TensorFlow
.
init
();
...
...
tf_jni/session_jni.cc
View file @
963e5a90
...
...
@@ -208,6 +208,102 @@ JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Session_run(
return
ret
;
}
// Builds a zero-byte TF_Tensor of type 'dtype' carrying the dimensions of
// 'shape'. Any rank is accepted, but the product of the dimensions must be
// zero (enforced with CHECK_EQ).
static TF_Tensor* EmptyTensor(TF_DataType dtype,
                              const tensorflow::TensorShape& shape) {
  // The dims pointer is reinterpreted as int64_t* below, so both 64-bit
  // integer types must have the same size.
  static_assert(sizeof(int64_t) == sizeof(tensorflow::int64),
                "64-bit int types should match in size");

  // Dummy byte that gives the tensor a non-null data pointer; it is never
  // read because the tensor length is 0.
  static char empty;

  const int rank = shape.dims();
  std::vector<tensorflow::int64> dim_sizes;
  tensorflow::int64 element_count = 1;
  for (int axis = 0; axis < rank; ++axis) {
    const tensorflow::int64 extent = shape.dim_size(axis);
    dim_sizes.push_back(extent);
    element_count *= extent;
  }
  CHECK_EQ(element_count, 0);

  const int64_t* raw_dims = reinterpret_cast<const int64_t*>(dim_sizes.data());
  void* payload = reinterpret_cast<void*>(&empty);
  // The deallocator is a no-op because the payload is the static byte above.
  return TF_NewTensor(dtype, raw_dims, rank, payload, 0,
                      [](void*, size_t, void*) {}, nullptr);
}
/*
 * Class:     org_tensorflow_Session
 * Method:    elphelRunCallable
 * Signature: (JJ[J[J)[B
 *
 * Runs the callable 'callable_handle' in the session 'session_handle'.
 *
 * input_tensor_handles  - native TF_Tensor handles to feed, in feed order.
 * output_tensor_handles - array that receives one TF_Tensor handle per output;
 *                         its length must equal the number of tensors the
 *                         callable produces.
 *
 * Returns nullptr (no run metadata is produced). On failure a
 * java.lang.IllegalStateException is raised, so that the Java caller does not
 * go on to wrap output handles that were never written.
 */
JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Session_elphelRunCallable(
    JNIEnv* env, jclass clazz, jlong session_handle, jlong callable_handle,
    jlongArray input_tensor_handles, jlongArray output_tensor_handles) {
  TF_Session* session = requireHandle(env, session_handle);
  // NOTE(review): requireHandle presumably raises a Java exception for an
  // invalid handle - confirm. Either way a null session must not be
  // dereferenced below.
  if (session == nullptr) return nullptr;

  using namespace tensorflow;

  // Raises IllegalStateException unless another exception is already pending.
  const auto raise_illegal_state = [env](const char* message) {
    if (env->ExceptionCheck()) return;
    jclass exception_class =
        env->FindClass("java/lang/IllegalStateException");
    if (exception_class != nullptr) env->ThrowNew(exception_class, message);
  };

  // The handle is an integer value, so a plain value conversion is enough.
  const Session::CallableHandle callable =
      static_cast<Session::CallableHandle>(callable_handle);

  const jint ninputs = env->GetArrayLength(input_tensor_handles);
  const jint noutputs = env->GetArrayLength(output_tensor_handles);

  // Value-initialized so that an unresolved slot is detectable as nullptr.
  std::unique_ptr<TF_Tensor*[]> input_values(new TF_Tensor*[ninputs]());
  resolveHandles(env, "input Tensors", input_tensor_handles,
                 input_values.get(), ninputs);
  if (env->ExceptionCheck()) return nullptr;

  std::vector<Tensor> inputs(ninputs);
  for (jint i = 0; i < ninputs; ++i) {
    if (input_values[i] == nullptr) {
      raise_illegal_state("elphelRunCallable: null input tensor handle");
      return nullptr;
    }
    // NOTE(review): the result of TF_TensorToTensor is ignored, as before -
    // confirm whether a failed conversion needs handling here.
    TF_TensorToTensor(input_values[i], &inputs[i]);
  }

  std::vector<Tensor> outputs(noutputs);
  // Pass the feed vector directly; wrapping it in braces made a copy.
  const auto runStatus =
      session->session->RunCallable(callable, inputs, &outputs, nullptr);
  if (!runStatus.ok()) {
    printf("It is with a heavy heart I inform you that RunCallable has failed. Here's the error message:\n");
    // Never use the message itself as the format string.
    printf("%s", runStatus.error_message().c_str());
    raise_illegal_state(runStatus.error_message().c_str());
    return nullptr;
  }
  if (static_cast<jint>(outputs.size()) != noutputs) {
    raise_illegal_state(
        "elphelRunCallable: number of produced tensors does not match the "
        "length of the output handle array");
    return nullptr;
  }

  jlong* handles = env->GetLongArrayElements(output_tensor_handles, nullptr);
  if (handles == nullptr) return nullptr;  // an exception is already pending

  TF_Status* status = TF_NewStatus();
  bool converted_all = true;
  for (jint i = 0; i < noutputs; ++i) {
    const Tensor& src = outputs[i];
    TF_Tensor* result = nullptr;
    if (!src.IsInitialized() || src.NumElements() == 0) {
      result = EmptyTensor(static_cast<TF_DataType>(src.dtype()), src.shape());
    } else {
      result = TF_TensorFromTensor(src, status);
      if (TF_GetCode(status) != TF_OK || result == nullptr) {
        // Undo the partial work so that Java never sees dangling handles.
        if (result != nullptr) TF_DeleteTensor(result);
        for (jint j = 0; j < i; ++j) {
          TF_DeleteTensor(reinterpret_cast<TF_Tensor*>(handles[j]));
          handles[j] = 0;
        }
        converted_all = false;
        break;
      }
    }
    // Written for empty outputs too; skipping them left a 0 handle in Java.
    handles[i] = reinterpret_cast<jlong>(result);
  }

  // Mode 0 copies the updated handles back to the Java array and frees the
  // native buffer.
  env->ReleaseLongArrayElements(output_tensor_handles, handles, 0);
  if (!converted_all) raise_illegal_state(TF_Message(status));
  TF_DeleteStatus(status);

  return nullptr;
}
JNIEXPORT
jstring
JNICALL
Java_org_tensorflow_Session_elphelGetGPUDeviceName
(
JNIEnv
*
env
,
jclass
clazz
,
jlong
handle
)
{
...
...
@@ -228,3 +324,43 @@ JNIEXPORT jstring JNICALL Java_org_tensorflow_Session_elphelGetGPUDeviceName(JNI
return
env
->
NewStringUTF
(
""
);
}
/*
 * Class:     org_tensorflow_Session
 * Method:    elphelMakeCallable
 * Signature: (J[B)J
 *
 * Creates a callable in the session 'session_handle' from 'config', a
 * serialized tensorflow::CallableOptions message (may be null, in which case
 * default options are used).
 *
 * Returns the callable handle, or -1 if the options cannot be read or parsed
 * or if MakeCallable fails.
 */
JNIEXPORT jlong JNICALL Java_org_tensorflow_Session_elphelMakeCallable(
    JNIEnv* env, jclass clazz, jlong session_handle, jbyteArray config) {
  TF_Session* session = requireHandle(env, session_handle);
  // NOTE(review): requireHandle presumably raises a Java exception for an
  // invalid handle - confirm. Either way a null session must not be
  // dereferenced below.
  if (session == nullptr) return -1;

  using namespace tensorflow;

  CallableOptions opts;
  if (config != nullptr) {
    jbyte* cconfig = env->GetByteArrayElements(config, nullptr);
    if (cconfig == nullptr) return -1;  // an exception is already pending
    const bool parsed = opts.ParseFromArray(
        cconfig, static_cast<int>(env->GetArrayLength(config)));
    // The bytes were only read, so release the buffer without copying back.
    env->ReleaseByteArrayElements(config, cconfig, JNI_ABORT);
    if (!parsed) {
      printf("MakeCallable: could not parse the serialized CallableOptions\n");
      return -1;
    }
  }

  Session::CallableHandle feed_gpu_fetch_cpu = 0;
  const auto runStatus =
      session->session->MakeCallable(opts, &feed_gpu_fetch_cpu);
  if (!runStatus.ok()) {
    printf("It is with a heavy heart I inform you that MakeCallable has failed. Here's the error message:\n");
    // Never use the message itself as the format string.
    printf("%s", runStatus.error_message().c_str());
    return -1;
  }

  // The handle is an integer value, so a plain value conversion is enough.
  return static_cast<jlong>(feed_gpu_fetch_cpu);
}
tf_jni/session_jni.h
View file @
963e5a90
...
...
@@ -58,7 +58,13 @@ JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Session_run(
JNIEXPORT
jstring
JNICALL
Java_org_tensorflow_Session_elphelGetGPUDeviceName
(
JNIEnv
*
env
,
jclass
clazz
,
jlong
handle
);
JNIEnv
*
,
jclass
,
jlong
handle
);
/*
 * Class:     org_tensorflow_Session
 * Method:    elphelMakeCallable
 * Signature: (J[B)J
 */
JNIEXPORT
jlong
JNICALL
Java_org_tensorflow_Session_elphelMakeCallable
(
JNIEnv
*
,
jclass
,
jlong
,
jbyteArray
);
/*
 * Class:     org_tensorflow_Session
 * Method:    elphelRunCallable
 * Signature: (JJ[J[J)[B
 */
JNIEXPORT
jbyteArray
JNICALL
Java_org_tensorflow_Session_elphelRunCallable
(
JNIEnv
*
,
jclass
,
jlong
,
jlong
,
jlongArray
,
jlongArray
);
#ifdef __cplusplus
}
// extern "C"
...
...
tf_jni/tensor_jni.cc
View file @
963e5a90
...
...
@@ -383,8 +383,31 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_elphelAllocateGPUTensor(JNIEn
TF_Tensor
*
t
;
//t->tensor
tensorflow
::
TensorShape
shapex
=
tensorflow
::
TensorShape
({
256
});
using
namespace
tensorflow
;
DataType
dt_dtype
=
static_cast
<
DataType
>
(
dtype
);
// Actually, don't need TF_*
//TF_DataType tf_dtype = static_cast<TF_DataType>(dtype);
//size_t tf_dtype_size = TF_DataTypeSize(tf_dtype);
const
int
num_dims
=
static_cast
<
int
>
(
env
->
GetArrayLength
(
shape
));
//int64_t* dims = new int64_t[num_dims];
std
::
vector
<
tensorflow
::
int64
>
dims
(
num_dims
);
int64_t
num_elements
=
1
;
{
jlong
*
jdims
=
env
->
GetLongArrayElements
(
shape
,
nullptr
);
for
(
int
i
=
0
;
i
<
num_dims
;
++
i
)
{
dims
[
i
]
=
static_cast
<
int64
>
(
jdims
[
i
]);
num_elements
*=
dims
[
i
];
}
// what's this for?
env
->
ReleaseLongArrayElements
(
shape
,
jdims
,
JNI_ABORT
);
}
TensorShape
ts_shape
=
tensorflow
::
TensorShape
(
dims
);
tensorflow
::
PlatformGpuId
platform_gpu_id
(
0
);
tensorflow
::
GPUMemAllocator
*
sub_allocator
=
...
...
@@ -393,15 +416,19 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_elphelAllocateGPUTensor(JNIEn
platform_gpu_id
,
false
,
{},
{});
tensorflow
::
GPUBFCAllocator
*
allocator
=
new
tensorflow
::
GPUBFCAllocator
(
sub_allocator
,
shapex
.
num_elements
()
*
sizeof
(
tensorflow
::
DT_UINT8
),
"GPU_0_bfc"
);
new
tensorflow
::
GPUBFCAllocator
(
sub_allocator
,
num_elements
*
sizeof
(
dt_dtype
),
"GPU_0_bfc"
);
Tensor
t_cuda
=
Tensor
(
allocator
,
tensorflow
::
DT_UINT8
,
shapex
);
Tensor
t_cuda
=
Tensor
(
allocator
,
dt_dtype
,
ts_shape
);
//TODO:
// Maybe check tensor pointer here - CUDA or not CUDA?
//t->tensor = t_cuda;
TF_Status
*
status
=
TF_NewStatus
();
// TODO: Check what exactly this function does...
t
=
TF_TensorFromTensor
(
t_cuda
,
status
);
printf
(
"Allocating
in GPU!"
);
//printf("Allocated
in GPU!");
return
reinterpret_cast
<
jlong
>
(
t
);
}
...
...
@@ -677,6 +704,7 @@ JNIEXPORT void JNICALL Java_org_tensorflow_Tensor_readNDArray(JNIEnv* env,
static_cast
<
jarray
>
(
value
));
}
/*
JNIEXPORT int JNICALL Java_org_tensorflow_Tensor_elphelIsCUDATensor(JNIEnv* env,
jclass clazz,
jlong handle) {
...
...
@@ -712,11 +740,13 @@ JNIEXPORT int JNICALL Java_org_tensorflow_Tensor_elphelIsCUDATensor(JNIEnv* env,
#endif
}
*/
/*
JNIEXPORT int JNICALL Java_org_tensorflow_Tensor_elphelTestCUDAPointer(JNIEnv* env,
jclass clazz){
return 0x3;
}
*/
...
...
tf_jni/tensor_jni.h
View file @
963e5a90
...
...
@@ -156,14 +156,15 @@ JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Tensor_scalarBytes(JNIEnv *,
*/
JNIEXPORT
void
JNICALL
Java_org_tensorflow_Tensor_readNDArray
(
JNIEnv
*
,
jclass
,
jlong
,
jobject
);
/*
JNIEXPORT int JNICALL Java_org_tensorflow_Tensor_elphelIsCUDATensor(JNIEnv *,
jclass,
jlong);
*/
/*
JNIEXPORT int JNICALL Java_org_tensorflow_Tensor_elphelTestCUDAPointer(JNIEnv *,
jclass);
*/
#ifdef __cplusplus
}
// extern "C"
#endif // __cplusplus
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment