Oleg Dzhimiev / tfhello / Commits / f76d82f2

Commit f76d82f2 authored Mar 10, 2020 by Oleg Dzhimiev
initial

Pipeline #1545 canceled with stages
Changes 2    Pipelines 1
Showing 2 changed files with 247 additions and 0 deletions (+247, -0)
pom.xml                       +31  -0
src/main/java/tfhello.java    +216 -0
pom.xml  0 → 100644
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.elphel</groupId>
  <artifactId>tfhello</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>tfhello</name>
  <dependencies>
    <dependency>
      <groupId>org.tensorflow</groupId>
      <artifactId>libtensorflow</artifactId>
      <version>1.15.0</version>
    </dependency>
    <dependency>
      <groupId>org.tensorflow</groupId>
      <artifactId>libtensorflow_jni_gpu</artifactId>
      <version>1.15.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.tensorflow/proto -->
    <dependency>
      <groupId>org.tensorflow</groupId>
      <artifactId>proto</artifactId>
      <version>1.15.0</version>
    </dependency>
    <dependency>
      <groupId>org.jcuda</groupId>
      <artifactId>jcuda</artifactId>
      <version>10.0.0</version>
    </dependency>
  </dependencies>
</project>
\ No newline at end of file
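The pom pairs libtensorflow with libtensorflow_jni_gpu (the GPU-enabled native library) and org.jcuda:jcuda; org.tensorflow:proto supplies the org.tensorflow.framework classes (ConfigProto, GPUOptions) used below. A minimal sanity check that both native stacks load on the target machine, assuming only these dependencies (DepsCheck is a hypothetical class, not part of this commit):

import org.tensorflow.TensorFlow;
import jcuda.driver.JCudaDriver;

public class DepsCheck {
    public static void main(String[] args) {
        // Forces libtensorflow_jni_gpu to load and prints the bundled TF version.
        System.out.println("TensorFlow " + TensorFlow.version());
        // Forces the JCuda driver bindings to load and initializes the CUDA driver.
        JCudaDriver.setExceptionsEnabled(true);
        JCudaDriver.cuInit(0);
        System.out.println("CUDA driver initialized");
    }
}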
src/main/java/tfhello.java  0 → 100644
import org.tensorflow.Graph;
import org.tensorflow.Session;
import org.tensorflow.Tensor;
import org.tensorflow.TensorFlow;
import org.tensorflow.Shape;
import org.tensorflow.DataType;
import org.tensorflow.Operation;

import org.tensorflow.framework.ConfigProto;
import org.tensorflow.framework.GPUOptions;

import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoD;
import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
import static jcuda.driver.JCudaDriver.cuModuleLoadData;

import java.util.Arrays;

import jcuda.NativePointerObject;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;

import java.nio.ByteBuffer;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;

public class tfhello {

    // from here: https://stackoverflow.com/questions/31993759/how-can-i-create-a-struct-of-native-pointers-in-jcuda
    // it works
    private static long getPointerAddress(CUdeviceptr p) {
        // WORKAROUND until a method like CUdeviceptr#getAddress exists
        class PointerWithAddress extends Pointer {
            PointerWithAddress(Pointer other) {
                super(other);
            }
            long getAddress() {
                return getNativePointer() + getByteOffset();
            }
        }
        return new PointerWithAddress(p).getAddress();
    }

    // use reflection to set CUdeviceptr address:
    // CUdeviceptr <- Pointer <- NativePointerObject (private nativePointer)
    public static CUdeviceptr longToCUdeviceptr(long addr) {
        CUdeviceptr ptr = new CUdeviceptr();
        try {
            Field f = ptr.getClass().getSuperclass().getSuperclass().getDeclaredField("nativePointer");
            f.setAccessible(true);
            f.setLong(ptr, addr);
        } catch (Exception NoSuchField) {
            System.out.println("ERROR: There is no private field 'nativePointer' in CUdeviceptr class.");
        }
        return ptr;
    }

    public static void main(String[] args) throws Exception {

        // CUDA test start
        int size = 32;
        int cuSize = size * Sizeof.INT;

        int[] px_in  = new int[size];
        int[] px_out = new int[size];

        for (int i = 0; i < px_in.length; i++) {
            px_in[i] = i + 1;
        }

        JCudaDriver.setExceptionsEnabled(true);
        JNvrtc.setExceptionsEnabled(true);

        // init CUDA
        cuInit(0);
        CUdevice device = new CUdevice();
        cuDeviceGet(device, 0);
        CUcontext context = new CUcontext();
        cuCtxCreate(context, 0, device);
        // init CUDA done

        System.out.println("Test 1 start");
        System.out.println(" - create CUdeviceptr ptr1, allocate device memory");
        System.out.println(" - get ptr1 address - 4 bytes as long");
        System.out.println(" - create CUdeviceptr ptr2, set address to ptr1 address");
        System.out.println(" - copy host array to device ptr1");
        System.out.println(" - copy device data from ptr2 back to host");

        CUdeviceptr ptr1 = new CUdeviceptr();
        cuMemAlloc(ptr1, cuSize);
        System.out.println("CUdeviceptr ptr1 after cuMemAlloc: " + ptr1);

        // we will get pointer address as 'long' from JNI
        long ptr1_addr = getPointerAddress(ptr1);
        System.out.println("Extracted ptr1 address as (long): " + String.format("0x%08x", ptr1_addr));

        // notice, there's no cuMemAlloc
        CUdeviceptr ptr2 = longToCUdeviceptr(ptr1_addr);
        System.out.println("CUdeviceptr ptr2 created from ptr1's long address using java.reflection: " + ptr2);

        // test: copy a test array px_in to CUDA
        cuMemcpyHtoD(ptr1, Pointer.to(px_in), cuSize);
        // test: copy back only a half of it, just for fun
        //cuMemcpyDtoH(Pointer.to(px_out), ptr1, cuSize/2);
        cuMemcpyDtoH(Pointer.to(px_out), ptr2, cuSize / 2);

        // now I need to init CUdeviceptr from long without allocate
        // if I ever get pointer to Tensor in gpu - will use:
        //cuMemcpyDtoD();

        //cuMemFree(ptr);
        // CUDA test end

        System.out.println(Arrays.toString(px_in));
        System.out.println(Arrays.toString(px_out));
        System.out.println("Test 1 end\n");

        System.out.println("Test 2 start\n - Print default java.library.path");
        System.out.println(System.getProperty("java.library.path"));
        System.out.println("Test 2 end\n");

        System.out.println("Test 3 start\n - Print TF version");
        System.out.println(TensorFlow.version());
        System.out.println("Test 3 end\n");

        System.out.println("Test 4 start\n - Test simple custom JNI function added to TF");
        System.out.println(TensorFlow.elphelVersion());
        System.out.println("Test 4 end\n");

        try (Graph g = new Graph()) {
            final String value = "Hello from " + TensorFlow.version();

            GPUOptions gpuOptions = GPUOptions.newBuilder().setPerProcessGpuMemoryFraction(0.01).build();

            // Create a config that will dump out device placement of operations.
            ConfigProto config = ConfigProto.newBuilder()
                    .setLogDevicePlacement(true)
                    .setAllowSoftPlacement(true)
                    .setGpuOptions(gpuOptions)
                    .build();

            Operation x = g.opBuilder("Placeholder", "array_tensor_in")
                    .setAttr("dtype", DataType.INT32)
                    .build();

            Operation z = g.opBuilder("Identity", "array_tensor_out")
                    .addInput(x.output(0))
                    .build();

            Tensor t = Tensor.create(px_in);

            System.out.println("Is CUDA tensor? " + String.valueOf(t.elphel_isCUDATensor()));
            System.out.println(t.elphelTestCUDAPointer());

            try (Session s = new Session(g, config.toByteArray());
                 // Generally, there may be multiple output tensors,
                 // all of them must be closed to prevent resource leaks.
                 Tensor output = s.runner().fetch("array_tensor_out").feed("array_tensor_in", t).run().get(0);
            ) {
                System.out.println(output.numBytes());
                int[] obuf = new int[output.numBytes() / Sizeof.INT];
                output.copyTo(obuf);
                System.out.println(Arrays.toString(obuf));
            }

            /*
            // Construct the computation graph with a single operation, a constant
            // named "MyConst" with a value "value".
            try (Tensor t = Tensor.create(value.getBytes("UTF-8"))) {
                // The Java API doesn't yet include convenience functions for adding operations.
                g.opBuilder("Const", "MyConst").setAttr("dtype", t.dataType()).setAttr("value", t).build();
            }

            try (Tensor t2 = Tensor.create(value.getBytes("UTF-8"))) {
                g.opBuilder("Placeholder", "MyPlaceholder").setAttr("dtype", DataType.FLOAT)
                    .setAttr("shape", Shape.unknown())
                    .build()
                    .output(0);
            }

            // Execute the "MyConst" operation in a Session.
            try (Session s = new Session(g, config.toByteArray());
                 // Generally, there may be multiple output tensors,
                 // all of them must be closed to prevent resource leaks.
                 Tensor output = s.runner().fetch("MyConst").run().get(0)) {
                System.out.println(new String(output.bytesValue(), "UTF-8"));
            }
            */
        }
    }
}
\ No newline at end of file
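The comments in main() note that Test 1's host round-trip is a stand-in: the goal is to obtain a tensor's GPU buffer address as a long from a custom JNI call and then copy device-to-device with cuMemcpyDtoD instead. A sketch of that step, assuming such an address getter exists and that this method is added to the tfhello class above (copyTensorBuffer and tensorAddr are hypothetical names, not part of this commit):

    // Hypothetical helper: wraps a tensor's device address (obtained elsewhere as a
    // long) and copies it device-to-device into a JCuda-owned buffer, avoiding the
    // host round-trip used in Test 1.
    static void copyTensorBuffer(long tensorAddr, CUdeviceptr dst, int numBytes) {
        // No cuMemAlloc here: the source memory is already owned by TensorFlow.
        CUdeviceptr src = longToCUdeviceptr(tensorAddr);
        cuMemcpyDtoD(dst, src, numBytes);
        cuCtxSynchronize();
    }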