Commit f76d82f2 authored by Oleg Dzhimiev's avatar Oleg Dzhimiev

initial

parents
Pipeline #1545 canceled with stages
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.elphel</groupId>
<artifactId>tfhello</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>tfhello</name>
<dependencies>
<!-- TensorFlow Java bindings (CPU core library) -->
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>libtensorflow</artifactId>
<version>1.15.0</version>
</dependency>
<!-- Native JNI library with GPU (CUDA) support; must match libtensorflow version -->
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>libtensorflow_jni_gpu</artifactId>
<version>1.15.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.tensorflow/proto -->
<!-- Protobuf-generated classes (ConfigProto, GPUOptions) used to configure the Session -->
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>proto</artifactId>
<version>1.15.0</version>
</dependency>
<!-- JCuda driver-API bindings for the raw device-pointer experiments -->
<dependency>
<groupId>org.jcuda</groupId>
<artifactId>jcuda</artifactId>
<version>10.0.0</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
import org.tensorflow.Graph;
import org.tensorflow.Session;
import org.tensorflow.Tensor;
import org.tensorflow.TensorFlow;
import org.tensorflow.Shape;
import org.tensorflow.DataType;
import org.tensorflow.Operation;
import org.tensorflow.framework.ConfigProto;
import org.tensorflow.framework.GPUOptions;
import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoD;
import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
import static jcuda.driver.JCudaDriver.cuModuleLoadData;
import java.util.Arrays;
import jcuda.NativePointerObject;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;
import java.nio.ByteBuffer;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
public class tfhello {

    // from here: https://stackoverflow.com/questions/31993759/how-can-i-create-a-struct-of-native-pointers-in-jcuda
    // it works

    /**
     * Returns the raw CUDA device address held by the given pointer as a long.
     *
     * <p>WORKAROUND until a method like {@code CUdeviceptr#getAddress} exists:
     * {@code Pointer#getNativePointer()} and {@code getByteOffset()} are protected,
     * so they are exposed through a local subclass copy-constructed from {@code p}.
     *
     * @param p an allocated device pointer
     * @return native device address plus byte offset
     */
    private static long getPointerAddress(CUdeviceptr p) {
        class PointerWithAddress extends Pointer {
            PointerWithAddress(Pointer other) {
                super(other);
            }

            long getAddress() {
                return getNativePointer() + getByteOffset();
            }
        }
        return new PointerWithAddress(p).getAddress();
    }

    /**
     * Wraps an existing raw device address in a new {@link CUdeviceptr} WITHOUT
     * allocating device memory. Uses reflection to set the private field:
     * CUdeviceptr &lt;- Pointer &lt;- NativePointerObject (private nativePointer).
     *
     * @param addr raw device address previously obtained from the driver
     * @return a CUdeviceptr aliasing {@code addr}
     * @throws IllegalStateException if the field cannot be found or written
     *         (previously this was swallowed and a zero-address pointer was
     *         returned, which would make later CUDA calls fail obscurely)
     */
    public static CUdeviceptr longToCUdeviceptr(long addr) {
        CUdeviceptr ptr = new CUdeviceptr();
        try {
            // Target NativePointerObject directly instead of walking
            // getSuperclass() twice — same field, less fragile.
            Field f = NativePointerObject.class.getDeclaredField("nativePointer");
            f.setAccessible(true);
            f.setLong(ptr, addr);
        } catch (ReflectiveOperationException e) {
            // Fail loudly with the cause preserved rather than printing a
            // misleading message and returning an unusable pointer.
            throw new IllegalStateException(
                    "Cannot set private field 'nativePointer' on CUdeviceptr", e);
        }
        return ptr;
    }

    /**
     * Smoke tests: (1) round-trip a host array through device memory using an
     * address-aliased CUdeviceptr, (2) print java.library.path, (3) print the
     * TensorFlow version, (4) call a custom Elphel JNI hook, then run a trivial
     * Placeholder->Identity TensorFlow graph on the same array.
     */
    public static void main(String[] args) throws Exception {
        // CUDA test start
        int size = 32;
        int cuSize = size * Sizeof.INT;
        int[] px_in = new int[size];
        int[] px_out = new int[size];
        for (int i = 0; i < px_in.length; i++) {
            px_in[i] = i + 1;
        }
        JCudaDriver.setExceptionsEnabled(true);
        JNvrtc.setExceptionsEnabled(true);

        // init CUDA: driver init, device 0, one context
        cuInit(0);
        CUdevice device = new CUdevice();
        cuDeviceGet(device, 0);
        CUcontext context = new CUcontext();
        cuCtxCreate(context, 0, device);
        // init CUDA done

        System.out.println("Test 1 start");
        System.out.println(" - create CUdeviceptr ptr1, allocate device memory");
        System.out.println(" - get ptr1 address - 4 bytes as long");
        System.out.println(" - create CUdeviceptr ptr2, set address to ptr1 address");
        System.out.println(" - copy host array to device ptr1");
        System.out.println(" - copy device data from ptr2 back to host");
        CUdeviceptr ptr1 = new CUdeviceptr();
        cuMemAlloc(ptr1, cuSize);
        System.out.println("CUdeviceptr ptr1 after cuMemAlloc: " + ptr1);

        // we will get pointer address as 'long' from JNI
        long ptr1_addr = getPointerAddress(ptr1);
        System.out.println("Extracted ptr1 address as (long): " + String.format("0x%08x", ptr1_addr));

        // notice, there's no cuMemAlloc — ptr2 merely aliases ptr1's memory
        CUdeviceptr ptr2 = longToCUdeviceptr(ptr1_addr);
        System.out.println("CUdeviceptr ptr2 created from ptr1's long address using java.reflection: " + ptr2);

        // test: copy a test array px_in to CUDA
        cuMemcpyHtoD(ptr1, Pointer.to(px_in), cuSize);
        // test: copy back only a half of it, just for fun — via the alias ptr2
        cuMemcpyDtoH(Pointer.to(px_out), ptr2, cuSize / 2);

        System.out.println(Arrays.toString(px_in));
        System.out.println(Arrays.toString(px_out));
        // FIX: release the device allocation (was leaked — cuMemFree was commented out).
        // Only ptr1 owns the memory; ptr2 is an alias and must not be freed.
        cuMemFree(ptr1);
        // CUDA test end
        System.out.println("Test 1 end\n");

        System.out.println("Test 2 start\n - Print default java.library.path");
        System.out.println(System.getProperty("java.library.path"));
        System.out.println("Test 2 end\n");

        System.out.println("Test 3 start\n - Print TF version");
        System.out.println(TensorFlow.version());
        System.out.println("Test 3 end\n");

        System.out.println("Test 4 start\n - Test simple custom JNI function added to TF");
        System.out.println(TensorFlow.elphelVersion());
        System.out.println("Test 4 end\n");

        try (Graph g = new Graph()) {
            // Cap GPU memory use and log where each op is placed.
            GPUOptions gpuOptions = GPUOptions.newBuilder().setPerProcessGpuMemoryFraction(0.01).build();
            ConfigProto config = ConfigProto.newBuilder().setLogDevicePlacement(true)
                    .setAllowSoftPlacement(true)
                    .setGpuOptions(gpuOptions)
                    .build();

            // Minimal graph: int32 placeholder fed straight into an Identity op.
            Operation x = g.opBuilder("Placeholder", "array_tensor_in")
                    .setAttr("dtype", DataType.INT32)
                    .build();
            g.opBuilder("Identity", "array_tensor_out")
                    .addInput(x.output(0))
                    .build();

            // FIX: Tensor is AutoCloseable — the original leaked t.
            try (Tensor t = Tensor.create(px_in)) {
                System.out.println("Is CUDA tensor? " + String.valueOf(t.elphel_isCUDATensor()));
                System.out.println(t.elphelTestCUDAPointer());
                try (
                        Session s = new Session(g, config.toByteArray());
                        // Generally, there may be multiple output tensors,
                        // all of them must be closed to prevent resource leaks.
                        Tensor output = s.runner().fetch("array_tensor_out").feed("array_tensor_in", t).run().get(0);
                ) {
                    System.out.println(output.numBytes());
                    int[] obuf = new int[output.numBytes() / Sizeof.INT];
                    output.copyTo(obuf);
                    System.out.println(Arrays.toString(obuf));
                }
            }
        }
    }
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment