Oleg Dzhimiev / tfhello / Commits / f76d82f2

Commit f76d82f2 authored Mar 10, 2020 by Oleg Dzhimiev
initial

Pipeline #1545 canceled with stages
Changes 2    Pipelines 1
Showing 2 changed files with 247 additions and 0 deletions (+247, -0)
pom.xml                       +31  -0
src/main/java/tfhello.java    +216 -0
pom.xml  0 → 100644
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.elphel</groupId>
  <artifactId>tfhello</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>tfhello</name>
  <dependencies>
    <dependency>
      <groupId>org.tensorflow</groupId>
      <artifactId>libtensorflow</artifactId>
      <version>1.15.0</version>
    </dependency>
    <dependency>
      <groupId>org.tensorflow</groupId>
      <artifactId>libtensorflow_jni_gpu</artifactId>
      <version>1.15.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.tensorflow/proto -->
    <dependency>
      <groupId>org.tensorflow</groupId>
      <artifactId>proto</artifactId>
      <version>1.15.0</version>
    </dependency>
    <dependency>
      <groupId>org.jcuda</groupId>
      <artifactId>jcuda</artifactId>
      <version>10.0.0</version>
    </dependency>
  </dependencies>
</project>
\ No newline at end of file
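The pom pairs libtensorflow with libtensorflow_jni_gpu (the GPU-enabled native library) and org.jcuda:jcuda; org.tensorflow:proto supplies the org.tensorflow.framework classes (ConfigProto, GPUOptions) used below. A minimal sanity check that both native stacks load on the target machine, assuming only these dependencies (DepsCheck is a hypothetical class, not part of this commit):

import org.tensorflow.TensorFlow;
import jcuda.driver.JCudaDriver;

public class DepsCheck {
    public static void main(String[] args) {
        // Forces libtensorflow_jni_gpu to load and prints the bundled TF version.
        System.out.println("TensorFlow " + TensorFlow.version());
        // Forces the JCuda driver bindings to load and initializes the CUDA driver.
        JCudaDriver.setExceptionsEnabled(true);
        JCudaDriver.cuInit(0);
        System.out.println("CUDA driver initialized");
    }
}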
src/main/java/tfhello.java  0 → 100644
import org.tensorflow.Graph;
import org.tensorflow.Session;
import org.tensorflow.Tensor;
import org.tensorflow.TensorFlow;
import org.tensorflow.Shape;
import org.tensorflow.DataType;
import org.tensorflow.Operation;

import org.tensorflow.framework.ConfigProto;
import org.tensorflow.framework.GPUOptions;

import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoD;
import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
import static jcuda.driver.JCudaDriver.cuModuleLoadData;

import java.util.Arrays;

import jcuda.NativePointerObject;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;

import java.nio.ByteBuffer;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;

public class tfhello {

    // from here: https://stackoverflow.com/questions/31993759/how-can-i-create-a-struct-of-native-pointers-in-jcuda
    // it works
    private static long getPointerAddress(CUdeviceptr p) {
        // WORKAROUND until a method like CUdeviceptr#getAddress exists
        class PointerWithAddress extends Pointer {
            PointerWithAddress(Pointer other) {
                super(other);
            }
            long getAddress() {
                return getNativePointer() + getByteOffset();
            }
        }
        return new PointerWithAddress(p).getAddress();
    }

    // use reflection to set CUdeviceptr address:
    // CUdeviceptr <- Pointer <- NativePointerObject (private nativePointer)
    public static CUdeviceptr longToCUdeviceptr(long addr) {
        CUdeviceptr ptr = new CUdeviceptr();
        try {
            Field f = ptr.getClass().getSuperclass().getSuperclass().getDeclaredField("nativePointer");
            f.setAccessible(true);
            f.setLong(ptr, addr);
        } catch (Exception NoSuchField) {
            System.out.println("ERROR: There is no private field 'nativePointer' in CUdeviceptr class.");
        }
        return ptr;
    }

    public static void main(String[] args) throws Exception {

        // CUDA test start
        int size = 32;
        int cuSize = size * Sizeof.INT;

        int[] px_in  = new int[size];
        int[] px_out = new int[size];

        for (int i = 0; i < px_in.length; i++) {
            px_in[i] = i + 1;
        }

        JCudaDriver.setExceptionsEnabled(true);
        JNvrtc.setExceptionsEnabled(true);

        // init CUDA
        cuInit(0);
        CUdevice device = new CUdevice();
        cuDeviceGet(device, 0);
        CUcontext context = new CUcontext();
        cuCtxCreate(context, 0, device);
        // init CUDA done

        System.out.println("Test 1 start");
        System.out.println(" - create CUdeviceptr ptr1, allocate device memory");
        System.out.println(" - get ptr1 address - 4 bytes as long");
        System.out.println(" - create CUdeviceptr ptr2, set address to ptr1 address");
        System.out.println(" - copy host array to device ptr1");
        System.out.println(" - copy device data from ptr2 back to host");

        CUdeviceptr ptr1 = new CUdeviceptr();
        cuMemAlloc(ptr1, cuSize);
        System.out.println("CUdeviceptr ptr1 after cuMemAlloc: " + ptr1);

        // we will get pointer address as 'long' from JNI
        long ptr1_addr = getPointerAddress(ptr1);
        System.out.println("Extracted ptr1 address as (long): " + String.format("0x%08x", ptr1_addr));

        // notice, there's no cuMemAlloc
        CUdeviceptr ptr2 = longToCUdeviceptr(ptr1_addr);
        System.out.println("CUdeviceptr ptr2 created from ptr1's long address using java.reflection: " + ptr2);

        // test: copy a test array px_in to CUDA
        cuMemcpyHtoD(ptr1, Pointer.to(px_in), cuSize);
        // test: copy back only a half of it, just for fun
        //cuMemcpyDtoH(Pointer.to(px_out), ptr1, cuSize/2);
        cuMemcpyDtoH(Pointer.to(px_out), ptr2, cuSize / 2);

        // now I need to init CUdeviceptr from long without allocate
        // if I ever get pointer to Tensor in gpu - will use:
        //cuMemcpyDtoD();

        //cuMemFree(ptr);
        // CUDA test end

        System.out.println(Arrays.toString(px_in));
        System.out.println(Arrays.toString(px_out));
        System.out.println("Test 1 end\n");

        System.out.println("Test 2 start\n - Print default java.library.path");
        System.out.println(System.getProperty("java.library.path"));
        System.out.println("Test 2 end\n");

        System.out.println("Test 3 start\n - Print TF version");
        System.out.println(TensorFlow.version());
        System.out.println("Test 3 end\n");

        System.out.println("Test 4 start\n - Test simple custom JNI function added to TF");
        System.out.println(TensorFlow.elphelVersion());
        System.out.println("Test 4 end\n");

        try (Graph g = new Graph()) {
            final String value = "Hello from " + TensorFlow.version();

            GPUOptions gpuOptions = GPUOptions.newBuilder().setPerProcessGpuMemoryFraction(0.01).build();

            // Create a config that will dump out device placement of operations.
            ConfigProto config = ConfigProto.newBuilder()
                    .setLogDevicePlacement(true)
                    .setAllowSoftPlacement(true)
                    .setGpuOptions(gpuOptions)
                    .build();

            Operation x = g.opBuilder("Placeholder", "array_tensor_in")
                    .setAttr("dtype", DataType.INT32)
                    .build();

            Operation z = g.opBuilder("Identity", "array_tensor_out")
                    .addInput(x.output(0))
                    .build();

            Tensor t = Tensor.create(px_in);

            System.out.println("Is CUDA tensor? " + String.valueOf(t.elphel_isCUDATensor()));
            System.out.println(t.elphelTestCUDAPointer());

            try (Session s = new Session(g, config.toByteArray());
                 // Generally, there may be multiple output tensors,
                 // all of them must be closed to prevent resource leaks.
                 Tensor output = s.runner().fetch("array_tensor_out").feed("array_tensor_in", t).run().get(0);
            ) {
                System.out.println(output.numBytes());
                int[] obuf = new int[output.numBytes() / Sizeof.INT];
                output.copyTo(obuf);
                System.out.println(Arrays.toString(obuf));
            }

            /*
            // Construct the computation graph with a single operation, a constant
            // named "MyConst" with a value "value".
            try (Tensor t = Tensor.create(value.getBytes("UTF-8"))) {
                // The Java API doesn't yet include convenience functions for adding operations.
                g.opBuilder("Const", "MyConst").setAttr("dtype", t.dataType()).setAttr("value", t).build();
            }

            try (Tensor t2 = Tensor.create(value.getBytes("UTF-8"))) {
                g.opBuilder("Placeholder", "MyPlaceholder").setAttr("dtype", DataType.FLOAT)
                    .setAttr("shape", Shape.unknown())
                    .build()
                    .output(0);
            }

            // Execute the "MyConst" operation in a Session.
            try (Session s = new Session(g, config.toByteArray());
                 // Generally, there may be multiple output tensors,
                 // all of them must be closed to prevent resource leaks.
                 Tensor output = s.runner().fetch("MyConst").run().get(0)) {
                System.out.println(new String(output.bytesValue(), "UTF-8"));
            }
            */
        }
    }
}
\ No newline at end of file
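The comments in main() note that Test 1's host round-trip is a stand-in: the goal is to obtain a tensor's GPU buffer address as a long from a custom JNI call and then copy device-to-device with cuMemcpyDtoD instead. A sketch of that step, assuming such an address getter exists and that this method is added to the tfhello class above (copyTensorBuffer and tensorAddr are hypothetical names, not part of this commit):

    // Hypothetical helper: wraps a tensor's device address (obtained elsewhere as a
    // long) and copies it device-to-device into a JCuda-owned buffer, avoiding the
    // host round-trip used in Test 1.
    static void copyTensorBuffer(long tensorAddr, CUdeviceptr dst, int numBytes) {
        // No cuMemAlloc here: the source memory is already owned by TensorFlow.
        CUdeviceptr src = longToCUdeviceptr(tensorAddr);
        cuMemcpyDtoD(dst, src, numBytes);
        cuCtxSynchronize();
    }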