Commit 8b3dd30b authored by Andrey Filippov

CLAUDE: gpuTrace(-Dtp.trace=1) on uncertain GpuQuad methods — JCuda/JNA oracle comparison

Add GpuQuad.gpuTrace(m) printing "[GPUTRACE] "+getClass().getSimpleName()+"."+m (off unless
-Dtp.trace=1). Instrument the un-overridden GPU methods (potential oracle gaps): getCltData,
presentCltData, eraseGpuCorrs, execCorr2D (bundled), readbackTasks, setFullFrameImages, getCorrTdData,
getCorrIndices, getCorrComboIndices, getExtra, getTextureIndices, getRBGA, execRBGA, execTextures.
Since GpuQuadJna extends GpuQuad, the trace prints "GpuQuad.X" under JCuda and "GpuQuadJna.X" if a JNA
run falls through to one (= coverage gap) -> reveals oracle's real GPU usage before any NPE.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 31772785
...@@ -490,6 +490,12 @@ public class GpuQuad{ // quad camera description ...@@ -490,6 +490,12 @@ public class GpuQuad{ // quad camera description
texture_stride_rgba = (int)(device_stride[0] / Sizeof.FLOAT); texture_stride_rgba = (int)(device_stride[0] / Sizeof.FLOAT);
} }
/**
 * Diagnostic call tracing switch; true only when the JVM was started with -Dtp.trace=1.
 * The property is read once, when this static field is initialized.
 * Traced methods report the runtime class via getClass().getSimpleName(): "GpuQuad" under JCuda,
 * and "GpuQuadJna" if a JNA run falls through to an un-overridden base method. That reveals the
 * oracle's actual GPU-method usage (run JCuda first) and any JNA coverage gap, before an NPE.
 * By Claude 2026-06-25.
 */
public static boolean GPU_TRACE = "1".equals(System.getProperty("tp.trace"));

/**
 * Print a single "[GPUTRACE] <runtime class simple name>.<m>" line to stdout when tracing is on;
 * does nothing otherwise.
 * @param m name of the method being traced
 */
protected void gpuTrace(String m) {
	if (!GPU_TRACE) {
		return; // tracing disabled: skip building the message
	}
	final String runtimeClass = getClass().getSimpleName();
	System.out.println("[GPUTRACE] " + runtimeClass + "." + m);
}
// Backend selector (architecture B). Default JCUDA; set -Dtp.backend=jna to use the native // Backend selector (architecture B). Default JCUDA; set -Dtp.backend=jna to use the native
// (libtileproc.so via JNA) backend GpuQuadJna instead. JNA mode never initializes JCuda. // (libtileproc.so via JNA) backend GpuQuadJna instead. JNA mode never initializes JCuda.
// Validate by running the same workflow both ways and diffing saved outputs. // Validate by running the same workflow both ways and diffing saved outputs.
...@@ -995,6 +1001,7 @@ public class GpuQuad{ // quad camera description ...@@ -995,6 +1001,7 @@ public class GpuQuad{ // quad camera description
int num_sensors, int num_sensors,
boolean use_aux // while is it in class member? - just to be able to free boolean use_aux // while is it in class member? - just to be able to free
) { ) {
gpuTrace("readbackTasks");
num_task_tiles = num_tasks; num_task_tiles = num_tasks;
TpTask [] tile_tasks = new TpTask[num_task_tiles]; TpTask [] tile_tasks = new TpTask[num_task_tiles];
int task_size = getTaskSize(); int task_size = getTaskSize();
...@@ -1060,6 +1067,7 @@ public class GpuQuad{ // quad camera description ...@@ -1060,6 +1067,7 @@ public class GpuQuad{ // quad camera description
public int [] getTextureIndices() public int [] getTextureIndices()
{ {
gpuTrace("getTextureIndices");
float [] ftexture_indices_len = new float[1]; float [] ftexture_indices_len = new float[1];
cuMemcpyDtoH(Pointer.to(ftexture_indices_len), gpu_texture_indices_len, 1 * Sizeof.FLOAT); cuMemcpyDtoH(Pointer.to(ftexture_indices_len), gpu_texture_indices_len, 1 * Sizeof.FLOAT);
int num_tiles = Float.floatToIntBits(ftexture_indices_len[0]); int num_tiles = Float.floatToIntBits(ftexture_indices_len[0]);
...@@ -1234,6 +1242,7 @@ public class GpuQuad{ // quad camera description ...@@ -1234,6 +1242,7 @@ public class GpuQuad{ // quad camera description
*/ */
public float [][] getCltData( // only for color=0 public float [][] getCltData( // only for color=0
boolean use_ref){ boolean use_ref){
gpuTrace("getCltData");
CUdeviceptr [] gpu_sel_clt_h = use_ref ? gpu_clt_ref_h : gpu_clt_h; CUdeviceptr [] gpu_sel_clt_h = use_ref ? gpu_clt_ref_h : gpu_clt_h;
int [] wh = use_ref ? gpu_clt_ref_wh : gpu_clt_wh; int [] wh = use_ref ? gpu_clt_ref_wh : gpu_clt_wh;
int tilesX = wh[0] / GPUTileProcessor.DTT_SIZE; int tilesX = wh[0] / GPUTileProcessor.DTT_SIZE;
...@@ -1257,6 +1266,7 @@ public class GpuQuad{ // quad camera description ...@@ -1257,6 +1266,7 @@ public class GpuQuad{ // quad camera description
} }
public float [][] presentCltData(boolean use_ref){ public float [][] presentCltData(boolean use_ref){
gpuTrace("presentCltData");
float [][] fclt = getCltData(use_ref); float [][] fclt = getCltData(use_ref);
float [][] pfclt = new float [fclt.length][fclt[0].length]; float [][] pfclt = new float [fclt.length][fclt[0].length];
int [] wh = use_ref ? gpu_clt_ref_wh : gpu_clt_wh; int [] wh = use_ref ? gpu_clt_ref_wh : gpu_clt_wh;
...@@ -1473,6 +1483,7 @@ public class GpuQuad{ // quad camera description ...@@ -1473,6 +1483,7 @@ public class GpuQuad{ // quad camera description
int corr_mask, // which correlation pairs to generate (maybe later - reduce size from 15x15) int corr_mask, // which correlation pairs to generate (maybe later - reduce size from 15x15)
// final int threadsMax, // maximal number of threads to launch // final int threadsMax, // maximal number of threads to launch
final int debugLevel) { final int debugLevel) {
gpuTrace("setFullFrameImages");
int tilesX = img_width / GPUTileProcessor.DTT_SIZE; int tilesX = img_width / GPUTileProcessor.DTT_SIZE;
int tilesY = img_height / GPUTileProcessor.DTT_SIZE; int tilesY = img_height / GPUTileProcessor.DTT_SIZE;
float [] target_disparities = new float [tilesX * tilesY]; float [] target_disparities = new float [tilesX * tilesY];
...@@ -2392,6 +2403,7 @@ public class GpuQuad{ // quad camera description ...@@ -2392,6 +2403,7 @@ public class GpuQuad{ // quad camera description
double [] scales, double [] scales,
double fat_zero, double fat_zero,
int corr_radius) { int corr_radius) {
gpuTrace("execCorr2D");
if (this.gpuTileProcessor.GPU_CORRELATE2D_kernel == null) if (this.gpuTileProcessor.GPU_CORRELATE2D_kernel == null)
{ {
IJ.showMessage("Error", "No GPU kernel: GPU_CORRELATE2D_kernel"); IJ.showMessage("Error", "No GPU kernel: GPU_CORRELATE2D_kernel");
...@@ -2692,6 +2704,7 @@ public class GpuQuad{ // quad camera description ...@@ -2692,6 +2704,7 @@ public class GpuQuad{ // quad camera description
double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
boolean dust_remove, boolean dust_remove,
int keep_weights) { int keep_weights) {
gpuTrace("execRBGA");
if (GPUTileProcessor.USE_DS_DP) { if (GPUTileProcessor.USE_DS_DP) {
execRBGA_DP( execRBGA_DP(
color_weights, // double [] color_weights, color_weights, // double [] color_weights,
...@@ -3261,6 +3274,7 @@ public class GpuQuad{ // quad camera description ...@@ -3261,6 +3274,7 @@ public class GpuQuad{ // quad camera description
boolean calc_extra, boolean calc_extra,
boolean linescan_order boolean linescan_order
) { ) {
gpuTrace("execTextures");
if (GPUTileProcessor.USE_DS_DP) { if (GPUTileProcessor.USE_DS_DP) {
execTextures_DP( execTextures_DP(
color_weights, color_weights,
...@@ -3983,6 +3997,7 @@ public class GpuQuad{ // quad camera description ...@@ -3983,6 +3997,7 @@ public class GpuQuad{ // quad camera description
} }
public int [] getCorrIndices() { public int [] getCorrIndices() {
gpuTrace("getCorrIndices");
int [] inum_corrs = new int[1]; int [] inum_corrs = new int[1];
cuMemcpyDtoH(Pointer.to(inum_corrs), gpu_num_corr_tiles, 1 * Sizeof.INT); cuMemcpyDtoH(Pointer.to(inum_corrs), gpu_num_corr_tiles, 1 * Sizeof.INT);
int num_corrs = inum_corrs[0]; int num_corrs = inum_corrs[0];
...@@ -3994,6 +4009,7 @@ public class GpuQuad{ // quad camera description ...@@ -3994,6 +4009,7 @@ public class GpuQuad{ // quad camera description
public float [] getCorrTdData(){ public float [] getCorrTdData(){
gpuTrace("getCorrTdData");
int corr_size_td = 4 * GPUTileProcessor.DTT_SIZE * GPUTileProcessor.DTT_SIZE; int corr_size_td = 4 * GPUTileProcessor.DTT_SIZE * GPUTileProcessor.DTT_SIZE;
float [] cpu_corrs = new float [ num_corr_tiles * corr_size_td]; float [] cpu_corrs = new float [ num_corr_tiles * corr_size_td];
CUDA_MEMCPY2D copyD2H = new CUDA_MEMCPY2D(); CUDA_MEMCPY2D copyD2H = new CUDA_MEMCPY2D();
...@@ -4040,6 +4056,7 @@ public class GpuQuad{ // quad camera description ...@@ -4040,6 +4056,7 @@ public class GpuQuad{ // quad camera description
public int [] getCorrComboIndices() { public int [] getCorrComboIndices() {
gpuTrace("getCorrComboIndices");
// float [] fnum_corrs = new float[1]; // float [] fnum_corrs = new float[1];
// cuMemcpyDtoH(Pointer.to(fnum_corrs), gpu_num_corr_tiles, 1 * Sizeof.FLOAT); // cuMemcpyDtoH(Pointer.to(fnum_corrs), gpu_num_corr_tiles, 1 * Sizeof.FLOAT);
// int num_corrs = Float.floatToIntBits(fnum_corrs[0]); // int num_corrs = Float.floatToIntBits(fnum_corrs[0]);
...@@ -4077,6 +4094,7 @@ public class GpuQuad{ // quad camera description ...@@ -4077,6 +4094,7 @@ public class GpuQuad{ // quad camera description
public float [][] getCorr2D(int corr_rad){ public float [][] getCorr2D(int corr_rad){
gpuTrace("getCorr2D");
int corr_size = (2 * corr_rad + 1) * (2 * corr_rad + 1); int corr_size = (2 * corr_rad + 1) * (2 * corr_rad + 1);
float [] cpu_corrs = new float [ num_corr_tiles * corr_size]; float [] cpu_corrs = new float [ num_corr_tiles * corr_size];
CUDA_MEMCPY2D copyD2H = new CUDA_MEMCPY2D(); CUDA_MEMCPY2D copyD2H = new CUDA_MEMCPY2D();
...@@ -4132,6 +4150,7 @@ public class GpuQuad{ // quad camera description ...@@ -4132,6 +4150,7 @@ public class GpuQuad{ // quad camera description
* @return [ num_cams*(num_colors+1)][tilesX*tilesY] array for macro generation * @return [ num_cams*(num_colors+1)][tilesX*tilesY] array for macro generation
*/ */
public float [][] getExtra(){ public float [][] getExtra(){
gpuTrace("getExtra");
int [] texture_indices = getTextureIndices(); int [] texture_indices = getTextureIndices();
int num_tile_extra = num_cams*(num_colors+1); int num_tile_extra = num_cams*(num_colors+1);
float [] diff_rgb_combo = new float[texture_indices.length * num_tile_extra]; float [] diff_rgb_combo = new float[texture_indices.length * num_tile_extra];
...@@ -4181,6 +4200,7 @@ public class GpuQuad{ // quad camera description ...@@ -4181,6 +4200,7 @@ public class GpuQuad{ // quad camera description
copy_rbga.srcXInBytes = 4 * Sizeof.FLOAT; copy_rbga.srcXInBytes = 4 * Sizeof.FLOAT;
for (int ncol = 0; ncol<= num_colors; ncol++ ) { for (int ncol = 0; ncol<= num_colors; ncol++ ) {
gpuTrace("getRBGA");
copy_rbga.dstHost = Pointer.to(rslt[ncol]); copy_rbga.dstHost = Pointer.to(rslt[ncol]);
copy_rbga.srcY = 4 + (woi.height +GPUTileProcessor.DTT_SIZE) * ncol; copy_rbga.srcY = 4 + (woi.height +GPUTileProcessor.DTT_SIZE) * ncol;
cuMemcpy2D(copy_rbga); // run copy cuMemcpy2D(copy_rbga); // run copy
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment