Commit 54f134b1 authored by Andrey Filippov

CLAUDE: add tp_proc_set_corr_indices_td (TD->PD re-upload for JNA CUAS)

Inverse of tp_proc_get_corr_indices/get_corr_td: uploads host-built per-pair TD
corr indices + data back to the device (gpu_corr_indices / gpu_num_corr_tiles /
gpu_corrs_td, pitched) and sets last_num_corr_tiles so the following
corr2d_normalize + get_corr2d use the right count.

Backs GpuQuad.setCorrIndicesTdData (TDCorrTile.convertTDtoPD) on the JNA
rectilinear CUAS path, which previously fell through to base JCuda and NPE'd on
a null device pointer. Buffers are sized num_pairs*ntiles (num_pairs=3 for the
rectilinear config), giving ample headroom for the selected-tile count.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 2aa4e6e0
...@@ -878,6 +878,19 @@ int tp_proc_get_corr_td(TpProc* p, float* out){ if(!p||!p->have_corr)return -1; ...@@ -878,6 +878,19 @@ int tp_proc_get_corr_td(TpProc* p, float* out){ if(!p||!p->have_corr)return -1;
int w=4*8*8; int w=4*8*8;
return cudaMemcpy2D(out,(size_t)w*sizeof(float), p->gpu_corrs_td, p->dstride_corr_td, return cudaMemcpy2D(out,(size_t)w*sizeof(float), p->gpu_corrs_td, p->dstride_corr_td,
(size_t)w*sizeof(float), p->last_num_corr_tiles, cudaMemcpyDeviceToHost)==cudaSuccess?0:-2; } (size_t)w*sizeof(float), p->last_num_corr_tiles, cudaMemcpyDeviceToHost)==cudaSuccess?0:-2; }
// Upload host-built TD per-pair corr indices + data (inverse of get_corr_indices/get_corr_td): the TD->PD
// re-conversion path (GpuQuad.setCorrIndicesTdData / TDCorrTile.convertTDtoPD). By Claude on 06/26/2026.
//   corr_indices : num_tiles ints, copied to gpu_corr_indices
//   fdata        : num_tiles rows of 256 floats, tightly packed on the host, copied to pitched gpu_corrs_td
//   num_tiles    : entry count; written to gpu_num_corr_tiles and last_num_corr_tiles (so normalize/get
//                  use this count)
// Returns 0 on success, -1 if p is null or has no corr buffers, -2 if the index upload failed, -3 if the
// count upload failed, -4 if the data upload failed, -5 on invalid arguments (negative num_tiles, or a
// null host pointer while num_tiles > 0).
// NOTE(review): num_tiles is not checked against the device buffer capacity - the allocation size is not
// visible from this function, so the caller must guarantee that it fits.
int tp_proc_set_corr_indices_td(TpProc* p, const int* corr_indices, const float* fdata, int num_tiles){
    if(!p||!p->have_corr) return -1;
    // A negative count would turn into a huge size_t below; null host buffers are only OK when empty.
    if(num_tiles<0) return -5;
    if(num_tiles>0 && (!corr_indices||!fdata)) return -5;
    cuCtxSetCurrent(p->mod->ctx);
    if(num_tiles>0){
        if(cudaMemcpy(p->gpu_corr_indices, corr_indices, (size_t)num_tiles*sizeof(int),
                      cudaMemcpyHostToDevice)!=cudaSuccess) return -2;
        int w=4*8*8; // corr_size_td = 4*DTT_SIZE^2 = 256 floats/tile
        if(cudaMemcpy2D(p->gpu_corrs_td, p->dstride_corr_td, fdata, (size_t)w*sizeof(float),
                        (size_t)w*sizeof(float), (size_t)num_tiles, cudaMemcpyHostToDevice)!=cudaSuccess) return -4;
    }
    // Publish the count last, so a failed payload upload does not change the device-side or host-side
    // tile count.
    if(cudaMemcpy(p->gpu_num_corr_tiles, &num_tiles, sizeof(int), cudaMemcpyHostToDevice)!=cudaSuccess) return -3;
    p->last_num_corr_tiles = num_tiles;
    return 0; }
// zero the per-pair pixel-domain correlation buffer (before accumulation) // zero the per-pair pixel-domain correlation buffer (before accumulation)
int tp_proc_erase_corrs(TpProc* p){ if(!p||!p->have_corr)return -1; cuCtxSetCurrent(p->mod->ctx); int tp_proc_erase_corrs(TpProc* p){ if(!p||!p->have_corr)return -1; cuCtxSetCurrent(p->mod->ctx);
return cudaMemset2D(p->gpu_corrs, p->dstride_corr, 0, (size_t)p->corr_length*sizeof(float), return cudaMemset2D(p->gpu_corrs, p->dstride_corr, 0, (size_t)p->corr_length*sizeof(float),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment