Commit 33827862 authored by Andrey Filippov

CLAUDE: tp_proc_get_corr_indices/combo_indices/corr_td + erase_corrs (oracle TD-corr readback)

Native readback of gpu_corr_indices / gpu_corrs_combo_indices / gpu_corrs_td (DtoH) and cudaMemset2D
erase of gpu_corrs, for GpuQuadJna's getCorrIndices/getCorrComboIndices/getCorrTdData/eraseGpuCorrs
(oracle TD-correlation path).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 0399a26d
...@@ -859,6 +859,21 @@ int tp_proc_get_corr2d(TpProc* p, float* out, int corr_rad){ if(!p||!p->have_cor ...@@ -859,6 +859,21 @@ int tp_proc_get_corr2d(TpProc* p, float* out, int corr_rad){ if(!p||!p->have_cor
// Return p->last_num_corr_tiles, or -1 when p is NULL.
int tp_proc_num_corr_tiles(TpProc* p){
    if(!p) return -1;
    return p->last_num_corr_tiles;
}
// Return p->last_num_corr_combo, or -1 when p is NULL.
int tp_proc_num_corr_combo(TpProc* p){
    if(!p) return -1;
    return p->last_num_corr_combo;
}
// ---- oracle TD-correlation readback / erase (getCorrIndices/getCorrTdData/getCorrComboIndices/eraseGpuCorrs) ----
// Copy the first n ints of the device buffer p->gpu_corr_indices into host memory.
//   p   - processor handle; must be non-NULL with have_corr set
//   out - host destination, must have room for n ints
//   n   - number of ints to read back, must be >= 0
//         NOTE(review): n is not checked against the size of the device allocation
//         (not visible here) - presumably callers pass tp_proc_num_corr_tiles(); confirm.
// Returns 0 on success, -1 on invalid arguments or when have_corr is not set,
// -2 when the CUDA copy fails.
int tp_proc_get_corr_indices(TpProc* p, int* out, int n){
    if(!p || !p->have_corr) return -1;
    // A negative n would wrap to an enormous byte count once cast to size_t;
    // a NULL destination is only acceptable for an empty (n==0) request.
    if(n<0 || (!out && n>0)) return -1;
    cuCtxSetCurrent(p->mod->ctx); // make this processor's context current for the runtime call below
    if(cudaMemcpy(out, p->gpu_corr_indices, (size_t)n*sizeof(int), cudaMemcpyDeviceToHost)!=cudaSuccess) return -2;
    return 0;
}
// Copy the first n ints of the device buffer p->gpu_corrs_combo_indices into host memory.
//   p   - processor handle; must be non-NULL with have_corr set
//   out - host destination, must have room for n ints
//   n   - number of ints to read back, must be >= 0
//         NOTE(review): n is not checked against the size of the device allocation
//         (not visible here) - presumably callers pass tp_proc_num_corr_combo(); confirm.
// Returns 0 on success, -1 on invalid arguments or when have_corr is not set,
// -2 when the CUDA copy fails.
int tp_proc_get_corr_combo_indices(TpProc* p, int* out, int n){
    if(!p || !p->have_corr) return -1;
    // A negative n would wrap to an enormous byte count once cast to size_t;
    // a NULL destination is only acceptable for an empty (n==0) request.
    if(n<0 || (!out && n>0)) return -1;
    cuCtxSetCurrent(p->mod->ctx); // make this processor's context current for the runtime call below
    if(cudaMemcpy(out, p->gpu_corrs_combo_indices, (size_t)n*sizeof(int), cudaMemcpyDeviceToHost)!=cudaSuccess) return -2;
    return 0;
}
// De-pitch gpu_corrs_td (TD per-pair, 4*dtt^2=256 floats/tile, last_num_corr_tiles rows):
// copies the pitched device buffer into a densely packed host array.
//   p   - processor handle; must be non-NULL with have_corr set
//   out - host destination, must have room for 256*last_num_corr_tiles floats
// The source pitch is p->dstride_corr_td (passed to cudaMemcpy2D as a byte pitch);
// the destination pitch equals the row width, so host rows are contiguous.
// Returns 0 on success, -1 on invalid arguments/state, -2 when the CUDA copy fails.
int tp_proc_get_corr_td(TpProc* p, float* out){
    if(!p || !p->have_corr) return -1;
    int tiles = p->last_num_corr_tiles;
    // A negative tile count would be converted to a huge row count by cudaMemcpy2D;
    // a NULL destination is only acceptable when there is nothing to copy.
    if(tiles<0 || (!out && tiles>0)) return -1;
    cuCtxSetCurrent(p->mod->ctx); // make this processor's context current for the runtime call below
    const int floats_per_tile = 4*8*8; // 4 quadrants of an 8x8 transform tile = 256 floats
    const size_t row_bytes = (size_t)floats_per_tile*sizeof(float);
    if(cudaMemcpy2D(out, row_bytes, p->gpu_corrs_td, p->dstride_corr_td,
                    row_bytes, tiles, cudaMemcpyDeviceToHost)!=cudaSuccess) return -2;
    return 0;
}
// Zero the per-pair pixel-domain correlation buffer gpu_corrs (before accumulation).
// Clears num_pairs*tilesx*tilesy rows of corr_length floats each, with the rows
// spaced by p->dstride_corr (passed to cudaMemset2D as the pitch).
// Returns 0 on success, -1 when p is NULL or have_corr is not set,
// -2 when the CUDA memset fails.
int tp_proc_erase_corrs(TpProc* p){
    if(!p) return -1;
    if(!p->have_corr) return -1;
    cuCtxSetCurrent(p->mod->ctx); // make this processor's context current for the runtime call below
    size_t row_bytes = (size_t)p->corr_length*sizeof(float);
    size_t num_rows = (size_t)p->num_pairs*p->tilesx*p->tilesy;
    if(cudaMemset2D(p->gpu_corrs, p->dstride_corr, 0, row_bytes, num_rows)!=cudaSuccess) return -2;
    return 0;
}
void tp_proc_destroy(TpProc* p); // fwd void tp_proc_destroy(TpProc* p); // fwd
// Validate the persistent TpProc convert path end-to-end (file-driven), comparing CLT to // Validate the persistent TpProc convert path end-to-end (file-driven), comparing CLT to
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment