7
#include "normxcorr_hw.h"
9
/*
 * Copies a block of fft_size * fft_size correlation values from the device
 * to the host buffer `res`.
 *
 * The device source is ps->cuda_temp_buffer (viewed as cufftReal) advanced by
 * CP_BLOCK * ps->fft_alloc_size elements.
 *
 * Parameters:
 *   ps  - context supplying fft_size, fft_alloc_size and cuda_temp_buffer.
 *   icp - not used by this function.
 *         NOTE(review): presumably a control-point index kept for interface
 *         symmetry with the other getters - confirm.
 *   res - host destination; must hold at least fft_size * fft_size floats.
 *
 * Returns:
 *   0 (cudaSuccess) when the copy succeeds, otherwise the cudaError_t value
 *   reported by cudaMemcpy, converted to int.
 *   NOTE(review): the previously visible body had no return statement; the
 *   0-on-success convention is assumed - confirm against callers.
 */
int dictGetCorrelations(DICTContext ps, int icp, float *res) {
    const int size = ps->fft_size;
    /* Compute the byte count in size_t so size * size cannot overflow int. */
    const size_t bytes = (size_t)size * (size_t)size * sizeof(cufftReal);

    cufftReal *cuda_result_buffer = (cufftReal*)ps->cuda_temp_buffer;
    float *cuda_final_buffer = cuda_result_buffer + CP_BLOCK * ps->fft_alloc_size;

    (void)icp; /* intentionally unused, see header comment */

    /* Blocking device-to-host copy; propagate its status instead of dropping it. */
    cudaError_t err = cudaMemcpy(res, cuda_final_buffer, bytes, cudaMemcpyDeviceToHost);
    return (int)err;
}
20
/*
 * Downloads two rows of ps->ncp floats from the device and hands them to the
 * caller as separate x and y arrays.
 *
 * A single pitched 2D copy moves two rows from ps->cuda_points (device) into
 * the host staging area that starts at ps->points + 6 * ncp_alloc_size. Both
 * sides use a row pitch of ncp_alloc_size floats, so the second row lands at
 * move_x + ncp_alloc_size. The first ps->ncp values of each row are then
 * copied into res_x and res_y respectively.
 *
 * Parameters:
 *   ps    - context supplying ncp, ncp_alloc_size, points and cuda_points.
 *   res_x - host destination for the first row; at least ps->ncp floats.
 *   res_y - host destination for the second row; at least ps->ncp floats.
 *
 * Returns:
 *   0 on success, otherwise the cudaError_t value reported by cudaMemcpy2D,
 *   converted to int. On failure res_x / res_y are left untouched.
 *   NOTE(review): the previously visible body had no return statement and the
 *   head of the copy call was missing; cudaMemcpy2D is inferred from the
 *   seven-argument (dst, dpitch, src, spitch, width, height, kind) layout,
 *   and the 0-on-success convention is assumed - confirm against callers.
 */
int dictGetCorrections(DICTContext ps, float *res_x, float *res_y) {
    const int ncp = ps->ncp;
    const int ncp_alloc = ps->ncp_alloc_size;

    /* Host staging rows inside ps->points. */
    float *move_x = ps->points + 6 * ncp_alloc;
    float *move_y = move_x + ncp_alloc;

    /* Two rows of ncp floats, each row ncp_alloc floats apart on both sides. */
    cudaError_t err = cudaMemcpy2D(
        move_x, ncp_alloc * sizeof(float),
        ps->cuda_points, ncp_alloc * sizeof(float),
        ncp * sizeof(float), 2,
        cudaMemcpyDeviceToHost
    );
    if (err != cudaSuccess) {
        /* Do not publish stale staging data when the download failed. */
        return (int)err;
    }

    memcpy(res_x, move_x, ncp * sizeof(float));
    memcpy(res_y, move_y, ncp * sizeof(float));

    return 0;
}
40
/*
 * Copies the cached local-sum and denominator blocks for entry `icp` from
 * the device to host buffers.
 *
 * Each block holds fft_size * fft_size floats and starts at offset
 * icp * ps->fft_alloc_size within ps->cuda_lsum_cache and
 * ps->cuda_denom_cache respectively.
 *
 * Parameters:
 *   ps    - context supplying fft_size, fft_alloc_size, cuda_lsum_cache and
 *           cuda_denom_cache.
 *   icp   - index of the cache entry to read (scaled by fft_alloc_size).
 *   lsum  - host destination for the local-sum block; at least
 *           fft_size * fft_size floats.
 *   denom - host destination for the denominator block; same size.
 *
 * Returns:
 *   0 (cudaSuccess) when both copies succeed, otherwise the cudaError_t value
 *   of the first failing cudaMemcpy, converted to int. If the first copy
 *   fails the second one is not attempted.
 *   NOTE(review): the previously visible body had no return statement or
 *   closing brace; the 0-on-success convention is assumed - confirm against
 *   callers.
 */
int dictGetLocalSum(DICTContext ps, int icp, float *lsum, float *denom) {
    const int size = ps->fft_size;
    const int alloc_size = ps->fft_alloc_size;
    /* Compute the byte count in size_t so size * size cannot overflow int. */
    const size_t bytes = (size_t)size * (size_t)size * sizeof(float);

    cudaError_t err = cudaMemcpy(lsum, ps->cuda_lsum_cache + icp * alloc_size, bytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        return (int)err;
    }

    err = cudaMemcpy(denom, ps->cuda_denom_cache + icp * alloc_size, bytes, cudaMemcpyDeviceToHost);
    return (int)err;
}