55
int dims[2] = { ps->fft_real_size, ps->fft_real_size };
57
cufft_err = cufftPlanMany(&ps->cufft_r2c_plan, 2, dims, NULL, 1, 0, NULL, 1, 0, CUFFT_R2C, CP_BLOCK);
58
#else /* CUDA_BATCH_FFT */
54
59
cufft_err = cufftPlan2d(&ps->cufft_r2c_plan, ps->fft_real_size, ps->fft_real_size, CUFFT_R2C);
60
#endif /* CUDA_BATCH_FFT */
56
62
// The plan created immediately before this check is the real-to-complex
// (R2C) plan, so the diagnostic must name that plan.
reportError("Problem initializing r2c plan, cufft code: %i", cufft_err);
57
63
return DICT_ERROR_CUFFT;
67
cufft_err = cufftPlanMany(&ps->cufft_c2r_plan, 2, dims, NULL, 1, 0, NULL, 1, 0, CUFFT_C2R, CP_BLOCK);
68
#else /* CUDA_BATCH_FFT */
60
69
cufft_err = cufftPlan2d(&ps->cufft_c2r_plan, ps->fft_real_size, ps->fft_real_size, CUFFT_C2R);
70
#endif /* CUDA_BATCH_FFT */
62
72
// The plan created immediately before this check is the complex-to-real
// (C2R) plan, so the diagnostic must name that plan.
reportError("Problem initializing c2r plan, cufft code: %i", cufft_err);
63
73
cufftDestroy(ps->cufft_r2c_plan);
493
503
lsum_temp + (2 * lsum_step), lsum_temp + (3 * lsum_step),
494
504
lsum_temp, lsum_temp + lsum_step);
506
#ifndef CUDA_BATCH_FFT
496
507
cufftExecR2C(ps->cufft_r2c_plan, cuda_base_buffer + i * alloc_size, cache + i * alloc_size);
508
#endif /* ! CUDA_BATCH_FFT */
512
#ifdef CUDA_BATCH_FFT
513
cufftExecR2C(ps->cufft_r2c_plan, cuda_base_buffer, cache);
514
#endif /* CUDA_BATCH_FFT */
817
835
// Performing FFTs
818
836
cufftComplex *cuda_fft_buffer = ((cufftComplex*)ps->cuda_temp_buffer) + alloc_size;
838
#ifdef CUDA_BATCH_FFT
839
cufftExecR2C(ps->cufft_r2c_plan, cuda_data_buffer, cuda_fft_buffer);
840
#else /* CUDA_BATCH_FFT */
820
841
for (int i = 0;i < ncp;i++) {
821
842
if (banlist[i]) continue;
822
843
cufftExecR2C(ps->cufft_r2c_plan, cuda_data_buffer + i * alloc_size, cuda_fft_buffer + i * alloc_size);
845
#endif /* CUDA_BATCH_FFT */
847
#ifdef CUDA_BATCH_FFT
848
// No interleave in current version
849
int complex_blocks = calc_blocks(fft_real_size * fft_real_size, SIDE_BLOCK_SIZE);
850
#else /* CUDA_BATCH_FFT */
825
851
int complex_blocks = calc_blocks(fft_real_size * (fft_real_size / 2 + 1), SIDE_BLOCK_SIZE);
852
#endif /* CUDA_BATCH_FFT */
826
854
dim3 complex_grid_dim(complex_blocks, cp_blocks, 1);
827
855
vecMul<<<complex_grid_dim,block_side_cp>>>(cuda_fft_buffer, ps->cuda_fft_cache + cache_icp * alloc_size, alloc_size, fft_real_size/2+1);
829
857
// First in-place transform for some reason is failing, therefore we
830
858
// have one alloc_size spacing between starts (see cuda_fft_buffer set above)
831
859
cufftReal *cuda_result_buffer = (cufftReal*)ps->cuda_temp_buffer;
861
#ifdef CUDA_BATCH_FFT
862
cufftExecC2R(ps->cufft_c2r_plan, cuda_fft_buffer, cuda_result_buffer);
863
#else /* CUDA_BATCH_FFT */
832
864
for (int i = 0;i < ncp;i++) {
833
865
if (banlist[i]) continue;
834
866
cufftExecC2R(ps->cufft_c2r_plan, cuda_fft_buffer + i * alloc_size, cuda_result_buffer + i * alloc_size);
868
#endif /* CUDA_BATCH_FFT */