87
static void fftInit(TProcessingState *ps) {
105
static int fftInit(TProcessingState *ps) {
106
cufftResult cufft_err;
91
112
// cufftPlan2d(&ps->cufft_plan, ps->fft_size, ps->fft_size, CUFFT_C2C);
93
cufftPlan2d(&ps->cufft_r2c_plan, ps->fft_size, ps->fft_size, CUFFT_R2C);
94
cufftPlan2d(&ps->cufft_c2r_plan, ps->fft_size, ps->fft_size, CUFFT_C2R);
114
cufft_err = cufftPlan2d(&ps->cufft_r2c_plan, ps->fft_size, ps->fft_size, CUFFT_R2C);
116
reportError("Problem initializing c2r plan, cufft code: %i", cufft_err);
120
cufft_err = cufftPlan2d(&ps->cufft_c2r_plan, ps->fft_size, ps->fft_size, CUFFT_C2R);
122
reportError("Problem initializing r2c plan, cufft code: %i", cufft_err);
123
cufftDestroy(ps->cufft_r2c_plan);
96
127
ps->fft_initialized = true;
98
cudaMalloc((void**)&ps->cuda_base_buffer, ps->ncp * ps->fft_alloc_size * sizeof(cufftComplex));
100
cudaMalloc((void**)&ps->cuda_data_buffer, ps->fft_alloc_size * sizeof(cufftComplex));
101
cudaMalloc((void**)&ps->cuda_input_buffer, ps->ncp * ps->fft_alloc_size * sizeof(uint8_t));
102
cudaMalloc((void**)&ps->cuda_final_buffer, ps->ncp * ps->fft_alloc_size * sizeof(float));
129
cuda_err = cudaMalloc((void**)&ps->cuda_base_buffer, ps->ncp * ps->fft_alloc_size * sizeof(cufftComplex));
131
reportError("Device memory allocation of %u*%u*cufftComplex bytes for cuda_base_buffer is failed", ps->ncp, ps->fft_alloc_size);
133
return ERROR_CUDA_MALLOC;
136
cuda_err = cudaMalloc((void**)&ps->cuda_data_buffer, ps->fft_alloc_size * sizeof(cufftComplex));
138
reportError("Device memory allocation of %u*cufftComplex bytes for cuda_data_buffer is failed", ps->fft_alloc_size);
140
return ERROR_CUDA_MALLOC;
143
cuda_err = cudaMalloc((void**)&ps->cuda_input_buffer, ps->ncp * ps->fft_alloc_size * sizeof(uint8_t));
145
reportError("Device memory allocation of %u*%u*uint8 bytes for cuda_input_buffer is failed", ps->ncp, ps->fft_alloc_size);
147
return ERROR_CUDA_MALLOC;
150
cuda_err = cudaMalloc((void**)&ps->cuda_final_buffer, ps->ncp * ps->fft_alloc_size * sizeof(float));
152
reportError("Device memory allocation of %u*%u*float bytes for cuda_final_buffer is failed", ps->ncp, ps->fft_alloc_size);
154
return ERROR_CUDA_MALLOC;
103
156
cudaMemset((void*)ps->cuda_final_buffer, 0, ps->ncp * ps->fft_alloc_size * sizeof(float));
104
cudaMalloc((void**)&ps->cuda_result_buffer, ps->fft_alloc_size * sizeof(cufftReal));
105
cudaMalloc((void**)&ps->cuda_temp_buffer, ps->fft_alloc_size * sizeof(cufftReal));
158
cuda_err = cudaMalloc((void**)&ps->cuda_result_buffer, ps->fft_alloc_size * sizeof(cufftReal));
160
reportError("Device memory allocation of %u*cufftReal bytes for cuda_result_buffer is failed", ps->fft_alloc_size);
162
return ERROR_CUDA_MALLOC;
165
cuda_err = cudaMalloc((void**)&ps->cuda_temp_buffer, ps->fft_alloc_size * sizeof(cufftReal));
167
reportError("Device memory allocation of %u*cufftReal bytes for cuda_temp_buffer is failed", ps->fft_alloc_size);
169
return ERROR_CUDA_MALLOC;
106
171
cudaMemset((void*)ps->cuda_temp_buffer, 0, ps->fft_alloc_size * sizeof(cufftReal));
108
cudaMalloc((void**)&ps->cuda_lsum_buffer, ps->ncp * ps->fft_alloc_size * sizeof(float));
109
cudaMalloc((void**)&ps->cuda_denom_buffer, ps->ncp * ps->fft_alloc_size * sizeof(float));
110
cudaMalloc((void**)&ps->cuda_nonzero_buffer, ps->ncp * ps->fft_alloc_size * sizeof(uint16_t));
173
cuda_err = cudaMalloc((void**)&ps->cuda_lsum_buffer, ps->ncp * ps->fft_alloc_size * sizeof(float));
175
reportError("Device memory allocation of %u*%u*float bytes for cuda_lsum_buffer is failed", ps->ncp, ps->fft_alloc_size);
177
return ERROR_CUDA_MALLOC;
180
cuda_err = cudaMalloc((void**)&ps->cuda_denom_buffer, ps->ncp * ps->fft_alloc_size * sizeof(float));
182
reportError("Device memory allocation of %u*%u*float bytes for cuda_denom_buffer is failed", ps->ncp, ps->fft_alloc_size);
184
return ERROR_CUDA_MALLOC;
187
cuda_err = cudaMalloc((void**)&ps->cuda_nonzero_buffer, ps->ncp * ps->fft_alloc_size * sizeof(uint16_t));
189
reportError("Device memory allocation of %u*%u*uint16 bytes for cuda_nonzero_buffer is failed", ps->ncp, ps->fft_alloc_size);
191
return ERROR_CUDA_MALLOC;
111
193
cudaMemset((void*)ps->cuda_nonzero_buffer, 0, ps->ncp * ps->fft_alloc_size * sizeof(uint16_t));
113
195
ps->cuda_nonzero_items = (uint16_t*)malloc(ps->ncp * sizeof(uint16_t));
196
if (!ps->cuda_nonzero_items) {
197
reportError("Host memory allocation of %u*uint16 bytes for cuda_nonzero_items is failed", ps->ncp);
115
202
ps->grid_size = (int*)malloc(ps->ncp*sizeof(int));
203
if (!ps->grid_size) {
204
reportError("Host memory allocation of %u*int bytes for grid_size is failed", ps->ncp);
118
212
static void fftPrepare(TProcessingState *ps) {