diff options
| author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2021-11-16 12:19:07 +0100 | 
|---|---|---|
| committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2021-11-16 14:08:57 +0100 | 
| commit | 4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1 (patch) | |
| tree | 05bf29ce288b24d52f2a40b40bc1b64ae75aeee0 | |
| parent | 7f5a50d5b142fe8aeea22754b9895d1fae25e662 (diff) | |
Remove unnecessary costly syncs in FFT
| -rw-r--r-- | cuda/2d/fft.cu | 10 | 
1 file changed, 8 insertions, 2 deletions
```diff
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 413f3aa..e72ee85 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -232,7 +232,10 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource,
 		const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch;
 		float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount;
 
-		SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+		if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) {
+			cudaFree(pfDevRealFFTSource);
+			return false;
+		}
 	}
 
 	bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount,
@@ -274,7 +277,10 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
 		const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount;
 		float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch;
 
-		SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+		if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) {
+			cudaFree(pfDevRealFFTTarget);
+			return false;
+		}
 	}
 
 	SAFE_CALL(cudaFree(pfDevRealFFTTarget));
```
