From e0b3ad8e57f269e34085ba319aa399ee3476811a Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 16 Apr 2014 11:13:46 +0000 Subject: Replace direct cudaMemcpy2D calls by utility functions --- cuda/2d/astra.cu | 2 +- cuda/2d/cgls.cu | 8 ++++---- cuda/2d/em.cu | 4 ++-- cuda/2d/sart.cu | 8 ++++---- cuda/2d/sirt.cu | 8 ++++---- cuda/2d/util.cu | 10 +++++++++- cuda/2d/util.h | 4 ++++ 7 files changed, 28 insertions(+), 16 deletions(-) (limited to 'cuda/2d') diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 15e487c..f4d4717 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -612,7 +612,7 @@ float BPalgo::computeDiffNorm() allocateProjectionData(D_projData, projPitch, dims); - cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*dims.iProjDets, dims.iProjAngles, cudaMemcpyDeviceToDevice); + duplicateProjectionData(D_projData, D_sinoData, sinoPitch, dims); callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f); float s = dotProduct2D(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index fce8beb..066ac5d 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -120,12 +120,12 @@ bool CGLS::iterate(unsigned int iterations) if (!sliceInitialized) { // copy sinogram - cudaMemcpy2D(D_r, sizeof(float)*rPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); + duplicateProjectionData(D_r, D_sinoData, sinoPitch, dims); // r = sino - A*x if (useVolumeMask) { // Use z as temporary storage here since it is unused - cudaMemcpy2D(D_z, sizeof(float)*zPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + duplicateVolumeData(D_z, D_volumeData, volumePitch, dims); processVol(D_z, D_maskData, zPitch, dims); callFP(D_z, zPitch, D_r, rPitch, -1.0f); } else { @@ -189,11 +189,11 @@ float CGLS::computeDiffNorm() // used outside of iterations. // copy sinogram to w - cudaMemcpy2D(D_w, sizeof(float)*wPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); + duplicateProjectionData(D_w, D_sinoData, sinoPitch, dims); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_z, sizeof(float)*zPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + duplicateVolumeData(D_z, D_volumeData, volumePitch, dims); processVol(D_z, D_maskData, zPitch, dims); callFP(D_z, zPitch, D_w, wPitch, -1.0f); } else { diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index c75f250..ebb76b5 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -150,11 +150,11 @@ bool EM::iterate(unsigned int iterations) float EM::computeDiffNorm() { // copy sinogram to projection data - cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); + duplicateProjectionData(D_projData, D_sinoData, sinoPitch, dims); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims); processVol(D_tmpData, D_maskData, tmpPitch, dims); callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f); } else { diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index 048661f..64d6f28 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -180,11 +180,11 @@ bool SART::iterate(unsigned int iterations) } // copy one line of sinogram to projection data - cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData + angle*sinoPitch, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), 1, cudaMemcpyDeviceToDevice); + duplicateProjectionData(D_projData, D_sinoData, sinoPitch, dims); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims); processVol(D_tmpData, D_maskData, tmpPitch, dims); callFP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, -1.0f); } else { @@ -223,11 +223,11 @@ float SART::computeDiffNorm() zeroProjectionData(D_p, pPitch, dims); // copy sinogram to D_p - cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); + duplicateProjectionData(D_p, D_sinoData, sinoPitch, dims); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims); processVol(D_tmpData, D_maskData, tmpPitch, dims); callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f); } else { diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index c402864..d34a180 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -191,11 +191,11 @@ bool SIRT::iterate(unsigned int iterations) for (unsigned int iter = 0; iter < iterations && !shouldAbort; ++iter) { // copy sinogram to projection data - cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); + duplicateProjectionData(D_projData, D_sinoData, projPitch, dims); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims); processVol(D_tmpData, D_maskData, tmpPitch, dims); callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f); } else { @@ -226,11 +226,11 @@ bool SIRT::iterate(unsigned int iterations) float SIRT::computeDiffNorm() { // copy sinogram to projection data - cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); + duplicateProjectionData(D_projData, D_sinoData, projPitch, dims); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + duplicateVolumeData(D_tmpData, D_volumeData, volumePitch, dims); processVol(D_tmpData, D_maskData, tmpPitch, dims); callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f); } else { diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu index 8d3b625..dba70d9 100644 --- a/cuda/2d/util.cu +++ b/cuda/2d/util.cu @@ -129,6 +129,15 @@ void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dim zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles); } +void duplicateVolumeData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims) +{ + cudaMemcpy2D(D_dst, sizeof(float)*pitch, D_src, sizeof(float)*pitch, sizeof(float)*dims.iVolWidth, dims.iVolHeight, cudaMemcpyDeviceToDevice); +} + +void duplicateProjectionData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims) +{ + cudaMemcpy2D(D_dst, sizeof(float)*pitch, D_src, sizeof(float)*pitch, sizeof(float)*dims.iProjDets, dims.iProjAngles, cudaMemcpyDeviceToDevice); +} template __global__ void reduce1D(float *g_idata, float *g_odata, unsigned int n) @@ -206,7 +215,6 @@ __global__ void reduce2D(float *g_idata, float *g_odata, float dotProduct2D(float* D_data, unsigned int pitch, unsigned int width, unsigned int height) { -#warning FIX MEMORY ORDER unsigned int bx = (width + 15) / 16; unsigned int by = (height + 127) / 128; unsigned int shared_mem2 = sizeof(float) * 16 * 16; diff --git a/cuda/2d/util.h b/cuda/2d/util.h index 83cb794..c0ec49e 100644 --- a/cuda/2d/util.h +++ b/cuda/2d/util.h @@ -80,6 +80,10 @@ bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimension void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims); void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims); +void duplicateVolumeData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims); +void duplicateProjectionData(float* D_dst, float* D_src, unsigned int pitch, const SDimensions& dims); + + bool cudaTextForceKernelsCompletion(); void reportCudaError(cudaError_t err); -- cgit v1.2.3