From 3a6769465bee7d56d0ddff36613b886446421e07 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 16 Apr 2014 11:12:55 +0000 Subject: Remove padding in 2D cuda in favour of Border mode --- cuda/2d/sart.cu | 60 +++++++++++++++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 32 deletions(-) (limited to 'cuda/2d/sart.cu') diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index a40176d..7f499ce 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -39,14 +39,13 @@ $Id$ namespace astraCUDA { - +// FIXME: Remove these functions. (Outdated) __global__ void devMUL_SART(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int width) { unsigned int x = threadIdx.x + 16*blockIdx.x; if (x >= width) return; - // Copy result down and left one pixel. - pfOut[x + pitch] = pfOut[x + 1] * pfIn[x + 1]; + pfOut[x] *= pfIn[x]; } void MUL_SART(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int width) @@ -106,18 +105,15 @@ void SART::reset() bool SART::init() { if (useVolumeMask) { - allocateVolume(D_tmpData, dims.iVolWidth+2, dims.iVolHeight+2, tmpPitch); - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2); + allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch); + zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); } - // HACK: D_projData consists of two lines. The first is used padded, - // the second unpadded. This is to satisfy the alignment requirements - // of resp. FP and BP_SART. - allocateVolume(D_projData, dims.iProjDets+2, 2, projPitch); - zeroVolume(D_projData, projPitch, dims.iProjDets+2, 1); + allocateVolume(D_projData, dims.iProjDets, 1, projPitch); + zeroVolume(D_projData, projPitch, dims.iProjDets, 1); - allocateVolume(D_lineWeight, dims.iProjDets+2, dims.iProjAngles, linePitch); - zeroVolume(D_lineWeight, linePitch, dims.iProjDets+2, dims.iProjAngles); + allocateVolume(D_lineWeight, dims.iProjDets, dims.iProjAngles, linePitch); + zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles); // We can't precompute lineWeights when using a mask if (!useVolumeMask) @@ -142,23 +138,23 @@ bool SART::setProjectionOrder(int* _projectionOrder, int _projectionCount) bool SART::precomputeWeights() { - zeroVolume(D_lineWeight, linePitch, dims.iProjDets+2, dims.iProjAngles); + zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles); if (useVolumeMask) { callFP(D_maskData, maskPitch, D_lineWeight, linePitch, 1.0f); } else { // Allocate tmpData temporarily - allocateVolume(D_tmpData, dims.iVolWidth+2, dims.iVolHeight+2, tmpPitch); - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2); + allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch); + zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); - processVol(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight); + processVol(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight); callFP(D_tmpData, tmpPitch, D_lineWeight, linePitch, 1.0f); cudaFree(D_tmpData); D_tmpData = 0; } - processVol(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles); + processVol(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles); return true; } @@ -181,12 +177,12 @@ bool SART::iterate(unsigned int iterations) } // copy one line of sinogram to projection data - cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData + angle*sinoPitch, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets+2), 1, cudaMemcpyDeviceToDevice); + cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData + angle*sinoPitch, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), 1, cudaMemcpyDeviceToDevice); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth+2), dims.iVolHeight+2, cudaMemcpyDeviceToDevice); - processVol(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + processVol(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight); callFP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, -1.0f); } else { callFP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, -1.0f); @@ -197,17 +193,17 @@ bool SART::iterate(unsigned int iterations) if (useVolumeMask) { // BP, mask, and add back // TODO: Try putting the masking directly in the BP - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2); + zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle); - processVol(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight); + processVol(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight); } else { callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle); } if (useMinConstraint) - processVol(D_volumeData, fMinConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight); + processVol(D_volumeData, fMinConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight); if (useMaxConstraint) - processVol(D_volumeData, fMaxConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight); + processVol(D_volumeData, fMaxConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight); iteration++; @@ -220,16 +216,16 @@ float SART::computeDiffNorm() { unsigned int pPitch; float *D_p; - allocateVolume(D_p, dims.iProjDets+2, dims.iProjAngles, pPitch); - zeroVolume(D_p, pPitch, dims.iProjDets+2, dims.iProjAngles); + allocateVolume(D_p, dims.iProjDets, dims.iProjAngles, pPitch); + zeroVolume(D_p, pPitch, dims.iProjDets, dims.iProjAngles); // copy sinogram to D_p - cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets+2), dims.iProjAngles, cudaMemcpyDeviceToDevice); + cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth+2), dims.iVolHeight+2, cudaMemcpyDeviceToDevice); - processVol(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + processVol(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight); callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f); } else { callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f); @@ -237,7 +233,7 @@ float SART::computeDiffNorm() // compute norm of D_p - float s = dotProduct2D(D_p, pPitch, dims.iProjDets, dims.iProjAngles, 1, 0); + float s = dotProduct2D(D_p, pPitch, dims.iProjDets, dims.iProjAngles); cudaFree(D_p); @@ -267,11 +263,11 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch, { if (angles) { assert(!fanProjs); - return BP_SART(D_volumeData, volumePitch, D_projData + projPitch, projPitch, + return BP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, dims, angles, TOffsets); } else { assert(fanProjs); - return FanBP_SART(D_volumeData, volumePitch, D_projData + projPitch, projPitch, + return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, dims, fanProjs); } -- cgit v1.2.3