summaryrefslogtreecommitdiffstats
path: root/cuda/2d/sart.cu
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <WillemJan.Palenstijn@uantwerpen.be>2014-04-16 11:12:55 +0000
committerwpalenst <WillemJan.Palenstijn@uantwerpen.be>2014-04-16 11:12:55 +0000
commit3a6769465bee7d56d0ddff36613b886446421e07 (patch)
tree624e85c5d6a4ab19c958a388e3436219693a6296 /cuda/2d/sart.cu
parent4dfb881ceb82b07630437e952dec62323977ab56 (diff)
downloadastra-3a6769465bee7d56d0ddff36613b886446421e07.tar.gz
astra-3a6769465bee7d56d0ddff36613b886446421e07.tar.bz2
astra-3a6769465bee7d56d0ddff36613b886446421e07.tar.xz
astra-3a6769465bee7d56d0ddff36613b886446421e07.zip
Remove padding in 2D cuda in favour of Border mode
Diffstat (limited to 'cuda/2d/sart.cu')
-rw-r--r--cuda/2d/sart.cu60
1 files changed, 28 insertions, 32 deletions
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index a40176d..7f499ce 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -39,14 +39,13 @@ $Id$
namespace astraCUDA {
-
+// FIXME: Remove these functions. (Outdated)
__global__ void devMUL_SART(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int width)
{
unsigned int x = threadIdx.x + 16*blockIdx.x;
if (x >= width) return;
- // Copy result down and left one pixel.
- pfOut[x + pitch] = pfOut[x + 1] * pfIn[x + 1];
+ pfOut[x] *= pfIn[x];
}
void MUL_SART(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int width)
@@ -106,18 +105,15 @@ void SART::reset()
bool SART::init()
{
if (useVolumeMask) {
- allocateVolume(D_tmpData, dims.iVolWidth+2, dims.iVolHeight+2, tmpPitch);
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2);
+ allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch);
+ zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
}
- // HACK: D_projData consists of two lines. The first is used padded,
- // the second unpadded. This is to satisfy the alignment requirements
- // of resp. FP and BP_SART.
- allocateVolume(D_projData, dims.iProjDets+2, 2, projPitch);
- zeroVolume(D_projData, projPitch, dims.iProjDets+2, 1);
+ allocateVolume(D_projData, dims.iProjDets, 1, projPitch);
+ zeroVolume(D_projData, projPitch, dims.iProjDets, 1);
- allocateVolume(D_lineWeight, dims.iProjDets+2, dims.iProjAngles, linePitch);
- zeroVolume(D_lineWeight, linePitch, dims.iProjDets+2, dims.iProjAngles);
+ allocateVolume(D_lineWeight, dims.iProjDets, dims.iProjAngles, linePitch);
+ zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
// We can't precompute lineWeights when using a mask
if (!useVolumeMask)
@@ -142,23 +138,23 @@ bool SART::setProjectionOrder(int* _projectionOrder, int _projectionCount)
bool SART::precomputeWeights()
{
- zeroVolume(D_lineWeight, linePitch, dims.iProjDets+2, dims.iProjAngles);
+ zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
if (useVolumeMask) {
callFP(D_maskData, maskPitch, D_lineWeight, linePitch, 1.0f);
} else {
// Allocate tmpData temporarily
- allocateVolume(D_tmpData, dims.iVolWidth+2, dims.iVolHeight+2, tmpPitch);
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2);
+ allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch);
+ zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
- processVol<opSet, VOL>(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opSet>(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight);
callFP(D_tmpData, tmpPitch, D_lineWeight, linePitch, 1.0f);
cudaFree(D_tmpData);
D_tmpData = 0;
}
- processVol<opInvert, SINO>(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
+ processVol<opInvert>(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
return true;
}
@@ -181,12 +177,12 @@ bool SART::iterate(unsigned int iterations)
}
// copy one line of sinogram to projection data
- cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData + angle*sinoPitch, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets+2), 1, cudaMemcpyDeviceToDevice);
+ cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData + angle*sinoPitch, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), 1, cudaMemcpyDeviceToDevice);
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
- cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth+2), dims.iVolHeight+2, cudaMemcpyDeviceToDevice);
- processVol<opMul, VOL>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
callFP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, -1.0f);
} else {
callFP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, -1.0f);
@@ -197,17 +193,17 @@ bool SART::iterate(unsigned int iterations)
if (useVolumeMask) {
// BP, mask, and add back
// TODO: Try putting the masking directly in the BP
- zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2);
+ zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle);
- processVol<opAddMul, VOL>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight);
} else {
callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle);
}
if (useMinConstraint)
- processVol<opClampMin, VOL>(D_volumeData, fMinConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opClampMin>(D_volumeData, fMinConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
if (useMaxConstraint)
- processVol<opClampMax, VOL>(D_volumeData, fMaxConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opClampMax>(D_volumeData, fMaxConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
iteration++;
@@ -220,16 +216,16 @@ float SART::computeDiffNorm()
{
unsigned int pPitch;
float *D_p;
- allocateVolume(D_p, dims.iProjDets+2, dims.iProjAngles, pPitch);
- zeroVolume(D_p, pPitch, dims.iProjDets+2, dims.iProjAngles);
+ allocateVolume(D_p, dims.iProjDets, dims.iProjAngles, pPitch);
+ zeroVolume(D_p, pPitch, dims.iProjDets, dims.iProjAngles);
// copy sinogram to D_p
- cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets+2), dims.iProjAngles, cudaMemcpyDeviceToDevice);
+ cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice);
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
- cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth+2), dims.iVolHeight+2, cudaMemcpyDeviceToDevice);
- processVol<opMul, VOL>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
+ processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f);
@@ -237,7 +233,7 @@ float SART::computeDiffNorm()
// compute norm of D_p
- float s = dotProduct2D(D_p, pPitch, dims.iProjDets, dims.iProjAngles, 1, 0);
+ float s = dotProduct2D(D_p, pPitch, dims.iProjDets, dims.iProjAngles);
cudaFree(D_p);
@@ -267,11 +263,11 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
{
if (angles) {
assert(!fanProjs);
- return BP_SART(D_volumeData, volumePitch, D_projData + projPitch, projPitch,
+ return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
angle, dims, angles, TOffsets);
} else {
assert(fanProjs);
- return FanBP_SART(D_volumeData, volumePitch, D_projData + projPitch, projPitch,
+ return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch,
angle, dims, fanProjs);
}