summaryrefslogtreecommitdiffstats
path: root/cuda/2d
diff options
context:
space:
mode:
Diffstat (limited to 'cuda/2d')
-rw-r--r--cuda/2d/algo.cu17
-rw-r--r--cuda/2d/arith.cu248
-rw-r--r--cuda/2d/arith.h38
-rw-r--r--cuda/2d/astra.cu29
-rw-r--r--cuda/2d/cgls.cu14
-rw-r--r--cuda/2d/darthelper.cu41
-rw-r--r--cuda/2d/em.cu12
-rw-r--r--cuda/2d/sart.cu14
-rw-r--r--cuda/2d/sirt.cu34
-rw-r--r--cuda/2d/util.cu29
-rw-r--r--cuda/2d/util.h8
11 files changed, 304 insertions, 180 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index 333481a..33ca1a3 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -264,20 +264,18 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit
return false;
bool ok = copySinogramToDevice(pfSinogram, iSinogramPitch,
- dims.iProjDets,
- dims.iProjAngles,
+ dims,
D_sinoData, sinoPitch);
if (!ok)
return false;
// rescale sinogram to adjust for pixel size
- processVol<opMul>(D_sinoData, fSinogramScale,
+ processSino<opMul>(D_sinoData, fSinogramScale,
//1.0f/(fPixelSize*fPixelSize),
- sinoPitch,
- dims.iProjDets, dims.iProjAngles);
+ sinoPitch, dims);
ok = copyVolumeToDevice(pfReconstruction, iReconstructionPitch,
- dims.iVolWidth, dims.iVolHeight,
+ dims,
D_volumeData, volumePitch);
if (!ok)
return false;
@@ -289,7 +287,7 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit
return false;
ok = copyVolumeToDevice(pfVolMask, iVolMaskPitch,
- dims.iVolWidth, dims.iVolHeight,
+ dims,
D_maskData, maskPitch);
if (!ok)
return false;
@@ -300,7 +298,7 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit
return false;
ok = copySinogramToDevice(pfSinoMask, iSinoMaskPitch,
- dims.iProjDets, dims.iProjAngles,
+ dims,
D_smaskData, smaskPitch);
if (!ok)
return false;
@@ -313,8 +311,7 @@ bool ReconAlgo::getReconstruction(float* pfReconstruction,
unsigned int iReconstructionPitch) const
{
bool ok = copyVolumeFromDevice(pfReconstruction, iReconstructionPitch,
- dims.iVolWidth,
- dims.iVolHeight,
+ dims,
D_volumeData, volumePitch);
if (!ok)
return false;
diff --git a/cuda/2d/arith.cu b/cuda/2d/arith.cu
index 42c2c98..9544026 100644
--- a/cuda/2d/arith.cu
+++ b/cuda/2d/arith.cu
@@ -279,55 +279,57 @@ __global__ void devDDFtoD(float* pfOut, const float* pfIn1, const float* pfIn2,
-
template<typename op>
-void processVolCopy(float* out, unsigned int width, unsigned int height)
+void processVolCopy(float* out, const SDimensions& dims)
{
float* D_out;
+ size_t width = dims.iVolWidth;
unsigned int pitch;
- allocateVolume(D_out, width, height, pitch);
- copyVolumeToDevice(out, width, width, height, D_out, pitch);
+ allocateVolumeData(D_out, pitch, dims);
+ copyVolumeToDevice(out, width, dims, D_out, pitch);
- processVol<op>(D_out, pitch, width, height);
+ processVol<op>(D_out, pitch, dims);
- copyVolumeFromDevice(out, width, width, height, D_out, pitch);
+ copyVolumeFromDevice(out, width, dims, D_out, pitch);
cudaFree(D_out);
}
template<typename op>
-void processVolCopy(float* out, float param, unsigned int width, unsigned int height)
+void processVolCopy(float* out, float param, const SDimensions& dims)
{
float* D_out;
+ size_t width = dims.iVolWidth;
unsigned int pitch;
- allocateVolume(D_out, width, height, pitch);
- copyVolumeToDevice(out, width, width, height, D_out, pitch);
+ allocateVolumeData(D_out, pitch, dims);
+ copyVolumeToDevice(out, width, dims, D_out, pitch);
- processVol<op>(D_out, param, pitch, width, height);
+ processVol<op>(D_out, param, pitch, dims);
- copyVolumeFromDevice(out, width, width, height, D_out, pitch);
+ copyVolumeFromDevice(out, width, dims, D_out, pitch);
cudaFree(D_out);
}
template<typename op>
-void processVolCopy(float* out1, float* out2, float param1, float param2, unsigned int width, unsigned int height)
+void processVolCopy(float* out1, float* out2, float param1, float param2, const SDimensions& dims)
{
float* D_out1;
float* D_out2;
+ size_t width = dims.iVolWidth;
unsigned int pitch;
- allocateVolume(D_out1, width, height, pitch);
- copyVolumeToDevice(out1, width, width, height, D_out1, pitch);
- allocateVolume(D_out2, width, height, pitch);
- copyVolumeToDevice(out2, width, width, height, D_out2, pitch);
+ allocateVolumeData(D_out1, pitch, dims);
+ copyVolumeToDevice(out1, width, dims, D_out1, pitch);
+ allocateVolumeData(D_out2, pitch, dims);
+ copyVolumeToDevice(out2, width, dims, D_out2, pitch);
- processVol<op>(D_out1, D_out2, param1, param2, pitch, width, height);
+ processVol<op>(D_out1, D_out2, param1, param2, pitch, dims);
- copyVolumeFromDevice(out1, width, width, height, D_out1, pitch);
- copyVolumeFromDevice(out2, width, width, height, D_out2, pitch);
+ copyVolumeFromDevice(out1, width, dims, D_out1, pitch);
+ copyVolumeFromDevice(out2, width, dims, D_out2, pitch);
cudaFree(D_out1);
cudaFree(D_out2);
@@ -335,63 +337,66 @@ void processVolCopy(float* out1, float* out2, float param1, float param2, unsign
template<typename op>
-void processVolCopy(float* out, const float* in, unsigned int width, unsigned int height)
+void processVolCopy(float* out, const float* in, const SDimensions& dims)
{
float* D_out;
float* D_in;
+ size_t width = dims.iVolWidth;
unsigned int pitch;
- allocateVolume(D_out, width, height, pitch);
- copyVolumeToDevice(out, width, width, height, D_out, pitch);
- allocateVolume(D_in, width, height, pitch);
- copyVolumeToDevice(in, width, width, height, D_in, pitch);
+ allocateVolumeData(D_out, pitch, dims);
+ copyVolumeToDevice(out, width, dims, D_out, pitch);
+ allocateVolumeData(D_in, pitch, dims);
+ copyVolumeToDevice(in, width, dims, D_in, pitch);
- processVol<op>(D_out, D_in, pitch, width, height);
+ processVol<op>(D_out, D_in, pitch, dims);
- copyVolumeFromDevice(out, width, width, height, D_out, pitch);
+ copyVolumeFromDevice(out, width, dims, D_out, pitch);
cudaFree(D_out);
cudaFree(D_in);
}
template<typename op>
-void processVolCopy(float* out, const float* in, float param, unsigned int width, unsigned int height)
+void processVolCopy(float* out, const float* in, float param, const SDimensions& dims)
{
float* D_out;
float* D_in;
+ size_t width = dims.iVolWidth;
unsigned int pitch;
- allocateVolume(D_out, width, height, pitch);
- copyVolumeToDevice(out, width, width, height, D_out, pitch);
- allocateVolume(D_in, width, height, pitch);
- copyVolumeToDevice(in, width, width, height, D_in, pitch);
+ allocateVolumeData(D_out, pitch, dims);
+ copyVolumeToDevice(out, width, dims, D_out, pitch);
+ allocateVolumeData(D_in, pitch, dims);
+ copyVolumeToDevice(in, width, dims, D_in, pitch);
- processVol<op>(D_out, D_in, param, pitch, width, height);
+ processVol<op>(D_out, D_in, param, pitch, dims);
- copyVolumeFromDevice(out, width, width, height, D_out, pitch);
+ copyVolumeFromDevice(out, width, dims, D_out, pitch);
cudaFree(D_out);
cudaFree(D_in);
}
template<typename op>
-void processVolCopy(float* out, const float* in1, const float* in2, unsigned int width, unsigned int height)
+void processVolCopy(float* out, const float* in1, const float* in2, const SDimensions& dims)
{
float* D_out;
float* D_in1;
float* D_in2;
+ size_t width = dims.iVolWidth;
unsigned int pitch;
- allocateVolume(D_out, width, height, pitch);
- copyVolumeToDevice(out, width, width, height, D_out, pitch);
- allocateVolume(D_in1, width, height, pitch);
- copyVolumeToDevice(in1, width, width, height, D_in1, pitch);
- allocateVolume(D_in2, width, height, pitch);
- copyVolumeToDevice(in2, width, width, height, D_in2, pitch);
+ allocateVolumeData(D_out, pitch, dims);
+ copyVolumeToDevice(out, width, dims, D_out, pitch);
+ allocateVolumeData(D_in1, pitch, dims);
+ copyVolumeToDevice(in1, width, dims, D_in1, pitch);
+ allocateVolumeData(D_in2, pitch, dims);
+ copyVolumeToDevice(in2, width, dims, D_in2, pitch);
- processVol<op>(D_out, D_in1, D_in2, pitch, width, height);
+ processVol<op>(D_out, D_in1, D_in2, pitch, dims);
- copyVolumeFromDevice(out, width, width, height, D_out, pitch);
+ copyVolumeFromDevice(out, width, dims, D_out, pitch);
cudaFree(D_out);
cudaFree(D_in1);
@@ -399,23 +404,24 @@ void processVolCopy(float* out, const float* in1, const float* in2, unsigned int
}
template<typename op>
-void processVolCopy(float* out, const float* in1, const float* in2, float param, unsigned int width, unsigned int height)
+void processVolCopy(float* out, const float* in1, const float* in2, float param, const SDimensions& dims)
{
float* D_out;
float* D_in1;
float* D_in2;
+ size_t width = dims.iVolWidth;
unsigned int pitch;
- allocateVolume(D_out, width, height, pitch);
- copyVolumeToDevice(out, width, width, height, D_out, pitch);
- allocateVolume(D_in1, width, height, pitch);
- copyVolumeToDevice(in1, width, width, height, D_in1, pitch);
- allocateVolume(D_in2, width, height, pitch);
- copyVolumeToDevice(in2, width, width, height, D_in2, pitch);
+ allocateVolumeData(D_out, pitch, dims);
+ copyVolumeToDevice(out, width, dims, D_out, pitch);
+ allocateVolumeData(D_in1, pitch, dims);
+ copyVolumeToDevice(in1, width, dims, D_in1, pitch);
+ allocateVolumeData(D_in2, pitch, dims);
+ copyVolumeToDevice(in2, width, dims, D_in2, pitch);
- processVol<op>(D_out, D_in1, D_in2, param, pitch, width, height);
+ processVol<op>(D_out, D_in1, D_in2, param, pitch, dims);
- copyVolumeFromDevice(out, width, width, height, D_out, pitch);
+ copyVolumeFromDevice(out, width, dims, D_out, pitch);
cudaFree(D_out);
cudaFree(D_in1);
@@ -429,9 +435,8 @@ void processVolCopy(float* out, const float* in1, const float* in2, float param,
-
template<typename op>
-void processVol(float* pfOut, unsigned int pitch, unsigned int width, unsigned int height)
+void processData(float* pfOut, unsigned int pitch, unsigned int width, unsigned int height)
{
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+511)/512);
@@ -442,7 +447,7 @@ void processVol(float* pfOut, unsigned int pitch, unsigned int width, unsigned i
}
template<typename op>
-void processVol(float* pfOut, float fParam, unsigned int pitch, unsigned int width, unsigned int height)
+void processData(float* pfOut, float fParam, unsigned int pitch, unsigned int width, unsigned int height)
{
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
@@ -453,7 +458,7 @@ void processVol(float* pfOut, float fParam, unsigned int pitch, unsigned int wid
}
template<typename op>
-void processVol(float* pfOut1, float* pfOut2, float fParam1, float fParam2, unsigned int pitch, unsigned int width, unsigned int height)
+void processData(float* pfOut1, float* pfOut2, float fParam1, float fParam2, unsigned int pitch, unsigned int width, unsigned int height)
{
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
@@ -465,7 +470,7 @@ void processVol(float* pfOut1, float* pfOut2, float fParam1, float fParam2, unsi
template<typename op>
-void processVol(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int width, unsigned int height)
+void processData(float* pfOut, const float* pfIn, unsigned int pitch, unsigned int width, unsigned int height)
{
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
@@ -476,7 +481,7 @@ void processVol(float* pfOut, const float* pfIn, unsigned int pitch, unsigned in
}
template<typename op>
-void processVol(float* pfOut, const float* pfIn, float fParam, unsigned int pitch, unsigned int width, unsigned int height)
+void processData(float* pfOut, const float* pfIn, float fParam, unsigned int pitch, unsigned int width, unsigned int height)
{
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
@@ -487,7 +492,7 @@ void processVol(float* pfOut, const float* pfIn, float fParam, unsigned int pitc
}
template<typename op>
-void processVol(float* pfOut, const float* pfIn1, const float* pfIn2, float fParam, unsigned int pitch, unsigned int width, unsigned int height)
+void processData(float* pfOut, const float* pfIn1, const float* pfIn2, float fParam, unsigned int pitch, unsigned int width, unsigned int height)
{
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
@@ -498,7 +503,7 @@ void processVol(float* pfOut, const float* pfIn1, const float* pfIn2, float fPar
}
template<typename op>
-void processVol(float* pfOut, const float* pfIn1, const float* pfIn2, unsigned int pitch, unsigned int width, unsigned int height)
+void processData(float* pfOut, const float* pfIn1, const float* pfIn2, unsigned int pitch, unsigned int width, unsigned int height)
{
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
@@ -515,6 +520,96 @@ void processVol(float* pfOut, const float* pfIn1, const float* pfIn2, unsigned i
+template<typename op>
+void processVol(float* out, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, pitch, dims.iVolWidth, dims.iVolHeight);
+}
+
+template<typename op>
+void processVol(float* out, float param, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, param, pitch, dims.iVolWidth, dims.iVolHeight);
+}
+
+template<typename op>
+void processVol(float* out1, float* out2, float param1, float param2, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out1, out2, param1, param2, pitch, dims.iVolWidth, dims.iVolHeight);
+}
+
+
+template<typename op>
+void processVol(float* out, const float* in, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, in, pitch, dims.iVolWidth, dims.iVolHeight);
+}
+
+template<typename op>
+void processVol(float* out, const float* in, float param, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, in, param, pitch, dims.iVolWidth, dims.iVolHeight);
+}
+
+template<typename op>
+void processVol(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, in1, in2, pitch, dims.iVolWidth, dims.iVolHeight);
+}
+
+template<typename op>
+void processVol(float* out, const float* in1, const float* in2, float param, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, in2, in2, param, pitch, dims.iVolWidth, dims.iVolHeight);
+}
+
+
+
+
+template<typename op>
+void processSino(float* out, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, pitch, dims.iProjDets, dims.iProjAngles);
+}
+
+template<typename op>
+void processSino(float* out, float param, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, param, pitch, dims.iProjDets, dims.iProjAngles);
+}
+
+template<typename op>
+void processSino(float* out1, float* out2, float param1, float param2, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out1, out2, param1, param2, pitch, dims.iProjDets, dims.iProjAngles);
+}
+
+
+template<typename op>
+void processSino(float* out, const float* in, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, in, pitch, dims.iProjDets, dims.iProjAngles);
+}
+
+template<typename op>
+void processSino(float* out, const float* in, float param, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, in, param, pitch, dims.iProjDets, dims.iProjAngles);
+}
+
+template<typename op>
+void processSino(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, in1, in2, pitch, dims.iProjDets, dims.iProjAngles);
+}
+
+template<typename op>
+void processSino(float* out, const float* in1, const float* in2, float param, unsigned int pitch, const SDimensions& dims)
+{
+ processData<op>(out, in2, in2, param, pitch, dims.iProjDets, dims.iProjAngles);
+}
+
+
@@ -808,45 +903,52 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPit
#define INST_DFtoD(name) \
- template void processVolCopy<name>(float* out, const float* in, float param, unsigned int width, unsigned int height); \
- template void processVol<name>(float* out, const float* in, float param, unsigned int pitch, unsigned int width, unsigned int height); \
+ template void processVolCopy<name>(float* out, const float* in, float param, const SDimensions& dims); \
+ template void processVol<name>(float* out, const float* in, float param, unsigned int pitch, const SDimensions& dims); \
+ template void processSino<name>(float* out, const float* in, float param, unsigned int pitch, const SDimensions& dims); \
template void processVol3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, const SDimensions3D& dims); \
template void processSino3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, const SDimensions3D& dims);
#define INST_DtoD(name) \
- template void processVolCopy<name>(float* out, const float* in, unsigned int width, unsigned int height); \
- template void processVol<name>(float* out, const float* in, unsigned int pitch, unsigned int width, unsigned int height); \
+ template void processVolCopy<name>(float* out, const float* in, const SDimensions& dims); \
+ template void processVol<name>(float* out, const float* in, unsigned int pitch, const SDimensions& dims); \
+ template void processSino<name>(float* out, const float* in, unsigned int pitch, const SDimensions& dims); \
template void processVol3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensions3D& dims); \
template void processSino3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensions3D& dims);
#define INST_DDtoD(name) \
- template void processVolCopy<name>(float* out, const float* in1, const float* in2, unsigned int width, unsigned int height); \
- template void processVol<name>(float* out, const float* in1, const float* in2, unsigned int pitch, unsigned int width, unsigned int height); \
+ template void processVolCopy<name>(float* out, const float* in1, const float* in2, const SDimensions& dims); \
+ template void processVol<name>(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims); \
+ template void processSino<name>(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims); \
template void processVol3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, const SDimensions3D& dims); \
template void processSino3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, const SDimensions3D& dims);
#define INST_DDFtoD(name) \
- template void processVolCopy<name>(float* out, const float* in1, const float* in2, float fParam, unsigned int width, unsigned int height); \
- template void processVol<name>(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, unsigned int width, unsigned int height); \
+ template void processVolCopy<name>(float* out, const float* in1, const float* in2, float fParam, const SDimensions& dims); \
+ template void processVol<name>(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, const SDimensions& dims); \
+ template void processSino<name>(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, const SDimensions& dims); \
template void processVol3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, float fParam, const SDimensions3D& dims); \
template void processSino3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, float fParam, const SDimensions3D& dims);
#define INST_toD(name) \
- template void processVolCopy<name>(float* out, unsigned int width, unsigned int height); \
- template void processVol<name>(float* out, unsigned int pitch, unsigned int width, unsigned int height); \
+ template void processVolCopy<name>(float* out, const SDimensions& dims); \
+ template void processVol<name>(float* out, unsigned int pitch, const SDimensions& dims); \
+ template void processSino<name>(float* out, unsigned int pitch, const SDimensions& dims); \
template void processVol3D<name>(cudaPitchedPtr& out, const SDimensions3D& dims); \
template void processSino3D<name>(cudaPitchedPtr& out, const SDimensions3D& dims);
#define INST_FtoD(name) \
- template void processVolCopy<name>(float* out, float param, unsigned int width, unsigned int height); \
- template void processVol<name>(float* out, float param, unsigned int pitch, unsigned int width, unsigned int height); \
+ template void processVolCopy<name>(float* out, float param, const SDimensions& dims); \
+ template void processVol<name>(float* out, float param, unsigned int pitch, const SDimensions& dims); \
+ template void processSino<name>(float* out, float param, unsigned int pitch, const SDimensions& dims); \
template void processVol3D<name>(cudaPitchedPtr& out, float param, const SDimensions3D& dims); \
template void processSino3D<name>(cudaPitchedPtr& out, float param, const SDimensions3D& dims);
#define INST_FFtoDD(name) \
- template void processVolCopy<name>(float* out1, float* out2, float fParam1, float fParam2, unsigned int width, unsigned int height); \
- template void processVol<name>(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, unsigned int width, unsigned int height); \
+ template void processVolCopy<name>(float* out1, float* out2, float fParam1, float fParam2, const SDimensions& dims); \
+ template void processVol<name>(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, const SDimensions& dims); \
+ template void processSino<name>(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, const SDimensions& dims); \
template void processVol3D<name>(cudaPitchedPtr& out1, cudaPitchedPtr& out2, float fParam1, float fParam2, const SDimensions3D& dims); \
template void processSino3D<name>(cudaPitchedPtr& out1, cudaPitchedPtr& out2, float fParam1, float fParam2, const SDimensions3D& dims);
diff --git a/cuda/2d/arith.h b/cuda/2d/arith.h
index d745aef..c32a63a 100644
--- a/cuda/2d/arith.h
+++ b/cuda/2d/arith.h
@@ -55,21 +55,29 @@ struct opSetMaskedValues;
struct opMulMask;
-template<typename op> void processVolCopy(float* out, unsigned int width, unsigned int height);
-template<typename op> void processVolCopy(float* out, float param, unsigned int width, unsigned int height);
-template<typename op> void processVolCopy(float* out1, float* out2, float param1, float param2, unsigned int width, unsigned int height);
-template<typename op> void processVolCopy(float* out, const float* in, unsigned int width, unsigned int height);
-template<typename op> void processVolCopy(float* out, const float* in, float param, unsigned int width, unsigned int height);
-template<typename op> void processVolCopy(float* out, const float* in1, const float* in2, unsigned int width, unsigned int height);
-template<typename op> void processVolCopy(float* out, const float* in1, const float* in2, float param, unsigned int width, unsigned int height);
-
-template<typename op> void processVol(float* out, unsigned int pitch, unsigned int width, unsigned int height);
-template<typename op> void processVol(float* out, float fParam, unsigned int pitch, unsigned int width, unsigned int height);
-template<typename op> void processVol(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, unsigned int width, unsigned int height);
-template<typename op> void processVol(float* out, const float* in, unsigned int pitch, unsigned int width, unsigned int height);
-template<typename op> void processVol(float* out, const float* in, float fParam, unsigned int pitch, unsigned int width, unsigned int height);
-template<typename op> void processVol(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, unsigned int width, unsigned int height);
-template<typename op> void processVol(float* out, const float* in1, const float* in2, unsigned int pitch, unsigned int width, unsigned int height);
+template<typename op> void processVolCopy(float* out, const SDimensions& dims);
+template<typename op> void processVolCopy(float* out, float param, const SDimensions& dims);
+template<typename op> void processVolCopy(float* out1, float* out2, float param1, float param2, const SDimensions& dims);
+template<typename op> void processVolCopy(float* out, const float* in, const SDimensions& dims);
+template<typename op> void processVolCopy(float* out, const float* in, float param, const SDimensions& dims);
+template<typename op> void processVolCopy(float* out, const float* in1, const float* in2, const SDimensions& dims);
+template<typename op> void processVolCopy(float* out, const float* in1, const float* in2, float param, const SDimensions& dims);
+
+template<typename op> void processVol(float* out, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processVol(float* out, float fParam, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processVol(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processVol(float* out, const float* in, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processVol(float* out, const float* in, float fParam, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processVol(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processVol(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims);
+
+template<typename op> void processSino(float* out, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processSino(float* out, float fParam, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processSino(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processSino(float* out, const float* in, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processSino(float* out, const float* in, float fParam, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processSino(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, const SDimensions& dims);
+template<typename op> void processSino(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims);
template<typename op> void processVol3D(cudaPitchedPtr& out, const SDimensions3D& dims);
template<typename op> void processVol3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims);
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 4e69e8f..15e487c 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -268,17 +268,15 @@ bool AstraFBP::setSinogram(const float* pfSinogram,
return false;
bool ok = copySinogramToDevice(pfSinogram, iSinogramPitch,
- pData->dims.iProjDets,
- pData->dims.iProjAngles,
+ pData->dims,
pData->D_sinoData, pData->sinoPitch);
if (!ok)
return false;
// rescale sinogram to adjust for pixel size
- processVol<opMul>(pData->D_sinoData,
+ processSino<opMul>(pData->D_sinoData,
1.0f/(pData->fPixelSize*pData->fPixelSize),
- pData->sinoPitch,
- pData->dims.iProjDets, pData->dims.iProjAngles);
+ pData->sinoPitch, pData->dims);
pData->setStartReconstruction = false;
@@ -390,8 +388,7 @@ bool AstraFBP::run()
processVol<opMul>(pData->D_volumeData,
(M_PI / 2.0f) / (float)pData->dims.iProjAngles,
- pData->volumePitch,
- pData->dims.iVolWidth, pData->dims.iVolHeight);
+ pData->volumePitch, pData->dims);
return true;
}
@@ -402,8 +399,7 @@ bool AstraFBP::getReconstruction(float* pfReconstruction, unsigned int iReconstr
return false;
bool ok = copyVolumeFromDevice(pfReconstruction, iReconstructionPitch,
- pData->dims.iVolWidth,
- pData->dims.iVolHeight,
+ pData->dims,
pData->D_volumeData, pData->volumePitch);
if (!ok)
return false;
@@ -682,7 +678,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram,
}
ok = copyVolumeToDevice(pfVolume, dims.iVolWidth,
- dims.iVolWidth, dims.iVolHeight,
+ dims,
D_volumeData, volumePitch);
if (!ok) {
cudaFree(D_volumeData);
@@ -699,8 +695,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram,
}
ok = copySinogramFromDevice(pfSinogram, dims.iProjDets,
- dims.iProjDets,
- dims.iProjAngles,
+ dims,
D_sinoData, sinoPitch);
if (!ok) {
cudaFree(D_volumeData);
@@ -769,7 +764,7 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
}
ok = copyVolumeToDevice(pfVolume, dims.iVolWidth,
- dims.iVolWidth, dims.iVolHeight,
+ dims,
D_volumeData, volumePitch);
if (!ok) {
cudaFree(D_volumeData);
@@ -808,8 +803,7 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
}
ok = copySinogramFromDevice(pfSinogram, dims.iProjDets,
- dims.iProjDets,
- dims.iProjAngles,
+ dims,
D_sinoData, sinoPitch);
if (!ok) {
cudaFree(D_volumeData);
@@ -880,7 +874,7 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
}
ok = copyVolumeToDevice(pfVolume, dims.iVolWidth,
- dims.iVolWidth, dims.iVolHeight,
+ dims,
D_volumeData, volumePitch);
if (!ok) {
cudaFree(D_volumeData);
@@ -899,8 +893,7 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
}
ok = copySinogramFromDevice(pfSinogram, dims.iProjDets,
- dims.iProjDets,
- dims.iProjAngles,
+ dims,
D_sinoData, sinoPitch);
if (!ok) {
cudaFree(D_volumeData);
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index f4175e1..fce8beb 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -126,7 +126,7 @@ bool CGLS::iterate(unsigned int iterations)
if (useVolumeMask) {
// Use z as temporary storage here since it is unused
cudaMemcpy2D(D_z, sizeof(float)*zPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
- processVol<opMul>(D_z, D_maskData, zPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_z, D_maskData, zPitch, dims);
callFP(D_z, zPitch, D_r, rPitch, -1.0f);
} else {
callFP(D_volumeData, volumePitch, D_r, rPitch, -1.0f);
@@ -137,7 +137,7 @@ bool CGLS::iterate(unsigned int iterations)
zeroVolumeData(D_p, pPitch, dims);
callBP(D_p, pPitch, D_r, rPitch);
if (useVolumeMask)
- processVol<opMul>(D_p, D_maskData, pPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_p, D_maskData, pPitch, dims);
gamma = dotProduct2D(D_p, pPitch, dims.iVolWidth, dims.iVolHeight);
@@ -158,24 +158,24 @@ bool CGLS::iterate(unsigned int iterations)
float alpha = gamma / ww;
// x += alpha*p
- processVol<opAddScaled>(D_volumeData, D_p, alpha, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opAddScaled>(D_volumeData, D_p, alpha, volumePitch, dims);
// r -= alpha*w
- processVol<opAddScaled>(D_r, D_w, -alpha, rPitch, dims.iProjDets, dims.iProjAngles);
+ processSino<opAddScaled>(D_r, D_w, -alpha, rPitch, dims);
// z = A'*r
zeroVolumeData(D_z, zPitch, dims);
callBP(D_z, zPitch, D_r, rPitch);
if (useVolumeMask)
- processVol<opMul>(D_z, D_maskData, zPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_z, D_maskData, zPitch, dims);
float beta = 1.0f / gamma;
gamma = dotProduct2D(D_z, zPitch, dims.iVolWidth, dims.iVolHeight);
beta *= gamma;
// p = z + beta*p
- processVol<opScaleAndAdd>(D_p, D_z, beta, pPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opScaleAndAdd>(D_p, D_z, beta, pPitch, dims);
}
@@ -194,7 +194,7 @@ float CGLS::computeDiffNorm()
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
cudaMemcpy2D(D_z, sizeof(float)*zPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
- processVol<opMul>(D_z, D_maskData, zPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_z, D_maskData, zPitch, dims);
callFP(D_z, zPitch, D_w, wPitch, -1.0f);
} else {
callFP(D_volumeData, volumePitch, D_w, wPitch, -1.0f);
diff --git a/cuda/2d/darthelper.cu b/cuda/2d/darthelper.cu
index 064913a..9b5141b 100644
--- a/cuda/2d/darthelper.cu
+++ b/cuda/2d/darthelper.cu
@@ -54,14 +54,19 @@ void roiSelect(float* out, float radius, unsigned int width, unsigned int height
float* D_data;
unsigned int pitch;
- allocateVolume(D_data, width, height, pitch);
- copyVolumeToDevice(out, width, width, height, D_data, pitch);
+ // We abuse dims here...
+ SDimensions dims;
+ dims.iVolWidth = width;
+ dims.iVolHeight = width;
+
+ allocateVolumeData(D_data, pitch, dims);
+ copyVolumeToDevice(out, width, dims, D_data, pitch);
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
devRoiSelect<<<gridSize, blockSize>>>(D_data, radius, pitch, width, height);
- copyVolumeFromDevice(out, width, width, height, D_data, pitch);
+ copyVolumeFromDevice(out, width, dims, D_data, pitch);
cudaFree(D_data);
}
@@ -237,11 +242,16 @@ void dartMask(float* mask, const float* segmentation, unsigned int conn, unsigne
float* D_maskData;
unsigned int pitch;
- allocateVolume(D_segmentationData, width, height, pitch);
- copyVolumeToDevice(segmentation, width, width, height, D_segmentationData, pitch);
+ // We abuse dims here...
+ SDimensions dims;
+ dims.iVolWidth = width;
+ dims.iVolHeight = width;
+
+ allocateVolumeData(D_segmentationData, pitch, dims);
+ copyVolumeToDevice(segmentation, width, dims, D_segmentationData, pitch);
- allocateVolume(D_maskData, width, height, pitch);
- zeroVolume(D_maskData, pitch, width, height);
+ allocateVolumeData(D_maskData, pitch, dims);
+ zeroVolumeData(D_maskData, pitch, dims);
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
@@ -255,7 +265,7 @@ void dartMask(float* mask, const float* segmentation, unsigned int conn, unsigne
else
devADartMaskRadius<<<gridSize, blockSize>>>(D_maskData, D_segmentationData, conn, radius, threshold, pitch, width, height);
- copyVolumeFromDevice(mask, width, width, height, D_maskData, pitch);
+ copyVolumeFromDevice(mask, width, dims, D_maskData, pitch);
cudaFree(D_segmentationData);
cudaFree(D_maskData);
@@ -320,11 +330,16 @@ void dartSmoothing(float* out, const float* in, float b, unsigned int radius, un
float* D_outData;
unsigned int pitch;
- allocateVolume(D_inData, width, height, pitch);
- copyVolumeToDevice(in, width, width, height, D_inData, pitch);
+ // We abuse dims here...
+ SDimensions dims;
+ dims.iVolWidth = width;
+ dims.iVolHeight = width;
+
+ allocateVolumeData(D_inData, pitch, dims);
+ copyVolumeToDevice(in, width, dims, D_inData, pitch);
- allocateVolume(D_outData, width, height, pitch);
- zeroVolume(D_outData, pitch, width, height);
+ allocateVolumeData(D_outData, pitch, dims);
+ zeroVolumeData(D_outData, pitch, dims);
dim3 blockSize(16,16);
dim3 gridSize((width+15)/16, (height+15)/16);
@@ -333,7 +348,7 @@ void dartSmoothing(float* out, const float* in, float b, unsigned int radius, un
else
devDartSmoothingRadius<<<gridSize, blockSize>>>(D_outData, D_inData, b, radius, pitch, width, height);
- copyVolumeFromDevice(out, width, width, height, D_outData, pitch);
+ copyVolumeFromDevice(out, width, dims, D_outData, pitch);
cudaFree(D_outData);
cudaFree(D_inData);
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index b281516..c75f250 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -101,15 +101,15 @@ bool EM::precomputeWeights()
} else
#endif
{
- processVol<opSet>(D_projData, 1.0f, projPitch, dims.iProjDets, dims.iProjAngles);
+ processSino<opSet>(D_projData, 1.0f, projPitch, dims);
callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
}
- processVol<opInvert>(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
#if 0
if (useVolumeMask) {
// scale pixel weights with mask to zero out masked pixels
- processVol<opMul>(D_pixelWeight, D_maskData, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_pixelWeight, D_maskData, pixelPitch, dims);
}
#endif
@@ -133,14 +133,14 @@ bool EM::iterate(unsigned int iterations)
callFP(D_volumeData, volumePitch, D_projData, projPitch, 1.0f);
// Divide sinogram by FP (into projData)
- processVol<opDividedBy>(D_projData, D_sinoData, projPitch, dims.iProjDets, dims.iProjAngles);
+ processSino<opDividedBy>(D_projData, D_sinoData, projPitch, dims);
// Do BP of projData into tmpData
zeroVolumeData(D_tmpData, tmpPitch, dims);
callBP(D_tmpData, tmpPitch, D_projData, projPitch);
// Multiply volumeData with tmpData divided by pixel weights
- processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims);
}
@@ -155,7 +155,7 @@ float EM::computeDiffNorm()
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
- processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f);
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 79c00ef..048661f 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -150,14 +150,14 @@ bool SART::precomputeWeights()
zeroVolumeData(D_tmpData, tmpPitch, dims);
- processVol<opSet>(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opSet>(D_tmpData, 1.0f, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_lineWeight, linePitch, 1.0f);
cudaFree(D_tmpData);
D_tmpData = 0;
}
- processVol<opInvert>(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
+ processSino<opInvert>(D_lineWeight, linePitch, dims);
return true;
}
@@ -185,7 +185,7 @@ bool SART::iterate(unsigned int iterations)
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
- processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, -1.0f);
} else {
callFP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, -1.0f);
@@ -198,15 +198,15 @@ bool SART::iterate(unsigned int iterations)
// TODO: Try putting the masking directly in the BP
zeroVolumeData(D_tmpData, tmpPitch, dims);
callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle);
- processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims);
} else {
callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle);
}
if (useMinConstraint)
- processVol<opClampMin>(D_volumeData, fMinConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opClampMin>(D_volumeData, fMinConstraint, volumePitch, dims);
if (useMaxConstraint)
- processVol<opClampMax>(D_volumeData, fMaxConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opClampMax>(D_volumeData, fMaxConstraint, volumePitch, dims);
iteration++;
@@ -228,7 +228,7 @@ float SART::computeDiffNorm()
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
- processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f);
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index 1b0891a..c402864 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -114,14 +114,14 @@ bool SIRT::precomputeWeights()
if (useVolumeMask) {
callFP(D_maskData, maskPitch, D_lineWeight, linePitch, 1.0f);
} else {
- processVol<opSet>(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opSet>(D_tmpData, 1.0f, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_lineWeight, linePitch, 1.0f);
}
- processVol<opInvert>(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles);
+ processSino<opInvert>(D_lineWeight, linePitch, dims);
if (useSinogramMask) {
// scale line weights with sinogram mask to zero out masked sinogram pixels
- processVol<opMul>(D_lineWeight, D_smaskData, linePitch, dims.iProjDets, dims.iProjAngles);
+ processSino<opMul>(D_lineWeight, D_smaskData, linePitch, dims);
}
@@ -129,14 +129,14 @@ bool SIRT::precomputeWeights()
if (useSinogramMask) {
callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch);
} else {
- processVol<opSet>(D_projData, 1.0f, projPitch, dims.iProjDets, dims.iProjAngles);
+ processSino<opSet>(D_projData, 1.0f, projPitch, dims);
callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
}
- processVol<opInvert>(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
if (useVolumeMask) {
// scale pixel weights with mask to zero out masked pixels
- processVol<opMul>(D_pixelWeight, D_maskData, pixelPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_pixelWeight, D_maskData, pixelPitch, dims);
}
return true;
@@ -162,7 +162,7 @@ bool SIRT::uploadMinMaxMasks(const float* pfMinMaskData, const float* pfMaxMaskD
if (pfMinMaskData) {
allocateVolumeData(D_minMaskData, minMaskPitch, dims);
ok = copyVolumeToDevice(pfMinMaskData, iPitch,
- dims.iVolWidth, dims.iVolHeight,
+ dims,
D_minMaskData, minMaskPitch);
}
if (!ok)
@@ -171,7 +171,7 @@ bool SIRT::uploadMinMaxMasks(const float* pfMinMaskData, const float* pfMaxMaskD
if (pfMaxMaskData) {
allocateVolumeData(D_maxMaskData, maxMaskPitch, dims);
ok = copyVolumeToDevice(pfMaxMaskData, iPitch,
- dims.iVolWidth, dims.iVolHeight,
+ dims,
D_maxMaskData, maxMaskPitch);
}
if (!ok)
@@ -196,28 +196,28 @@ bool SIRT::iterate(unsigned int iterations)
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
- processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f);
}
- processVol<opMul>(D_projData, D_lineWeight, projPitch, dims.iProjDets, dims.iProjAngles);
+ processSino<opMul>(D_projData, D_lineWeight, projPitch, dims);
zeroVolumeData(D_tmpData, tmpPitch, dims);
callBP(D_tmpData, tmpPitch, D_projData, projPitch);
- processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims);
if (useMinConstraint)
- processVol<opClampMin>(D_volumeData, fMinConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opClampMin>(D_volumeData, fMinConstraint, volumePitch, dims);
if (useMaxConstraint)
- processVol<opClampMax>(D_volumeData, fMaxConstraint, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opClampMax>(D_volumeData, fMaxConstraint, volumePitch, dims);
if (D_minMaskData)
- processVol<opClampMinMask>(D_volumeData, D_minMaskData, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opClampMinMask>(D_volumeData, D_minMaskData, volumePitch, dims);
if (D_maxMaskData)
- processVol<opClampMaxMask>(D_volumeData, D_maxMaskData, volumePitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opClampMaxMask>(D_volumeData, D_maxMaskData, volumePitch, dims);
}
return true;
@@ -231,7 +231,7 @@ float SIRT::computeDiffNorm()
// do FP, subtracting projection from sinogram
if (useVolumeMask) {
cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice);
- processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight);
+ processVol<opMul>(D_tmpData, D_maskData, tmpPitch, dims);
callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f);
} else {
callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f);
@@ -332,7 +332,7 @@ int main()
delete[] angle;
- copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
+ copyVolumeFromDevice(img, dims.iVolWidth, dims, D_volumeData, volumePitch);
saveImage("vol.png",dims.iVolHeight,dims.iVolWidth,img);
diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu
index d5cbe44..8d3b625 100644
--- a/cuda/2d/util.cu
+++ b/cuda/2d/util.cu
@@ -33,9 +33,12 @@ $Id$
namespace astraCUDA {
bool copyVolumeToDevice(const float* in_data, unsigned int in_pitch,
- unsigned int width, unsigned int height,
+ const SDimensions& dims,
float* outD_data, unsigned int out_pitch)
{
+ size_t width = dims.iVolWidth;
+ size_t height = dims.iVolHeight;
+ // TODO: memory order
cudaError_t err;
err = cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice);
ASTRA_CUDA_ASSERT(err);
@@ -44,9 +47,12 @@ bool copyVolumeToDevice(const float* in_data, unsigned int in_pitch,
}
bool copyVolumeFromDevice(float* out_data, unsigned int out_pitch,
- unsigned int width, unsigned int height,
+ const SDimensions& dims,
float* inD_data, unsigned int in_pitch)
{
+ size_t width = dims.iVolWidth;
+ size_t height = dims.iVolHeight;
+ // TODO: memory order
cudaError_t err = cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost);
ASTRA_CUDA_ASSERT(err);
return true;
@@ -54,18 +60,24 @@ bool copyVolumeFromDevice(float* out_data, unsigned int out_pitch,
bool copySinogramFromDevice(float* out_data, unsigned int out_pitch,
- unsigned int width, unsigned int height,
+ const SDimensions& dims,
float* inD_data, unsigned int in_pitch)
-{
+{
+ size_t width = dims.iProjDets;
+ size_t height = dims.iProjAngles;
+ // TODO: memory order
cudaError_t err = cudaMemcpy2D(out_data, sizeof(float)*out_pitch, inD_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyDeviceToHost);
ASTRA_CUDA_ASSERT(err);
return true;
}
bool copySinogramToDevice(const float* in_data, unsigned int in_pitch,
- unsigned int width, unsigned int height,
+ const SDimensions& dims,
float* outD_data, unsigned int out_pitch)
-{
+{
+ size_t width = dims.iProjDets;
+ size_t height = dims.iProjAngles;
+ // TODO: memory order
cudaError_t err;
err = cudaMemcpy2D(outD_data, sizeof(float)*out_pitch, in_data, sizeof(float)*in_pitch, sizeof(float)*width, height, cudaMemcpyHostToDevice);
ASTRA_CUDA_ASSERT(err);
@@ -99,25 +111,21 @@ void zeroVolume(float* data, unsigned int pitch, unsigned int width, unsigned in
bool allocateVolumeData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims)
{
- // TODO: memory order
return allocateVolume(D_ptr, dims.iVolWidth, dims.iVolHeight, pitch);
}
bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims)
{
- // TODO: memory order
return allocateVolume(D_ptr, dims.iProjDets, dims.iProjAngles, pitch);
}
void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims)
{
- // TODO: memory order
zeroVolume(D_ptr, pitch, dims.iVolWidth, dims.iVolHeight);
}
void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims)
{
- // TODO: memory order
zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles);
}
@@ -198,6 +206,7 @@ __global__ void reduce2D(float *g_idata, float *g_odata,
float dotProduct2D(float* D_data, unsigned int pitch,
unsigned int width, unsigned int height)
{
+#warning FIX MEMORY ORDER
unsigned int bx = (width + 15) / 16;
unsigned int by = (height + 127) / 128;
unsigned int shared_mem2 = sizeof(float) * 16 * 16;
diff --git a/cuda/2d/util.h b/cuda/2d/util.h
index 3cffa08..83cb794 100644
--- a/cuda/2d/util.h
+++ b/cuda/2d/util.h
@@ -60,16 +60,16 @@ $Id$
namespace astraCUDA {
bool copyVolumeToDevice(const float* in_data, unsigned int in_pitch,
- unsigned int width, unsigned int height,
+ const SDimensions& dims,
float* outD_data, unsigned int out_pitch);
bool copyVolumeFromDevice(float* out_data, unsigned int out_pitch,
- unsigned int width, unsigned int height,
+ const SDimensions& dims,
float* inD_data, unsigned int in_pitch);
bool copySinogramFromDevice(float* out_data, unsigned int out_pitch,
- unsigned int width, unsigned int height,
+ const SDimensions& dims,
float* inD_data, unsigned int in_pitch);
bool copySinogramToDevice(const float* in_data, unsigned int in_pitch,
- unsigned int width, unsigned int height,
+ const SDimensions& dims,
float* outD_data, unsigned int out_pitch);
bool allocateVolume(float*& D_ptr, unsigned int width, unsigned int height, unsigned int& pitch);