summaryrefslogtreecommitdiffstats
path: root/cuda/3d
diff options
context:
space:
mode:
Diffstat (limited to 'cuda/3d')
-rw-r--r--cuda/3d/algo3d.cu7
-rw-r--r--cuda/3d/algo3d.h3
-rw-r--r--cuda/3d/astra3d.cu12
-rw-r--r--cuda/3d/cgls3d.cu4
-rw-r--r--cuda/3d/cone_bp.cu24
-rw-r--r--cuda/3d/cone_bp.h7
-rw-r--r--cuda/3d/par3d_bp.cu23
-rw-r--r--cuda/3d/par3d_bp.h6
-rw-r--r--cuda/3d/sirt3d.cu6
9 files changed, 54 insertions, 38 deletions
diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu
index 7f61280..b775438 100644
--- a/cuda/3d/algo3d.cu
+++ b/cuda/3d/algo3d.cu
@@ -94,12 +94,13 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData,
}
bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData,
- cudaPitchedPtr& D_projData)
+ cudaPitchedPtr& D_projData,
+ float outputScale)
{
if (coneProjs) {
- return ConeBP(D_volumeData, D_projData, dims, coneProjs);
+ return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale);
} else {
- return Par3DBP(D_volumeData, D_projData, dims, par3DProjs);
+ return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale);
}
}
diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h
index f4c6a87..35ffc49 100644
--- a/cuda/3d/algo3d.h
+++ b/cuda/3d/algo3d.h
@@ -51,7 +51,8 @@ protected:
cudaPitchedPtr& D_projData,
float outputScale);
bool callBP(cudaPitchedPtr& D_volumeData,
- cudaPitchedPtr& D_projData);
+ cudaPitchedPtr& D_projData,
+ float outputScale);
SDimensions3D dims;
SConeProjection* coneProjs;
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index b2375f3..7589416 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -1252,9 +1252,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,
}
if (pParProjs)
- ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs);
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f);
else
- ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs);
+ ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);
ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
@@ -1330,9 +1330,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
processSino3D<opSet>(D_projData, 1.0f, dims);
if (pParProjs)
- ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs);
+ ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, 1.0f);
else
- ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs);
+ ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, 1.0f);
processVol3D<opInvert>(D_pixelWeight, dims);
if (!ok) {
@@ -1347,9 +1347,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
ok &= zeroVolumeData(D_volumeData, dims);
// Do BP into D_volumeData
if (pParProjs)
- ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs);
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f);
else
- ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs);
+ ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f);
// Multiply with weights
processVol3D<opMul>(D_volumeData, D_pixelWeight, dims);
diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu
index 5071a9b..4f632f3 100644
--- a/cuda/3d/cgls3d.cu
+++ b/cuda/3d/cgls3d.cu
@@ -165,7 +165,7 @@ bool CGLS::iterate(unsigned int iterations)
// p = A'*r
zeroVolumeData(D_p, dims);
- callBP(D_p, D_r);
+ callBP(D_p, D_r, 1.0f);
if (useVolumeMask)
processVol3D<opMul>(D_p, D_maskData, dims);
@@ -195,7 +195,7 @@ bool CGLS::iterate(unsigned int iterations)
// z = A'*r
zeroVolumeData(D_z, dims);
- callBP(D_z, D_r);
+ callBP(D_z, D_r, 1.0f);
if (useVolumeMask)
processVol3D<opMul>(D_z, D_maskData, dims);
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index 5648d6f..5e67980 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -78,7 +78,8 @@ bool bindProjDataTexture(const cudaArray* array)
//__launch_bounds__(32*16, 4)
__global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle,
- int angleOffset, const astraCUDA3d::SDimensions3D dims)
+ int angleOffset, const astraCUDA3d::SDimensions3D dims,
+ float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -147,13 +148,13 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
endZ = dims.iVolZ - startZ;
for(int i=0; i < endZ; i++)
- volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i];
+ volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale;
} //End kernel
// supersampling version
-__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -189,6 +190,9 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+
+
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
{
@@ -236,14 +240,15 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
}
- volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale;
}
}
bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
- const SDimensions3D& dims, const SConeProjection* angles)
+ const SDimensions3D& dims, const SConeProjection* angles,
+ float fOutputScale)
{
bindProjDataTexture(D_projArray);
@@ -291,9 +296,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
if (dims.iRaysPerVoxelDim == 1)
- dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+ dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
else
- dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+ dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -309,14 +314,15 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- const SDimensions3D& dims, const SConeProjection* angles)
+ const SDimensions3D& dims, const SConeProjection* angles,
+ float fOutputScale)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles);
+ bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
cudaFreeArray(cuArray);
diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h
index cba6d9f..4d3d2dd 100644
--- a/cuda/3d/cone_bp.h
+++ b/cuda/3d/cone_bp.h
@@ -33,13 +33,14 @@ namespace astraCUDA3d {
_AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
- const SDimensions3D& dims, const SConeProjection* angles);
+ const SDimensions3D& dims, const SConeProjection* angles,
+ float fOutputScale);
_AstraExport bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- const SDimensions3D& dims, const SConeProjection* angles);
+ const SDimensions3D& dims, const SConeProjection* angles,
+ float fOutputScale);
-
}
#endif
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu
index 0c33280..1217949 100644
--- a/cuda/3d/par3d_bp.cu
+++ b/cuda/3d/par3d_bp.cu
@@ -77,7 +77,7 @@ static bool bindProjDataTexture(const cudaArray* array)
}
-__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -139,11 +139,11 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn
endZ = dims.iVolZ - startZ;
for(int i=0; i < endZ; i++)
- volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i];
+ volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale;
}
// supersampling version
-__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -180,6 +180,9 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+
+
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
{
@@ -217,14 +220,15 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
}
- volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale;
}
}
bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
- const SDimensions3D& dims, const SPar3DProjection* angles)
+ const SDimensions3D& dims, const SPar3DProjection* angles,
+ float fOutputScale)
{
bindProjDataTexture(D_projArray);
@@ -271,9 +275,9 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
if (dims.iRaysPerVoxelDim == 1)
- dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+ dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
else
- dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+ dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -288,14 +292,15 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
bool Par3DBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- const SDimensions3D& dims, const SPar3DProjection* angles)
+ const SDimensions3D& dims, const SPar3DProjection* angles,
+ float fOutputScale)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles);
+ bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
cudaFreeArray(cuArray);
diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h
index ece37d1..f1fc62d 100644
--- a/cuda/3d/par3d_bp.h
+++ b/cuda/3d/par3d_bp.h
@@ -33,11 +33,13 @@ namespace astraCUDA3d {
_AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
- const SDimensions3D& dims, const SPar3DProjection* angles);
+ const SDimensions3D& dims, const SPar3DProjection* angles,
+ float fOutputScale);
_AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- const SDimensions3D& dims, const SPar3DProjection* angles);
+ const SDimensions3D& dims, const SPar3DProjection* angles,
+ float fOutputScale);
}
diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu
index 389ee6b..0e6630a 100644
--- a/cuda/3d/sirt3d.cu
+++ b/cuda/3d/sirt3d.cu
@@ -160,10 +160,10 @@ bool SIRT::precomputeWeights()
zeroVolumeData(D_pixelWeight, dims);
if (useSinogramMask) {
- callBP(D_pixelWeight, D_smaskData);
+ callBP(D_pixelWeight, D_smaskData, 1.0f);
} else {
processSino3D<opSet>(D_projData, 1.0f, dims);
- callBP(D_pixelWeight, D_projData);
+ callBP(D_pixelWeight, D_projData, 1.0f);
}
#if 0
float* bufp = new float[512*512];
@@ -293,7 +293,7 @@ bool SIRT::iterate(unsigned int iterations)
#endif
- callBP(D_tmpData, D_projData);
+ callBP(D_tmpData, D_projData, 1.0f);
#if 0
printf("Dumping tmpData: %p\n", (void*)D_tmpData.ptr);
float* buf = new float[256*256];