From 248639b4fee8659a4106dcc44d721149a1885018 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 5 Mar 2015 17:01:47 +0100 Subject: Add 3d geometry normalization functions --- cuda/3d/astra3d.cu | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++ cuda/3d/astra3d.h | 16 ++++++ 2 files changed, 166 insertions(+) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 0b9c70b..f672d6c 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -40,6 +40,12 @@ $Id$ #include "arith3d.h" #include "astra3d.h" +#include "astra/ParallelProjectionGeometry3D.h" +#include "astra/ParallelVecProjectionGeometry3D.h" +#include "astra/ConeProjectionGeometry3D.h" +#include "astra/ConeVecProjectionGeometry3D.h" +#include "astra/VolumeGeometry3D.h" + #include using namespace astraCUDA3d; @@ -137,6 +143,150 @@ static SPar3DProjection* genPar3DProjections(unsigned int iProjAngles, + +// adjust pProjs to normalize volume geometry +template +static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom, + unsigned int iProjectionAngleCount, + ProjectionT*& pProjs, + float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjs); + + // TODO: Relative instead of absolute + const float EPS = 0.00001f; + if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS) + return false; + if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthZ()) > EPS) + return false; + + + // Translate + float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; + float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; + float dz = -(pVolGeom->getWindowMinZ() + pVolGeom->getWindowMaxZ()) / 2; + + float factor = 1.0f / pVolGeom->getPixelLengthX(); + + for (int i = 0; i < iProjectionAngleCount; ++i) { + // CHECKME: Order of scaling and translation + pProjs[i].translate(dx, dy, dz); + pProjs[i].scale(factor); + } + + // CHECKME: Check factor + fOutputScale *= pVolGeom->getPixelLengthX(); + + return true; +} + + + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CParallelProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pProjs, float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionAngles()); + + int nth = pProjGeom->getProjectionCount(); + + pProjs = genPar3DProjections(nth, + pProjGeom->getDetectorColCount(), + pProjGeom->getDetectorRowCount(), + pProjGeom->getDetectorSpacingX(), + pProjGeom->getDetectorSpacingY(), + pProjGeom->getProjectionAngles()); + + bool ok; + + fOutputScale = 1.0f; + + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + + return ok; +} + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CParallelVecProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pProjs, float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionVectors()); + + int nth = pProjGeom->getProjectionCount(); + + pProjs = new SPar3DProjection[nth]; + for (int i = 0; i < nth; ++i) + pProjs[i] = pProjGeom->getProjectionVectors()[i]; + + bool ok; + + fOutputScale = 1.0f; + + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + + return ok; +} + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CConeProjectionGeometry3D* pProjGeom, + SConeProjection*& pProjs, float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionAngles()); + + int nth = pProjGeom->getProjectionCount(); + + pProjs = genConeProjections(nth, + pProjGeom->getDetectorColCount(), + pProjGeom->getDetectorRowCount(), + pProjGeom->getOriginSourceDistance(), + pProjGeom->getOriginDetectorDistance(), + pProjGeom->getDetectorSpacingX(), + pProjGeom->getDetectorSpacingY(), + pProjGeom->getProjectionAngles()); + + bool ok; + + fOutputScale = 1.0f; + + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + + return ok; +} + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CConeVecProjectionGeometry3D* pProjGeom, + SConeProjection*& pProjs, float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionAngles()); + + int nth = pProjGeom->getProjectionCount(); + + pProjs = new SConeProjection[nth]; + for (int i = 0; i < nth; ++i) + pProjs[i] = pProjGeom->getProjectionVectors()[i]; + + bool ok; + + fOutputScale = 1.0f; + + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + + return ok; +} + + + + + class AstraSIRT3d_internal { public: SDimensions3D dims; diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index f91fe26..47e252e 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -466,6 +466,22 @@ _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, bool bShortScan, int iGPUIndex, int iVoxelSuperSampling); +_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CParallelProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pProjs, float& fOutputScale); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CParallelVecProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pProjs, float& fOutputScale); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CConeProjectionGeometry3D* pProjGeom, + SConeProjection*& pProjs, float& fOutputScale); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CConeVecProjectionGeometry3D* pProjGeom, + SConeProjection*& pProjs, float& fOutputScale); + } -- cgit v1.2.3 From 5304d08cd1ab7b8d778c367912934376eb92370f Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Mon, 9 Mar 2015 15:43:56 +0100 Subject: Allow non-centered volume geometry in SIRT3D and CGLS3D --- cuda/3d/astra3d.cu | 284 ++++++++++++-------------------------------- cuda/3d/astra3d.h | 90 ++------------ src/CudaCglsAlgorithm3D.cpp | 39 +----- src/CudaSirtAlgorithm3D.cpp | 38 +----- 4 files changed, 90 insertions(+), 361 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index f672d6c..426f3a0 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -182,6 +182,20 @@ static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom, } +void convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + SDimensions3D& dims) +{ + dims.iVolX = pVolGeom->getGridColCount(); + dims.iVolY = pVolGeom->getGridRowCount(); + dims.iVolZ = pVolGeom->getGridSliceCount(); + dims.iProjAngles = pProjGeom->getProjectionCount(); + dims.iProjU = pProjGeom->getDetectorColCount(), + dims.iProjV = pProjGeom->getDetectorRowCount(), + dims.iRaysPerDetDim = 1; + dims.iRaysPerVoxelDim = 1; +} + bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CParallelProjectionGeometry3D* pProjGeom, @@ -370,127 +384,55 @@ AstraSIRT3d::~AstraSIRT3d() pData = 0; } -bool AstraSIRT3d::setReconstructionGeometry(unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ/*, - float fPixelSize = 1.0f*/) +bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom) { if (pData->initialized) return false; - pData->dims.iVolX = iVolX; - pData->dims.iVolY = iVolY; - pData->dims.iVolZ = iVolZ; + convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims); - return (iVolX > 0 && iVolY > 0 && iVolZ > 0); -} - - -bool AstraSIRT3d::setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection* projs) -{ - if (pData->initialized) + if (pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0) return false; - - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0) + if (pData->dims.iProjAngles <= 0 || pData->dims.iProjU <= 0 || pData->dims.iProjV <= 0) return false; - pData->parprojs = new SPar3DProjection[iProjAngles]; - memcpy(pData->parprojs, projs, iProjAngles * sizeof(projs[0])); - - pData->projType = PROJ_PARALLEL; - - return true; -} - -bool AstraSIRT3d::setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles) -{ - if (pData->initialized) - return false; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - pData->parprojs = p; - pData->projType = PROJ_PARALLEL; - - return true; -} - - + const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); + const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom); + const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(pProjGeom); + const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom); -bool AstraSIRT3d::setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection* projs) -{ - if (pData->initialized) - return false; + float outputScale; + bool ok; - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; + pData->projs = 0; + pData->parprojs = 0; + + if (conegeom) { + ok = convertAstraGeometry(pVolGeom, conegeom, pData->projs, outputScale); + pData->projType = PROJ_PARALLEL; + } else if (conevec3dgeom) { + ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pData->projs, outputScale); + pData->projType = PROJ_PARALLEL; + } else if (par3dgeom) { + ok = convertAstraGeometry(pVolGeom, par3dgeom, pData->parprojs, outputScale); + pData->projType = PROJ_CONE; + } else if (parvec3dgeom) { + ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pData->parprojs, outputScale); + pData->projType = PROJ_CONE; + } else { + ok = false; + } - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0) + if (!ok) return false; - pData->projs = new SConeProjection[iProjAngles]; - memcpy(pData->projs, projs, iProjAngles * sizeof(projs[0])); - pData->projType = PROJ_CONE; + // TODO: Handle outputScale return true; } -bool AstraSIRT3d::setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles) -{ - if (pData->initialized) - return false; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SConeProjection* p = genConeProjections(iProjAngles, - iProjU, iProjV, - fOriginSourceDistance, - fOriginDetectorDistance, - fDetUSize, fDetVSize, - pfAngles); - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - pData->projs = p; - pData->projType = PROJ_CONE; - - return true; -} bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling, unsigned int iDetectorSuperSampling) @@ -837,125 +779,51 @@ AstraCGLS3d::~AstraCGLS3d() pData = 0; } -bool AstraCGLS3d::setReconstructionGeometry(unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ/*, - float fPixelSize = 1.0f*/) +bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom) { if (pData->initialized) return false; - pData->dims.iVolX = iVolX; - pData->dims.iVolY = iVolY; - pData->dims.iVolZ = iVolZ; + convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims); - return (iVolX > 0 && iVolY > 0 && iVolZ > 0); -} - - -bool AstraCGLS3d::setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection* projs) -{ - if (pData->initialized) + if (pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0) return false; - - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0) + if (pData->dims.iProjAngles <= 0 || pData->dims.iProjU <= 0 || pData->dims.iProjV <= 0) return false; - pData->parprojs = new SPar3DProjection[iProjAngles]; - memcpy(pData->parprojs, projs, iProjAngles * sizeof(projs[0])); + const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); + const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom); + const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(pProjGeom); + const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom); - pData->projType = PROJ_PARALLEL; - - return true; -} - -bool AstraCGLS3d::setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles) -{ - if (pData->initialized) - return false; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - pData->parprojs = p; - pData->projType = PROJ_PARALLEL; - - return true; -} - - - -bool AstraCGLS3d::setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection* projs) -{ - if (pData->initialized) - return false; - - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0) - return false; - - pData->projs = new SConeProjection[iProjAngles]; - memcpy(pData->projs, projs, iProjAngles * sizeof(projs[0])); - - pData->projType = PROJ_CONE; - - return true; -} + float outputScale; + bool ok; -bool AstraCGLS3d::setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles) -{ - if (pData->initialized) - return false; + pData->projs = 0; + pData->parprojs = 0; + + if (conegeom) { + ok = convertAstraGeometry(pVolGeom, conegeom, pData->projs, outputScale); + pData->projType = PROJ_PARALLEL; + } else if (conevec3dgeom) { + ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pData->projs, outputScale); + pData->projType = PROJ_PARALLEL; + } else if (par3dgeom) { + ok = convertAstraGeometry(pVolGeom, par3dgeom, pData->parprojs, outputScale); + pData->projType = PROJ_CONE; + } else if (parvec3dgeom) { + ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pData->parprojs, outputScale); + pData->projType = PROJ_CONE; + } else { + ok = false; + } - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + if (!ok) return false; - SConeProjection* p = genConeProjections(iProjAngles, - iProjU, iProjV, - fOriginSourceDistance, - fOriginDetectorDistance, - fDetUSize, fDetVSize, - pfAngles); - - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - pData->projs = p; - pData->projType = PROJ_CONE; + // TODO: Handle outputScale return true; } diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 47e252e..cab5479 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -42,7 +42,12 @@ enum Cuda3DProjectionKernel { ker3d_sum_square_weights }; - +class CProjectionGeometry3D; +class CParallelProjectionGeometry3D; +class CParallelVecProjectionGeometry3D; +class CConeProjectionGeometry3D; +class CConeVecProjectionGeometry3D; +class CVolumeGeometry3D; class AstraSIRT3d_internal; @@ -52,37 +57,9 @@ public: AstraSIRT3d(); ~AstraSIRT3d(); - // Set the number of pixels in the reconstruction rectangle, - // and the length of the edge of a pixel. - // Volume pixels are assumed to be square. - // This must be called before setting the projection geometry. - bool setReconstructionGeometry(unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ/*, - float fPixelSize = 1.0f*/); - - bool setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection* projs); - bool setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fSourceZ, - float fDetSize, - const float *pfAngles); - bool setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection* projs); - bool setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fSourceZ, - float fDetSize, - const float *pfAngles); + // Set the volume and projection geometry + bool setGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom); // Enable supersampling. // @@ -197,37 +174,9 @@ public: AstraCGLS3d(); ~AstraCGLS3d(); - // Set the number of pixels in the reconstruction rectangle, - // and the length of the edge of a pixel. - // Volume pixels are assumed to be square. - // This must be called before setting the projection geometry. - bool setReconstructionGeometry(unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ/*, - float fPixelSize = 1.0f*/); - - bool setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection* projs); - bool setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fSourceZ, - float fDetSize, - const float *pfAngles); - bool setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection* projs); - bool setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fSourceZ, - float fDetSize, - const float *pfAngles); + // Set the volume and projection geometry + bool setGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom); // Enable supersampling. // @@ -466,21 +415,6 @@ _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, bool bShortScan, int iGPUIndex, int iVoxelSuperSampling); -_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, - const CParallelProjectionGeometry3D* pProjGeom, - SPar3DProjection*& pProjs, float& fOutputScale); - -_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, - const CParallelVecProjectionGeometry3D* pProjGeom, - SPar3DProjection*& pProjs, float& fOutputScale); - -_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, - const CConeProjectionGeometry3D* pProjGeom, - SConeProjection*& pProjs, float& fOutputScale); - -_AstraExport bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, - const CConeVecProjectionGeometry3D* pProjGeom, - SConeProjection*& pProjs, float& fOutputScale); } diff --git a/src/CudaCglsAlgorithm3D.cpp b/src/CudaCglsAlgorithm3D.cpp index a5500d6..3677458 100644 --- a/src/CudaCglsAlgorithm3D.cpp +++ b/src/CudaCglsAlgorithm3D.cpp @@ -171,9 +171,6 @@ void CCudaCglsAlgorithm3D::run(int _iNrIterations) ASTRA_ASSERT(m_bIsInitialized); const CProjectionGeometry3D* projgeom = m_pSinogram->getGeometry(); - const CConeProjectionGeometry3D* conegeom = dynamic_cast(projgeom); - const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(projgeom); - const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(projgeom); const CVolumeGeometry3D& volgeom = *m_pReconstruction->getGeometry(); bool ok = true; @@ -182,41 +179,7 @@ void CCudaCglsAlgorithm3D::run(int _iNrIterations) ok &= m_pCgls->setGPUIndex(m_iGPUIndex); - ok &= m_pCgls->setReconstructionGeometry(volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount()); -/* - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles) -*/ - if (conegeom) { - ok &= m_pCgls->setConeGeometry(conegeom->getProjectionCount(), - conegeom->getDetectorColCount(), - conegeom->getDetectorRowCount(), - conegeom->getOriginSourceDistance(), - conegeom->getOriginDetectorDistance(), - conegeom->getDetectorSpacingX(), - conegeom->getDetectorSpacingY(), - conegeom->getProjectionAngles()); - } else if (parvec3dgeom) { - ok &= m_pCgls->setPar3DGeometry(parvec3dgeom->getProjectionCount(), - parvec3dgeom->getDetectorColCount(), - parvec3dgeom->getDetectorRowCount(), - parvec3dgeom->getProjectionVectors()); - } else if (conevec3dgeom) { - ok &= m_pCgls->setConeGeometry(conevec3dgeom->getProjectionCount(), - conevec3dgeom->getDetectorColCount(), - conevec3dgeom->getDetectorRowCount(), - conevec3dgeom->getProjectionVectors()); - } else { - ASTRA_ASSERT(false); - } + ok &= m_pCgls->setGeometry(&volgeom, projgeom); ok &= m_pCgls->enableSuperSampling(m_iVoxelSuperSampling, m_iDetectorSuperSampling); diff --git a/src/CudaSirtAlgorithm3D.cpp b/src/CudaSirtAlgorithm3D.cpp index da83c7e..d67778f 100644 --- a/src/CudaSirtAlgorithm3D.cpp +++ b/src/CudaSirtAlgorithm3D.cpp @@ -172,10 +172,6 @@ void CCudaSirtAlgorithm3D::run(int _iNrIterations) ASTRA_ASSERT(m_bIsInitialized); const CProjectionGeometry3D* projgeom = m_pSinogram->getGeometry(); - const CConeProjectionGeometry3D* conegeom = dynamic_cast(projgeom); - const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(projgeom); - const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(projgeom); - const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(projgeom); const CVolumeGeometry3D& volgeom = *m_pReconstruction->getGeometry(); bool ok = true; @@ -184,39 +180,7 @@ void CCudaSirtAlgorithm3D::run(int _iNrIterations) ok &= m_pSirt->setGPUIndex(m_iGPUIndex); - ok &= m_pSirt->setReconstructionGeometry(volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount()); - - if (conegeom) { - ok &= m_pSirt->setConeGeometry(conegeom->getProjectionCount(), - conegeom->getDetectorColCount(), - conegeom->getDetectorRowCount(), - conegeom->getOriginSourceDistance(), - conegeom->getOriginDetectorDistance(), - conegeom->getDetectorSpacingX(), - conegeom->getDetectorSpacingY(), - conegeom->getProjectionAngles()); - } else if (par3dgeom) { - ok &= m_pSirt->setPar3DGeometry(par3dgeom->getProjectionCount(), - par3dgeom->getDetectorColCount(), - par3dgeom->getDetectorRowCount(), - par3dgeom->getDetectorSpacingX(), - par3dgeom->getDetectorSpacingY(), - par3dgeom->getProjectionAngles()); - } else if (parvec3dgeom) { - ok &= m_pSirt->setPar3DGeometry(parvec3dgeom->getProjectionCount(), - parvec3dgeom->getDetectorColCount(), - parvec3dgeom->getDetectorRowCount(), - parvec3dgeom->getProjectionVectors()); - } else if (conevec3dgeom) { - ok &= m_pSirt->setConeGeometry(conevec3dgeom->getProjectionCount(), - conevec3dgeom->getDetectorColCount(), - conevec3dgeom->getDetectorRowCount(), - conevec3dgeom->getProjectionVectors()); - } else { - ASTRA_ASSERT(false); - } + ok &= m_pSirt->setGeometry(&volgeom, projgeom); ok &= m_pSirt->enableSuperSampling(m_iVoxelSuperSampling, m_iDetectorSuperSampling); -- cgit v1.2.3 From 9eb68c39c62a8e674e3dbe50252528226c6593ff Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 11 Mar 2015 12:06:11 +0100 Subject: Adjust interface slightly --- cuda/3d/astra3d.cu | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 426f3a0..5b1f363 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -182,7 +182,7 @@ static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom, } -void convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, +bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom, SDimensions3D& dims) { @@ -194,6 +194,13 @@ void convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, dims.iProjV = pProjGeom->getDetectorRowCount(), dims.iRaysPerDetDim = 1; dims.iRaysPerVoxelDim = 1; + + if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0) + return false; + if (dims.iProjAngles <= 0 || dims.iProjU <= 0 || dims.iProjV <= 0) + return false; + + return true; } @@ -390,11 +397,9 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, if (pData->initialized) return false; - convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims); + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims); - if (pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0) - return false; - if (pData->dims.iProjAngles <= 0 || pData->dims.iProjU <= 0 || pData->dims.iProjV <= 0) + if (!ok) return false; const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); @@ -403,7 +408,6 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom); float outputScale; - bool ok; pData->projs = 0; pData->parprojs = 0; @@ -785,11 +789,9 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, if (pData->initialized) return false; - convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims); + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims); - if (pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0 || pData->dims.iVolX <= 0) - return false; - if (pData->dims.iProjAngles <= 0 || pData->dims.iProjU <= 0 || pData->dims.iProjV <= 0) + if (!ok) return false; const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); @@ -798,7 +800,6 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom); float outputScale; - bool ok; pData->projs = 0; pData->parprojs = 0; -- cgit v1.2.3 From 140f64028a6c06895ba7dad8997e14b7a05aadab Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 11 Mar 2015 12:07:48 +0100 Subject: Let astraCudaFDK use utility functions --- cuda/3d/astra3d.cu | 36 ++++++++++++++---------------------- cuda/3d/astra3d.h | 13 ++----------- src/CudaFDKAlgorithm3D.cpp | 12 +----------- 3 files changed, 17 insertions(+), 44 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 5b1f363..0e94fb8 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1679,33 +1679,19 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, bool astraCudaFDK(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, + const CVolumeGeometry3D* pVolGeom, + const CConeProjectionGeometry3D* pProjGeom, bool bShortScan, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; + // TODO: Check that pVolGeom is normalized, since we don't support + // other volume geometries yet - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + if (!ok) return false; dims.iRaysPerVoxelDim = iVoxelSuperSampling; @@ -1722,9 +1708,8 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections, return false; } - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; + ok = D_volumeData.ptr; if (!ok) return false; @@ -1745,6 +1730,13 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections, return false; } + float fOriginSourceDistance = pProjGeom->getOriginSourceDistance(); + float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance(); + float fDetUSize = pProjGeom->getDetectorSpacingX(); + float fDetVSize = pProjGeom->getDetectorSpacingY(); + const float *pfAngles = pProjGeom->getProjectionAngles(); + + // TODO: Offer interface for SrcZ, DetZ ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance, fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize, diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index cab5479..6bac8b2 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -401,17 +401,8 @@ _AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pf int iGPUIndex, int iVoxelSuperSampling); _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, + const CVolumeGeometry3D* pVolGeom, + const CConeProjectionGeometry3D* pProjGeom, bool bShortScan, int iGPUIndex, int iVoxelSuperSampling); diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp index 7638696..0a46ff6 100644 --- a/src/CudaFDKAlgorithm3D.cpp +++ b/src/CudaFDKAlgorithm3D.cpp @@ -171,17 +171,7 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations) bool ok = true; ok = astraCudaFDK(pReconMem->getData(), pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - conegeom->getProjectionCount(), - conegeom->getDetectorColCount(), - conegeom->getDetectorRowCount(), - conegeom->getOriginSourceDistance(), - conegeom->getOriginDetectorDistance(), - conegeom->getDetectorSpacingX(), - conegeom->getDetectorSpacingY(), - conegeom->getProjectionAngles(), + &volgeom, conegeom, m_bShortScan, m_iGPUIndex, m_iVoxelSuperSampling); ASTRA_ASSERT(ok); -- cgit v1.2.3 From e188bcdaaffee075adf5fa4371453d91bcb71225 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 11 Mar 2015 14:43:49 +0100 Subject: Add another utility function --- cuda/3d/astra3d.cu | 99 +++++++++++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 46 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 0e94fb8..eff928d 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -305,6 +305,37 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, } +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CConeVecProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pParProjs, + SConeProjection*& pConeProjs, + float& fOutputScale) +{ + const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); + const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom); + const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(pProjGeom); + const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom); + + pConeProjs = 0; + pParProjs = 0; + + bool ok; + + if (conegeom) { + ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, outputScale); + } else if (conevec3dgeom) { + ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, outputScale); + } else if (par3dgeom) { + ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, outputScale); + } else if (parvec3dgeom) { + ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, outputScale); + } else { + ok = false; + } + + return ok; +} + @@ -402,35 +433,23 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, if (!ok) return false; - const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); - const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom); - const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(pProjGeom); - const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom); - - float outputScale; - pData->projs = 0; pData->parprojs = 0; + float outputScale; - if (conegeom) { - ok = convertAstraGeometry(pVolGeom, conegeom, pData->projs, outputScale); - pData->projType = PROJ_PARALLEL; - } else if (conevec3dgeom) { - ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pData->projs, outputScale); - pData->projType = PROJ_PARALLEL; - } else if (par3dgeom) { - ok = convertAstraGeometry(pVolGeom, par3dgeom, pData->parprojs, outputScale); - pData->projType = PROJ_CONE; - } else if (parvec3dgeom) { - ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pData->parprojs, outputScale); - pData->projType = PROJ_CONE; - } else { - ok = false; - } - + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pData->parprojs, pData->projs, + outputScale); if (!ok) return false; + if (pData->projs) { + assert(pData->parprojs == 0); + pData->projType = PROJ_CONE; + } else { + assert(pData->parprojs != 0); + pData->projType = PROJ_PARALLEL; + } // TODO: Handle outputScale @@ -794,35 +813,23 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, if (!ok) return false; - const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); - const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom); - const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(pProjGeom); - const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom); - - float outputScale; - pData->projs = 0; pData->parprojs = 0; + float outputScale; - if (conegeom) { - ok = convertAstraGeometry(pVolGeom, conegeom, pData->projs, outputScale); - pData->projType = PROJ_PARALLEL; - } else if (conevec3dgeom) { - ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pData->projs, outputScale); - pData->projType = PROJ_PARALLEL; - } else if (par3dgeom) { - ok = convertAstraGeometry(pVolGeom, par3dgeom, pData->parprojs, outputScale); - pData->projType = PROJ_CONE; - } else if (parvec3dgeom) { - ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pData->parprojs, outputScale); - pData->projType = PROJ_CONE; - } else { - ok = false; - } - + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pData->parprojs, pData->projs, + outputScale); if (!ok) return false; + if (pData->projs) { + assert(pData->parprojs == 0); + pData->projType = PROJ_CONE; + } else { + assert(pData->parprojs != 0); + pData->projType = PROJ_PARALLEL; + } // TODO: Handle outputScale -- cgit v1.2.3 From 18d12242207d1113c3015b451f522531168e626a Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 11 Mar 2015 17:27:44 +0100 Subject: Add flexible volgeom3d support to astraCudaBP_SIRTWeighted --- cuda/3d/astra3d.cu | 95 +++++++++++++---------------------- cuda/3d/astra3d.h | 23 ++------- src/CudaBackProjectionAlgorithm3D.cpp | 87 +++++++++++--------------------- 3 files changed, 66 insertions(+), 139 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index eff928d..2f7ea99 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -306,7 +306,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, - const CConeVecProjectionGeometry3D* pProjGeom, + const CProjectionGeometry3D* pProjGeom, SPar3DProjection*& pParProjs, SConeProjection*& pConeProjs, float& fOutputScale) @@ -322,13 +322,13 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; if (conegeom) { - ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, outputScale); + ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale); } else if (conevec3dgeom) { - ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, outputScale); + ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale); } else if (par3dgeom) { - ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, outputScale); + ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale); } else if (parvec3dgeom) { - ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, outputScale); + ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale); } else { ok = false; } @@ -1471,40 +1471,6 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, return ok; } -// This computes the column weights, divides by them, and adds the -// result to the current volume. This is both more expensive and more -// GPU memory intensive than the regular BP, but allows saving system RAM. -bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaPar3DBP_SIRTWeighted(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling); - - delete[] p; - - return ok; -} - bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, unsigned int iVolX, @@ -1582,33 +1548,30 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, // This computes the column weights, divides by them, and adds the // result to the current volume. This is both more expensive and more // GPU memory intensive than the regular BP, but allows saving system RAM. -bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, +bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) return false; - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; + dims.iRaysPerVoxelDim = iVoxelSuperSampling; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; - dims.iRaysPerVoxelDim = iVoxelSuperSampling; + float outputScale; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); + + // TODO: OutputScale if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1621,7 +1584,7 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, cudaPitchedPtr D_pixelWeight = allocateVolumeData(dims); - bool ok = D_pixelWeight.ptr; + ok = D_pixelWeight.ptr; if (!ok) return false; @@ -1643,7 +1606,12 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, // Compute weights ok &= zeroVolumeData(D_pixelWeight, dims); processSino3D(D_projData, 1.0f, dims); - ok &= Par3DBP(D_pixelWeight, D_projData, dims, pfAngles); + + if (pParProjs) + ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs); + else + ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs); + processVol3D(D_pixelWeight, dims); if (!ok) { cudaFree(D_pixelWeight.ptr); @@ -1656,7 +1624,11 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, dims, dims.iProjU); ok &= zeroVolumeData(D_volumeData, dims); // Do BP into D_volumeData - ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles); + if (pParProjs) + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs); + else + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs); + // Multiply with weights processVol3D(D_volumeData, D_pixelWeight, dims); @@ -1679,6 +1651,9 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, cudaFree(D_volumeData.ptr); cudaFree(D_projData.ptr); + delete[] pParProjs; + delete[] pConeProjs; + return ok; } diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 6bac8b2..b2e4e08 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -378,26 +378,9 @@ _AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, const SPar3DProjection *pfAngles, int iGPUIndex, int iVoxelSuperSampling); -_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling); - -_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, +_AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling); _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp index abcf096..7117cfc 100644 --- a/src/CudaBackProjectionAlgorithm3D.cpp +++ b/src/CudaBackProjectionAlgorithm3D.cpp @@ -107,16 +107,8 @@ bool CCudaBackProjectionAlgorithm3D::initialize(const Config& _cfg) m_iVoxelSuperSampling = (int)_cfg.self->getOptionNumerical("VoxelSuperSampling", 1); CC.markOptionParsed("VoxelSuperSampling"); - CFloat32ProjectionData3DMemory* pSinoMem = dynamic_cast(m_pSinogram); - ASTRA_ASSERT(pSinoMem); - const CProjectionGeometry3D* projgeom = pSinoMem->getGeometry(); -const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(projgeom); - const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(projgeom); - if (parvec3dgeom || par3dgeom) { - // This option is only supported for Par3D currently - m_bSIRTWeighting = _cfg.self->getOptionBool("SIRTWeighting", false); - CC.markOptionParsed("SIRTWeighting"); - } + m_bSIRTWeighting = _cfg.self->getOptionBool("SIRTWeighting", false); + CC.markOptionParsed("SIRTWeighting"); // success m_bIsInitialized = _check(); @@ -178,7 +170,12 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations) const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(projgeom); const CVolumeGeometry3D& volgeom = *pReconMem->getGeometry(); - if (conegeom) { + if (m_bSIRTWeighting) { + astraCudaBP_SIRTWeighted(pReconMem->getData(), + pSinoMem->getDataConst(), + &volgeom, projgeom, + m_iGPUIndex, m_iVoxelSuperSampling); + } else if (conegeom) { astraCudaConeBP(pReconMem->getData(), pSinoMem->getDataConst(), volgeom.getGridColCount(), volgeom.getGridRowCount(), @@ -193,55 +190,27 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations) conegeom->getProjectionAngles(), m_iGPUIndex, m_iVoxelSuperSampling); } else if (par3dgeom) { - if (!m_bSIRTWeighting) { - astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - par3dgeom->getProjectionCount(), - par3dgeom->getDetectorColCount(), - par3dgeom->getDetectorRowCount(), - par3dgeom->getDetectorSpacingX(), - par3dgeom->getDetectorSpacingY(), - par3dgeom->getProjectionAngles(), - m_iGPUIndex, m_iVoxelSuperSampling); - } else { - astraCudaPar3DBP_SIRTWeighted(pReconMem->getData(), - pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - par3dgeom->getProjectionCount(), - par3dgeom->getDetectorColCount(), - par3dgeom->getDetectorRowCount(), - par3dgeom->getDetectorSpacingX(), - par3dgeom->getDetectorSpacingY(), - par3dgeom->getProjectionAngles(), - m_iGPUIndex, m_iVoxelSuperSampling); - } + astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(), + volgeom.getGridColCount(), + volgeom.getGridRowCount(), + volgeom.getGridSliceCount(), + par3dgeom->getProjectionCount(), + par3dgeom->getDetectorColCount(), + par3dgeom->getDetectorRowCount(), + par3dgeom->getDetectorSpacingX(), + par3dgeom->getDetectorSpacingY(), + par3dgeom->getProjectionAngles(), + m_iGPUIndex, m_iVoxelSuperSampling); } else if (parvec3dgeom) { - if (!m_bSIRTWeighting) { - astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - parvec3dgeom->getProjectionCount(), - parvec3dgeom->getDetectorColCount(), - parvec3dgeom->getDetectorRowCount(), - parvec3dgeom->getProjectionVectors(), - m_iGPUIndex, m_iVoxelSuperSampling); - } else { - astraCudaPar3DBP_SIRTWeighted(pReconMem->getData(), - pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - parvec3dgeom->getProjectionCount(), - parvec3dgeom->getDetectorColCount(), - parvec3dgeom->getDetectorRowCount(), - parvec3dgeom->getProjectionVectors(), - m_iGPUIndex, m_iVoxelSuperSampling); - } + astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(), + volgeom.getGridColCount(), + volgeom.getGridRowCount(), + volgeom.getGridSliceCount(), + parvec3dgeom->getProjectionCount(), + parvec3dgeom->getDetectorColCount(), + parvec3dgeom->getDetectorRowCount(), + parvec3dgeom->getProjectionVectors(), + m_iGPUIndex, m_iVoxelSuperSampling); } else if (conevecgeom) { astraCudaConeBP(pReconMem->getData(), pSinoMem->getDataConst(), volgeom.getGridColCount(), -- cgit v1.2.3 From 6909836555afe155ffc3897ef2189ed0562bb045 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 11 Mar 2015 18:44:53 +0100 Subject: Add flexible volgeom3d support to astraCudaBP --- cuda/3d/astra3d.cu | 176 ++++------------------------------ cuda/3d/astra3d.h | 47 +-------- src/CudaBackProjectionAlgorithm3D.cpp | 54 +---------- 3 files changed, 24 insertions(+), 253 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 2f7ea99..97bebf4 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1331,173 +1331,30 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, } -bool astraCudaConeBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SConeProjection* p = genConeProjections(iProjAngles, - iProjU, iProjV, - fOriginSourceDistance, - fOriginDetectorDistance, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaConeBP(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling); - - delete[] p; - - return ok; -} -bool astraCudaConeBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) +bool astraCudaBP(float* pfVolume, const float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - dims.iRaysPerVoxelDim = iVoxelSuperSampling; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - if (iGPUIndex != -1) { - cudaSetDevice(iGPUIndex); - cudaError_t err = cudaGetLastError(); - - // Ignore errors caused by calling cudaSetDevice multiple times - if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) - return false; - } - - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; - cudaPitchedPtr D_projData = allocateProjectionData(dims); - ok = D_projData.ptr; - if (!ok) { - cudaFree(D_volumeData.ptr); - return false; - } - - ok &= copyProjectionsToDevice(pfProjections, D_projData, - dims, dims.iProjU); - - ok &= zeroVolumeData(D_volumeData, dims); - - if (!ok) { - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - return false; - } - - ok &= ConeBP(D_volumeData, D_projData, dims, pfAngles); - - ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - - return ok; - -} - -bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaPar3DBP(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling); - - delete[] p; - - return ok; -} - - -bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) -{ - SDimensions3D dims; + dims.iRaysPerVoxelDim = iVoxelSuperSampling; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; + float outputScale; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); - dims.iRaysPerVoxelDim = iVoxelSuperSampling; + // TODO: OutputScale if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1510,7 +1367,7 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; + ok = D_volumeData.ptr; if (!ok) return false; @@ -1532,7 +1389,10 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, return false; } - ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles); + if (pParProjs) + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs); + else + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs); ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index b2e4e08..5464d2f 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -332,50 +332,9 @@ _AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, Cuda3DProjectionKernel projKernel); -_AstraExport bool astraCudaConeBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling); - -_AstraExport bool astraCudaConeBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection *pfAngles, - int iGPUIndex, int iVoxelSuperSampling); - -_AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling); - -_AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, +_AstraExport bool astraCudaBP(float* pfVolume, const float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling); _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProjections, diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp index 7117cfc..a8a1b0a 100644 --- a/src/CudaBackProjectionAlgorithm3D.cpp +++ b/src/CudaBackProjectionAlgorithm3D.cpp @@ -164,10 +164,6 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations) ASTRA_ASSERT(pReconMem); const CProjectionGeometry3D* projgeom = pSinoMem->getGeometry(); - const CConeProjectionGeometry3D* conegeom = dynamic_cast(projgeom); - const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(projgeom); - const CConeVecProjectionGeometry3D* conevecgeom = dynamic_cast(projgeom); - const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(projgeom); const CVolumeGeometry3D& volgeom = *pReconMem->getGeometry(); if (m_bSIRTWeighting) { @@ -175,54 +171,10 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations) pSinoMem->getDataConst(), &volgeom, projgeom, m_iGPUIndex, m_iVoxelSuperSampling); - } else if (conegeom) { - astraCudaConeBP(pReconMem->getData(), pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - conegeom->getProjectionCount(), - conegeom->getDetectorColCount(), - conegeom->getDetectorRowCount(), - conegeom->getOriginSourceDistance(), - conegeom->getOriginDetectorDistance(), - conegeom->getDetectorSpacingX(), - conegeom->getDetectorSpacingY(), - conegeom->getProjectionAngles(), - m_iGPUIndex, m_iVoxelSuperSampling); - } else if (par3dgeom) { - astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - par3dgeom->getProjectionCount(), - par3dgeom->getDetectorColCount(), - par3dgeom->getDetectorRowCount(), - par3dgeom->getDetectorSpacingX(), - par3dgeom->getDetectorSpacingY(), - par3dgeom->getProjectionAngles(), - m_iGPUIndex, m_iVoxelSuperSampling); - } else if (parvec3dgeom) { - astraCudaPar3DBP(pReconMem->getData(), pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - parvec3dgeom->getProjectionCount(), - parvec3dgeom->getDetectorColCount(), - parvec3dgeom->getDetectorRowCount(), - parvec3dgeom->getProjectionVectors(), - m_iGPUIndex, m_iVoxelSuperSampling); - } else if (conevecgeom) { - astraCudaConeBP(pReconMem->getData(), pSinoMem->getDataConst(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - conevecgeom->getProjectionCount(), - conevecgeom->getDetectorColCount(), - conevecgeom->getDetectorRowCount(), - conevecgeom->getProjectionVectors(), - m_iGPUIndex, m_iVoxelSuperSampling); } else { - ASTRA_ASSERT(false); + astraCudaBP(pReconMem->getData(), pSinoMem->getDataConst(), + &volgeom, projgeom, + m_iGPUIndex, m_iVoxelSuperSampling); } } -- cgit v1.2.3 From 57ee6b85884b8226b26b7415ef151b4a6e63337c Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 12 Mar 2015 11:53:40 +0100 Subject: Add flexible volgeom3d support to astraCudaFP --- cuda/3d/astra3d.cu | 204 +++++-------------------------- cuda/3d/astra3d.h | 49 +------- src/CudaForwardProjectionAlgorithm3D.cpp | 59 +-------- 3 files changed, 39 insertions(+), 273 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 97bebf4..b2375f3 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1103,179 +1103,31 @@ float AstraCGLS3d::computeDiffNorm() -bool astraCudaConeFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iDetectorSuperSampling) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SConeProjection* p = genConeProjections(iProjAngles, - iProjU, iProjV, - fOriginSourceDistance, - fOriginDetectorDistance, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaConeFP(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iDetectorSuperSampling); - - delete[] p; - - return ok; -} - -bool astraCudaConeFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection *pfAngles, - int iGPUIndex, int iDetectorSuperSampling) +bool astraCudaFP(const float* pfVolume, float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + int iGPUIndex, int iDetectorSuperSampling, + Cuda3DProjectionKernel projKernel) { SDimensions3D dims; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) return false; dims.iRaysPerDetDim = iDetectorSuperSampling; - if (iDetectorSuperSampling == 0) return false; - if (iGPUIndex != -1) { - cudaSetDevice(iGPUIndex); - cudaError_t err = cudaGetLastError(); - - // Ignore errors caused by calling cudaSetDevice multiple times - if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) - return false; - } - - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; - if (!ok) - return false; - - cudaPitchedPtr D_projData = allocateProjectionData(dims); - ok = D_projData.ptr; - if (!ok) { - cudaFree(D_volumeData.ptr); - return false; - } - - ok &= copyVolumeToDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - ok &= zeroProjectionData(D_projData, dims); - - if (!ok) { - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - return false; - } - - ok &= ConeFP(D_volumeData, D_projData, dims, pfAngles, 1.0f); - - ok &= copyProjectionsFromDevice(pfProjections, D_projData, - dims, dims.iProjU); - - - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - - return ok; - -} - -bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iDetectorSuperSampling, - Cuda3DProjectionKernel projKernel) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaPar3DFP(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iDetectorSuperSampling, - projKernel); - - delete[] p; - - return ok; -} - - -bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, - int iGPUIndex, int iDetectorSuperSampling, - Cuda3DProjectionKernel projKernel) -{ - SDimensions3D dims; - - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; + float outputScale; - dims.iRaysPerDetDim = iDetectorSuperSampling; + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); - if (iDetectorSuperSampling == 0) - return false; if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1288,7 +1140,7 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; + ok = D_volumeData.ptr; if (!ok) return false; @@ -1309,15 +1161,25 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, return false; } - switch (projKernel) { - case ker3d_default: - ok &= Par3DFP(D_volumeData, D_projData, dims, pfAngles, 1.0f); - break; - case ker3d_sum_square_weights: - ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pfAngles, 1.0f); - break; - default: - assert(false); + if (pParProjs) { + switch (projKernel) { + case ker3d_default: + ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, 1.0f); + break; + case ker3d_sum_square_weights: + ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, 1.0f); + break; + default: + assert(false); + } + } else { + switch (projKernel) { + case ker3d_default: + ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, 1.0f); + break; + default: + assert(false); + } } ok &= copyProjectionsFromDevice(pfProjections, D_projData, diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 5464d2f..6c3fcfb 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -282,52 +282,9 @@ protected: }; - -_AstraExport bool astraCudaConeFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iDetectorSuperSampling); - -_AstraExport bool astraCudaConeFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection *pfAngles, - int iGPUIndex, int iDetectorSuperSampling); - -_AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iDetectorSuperSampling, - Cuda3DProjectionKernel projKernel); - -_AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, +_AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iDetectorSuperSampling, Cuda3DProjectionKernel projKernel); diff --git a/src/CudaForwardProjectionAlgorithm3D.cpp b/src/CudaForwardProjectionAlgorithm3D.cpp index bb122e0..914ee2f 100644 --- a/src/CudaForwardProjectionAlgorithm3D.cpp +++ b/src/CudaForwardProjectionAlgorithm3D.cpp @@ -239,10 +239,6 @@ void CCudaForwardProjectionAlgorithm3D::run(int) assert(m_bIsInitialized); const CProjectionGeometry3D* projgeom = m_pProjections->getGeometry(); - const CConeProjectionGeometry3D* conegeom = dynamic_cast(projgeom); - const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(projgeom); - const CConeVecProjectionGeometry3D* conevecgeom = dynamic_cast(projgeom); - const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(projgeom); const CVolumeGeometry3D& volgeom = *m_pVolume->getGeometry(); Cuda3DProjectionKernel projKernel = ker3d_default; @@ -270,58 +266,9 @@ void CCudaForwardProjectionAlgorithm3D::run(int) } #endif - if (conegeom) { - astraCudaConeFP(m_pVolume->getDataConst(), m_pProjections->getData(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - conegeom->getProjectionCount(), - conegeom->getDetectorColCount(), - conegeom->getDetectorRowCount(), - conegeom->getOriginSourceDistance(), - conegeom->getOriginDetectorDistance(), - conegeom->getDetectorSpacingX(), - conegeom->getDetectorSpacingY(), - conegeom->getProjectionAngles(), - m_iGPUIndex, m_iDetectorSuperSampling); - } else if (par3dgeom) { - astraCudaPar3DFP(m_pVolume->getDataConst(), m_pProjections->getData(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - par3dgeom->getProjectionCount(), - par3dgeom->getDetectorColCount(), - par3dgeom->getDetectorRowCount(), - par3dgeom->getDetectorSpacingX(), - par3dgeom->getDetectorSpacingY(), - par3dgeom->getProjectionAngles(), - m_iGPUIndex, m_iDetectorSuperSampling, - projKernel); - } else if (parvec3dgeom) { - astraCudaPar3DFP(m_pVolume->getDataConst(), m_pProjections->getData(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - parvec3dgeom->getProjectionCount(), - parvec3dgeom->getDetectorColCount(), - parvec3dgeom->getDetectorRowCount(), - parvec3dgeom->getProjectionVectors(), - m_iGPUIndex, m_iDetectorSuperSampling, - projKernel); - } else if (conevecgeom) { - astraCudaConeFP(m_pVolume->getDataConst(), m_pProjections->getData(), - volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getGridSliceCount(), - conevecgeom->getProjectionCount(), - conevecgeom->getDetectorColCount(), - conevecgeom->getDetectorRowCount(), - conevecgeom->getProjectionVectors(), - m_iGPUIndex, m_iDetectorSuperSampling); - } else { - ASTRA_ASSERT(false); - } - + astraCudaFP(m_pVolume->getDataConst(), m_pProjections->getData(), + &volgeom, projgeom, + m_iGPUIndex, m_iDetectorSuperSampling, projKernel); } -- cgit v1.2.3 From a70ad8df8fc2a3da63fc91dd18bbfd55be7a89dd Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 12 Mar 2015 12:30:47 +0100 Subject: Add outputScale argument to 3D CUDA BP --- cuda/3d/algo3d.cu | 7 ++++--- cuda/3d/algo3d.h | 3 ++- cuda/3d/astra3d.cu | 12 ++++++------ cuda/3d/cgls3d.cu | 4 ++-- cuda/3d/cone_bp.cu | 24 +++++++++++++++--------- cuda/3d/cone_bp.h | 7 ++++--- cuda/3d/par3d_bp.cu | 23 ++++++++++++++--------- cuda/3d/par3d_bp.h | 6 ++++-- cuda/3d/sirt3d.cu | 6 +++--- 9 files changed, 54 insertions(+), 38 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu index 7f61280..b775438 100644 --- a/cuda/3d/algo3d.cu +++ b/cuda/3d/algo3d.cu @@ -94,12 +94,13 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData, } bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData, - cudaPitchedPtr& D_projData) + cudaPitchedPtr& D_projData, + float outputScale) { if (coneProjs) { - return ConeBP(D_volumeData, D_projData, dims, coneProjs); + return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale); } else { - return Par3DBP(D_volumeData, D_projData, dims, par3DProjs); + return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale); } } diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h index f4c6a87..35ffc49 100644 --- a/cuda/3d/algo3d.h +++ b/cuda/3d/algo3d.h @@ -51,7 +51,8 @@ protected: cudaPitchedPtr& D_projData, float outputScale); bool callBP(cudaPitchedPtr& D_volumeData, - cudaPitchedPtr& D_projData); + cudaPitchedPtr& D_projData, + float outputScale); SDimensions3D dims; SConeProjection* coneProjs; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index b2375f3..7589416 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1252,9 +1252,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections, } if (pParProjs) - ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs); + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f); else - ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs); + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f); ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); @@ -1330,9 +1330,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, processSino3D(D_projData, 1.0f, dims); if (pParProjs) - ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs); + ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, 1.0f); else - ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs); + ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, 1.0f); processVol3D(D_pixelWeight, dims); if (!ok) { @@ -1347,9 +1347,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, ok &= zeroVolumeData(D_volumeData, dims); // Do BP into D_volumeData if (pParProjs) - ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs); + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f); else - ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs); + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f); // Multiply with weights processVol3D(D_volumeData, D_pixelWeight, dims); diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu index 5071a9b..4f632f3 100644 --- a/cuda/3d/cgls3d.cu +++ b/cuda/3d/cgls3d.cu @@ -165,7 +165,7 @@ bool CGLS::iterate(unsigned int iterations) // p = A'*r zeroVolumeData(D_p, dims); - callBP(D_p, D_r); + callBP(D_p, D_r, 1.0f); if (useVolumeMask) processVol3D(D_p, D_maskData, dims); @@ -195,7 +195,7 @@ bool CGLS::iterate(unsigned int iterations) // z = A'*r zeroVolumeData(D_z, dims); - callBP(D_z, D_r); + callBP(D_z, D_r, 1.0f); if (useVolumeMask) processVol3D(D_z, D_maskData, dims); diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 5648d6f..5e67980 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -78,7 +78,8 @@ bool bindProjDataTexture(const cudaArray* array) //__launch_bounds__(32*16, 4) __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, - int angleOffset, const astraCUDA3d::SDimensions3D dims) + int angleOffset, const astraCUDA3d::SDimensions3D dims, + float fOutputScale) { float* volData = (float*)D_volData; @@ -147,13 +148,13 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng endZ = dims.iVolZ - startZ; for(int i=0; i < endZ; i++) - volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i]; + volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale; } //End kernel // supersampling version -__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -189,6 +190,9 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + + for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) { @@ -236,14 +240,15 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start } - volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale; } } bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SConeProjection* angles) + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale) { bindProjDataTexture(D_projArray); @@ -291,9 +296,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); if (dims.iRaysPerVoxelDim == 1) - dev_cone_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_cone_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); } cudaTextForceKernelsCompletion(); @@ -309,14 +314,15 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SConeProjection* angles) + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles); + bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); cudaFreeArray(cuArray); diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h index cba6d9f..4d3d2dd 100644 --- a/cuda/3d/cone_bp.h +++ b/cuda/3d/cone_bp.h @@ -33,13 +33,14 @@ namespace astraCUDA3d { _AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SConeProjection* angles); + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale); _AstraExport bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SConeProjection* angles); + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale); - } #endif diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 0c33280..1217949 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -77,7 +77,7 @@ static bool bindProjDataTexture(const cudaArray* array) } -__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -139,11 +139,11 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn endZ = dims.iVolZ - startZ; for(int i=0; i < endZ; i++) - volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i]; + volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale; } // supersampling version -__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -180,6 +180,9 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + + for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) { @@ -217,14 +220,15 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star } - volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale; } } bool Par3DBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SPar3DProjection* angles) + const SDimensions3D& dims, const SPar3DProjection* angles, + float fOutputScale) { bindProjDataTexture(D_projArray); @@ -271,9 +275,9 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); if (dims.iRaysPerVoxelDim == 1) - dev_par3D_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_par3D_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_par3D_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_par3D_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); } cudaTextForceKernelsCompletion(); @@ -288,14 +292,15 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, bool Par3DBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SPar3DProjection* angles) + const SDimensions3D& dims, const SPar3DProjection* angles, + float fOutputScale) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles); + bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); cudaFreeArray(cuArray); diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h index ece37d1..f1fc62d 100644 --- a/cuda/3d/par3d_bp.h +++ b/cuda/3d/par3d_bp.h @@ -33,11 +33,13 @@ namespace astraCUDA3d { _AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SPar3DProjection* angles); + const SDimensions3D& dims, const SPar3DProjection* angles, + float fOutputScale); _AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SPar3DProjection* angles); + const SDimensions3D& dims, const SPar3DProjection* angles, + float fOutputScale); } diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index 389ee6b..0e6630a 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -160,10 +160,10 @@ bool SIRT::precomputeWeights() zeroVolumeData(D_pixelWeight, dims); if (useSinogramMask) { - callBP(D_pixelWeight, D_smaskData); + callBP(D_pixelWeight, D_smaskData, 1.0f); } else { processSino3D(D_projData, 1.0f, dims); - callBP(D_pixelWeight, D_projData); + callBP(D_pixelWeight, D_projData, 1.0f); } #if 0 float* bufp = new float[512*512]; @@ -293,7 +293,7 @@ bool SIRT::iterate(unsigned int iterations) #endif - callBP(D_tmpData, D_projData); + callBP(D_tmpData, D_projData, 1.0f); #if 0 printf("Dumping tmpData: %p\n", (void*)D_tmpData.ptr); float* buf = new float[256*256]; -- cgit v1.2.3 From 42db106d9b66639312d874e4f35e4e9ff7a407d0 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 12 Mar 2015 14:03:17 +0100 Subject: Scale CUDA 3D FP/BP output with volume pixel size --- cuda/3d/algo3d.cu | 15 +++++++++------ cuda/3d/algo3d.h | 6 ++++-- cuda/3d/astra3d.cu | 48 +++++++++++++++++++++--------------------------- cuda/3d/cgls3d.cu | 2 +- cuda/3d/sirt3d.cu | 2 +- 5 files changed, 36 insertions(+), 37 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu index b775438..cc86b70 100644 --- a/cuda/3d/algo3d.cu +++ b/cuda/3d/algo3d.cu @@ -41,6 +41,7 @@ ReconAlgo3D::ReconAlgo3D() coneProjs = 0; par3DProjs = 0; shouldAbort = false; + fOutputScale = 1.0f; } ReconAlgo3D::~ReconAlgo3D() @@ -57,9 +58,10 @@ void ReconAlgo3D::reset() shouldAbort = false; } -bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles) +bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale) { dims = _dims; + fOutputScale = _outputScale; coneProjs = new SConeProjection[dims.iProjAngles]; par3DProjs = 0; @@ -69,9 +71,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject return true; } -bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles) +bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale) { dims = _dims; + fOutputScale = _outputScale; par3DProjs = new SPar3DProjection[dims.iProjAngles]; coneProjs = 0; @@ -87,9 +90,9 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData, float outputScale) { if (coneProjs) { - return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale); + return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); } else { - return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale); + return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); } } @@ -98,9 +101,9 @@ bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData, float outputScale) { if (coneProjs) { - return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale); + return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); } else { - return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale); + return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); } } diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h index 35ffc49..886b092 100644 --- a/cuda/3d/algo3d.h +++ b/cuda/3d/algo3d.h @@ -39,8 +39,8 @@ public: ReconAlgo3D(); ~ReconAlgo3D(); - bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs); - bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs); + bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale); + bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale); void signalAbort() { shouldAbort = true; } @@ -58,6 +58,8 @@ protected: SConeProjection* coneProjs; SPar3DProjection* par3DProjs; + float fOutputScale; + volatile bool shouldAbort; }; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 7589416..ae79efb 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -353,7 +353,7 @@ public: SConeProjection* projs; SPar3DProjection* parprojs; - float fPixelSize; + float fOutputScale; bool initialized; bool setStartReconstruction; @@ -390,6 +390,8 @@ AstraSIRT3d::AstraSIRT3d() pData->dims.iRaysPerVoxelDim = 1; pData->projs = 0; + pData->parprojs = 0; + pData->fOutputScale = 1.0f; pData->initialized = false; pData->setStartReconstruction = false; @@ -435,11 +437,10 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, pData->projs = 0; pData->parprojs = 0; - float outputScale; ok = convertAstraGeometry(pVolGeom, pProjGeom, pData->parprojs, pData->projs, - outputScale); + pData->fOutputScale); if (!ok) return false; @@ -451,8 +452,6 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, pData->projType = PROJ_PARALLEL; } - // TODO: Handle outputScale - return true; } @@ -519,9 +518,9 @@ bool AstraSIRT3d::init() bool ok; if (pData->projType == PROJ_PARALLEL) { - ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs); + ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); } else { - ok = pData->sirt.setConeGeometry(pData->dims, pData->projs); + ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); } if (!ok) @@ -733,7 +732,7 @@ public: SConeProjection* projs; SPar3DProjection* parprojs; - float fPixelSize; + float fOutputScale; bool initialized; bool setStartReconstruction; @@ -770,6 +769,8 @@ AstraCGLS3d::AstraCGLS3d() pData->dims.iRaysPerVoxelDim = 1; pData->projs = 0; + pData->parprojs = 0; + pData->fOutputScale = 1.0f; pData->initialized = false; pData->setStartReconstruction = false; @@ -815,11 +816,10 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, pData->projs = 0; pData->parprojs = 0; - float outputScale; ok = convertAstraGeometry(pVolGeom, pProjGeom, pData->parprojs, pData->projs, - outputScale); + pData->fOutputScale); if (!ok) return false; @@ -831,8 +831,6 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, pData->projType = PROJ_PARALLEL; } - // TODO: Handle outputScale - return true; } @@ -900,9 +898,9 @@ bool AstraCGLS3d::init() bool ok; if (pData->projType == PROJ_PARALLEL) { - ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs); + ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); } else { - ok = pData->cgls.setConeGeometry(pData->dims, pData->projs); + ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); } if (!ok) @@ -1164,10 +1162,10 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections, if (pParProjs) { switch (projKernel) { case ker3d_default: - ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, 1.0f); + ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale); break; case ker3d_sum_square_weights: - ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, 1.0f); + ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale); break; default: assert(false); @@ -1175,7 +1173,7 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections, } else { switch (projKernel) { case ker3d_default: - ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, 1.0f); + ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale); break; default: assert(false); @@ -1216,8 +1214,6 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections, pParProjs, pConeProjs, outputScale); - // TODO: OutputScale - if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); cudaError_t err = cudaGetLastError(); @@ -1252,9 +1248,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections, } if (pParProjs) - ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f); + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); else - ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f); + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); @@ -1293,8 +1289,6 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, pParProjs, pConeProjs, outputScale); - // TODO: OutputScale - if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); cudaError_t err = cudaGetLastError(); @@ -1330,9 +1324,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, processSino3D(D_projData, 1.0f, dims); if (pParProjs) - ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, 1.0f); + ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale); else - ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, 1.0f); + ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale); processVol3D(D_pixelWeight, dims); if (!ok) { @@ -1347,9 +1341,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, ok &= zeroVolumeData(D_volumeData, dims); // Do BP into D_volumeData if (pParProjs) - ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, 1.0f); + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); else - ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, 1.0f); + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); // Multiply with weights processVol3D(D_volumeData, D_pixelWeight, dims); diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu index 4f632f3..dd0e8a0 100644 --- a/cuda/3d/cgls3d.cu +++ b/cuda/3d/cgls3d.cu @@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData, CGLS cgls; bool ok = true; - ok &= cgls.setConeGeometry(dims, angles); + ok &= cgls.setConeGeometry(dims, angles, 1.0f); if (D_maskData.ptr) ok &= cgls.enableVolumeMask(); diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index 0e6630a..484521e 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -347,7 +347,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData, SIRT sirt; bool ok = true; - ok &= sirt.setConeGeometry(dims, angles); + ok &= sirt.setConeGeometry(dims, angles, 1.0f); if (D_maskData.ptr) ok &= sirt.enableVolumeMask(); -- cgit v1.2.3 From 1a7dfb0964fa7686aa01f0d836f95910dc4dc07f Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 12 Mar 2015 14:18:54 +0100 Subject: Adapt standalone test programs to outputscale --- cuda/3d/cone_bp.cu | 2 +- cuda/3d/par3d_bp.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 5e67980..4a41f6a 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -479,7 +479,7 @@ int main() } #endif - astraCUDA3d::ConeBP(volData, projData, dims, angle); + astraCUDA3d::ConeBP(volData, projData, dims, angle, 1.0f); #if 0 float* buf = new float[256*256]; diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 1217949..cafab46 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -450,7 +450,7 @@ int main() cudaMemcpy3D(&p); } - astraCUDA3d::Par3DBP(volData, projData, dims, angle); + astraCUDA3d::Par3DBP(volData, projData, dims, angle, 1.0f); #if 1 float* buf = new float[256*256]; -- cgit v1.2.3 From 9458268a8b9192af98fc1b88bf0a5fbbc7696a77 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 12 Mar 2015 14:28:03 +0100 Subject: Add outputScale argument to 2D CUDA BP --- cuda/2d/algo.cu | 7 ++++--- cuda/2d/algo.h | 3 ++- cuda/2d/astra.cu | 12 +++++------- cuda/2d/cgls.cu | 4 ++-- cuda/2d/em.cu | 4 ++-- cuda/2d/fan_bp.cu | 47 +++++++++++++++++++++++++++-------------------- cuda/2d/fan_bp.h | 9 ++++++--- cuda/2d/par_bp.cu | 31 +++++++++++++++++-------------- cuda/2d/par_bp.h | 4 ++-- cuda/2d/sart.cu | 10 +++++----- cuda/2d/sart.h | 2 +- cuda/2d/sirt.cu | 6 +++--- 12 files changed, 76 insertions(+), 63 deletions(-) (limited to 'cuda') diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index 144fabd..dc74e51 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -336,16 +336,17 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch, } bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch, - float* D_projData, unsigned int projPitch) + float* D_projData, unsigned int projPitch, + float outputScale) { if (angles) { assert(!fanProjs); return BP(D_volumeData, volumePitch, D_projData, projPitch, - dims, angles, TOffsets); + dims, angles, TOffsets, outputScale); } else { assert(fanProjs); return FanBP(D_volumeData, volumePitch, D_projData, projPitch, - dims, fanProjs); + dims, fanProjs, outputScale); } } diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h index a75905e..99959c8 100644 --- a/cuda/2d/algo.h +++ b/cuda/2d/algo.h @@ -118,7 +118,8 @@ protected: float* D_projData, unsigned int projPitch, float outputScale); bool callBP(float* D_volumeData, unsigned int volumePitch, - float* D_projData, unsigned int projPitch); + float* D_projData, unsigned int projPitch, + float outputScale); SDimensions dims; diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 0b5be06..5e2a07a 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -367,21 +367,19 @@ bool AstraFBP::run() } + float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles; + if (pData->bFanBeam) { - ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections); + ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale); } else { - ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets); + ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale); } if(!ok) { return false; } - processVol(pData->D_volumeData, - (M_PI / 2.0f) / (float)pData->dims.iProjAngles, - pData->volumePitch, pData->dims); - return true; } @@ -593,7 +591,7 @@ bool BPalgo::iterate(unsigned int) { // TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant zeroVolumeData(D_volumeData, volumePitch, dims); - callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch); + callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch, 1.0f); return true; } diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index 9ead563..f402914 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -135,7 +135,7 @@ bool CGLS::iterate(unsigned int iterations) // p = A'*r zeroVolumeData(D_p, pPitch, dims); - callBP(D_p, pPitch, D_r, rPitch); + callBP(D_p, pPitch, D_r, rPitch, 1.0f); if (useVolumeMask) processVol(D_p, D_maskData, pPitch, dims); @@ -166,7 +166,7 @@ bool CGLS::iterate(unsigned int iterations) // z = A'*r zeroVolumeData(D_z, zPitch, dims); - callBP(D_z, zPitch, D_r, rPitch); + callBP(D_z, zPitch, D_r, rPitch, 1.0f); if (useVolumeMask) processVol(D_z, D_maskData, zPitch, dims); diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index 00127c0..8593b08 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -102,7 +102,7 @@ bool EM::precomputeWeights() #endif { processSino(D_projData, 1.0f, projPitch, dims); - callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); + callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f); } processVol(D_pixelWeight, pixelPitch, dims); @@ -137,7 +137,7 @@ bool EM::iterate(unsigned int iterations) // Do BP of projData into tmpData zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP(D_tmpData, tmpPitch, D_projData, projPitch); + callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f); // Multiply volumeData with tmpData divided by pixel weights processVol(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims); diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu index 74e8b12..b4321ba 100644 --- a/cuda/2d/fan_bp.cu +++ b/cuda/2d/fan_bp.cu @@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi return true; } -__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s fA += 1.0f; } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // supersampling version -__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in float* volData = (float*)D_volData; + fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + float fVal = 0.0f; float fA = startAngle + 0.5f; @@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in fA += 1.0f; } - volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + volData[Y*volPitch+X] += fVal * fOutputScale; } // BP specifically for SART. // It includes (free) weighting with voxel weight. // It assumes the proj texture is set up _without_ padding, unlike regular BP. -__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims) +__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi const float fT = fNum / fDen; const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f); - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // Weighted BP for use in fan beam FBP // Each pixel/ray is weighted by 1/L^2 where L is the distance to the source. -__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un fA += 1.0f; } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { assert(dims.iProjAngles <= g_MaxAngles); @@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { if (dims.iRaysPerPixelDim > 1) - devFanBP_SS<<>>(D_volumeData, volumePitch, i, dims); + devFanBP_SS<<>>(D_volumeData, volumePitch, i, dims, fOutputScale); else - devFanBP<<>>(D_volumeData, volumePitch, i, dims); + devFanBP<<>>(D_volumeData, volumePitch, i, dims, fOutputScale); } cudaThreadSynchronize(); @@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { assert(dims.iProjAngles <= g_MaxAngles); @@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, cudaStreamCreate(&stream); for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { - devFanBP_FBPWeighted<<>>(D_volumeData, volumePitch, i, dims); + devFanBP_FBPWeighted<<>>(D_volumeData, volumePitch, i, dims, fOutputScale); } cudaThreadSynchronize(); @@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { // only one angle bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp); @@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); - devFanBP_SART<<>>(D_volumeData, volumePitch, dims); + devFanBP_SART<<>>(D_volumeData, volumePitch, dims, fOutputScale); cudaThreadSynchronize(); cudaTextForceKernelsCompletion(); @@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, bool FanBP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch, bool ret; ret = FanBP_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, - subdims, angles + iAngle); + subdims, angles + iAngle, fOutputScale); if (!ret) return false; } @@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch, bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, bool ret; ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, - subdims, angles + iAngle); + subdims, angles + iAngle, fOutputScale); if (!ret) return false; @@ -498,7 +505,7 @@ int main() copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch); copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch); - FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs); + FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f); copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch); diff --git a/cuda/2d/fan_bp.h b/cuda/2d/fan_bp.h index e4e69b0..3ebe1e8 100644 --- a/cuda/2d/fan_bp.h +++ b/cuda/2d/fan_bp.h @@ -33,16 +33,19 @@ namespace astraCUDA { _AstraExport bool FanBP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); _AstraExport bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); _AstraExport bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); } diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index 635200f..d9f7325 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi return true; } -__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims) +__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -123,11 +123,11 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // supersampling version -__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims) +__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -152,6 +152,8 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s float fA = startAngle + 0.5f; const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; + fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + if (offsets) { for (int angle = startAngle; angle < endAngle; ++angle) @@ -196,10 +198,10 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s } - volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + volData[Y*volPitch+X] += fVal * fOutputScale; } -__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims) +__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -218,13 +220,13 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset; const float fVal = tex2D(gT_projTexture, fT, 0.5f); - D_volData[Y*volPitch+X] += fVal; + D_volData[Y*volPitch+X] += fVal * fOutputScale; } bool BP_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, const float* TOffsets) + const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) { // TODO: process angles block by block assert(dims.iProjAngles <= g_MaxAngles); @@ -261,9 +263,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { if (dims.iRaysPerPixelDim > 1) - devBP_SS<<>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims); + devBP_SS<<>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); else - devBP<<>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims); + devBP<<>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); } cudaThreadSynchronize(); @@ -276,7 +278,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, bool BP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, const float* TOffsets) + const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -289,7 +291,8 @@ bool BP(float* D_volumeData, unsigned int volumePitch, ret = BP_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, subdims, angles + iAngle, - TOffsets ? TOffsets + iAngle : 0); + TOffsets ? TOffsets + iAngle : 0, + fOutputScale); if (!ret) return false; } @@ -300,7 +303,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch, bool BP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, const SDimensions& dims, - const float* angles, const float* TOffsets) + const float* angles, const float* TOffsets, float fOutputScale) { // Only one angle. // We need to Clamp to the border pixels instead of to zero, because @@ -318,7 +321,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch, dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); - devBP_SART<<>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims); + devBP_SART<<>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale); cudaThreadSynchronize(); cudaTextForceKernelsCompletion(); @@ -369,7 +372,7 @@ int main() for (unsigned int i = 0; i < dims.iProjAngles; ++i) angle[i] = i*(M_PI/dims.iProjAngles); - BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0); + BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f); delete[] angle; diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h index eaeafd8..64bcd34 100644 --- a/cuda/2d/par_bp.h +++ b/cuda/2d/par_bp.h @@ -36,12 +36,12 @@ namespace astraCUDA { _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, const SDimensions& dims, const float* angles, - const float* TOffsets); + const float* TOffsets, float fOutputScale); _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, const SDimensions& dims, - const float* angles, const float* TOffsets); + const float* angles, const float* TOffsets, float fOutputScale); } diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index 29670c3..e5cb5bb 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -200,10 +200,10 @@ bool SART::iterate(unsigned int iterations) // BP, mask, and add back // TODO: Try putting the masking directly in the BP zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle); + callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, 1.0f); processVol(D_volumeData, D_maskData, D_tmpData, volumePitch, dims); } else { - callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle); + callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, 1.0f); } if (useMinConstraint) @@ -264,16 +264,16 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch, bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - unsigned int angle) + unsigned int angle, float outputScale) { if (angles) { assert(!fanProjs); return BP_SART(D_volumeData, volumePitch, D_projData, projPitch, - angle, dims, angles, TOffsets); + angle, dims, angles, TOffsets, outputScale); } else { assert(fanProjs); return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch, - angle, dims, fanProjs); + angle, dims, fanProjs, outputScale); } } diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h index 6574a6f..7dcd641 100644 --- a/cuda/2d/sart.h +++ b/cuda/2d/sart.h @@ -59,7 +59,7 @@ protected: unsigned int angle, float outputScale); bool callBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - unsigned int angle); + unsigned int angle, float outputScale); // projection angle variables diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index a6194a5..162ee77 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -127,10 +127,10 @@ bool SIRT::precomputeWeights() zeroVolumeData(D_pixelWeight, pixelPitch, dims); if (useSinogramMask) { - callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch); + callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch, 1.0f); } else { processSino(D_projData, 1.0f, projPitch, dims); - callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); + callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f); } processVol(D_pixelWeight, pixelPitch, dims); @@ -251,7 +251,7 @@ bool SIRT::iterate(unsigned int iterations) zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP(D_tmpData, tmpPitch, D_projData, projPitch); + callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f); processVol(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims); -- cgit v1.2.3 From 7584ffbd6748bcca8c3f7ed2dc961be01f2fcfdc Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 16 Jul 2015 18:09:51 +0200 Subject: Fix assert --- cuda/3d/astra3d.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index ae79efb..3815a1a 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -287,7 +287,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, { assert(pVolGeom); assert(pProjGeom); - assert(pProjGeom->getProjectionAngles()); + assert(pProjGeom->getProjectionVectors()); int nth = pProjGeom->getProjectionCount(); -- cgit v1.2.3 From b14fb531ad9ae3d565f2cf28f5506408ab10dbed Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 18 Nov 2015 11:26:15 +0100 Subject: Add CompositeGeometryManager This handles FP and BP operations on multiple data objects at once, splitting them to fit in GPU memory where necessary. --- astra_vc09.vcproj | 48 ++ astra_vc11.vcxproj | 9 + astra_vc11.vcxproj.filters | 12 + build/linux/Makefile.in | 4 +- build/msvc/gen.py | 4 + cuda/3d/astra3d.cu | 82 --- cuda/3d/astra3d.h | 9 + cuda/3d/mem3d.cu | 270 ++++++++ cuda/3d/mem3d.h | 99 +++ include/astra/CompositeGeometryManager.h | 150 ++++ include/astra/ConeProjectionGeometry3D.h | 10 +- include/astra/ConeVecProjectionGeometry3D.h | 11 +- include/astra/GeometryUtil3D.h | 17 + include/astra/ParallelProjectionGeometry3D.h | 11 +- include/astra/ParallelVecProjectionGeometry3D.h | 10 +- include/astra/ProjectionGeometry3D.h | 19 +- src/CompositeGeometryManager.cpp | 884 ++++++++++++++++++++++++ src/ConeProjectionGeometry3D.cpp | 92 ++- src/ConeVecProjectionGeometry3D.cpp | 58 +- src/CudaBackProjectionAlgorithm3D.cpp | 8 + src/CudaForwardProjectionAlgorithm3D.cpp | 9 + src/GeometryUtil3D.cpp | 172 +++++ src/ParallelProjectionGeometry3D.cpp | 81 ++- src/ParallelVecProjectionGeometry3D.cpp | 61 +- 24 files changed, 2023 insertions(+), 107 deletions(-) create mode 100644 cuda/3d/mem3d.cu create mode 100644 cuda/3d/mem3d.h create mode 100644 include/astra/CompositeGeometryManager.h create mode 100644 src/CompositeGeometryManager.cpp (limited to 'cuda') diff --git a/astra_vc09.vcproj b/astra_vc09.vcproj index e5d7731..b928662 100644 --- a/astra_vc09.vcproj +++ b/astra_vc09.vcproj @@ -932,6 +932,10 @@ RelativePath=".\include\astra\clog.h" > + + @@ -988,6 +992,10 @@ RelativePath=".\src\AstraObjectManager.cpp" > + + @@ -2228,6 +2236,10 @@ RelativePath=".\cuda\3d\fdk.h" > + + @@ -3040,6 +3052,42 @@ /> + + + + + + + + + + + + + + diff --git a/astra_vc11.vcxproj b/astra_vc11.vcxproj index bc11b23..fc8b9ce 100644 --- a/astra_vc11.vcxproj +++ b/astra_vc11.vcxproj @@ -380,6 +380,7 @@ + @@ -582,6 +583,7 @@ + @@ -594,6 +596,7 @@ + @@ -804,6 +807,12 @@ true true + + true + true + true + true + true true diff --git a/astra_vc11.vcxproj.filters b/astra_vc11.vcxproj.filters index a597962..af8ca39 100644 --- a/astra_vc11.vcxproj.filters +++ b/astra_vc11.vcxproj.filters @@ -67,6 +67,9 @@ CUDA\cuda source + + CUDA\cuda source + CUDA\cuda source @@ -153,6 +156,9 @@ Global & Other\source + + Global & Other\source + Global & Other\source @@ -398,6 +404,9 @@ Global & Other\headers + + Global & Other\headers + Global & Other\headers @@ -641,6 +650,9 @@ CUDA\cuda headers + + CUDA\cuda headers + CUDA\cuda headers diff --git a/build/linux/Makefile.in b/build/linux/Makefile.in index abbebe2..c555bca 100644 --- a/build/linux/Makefile.in +++ b/build/linux/Makefile.in @@ -99,6 +99,7 @@ BASE_OBJECTS=\ src/AstraObjectManager.lo \ src/BackProjectionAlgorithm.lo \ src/CglsAlgorithm.lo \ + src/CompositeGeometryManager.lo \ src/ConeProjectionGeometry3D.lo \ src/ConeVecProjectionGeometry3D.lo \ src/Config.lo \ @@ -197,7 +198,8 @@ CUDA_OBJECTS=\ cuda/3d/sirt3d.lo \ cuda/3d/astra3d.lo \ cuda/3d/util3d.lo \ - cuda/3d/arith3d.lo + cuda/3d/arith3d.lo \ + cuda/3d/mem3d.lo ALL_OBJECTS=$(BASE_OBJECTS) ifeq ($(cuda),yes) diff --git a/build/msvc/gen.py b/build/msvc/gen.py index 72d4582..c18c1e8 100644 --- a/build/msvc/gen.py +++ b/build/msvc/gen.py @@ -168,6 +168,7 @@ P_astra["filters"]["CUDA\\cuda source"] = [ "cuda\\3d\\cone_fp.cu", "cuda\\3d\\darthelper3d.cu", "cuda\\3d\\fdk.cu", +"cuda\\3d\\mem3d.cu", "cuda\\3d\\par3d_bp.cu", "cuda\\3d\\par3d_fp.cu", "cuda\\3d\\sirt3d.cu", @@ -205,6 +206,7 @@ P_astra["filters"]["Global & Other\\source"] = [ "1546cb47-7e5b-42c2-b695-ef172024c14b", "src\\AstraObjectFactory.cpp", "src\\AstraObjectManager.cpp", +"src\\CompositeGeometryManager.cpp", "src\\Config.cpp", "src\\Fourier.cpp", "src\\Globals.cpp", @@ -295,6 +297,7 @@ P_astra["filters"]["CUDA\\cuda headers"] = [ "cuda\\3d\\darthelper3d.h", "cuda\\3d\\dims3d.h", "cuda\\3d\\fdk.h", +"cuda\\3d\\mem3d.h", "cuda\\3d\\par3d_bp.h", "cuda\\3d\\par3d_fp.h", "cuda\\3d\\sirt3d.h", @@ -336,6 +339,7 @@ P_astra["filters"]["Global & Other\\headers"] = [ "include\\astra\\AstraObjectFactory.h", "include\\astra\\AstraObjectManager.h", "include\\astra\\clog.h", +"include\\astra\\CompositeGeometryManager.h", "include\\astra\\Config.h", "include\\astra\\Fourier.h", "include\\astra\\Globals.h", diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 3815a1a..8328229 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -58,88 +58,6 @@ enum CUDAProjectionType3d { }; -static SConeProjection* genConeProjections(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - double fOriginSourceDistance, - double fOriginDetectorDistance, - double fDetUSize, - double fDetVSize, - const float *pfAngles) -{ - SConeProjection base; - base.fSrcX = 0.0f; - base.fSrcY = -fOriginSourceDistance; - base.fSrcZ = 0.0f; - - base.fDetSX = iProjU * fDetUSize * -0.5f; - base.fDetSY = fOriginDetectorDistance; - base.fDetSZ = iProjV * fDetVSize * -0.5f; - - base.fDetUX = fDetUSize; - base.fDetUY = 0.0f; - base.fDetUZ = 0.0f; - - base.fDetVX = 0.0f; - base.fDetVY = 0.0f; - base.fDetVZ = fDetVSize; - - SConeProjection* p = new SConeProjection[iProjAngles]; - -#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0) - - for (unsigned int i = 0; i < iProjAngles; ++i) { - ROTATE0(Src, i, pfAngles[i]); - ROTATE0(DetS, i, pfAngles[i]); - ROTATE0(DetU, i, pfAngles[i]); - ROTATE0(DetV, i, pfAngles[i]); - } - -#undef ROTATE0 - - return p; -} - -static SPar3DProjection* genPar3DProjections(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - double fDetUSize, - double fDetVSize, - const float *pfAngles) -{ - SPar3DProjection base; - base.fRayX = 0.0f; - base.fRayY = 1.0f; - base.fRayZ = 0.0f; - - base.fDetSX = iProjU * fDetUSize * -0.5f; - base.fDetSY = 0.0f; - base.fDetSZ = iProjV * fDetVSize * -0.5f; - - base.fDetUX = fDetUSize; - base.fDetUY = 0.0f; - base.fDetUZ = 0.0f; - - base.fDetVX = 0.0f; - base.fDetVY = 0.0f; - base.fDetVZ = fDetVSize; - - SPar3DProjection* p = new SPar3DProjection[iProjAngles]; - -#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0) - - for (unsigned int i = 0; i < iProjAngles; ++i) { - ROTATE0(Ray, i, pfAngles[i]); - ROTATE0(DetS, i, pfAngles[i]); - ROTATE0(DetU, i, pfAngles[i]); - ROTATE0(DetV, i, pfAngles[i]); - } - -#undef ROTATE0 - - return p; -} - diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 6c3fcfb..2782994 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -281,6 +281,15 @@ protected: AstraCGLS3d_internal *pData; }; +bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + astraCUDA3d::SDimensions3D& dims); + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pParProjs, + SConeProjection*& pConeProjs, + float& fOutputScale); _AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections, const CVolumeGeometry3D* pVolGeom, diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu new file mode 100644 index 0000000..6d81dc0 --- /dev/null +++ b/cuda/3d/mem3d.cu @@ -0,0 +1,270 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see . + +----------------------------------------------------------------------- +$Id$ +*/ + +#include +#include + +#include "util3d.h" + +#include "mem3d.h" + +#include "astra3d.h" +#include "cone_fp.h" +#include "cone_bp.h" +#include "par3d_fp.h" +#include "par3d_bp.h" + +#include "astra/Logging.h" + + +namespace astraCUDA3d { + + +struct SMemHandle3D_internal +{ + cudaPitchedPtr ptr; + unsigned int nx; + unsigned int ny; + unsigned int nz; +}; + +size_t availableGPUMemory() +{ + size_t free, total; + cudaError_t err = cudaMemGetInfo(&free, &total); + if (err != cudaSuccess) + return 0; + return free; +} + +MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero) +{ + SMemHandle3D_internal hnd; + hnd.nx = x; + hnd.ny = y; + hnd.nz = z; + + size_t free = availableGPUMemory(); + + cudaError_t err; + err = cudaMalloc3D(&hnd.ptr, make_cudaExtent(sizeof(float)*x, y, z)); + + if (err != cudaSuccess) { + return MemHandle3D(); + } + + size_t free2 = availableGPUMemory(); + + ASTRA_DEBUG("Allocated %d x %d x %d on GPU. (Pre: %lu, post: %lu)", x, y, z, free, free2); + + + + if (zero == INIT_ZERO) { + err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z)); + if (err != cudaSuccess) { + cudaFree(hnd.ptr.ptr); + return MemHandle3D(); + } + } + + MemHandle3D ret; + ret.d = boost::shared_ptr(new SMemHandle3D_internal); + *ret.d = hnd; + + return ret; +} + +bool freeGPUMemory(MemHandle3D handle) +{ + size_t free = availableGPUMemory(); + cudaError_t err = cudaFree(handle.d->ptr.ptr); + size_t free2 = availableGPUMemory(); + + ASTRA_DEBUG("Freeing memory. (Pre: %lu, post: %lu)", free, free2); + + return err == cudaSuccess; +} + +bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos) +{ + ASTRA_DEBUG("Copying %d x %d x %d to GPU", pos.subnx, pos.subny, pos.subnz); + ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz); + cudaPitchedPtr s; + s.ptr = (void*)src; // const cast away + s.pitch = pos.pitch * sizeof(float); + s.xsize = pos.nx * sizeof(float); + s.ysize = pos.ny; + ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", s.pitch, s.xsize, s.ysize); + + cudaMemcpy3DParms p; + p.srcArray = 0; + p.srcPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz); + p.srcPtr = s; + + p.dstArray = 0; + p.dstPos = make_cudaPos(0, 0, 0); + p.dstPtr = dst.d->ptr; + + p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz); + + p.kind = cudaMemcpyHostToDevice; + + cudaError_t err = cudaMemcpy3D(&p); + + return err == cudaSuccess; +} + + +bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) +{ + ASTRA_DEBUG("Copying %d x %d x %d from GPU", pos.subnx, pos.subny, pos.subnz); + ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz); + cudaPitchedPtr d; + d.ptr = (void*)dst; + d.pitch = pos.pitch * sizeof(float); + d.xsize = pos.nx * sizeof(float); + d.ysize = pos.ny; + ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", d.pitch, d.xsize, d.ysize); + + cudaMemcpy3DParms p; + p.srcArray = 0; + p.srcPos = make_cudaPos(0, 0, 0); + p.srcPtr = src.d->ptr; + + p.dstArray = 0; + p.dstPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz); + p.dstPtr = d; + + p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz); + + p.kind = cudaMemcpyDeviceToHost; + + cudaError_t err = cudaMemcpy3D(&p); + + return err == cudaSuccess; + +} + + +bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel) +{ + SDimensions3D dims; + + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) + return false; + +#if 1 + dims.iRaysPerDetDim = iDetectorSuperSampling; + if (iDetectorSuperSampling == 0) + return false; +#else + dims.iRaysPerDetDim = 1; + astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default; +#endif + + + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; + + float outputScale = 1.0f; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); + + if (pParProjs) { +#if 0 + for (int i = 0; i < dims.iProjAngles; ++i) { + ASTRA_DEBUG("Vec: %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f\n", + pParProjs[i].fRayX, pParProjs[i].fRayY, pParProjs[i].fRayZ, + pParProjs[i].fDetSX, pParProjs[i].fDetSY, pParProjs[i].fDetSZ, + pParProjs[i].fDetUX, pParProjs[i].fDetUY, pParProjs[i].fDetUZ, + pParProjs[i].fDetVX, pParProjs[i].fDetVY, pParProjs[i].fDetVZ); + } +#endif + + switch (projKernel) { + case astra::ker3d_default: + ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); + break; + case astra::ker3d_sum_square_weights: + ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale); + break; + default: + ok = false; + } + } else { + switch (projKernel) { + case astra::ker3d_default: + ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); + break; + default: + ok = false; + } + } + + return ok; +} + +bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling) +{ + SDimensions3D dims; + + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) + return false; + +#if 1 + dims.iRaysPerVoxelDim = iVoxelSuperSampling; +#else + dims.iRaysPerVoxelDim = 1; +#endif + + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; + + float outputScale = 1.0f; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); + + if (pParProjs) + ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); + else + ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); + + return ok; + +} + + + + +} diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h new file mode 100644 index 0000000..82bad19 --- /dev/null +++ b/cuda/3d/mem3d.h @@ -0,0 +1,99 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see . + +----------------------------------------------------------------------- +*/ + +#ifndef _CUDA_MEM3D_H +#define _CUDA_MEM3D_H + +#include + +#include "astra3d.h" + +namespace astra { +class CVolumeGeometry3D; +class CProjectionGeometry3D; +} + +namespace astraCUDA3d { + +// TODO: Make it possible to delete these handles when they're no longer +// necessary inside the FP/BP +// +// TODO: Add functions for querying capacity + +struct SMemHandle3D_internal; + +struct MemHandle3D { + boost::shared_ptr d; + operator bool() const { return (bool)d; } +}; + +struct SSubDimensions3D { + unsigned int nx; + unsigned int ny; + unsigned int nz; + unsigned int pitch; + unsigned int subnx; + unsigned int subny; + unsigned int subnz; + unsigned int subx; + unsigned int suby; + unsigned int subz; +}; + +/* +// Useful or not? +enum Mem3DCopyMode { + MODE_SET, + MODE_ADD +}; +*/ + +enum Mem3DZeroMode { + INIT_NO, + INIT_ZERO +}; + +size_t availableGPUMemory(); + +MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero); + +bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos); + +bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos); + +bool freeGPUMemory(MemHandle3D handle); + + +bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel); + +bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling); + + + +} + +#endif diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h new file mode 100644 index 0000000..a6e57f1 --- /dev/null +++ b/include/astra/CompositeGeometryManager.h @@ -0,0 +1,150 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see . + +----------------------------------------------------------------------- +*/ + +#ifndef _INC_ASTRA_COMPOSITEGEOMETRYMANAGER +#define _INC_ASTRA_COMPOSITEGEOMETRYMANAGER + +#include "Globals.h" + +#ifdef ASTRA_CUDA + +#include +#include +#include +#include + + +namespace astra { + +class CCompositeVolume; +class CCompositeProjections; +class CFloat32Data3DMemory; +class CFloat32ProjectionData3DMemory; +class CFloat32VolumeData3DMemory; +class CVolumeGeometry3D; +class CProjectionGeometry3D; +class CProjector3D; + + + +class _AstraExport CCompositeGeometryManager { +public: + class CPart; + typedef std::list > TPartList; + class CPart { + public: + CPart() { } + CPart(const CPart& other); + virtual ~CPart() { } + + enum { + PART_VOL, PART_PROJ + } eType; + + CFloat32Data3DMemory* pData; + unsigned int subX; + unsigned int subY; + unsigned int subZ; + + bool uploadToGPU(); + bool downloadFromGPU(/*mode?*/); + virtual TPartList split(size_t maxSize, int div) = 0; + virtual CPart* reduce(const CPart *other) = 0; + virtual void getDims(size_t &x, size_t &y, size_t &z) = 0; + size_t getSize(); + }; + + class CVolumePart : public CPart { + public: + CVolumePart() { eType = PART_VOL; } + CVolumePart(const CVolumePart& other); + virtual ~CVolumePart(); + + CVolumeGeometry3D* pGeom; + + virtual TPartList split(size_t maxSize, int div); + virtual CPart* reduce(const CPart *other); + virtual void getDims(size_t &x, size_t &y, size_t &z); + + CVolumePart* clone() const; + }; + class CProjectionPart : public CPart { + public: + CProjectionPart() { eType = PART_PROJ; } + CProjectionPart(const CProjectionPart& other); + virtual ~CProjectionPart(); + + CProjectionGeometry3D* pGeom; + + virtual TPartList split(size_t maxSize, int div); + virtual CPart* reduce(const CPart *other); + virtual void getDims(size_t &x, size_t &y, size_t &z); + + CProjectionPart* clone() const; + }; + + struct SJob { + public: + boost::shared_ptr pInput; + boost::shared_ptr pOutput; + CProjector3D *pProjector; // For a `global' geometry. It will not match + // the geometries of the input and output. + + + enum { + JOB_FP, JOB_BP, JOB_NOP + } eType; + enum { + MODE_ADD, MODE_SET + } eMode; + + }; + + typedef std::list TJobList; + // output part -> list of jobs for that output + typedef std::map TJobSet; + + bool doJobs(TJobList &jobs); + + // Convenience functions for creating and running a single FP or BP job + bool doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData); + bool doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData); + + +protected: + + bool splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split); + +}; + +} + +#endif + +#endif diff --git a/include/astra/ConeProjectionGeometry3D.h b/include/astra/ConeProjectionGeometry3D.h index 00e72ce..dede6e1 100644 --- a/include/astra/ConeProjectionGeometry3D.h +++ b/include/astra/ConeProjectionGeometry3D.h @@ -186,9 +186,15 @@ public: */ virtual CVector3D getProjectionDirection(int _iProjectionIndex, int _iDetectorIndex) const; - virtual void projectPoint(float32 fX, float32 fY, float32 fZ, + virtual void projectPoint(double fX, double fY, double fZ, int iAngleIndex, - float32 &fU, float32 &fV) const; + double &fU, double &fV) const; + virtual void backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const; + virtual void backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const; + virtual void backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const; }; diff --git a/include/astra/ConeVecProjectionGeometry3D.h b/include/astra/ConeVecProjectionGeometry3D.h index 71e8010..f76f9dd 100644 --- a/include/astra/ConeVecProjectionGeometry3D.h +++ b/include/astra/ConeVecProjectionGeometry3D.h @@ -148,9 +148,16 @@ public: const SConeProjection* getProjectionVectors() const { return m_pProjectionAngles; } - virtual void projectPoint(float32 fX, float32 fY, float32 fZ, + virtual void projectPoint(double fX, double fY, double fZ, int iAngleIndex, - float32 &fU, float32 &fV) const; + double &fU, double &fV) const; + virtual void backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const; + virtual void backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const; + virtual void backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const; + }; } // namespace astra diff --git a/include/astra/GeometryUtil3D.h b/include/astra/GeometryUtil3D.h index 6ceac63..e4d73e4 100644 --- a/include/astra/GeometryUtil3D.h +++ b/include/astra/GeometryUtil3D.h @@ -119,6 +119,23 @@ void computeBP_UV_Coeffs(const SConeProjection& proj, double &fDX, double &fDY, double &fDZ, double &fDC); +SConeProjection* genConeProjections(unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + double fOriginSourceDistance, + double fOriginDetectorDistance, + double fDetUSize, + double fDetVSize, + const float *pfAngles); + +SPar3DProjection* genPar3DProjections(unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + double fDetUSize, + double fDetVSize, + const float *pfAngles); + + } diff --git a/include/astra/ParallelProjectionGeometry3D.h b/include/astra/ParallelProjectionGeometry3D.h index 72401e5..d95c050 100644 --- a/include/astra/ParallelProjectionGeometry3D.h +++ b/include/astra/ParallelProjectionGeometry3D.h @@ -147,9 +147,16 @@ public: */ virtual CVector3D getProjectionDirection(int _iProjectionIndex, int _iDetectorIndex) const; - virtual void projectPoint(float32 fX, float32 fY, float32 fZ, + virtual void projectPoint(double fX, double fY, double fZ, int iAngleIndex, - float32 &fU, float32 &fV) const; + double &fU, double &fV) const; + virtual void backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const; + virtual void backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const; + virtual void backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const; + /** * Creates (= allocates) a 2D projection geometry used when projecting one slice using a 2D projector diff --git a/include/astra/ParallelVecProjectionGeometry3D.h b/include/astra/ParallelVecProjectionGeometry3D.h index 59238c8..ec91086 100644 --- a/include/astra/ParallelVecProjectionGeometry3D.h +++ b/include/astra/ParallelVecProjectionGeometry3D.h @@ -149,9 +149,15 @@ public: const SPar3DProjection* getProjectionVectors() const { return m_pProjectionAngles; } - virtual void projectPoint(float32 fX, float32 fY, float32 fZ, + virtual void projectPoint(double fX, double fY, double fZ, int iAngleIndex, - float32 &fU, float32 &fV) const; + double &fU, double &fV) const; + virtual void backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const; + virtual void backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const; + virtual void backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const; }; } // namespace astra diff --git a/include/astra/ProjectionGeometry3D.h b/include/astra/ProjectionGeometry3D.h index 19ac3ab..0b60287 100644 --- a/include/astra/ProjectionGeometry3D.h +++ b/include/astra/ProjectionGeometry3D.h @@ -317,9 +317,24 @@ public: * @param iAngleIndex the index of the angle to use * @param fU,fV the projected point. */ - virtual void projectPoint(float32 fX, float32 fY, float32 fZ, + virtual void projectPoint(double fX, double fY, double fZ, int iAngleIndex, - float32 &fU, float32 &fV) const = 0; + double &fU, double &fV) const = 0; + + /* Backproject a point onto a plane parallel to a coordinate plane. + * The 2D point coordinates are the (unrounded) indices of the detector + * column and row. The output is in 3D coordinates in units. + * are in units. The output fU,fV are the (unrounded) indices of the + * detector column and row. + * This may fall outside of the actual detector. + */ + virtual void backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const = 0; + virtual void backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const = 0; + virtual void backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const = 0; + /** Returns true if the type of geometry defined in this class is the one specified in _sType. * diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp new file mode 100644 index 0000000..fc8bc2e --- /dev/null +++ b/src/CompositeGeometryManager.cpp @@ -0,0 +1,884 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see . + +----------------------------------------------------------------------- +*/ + +#include "astra/CompositeGeometryManager.h" + +#ifdef ASTRA_CUDA + +#include "astra/GeometryUtil3D.h" +#include "astra/VolumeGeometry3D.h" +#include "astra/ConeProjectionGeometry3D.h" +#include "astra/ConeVecProjectionGeometry3D.h" +#include "astra/ParallelProjectionGeometry3D.h" +#include "astra/ParallelVecProjectionGeometry3D.h" +#include "astra/Projector3D.h" +#include "astra/CudaProjector3D.h" +#include "astra/Float32ProjectionData3DMemory.h" +#include "astra/Float32VolumeData3DMemory.h" +#include "astra/Logging.h" + +#include "../cuda/3d/mem3d.h" + +#include + +namespace astra { + +// JOB: +// +// VolumePart +// ProjectionPart +// FP-or-BP +// SET-or-ADD + + +// Running a set of jobs: +// +// [ Assume OUTPUT Parts in a single JobSet don't alias?? ] +// Group jobs by output Part +// One thread per group? + +// Automatically split parts if too large +// Performance model for odd-sized tasks? +// Automatically split parts if not enough tasks to fill available GPUs + + +// Splitting: +// Constraints: +// number of sub-parts divisible by N +// max size of sub-parts + +// For splitting on both input and output side: +// How to divide up memory? (Optimization problem; compute/benchmark) +// (First approach: 0.5/0.5) + + + +bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split) +{ + split.clear(); + + for (TJobSet::const_iterator i = jobs.begin(); i != jobs.end(); ++i) + { + CPart* pOutput = i->first; + const TJobList &L = i->second; + + // 1. Split output part + // 2. Per sub-part: + // a. reduce input part + // b. split input part + // c. create jobs for new (input,output) subparts + + TPartList splitOutput = pOutput->split(maxSize/3, div); + + for (TJobList::const_iterator j = L.begin(); j != L.end(); ++j) + { + const SJob &job = *j; + + for (TPartList::iterator i_out = splitOutput.begin(); + i_out != splitOutput.end(); ++i_out) + { + boost::shared_ptr outputPart = *i_out; + split[outputPart.get()] = TJobList(); + + SJob newjob; + newjob.pOutput = outputPart; + newjob.eType = j->eType; + newjob.eMode = j->eMode; + newjob.pProjector = j->pProjector; + + CPart* input = job.pInput->reduce(outputPart.get()); + + if (input->getSize() == 0) { + ASTRA_DEBUG("Empty input"); + newjob.eType = SJob::JOB_NOP; + split[outputPart.get()].push_back(newjob); + continue; + } + + size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2; + + TPartList splitInput = input->split(remainingSize, 1); + delete input; + ASTRA_DEBUG("Input split into %d parts", splitInput.size()); + + for (TPartList::iterator i_in = splitInput.begin(); + i_in != splitInput.end(); ++i_in) + { + newjob.pInput = *i_in; + + split[outputPart.get()].push_back(newjob); + + // Second and later (input) parts should always be added to + // output of first (input) part. + newjob.eMode = SJob::MODE_ADD; + } + + + } + + } + } + + return true; +} + +CCompositeGeometryManager::CPart::CPart(const CPart& other) +{ + eType = other.eType; + pData = other.pData; + subX = other.subX; + subY = other.subY; + subZ = other.subZ; +} + +CCompositeGeometryManager::CVolumePart::CVolumePart(const CVolumePart& other) + : CPart(other) +{ + pGeom = other.pGeom->clone(); +} + +CCompositeGeometryManager::CVolumePart::~CVolumePart() +{ + delete pGeom; +} + +void CCompositeGeometryManager::CVolumePart::getDims(size_t &x, size_t &y, size_t &z) +{ + if (!pGeom) { + x = y = z = 0; + return; + } + + x = pGeom->getGridColCount(); + y = pGeom->getGridRowCount(); + z = pGeom->getGridSliceCount(); +} + +size_t CCompositeGeometryManager::CPart::getSize() +{ + size_t x, y, z; + getDims(x, y, z); + return x * y * z; +} + + + +CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce(const CPart *_other) +{ + const CProjectionPart *other = dynamic_cast(_other); + assert(other); + + // TODO: Is 0.5 sufficient? + double umin = -0.5; + double umax = other->pGeom->getDetectorColCount() + 0.5; + double vmin = -0.5; + double vmax = other->pGeom->getDetectorRowCount() + 0.5; + + double uu[4]; + double vv[4]; + uu[0] = umin; vv[0] = vmin; + uu[1] = umin; vv[1] = vmax; + uu[2] = umax; vv[2] = vmin; + uu[3] = umax; vv[3] = vmax; + + double pixx = pGeom->getPixelLengthX(); + double pixy = pGeom->getPixelLengthY(); + double pixz = pGeom->getPixelLengthZ(); + + double xmin = pGeom->getWindowMinX() - 0.5 * pixx; + double xmax = pGeom->getWindowMaxX() + 0.5 * pixx; + double ymin = pGeom->getWindowMinY() - 0.5 * pixy; + double ymax = pGeom->getWindowMaxY() + 0.5 * pixy; + + // NB: Flipped + double zmax = pGeom->getWindowMinZ() - 2.5 * pixz; + double zmin = pGeom->getWindowMaxZ() + 2.5 * pixz; + + // TODO: This isn't as tight as it could be. + // In particular it won't detect the detector being + // missed entirely on the u side. + + for (int i = 0; i < other->pGeom->getProjectionCount(); ++i) { + for (int j = 0; j < 4; ++j) { + double px, py, pz; + + other->pGeom->backprojectPointX(i, uu[j], vv[j], xmin, py, pz); + //ASTRA_DEBUG("%f %f (%f - %f)", py, pz, ymin, ymax); + if (pz < zmin) zmin = pz; + if (pz > zmax) zmax = pz; + other->pGeom->backprojectPointX(i, uu[j], vv[j], xmax, py, pz); + //ASTRA_DEBUG("%f %f (%f - %f)", py, pz, ymin, ymax); + if (pz < zmin) zmin = pz; + if (pz > zmax) zmax = pz; + + other->pGeom->backprojectPointY(i, uu[j], vv[j], ymin, px, pz); + //ASTRA_DEBUG("%f %f (%f - %f)", px, pz, xmin, xmax); + if (pz < zmin) zmin = pz; + if (pz > zmax) zmax = pz; + other->pGeom->backprojectPointY(i, uu[j], vv[j], ymax, px, pz); + //ASTRA_DEBUG("%f %f (%f - %f)", px, pz, xmin, xmax); + if (pz < zmin) zmin = pz; + if (pz > zmax) zmax = pz; + } + } + + //ASTRA_DEBUG("coord extent: %f - %f", zmin, zmax); + + zmin = (zmin - pixz - pGeom->getWindowMinZ()) / pixz; + zmax = (zmax + pixz - pGeom->getWindowMinZ()) / pixz; + + int _zmin = (int)floor(zmin); + int _zmax = (int)ceil(zmax); + + //ASTRA_DEBUG("index extent: %d - %d", _zmin, _zmax); + + if (_zmin < 0) + _zmin = 0; + if (_zmax > pGeom->getGridSliceCount()) + _zmax = pGeom->getGridSliceCount(); + + if (_zmax <= _zmin) { + _zmin = _zmax = 0; + } + //ASTRA_DEBUG("adjusted extent: %d - %d", _zmin, _zmax); + + CVolumePart *sub = new CVolumePart(); + sub->subX = this->subX; + sub->subY = this->subY; + sub->subZ = this->subZ + _zmin; + sub->pData = pData; + + if (_zmin == _zmax) { + sub->pGeom = 0; + } else { + sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(), + pGeom->getGridRowCount(), + _zmax - _zmin, + pGeom->getWindowMinX(), + pGeom->getWindowMinY(), + pGeom->getWindowMinZ() + _zmin * pixz, + pGeom->getWindowMaxX(), + pGeom->getWindowMaxY(), + pGeom->getWindowMinZ() + _zmax * pixz); + } + + ASTRA_DEBUG("Reduce volume from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getGridSliceCount(), this->subZ + _zmin, this->subZ + _zmax); + + return sub; +} + + + +static size_t ceildiv(size_t a, size_t b) { + return (a + b - 1) / b; +} + +static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount) +{ + size_t blockSize = maxBlock; + size_t blockCount = ceildiv(sliceCount, blockSize); + + // Increase number of blocks to be divisible by div + size_t divCount = div * ceildiv(blockCount, div); + + // If divCount is above sqrt(number of slices), then + // we can't guarantee divisibility by div, but let's try anyway + if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { + blockCount = divCount; + } else { + // If divisibility isn't achievable, we may want to optimize + // differently. + // TODO: Figure out how to model and optimize this. + } + + // Final adjustment to make blocks more evenly sized + // (This can't make the blocks larger) + blockSize = ceildiv(sliceCount, blockCount); + + ASTRA_DEBUG("%ld %ld -> %ld * %ld\n", sliceCount, maxBlock, blockCount, blockSize); + + assert(blockSize <= maxBlock); + assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); + + return blockSize; +} + +template +static V* getProjectionVectors(const P* geom); + +template<> +SConeProjection* getProjectionVectors(const CConeProjectionGeometry3D* pProjGeom) +{ + return genConeProjections(pProjGeom->getProjectionCount(), + pProjGeom->getDetectorColCount(), + pProjGeom->getDetectorRowCount(), + pProjGeom->getOriginSourceDistance(), + pProjGeom->getOriginDetectorDistance(), + pProjGeom->getDetectorSpacingX(), + pProjGeom->getDetectorSpacingY(), + pProjGeom->getProjectionAngles()); +} + +template<> +SConeProjection* getProjectionVectors(const CConeVecProjectionGeometry3D* pProjGeom) +{ + int nth = pProjGeom->getProjectionCount(); + + SConeProjection* pProjs = new SConeProjection[nth]; + for (int i = 0; i < nth; ++i) + pProjs[i] = pProjGeom->getProjectionVectors()[i]; + + return pProjs; +} + +template<> +SPar3DProjection* getProjectionVectors(const CParallelProjectionGeometry3D* pProjGeom) +{ + return genPar3DProjections(pProjGeom->getProjectionCount(), + pProjGeom->getDetectorColCount(), + pProjGeom->getDetectorRowCount(), + pProjGeom->getDetectorSpacingX(), + pProjGeom->getDetectorSpacingY(), + pProjGeom->getProjectionAngles()); +} + +template<> +SPar3DProjection* getProjectionVectors(const CParallelVecProjectionGeometry3D* pProjGeom) +{ + int nth = pProjGeom->getProjectionCount(); + + SPar3DProjection* pProjs = new SPar3DProjection[nth]; + for (int i = 0; i < nth; ++i) + pProjs[i] = pProjGeom->getProjectionVectors()[i]; + + return pProjs; +} + + +template +static void translateProjectionVectors(V* pProjs, int count, double dv) +{ + for (int i = 0; i < count; ++i) { + pProjs[i].fDetSX += dv * pProjs[i].fDetVX; + pProjs[i].fDetSY += dv * pProjs[i].fDetVY; + pProjs[i].fDetSZ += dv * pProjs[i].fDetVZ; + } +} + + + +static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry3D* pProjGeom, int v, int size) +{ + // First convert to vectors, then translate, then convert into new object + + const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); + const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom); + const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(pProjGeom); + const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom); + + if (conegeom || conevec3dgeom) { + SConeProjection* pConeProjs; + if (conegeom) { + pConeProjs = getProjectionVectors(conegeom); + } else { + pConeProjs = getProjectionVectors(conevec3dgeom); + } + + translateProjectionVectors(pConeProjs, pProjGeom->getProjectionCount(), v); + + CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(), + size, + pProjGeom->getDetectorColCount(), + pConeProjs); + + + delete[] pConeProjs; + return ret; + } else { + assert(par3dgeom || parvec3dgeom); + SPar3DProjection* pParProjs; + if (par3dgeom) { + pParProjs = getProjectionVectors(par3dgeom); + } else { + pParProjs = getProjectionVectors(parvec3dgeom); + } + + translateProjectionVectors(pParProjs, pProjGeom->getProjectionCount(), v); + + CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(), + size, + pProjGeom->getDetectorColCount(), + pParProjs); + + delete[] pParProjs; + return ret; + } + +} + + + +// split self into sub-parts: +// - each no bigger than maxSize +// - number of sub-parts is divisible by div +// - maybe all approximately the same size? +CCompositeGeometryManager::TPartList CCompositeGeometryManager::CVolumePart::split(size_t maxSize, int div) +{ + TPartList ret; + + if (true) { + // Split in vertical direction only at first, until we figure out + // a model for splitting in other directions + + size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridRowCount(); + int sliceCount = pGeom->getGridSliceCount(); + size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount); + + int rem = sliceCount % blockSize; + + ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); + + for (int z = -(rem / 2); z < sliceCount; z += blockSize) { + int newsubZ = z; + if (newsubZ < 0) newsubZ = 0; + int endZ = z + blockSize; + if (endZ > sliceCount) endZ = sliceCount; + int size = endZ - newsubZ; + + CVolumePart *sub = new CVolumePart(); + sub->subX = this->subX; + sub->subY = this->subY; + sub->subZ = this->subZ + newsubZ; + + ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); + + double shift = pGeom->getPixelLengthZ() * newsubZ; + + sub->pData = pData; + sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(), + pGeom->getGridRowCount(), + size, + pGeom->getWindowMinX(), + pGeom->getWindowMinY(), + pGeom->getWindowMinZ() + shift, + pGeom->getWindowMaxX(), + pGeom->getWindowMaxY(), + pGeom->getWindowMinZ() + shift + size * pGeom->getPixelLengthZ()); + + ret.push_back(boost::shared_ptr(sub)); + } + } + + return ret; +} + +CCompositeGeometryManager::CVolumePart* CCompositeGeometryManager::CVolumePart::clone() const +{ + return new CVolumePart(*this); +} + +CCompositeGeometryManager::CProjectionPart::CProjectionPart(const CProjectionPart& other) + : CPart(other) +{ + pGeom = other.pGeom->clone(); +} + +CCompositeGeometryManager::CProjectionPart::~CProjectionPart() +{ + delete pGeom; +} + +void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, size_t &z) +{ + if (!pGeom) { + x = y = z = 0; + return; + } + + x = pGeom->getDetectorColCount(); + y = pGeom->getProjectionCount(); + z = pGeom->getDetectorRowCount(); +} + + +CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::reduce(const CPart *_other) +{ + const CVolumePart *other = dynamic_cast(_other); + assert(other); + + double vmin_g, vmax_g; + + // reduce self to only cover intersection with projection of VolumePart + // (Project corners of volume, take bounding box) + + for (int i = 0; i < pGeom->getProjectionCount(); ++i) { + + double vol_u[8]; + double vol_v[8]; + + double pixx = other->pGeom->getPixelLengthX(); + double pixy = other->pGeom->getPixelLengthY(); + double pixz = other->pGeom->getPixelLengthZ(); + + // TODO: Is 0.5 sufficient? + double xmin = other->pGeom->getWindowMinX() - 0.5 * pixx; + double xmax = other->pGeom->getWindowMaxX() + 0.5 * pixx; + double ymin = other->pGeom->getWindowMinY() - 0.5 * pixy; + double ymax = other->pGeom->getWindowMaxY() + 0.5 * pixy; + double zmin = other->pGeom->getWindowMinZ() - 0.5 * pixz; + double zmax = other->pGeom->getWindowMaxZ() + 0.5 * pixz; + + pGeom->projectPoint(xmin, ymin, zmin, i, vol_u[0], vol_v[0]); + pGeom->projectPoint(xmin, ymin, zmax, i, vol_u[1], vol_v[1]); + pGeom->projectPoint(xmin, ymax, zmin, i, vol_u[2], vol_v[2]); + pGeom->projectPoint(xmin, ymax, zmax, i, vol_u[3], vol_v[3]); + pGeom->projectPoint(xmax, ymin, zmin, i, vol_u[4], vol_v[4]); + pGeom->projectPoint(xmax, ymin, zmax, i, vol_u[5], vol_v[5]); + pGeom->projectPoint(xmax, ymax, zmin, i, vol_u[6], vol_v[6]); + pGeom->projectPoint(xmax, ymax, zmax, i, vol_u[7], vol_v[7]); + + double vmin = vol_v[0]; + double vmax = vol_v[0]; + + for (int j = 1; j < 8; ++j) { + if (vol_v[j] < vmin) + vmin = vol_v[j]; + if (vol_v[j] > vmax) + vmax = vol_v[j]; + } + + if (i == 0 || vmin < vmin_g) + vmin_g = vmin; + if (i == 0 || vmax > vmax_g) + vmax_g = vmax; + } + + // fprintf(stderr, "v extent: %f %f\n", vmin_g, vmax_g); + + int _vmin = (int)floor(vmin_g - 1.0f); + int _vmax = (int)ceil(vmax_g + 1.0f); + if (_vmin < 0) + _vmin = 0; + if (_vmax > pGeom->getDetectorRowCount()) + _vmax = pGeom->getDetectorRowCount(); + + if (_vmin >= _vmax) { + _vmin = _vmax = 0; + } + + CProjectionPart *sub = new CProjectionPart(); + sub->subX = this->subX; + sub->subY = this->subY; + sub->subZ = this->subZ + _vmin; + + sub->pData = pData; + + if (_vmin == _vmax) { + sub->pGeom = 0; + } else { + sub->pGeom = getSubProjectionGeometry(pGeom, _vmin, _vmax - _vmin); + } + + ASTRA_DEBUG("Reduce projection from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getDetectorRowCount(), this->subZ + _vmin, this->subZ + _vmax); + + return sub; +} + + +CCompositeGeometryManager::TPartList CCompositeGeometryManager::CProjectionPart::split(size_t maxSize, int div) +{ + TPartList ret; + + if (true) { + // Split in vertical direction only at first, until we figure out + // a model for splitting in other directions + + size_t sliceSize = ((size_t) pGeom->getDetectorColCount()) * pGeom->getProjectionCount(); + int sliceCount = pGeom->getDetectorRowCount(); + size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount); + + int rem = sliceCount % blockSize; + + for (int z = -(rem / 2); z < sliceCount; z += blockSize) { + int newsubZ = z; + if (newsubZ < 0) newsubZ = 0; + int endZ = z + blockSize; + if (endZ > sliceCount) endZ = sliceCount; + int size = endZ - newsubZ; + + CProjectionPart *sub = new CProjectionPart(); + sub->subX = this->subX; + sub->subY = this->subY; + sub->subZ = this->subZ + newsubZ; + + ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); + + sub->pData = pData; + + sub->pGeom = getSubProjectionGeometry(pGeom, newsubZ, size); + + ret.push_back(boost::shared_ptr(sub)); + } + } + + return ret; + +} + +CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjectionPart::clone() const +{ + return new CProjectionPart(*this); +} + + +bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData) +{ + ASTRA_DEBUG("CCompositeGeometryManager::doFP"); + // Create single job for FP + // Run result + + CVolumePart *input = new CVolumePart(); + input->pData = pVolData; + input->subX = 0; + input->subY = 0; + input->subZ = 0; + input->pGeom = pVolData->getGeometry()->clone(); + ASTRA_DEBUG("Main FP VolumePart -> %p", (void*)input); + + CProjectionPart *output = new CProjectionPart(); + output->pData = pProjData; + output->subX = 0; + output->subY = 0; + output->subZ = 0; + output->pGeom = pProjData->getGeometry()->clone(); + ASTRA_DEBUG("Main FP ProjectionPart -> %p", (void*)output); + + SJob FP; + FP.pInput = boost::shared_ptr(input); + FP.pOutput = boost::shared_ptr(output); + FP.pProjector = pProjector; + FP.eType = SJob::JOB_FP; + FP.eMode = SJob::MODE_SET; + + TJobList L; + L.push_back(FP); + + return doJobs(L); +} + +bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData) +{ + ASTRA_DEBUG("CCompositeGeometryManager::doBP"); + // Create single job for BP + // Run result + + CProjectionPart *input = new CProjectionPart(); + input->pData = pProjData; + input->subX = 0; + input->subY = 0; + input->subZ = 0; + input->pGeom = pProjData->getGeometry()->clone(); + + CVolumePart *output = new CVolumePart(); + output->pData = pVolData; + output->subX = 0; + output->subY = 0; + output->subZ = 0; + output->pGeom = pVolData->getGeometry()->clone(); + + SJob BP; + BP.pInput = boost::shared_ptr(input); + BP.pOutput = boost::shared_ptr(output); + BP.pProjector = pProjector; + BP.eType = SJob::JOB_BP; + BP.eMode = SJob::MODE_SET; + + TJobList L; + L.push_back(BP); + + return doJobs(L); +} + + + +bool CCompositeGeometryManager::doJobs(TJobList &jobs) +{ + ASTRA_DEBUG("CCompositeGeometryManager::doJobs"); + + // Sort job list into job set by output part + TJobSet jobset; + + for (TJobList::iterator i = jobs.begin(); i != jobs.end(); ++i) { + jobset[i->pOutput.get()].push_back(*i); + } + + size_t maxSize = astraCUDA3d::availableGPUMemory(); + if (maxSize == 0) { + ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); + maxSize = 1024 * 1024 * 1024; + } else { + ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + } + maxSize = (maxSize * 9) / 10; + + maxSize /= sizeof(float); + int div = 1; + + // TODO: Multi-GPU support + + // Split jobs to fit + TJobSet split; + splitJobs(jobset, maxSize, div, split); + jobset.clear(); + + // Run jobs + + for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) { + + CPart* output = iter->first; + TJobList& L = iter->second; + + assert(!L.empty()); + + bool zero = L.begin()->eMode == SJob::MODE_SET; + + size_t outx, outy, outz; + output->getDims(outx, outy, outz); + + if (L.begin()->eType == SJob::JOB_NOP) { + // just zero output? + if (zero) { + for (size_t z = 0; z < outz; ++z) { + for (size_t y = 0; y < outy; ++y) { + float* ptr = output->pData->getData(); + ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); + ptr += (y + output->subY) * (size_t)output->pData->getWidth(); + ptr += output->subX; + memset(ptr, 0, sizeof(float) * outx); + } + } + } + continue; + } + + + astraCUDA3d::SSubDimensions3D dstdims; + dstdims.nx = output->pData->getWidth(); + dstdims.pitch = dstdims.nx; + dstdims.ny = output->pData->getHeight(); + dstdims.nz = output->pData->getDepth(); + dstdims.subnx = outx; + dstdims.subny = outy; + dstdims.subnz = outz; + ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); + dstdims.subx = output->subX; + dstdims.suby = output->subY; + dstdims.subz = output->subZ; + float *dst = output->pData->getData(); + + astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); + bool ok = outputMem; + + for (TJobList::iterator i = L.begin(); i != L.end(); ++i) { + SJob &j = *i; + + assert(j.pInput); + + CCudaProjector3D *projector = dynamic_cast(j.pProjector); + Cuda3DProjectionKernel projKernel = ker3d_default; + int detectorSuperSampling = 1; + int voxelSuperSampling = 1; + if (projector) { + projKernel = projector->getProjectionKernel(); + detectorSuperSampling = projector->getDetectorSuperSampling(); + voxelSuperSampling = projector->getVoxelSuperSampling(); + } + + size_t inx, iny, inz; + j.pInput->getDims(inx, iny, inz); + astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); + + astraCUDA3d::SSubDimensions3D srcdims; + srcdims.nx = j.pInput->pData->getWidth(); + srcdims.pitch = srcdims.nx; + srcdims.ny = j.pInput->pData->getHeight(); + srcdims.nz = j.pInput->pData->getDepth(); + srcdims.subnx = inx; + srcdims.subny = iny; + srcdims.subnz = inz; + srcdims.subx = j.pInput->subX; + srcdims.suby = j.pInput->subY; + srcdims.subz = j.pInput->subZ; + const float *src = j.pInput->pData->getDataConst(); + + ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); + if (!ok) ASTRA_ERROR("Error copying input data to GPU"); + + if (j.eType == SJob::JOB_FP) { + assert(dynamic_cast(j.pInput.get())); + assert(dynamic_cast(j.pOutput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); + + ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); + if (!ok) ASTRA_ERROR("Error performing sub-FP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); + } else if (j.eType == SJob::JOB_BP) { + assert(dynamic_cast(j.pOutput.get())); + assert(dynamic_cast(j.pInput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); + + ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); + if (!ok) ASTRA_ERROR("Error performing sub-BP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); + } else { + assert(false); + } + + ok = astraCUDA3d::freeGPUMemory(inputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + } + + ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); + if (!ok) ASTRA_ERROR("Error copying output data from GPU"); + + ok = astraCUDA3d::freeGPUMemory(outputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + } + + return true; +} + + + +} + +#endif diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp index dd22eba..18f0f8a 100644 --- a/src/ConeProjectionGeometry3D.cpp +++ b/src/ConeProjectionGeometry3D.cpp @@ -29,6 +29,7 @@ $Id$ #include "astra/ConeProjectionGeometry3D.h" #include "astra/Logging.h" +#include "astra/GeometryUtil3D.h" #include #include @@ -230,14 +231,14 @@ CVector3D CConeProjectionGeometry3D::getProjectionDirection(int _iProjectionInde return ret; } -void CConeProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ, - int iAngleIndex, - float32 &fU, float32 &fV) const +void CConeProjectionGeometry3D::projectPoint(double fX, double fY, double fZ, + int iAngleIndex, + double &fU, double &fV) const { ASTRA_ASSERT(iAngleIndex >= 0); ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); - float alpha = m_pfProjectionAngles[iAngleIndex]; + double alpha = m_pfProjectionAngles[iAngleIndex]; // Project point onto optical axis @@ -245,14 +246,14 @@ void CConeProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ, // Vector source->origin is (-sin(alpha), cos(alpha)) // Distance from source, projected on optical axis - float fD = -sin(alpha) * fX + cos(alpha) * fY + m_fOriginSourceDistance; + double fD = -sin(alpha) * fX + cos(alpha) * fY + m_fOriginSourceDistance; // Scale fZ to detector plane fV = detectorOffsetYToRowIndexFloat( (fZ * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD ); // Orthogonal distance in XY-plane to optical axis - float fS = cos(alpha) * fX + sin(alpha) * fY; + double fS = cos(alpha) * fX + sin(alpha) * fY; // Scale fS to detector plane fU = detectorOffsetXToColIndexFloat( (fS * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD ); @@ -261,5 +262,84 @@ void CConeProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ, } +void CConeProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SConeProjection *projs = genConeProjections(1, m_iDetectorColCount, m_iDetectorRowCount, + m_fOriginSourceDistance, + m_fOriginDetectorDistance, + m_fDetectorSpacingX, m_fDetectorSpacingY, + &m_pfProjectionAngles[iAngleIndex]); + + SConeProjection &proj = projs[0]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fX - proj.fSrcX) / (px - proj.fSrcX); + + fY = proj.fSrcY + a * (py - proj.fSrcY); + fZ = proj.fSrcZ + a * (pz - proj.fSrcZ); + + delete[] projs; +} + +void CConeProjectionGeometry3D::backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SConeProjection *projs = genConeProjections(1, m_iDetectorColCount, m_iDetectorRowCount, + m_fOriginSourceDistance, + m_fOriginDetectorDistance, + m_fDetectorSpacingX, m_fDetectorSpacingY, + &m_pfProjectionAngles[iAngleIndex]); + + SConeProjection &proj = projs[0]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fY - proj.fSrcY) / (py - proj.fSrcY); + + fX = proj.fSrcX + a * (px - proj.fSrcX); + fZ = proj.fSrcZ + a * (pz - proj.fSrcZ); + + delete[] projs; +} + +void CConeProjectionGeometry3D::backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SConeProjection *projs = genConeProjections(1, m_iDetectorColCount, m_iDetectorRowCount, + m_fOriginSourceDistance, + m_fOriginDetectorDistance, + m_fDetectorSpacingX, m_fDetectorSpacingY, + &m_pfProjectionAngles[iAngleIndex]); + + SConeProjection &proj = projs[0]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fZ - proj.fSrcZ) / (pz - proj.fSrcZ); + + fX = proj.fSrcX + a * (px - proj.fSrcX); + fY = proj.fSrcY + a * (py - proj.fSrcY); + + delete[] projs; +} + + } // end namespace astra diff --git a/src/ConeVecProjectionGeometry3D.cpp b/src/ConeVecProjectionGeometry3D.cpp index 47ed630..86e3bd6 100644 --- a/src/ConeVecProjectionGeometry3D.cpp +++ b/src/ConeVecProjectionGeometry3D.cpp @@ -241,9 +241,9 @@ CVector3D CConeVecProjectionGeometry3D::getProjectionDirection(int _iProjectionI return CVector3D(p.fDetSX + (u+0.5)*p.fDetUX + (v+0.5)*p.fDetVX - p.fSrcX, p.fDetSY + (u+0.5)*p.fDetUY + (v+0.5)*p.fDetVY - p.fSrcY, p.fDetSZ + (u+0.5)*p.fDetUZ + (v+0.5)*p.fDetVZ - p.fSrcZ); } -void CConeVecProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ, +void CConeVecProjectionGeometry3D::projectPoint(double fX, double fY, double fZ, int iAngleIndex, - float32 &fU, float32 &fV) const + double &fU, double &fV) const { ASTRA_ASSERT(iAngleIndex >= 0); ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); @@ -262,6 +262,60 @@ void CConeVecProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 } +void CConeVecProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SConeProjection &proj = m_pProjectionAngles[iAngleIndex]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fX - proj.fSrcX) / (px - proj.fSrcX); + + fY = proj.fSrcY + a * (py - proj.fSrcY); + fZ = proj.fSrcZ + a * (pz - proj.fSrcZ); +} + +void CConeVecProjectionGeometry3D::backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SConeProjection &proj = m_pProjectionAngles[iAngleIndex]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fY - proj.fSrcY) / (py - proj.fSrcY); + + fX = proj.fSrcX + a * (px - proj.fSrcX); + fZ = proj.fSrcZ + a * (pz - proj.fSrcZ); +} + +void CConeVecProjectionGeometry3D::backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SConeProjection &proj = m_pProjectionAngles[iAngleIndex]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fZ - proj.fSrcZ) / (pz - proj.fSrcZ); + + fX = proj.fSrcX + a * (px - proj.fSrcX); + fY = proj.fSrcY + a * (py - proj.fSrcY); +} + //---------------------------------------------------------------------------------------- bool CConeVecProjectionGeometry3D::_check() diff --git a/src/CudaBackProjectionAlgorithm3D.cpp b/src/CudaBackProjectionAlgorithm3D.cpp index 8cf4c3b..ce8e111 100644 --- a/src/CudaBackProjectionAlgorithm3D.cpp +++ b/src/CudaBackProjectionAlgorithm3D.cpp @@ -37,6 +37,7 @@ $Id$ #include "astra/ParallelProjectionGeometry3D.h" #include "astra/ParallelVecProjectionGeometry3D.h" #include "astra/ConeVecProjectionGeometry3D.h" +#include "astra/CompositeGeometryManager.h" #include "astra/Logging.h" @@ -203,9 +204,16 @@ void CCudaBackProjectionAlgorithm3D::run(int _iNrIterations) &volgeom, projgeom, m_iGPUIndex, m_iVoxelSuperSampling); } else { + +#if 1 + CCompositeGeometryManager cgm; + + cgm.doBP(m_pProjector, pReconMem, pSinoMem); +#else astraCudaBP(pReconMem->getData(), pSinoMem->getDataConst(), &volgeom, projgeom, m_iGPUIndex, m_iVoxelSuperSampling); +#endif } } diff --git a/src/CudaForwardProjectionAlgorithm3D.cpp b/src/CudaForwardProjectionAlgorithm3D.cpp index e57e077..209f5a5 100644 --- a/src/CudaForwardProjectionAlgorithm3D.cpp +++ b/src/CudaForwardProjectionAlgorithm3D.cpp @@ -40,6 +40,8 @@ $Id$ #include "astra/ParallelVecProjectionGeometry3D.h" #include "astra/ConeVecProjectionGeometry3D.h" +#include "astra/CompositeGeometryManager.h" + #include "astra/Logging.h" #include "../cuda/3d/astra3d.h" @@ -263,6 +265,12 @@ void CCudaForwardProjectionAlgorithm3D::run(int) // check initialized assert(m_bIsInitialized); +#if 1 + CCompositeGeometryManager cgm; + + cgm.doFP(m_pProjector, m_pVolume, m_pProjections); + +#else const CProjectionGeometry3D* projgeom = m_pProjections->getGeometry(); const CVolumeGeometry3D& volgeom = *m_pVolume->getGeometry(); @@ -294,6 +302,7 @@ void CCudaForwardProjectionAlgorithm3D::run(int) astraCudaFP(m_pVolume->getDataConst(), m_pProjections->getData(), &volgeom, projgeom, m_iGPUIndex, m_iDetectorSuperSampling, projKernel); +#endif } diff --git a/src/GeometryUtil3D.cpp b/src/GeometryUtil3D.cpp index 52dd5a9..c6bfd8b 100644 --- a/src/GeometryUtil3D.cpp +++ b/src/GeometryUtil3D.cpp @@ -28,8 +28,96 @@ $Id$ #include "astra/GeometryUtil3D.h" +#include + namespace astra { + +SConeProjection* genConeProjections(unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + double fOriginSourceDistance, + double fOriginDetectorDistance, + double fDetUSize, + double fDetVSize, + const float *pfAngles) +{ + SConeProjection base; + base.fSrcX = 0.0f; + base.fSrcY = -fOriginSourceDistance; + base.fSrcZ = 0.0f; + + base.fDetSX = iProjU * fDetUSize * -0.5f; + base.fDetSY = fOriginDetectorDistance; + base.fDetSZ = iProjV * fDetVSize * -0.5f; + + base.fDetUX = fDetUSize; + base.fDetUY = 0.0f; + base.fDetUZ = 0.0f; + + base.fDetVX = 0.0f; + base.fDetVY = 0.0f; + base.fDetVZ = fDetVSize; + + SConeProjection* p = new SConeProjection[iProjAngles]; + +#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0) + + for (unsigned int i = 0; i < iProjAngles; ++i) { + ROTATE0(Src, i, pfAngles[i]); + ROTATE0(DetS, i, pfAngles[i]); + ROTATE0(DetU, i, pfAngles[i]); + ROTATE0(DetV, i, pfAngles[i]); + } + +#undef ROTATE0 + + return p; +} + +SPar3DProjection* genPar3DProjections(unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + double fDetUSize, + double fDetVSize, + const float *pfAngles) +{ + SPar3DProjection base; + base.fRayX = 0.0f; + base.fRayY = 1.0f; + base.fRayZ = 0.0f; + + base.fDetSX = iProjU * fDetUSize * -0.5f; + base.fDetSY = 0.0f; + base.fDetSZ = iProjV * fDetVSize * -0.5f; + + base.fDetUX = fDetUSize; + base.fDetUY = 0.0f; + base.fDetUZ = 0.0f; + + base.fDetVX = 0.0f; + base.fDetVY = 0.0f; + base.fDetVZ = fDetVSize; + + SPar3DProjection* p = new SPar3DProjection[iProjAngles]; + +#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0) + + for (unsigned int i = 0; i < iProjAngles; ++i) { + ROTATE0(Ray, i, pfAngles[i]); + ROTATE0(DetS, i, pfAngles[i]); + ROTATE0(DetU, i, pfAngles[i]); + ROTATE0(DetV, i, pfAngles[i]); + } + +#undef ROTATE0 + + return p; +} + + + + // (See declaration in header for (mathematical) description of these functions) @@ -72,4 +160,88 @@ void computeBP_UV_Coeffs(const SConeProjection& proj, double &fUX, double &fUY, } +// TODO: Handle cases of rays parallel to coordinate planes + +void backprojectPointX(const SPar3DProjection& proj, double fU, double fV, + double fX, double &fY, double &fZ) +{ + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fX - px) / proj.fRayX; + + fY = py + a * proj.fRayY; + fZ = pz + a * proj.fRayZ; +} + +void backprojectPointY(const SPar3DProjection& proj, double fU, double fV, + double fY, double &fX, double &fZ) +{ + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fY - py) / proj.fRayY; + + fX = px + a * proj.fRayX; + fZ = pz + a * proj.fRayZ; + +} + +void backprojectPointZ(const SPar3DProjection& proj, double fU, double fV, + double fZ, double &fX, double &fY) +{ + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fZ - pz) / proj.fRayZ; + + fX = px + a * proj.fRayX; + fY = py + a * proj.fRayY; +} + + + +void backprojectPointX(const SConeProjection& proj, double fU, double fV, + double fX, double &fY, double &fZ) +{ + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fX - proj.fSrcX) / (px - proj.fSrcX); + + fY = proj.fSrcY + a * (py - proj.fSrcY); + fZ = proj.fSrcZ + a * (pz - proj.fSrcZ); +} + +void backprojectPointY(const SConeProjection& proj, double fU, double fV, + double fY, double &fX, double &fZ) +{ + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fY - proj.fSrcY) / (py - proj.fSrcY); + + fX = proj.fSrcX + a * (px - proj.fSrcX); + fZ = proj.fSrcZ + a * (pz - proj.fSrcZ); +} + +void backprojectPointZ(const SConeProjection& proj, double fU, double fV, + double fZ, double &fX, double &fY) +{ + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fZ - proj.fSrcZ) / (pz - proj.fSrcZ); + + fX = proj.fSrcX + a * (px - proj.fSrcX); + fY = proj.fSrcY + a * (py - proj.fSrcY); +} + + } diff --git a/src/ParallelProjectionGeometry3D.cpp b/src/ParallelProjectionGeometry3D.cpp index 1c87157..7b64fd9 100644 --- a/src/ParallelProjectionGeometry3D.cpp +++ b/src/ParallelProjectionGeometry3D.cpp @@ -27,8 +27,10 @@ $Id$ */ #include "astra/ParallelProjectionGeometry3D.h" -#include +#include "astra/GeometryUtil3D.h" + +#include #include using namespace std; @@ -185,9 +187,9 @@ CVector3D CParallelProjectionGeometry3D::getProjectionDirection(int _iProjection return CVector3D(fDirX, fDirY, fDirZ); } -void CParallelProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ, +void CParallelProjectionGeometry3D::projectPoint(double fX, double fY, double fZ, int iAngleIndex, - float32 &fU, float32 &fV) const + double &fU, double &fV) const { ASTRA_ASSERT(iAngleIndex >= 0); ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); @@ -214,6 +216,79 @@ CParallelProjectionGeometry2D * CParallelProjectionGeometry3D::createProjectionG return pOutput; } +void CParallelProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SPar3DProjection *projs = genPar3DProjections(1, m_iDetectorColCount, m_iDetectorRowCount, + m_fDetectorSpacingX, m_fDetectorSpacingY, + &m_pfProjectionAngles[iAngleIndex]); + + SPar3DProjection &proj = projs[0]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fX - px) / proj.fRayX; + + fY = py + a * proj.fRayY; + fZ = pz + a * proj.fRayZ; + + delete[] projs; +} + +void CParallelProjectionGeometry3D::backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SPar3DProjection *projs = genPar3DProjections(1, m_iDetectorColCount, m_iDetectorRowCount, + m_fDetectorSpacingX, m_fDetectorSpacingY, + &m_pfProjectionAngles[iAngleIndex]); + + SPar3DProjection &proj = projs[0]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fY - py) / proj.fRayY; + + fX = px + a * proj.fRayX; + fZ = pz + a * proj.fRayZ; + + delete[] projs; +} + +void CParallelProjectionGeometry3D::backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SPar3DProjection *projs = genPar3DProjections(1, m_iDetectorColCount, m_iDetectorRowCount, + m_fDetectorSpacingX, m_fDetectorSpacingY, + &m_pfProjectionAngles[iAngleIndex]); + + SPar3DProjection &proj = projs[0]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fZ - pz) / proj.fRayZ; + + fX = px + a * proj.fRayX; + fY = py + a * proj.fRayY; + + delete[] projs; +} + + //---------------------------------------------------------------------------------------- } // end namespace astra diff --git a/src/ParallelVecProjectionGeometry3D.cpp b/src/ParallelVecProjectionGeometry3D.cpp index ffad6d0..d04400b 100644 --- a/src/ParallelVecProjectionGeometry3D.cpp +++ b/src/ParallelVecProjectionGeometry3D.cpp @@ -239,9 +239,9 @@ CVector3D CParallelVecProjectionGeometry3D::getProjectionDirection(int _iProject return CVector3D(p.fRayX, p.fRayY, p.fRayZ); } -void CParallelVecProjectionGeometry3D::projectPoint(float32 fX, float32 fY, float32 fZ, - int iAngleIndex, - float32 &fU, float32 &fV) const +void CParallelVecProjectionGeometry3D::projectPoint(double fX, double fY, double fZ, + int iAngleIndex, + double &fU, double &fV) const { ASTRA_ASSERT(iAngleIndex >= 0); ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); @@ -258,6 +258,61 @@ void CParallelVecProjectionGeometry3D::projectPoint(float32 fX, float32 fY, floa } +void CParallelVecProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV, + double fX, double &fY, double &fZ) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SPar3DProjection &proj = m_pProjectionAngles[iAngleIndex]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fX - px) / proj.fRayX; + + fY = py + a * proj.fRayY; + fZ = pz + a * proj.fRayZ; +} + +void CParallelVecProjectionGeometry3D::backprojectPointY(int iAngleIndex, double fU, double fV, + double fY, double &fX, double &fZ) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SPar3DProjection &proj = m_pProjectionAngles[iAngleIndex]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fY - py) / proj.fRayY; + + fX = px + a * proj.fRayX; + fZ = pz + a * proj.fRayZ; +} + +void CParallelVecProjectionGeometry3D::backprojectPointZ(int iAngleIndex, double fU, double fV, + double fZ, double &fX, double &fY) const +{ + ASTRA_ASSERT(iAngleIndex >= 0); + ASTRA_ASSERT(iAngleIndex < m_iProjectionAngleCount); + + SPar3DProjection &proj = m_pProjectionAngles[iAngleIndex]; + + double px = proj.fDetSX + fU * proj.fDetUX + fV * proj.fDetVX; + double py = proj.fDetSY + fU * proj.fDetUY + fV * proj.fDetVY; + double pz = proj.fDetSZ + fU * proj.fDetUZ + fV * proj.fDetVZ; + + double a = (fZ - pz) / proj.fRayZ; + + fX = px + a * proj.fRayX; + fY = py + a * proj.fRayY; +} + + //---------------------------------------------------------------------------------------- bool CParallelVecProjectionGeometry3D::_check() -- cgit v1.2.3 From c66e4b030467ddadac71e5bd4803737cf94c0a07 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 22 Dec 2015 14:00:50 +0100 Subject: Reduce FP3D CUDA kernel runtime This reduces the chance of the Windows display driver watchdog triggering, and doesn't seem to hurt performance. --- cuda/3d/cone_fp.cu | 2 +- cuda/3d/par3d_fp.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index b36d2bc..13b184f 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -49,7 +49,7 @@ namespace astraCUDA3d { static const unsigned int g_anglesPerBlock = 4; // thickness of the slices we're splitting the volume up into -static const unsigned int g_blockSlices = 64; +static const unsigned int g_blockSlices = 32; static const unsigned int g_detBlockU = 32; static const unsigned int g_detBlockV = 32; diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index b14c494..3ce3d42 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -49,7 +49,7 @@ namespace astraCUDA3d { static const unsigned int g_anglesPerBlock = 4; // thickness of the slices we're splitting the volume up into -static const unsigned int g_blockSlices = 64; +static const unsigned int g_blockSlices = 32; static const unsigned int g_detBlockU = 32; static const unsigned int g_detBlockV = 32; -- cgit v1.2.3 From 687c5e244e46e51786afad77f5015cae9abad129 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 6 Jan 2016 15:10:34 +0100 Subject: Add multi-GPU support to CompositeGeometryManager --- cuda/3d/mem3d.h | 2 + include/astra/CompositeGeometryManager.h | 16 ++ matlab/mex/astra_mex_c.cpp | 45 +++- src/CompositeGeometryManager.cpp | 434 +++++++++++++++++++++++-------- 4 files changed, 378 insertions(+), 119 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 82bad19..acb72cb 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -87,6 +87,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) bool freeGPUMemory(MemHandle3D handle); +bool setGPUIndex(int index); + bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel); diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 6610151..49d02a7 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -50,9 +50,16 @@ class CProjectionGeometry3D; class CProjector3D; +struct SGPUParams { + std::vector GPUIndices; + size_t memory; +}; + class _AstraExport CCompositeGeometryManager { public: + CCompositeGeometryManager(); + class CPart; typedef std::list > TPartList; class CPart { @@ -139,10 +146,19 @@ public: bool doFP(CProjector3D *pProjector, const std::vector& volData, const std::vector& projData); bool doBP(CProjector3D *pProjector, const std::vector& volData, const std::vector& projData); + void setGPUIndices(const std::vector& GPUIndices); + + static void setGlobalGPUParams(const SGPUParams& params); + protected: bool splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split); + std::vector m_GPUIndices; + size_t m_iMaxSize; + + + static SGPUParams* s_params; }; } diff --git a/matlab/mex/astra_mex_c.cpp b/matlab/mex/astra_mex_c.cpp index d34334c..fdf4f33 100644 --- a/matlab/mex/astra_mex_c.cpp +++ b/matlab/mex/astra_mex_c.cpp @@ -38,6 +38,7 @@ $Id$ #include "astra/Globals.h" #ifdef ASTRA_CUDA #include "../cuda/2d/darthelper.h" +#include "astra/CompositeGeometryManager.h" #endif using namespace std; using namespace astra; @@ -83,12 +84,46 @@ void astra_mex_use_cuda(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs * Set active GPU */ void astra_mex_set_gpu_index(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) -{ +{ #ifdef ASTRA_CUDA - if (nrhs >= 2) { - bool ret = astraCUDA::setGPUIndex((int)mxGetScalar(prhs[1])); - if (!ret) - mexPrintf("Failed to set GPU %d\n", (int)mxGetScalar(prhs[1])); + bool usage = false; + if (nrhs != 2 && nrhs != 4) { + usage = true; + } + + astra::SGPUParams params; + params.memory = 0; + + if (!usage && nrhs >= 4) { + std::string s = mexToString(prhs[2]); + if (s != "memory") { + usage = true; + } else { + params.memory = (size_t)mxGetScalar(prhs[3]); + } + } + + if (!usage && nrhs >= 2) { + int n = mxGetN(prhs[1]) * mxGetM(prhs[1]); + params.GPUIndices.resize(n); + double* pdMatlabData = mxGetPr(prhs[1]); + for (int i = 0; i < n; ++i) + params.GPUIndices[i] = (int)pdMatlabData[i]; + + + astra::CCompositeGeometryManager::setGlobalGPUParams(params); + + + // Set first GPU + if (n >= 1) { + bool ret = astraCUDA::setGPUIndex((int)pdMatlabData[0]); + if (!ret) + mexPrintf("Failed to set GPU %d\n", (int)pdMatlabData[0]); + } + } + + if (usage) { + mexPrintf("Usage: astra_mex('set_gpu_index', index/indices [, 'memory', memory])"); } #endif } diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index eed06c4..d1b713e 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -44,11 +44,31 @@ along with the ASTRA Toolbox. If not, see . #include "../cuda/3d/mem3d.h" #include +#include + +#ifndef USE_PTHREADS +#include +#include +#endif namespace astra { + +SGPUParams* CCompositeGeometryManager::s_params = 0; + +CCompositeGeometryManager::CCompositeGeometryManager() +{ + m_iMaxSize = 0; + + if (s_params) { + m_iMaxSize = s_params->memory; + m_GPUIndices = s_params->GPUIndices; + } +} + + // JOB: -// +// // VolumePart // ProjectionPart // FP-or-BP @@ -76,7 +96,6 @@ namespace astra { // (First approach: 0.5/0.5) - bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split) { split.clear(); @@ -848,6 +867,260 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector +static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter) +{ + CCompositeGeometryManager::CPart* output = iter->first; + const CCompositeGeometryManager::TJobList& L = iter->second; + + assert(!L.empty()); + + bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET; + + size_t outx, outy, outz; + output->getDims(outx, outy, outz); + + if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) { + // just zero output? + if (zero) { + for (size_t z = 0; z < outz; ++z) { + for (size_t y = 0; y < outy; ++y) { + float* ptr = output->pData->getData(); + ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); + ptr += (y + output->subY) * (size_t)output->pData->getWidth(); + ptr += output->subX; + memset(ptr, 0, sizeof(float) * outx); + } + } + } + return true; + } + + + astraCUDA3d::SSubDimensions3D dstdims; + dstdims.nx = output->pData->getWidth(); + dstdims.pitch = dstdims.nx; + dstdims.ny = output->pData->getHeight(); + dstdims.nz = output->pData->getDepth(); + dstdims.subnx = outx; + dstdims.subny = outy; + dstdims.subnz = outz; + ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); + dstdims.subx = output->subX; + dstdims.suby = output->subY; + dstdims.subz = output->subZ; + float *dst = output->pData->getData(); + + astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); + bool ok = outputMem; + + for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) { + const CCompositeGeometryManager::SJob &j = *i; + + assert(j.pInput); + + CCudaProjector3D *projector = dynamic_cast(j.pProjector); + Cuda3DProjectionKernel projKernel = ker3d_default; + int detectorSuperSampling = 1; + int voxelSuperSampling = 1; + if (projector) { + projKernel = projector->getProjectionKernel(); + detectorSuperSampling = projector->getDetectorSuperSampling(); + voxelSuperSampling = projector->getVoxelSuperSampling(); + } + + size_t inx, iny, inz; + j.pInput->getDims(inx, iny, inz); + astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); + + astraCUDA3d::SSubDimensions3D srcdims; + srcdims.nx = j.pInput->pData->getWidth(); + srcdims.pitch = srcdims.nx; + srcdims.ny = j.pInput->pData->getHeight(); + srcdims.nz = j.pInput->pData->getDepth(); + srcdims.subnx = inx; + srcdims.subny = iny; + srcdims.subnz = inz; + srcdims.subx = j.pInput->subX; + srcdims.suby = j.pInput->subY; + srcdims.subz = j.pInput->subZ; + const float *src = j.pInput->pData->getDataConst(); + + ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); + if (!ok) ASTRA_ERROR("Error copying input data to GPU"); + + if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { + assert(dynamic_cast(j.pInput.get())); + assert(dynamic_cast(j.pOutput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); + + ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); + if (!ok) ASTRA_ERROR("Error performing sub-FP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); + } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { + assert(dynamic_cast(j.pOutput.get())); + assert(dynamic_cast(j.pInput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); + + ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); + if (!ok) ASTRA_ERROR("Error performing sub-BP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); + } else { + assert(false); + } + + ok = astraCUDA3d::freeGPUMemory(inputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + } + + ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); + if (!ok) ASTRA_ERROR("Error copying output data from GPU"); + + ok = astraCUDA3d::freeGPUMemory(outputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + return true; +} + + +class WorkQueue { +public: + WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) { +#ifdef USE_PTHREADS + pthread_mutex_init(&m_mutex, 0); +#endif + m_iter = m_jobs.begin(); + } + bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) { + lock(); + + if (m_iter == m_jobs.end()) { + unlock(); + return false; + } + + i = m_iter++; + + unlock(); + + return true; + } +#ifdef USE_PTHREADS + void lock() { + // TODO: check mutex op return values + pthread_mutex_lock(&m_mutex); + } + void unlock() { + // TODO: check mutex op return values + pthread_mutex_unlock(&m_mutex); + } +#else + void lock() { + m_mutex.lock(); + } + void unlock() { + m_mutex.unlock(); + } +#endif + +private: + CCompositeGeometryManager::TJobSet &m_jobs; + CCompositeGeometryManager::TJobSet::const_iterator m_iter; +#ifdef USE_PTHREADS + pthread_mutex_t m_mutex; +#else + boost::mutex m_mutex; +#endif +}; + +struct WorkThreadInfo { + WorkQueue* m_queue; + unsigned int m_iGPU; +}; + +#ifndef USE_PTHREADS + +void runEntries_boost(WorkThreadInfo* info) +{ + ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + CCompositeGeometryManager::TJobSet::const_iterator i; + while (info->m_queue->receive(i)) { + ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); + astraCUDA3d::setGPUIndex(info->m_iGPU); + boost::this_thread::interruption_point(); + doJob(i); + boost::this_thread::interruption_point(); + } + ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); +} + + +#else + +void* runEntries_pthreads(void* data) { + WorkThreadInfo* info = (WorkThreadInfo*)data; + + ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + + CCompositeGeometryManager::TJobSet::const_iterator i; + + while (info->m_queue->receive(i)) { + ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); + astraCUDA3d::setGPUIndex(info->m_iGPU); + pthread_testcancel(); + doJob(i); + pthread_testcancel(); + } + ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); + + return 0; +} + +#endif + + +void runWorkQueue(WorkQueue &queue, const std::vector & iGPUIndices) { + int iThreadCount = iGPUIndices.size(); + + std::vector infos; +#ifdef USE_PTHREADS + std::vector threads; +#else + std::vector threads; +#endif + infos.resize(iThreadCount); + threads.resize(iThreadCount); + + for (int i = 0; i < iThreadCount; ++i) { + infos[i].m_queue = &queue; + infos[i].m_iGPU = iGPUIndices[i]; +#ifdef USE_PTHREADS + pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]); +#else + threads[i] = new boost::thread(runEntries_boost, &infos[i]); +#endif + } + + // Wait for them to finish + for (int i = 0; i < iThreadCount; ++i) { +#ifdef USE_PTHREADS + pthread_join(threads[i], 0); +#else + threads[i]->join(); + delete threads[i]; + threads[i] = 0; +#endif + } +} + + +void CCompositeGeometryManager::setGPUIndices(const std::vector& GPUIndices) +{ + m_GPUIndices = GPUIndices; +} + bool CCompositeGeometryManager::doJobs(TJobList &jobs) { ASTRA_DEBUG("CCompositeGeometryManager::doJobs"); @@ -859,140 +1132,53 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) jobset[i->pOutput.get()].push_back(*i); } - size_t maxSize = astraCUDA3d::availableGPUMemory(); + size_t maxSize = m_iMaxSize; if (maxSize == 0) { - ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); - maxSize = 1024 * 1024 * 1024; + // Get memory from first GPU. Not optimal... + if (!m_GPUIndices.empty()) + astraCUDA3d::setGPUIndex(m_GPUIndices[0]); + maxSize = astraCUDA3d::availableGPUMemory(); + if (maxSize == 0) { + ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); + maxSize = 1024 * 1024 * 1024; + } else { + ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + } } else { - ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize); } maxSize = (maxSize * 9) / 10; maxSize /= sizeof(float); int div = 1; - - // TODO: Multi-GPU support + if (!m_GPUIndices.empty()) + div = m_GPUIndices.size(); // Split jobs to fit TJobSet split; splitJobs(jobset, maxSize, div, split); jobset.clear(); - // Run jobs - - for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) { - - CPart* output = iter->first; - TJobList& L = iter->second; - - assert(!L.empty()); + if (m_GPUIndices.size() <= 1) { - bool zero = L.begin()->eMode == SJob::MODE_SET; + // Run jobs + ASTRA_DEBUG("Running single-threaded"); - size_t outx, outy, outz; - output->getDims(outx, outy, outz); + if (!m_GPUIndices.empty()) + astraCUDA3d::setGPUIndex(m_GPUIndices[0]); - if (L.begin()->eType == SJob::JOB_NOP) { - // just zero output? - if (zero) { - for (size_t z = 0; z < outz; ++z) { - for (size_t y = 0; y < outy; ++y) { - float* ptr = output->pData->getData(); - ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); - ptr += (y + output->subY) * (size_t)output->pData->getWidth(); - ptr += output->subX; - memset(ptr, 0, sizeof(float) * outx); - } - } - } - continue; + for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) { + doJob(iter); } + } else { - astraCUDA3d::SSubDimensions3D dstdims; - dstdims.nx = output->pData->getWidth(); - dstdims.pitch = dstdims.nx; - dstdims.ny = output->pData->getHeight(); - dstdims.nz = output->pData->getDepth(); - dstdims.subnx = outx; - dstdims.subny = outy; - dstdims.subnz = outz; - ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); - dstdims.subx = output->subX; - dstdims.suby = output->subY; - dstdims.subz = output->subZ; - float *dst = output->pData->getData(); - - astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); - bool ok = outputMem; - - for (TJobList::iterator i = L.begin(); i != L.end(); ++i) { - SJob &j = *i; - - assert(j.pInput); - - CCudaProjector3D *projector = dynamic_cast(j.pProjector); - Cuda3DProjectionKernel projKernel = ker3d_default; - int detectorSuperSampling = 1; - int voxelSuperSampling = 1; - if (projector) { - projKernel = projector->getProjectionKernel(); - detectorSuperSampling = projector->getDetectorSuperSampling(); - voxelSuperSampling = projector->getVoxelSuperSampling(); - } - - size_t inx, iny, inz; - j.pInput->getDims(inx, iny, inz); - astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); - - astraCUDA3d::SSubDimensions3D srcdims; - srcdims.nx = j.pInput->pData->getWidth(); - srcdims.pitch = srcdims.nx; - srcdims.ny = j.pInput->pData->getHeight(); - srcdims.nz = j.pInput->pData->getDepth(); - srcdims.subnx = inx; - srcdims.subny = iny; - srcdims.subnz = inz; - srcdims.subx = j.pInput->subX; - srcdims.suby = j.pInput->subY; - srcdims.subz = j.pInput->subZ; - const float *src = j.pInput->pData->getDataConst(); - - ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); - if (!ok) ASTRA_ERROR("Error copying input data to GPU"); - - if (j.eType == SJob::JOB_FP) { - assert(dynamic_cast(j.pInput.get())); - assert(dynamic_cast(j.pOutput.get())); - - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); - - ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); - if (!ok) ASTRA_ERROR("Error performing sub-FP"); - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); - } else if (j.eType == SJob::JOB_BP) { - assert(dynamic_cast(j.pOutput.get())); - assert(dynamic_cast(j.pInput.get())); - - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); - - ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); - if (!ok) ASTRA_ERROR("Error performing sub-BP"); - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); - } else { - assert(false); - } + ASTRA_DEBUG("Running multi-threaded"); - ok = astraCUDA3d::freeGPUMemory(inputMem); - if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + WorkQueue wq(split); - } + runWorkQueue(wq, m_GPUIndices); - ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); - if (!ok) ASTRA_ERROR("Error copying output data from GPU"); - - ok = astraCUDA3d::freeGPUMemory(outputMem); - if (!ok) ASTRA_ERROR("Error freeing GPU memory"); } return true; @@ -1000,6 +1186,26 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) + +//static +void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params) +{ + delete s_params; + + s_params = new SGPUParams; + *s_params = params; + + ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:"); + std::ostringstream s; + s << "GPU indices:"; + for (unsigned int i = 0; i < params.GPUIndices.size(); ++i) + s << " " << params.GPUIndices[i]; + std::string ss = s.str(); + ASTRA_DEBUG(ss.c_str()); + ASTRA_DEBUG("Memory: %llu", params.memory); +} + + } #endif -- cgit v1.2.3 From 3743fdc534b39958c105f4124ad1130d3e8b042a Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 16 Feb 2016 17:53:24 +0100 Subject: Query max texture size instead of hardcoding it --- cuda/3d/mem3d.cu | 19 +++++++++++++++++++ cuda/3d/mem3d.h | 1 + src/CompositeGeometryManager.cpp | 12 ++++++------ 3 files changed, 26 insertions(+), 6 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 6d81dc0..0320117 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -62,6 +62,25 @@ size_t availableGPUMemory() return free; } +int maxBlockDimension() +{ + int dev; + cudaError_t err = cudaGetDevice(&dev); + if (err != cudaSuccess) { + ASTRA_WARN("Error querying device"); + return 0; + } + + cudaDeviceProp props; + err = cudaGetDeviceProperties(&props, dev); + if (err != cudaSuccess) { + ASTRA_WARN("Error querying device %d properties", dev); + return 0; + } + + return std::min(props.maxTexture3D[0], std::min(props.maxTexture3D[1], props.maxTexture3D[2])); +} + MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero) { SMemHandle3D_internal hnd; diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index acb72cb..6fff80b 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -78,6 +78,7 @@ enum Mem3DZeroMode { }; size_t availableGPUMemory(); +int maxBlockDimension(); MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero); diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index cafc452..c9cbaaa 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -55,9 +55,6 @@ along with the ASTRA Toolbox. If not, see . namespace astra { -static const size_t MAX_BLOCK_DIM = 4096; - - SGPUParams* CCompositeGeometryManager::s_params = 0; CCompositeGeometryManager::CCompositeGeometryManager() @@ -102,6 +99,9 @@ CCompositeGeometryManager::CCompositeGeometryManager() bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split) { + int maxBlockDim = astraCUDA3d::maxBlockDimension(); + ASTRA_DEBUG("Found max block dim %d", maxBlockDim); + split.clear(); for (TJobSet::const_iterator i = jobs.begin(); i != jobs.end(); ++i) @@ -159,17 +159,17 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2; TPartList splitInput; - input->splitZ(splitInput, remainingSize, MAX_BLOCK_DIM, 1); + input->splitZ(splitInput, remainingSize, maxBlockDim, 1); delete input; TPartList splitInput2; for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) { boost::shared_ptr inputPart = *i_in; - inputPart.get()->splitX(splitInput2, SIZE_MAX, MAX_BLOCK_DIM, 1); + inputPart.get()->splitX(splitInput2, SIZE_MAX, maxBlockDim, 1); } splitInput.clear(); for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) { boost::shared_ptr inputPart = *i_in; - inputPart.get()->splitY(splitInput, SIZE_MAX, MAX_BLOCK_DIM, 1); + inputPart.get()->splitY(splitInput, SIZE_MAX, maxBlockDim, 1); } splitInput2.clear(); -- cgit v1.2.3 From 5edb35edc2c721b458334a65512b534912c2c542 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 23 Mar 2016 15:30:56 +0100 Subject: Add relaxation parameters to SIRT, SART --- cuda/2d/sart.cu | 7 +++++-- cuda/2d/sart.h | 4 ++++ cuda/2d/sirt.cu | 8 ++++++++ cuda/2d/sirt.h | 4 ++++ include/astra/CudaSartAlgorithm.h | 10 ++++++++++ include/astra/CudaSirtAlgorithm.h | 6 ++++++ include/astra/DataProjectorPolicies.h | 4 +++- include/astra/DataProjectorPolicies.inl | 6 ++++-- include/astra/SartAlgorithm.h | 8 ++++++-- include/astra/SirtAlgorithm.h | 9 ++++++++- src/CudaSartAlgorithm.cpp | 17 ++++++++++++++++- src/CudaSirtAlgorithm.cpp | 6 ++++++ src/SartAlgorithm.cpp | 8 +++++++- src/SirtAlgorithm.cpp | 11 +++++++++-- 14 files changed, 96 insertions(+), 12 deletions(-) (limited to 'cuda') diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index e5cb5bb..c8608a3 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -71,6 +71,8 @@ SART::SART() : ReconAlgo() projectionCount = 0; iteration = 0; customOrder = false; + + fRelaxation = 1.0f; } @@ -98,6 +100,7 @@ void SART::reset() projectionCount = 0; iteration = 0; customOrder = false; + fRelaxation = 1.0f; ReconAlgo::reset(); } @@ -200,10 +203,10 @@ bool SART::iterate(unsigned int iterations) // BP, mask, and add back // TODO: Try putting the masking directly in the BP zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, 1.0f); + callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, fRelaxation); processVol(D_volumeData, D_maskData, D_tmpData, volumePitch, dims); } else { - callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, 1.0f); + callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, fRelaxation); } if (useMinConstraint) diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h index 7dcd641..eff9ecf 100644 --- a/cuda/2d/sart.h +++ b/cuda/2d/sart.h @@ -50,6 +50,8 @@ public: virtual float computeDiffNorm(); + void setRelaxation(float r) { fRelaxation = r; } + protected: void reset(); bool precomputeWeights(); @@ -78,6 +80,8 @@ protected: // Geometry-specific precomputed data float* D_lineWeight; unsigned int linePitch; + + float fRelaxation; }; } diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index 162ee77..4baaccb 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -50,6 +50,8 @@ SIRT::SIRT() : ReconAlgo() D_minMaskData = 0; D_maxMaskData = 0; + fRelaxation = 1.0f; + freeMinMaxMasks = false; } @@ -83,6 +85,8 @@ void SIRT::reset() useVolumeMask = false; useSinogramMask = false; + fRelaxation = 1.0f; + ReconAlgo::reset(); } @@ -139,6 +143,9 @@ bool SIRT::precomputeWeights() processVol(D_pixelWeight, D_maskData, pixelPitch, dims); } + // Also fold the relaxation factor into pixel weights + processVol(D_pixelWeight, fRelaxation, pixelPitch, dims); + return true; } @@ -253,6 +260,7 @@ bool SIRT::iterate(unsigned int iterations) callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f); + // pixel weights also contain the volume mask and relaxation factor processVol(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims); if (useMinConstraint) diff --git a/cuda/2d/sirt.h b/cuda/2d/sirt.h index 21094a1..bc913f4 100644 --- a/cuda/2d/sirt.h +++ b/cuda/2d/sirt.h @@ -53,6 +53,8 @@ public: bool uploadMinMaxMasks(const float* minMaskData, const float* maxMaskData, unsigned int pitch); + void setRelaxation(float r) { fRelaxation = r; } + virtual bool iterate(unsigned int iterations); virtual float computeDiffNorm(); @@ -81,6 +83,8 @@ protected: unsigned int minMaskPitch; float* D_maxMaskData; unsigned int maxMaskPitch; + + float fRelaxation; }; bool doSIRT(float* D_volumeData, unsigned int volumePitch, diff --git a/include/astra/CudaSartAlgorithm.h b/include/astra/CudaSartAlgorithm.h index c22dc4f..e5e18f0 100644 --- a/include/astra/CudaSartAlgorithm.h +++ b/include/astra/CudaSartAlgorithm.h @@ -46,6 +46,7 @@ namespace astra { * \astra_xml_item{ProjectionDataId, integer, Identifier of a projection data object as it is stored in the DataManager.} * \astra_xml_item{ReconstructionDataId, integer, Identifier of a volume data object as it is stored in the DataManager.} * \astra_xml_item_option{ReconstructionMaskId, integer, not used, Identifier of a volume data object that acts as a reconstruction mask. 0 = reconstruct on this pixel. 1 = don't reconstruct on this pixel.} + * \astra_xml_item_option{Relaxation, float, 1, The relaxation factor.} * * \par MATLAB example * \astra_code{ @@ -53,6 +54,7 @@ namespace astra { * cfg.ProjectionDataId = sino_id;\n * cfg.ReconstructionDataId = recon_id;\n * cfg.option.ReconstructionMaskId = mask_id;\n + * cfg.option.Relaxation = 1.0;\n * alg_id = astra_mex_algorithm('create'\, cfg);\n * astra_mex_algorithm('iterate'\, alg_id\, 10);\n * astra_mex_algorithm('delete'\, alg_id);\n @@ -97,6 +99,14 @@ public: * @return description string */ virtual std::string description() const; + +protected: + + /** Relaxation factor + */ + float m_fLambda; + + virtual void initCUDAAlgorithm(); }; // inline functions diff --git a/include/astra/CudaSirtAlgorithm.h b/include/astra/CudaSirtAlgorithm.h index 91cc206..3cd8acc 100644 --- a/include/astra/CudaSirtAlgorithm.h +++ b/include/astra/CudaSirtAlgorithm.h @@ -53,6 +53,7 @@ namespace astra { * \astra_xml_item{ProjectionDataId, integer, Identifier of a projection data object as it is stored in the DataManager.} * \astra_xml_item{ReconstructionDataId, integer, Identifier of a volume data object as it is stored in the DataManager.} * \astra_xml_item_option{ReconstructionMaskId, integer, not used, Identifier of a volume data object that acts as a reconstruction mask. 0 = reconstruct on this pixel. 1 = don't reconstruct on this pixel.} + * \astra_xml_item_option{Relaxation, float, 1, Relaxation parameter.} * * \par MATLAB example * \astra_code{ @@ -62,6 +63,7 @@ namespace astra { * cfg.ProjectionDataId = sino_id;\n * cfg.ReconstructionDataId = recon_id;\n * cfg.option.ReconstructionMaskId = mask_id;\n + * cfg.option.Relaxation = 1.0;\n * alg_id = astra_mex_algorithm('create'\, cfg);\n * astra_mex_algorithm('iterate'\, alg_id\, 10);\n * astra_mex_algorithm('delete'\, alg_id);\n @@ -113,6 +115,10 @@ protected: CFloat32VolumeData2D* m_pMinMask; CFloat32VolumeData2D* m_pMaxMask; + /** Relaxation factor + */ + float m_fLambda; + virtual void initCUDAAlgorithm(); }; diff --git a/include/astra/DataProjectorPolicies.h b/include/astra/DataProjectorPolicies.h index c258f5c..acfb36f 100644 --- a/include/astra/DataProjectorPolicies.h +++ b/include/astra/DataProjectorPolicies.h @@ -319,10 +319,12 @@ class SIRTBPPolicy { CFloat32ProjectionData2D* m_pTotalRayLength; CFloat32VolumeData2D* m_pTotalPixelWeight; + float m_fRelaxation; + public: FORCEINLINE SIRTBPPolicy(); - FORCEINLINE SIRTBPPolicy(CFloat32VolumeData2D* _pReconstruction, CFloat32ProjectionData2D* _pSinogram, CFloat32VolumeData2D* _pTotalPixelWeight, CFloat32ProjectionData2D* _pTotalRayLength); + FORCEINLINE SIRTBPPolicy(CFloat32VolumeData2D* _pReconstruction, CFloat32ProjectionData2D* _pSinogram, CFloat32VolumeData2D* _pTotalPixelWeight, CFloat32ProjectionData2D* _pTotalRayLength, float _fRelaxation); FORCEINLINE ~SIRTBPPolicy(); FORCEINLINE bool rayPrior(int _iRayIndex); diff --git a/include/astra/DataProjectorPolicies.inl b/include/astra/DataProjectorPolicies.inl index 0c0eddd..f300761 100644 --- a/include/astra/DataProjectorPolicies.inl +++ b/include/astra/DataProjectorPolicies.inl @@ -712,12 +712,14 @@ SIRTBPPolicy::SIRTBPPolicy() SIRTBPPolicy::SIRTBPPolicy(CFloat32VolumeData2D* _pReconstruction, CFloat32ProjectionData2D* _pSinogram, CFloat32VolumeData2D* _pTotalPixelWeight, - CFloat32ProjectionData2D* _pTotalRayLength) + CFloat32ProjectionData2D* _pTotalRayLength, + float _fRelaxation) { m_pReconstruction = _pReconstruction; m_pSinogram = _pSinogram; m_pTotalPixelWeight = _pTotalPixelWeight; m_pTotalRayLength = _pTotalRayLength; + m_fRelaxation = _fRelaxation; } //---------------------------------------------------------------------------------------- SIRTBPPolicy::~SIRTBPPolicy() @@ -739,7 +741,7 @@ void SIRTBPPolicy::addWeight(int _iRayIndex, int _iVolumeIndex, float32 _fWeight { float32 fGammaBeta = m_pTotalPixelWeight->getData()[_iVolumeIndex] * m_pTotalRayLength->getData()[_iRayIndex]; if ((fGammaBeta > 0.001f) || (fGammaBeta < -0.001f)) { - m_pReconstruction->getData()[_iVolumeIndex] += _fWeight * m_pSinogram->getData()[_iRayIndex] / fGammaBeta; + m_pReconstruction->getData()[_iVolumeIndex] += _fWeight * m_fRelaxation * m_pSinogram->getData()[_iRayIndex] / fGammaBeta; } } //---------------------------------------------------------------------------------------- diff --git a/include/astra/SartAlgorithm.h b/include/astra/SartAlgorithm.h index eb4c61e..cdae029 100644 --- a/include/astra/SartAlgorithm.h +++ b/include/astra/SartAlgorithm.h @@ -49,7 +49,7 @@ namespace astra { * * The update step of pixel \f$v_j\f$ for projection \f$phi\f$ and iteration \f$k\f$ is given by: * \f[ - * v_j^{(k+1)} = v_j^{(k)} + \frac{\sum_{p_i \in P_\phi} \left( \lambda \frac{p_i - \sum_{r=1}^{N} w_{ir}v_r^{(k)}} {\sum_{r=1}^{N}w_{ir} } \right)} {\sum_{p_i \in P_\phi}w_{ij}} + * v_j^{(k+1)} = v_j^{(k)} + \lambda \frac{\sum_{p_i \in P_\phi} \left( \frac{p_i - \sum_{r=1}^{N} w_{ir}v_r^{(k)}} {\sum_{r=1}^{N}w_{ir} } \right)} {\sum_{p_i \in P_\phi}w_{ij}} * \f] * * \par XML Configuration @@ -64,6 +64,7 @@ namespace astra { * \astra_xml_item_option{MaxConstraintValue, float, 255, Maximum constraint value.} * \astra_xml_item_option{ProjectionOrder, string, "sequential", the order in which the projections are updated. 'sequential', 'random' or 'custom'} * \astra_xml_item_option{ProjectionOrderList, vector of float, not used, if ProjectionOrder='custom': use this order.} + * \astra_xml_item_option{Relaxation, float, 1, The relaxation parameter.} * * \par MATLAB example * \astra_code{ @@ -76,7 +77,8 @@ namespace astra { * cfg.option.UseMaxConstraint = 'yes';\n * cfg.option.MaxConstraintValue = 1024;\n * cfg.option.ProjectionOrder = 'custom';\n -* cfg.option.ProjectionOrderList = randperm(100);\n + * cfg.option.ProjectionOrderList = randperm(100);\n + * cfg.option.Relaxation = 1.0;\n * alg_id = astra_mex_algorithm('create'\, cfg);\n * astra_mex_algorithm('iterate'\, alg_id\, 10);\n * astra_mex_algorithm('delete'\, alg_id);\n @@ -215,6 +217,8 @@ protected: //< Current index in the projection order array. int m_iCurrentProjection; + //< Relaxation parameter + float m_fLambda; }; // inline functions diff --git a/include/astra/SirtAlgorithm.h b/include/astra/SirtAlgorithm.h index 05b3fa9..8044d09 100644 --- a/include/astra/SirtAlgorithm.h +++ b/include/astra/SirtAlgorithm.h @@ -49,7 +49,7 @@ namespace astra { * * The update step of pixel \f$v_j\f$ for iteration \f$k\f$ is given by: * \f[ - * v_j^{(k+1)} = v_j^{(k)} + \alpha \sum_{i=1}^{M} \left( \frac{w_{ij}\left( p_i - \sum_{r=1}^{N} w_{ir}v_r^{(k)}\right)}{\sum_{k=1}^{N} w_{ik}} \right) \frac{1}{\sum_{l=1}^{M}w_{lj}} + * v_j^{(k+1)} = v_j^{(k)} + \lambda \sum_{i=1}^{M} \left( \frac{w_{ij}\left( p_i - \sum_{r=1}^{N} w_{ir}v_r^{(k)}\right)}{\sum_{k=1}^{N} w_{ik}} \right) \frac{1}{\sum_{l=1}^{M}w_{lj}} * \f] * * \par XML Configuration @@ -62,6 +62,7 @@ namespace astra { * \astra_xml_item_option{MinConstraintValue, float, 0, Minimum constraint value.} * \astra_xml_item_option{UseMaxConstraint, bool, false, Use maximum value constraint.} * \astra_xml_item_option{MaxConstraintValue, float, 255, Maximum constraint value.} + * \astra_xml_item_option{Relaxation, float, 1, The relaxation factor.} * * \par XML Example * \astra_code{ @@ -74,6 +75,7 @@ namespace astra { * <Option key="UseMinConstraint" value="yes"/>\n * <Option key="UseMaxConstraint" value="yes"/>\n * <Option key="MaxConstraintValue" value="1024"/>\n + * <Option key="Relaxation" value="1"/>\n * </Algorithm> * } * @@ -88,6 +90,7 @@ namespace astra { * cfg.option.UseMinConstraint = 'yes';\n * cfg.option.UseMaxConstraint = 'yes';\n * cfg.option.MaxConstraintValue = 1024;\n + * cfg.option.Relaxation = 1.0;\n * alg_id = astra_mex_algorithm('create'\, cfg);\n * astra_mex_algorithm('iterate'\, alg_id\, 10);\n * astra_mex_algorithm('delete'\, alg_id);\n @@ -136,6 +139,10 @@ protected: */ int m_iIterationCount; + /** Relaxation parameter + */ + float m_fLambda; + public: // type of the algorithm, needed to register with CAlgorithmFactory diff --git a/src/CudaSartAlgorithm.cpp b/src/CudaSartAlgorithm.cpp index d202847..bf97224 100644 --- a/src/CudaSartAlgorithm.cpp +++ b/src/CudaSartAlgorithm.cpp @@ -107,7 +107,8 @@ bool CCudaSartAlgorithm::initialize(const Config& _cfg) CC.markOptionParsed("ProjectionOrderList"); } - + m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f); + CC.markOptionParsed("Relaxation"); return true; } @@ -123,12 +124,26 @@ bool CCudaSartAlgorithm::initialize(CProjector2D* _pProjector, if (!m_bIsInitialized) return false; + m_fLambda = 1.0f; + m_pAlgo = new astraCUDA::SART(); m_bAlgoInit = false; return true; } +//---------------------------------------------------------------------------------------- + +void CCudaSartAlgorithm::initCUDAAlgorithm() +{ + CCudaReconstructionAlgorithm2D::initCUDAAlgorithm(); + + astraCUDA::SART* pSart = dynamic_cast(m_pAlgo); + + pSart->setRelaxation(m_fLambda); +} + + } // namespace astra diff --git a/src/CudaSirtAlgorithm.cpp b/src/CudaSirtAlgorithm.cpp index 7beb30e..c8dc677 100644 --- a/src/CudaSirtAlgorithm.cpp +++ b/src/CudaSirtAlgorithm.cpp @@ -50,6 +50,8 @@ CCudaSirtAlgorithm::CCudaSirtAlgorithm() m_pMinMask = 0; m_pMaxMask = 0; + + m_fLambda = 1.0f; } //---------------------------------------------------------------------------------------- @@ -86,6 +88,8 @@ bool CCudaSirtAlgorithm::initialize(const Config& _cfg) } CC.markOptionParsed("MaxMaskId"); + m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f); + CC.markOptionParsed("Relaxation"); m_pAlgo = new astraCUDA::SIRT(); m_bAlgoInit = false; @@ -108,6 +112,7 @@ bool CCudaSirtAlgorithm::initialize(CProjector2D* _pProjector, m_pAlgo = new astraCUDA::SIRT(); m_bAlgoInit = false; + m_fLambda = 1.0f; return true; } @@ -130,6 +135,7 @@ void CCudaSirtAlgorithm::initCUDAAlgorithm() ASTRA_ASSERT(ok); } + pSirt->setRelaxation(m_fLambda); } diff --git a/src/SartAlgorithm.cpp b/src/SartAlgorithm.cpp index 9346160..403f851 100644 --- a/src/SartAlgorithm.cpp +++ b/src/SartAlgorithm.cpp @@ -151,6 +151,9 @@ bool CSartAlgorithm::initialize(const Config& _cfg) CC.markOptionParsed("ProjectionOrderList"); } + m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f); + CC.markOptionParsed("Relaxation"); + // create data objects m_pTotalRayLength = new CFloat32ProjectionData2D(m_pProjector->getProjectionGeometry()); m_pTotalPixelWeight = new CFloat32VolumeData2D(m_pProjector->getVolumeGeometry()); @@ -246,6 +249,7 @@ map CSartAlgorithm::getInformation() { map res; res["ProjectionOrder"] = getInformation("ProjectionOrder"); + res["Relaxation"] = getInformation("Relaxation"); return mergeMap(CReconstructionAlgorithm2D::getInformation(), res); }; @@ -253,6 +257,8 @@ map CSartAlgorithm::getInformation() // Information - Specific boost::any CSartAlgorithm::getInformation(std::string _sIdentifier) { + if (_sIdentifier == "Relaxation") + return m_fLambda; if (_sIdentifier == "ProjectionOrder") { vector res; for (int i = 0; i < m_iProjectionCount; i++) { @@ -286,7 +292,7 @@ void CSartAlgorithm::run(int _iNrIterations) m_pProjector, SinogramMaskPolicy(m_pSinogramMask), // sinogram mask ReconstructionMaskPolicy(m_pReconstructionMask), // reconstruction mask - SIRTBPPolicy(m_pReconstruction, m_pDiffSinogram, m_pTotalPixelWeight, m_pTotalRayLength), // SIRT backprojection + SIRTBPPolicy(m_pReconstruction, m_pDiffSinogram, m_pTotalPixelWeight, m_pTotalRayLength, m_fLambda), // SIRT backprojection m_bUseSinogramMask, m_bUseReconstructionMask, true // options on/off ); diff --git a/src/SirtAlgorithm.cpp b/src/SirtAlgorithm.cpp index d9f3a65..ff25648 100644 --- a/src/SirtAlgorithm.cpp +++ b/src/SirtAlgorithm.cpp @@ -76,6 +76,7 @@ void CSirtAlgorithm::_clear() m_pDiffSinogram = NULL; m_pTmpVolume = NULL; + m_fLambda = 1.0f; m_iIterationCount = 0; } @@ -91,6 +92,7 @@ void CSirtAlgorithm::clear() ASTRA_DELETE(m_pDiffSinogram); ASTRA_DELETE(m_pTmpVolume); + m_fLambda = 1.0f; m_iIterationCount = 0; } @@ -128,6 +130,9 @@ bool CSirtAlgorithm::initialize(const Config& _cfg) return false; } + m_fLambda = _cfg.self.getOptionNumerical("Relaxation", 1.0f); + CC.markOptionParsed("Relaxation"); + // init data objects and data projectors _init(); @@ -152,6 +157,8 @@ bool CSirtAlgorithm::initialize(CProjector2D* _pProjector, m_pSinogram = _pSinogram; m_pReconstruction = _pReconstruction; + m_fLambda = 1.0f; + // init data objects and data projectors _init(); @@ -248,7 +255,7 @@ void CSirtAlgorithm::run(int _iNrIterations) x = 1.0f / x; else x = 0.0f; - pfT[i] = x; + pfT[i] = m_fLambda * x; } pfT = m_pTotalRayLength->getData(); for (int i = 0; i < m_pTotalRayLength->getSize(); ++i) { @@ -296,7 +303,7 @@ void CSirtAlgorithm::run(int _iNrIterations) m_pTmpVolume->setData(0.0f); pBackProjector->project(); - // divide by pixel weights + // multiply with relaxation factor divided by pixel weights (*m_pTmpVolume) *= (*m_pTotalPixelWeight); (*m_pReconstruction) += (*m_pTmpVolume); -- cgit v1.2.3 From 16430239d04ff738a21146c410918c285552543f Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 23 Mar 2016 15:50:24 +0100 Subject: Add relaxation parameters to SIRT3D --- cuda/3d/astra3d.cu | 13 +++++++++++++ cuda/3d/astra3d.h | 2 ++ cuda/3d/sirt3d.cu | 8 +++++++- cuda/3d/sirt3d.h | 5 +++++ include/astra/CudaSirtAlgorithm3D.h | 3 ++- src/CudaSirtAlgorithm3D.cpp | 8 ++++++++ 6 files changed, 37 insertions(+), 2 deletions(-) (limited to 'cuda') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 8328229..5670873 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -267,6 +267,7 @@ public: float fOriginDetectorDistance; float fSourceZ; float fDetSize; + float fRelaxation; SConeProjection* projs; SPar3DProjection* parprojs; @@ -311,6 +312,8 @@ AstraSIRT3d::AstraSIRT3d() pData->parprojs = 0; pData->fOutputScale = 1.0f; + pData->fRelaxation = 1.0f; + pData->initialized = false; pData->setStartReconstruction = false; @@ -389,6 +392,14 @@ bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling, return true; } +void AstraSIRT3d::setRelaxation(float r) +{ + if (pData->initialized) + return; + + pData->fRelaxation = r; +} + bool AstraSIRT3d::enableVolumeMask() { if (pData->initialized) @@ -448,6 +459,8 @@ bool AstraSIRT3d::init() if (!ok) return false; + pData->sirt.setRelaxation(pData->fRelaxation); + pData->D_volumeData = allocateVolumeData(pData->dims); ok = pData->D_volumeData.ptr; if (!ok) diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 2782994..2137587 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -68,6 +68,8 @@ public: bool enableSuperSampling(unsigned int iVoxelSuperSampling, unsigned int iDetectorSuperSampling); + void setRelaxation(float r); + // Enable volume/sinogram masks // // This may optionally be called before init(). diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index 484521e..713944b 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -59,6 +59,8 @@ SIRT::SIRT() : ReconAlgo3D() useMinConstraint = false; useMaxConstraint = false; + + fRelaxation = 1.0f; } @@ -89,6 +91,8 @@ void SIRT::reset() useVolumeMask = false; useSinogramMask = false; + fRelaxation = 1.0f; + ReconAlgo3D::reset(); } @@ -196,6 +200,8 @@ bool SIRT::precomputeWeights() // scale pixel weights with mask to zero out masked pixels processVol3D(D_pixelWeight, D_maskData, dims); } + processVol3D(D_pixelWeight, fRelaxation, dims); + return true; } @@ -307,7 +313,7 @@ bool SIRT::iterate(unsigned int iterations) } #endif - + // pixel weights also contain the volume mask and relaxation factor processVol3D(D_volumeData, D_tmpData, D_pixelWeight, dims); if (useMinConstraint) diff --git a/cuda/3d/sirt3d.h b/cuda/3d/sirt3d.h index bb3864a..5e93deb 100644 --- a/cuda/3d/sirt3d.h +++ b/cuda/3d/sirt3d.h @@ -48,6 +48,9 @@ public: // init should be called after setting all geometry bool init(); + // Set relaxation factor. This may be called after init and before iterate. + void setRelaxation(float r) { fRelaxation = r; } + // setVolumeMask should be called after init and before iterate, // but only if enableVolumeMask was called before init. // It may be called again after iterate. @@ -91,6 +94,8 @@ protected: float fMinConstraint; float fMaxConstraint; + float fRelaxation; + cudaPitchedPtr D_maskData; cudaPitchedPtr D_smaskData; diff --git a/include/astra/CudaSirtAlgorithm3D.h b/include/astra/CudaSirtAlgorithm3D.h index 379720e..60191cd 100644 --- a/include/astra/CudaSirtAlgorithm3D.h +++ b/include/astra/CudaSirtAlgorithm3D.h @@ -50,7 +50,7 @@ class AstraSIRT3d; * * The update step of pixel \f$v_j\f$ for iteration \f$k\f$ is given by: * \f[ - * v_j^{(k+1)} = v_j^{(k)} + \alpha \sum_{i=1}^{M} \left( \frac{w_{ij}\left( p_i - \sum_{r=1}^{N} w_{ir}v_r^{(k)}\right)}{\sum_{k=1}^{N} w_{ik}} \right) \frac{1}{\sum_{l=1}^{M}w_{lj}} + * v_j^{(k+1)} = v_j^{(k)} + \lambda \sum_{i=1}^{M} \left( \frac{w_{ij}\left( p_i - \sum_{r=1}^{N} w_{ir}v_r^{(k)}\right)}{\sum_{k=1}^{N} w_{ik}} \right) \frac{1}{\sum_{l=1}^{M}w_{lj}} * \f] * * \par XML Configuration @@ -175,6 +175,7 @@ protected: bool m_bAstraSIRTInit; int m_iDetectorSuperSampling; int m_iVoxelSuperSampling; + float m_fLambda; void initializeFromProjector(); }; diff --git a/src/CudaSirtAlgorithm3D.cpp b/src/CudaSirtAlgorithm3D.cpp index 605c470..c819f8e 100644 --- a/src/CudaSirtAlgorithm3D.cpp +++ b/src/CudaSirtAlgorithm3D.cpp @@ -56,6 +56,7 @@ CCudaSirtAlgorithm3D::CCudaSirtAlgorithm3D() m_iGPUIndex = -1; m_iVoxelSuperSampling = 1; m_iDetectorSuperSampling = 1; + m_fLambda = 1.0f; } //---------------------------------------------------------------------------------------- @@ -128,6 +129,8 @@ bool CCudaSirtAlgorithm3D::initialize(const Config& _cfg) return false; } + m_fLambda = _cfg.self.getOptionNumerical("Relaxation"); + initializeFromProjector(); // Deprecated options @@ -135,6 +138,7 @@ bool CCudaSirtAlgorithm3D::initialize(const Config& _cfg) m_iDetectorSuperSampling = (int)_cfg.self.getOptionNumerical("DetectorSuperSampling", m_iDetectorSuperSampling); m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", m_iGPUIndex); m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex); + CC.markOptionParsed("VoxelSuperSampling"); CC.markOptionParsed("DetectorSuperSampling"); CC.markOptionParsed("GPUIndex"); @@ -164,6 +168,8 @@ bool CCudaSirtAlgorithm3D::initialize(CProjector3D* _pProjector, clear(); } + m_fLambda = 1.0f; + // required classes m_pProjector = _pProjector; m_pSinogram = _pSinogram; @@ -224,6 +230,8 @@ void CCudaSirtAlgorithm3D::run(int _iNrIterations) ASTRA_ASSERT(ok); + m_pSirt->setRelaxation(m_fLambda); + m_bAstraSIRTInit = true; } -- cgit v1.2.3 From 558820f1421e79b32e542c133c83683e58df6d5e Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 24 Jun 2016 15:28:47 +0200 Subject: Compute FBP filter in spatial domain --- cuda/2d/fft.cu | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) (limited to 'cuda') diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2bfd493..2d259a9 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -35,6 +35,7 @@ $Id$ #include #include "../../include/astra/Logging.h" +#include "astra/Fourier.h" using namespace astra; @@ -303,16 +304,48 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, float * pfFilt = new float[_iFFTFourierDetectorCount]; float * pfW = new float[_iFFTFourierDetectorCount]; + // We cache one Fourier transform for repeated FBP's of the same size + static float *pfData = 0; + static int iFilterCacheSize = 0; + + if (!pfData || iFilterCacheSize != _iFFTRealDetectorCount) { + // Compute filter in spatial domain + + delete[] pfData; + pfData = new float[2*_iFFTRealDetectorCount]; + int *ip = new int[int(2+sqrt(_iFFTRealDetectorCount)+1)]; + ip[0] = 0; + float32 *w = new float32[_iFFTRealDetectorCount/2]; + + for (int i = 0; i < _iFFTRealDetectorCount; ++i) { + pfData[2*i+1] = 0.0f; + + if (i & 1) { + int j = i; + if (2*j > _iFFTRealDetectorCount) + j = _iFFTRealDetectorCount - j; + float f = M_PI * j; + pfData[2*i] = -1 / (f*f); + } else { + pfData[2*i] = 0.0f; + } + } + + pfData[0] = 0.25f; + + cdft(2*_iFFTRealDetectorCount, -1, pfData, ip, w); + delete[] ip; + delete[] w; + + iFilterCacheSize = _iFFTRealDetectorCount; + } + for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++) { float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount; - // filt = 2*( 0:(order/2) )./order; - pfFilt[iDetectorIndex] = 2.0f * fRelIndex; - //pfFilt[iDetectorIndex] = 1.0f; - - // w = 2*pi*(0:size(filt,2)-1)/order - pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex; + pfFilt[iDetectorIndex] = 2.0f * pfData[2*iDetectorIndex]; + pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex; } switch(_eFilter) @@ -866,5 +899,4 @@ void downloadDebugFilterReal(float * _pfHostSinogram, int _iProjectionCount, free(pfHostFilter); } - #endif -- cgit v1.2.3