From 048755bab6b77c1da0050ed091e5007a60564adf Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 8 Mar 2016 15:41:38 +0100 Subject: Use CompositeGeometryManager for FDK Also fix a number of scaling/weighting issues in FDK, and switch to standard cone_bp with FDKWeighting for the BP step. --- cuda/3d/astra3d.cu | 80 ----------------- cuda/3d/astra3d.h | 7 -- cuda/3d/fdk.cu | 250 +++++++++++++++++++++-------------------------------- cuda/3d/fdk.h | 8 +- cuda/3d/mem3d.cu | 28 ++++++ cuda/3d/mem3d.h | 1 + 6 files changed, 130 insertions(+), 244 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index dd6d551..91f4530 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1291,84 +1291,4 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, -bool astraCudaFDK(float* pfVolume, const float* pfProjections, - const CVolumeGeometry3D* pVolGeom, - const CConeProjectionGeometry3D* pProjGeom, - bool bShortScan, - int iGPUIndex, int iVoxelSuperSampling) -{ - SDimensions3D dims; - SProjectorParams3D params; - - params.iRaysPerVoxelDim = iVoxelSuperSampling; - - bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - - // TODO: Check that pVolGeom is normalized, since we don't support - // other volume geometries yet - - if (!ok) - return false; - - - if (iVoxelSuperSampling == 0) - return false; - - if (iGPUIndex != -1) { - cudaSetDevice(iGPUIndex); - cudaError_t err = cudaGetLastError(); - - // Ignore errors caused by calling cudaSetDevice multiple times - if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) - return false; - } - - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - ok = D_volumeData.ptr; - if (!ok) - return false; - - cudaPitchedPtr D_projData = allocateProjectionData(dims); - ok = D_projData.ptr; - if (!ok) { - cudaFree(D_volumeData.ptr); - return false; - } - - ok &= copyProjectionsToDevice(pfProjections, D_projData, dims, dims.iProjU); - - ok &= zeroVolumeData(D_volumeData, dims); - - if (!ok) { - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - return false; - } - - float fOriginSourceDistance = pProjGeom->getOriginSourceDistance(); - float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance(); - float fDetUSize = pProjGeom->getDetectorSpacingX(); - float fDetVSize = pProjGeom->getDetectorSpacingY(); - const float *pfAngles = pProjGeom->getProjectionAngles(); - - - // TODO: Offer interface for SrcZ, DetZ - // TODO: Voxel scaling - ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance, - fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize, - dims, pfAngles, bShortScan); - - ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - - return ok; - -} - - - - } diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 3345ab8..93bf576 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -309,13 +309,6 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling); -_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, - const CVolumeGeometry3D* pVolGeom, - const CConeProjectionGeometry3D* pProjGeom, - bool bShortScan, - int iGPUIndex, int iVoxelSuperSampling); - - } diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 0e13be1..d847eee 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -41,8 +41,11 @@ $Id$ #include "dims3d.h" #include "arith3d.h" +#include "cone_bp.h" #include "../2d/fft.h" +#include "../../include/astra/Logging.h" + typedef texture texture3D; static texture3D gT_coneProjTexture; @@ -86,129 +89,7 @@ static bool bindProjDataTexture(const cudaArray* array) } -__global__ void devBP_FDK(void* D_volData, unsigned int volPitch, int startAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fInvDetUSize, float fInvDetVSize, const SDimensions3D dims) -{ - float* volData = (float*)D_volData; - - int endAngle = startAngle + g_anglesPerBlock; - if (endAngle > dims.iProjAngles) - endAngle = dims.iProjAngles; - - // threadIdx: x = rel x - // y = rel y - - // blockIdx: x = x + y - // y = z - - - // TO TRY: precompute part of detector intersection formulas in shared mem? - // TO TRY: inner loop over z, gather ray values in shared mem - - const int X = blockIdx.x % ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockX + threadIdx.x; - const int Y = blockIdx.x / ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockY + threadIdx.y; - - if (X > dims.iVolX) - return; - if (Y > dims.iVolY) - return; - - const int startZ = blockIdx.y * g_volBlockZ; - int endZ = startZ + g_volBlockZ; - if (endZ > dims.iVolZ) - endZ = dims.iVolZ; - - float fX = X - 0.5f*dims.iVolX + 0.5f; - float fY = Y - 0.5f*dims.iVolY + 0.5f; - float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - fSrcZ; - - const float fU_base = 0.5f*dims.iProjU - 0.5f + 0.5f; - const float fV_base = 0.5f*dims.iProjV - 0.5f + 0.5f + (fDetZ-fSrcZ); - - // Note re. fZ/rV_base: the computations below are all relative to the - // optical axis, so we do the Z-adjustments beforehand. - - for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) - { - - float fVal = 0.0f; - float fAngle = startAngle + 0.5f; - - for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f) - { - - const float cos_theta = gC_angle_cos[angle]; - const float sin_theta = gC_angle_sin[angle]; - - const float fR = fSrcOrigin; - const float fD = fR - fX * sin_theta + fY * cos_theta; - float fWeight = fR / fD; - fWeight *= fWeight; - - const float fScaleFactor = (fR + fDetOrigin) / fD; - const float fU = fU_base + (fX*cos_theta+fY*sin_theta) * fScaleFactor * fInvDetUSize; - const float fV = fV_base + fZ * fScaleFactor * fInvDetVSize; - - fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV); - - } - - volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal; -// projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU] = 10.0f; -// if (threadIdx.x == 0 && threadIdx.y == 0) { printf("%d,%d,%d [%d / %d] -> %f\n", angle, detectorU, detectorV, (angle*dims.iProjV+detectorV)*projPitch+detectorU, projPitch, projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU]); } - } - -} - - -bool FDK_BP(cudaPitchedPtr D_volumeData, - cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles) -{ - // transfer projections to array - - cudaArray* cuArray = allocateProjectionArray(dims); - transferProjectionsToArray(D_projData, cuArray, dims); - - bindProjDataTexture(cuArray); - - float* angle_sin = new float[dims.iProjAngles]; - float* angle_cos = new float[dims.iProjAngles]; - - for (unsigned int i = 0; i < dims.iProjAngles; ++i) { - angle_sin[i] = sinf(angles[i]); - angle_cos[i] = cosf(angles[i]); - } - cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - assert(e1 == cudaSuccess); - assert(e2 == cudaSuccess); - - delete[] angle_sin; - delete[] angle_cos; - - dim3 dimBlock(g_volBlockX, g_volBlockY); - - dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ); - - // timeval t; - // tic(t); - - for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { - devBP_FDK<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, 1.0f / fDetUSize, 1.0f / fDetVSize, dims); - } - - cudaTextForceKernelsCompletion(); - - cudaFreeArray(cuArray); - - // printf("%f\n", toc(t)); - - return true; -} - -__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, const SDimensions3D dims) +__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fZShift, float fDetUSize, float fDetVSize, const SDimensions3D dims) { float* projData = (float*)D_projData; int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -230,21 +111,26 @@ __global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsig const float fT = fCentralRayLength * fCentralRayLength + fU * fU; - float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fDetZ - fSrcZ; + float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fZShift; + + //const float fW = fCentralRayLength; + //const float fW = fCentralRayLength * (M_PI / 2.0f) / (float)dims.iProjAngles; + const float fW1 = fSrcOrigin * fDetUSize * fDetVSize; + const float fW = fCentralRayLength * fW1 * fW1 * (M_PI / 2.0f) / (float)dims.iProjAngles; for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) { const float fRayLength = sqrtf(fT + fV * fV); - const float fWeight = fCentralRayLength / fRayLength; + const float fWeight = fW / fRayLength; projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] *= fWeight; - fV += 1.0f; + fV += fDetVSize; } } -__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims) +__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims) { float* projData = (float*)D_projData; int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -305,7 +191,7 @@ __global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, un // Perform the FDK pre-weighting and filtering bool FDK_PreWeight(cudaPitchedPtr D_projData, float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, + float fZShift, float fDetUSize, float fDetVSize, bool bShortScan, const SDimensions3D& dims, const float* angles) { @@ -318,23 +204,57 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, int projPitch = D_projData.pitch/sizeof(float); - devFDK_preweight<<>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, dims); + devFDK_preweight<<>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims); cudaTextForceKernelsCompletion(); - if (bShortScan) { + if (bShortScan && dims.iProjAngles > 1) { + ASTRA_DEBUG("Doing Parker weighting"); // We do short-scan Parker weighting - cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, angles, + // First, determine (in a very basic way) the interval that's + // been scanned. We assume angles[0] is one of the endpoints of the + // range. + float fdA = angles[1] - angles[0]; + + while (fdA < -M_PI) + fdA += 2*M_PI; + while (fdA >= M_PI) + fdA -= 2*M_PI; + + float fAngleBase; + if (fdA >= 0.0f) { + // going up from angles[0] + fAngleBase = angles[dims.iProjAngles - 1]; + } else { + // going down from angles[0] + fAngleBase = angles[0]; + } + + // We pick the highest end of the range, and then + // move all angles so they fall in (-2pi,0] + + float *fRelAngles = new float[dims.iProjAngles]; + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + float f = angles[i] - fAngleBase; + while (f > 0) + f -= 2*M_PI; + while (f <= -2*M_PI) + f += 2*M_PI; + fRelAngles[i] = f; + + } + + cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, fRelAngles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); assert(!e1); + delete[] fRelAngles; - // TODO: detector pixel size! - float fCentralFanAngle = atanf((dims.iProjU*0.5f) / + float fCentralFanAngle = atanf(fDetUSize * (dims.iProjU*0.5f) / (fSrcOrigin + fDetOrigin)); - devFDK_ParkerWeight<<>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fCentralFanAngle, dims); + devFDK_ParkerWeight<<>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims); } @@ -344,10 +264,7 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, bool FDK_Filter(cudaPitchedPtr D_projData, cufftComplex * D_filter, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, - float fDetUSize, float fDetVSize, bool bShortScan, - const SDimensions3D& dims, const float* angles) + const SDimensions3D& dims) { // The filtering is a regular ramp filter per detector line. @@ -392,9 +309,8 @@ bool FDK_Filter(cudaPitchedPtr D_projData, bool FDK(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles, bool bShortScan) + const SConeProjection* angles, + const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan) { bool ok; // Generate filter @@ -403,12 +319,45 @@ bool FDK(cudaPitchedPtr D_volumeData, int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU); int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); + + // NB: We don't support arbitrary cone_vec geometries here. + // Only those that are vertical sub-geometries + // (cf. CompositeGeometryManager) of regular cone geometries. + assert(dims.iProjAngles > 0); + const SConeProjection& p0 = angles[0]; + + // assuming U is in the XY plane, V is parallel to Z axis + float fDetCX = p0.fDetSX + 0.5*dims.iProjU*p0.fDetUX; + float fDetCY = p0.fDetSY + 0.5*dims.iProjU*p0.fDetUY; + float fDetCZ = p0.fDetSZ + 0.5*dims.iProjV*p0.fDetVZ; + + float fSrcOrigin = sqrt(p0.fSrcX*p0.fSrcX + p0.fSrcY*p0.fSrcY); + float fDetOrigin = sqrt(fDetCX*fDetCX + fDetCY*fDetCY); + float fDetUSize = sqrt(p0.fDetUX*p0.fDetUX + p0.fDetUY*p0.fDetUY); + float fDetVSize = abs(p0.fDetVZ); + + float fZShift = fDetCZ - p0.fSrcZ; + + float *pfAngles = new float[dims.iProjAngles]; + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + // FIXME: Sign/order + pfAngles[i] = -atan2(angles[i].fSrcX, angles[i].fSrcY) + M_PI; + } + + +#if 1 ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin, - fSrcZ, fDetZ, fDetUSize, fDetVSize, - bShortScan, dims, angles); + fZShift, fDetUSize, fDetVSize, + bShortScan, dims, pfAngles); +#else + ok = true; +#endif + delete[] pfAngles; + if (!ok) return false; +#if 1 cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize]; memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize); @@ -425,28 +374,25 @@ bool FDK(cudaPitchedPtr D_volumeData, - ok = FDK_Filter(D_projData, D_filter, fSrcOrigin, fDetOrigin, - fSrcZ, fDetZ, fDetUSize, fDetVSize, - bShortScan, dims, angles); + ok = FDK_Filter(D_projData, D_filter, dims); // Clean up filter freeComplexOnDevice(D_filter); - +#endif if (!ok) return false; // Perform BP - ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, - fDetUSize, fDetVSize, dims, angles); + params.bFDKWeighting = true; + + //ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, 0.0f, 0.0f, fDetUSize, fDetVSize, dims, pfAngles); + ok = ConeBP(D_volumeData, D_projData, dims, angles, params); if (!ok) return false; - processVol3D(D_volumeData, - (M_PI / 2.0f) / (float)dims.iProjAngles, dims); - return true; } diff --git a/cuda/3d/fdk.h b/cuda/3d/fdk.h index de7fe53..8d2a128 100644 --- a/cuda/3d/fdk.h +++ b/cuda/3d/fdk.h @@ -35,16 +35,14 @@ namespace astraCUDA3d { bool FDK_PreWeight(cudaPitchedPtr D_projData, float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, + float fZShift, float fDetUSize, float fDetVSize, bool bShortScan, const SDimensions3D& dims, const float* angles); bool FDK(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles, bool bShortScan); - + const SConeProjection* angles, + const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan); } diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 9a239da..4fb30a2 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -38,6 +38,7 @@ $Id$ #include "cone_bp.h" #include "par3d_fp.h" #include "par3d_bp.h" +#include "fdk.h" #include "astra/Logging.h" @@ -278,6 +279,33 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con } +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan) +{ + SDimensions3D dims; + SProjectorParams3D params; + + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) + return false; + + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + params); + + if (!ok || !pConeProjs) + return false; + + ok &= FDK(volData.d->ptr, projData.d->ptr, pConeProjs, dims, params, bShortScan); + + return ok; + + + +} + diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 6fff80b..2d0fb27 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -95,6 +95,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling); +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan); } -- cgit v1.2.3