diff options
Diffstat (limited to 'cuda/2d')
| -rw-r--r-- | cuda/2d/astra.cu | 7 | ||||
| -rw-r--r-- | cuda/2d/darthelper.cu | 13 | ||||
| -rw-r--r-- | cuda/2d/fft.cu | 45 | ||||
| -rw-r--r-- | cuda/2d/par_fp.cu | 2 | ||||
| -rw-r--r-- | cuda/2d/util.cu | 8 | 
5 files changed, 38 insertions, 37 deletions
| diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 5e2a07a..4c69628 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -42,12 +42,13 @@ $Id$  #include <fstream>  #include <cuda.h> -#include "../../include/astra/Logger.h"  #include "../../include/astra/VolumeGeometry2D.h"  #include "../../include/astra/ParallelProjectionGeometry2D.h"  #include "../../include/astra/FanFlatProjectionGeometry2D.h"  #include "../../include/astra/FanFlatVecProjectionGeometry2D.h" +#include "../../include/astra/Logging.h" +  // For fan beam FBP weighting  #include "../3d/fdk.h" @@ -536,7 +537,7 @@ bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* =  			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;  			int iFilterShiftSize = _iFilterWidth / 2; -			 +  			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)  			{  				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; @@ -561,7 +562,7 @@ bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* =  		}  		default:  		{ -			fprintf(stderr, "AstraFBP::setFilter: Unknown filter type requested"); +			ASTRA_ERROR("AstraFBP::setFilter: Unknown filter type requested");  			delete [] pHostFilter;  			return false;  		} diff --git a/cuda/2d/darthelper.cu b/cuda/2d/darthelper.cu index 28ca557..1d10d49 100644 --- a/cuda/2d/darthelper.cu +++ b/cuda/2d/darthelper.cu @@ -57,7 +57,7 @@ void roiSelect(float* out, float radius, unsigned int width, unsigned int height  	// We abuse dims here...  	SDimensions dims;  	dims.iVolWidth = width; -	dims.iVolHeight = width; +	dims.iVolHeight = height;  	allocateVolumeData(D_data, pitch, dims);  	copyVolumeToDevice(out, width, dims, D_data, pitch); @@ -245,7 +245,7 @@ void dartMask(float* mask, const float* segmentation, unsigned int conn, unsigne  	// We abuse dims here...  	SDimensions dims;  	dims.iVolWidth = width; -	dims.iVolHeight = width; +	dims.iVolHeight = height;  	allocateVolumeData(D_segmentationData, pitch, dims);  	copyVolumeToDevice(segmentation, width, dims, D_segmentationData, pitch); @@ -278,7 +278,7 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns  	unsigned int x = threadIdx.x + 16*blockIdx.x;  	unsigned int y = threadIdx.y + 16*blockIdx.y; -	// Sacrifice the border pixels to simplify the implementation.  +	// Sacrifice the border pixels to simplify the implementation.  	if (x > radius-1 && x < width - radius && y > radius-1 && y < height - radius)  	{  		float* d = (float*)in; @@ -286,9 +286,10 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns  		unsigned int o2 = y*pitch+x;  		int r = radius; +                float count = 4*r*(r+1);  		float res = -d[o2]; -		for (int row = -r; row < r; row++)  +		for (int row = -r; row <= r; row++)   		{  			unsigned int o1 = (y+row)*pitch+x;   			for (int col = -r; col <= r; col++)  @@ -297,7 +298,7 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns  			}  		} -		res *= b / 4*r*(r+1); +		res *= b / count;  		res += (1.0f-b) * d[o2];  		m[o2] = res; @@ -333,7 +334,7 @@ void dartSmoothing(float* out, const float* in, float b, unsigned int radius, un  	// We abuse dims here...  	SDimensions dims;  	dims.iVolWidth = width; -	dims.iVolHeight = width; +	dims.iVolHeight = height;  	allocateVolumeData(D_inData, pitch, dims);  	copyVolumeToDevice(in, width, dims, D_inData, pitch); diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index d105e29..2bfd493 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -34,7 +34,7 @@ $Id$  #include <cuda.h>  #include <fstream> -#include "../../include/astra/Logger.h" +#include "../../include/astra/Logging.h"  using namespace astra; @@ -43,25 +43,22 @@ using namespace astra;  #define CHECK_ERROR(errorMessage) do {                                     \    cudaError_t err = cudaThreadSynchronize();                               \    if( cudaSuccess != err) {                                                \ -      fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n",    \ -              errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ -			  CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \ +      ASTRA_ERROR("Cuda error %s : %s",                                    \ +              errorMessage,cudaGetErrorString( err));                      \        exit(EXIT_FAILURE);                                                  \    } } while (0)  #define SAFE_CALL( call) do {                                              \    cudaError err = call;                                                    \    if( cudaSuccess != err) {                                                \ -      fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",        \ -              __FILE__, __LINE__, cudaGetErrorString( err) );              \ -	  CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \ +      ASTRA_ERROR("Cuda error: %s ",                                       \ +              cudaGetErrorString( err));                                   \        exit(EXIT_FAILURE);                                                  \    }                                                                        \    err = cudaThreadSynchronize();                                           \    if( cudaSuccess != err) {                                                \ -      fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",        \ -              __FILE__, __LINE__, cudaGetErrorString( err) );              \ -	  CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \ +      ASTRA_ERROR("Cuda error: %s : ",                                     \ +              cudaGetErrorString( err));                                   \        exit(EXIT_FAILURE);                                                  \    } } while (0) @@ -140,7 +137,7 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount,  	result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount);  	if(result != CUFFT_SUCCESS)  	{ -		std::cerr << "Failed to plan 1d r2c fft" << std::endl; +		ASTRA_ERROR("Failed to plan 1d r2c fft");  		return false;  	} @@ -149,7 +146,7 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount,  	if(result != CUFFT_SUCCESS)  	{ -		std::cerr << "Failed to exec 1d r2c fft" << std::endl; +		ASTRA_ERROR("Failed to exec 1d r2c fft");  		return false;  	} @@ -166,18 +163,18 @@ static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount,  	result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount);  	if(result != CUFFT_SUCCESS)  	{ -		std::cerr << "Failed to plan 1d c2r fft" << std::endl; +		ASTRA_ERROR("Failed to plan 1d c2r fft");  		return false;  	}  	// todo: why do we have to get rid of the const qualifier?  	result = cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex, -                          (cufftReal *)_pfDevTarget); +	                      (cufftReal *)_pfDevTarget);  	cufftDestroy(plan);  	if(result != CUFFT_SUCCESS)  	{ -		std::cerr << "Failed to exec 1d c2r fft" << std::endl; +		ASTRA_ERROR("Failed to exec 1d c2r fft");  		return false;  	} @@ -257,7 +254,7 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,  	}  	rescaleInverseFourier(_iProjectionCount, _iFFTRealDetectorCount, -                          pfDevRealFFTTarget); +	                      pfDevRealFFTTarget);  	SAFE_CALL(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch)); @@ -460,7 +457,7 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,  			const float fA1 = 0.48f;  			const float fA2 = 0.38f;  			float fNMinusOne = (float)(_iFFTFourierDetectorCount) - 1.0f; -			 +  			for(int iDetectorIndex = 1; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)  			{  				float fSmallN = (float)iDetectorIndex; @@ -633,7 +630,7 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,  		}  		default:  		{ -			std::cerr << "Cannot serve requested filter" << std::endl; +			ASTRA_ERROR("Cannot serve requested filter");  		}  	} @@ -746,7 +743,7 @@ void testCudaFFT()  	{  		for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)  		{ -//			int  +//			int  //			pfHostProj[iIndex] = (float)rand() / (float)RAND_MAX;  		} @@ -767,13 +764,13 @@ void testCudaFFT()  	result = cufftPlan1d(&plan, iDetectorCount, CUFFT_R2C, iProjectionCount);  	if(result != CUFFT_SUCCESS)  	{ -		cerr << "Failed to plan 1d r2c fft" << endl; +		ASTRA_ERROR("Failed to plan 1d r2c fft");  	}  	result = cufftExecR2C(plan, pfDevProj, pDevFourProj);  	if(result != CUFFT_SUCCESS)  	{ -		cerr << "Failed to exec 1d r2c fft" << endl; +		ASTRA_ERROR("Failed to exec 1d r2c fft");  	}  	cufftDestroy(plan); @@ -787,7 +784,7 @@ void testCudaFFT()  	float * pfHostFourProjImaginary = new float[iTotalElementCount];  	convertComplexToRealImg(pHostFourProj, iTotalElementCount, pfHostFourProjReal, pfHostFourProjImaginary); -	 +  	writeToMatlabFile("proj_four_real.mat", pfHostFourProjReal, iProjectionCount, iDetectorCount);  	writeToMatlabFile("proj_four_imaginary.mat", pfHostFourProjImaginary, iProjectionCount, iDetectorCount); @@ -797,13 +794,13 @@ void testCudaFFT()  	result = cufftPlan1d(&plan, iDetectorCount, CUFFT_C2R, iProjectionCount);  	if(result != CUFFT_SUCCESS)  	{ -		cerr << "Failed to plan 1d c2r fft" << endl; +		ASTRA_ERROR("Failed to plan 1d c2r fft");  	}  	result = cufftExecC2R(plan, pDevFourProj, pfDevInFourProj);  	if(result != CUFFT_SUCCESS)  	{ -		cerr << "Failed to exec 1d c2r fft" << endl; +		ASTRA_ERROR("Failed to exec 1d c2r fft");  	}  	cufftDestroy(plan); diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu index d0ca7ff..bb8b909 100644 --- a/cuda/2d/par_fp.cu +++ b/cuda/2d/par_fp.cu @@ -487,7 +487,7 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,  	unsigned int blockEnd = 0;  	bool blockVertical = false;  	for (unsigned int a = 0; a <= dims.iProjAngles; ++a) { -		bool vertical; +		bool vertical = false;  		// TODO: Having <= instead of < below causes a 5% speedup.  		// Maybe we should detect corner cases and put them in the optimal  		// group of angles. diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu index 81e368f..a4f8f3e 100644 --- a/cuda/2d/util.cu +++ b/cuda/2d/util.cu @@ -30,6 +30,8 @@ $Id$  #include <cassert>  #include "util.h" +#include "../../include/astra/Logging.h" +  namespace astraCUDA {  bool copyVolumeToDevice(const float* in_data, unsigned int in_pitch, @@ -91,7 +93,7 @@ bool allocateVolume(float*& ptr, unsigned int width, unsigned int height, unsign  	cudaError_t ret = cudaMallocPitch((void**)&ptr, &p, sizeof(float)*width, height);  	if (ret != cudaSuccess) {  		reportCudaError(ret); -		fprintf(stderr, "Failed to allocate %dx%d GPU buffer\n", width, height); +		ASTRA_ERROR("Failed to allocate %dx%d GPU buffer", width, height);  		return false;  	} @@ -259,7 +261,7 @@ bool cudaTextForceKernelsCompletion()  	cudaError_t returnedCudaError = cudaThreadSynchronize();  	if(returnedCudaError != cudaSuccess) { -		fprintf(stderr, "Failed to force completion of cuda kernels: %d: %s.\n", returnedCudaError, cudaGetErrorString(returnedCudaError)); +		ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError));  		return false;  	} @@ -269,7 +271,7 @@ bool cudaTextForceKernelsCompletion()  void reportCudaError(cudaError_t err)  {  	if(err != cudaSuccess) -		fprintf(stderr, "CUDA error %d: %s.\n", err, cudaGetErrorString(err)); +		ASTRA_ERROR("CUDA error %d: %s.", err, cudaGetErrorString(err));  } | 
