diff options
| author | Daniil Kazantsev <dkazanc3@googlemail.com> | 2018-12-19 15:42:38 +0000 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-12-19 15:42:38 +0000 | 
| commit | 07fb80445f83758e4aed94a461cf1cf2b869318a (patch) | |
| tree | e93c03bcfbe2eb88a13cdd42edaea045f7f13c06 | |
| parent | c04b85a6fdd8c63e3363c8072cbfe4b97409dc60 (diff) | |
| parent | ec59b600885a1c7a60e1b528f3d09588aa972609 (diff) | |
Merge pull request #80 from vais-ral/dev-jenkins
Dev jenkins
28 files changed, 338 insertions, 330 deletions
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu index fd586ef..a4dbe70 100644 --- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu +++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License.  */   #include "Diffus_4thO_GPU_core.h" +#include "shared.h"  /* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)   * The minimisation is performed using explicit scheme.  @@ -36,18 +37,6 @@ limitations under the License.   * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.   */ -#define CHECK(call)                                                            \ -{                                                                              \ -    const cudaError_t error = call;                                            \ -    if (error != cudaSuccess)                                                  \ -    {                                                                          \ -        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \ -        fprintf(stderr, "code: %d, reason: %s\n", error,                       \ -                cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ -    }                                                                          \ -} -      #define BLKXSIZE 8  #define BLKYSIZE 8  #define BLKZSIZE 8 @@ -228,7 +217,7 @@ __global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, floa  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/  /********************* MAIN HOST FUNCTION ******************/  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z) +extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)  {  		int dimTotal, dev = 0;  		CHECK(cudaSetDevice(dev)); @@ -242,7 +231,7 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar,          CHECK(cudaMalloc((void**)&d_W_Lapl,dimTotal*sizeof(float)));          CHECK(cudaMemcpy(d_input,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); -        CHECK(cudaMemcpy(d_output,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));       +        CHECK(cudaMemcpy(d_output,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));  	if (Z == 1) {  	     /*2D case */ @@ -275,4 +264,5 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar,          CHECK(cudaFree(d_input));          CHECK(cudaFree(d_output));          CHECK(cudaFree(d_W_Lapl)); +        return 0;  } diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h index 6314c1f..77d5d79 100644 --- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h +++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h @@ -3,6 +3,6 @@  #include "CCPiDefines.h"  #include <stdio.h> -extern "C" CCPI_EXPORT void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);  #endif  diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu index 0228bf0..87871be 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License.  */   #include "LLT_ROF_GPU_core.h" +#include "shared.h"  /* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.   *  @@ -40,18 +41,6 @@ limitations under the License.  * [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"  */ -#define CHECK(call)                                                            \ -{                                                                              \ -    const cudaError_t error = call;                                            \ -    if (error != cudaSuccess)                                                  \ -    {                                                                          \ -        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \ -        fprintf(stderr, "code: %d, reason: %s\n", error,                       \ -                cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ -    }                                                                          \ -} -      #define BLKXSIZE 8  #define BLKYSIZE 8  #define BLKZSIZE 8 @@ -403,7 +392,7 @@ __global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, floa  /************************ HOST FUNCTION ****************************/  /*******************************************************************/ -extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z) +extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)  {  	    // set up device  		int dev = 0; @@ -480,4 +469,5 @@ extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, f          CHECK(cudaFree(D1_ROF));          CHECK(cudaFree(D2_ROF));          CHECK(cudaFree(D3_ROF)); +        return 0;  } diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/Core/regularisers_GPU/LLT_ROF_GPU_core.h index 4a19d09..a6bfcc7 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.h +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.h @@ -3,6 +3,6 @@  #include "CCPiDefines.h"  #include <stdio.h> -extern "C" CCPI_EXPORT void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);  #endif  diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu index 8048830..ff7ce4d 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License.  */   #include "NonlDiff_GPU_core.h" +#include "shared.h"  /* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)   * The minimisation is performed using explicit scheme.  @@ -38,18 +39,7 @@ limitations under the License.   * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.   */ -#define CHECK(call)                                                            \ -{                                                                              \ -    const cudaError_t error = call;                                            \ -    if (error != cudaSuccess)                                                  \ -    {                                                                          \ -        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \ -        fprintf(stderr, "code: %d, reason: %s\n", error,                       \ -                cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ -    }                                                                          \ -} -     +  #define BLKXSIZE 8  #define BLKYSIZE 8  #define BLKZSIZE 8 @@ -295,7 +285,7 @@ __global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambda  /////////////////////////////////////////////////  // HOST FUNCTION -extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z) +extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)  {  	    // set up device  		int dev = 0; @@ -350,5 +340,6 @@ extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar,          CHECK(cudaMemcpy(Output,d_output,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));          CHECK(cudaFree(d_input));          CHECK(cudaFree(d_output)); -        //cudaDeviceReset();  +        //cudaDeviceReset(); +        return 0;  } diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.h b/Core/regularisers_GPU/NonlDiff_GPU_core.h index afd712b..5fe457e 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.h +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.h @@ -3,6 +3,6 @@  #include "CCPiDefines.h"  #include <stdio.h> -extern "C" CCPI_EXPORT void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); +extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);  #endif  diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu index f558b0f..d173124 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -19,7 +19,8 @@   */  #include "PatchSelect_GPU_core.h" -
 +#include "shared.h" +  /* CUDA implementation of non-local weight pre-calculation for non-local priors   * Weights and associated indices are stored into pre-allocated arrays and passed   * to the regulariser @@ -36,32 +37,20 @@   * 1. AR_i - indeces of i neighbours   * 2. AR_j - indeces of j neighbours   * 3. Weights_ij - associated weights - */
 -
 -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
 -#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
 -
 -inline void __checkCudaErrors(cudaError err, const char *file, const int line)
 -{
 -    if (cudaSuccess != err)
 -    {
 -        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
 -                file, line, (int)err, cudaGetErrorString(err));
 -        exit(EXIT_FAILURE);
 -    }
 -}
 -
 -#define BLKXSIZE 16
 -#define BLKYSIZE 16
 -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
 -#define M_PI 3.14159265358979323846
 -#define EPS 1.0e-8
 + */ + + +#define BLKXSIZE 16 +#define BLKYSIZE 16 +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +#define M_PI 3.14159265358979323846 +#define EPS 1.0e-8  #define CONSTVECSIZE5 121  #define CONSTVECSIZE7 225  #define CONSTVECSIZE9 361  #define CONSTVECSIZE11 529  #define CONSTVECSIZE13 729 -
 +  __device__ void swap(float *xp, float *yp)   {      float temp = *xp;  @@ -75,9 +64,9 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp)      *yp = temp;   } -/********************************************************************************/
 -__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +/********************************************************************************/ +__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -85,10 +74,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne      float Weight_Vec[CONSTVECSIZE5];      unsigned short ind_i[CONSTVECSIZE5];      unsigned short ind_j[CONSTVECSIZE5]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -139,10 +128,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne          H_j_d[index2] = ind_j[x];          Weights_d[index2] = Weight_Vec[x];      } -} 
 -/********************************************************************************/
 -__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +}  +/********************************************************************************/ +__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -150,10 +139,10 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne      float Weight_Vec[CONSTVECSIZE7];      unsigned short ind_i[CONSTVECSIZE7];      unsigned short ind_j[CONSTVECSIZE7]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -204,9 +193,9 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne          H_j_d[index2] = ind_j[x];          Weights_d[index2] = Weight_Vec[x];      } -}
 -__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +} +__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -214,10 +203,10 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne      float Weight_Vec[CONSTVECSIZE9];      unsigned short ind_i[CONSTVECSIZE9];      unsigned short ind_j[CONSTVECSIZE9]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -269,8 +258,8 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne          Weights_d[index2] = Weight_Vec[x];      }                       } -__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -278,10 +267,10 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign      float Weight_Vec[CONSTVECSIZE11];      unsigned short ind_i[CONSTVECSIZE11];      unsigned short ind_j[CONSTVECSIZE11]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -333,8 +322,8 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign          Weights_d[index2] = Weight_Vec[x];      }  }  -__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
 -{          
 +__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{                long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;      float normsum; @@ -342,10 +331,10 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign      float Weight_Vec[CONSTVECSIZE13];      unsigned short ind_i[CONSTVECSIZE13];      unsigned short ind_j[CONSTVECSIZE13]; -
 -    int i = blockDim.x * blockIdx.x + threadIdx.x;
 -    int j = blockDim.y * blockIdx.y + threadIdx.y;
 -    
 + +    int i = blockDim.x * blockIdx.x + threadIdx.x; +    int j = blockDim.y * blockIdx.y + threadIdx.y; +          long index = i*M+j;            counter = 0; @@ -398,29 +387,29 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign      }  }  -   
 +     /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/  /********************* MAIN HOST FUNCTION ******************/ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
 -extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
 -{
 -    int deviceCount = -1; // number of devices
 -    cudaGetDeviceCount(&deviceCount);
 -    if (deviceCount == 0) {
 -        fprintf(stderr, "No CUDA devices found\n");
 -        return;
 +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h) +{ +    int deviceCount = -1; // number of devices +    cudaGetDeviceCount(&deviceCount); +    if (deviceCount == 0) { +        fprintf(stderr, "No CUDA devices found\n"); +        return -1;      }   -      
 -    int SearchW_full, SimilW_full, counterG, i, j;
 +       +    int SearchW_full, SimilW_full, counterG, i, j;      float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;     -    unsigned short *H_i_d, *H_j_d;
 +    unsigned short *H_i_d, *H_j_d;      h2 = h*h; -    
 -    dim3 dimBlock(BLKXSIZE,BLKYSIZE);
 -    dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));    
 -       
 -    SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window  size */
 -    SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1);   /* the full similarity window  size */
 +     +    dim3 dimBlock(BLKXSIZE,BLKYSIZE); +    dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));     +        +    SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window  size */ +    SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1);   /* the full similarity window  size */      /* generate a 2D Gaussian kernel for NLM procedure */      Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float)); @@ -432,16 +421,16 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho      }} /*main neighb loop */ -    /*allocate space on the device*/
 -    checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
 +    /*allocate space on the device*/ +    checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );      checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) );      checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) );      checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) ); -    checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
 -
 -    /* copy data from the host to the device */
 +    checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) ); + +    /* copy data from the host to the device */      checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) ); -    checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );    
 +    checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );          /********************** Run CUDA kernel here ********************/      if (SearchWindow == 5)  IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); @@ -450,19 +439,20 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho      else if (SearchWindow == 11)  IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);      else if (SearchWindow == 13)  IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);      else { -    fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
 -        return;}     -    checkCudaErrors(cudaPeekAtLastError() );        
 -    checkCudaErrors(cudaDeviceSynchronize());   
 -    /***************************************************************/    
 -        
 +    fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n"); +        return -1;}     +    checkCudaErrors(cudaPeekAtLastError() );         +    checkCudaErrors(cudaDeviceSynchronize());    +    /***************************************************************/     +              checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );      checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );        checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );    -    
 +          cudaFree(Ad);       cudaFree(H_i_d);       cudaFree(H_j_d);          cudaFree(Weights_d);     -    cudaFree(Eucl_Vec_d);
 +    cudaFree(Eucl_Vec_d); +    return 0;  } diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h index d20fe9f..8c124d3 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.h +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.h @@ -3,6 +3,6 @@  #include "CCPiDefines.h"  #include <stdio.h> -extern "C" CCPI_EXPORT void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); +extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);  #endif  diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu index 3081011..73232a9 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/Core/regularisers_GPU/TGV_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License.  */   #include "TGV_GPU_core.h" +#include "shared.h"  /* CUDA implementation of Primal-Dual denoising method for    * Total Generilized Variation (TGV)-L2 model [1] (2D case only) @@ -36,19 +37,6 @@ limitations under the License.   * References:   * [1] K. Bredies "Total Generalized Variation"   */ - -#define CHECK(call)                                                            \ -{                                                                              \ -    const cudaError_t error = call;                                            \ -    if (error != cudaSuccess)                                                  \ -    {                                                                          \ -        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \ -        fprintf(stderr, "code: %d, reason: %s\n", error,                       \ -                cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ -    }                                                                          \ -} -      #define BLKXSIZE2D 16  #define BLKYSIZE2D 16 @@ -239,7 +227,7 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/  /********************* MAIN HOST FUNCTION ******************/  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY) +extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)  {  		int dimTotal, dev = 0;  		CHECK(cudaSetDevice(dev)); @@ -320,4 +308,5 @@ extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, fl          CHECK(cudaFree(V2));          CHECK(cudaFree(V1_old));          CHECK(cudaFree(V2_old)); +        return 0;  } diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h index 663378f..5a4eb76 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.h +++ b/Core/regularisers_GPU/TGV_GPU_core.h @@ -3,6 +3,6 @@  #include "CCPiDefines.h"  #include <stdio.h> -extern "C" CCPI_EXPORT void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); +extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);  #endif  diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu index eab7a44..b371c5d 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License.  */   #include "TV_FGP_GPU_core.h" +#include "shared.h"  #include <thrust/device_vector.h>  #include <thrust/transform_reduce.h> @@ -39,18 +40,6 @@ limitations under the License.   * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"   */ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ -    if (cudaSuccess != err) -    { -        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", -                file, line, (int)err, cudaGetErrorString(err)); -        exit(EXIT_FAILURE); -    } -}  #define BLKXSIZE2D 16  #define BLKYSIZE2D 16 @@ -354,13 +343,13 @@ __global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/  ////////////MAIN HOST FUNCTION /////////////// -extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)  {      int deviceCount = -1; // number of devices      cudaGetDeviceCount(&deviceCount);      if (deviceCount == 0) {          fprintf(stderr, "No CUDA devices found\n"); -        return; +        return -1;      }      int count = 0, i; @@ -570,5 +559,6 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in              cudaFree(R2);                      cudaFree(R3);              }  -    //cudaDeviceReset();  +    //cudaDeviceReset(); +    return 0;  } diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.h b/Core/regularisers_GPU/TV_FGP_GPU_core.h index 107d243..b28cdf3 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.h +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.h @@ -5,6 +5,6 @@  #ifndef _TV_FGP_GPU_  #define _TV_FGP_GPU_ -extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);  +extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);  #endif  diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu index 57de63d..76f5be9 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu @@ -35,18 +35,7 @@ limitations under the License.  *  * D. Kazantsev, 2016-18  */ - -#define CHECK(call)                                                            \ -{                                                                              \ -    const cudaError_t error = call;                                            \ -    if (error != cudaSuccess)                                                  \ -    {                                                                          \ -        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \ -        fprintf(stderr, "code: %d, reason: %s\n", error,                       \ -                cudaGetErrorString(error));                                    \ -        exit(1);                                                               \ -    }                                                                          \ -} +#include "shared.h"  #define BLKXSIZE 8  #define BLKYSIZE 8 @@ -304,7 +293,7 @@ __host__ __device__ int sign (float x)  /////////////////////////////////////////////////  // HOST FUNCTION -extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) +extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)  {  	    // set up device  		int dev = 0; @@ -364,5 +353,6 @@ extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, in          CHECK(cudaFree(d_update));          CHECK(cudaFree(d_D1));          CHECK(cudaFree(d_D2));         -        //cudaDeviceReset();  +        //cudaDeviceReset(); +        return 0;  } diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.h b/Core/regularisers_GPU/TV_ROF_GPU_core.h index d772aba..3a09296 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.h +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.h @@ -3,6 +3,6 @@  #include "CCPiDefines.h"  #include <stdio.h> -extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);  #endif  diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu index 68b9221..1f494ee 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.cu +++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License.  */   #include "TV_SB_GPU_core.h" +#include "shared.h"  #include <thrust/device_vector.h>  #include <thrust/transform_reduce.h> @@ -39,17 +40,6 @@ limitations under the License.  */  // This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ -    if (cudaSuccess != err) -    { -        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", -                file, line, (int)err, cudaGetErrorString(err)); -        exit(EXIT_FAILURE); -    } -}  #define BLKXSIZE2D 16  #define BLKYSIZE2D 16 @@ -363,13 +353,13 @@ __global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/  /********************* MAIN HOST FUNCTION ******************/  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ) +extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)  {      int deviceCount = -1; // number of devices      cudaGetDeviceCount(&deviceCount);      if (deviceCount == 0) {          fprintf(stderr, "No CUDA devices found\n"); -        return; +        return -1;      }  	int ll, DimTotal; @@ -557,5 +547,6 @@ extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter,              cudaFree(By);              cudaFree(Bz);      }  -    //cudaDeviceReset();  +    //cudaDeviceReset(); +    return 0;  } diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h index bdc9219..d44ab77 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.h +++ b/Core/regularisers_GPU/TV_SB_GPU_core.h @@ -5,6 +5,6 @@  #ifndef _SB_TV_GPU_  #define _SB_TV_GPU_ -extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); +extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);  #endif  diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu index 80a78da..7503ec7 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -16,7 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the specific language governing permissions and  limitations under the License.  */  - +#include "shared.h"  #include "dTV_FGP_GPU_core.h"  #include <thrust/device_vector.h>  #include <thrust/transform_reduce.h> @@ -45,19 +45,6 @@ limitations under the License.   */ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ -    if (cudaSuccess != err) -    { -        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", -                file, line, (int)err, cudaGetErrorString(err)); -        exit(EXIT_FAILURE); -    } -} -  #define BLKXSIZE2D 16  #define BLKYSIZE2D 16 @@ -468,13 +455,13 @@ __global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_t  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/  ////////////MAIN HOST FUNCTION /////////////// -extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)  {      int deviceCount = -1; // number of devices      cudaGetDeviceCount(&deviceCount);      if (deviceCount == 0) {          fprintf(stderr, "No CUDA devices found\n"); -        return; +        return -1;      }      int count = 0, i; @@ -748,6 +735,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f              cudaFree(InputRef_y);              cudaFree(InputRef_z);              cudaFree(d_InputRef); -    }  -    //cudaDeviceReset();  +    } +    //cudaDeviceReset(); +    return 0;  } diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h index b906636..9020b1a 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.h +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.h @@ -5,6 +5,6 @@  #ifndef _dTV_FGP_GPU_  #define _dTV_FGP_GPU_ -extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);  #endif  diff --git a/Core/regularisers_GPU/shared.h b/Core/regularisers_GPU/shared.h new file mode 100644 index 0000000..fe98cd6 --- /dev/null +++ b/Core/regularisers_GPU/shared.h @@ -0,0 +1,42 @@ +/*shared macros*/ + + +/*checks CUDA call, should be used in functions returning <int> value +if error happens, writes to standard error and explicitly returns -1*/ +#define CHECK(call)                                                            \ +{                                                                              \ +    const cudaError_t error = call;                                            \ +    if (error != cudaSuccess)                                                  \ +    {                                                                          \ +        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \ +        fprintf(stderr, "code: %d, reason: %s\n", error,                       \ +                cudaGetErrorString(error));                                    \ +        return -1;                                                             \ +    }                                                                          \ +} + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(call)                                                            \ +{                                                                              \ +    const cudaError_t error = call;                                            \ +    if (error != cudaSuccess)                                                  \ +    {                                                                          \ +        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \ +        fprintf(stderr, "code: %d, reason: %s\n", error,                       \ +                cudaGetErrorString(error));                                    \ +        return -1;                                                                \ +    }                                                                          \ +} +/*#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ +    if (cudaSuccess != err) +    { +        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", +                file, line, (int)err, cudaGetErrorString(err)); +        return; +    } +} +*/ + @@ -69,7 +69,6 @@ Here an example of build on Linux (see also `run.sh` for additional info):  ```bash  git clone https://github.com/vais-ral/CCPi-Regularisation-Toolkit.git -mkdir build  cd build  cmake .. -DCONDA_BUILD=OFF -DBUILD_MATLAB_WRAPPER=ON -DBUILD_PYTHON_WRAPPER=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install  make install @@ -88,7 +87,7 @@ conda install ccpi-regulariser -c ccpi -c conda-forge  #### Python (conda-build)  ``` -	export CIL_VERSION=0.10.2 +	export CIL_VERSION=0.10.3  	conda build Wrappers/Python/conda-recipe --numpy 1.12 --python 3.5   	conda install ccpi-regulariser=${CIL_VERSION} --use-local --force  	cd demos/ @@ -124,7 +123,7 @@ On Windows the `dll` and the mex modules must reside in the same directory. It i  addpath(/path/to/library);  ``` -#### Legacy Matlab installation +#### Legacy Matlab installation (partly supported, please use Cmake)  ```  	cd /Wrappers/Matlab/mex_compile diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m index e0311ea..dd1475c 100644 --- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m +++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m @@ -7,11 +7,10 @@  % In the code bellow we provide a full explicit path to nvcc compiler   % ! paths to matlab and CUDA sdk can be different, modify accordingly ! -% Tested on Ubuntu 16.04/MATLAB 2016b/cuda7.5/gcc4.9 - -% Installation HAS NOT been tested on Windows, please contact me if you'll be able to -% install software on Windows and I gratefully include it into the master release.  +% Tested on Ubuntu 18.04/MATLAB 2016b/cuda10.0/gcc7.3 +% Installation HAS NOT been tested on Windows, please you Cmake build or +% modify the code bellow accordingly  fsep = '/';  pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_GPU'], 1i); @@ -28,44 +27,45 @@ fprintf('%s \n', '<<<<<<<<<<<Compiling GPU regularisers (CUDA)>>>>>>>>>>>>>');  fprintf('%s \n', 'Compiling ROF-TV...');  !/usr/local/cuda/bin/nvcc -O0 -c TV_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o  movefile('ROF_TV_GPU.mex*',Pathmove);  fprintf('%s \n', 'Compiling FGP-TV...');  !/usr/local/cuda/bin/nvcc -O0 -c TV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o  movefile('FGP_TV_GPU.mex*',Pathmove);  fprintf('%s \n', 'Compiling SB-TV...');  !/usr/local/cuda/bin/nvcc -O0 -c TV_SB_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o  movefile('SB_TV_GPU.mex*',Pathmove);  fprintf('%s \n', 'Compiling TGV...');  !/usr/local/cuda/bin/nvcc -O0 -c TGV_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o  movefile('TGV_GPU.mex*',Pathmove);  fprintf('%s \n', 'Compiling dFGP-TV...');  !/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o  movefile('FGP_dTV_GPU.mex*',Pathmove);  fprintf('%s \n', 'Compiling NonLinear Diffusion...');  !/usr/local/cuda/bin/nvcc -O0 -c NonlDiff_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu NonlDiff_GPU.cpp NonlDiff_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu NonlDiff_GPU.cpp NonlDiff_GPU_core.o  movefile('NonlDiff_GPU.mex*',Pathmove);  fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');  !/usr/local/cuda/bin/nvcc -O0 -c Diffus_4thO_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o  movefile('Diffusion_4thO_GPU.mex*',Pathmove);  fprintf('%s \n', 'Compiling ROF-LLT...');  !/usr/local/cuda/bin/nvcc -O0 -c LLT_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu LLT_ROF_GPU.cpp LLT_ROF_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu LLT_ROF_GPU.cpp LLT_ROF_GPU_core.o  movefile('LLT_ROF_GPU.mex*',Pathmove); +  delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* NonlDiff_GPU_core* Diffus_4thO_GPU_core* TGV_GPU_core* LLT_ROF_GPU_core* CCPiDefines.h  fprintf('%s \n', 'All successfully compiled!'); diff --git a/Wrappers/Python/conda-recipe/build.sh b/Wrappers/Python/conda-recipe/build.sh index 54bc8e2..eec7c2f 100644 --- a/Wrappers/Python/conda-recipe/build.sh +++ b/Wrappers/Python/conda-recipe/build.sh @@ -4,7 +4,7 @@ cp -rv "$RECIPE_DIR/../.." "$SRC_DIR/ccpi"  cp -rv "$RECIPE_DIR/../../../Core" "$SRC_DIR/Core"  cd $SRC_DIR - +##cuda=off  cmake -G "Unix Makefiles" $RECIPE_DIR/../../../ -DBUILD_PYTHON_WRAPPER=ON -DCONDA_BUILD=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB=$CONDA_PREFIX/lib -DLIBRARY_INC=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$PREFIX diff --git a/Wrappers/Python/conda-recipe/meta.yaml b/Wrappers/Python/conda-recipe/meta.yaml index ed73165..808493e 100644 --- a/Wrappers/Python/conda-recipe/meta.yaml +++ b/Wrappers/Python/conda-recipe/meta.yaml @@ -1,6 +1,6 @@  package:    name: ccpi-regulariser -  version: 0.10.2 +  version: 0.10.3  build: diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py index 499ae7f..cfb3f53 100755 --- a/Wrappers/Python/conda-recipe/run_test.py +++ b/Wrappers/Python/conda-recipe/run_test.py @@ -2,7 +2,7 @@ import unittest  import numpy as np
  import os
  import timeit
 -from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
  from PIL import Image
  class TiffReader(object):
 @@ -37,6 +37,8 @@ class TestRegularisers(unittest.TestCase):      def test_ROF_TV_CPU_vs_GPU(self):
 +        #print ("tomas debug test function")
 +        print(__name__)
          filename = os.path.join("lena_gray_512.tif")
          plt = TiffReader()
          # read image
 @@ -63,11 +65,11 @@ class TestRegularisers(unittest.TestCase):          # set parameters
          pars = {'algorithm': ROF_TV, \
 -                'input' : u0,\
 -                'regularisation_parameter':0.04,\
 -                'number_of_iterations': 1000,\
 -                'time_marching_parameter': 0.0001
 -                }
 +        'input' : u0,\
 +        'regularisation_parameter':0.04,\
 +        'number_of_iterations': 2500,\
 +        'time_marching_parameter': 0.00002
 +        }
          print ("#############ROF TV CPU####################")
          start_time = timeit.default_timer()
          rof_cpu = ROF_TV(pars['input'],
 @@ -88,8 +90,8 @@ class TestRegularisers(unittest.TestCase):                               pars['number_of_iterations'], 
                               pars['time_marching_parameter'],'gpu')
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
 +
          rms = rmse(Im, rof_gpu)
          pars['rmse'] = rms
          pars['algorithm'] = ROF_TV
 @@ -101,10 +103,10 @@ class TestRegularisers(unittest.TestCase):          diff_im = np.zeros(np.shape(rof_cpu))
          diff_im = abs(rof_cpu - rof_gpu)
          diff_im[diff_im > tolerance] = 1
 -
          self.assertLessEqual(diff_im.sum() , 1)
      def test_FGP_TV_CPU_vs_GPU(self):
 +        print(__name__)
          filename = os.path.join("lena_gray_512.tif")
          plt = TiffReader()
          # read image
 @@ -169,10 +171,10 @@ class TestRegularisers(unittest.TestCase):                        pars['methodTV'],
                        pars['nonneg'],
                        pars['printingOut'],'gpu')
 -                                           
 +
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
 +
          rms = rmse(Im, fgp_gpu)
          pars['rmse'] = rms
          pars['algorithm'] = FGP_TV
 @@ -189,6 +191,7 @@ class TestRegularisers(unittest.TestCase):          self.assertLessEqual(diff_im.sum() , 1)
      def test_SB_TV_CPU_vs_GPU(self):
 +        print(__name__)
          filename = os.path.join("lena_gray_512.tif")
          plt = TiffReader()
          # read image
 @@ -251,10 +254,10 @@ class TestRegularisers(unittest.TestCase):                        pars['tolerance_constant'], 
                        pars['methodTV'],
                        pars['printingOut'],'gpu')
 -                                           
 +
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
 +
          rms = rmse(Im, sb_gpu)
          pars['rmse'] = rms
          pars['algorithm'] = SB_TV
 @@ -269,6 +272,7 @@ class TestRegularisers(unittest.TestCase):          self.assertLessEqual(diff_im.sum(), 1)
      def test_TGV_CPU_vs_GPU(self):
 +        print(__name__)
          filename = os.path.join("lena_gray_512.tif")
          plt = TiffReader()
          # read image
 @@ -329,10 +333,10 @@ class TestRegularisers(unittest.TestCase):                        pars['alpha0'],
                        pars['number_of_iterations'],
                        pars['LipshitzConstant'],'gpu')
 -                                           
 +
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
 +
          rms = rmse(Im, tgv_gpu)
          pars['rmse'] = rms
          pars['algorithm'] = TGV
 @@ -347,6 +351,7 @@ class TestRegularisers(unittest.TestCase):          self.assertLessEqual(diff_im.sum() , 1)
      def test_LLT_ROF_CPU_vs_GPU(self):
 +        print(__name__)
          filename = os.path.join("lena_gray_512.tif")
          plt = TiffReader()
          # read image
 @@ -405,8 +410,8 @@ class TestRegularisers(unittest.TestCase):                        pars['time_marching_parameter'],'gpu')
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
 +
          rms = rmse(Im, lltrof_gpu)
          pars['rmse'] = rms
          pars['algorithm'] = LLT_ROF
 @@ -421,6 +426,7 @@ class TestRegularisers(unittest.TestCase):          self.assertLessEqual(diff_im.sum(), 1)
      def test_NDF_CPU_vs_GPU(self):
 +        print(__name__)
          filename = os.path.join("lena_gray_512.tif")
          plt = TiffReader()
          # read image
 @@ -483,8 +489,7 @@ class TestRegularisers(unittest.TestCase):                        pars['penalty_type'],'gpu')
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
          rms = rmse(Im, ndf_gpu)
          pars['rmse'] = rms
          pars['algorithm'] = NDF
 @@ -557,8 +562,7 @@ class TestRegularisers(unittest.TestCase):                        pars['time_marching_parameter'], 'gpu')
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
          rms = rmse(Im, diff4th_gpu)
          pars['rmse'] = rms
          pars['algorithm'] = DIFF4th
 @@ -603,8 +607,8 @@ class TestRegularisers(unittest.TestCase):                  'input' : u0,\
                  'refdata' : u_ref,\
                  'regularisation_parameter':0.04, \
 -                'number_of_iterations' :2000 ,\
 -                'tolerance_constant':1e-06,\
 +                'number_of_iterations' :1000 ,\
 +                'tolerance_constant':1e-07,\
                  'eta_const':0.2,\
                  'methodTV': 0 ,\
                  'nonneg': 0 ,\
 @@ -643,8 +647,7 @@ class TestRegularisers(unittest.TestCase):                        pars['nonneg'],
                        pars['printingOut'],'gpu')
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
          rms = rmse(Im, fgp_dtv_gpu)
          pars['rmse'] = rms
          pars['algorithm'] = FGP_dTV
 @@ -765,8 +768,8 @@ class TestRegularisers(unittest.TestCase):               pars_rof_tv['number_of_iterations'],
               pars_rof_tv['time_marching_parameter'],'gpu')
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
 +
          rms_rof = rmse(Im, rof_gpu)
          # now compare obtained rms with the expected value
          self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
 @@ -806,10 +809,10 @@ class TestRegularisers(unittest.TestCase):                pars_fgp_tv['nonneg'],
                pars_fgp_tv['printingOut'],'gpu')  
          except ValueError as ve:
 -            self.assertTrue(True)
 -            return
 +            self.skipTest("Results not comparable. GPU computing error.")
          rms_fgp = rmse(Im, fgp_gpu)
          # now compare obtained rms with the expected value
 +
          self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
  if __name__ == '__main__':
 diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py index 616eab0..6529b5c 100644 --- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py @@ -656,8 +656,8 @@ pars = {'algorithm' : FGP_dTV, \          'input' : u0,\          'refdata' : u_ref,\          'regularisation_parameter':0.04, \ -        'number_of_iterations' :2000 ,\ -        'tolerance_constant':1e-06,\ +        'number_of_iterations' :1000 ,\ +        'tolerance_constant':1e-07,\          'eta_const':0.2,\          'methodTV': 0 ,\          'nonneg': 0 ,\ diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx index 302727e..2b97865 100644 --- a/Wrappers/Python/src/gpu_regularisers.pyx +++ b/Wrappers/Python/src/gpu_regularisers.pyx @@ -18,15 +18,17 @@ import cython  import numpy as np  cimport numpy as np -cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); -cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); -cdef extern void TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z); -cdef extern void TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); -cdef extern void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); -cdef extern void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); -cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); -cdef extern void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); -cdef extern void PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); +CUDAErrorMessage = 'CUDA error' + +cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); +cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z); +cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); +cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); +cdef extern int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); +cdef extern int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); +cdef extern int PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);  # Total-variation Rudin-Osher-Fatemi (ROF)  def TV_ROF_GPU(inputData, @@ -186,15 +188,16 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,      cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \  		    np.zeros([dims[0],dims[1]], dtype='float32') -    # Running CUDA code here     -    TV_ROF_GPU_main(             +    # Running CUDA code here +    if (TV_ROF_GPU_main(              &inputData[0,0], &outputData[0,0],                          regularisation_parameter,                         iterations ,                          time_marching_parameter,  -                       dims[1], dims[0], 1);    -      -    return outputData +                       dims[1], dims[0], 1)==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage);  def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,                        float regularisation_parameter, @@ -210,14 +213,15 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,  		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')      # Running CUDA code here     -    TV_ROF_GPU_main(             +    if (TV_ROF_GPU_main(              &inputData[0,0,0], &outputData[0,0,0],                          regularisation_parameter,                         iterations ,                          time_marching_parameter,  -                       dims[2], dims[1], dims[0]);    -      -    return outputData +                       dims[2], dims[1], dims[0])==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage);  #****************************************************************#  #********************** Total-variation FGP *********************#  #****************************************************************# @@ -238,16 +242,18 @@ def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,  		    np.zeros([dims[0],dims[1]], dtype='float32')      # Running CUDA code here     -    TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0],                         +    if (TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0],                         regularisation_parameter,                          iterations,                          tolerance_param,                         methodTV,                         nonneg,                         printM, -                       dims[1], dims[0], 1);    -      -    return outputData +                       dims[1], dims[0], 1)==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); +  def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,                        float regularisation_parameter, @@ -266,16 +272,18 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,  		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')      # Running CUDA code here     -    TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0],  +    if (TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0],                         regularisation_parameter ,                          iterations,                          tolerance_param,                         methodTV,                         nonneg,                         printM, -                       dims[2], dims[1], dims[0]);    -      -    return outputData  +                       dims[2], dims[1], dims[0])==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); +  #***************************************************************#  #********************** Total-variation SB *********************#  #***************************************************************# @@ -295,15 +303,17 @@ def SBTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,  		    np.zeros([dims[0],dims[1]], dtype='float32')      # Running CUDA code here     -    TV_SB_GPU_main(&inputData[0,0], &outputData[0,0],                         +    if (TV_SB_GPU_main(&inputData[0,0], &outputData[0,0],                         regularisation_parameter,                          iterations,                          tolerance_param,                         methodTV,                         printM, -                       dims[1], dims[0], 1);    -      -    return outputData +                       dims[1], dims[0], 1)==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); +  def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,                        float regularisation_parameter, @@ -321,15 +331,17 @@ def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,  		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')      # Running CUDA code here     -    TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0],  +    if (TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0],                         regularisation_parameter ,                          iterations,                          tolerance_param,                         methodTV,                         printM, -                       dims[2], dims[1], dims[0]); -      -    return outputData  +                       dims[2], dims[1], dims[0])==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); +  #***************************************************************#  #************************ LLT-ROF model ************************# @@ -349,8 +361,11 @@ def LLT_ROF_GPU2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,  		    np.zeros([dims[0],dims[1]], dtype='float32')      # Running CUDA code here     -    LLT_ROF_GPU_main(&inputData[0,0], &outputData[0,0],regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1); -    return outputData +    if (LLT_ROF_GPU_main(&inputData[0,0], &outputData[0,0],regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1)==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); +  def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,                        float regularisation_parameterROF, @@ -367,8 +382,11 @@ def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,  		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')      # Running CUDA code here     -    LLT_ROF_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0]); -    return outputData  +    if (LLT_ROF_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0])==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); +  #***************************************************************# @@ -389,13 +407,16 @@ def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,              np.zeros([dims[0],dims[1]], dtype='float32')      #/* Run TGV iterations for 2D data */ -    TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter,  +    if (TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter,                         alpha1,                         alpha0,                         iterationsNumb,                          LipshitzConst, -                       dims[1],dims[0]) -    return outputData +                       dims[1],dims[0])==0): +        return outputData +    else: +        raise ValueError(CUDAErrorMessage); +  #****************************************************************#  #**************Directional Total-variation FGP ******************# @@ -419,7 +440,7 @@ def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,  		    np.zeros([dims[0],dims[1]], dtype='float32')      # Running CUDA code here     -    dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],                         +    if (dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],                         regularisation_parameter,                          iterations,                          tolerance_param, @@ -427,9 +448,11 @@ def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,                         methodTV,                         nonneg,                         printM, -                       dims[1], dims[0], 1);    -      -    return outputData +                       dims[1], dims[0], 1)==0): +        return outputData +    else: +        raise ValueError(CUDAErrorMessage); +  def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,                np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,  @@ -450,7 +473,7 @@ def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,  		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')      # Running CUDA code here     -    dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],  +    if (dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],                         regularisation_parameter ,                          iterations,                          tolerance_param, @@ -458,8 +481,11 @@ def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,                         methodTV,                         nonneg,                         printM, -                       dims[2], dims[1], dims[0]); -    return outputData  +                       dims[2], dims[1], dims[0])==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); +  #****************************************************************#  #***************Nonlinear (Isotropic) Diffusion******************# @@ -483,8 +509,11 @@ def NDF_GPU_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,      # Run Nonlinear Diffusion iterations for 2D data       # Running CUDA code here   -    NonlDiff_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1) -    return outputData +    if (NonlDiff_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); +  def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,                        float regularisation_parameter, @@ -502,9 +531,11 @@ def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,      # Run Nonlinear Diffusion iterations for  3D data       # Running CUDA code here   -    NonlDiff_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0]) +    if (NonlDiff_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); -    return outputData  #****************************************************************#  #************Anisotropic Fourth-Order diffusion******************#  #****************************************************************# @@ -522,8 +553,11 @@ def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,      # Run Anisotropic Fourth-Order diffusion for 2D data       # Running CUDA code here   -    Diffus4th_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1) -    return outputData +    if (Diffus4th_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1)==0): +        return outputData +    else: +        raise ValueError(CUDAErrorMessage); +  def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,                        float regularisation_parameter, @@ -540,9 +574,11 @@ def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,      # Run Anisotropic Fourth-Order diffusion for  3D data       # Running CUDA code here   -    Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0]) +    if (Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0])==0): +        return outputData; +    else: +        raise ValueError(CUDAErrorMessage); -    return outputData  #****************************************************************#  #************Patch-based weights pre-selection******************#  #****************************************************************# @@ -571,6 +607,8 @@ def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,              np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')      # Run patch-based weight selection function -    PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow,  neighbours,  edge_parameter) -     -    return H_i, H_j, Weights       +    if (PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow,  neighbours,  edge_parameter)==0): +        return H_i, H_j, Weights; +    else: +        raise ValueError(CUDAErrorMessage); + diff --git a/build/jenkins-build.sh b/build/jenkins-build.sh index 04f8da6..0c397b1 100755 --- a/build/jenkins-build.sh +++ b/build/jenkins-build.sh @@ -1,12 +1,29 @@  #!/usr/bin/env bash  # Script to builds source code in Jenkins environment +module try-load conda -module avail -module load conda -# it expects that git clone is done before this script launch +# install miniconda if the module is not present +if hash conda 2>/dev/null; then +  echo using conda +else +  if [ ! -f Miniconda3-latest-Linux-x86_64.sh ]; then +    wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh +    chmod +x Miniconda3-latest-Linux-x86_64.sh +  fi +  ./Miniconda3-latest-Linux-x86_64.sh -u -b -p . +  PATH=$PATH:./bin +fi + +# presume that git clone is done before this script launch  # git clone https://github.com/vais-ral/CCPi-Regularisation-Toolkit -conda install conda-build +conda install -y conda-build  #export CIL_VERSION=0.10.2 -export CIL_VERSION=0.10.2 -cd CCPi-Regularisation-Toolkit +if [[ -n ${CIL_VERSION} ]] +then +  echo Using defined version: $CIL_VERSION +else +  export CIL_VERSION=0.10.3 +  echo Defining version: $CIL_VERSION +fi +#cd CCPi-Regularisation-Toolkit # already there by jenkins  conda build Wrappers/Python/conda-recipe @@ -3,7 +3,7 @@ echo "Building CCPi-regularisation Toolkit using CMake"  # rm -r build  # Requires Cython, install it first:   # pip install cython -mkdir build +# mkdir build  cd build/  make clean  # install Python modules only without CUDA  | 
