17 files changed, 11 insertions, 2168 deletions
diff --git a/Wrappers/Matlab/mex_compile/compile_mex.m b/Wrappers/Matlab/mex_compile/compile_mex.m
index 1353859..e1debf3 100644
--- a/Wrappers/Matlab/mex_compile/compile_mex.m
+++ b/Wrappers/Matlab/mex_compile/compile_mex.m
@@ -1,11 +1,22 @@
 % compile mex's in Matlab once
+copyfile ../../../Core/regularizers_CPU/ regularizers_CPU/
+copyfile ../../../Core/regularizers_GPU/ regularizers_GPU/
+copyfile ../../../Core/CCPiDefines.h regularizers_CPU/
+
 cd regularizers_CPU/
 
+% compile C regularizers
+
 mex LLT_model.c LLT_model_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 mex SplitBregman_TV.c SplitBregman_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 mex TGV_PD.c TGV_PD_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 mex PatchBased_Regul.c PatchBased_Regul_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 
+delete LLT_model_core.c LLT_model_core.h FGP_TV_core.c FGP_TV_core.h SplitBregman_TV_core.c SplitBregman_TV_core.h  TGV_PD_core.c  TGV_PD_core.h PatchBased_Regul_core.c PatchBased_Regul_core.h utils.c utils.h CCPiDefines.h
+
+% compile CUDA-based regularizers (not implemented yet: sources are copied into regularizers_GPU/ above, but nothing is built)
+%cd ../regularizers_GPU/
+
 cd ../../
 cd demos
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c
deleted file mode 100644
index 03cd445..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "FGP_TV_core.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon: tolerance constant [OPTIONAL parameter]
- * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
- *
- * Output:
- * [1] Filtered/regularized image
- * [2] last function value 
- *
- * Example of image denoising:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .05*randn(size(Im)); % adding noise
- * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
- *
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- *
- * D. Kazantsev, 2016-17
- *
- */
-
-/* 2D-case related Functions */
-/*****************************************************************/
-float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY)
-{   
-    int i,j;
-    float f1, f2, val1, val2;
-    
-    /*data-related term */
-    f1 = 0.0f;
-    for(i=0; i<dimX*dimY; i++) f1 += pow(D[i] - A[i],2);    
-    
-    /*TV-related term */
-    f2 = 0.0f;
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* boundary conditions  */
-            if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(i+1)*dimY + (j)] - A[(i)*dimY + (j)];}
-            if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(i)*dimY + (j+1)] - A[(i)*dimY + (j)];}    
-            f2 += sqrt(pow(val1,2) + pow(val2,2));
-        }}  
-    
-    /* sum of two terms */
-    funcvalA[0] = 0.5f*f1 + lambda*f2;     
-    return *funcvalA;
-}
-
-float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
-{
-	float val1, val2;
-	int i, j;
-#pragma omp parallel for shared(A,D,R1,R2) private(i,j,val1,val2)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			/* boundary conditions  */
-			if (i == 0) { val1 = 0.0f; }
-			else { val1 = R1[(i - 1)*dimY + (j)]; }
-			if (j == 0) { val2 = 0.0f; }
-			else { val2 = R2[(i)*dimY + (j - 1)]; }
-			D[(i)*dimY + (j)] = A[(i)*dimY + (j)] - lambda*(R1[(i)*dimY + (j)] + R2[(i)*dimY + (j)] - val1 - val2);
-		}
-	}
-	return *D;
-}
-float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
-{
-	float val1, val2, multip;
-	int i, j;
-	multip = (1.0f / (8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(i,j,val1,val2)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			/* boundary conditions */
-			if (i == dimX - 1) val1 = 0.0f; else val1 = D[(i)*dimY + (j)] - D[(i + 1)*dimY + (j)];
-			if (j == dimY - 1) val2 = 0.0f; else val2 = D[(i)*dimY + (j)] - D[(i)*dimY + (j + 1)];
-			P1[(i)*dimY + (j)] = R1[(i)*dimY + (j)] + multip*val1;
-			P2[(i)*dimY + (j)] = R2[(i)*dimY + (j)] + multip*val2;
-		}
-	}
-	return 1;
-}
-float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY)
-{
-	float val1, val2, denom;
-	int i, j;
-	if (methTV == 0) {
-		/* isotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,j,denom)
-		for (i = 0; i<dimX; i++) {
-			for (j = 0; j<dimY; j++) {
-				denom = pow(P1[(i)*dimY + (j)], 2) + pow(P2[(i)*dimY + (j)], 2);
-				if (denom > 1) {
-					P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / sqrt(denom);
-					P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / sqrt(denom);
-				}
-			}
-		}
-	}
-	else {
-		/* anisotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,j,val1,val2)
-		for (i = 0; i<dimX; i++) {
-			for (j = 0; j<dimY; j++) {
-				val1 = fabs(P1[(i)*dimY + (j)]);
-				val2 = fabs(P2[(i)*dimY + (j)]);
-				if (val1 < 1.0f) { val1 = 1.0f; }
-				if (val2 < 1.0f) { val2 = 1.0f; }
-				P1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] / val1;
-				P2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] / val2;
-			}
-		}
-	}
-	return 1;
-}
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY)
-{
-	int i, j;
-	float multip;
-	multip = ((tk - 1.0f) / tkp1);
-#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i,j)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			R1[(i)*dimY + (j)] = P1[(i)*dimY + (j)] + multip*(P1[(i)*dimY + (j)] - P1_old[(i)*dimY + (j)]);
-			R2[(i)*dimY + (j)] = P2[(i)*dimY + (j)] + multip*(P2[(i)*dimY + (j)] - P2_old[(i)*dimY + (j)]);
-		}
-	}
-	return 1;
-}
-
-/* 3D-case related Functions */
-/*****************************************************************/
-float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ)
-{   
-    int i,j,k;
-    float f1, f2, val1, val2, val3;
-    
-    /*data-related term */
-    f1 = 0.0f;
-    for(i=0; i<dimX*dimY*dimZ; i++) f1 += pow(D[i] - A[i],2);    
-    
-    /*TV-related term */
-    f2 = 0.0f;
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-            /* boundary conditions  */
-            if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(dimX*dimY)*k + (i+1)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}
-            if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(dimX*dimY)*k + (i)*dimY + (j+1)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}    
-            if (k == dimZ-1) {val3 = 0.0f;} else {val3 = A[(dimX*dimY)*(k+1) + (i)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}    
-            f2 += sqrt(pow(val1,2) + pow(val2,2)  + pow(val3,2));
-        }}}     
-    /* sum of two terms */
-    funcvalA[0] = 0.5f*f1 + lambda*f2;     
-    return *funcvalA;
-}
-
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
-{
-	float val1, val2, val3;
-	int i, j, k;
-#pragma omp parallel for shared(A,D,R1,R2,R3) private(i,j,k,val1,val2,val3)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				/* boundary conditions */
-				if (i == 0) { val1 = 0.0f; }
-				else { val1 = R1[(dimX*dimY)*k + (i - 1)*dimY + (j)]; }
-				if (j == 0) { val2 = 0.0f; }
-				else { val2 = R2[(dimX*dimY)*k + (i)*dimY + (j - 1)]; }
-				if (k == 0) { val3 = 0.0f; }
-				else { val3 = R3[(dimX*dimY)*(k - 1) + (i)*dimY + (j)]; }
-				D[(dimX*dimY)*k + (i)*dimY + (j)] = A[(dimX*dimY)*k + (i)*dimY + (j)] - lambda*(R1[(dimX*dimY)*k + (i)*dimY + (j)] + R2[(dimX*dimY)*k + (i)*dimY + (j)] + R3[(dimX*dimY)*k + (i)*dimY + (j)] - val1 - val2 - val3);
-			}
-		}
-	}
-	return *D;
-}
-float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
-{
-	float val1, val2, val3, multip;
-	int i, j, k;
-	multip = (1.0f / (8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(i,j,k,val1,val2,val3)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				/* boundary conditions */
-				if (i == dimX - 1) val1 = 0.0f; else val1 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i + 1)*dimY + (j)];
-				if (j == dimY - 1) val2 = 0.0f; else val2 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*k + (i)*dimY + (j + 1)];
-				if (k == dimZ - 1) val3 = 0.0f; else val3 = D[(dimX*dimY)*k + (i)*dimY + (j)] - D[(dimX*dimY)*(k + 1) + (i)*dimY + (j)];
-				P1[(dimX*dimY)*k + (i)*dimY + (j)] = R1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val1;
-				P2[(dimX*dimY)*k + (i)*dimY + (j)] = R2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val2;
-				P3[(dimX*dimY)*k + (i)*dimY + (j)] = R3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*val3;
-			}
-		}
-	}
-	return 1;
-}
-float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ)
-{
-	float val1, val2, val3;
-	int i, j, k;
-#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,val1,val2,val3)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				val1 = fabs(P1[(dimX*dimY)*k + (i)*dimY + (j)]);
-				val2 = fabs(P2[(dimX*dimY)*k + (i)*dimY + (j)]);
-				val3 = fabs(P3[(dimX*dimY)*k + (i)*dimY + (j)]);
-				if (val1 < 1.0f) { val1 = 1.0f; }
-				if (val2 < 1.0f) { val2 = 1.0f; }
-				if (val3 < 1.0f) { val3 = 1.0f; }
-
-				P1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] / val1;
-				P2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] / val2;
-				P3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] / val3;
-			}
-		}
-	}
-	return 1;
-}
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ)
-{
-	int i, j, k;
-	float multip;
-	multip = ((tk - 1.0f) / tkp1);
-#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i,j,k)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				R1[(dimX*dimY)*k + (i)*dimY + (j)] = P1[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P1[(dimX*dimY)*k + (i)*dimY + (j)] - P1_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-				R2[(dimX*dimY)*k + (i)*dimY + (j)] = P2[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P2[(dimX*dimY)*k + (i)*dimY + (j)] - P2_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-				R3[(dimX*dimY)*k + (i)*dimY + (j)] = P3[(dimX*dimY)*k + (i)*dimY + (j)] + multip*(P3[(dimX*dimY)*k + (i)*dimY + (j)] - P3_old[(dimX*dimY)*k + (i)*dimY + (j)]);
-			}
-		}
-	}
-	return 1;
-}
-
-
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.h
deleted file mode 100644
index 6430bf2..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/FGP_TV_core.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
-*
-* Input Parameters:
-* 1. Noisy image/volume [REQUIRED]
-* 2. lambda - regularization parameter [REQUIRED]
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon: tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* [1] Filtered/regularized image
-* [2] last function value
-*
-* Example of image denoising:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .05*randn(size(Im)); % adding noise
-* u = FGP_TV(single(u0), 0.05, 100, 1e-04);
-*
-* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* This function is based on the Matlab's code and paper by
-* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
-*
-* D. Kazantsev, 2016-17
-*
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
-float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
-float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
-float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY);
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY);
-float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY);
-
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
-float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
-float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ);
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ);
-float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c
deleted file mode 100644
index 3a853d2..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.c
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "LLT_model_core.h"
-
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
-*
-* Input Parameters:
-* 1. U0 - origanal noise image/volume
-* 2. lambda - regularization parameter
-* 3. tau - time-step  for explicit scheme
-* 4. iter - iterations number
-* 5. epsil  - tolerance constant (to terminate earlier)
-* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .03*randn(size(Im)); % adding noise
-* [Den] = LLT_model(single(u0), 10, 0.1, 1);
-*
-* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
-*
-* 28.11.16/Harwell
-*/
-
-
-float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ)
-{
-	int i, j, i_p, i_m, j_m, j_p;
-	float dxx, dyy, denom_xx, denom_yy;
-#pragma omp parallel for shared(U,D1,D2) private(i, j, i_p, i_m, j_m, j_p, denom_xx, denom_yy, dxx, dyy)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			dxx = U[i_p*dimY + j] - 2.0f*U[i*dimY + j] + U[i_m*dimY + j];
-			dyy = U[i*dimY + j_p] - 2.0f*U[i*dimY + j] + U[i*dimY + j_m];
-
-			denom_xx = fabs(dxx) + EPS;
-			denom_yy = fabs(dyy) + EPS;
-
-			D1[i*dimY + j] = dxx / denom_xx;
-			D2[i*dimY + j] = dyy / denom_yy;
-		}
-	}
-	return 1;
-}
-float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-	int i, j, i_p, i_m, j_m, j_p;
-	float div, dxx, dyy;
-#pragma omp parallel for shared(U,U0,D1,D2) private(i, j, i_p, i_m, j_m, j_p, div, dxx, dyy)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			dxx = D1[i_p*dimY + j] - 2.0f*D1[i*dimY + j] + D1[i_m*dimY + j];
-			dyy = D2[i*dimY + j_p] - 2.0f*D2[i*dimY + j] + D2[i*dimY + j_m];
-
-			div = dxx + dyy;
-
-			U[i*dimY + j] = U[i*dimY + j] - tau*div - tau*lambda*(U[i*dimY + j] - U0[i*dimY + j]);
-		}
-	}
-	return *U0;
-}
-
-float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ)
-{
-	int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
-	float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
-#pragma omp parallel for shared(U,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, dxx, dyy, dzz)
-	for (i = 0; i<dimX; i++) {
-		/* symmetric boundary conditions (Neuman) */
-		i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-		i_m = i - 1; if (i_m < 0) i_m = i + 1;
-		for (j = 0; j<dimY; j++) {
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-			for (k = 0; k<dimZ; k++) {
-				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-				k_m = k - 1; if (k_m < 0) k_m = k + 1;
-
-				dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
-				dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
-				dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];
-
-				denom_xx = fabs(dxx) + EPS;
-				denom_yy = fabs(dyy) + EPS;
-				denom_zz = fabs(dzz) + EPS;
-
-				D1[dimX*dimY*k + i*dimY + j] = dxx / denom_xx;
-				D2[dimX*dimY*k + i*dimY + j] = dyy / denom_yy;
-				D3[dimX*dimY*k + i*dimY + j] = dzz / denom_zz;
-
-			}
-		}
-	}
-	return 1;
-}
-
-float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-	int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m;
-	float div, dxx, dyy, dzz;
-#pragma omp parallel for shared(U,U0,D1,D2,D3) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, div, dxx, dyy, dzz)
-	for (i = 0; i<dimX; i++) {
-		/* symmetric boundary conditions (Neuman) */
-		i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-		i_m = i - 1; if (i_m < 0) i_m = i + 1;
-		for (j = 0; j<dimY; j++) {
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-			for (k = 0; k<dimZ; k++) {
-				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-				k_m = k - 1; if (k_m < 0) k_m = k + 1;
-				//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
-				//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;   
-
-				dxx = D1[dimX*dimY*k + i_p*dimY + j] - 2.0f*D1[dimX*dimY*k + i*dimY + j] + D1[dimX*dimY*k + i_m*dimY + j];
-				dyy = D2[dimX*dimY*k + i*dimY + j_p] - 2.0f*D2[dimX*dimY*k + i*dimY + j] + D2[dimX*dimY*k + i*dimY + j_m];
-				dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
-
-				if ((switcher == 1) && (Map[dimX*dimY*k + i*dimY + j] == 0)) dzz = 0;
-				div = dxx + dyy + dzz;
-
-				//                 if (switcher == 1) {                    
-				// if (Map2[dimX*dimY*k + i*dimY + j] == 0) dzz2 = 0;
-				//else dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];
-				//                     div = dzz + dzz2;
-				//                 }
-
-				//                 dzz = D3[dimX*dimY*k_p + i*dimY + j] - 2.0f*D3[dimX*dimY*k + i*dimY + j] + D3[dimX*dimY*k_m + i*dimY + j];
-				//                 dzz2 = D4[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*D4[dimX*dimY*k + i*dimY + j] + D4[dimX*dimY*k_m1 + i*dimY + j];  
-				//                 div = dzz + dzz2;
-
-				U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] - tau*div - tau*lambda*(U[dimX*dimY*k + i*dimY + j] - U0[dimX*dimY*k + i*dimY + j]);
-			}
-		}
-	}
-	return *U0;
-}
-
-// float der3D_2(float *U, float *D1, float *D2, float *D3, float *D4, int dimX, int dimY, int dimZ)
-// {
-//     int i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, k_p1, k_m1;
-//     float dxx, dyy, dzz, dzz2, denom_xx, denom_yy, denom_zz, denom_zz2;
-// #pragma omp parallel for shared(U,D1,D2,D3,D4) private(i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, denom_zz2, dxx, dyy, dzz, dzz2, k_p1, k_m1)
-//     for(i=0; i<dimX; i++) {
-//         /* symmetric boundary conditions (Neuman) */
-//         i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-//         i_m = i - 1; if (i_m < 0) i_m = i + 1;
-//         for(j=0; j<dimY; j++) {
-//             j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-//             j_m = j - 1; if (j_m < 0) j_m = j + 1;
-//             for(k=0; k<dimZ; k++) {
-//                 k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-//                 k_m = k - 1; if (k_m < 0) k_m = k + 1;
-//                 k_p1 = k + 2; if (k_p1 >= dimZ) k_p1 = k - 2;
-//                 k_m1 = k - 2; if (k_m1 < 0) k_m1 = k + 2;                
-//                 
-//                 dxx = U[dimX*dimY*k + i_p*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i_m*dimY + j];
-//                 dyy = U[dimX*dimY*k + i*dimY + j_p] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k + i*dimY + j_m];
-//                 dzz = U[dimX*dimY*k_p + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m + i*dimY + j];                
-//                 dzz2 = U[dimX*dimY*k_p1 + i*dimY + j] - 2.0f*U[dimX*dimY*k + i*dimY + j] + U[dimX*dimY*k_m1 + i*dimY + j];                
-//                 
-//                 denom_xx = fabs(dxx) + EPS;
-//                 denom_yy = fabs(dyy) + EPS;
-//                 denom_zz = fabs(dzz) + EPS;
-//                 denom_zz2 = fabs(dzz2) + EPS;
-//                 
-//                 D1[dimX*dimY*k + i*dimY + j] = dxx/denom_xx;
-//                 D2[dimX*dimY*k + i*dimY + j] = dyy/denom_yy;
-//                 D3[dimX*dimY*k + i*dimY + j] = dzz/denom_zz;               
-//                 D4[dimX*dimY*k + i*dimY + j] = dzz2/denom_zz2;                               
-//             }}}
-//     return 1;
-// }
-
-float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ)
-{
-	int i, j, k, i1, j1, i2, j2, windowSize;
-	float val1, val2, thresh_val, maxval;
-	windowSize = 1;
-	thresh_val = 0.0001; /*thresh_val = 0.0035;*/
-
-						 /* normalize volume first */
-	maxval = 0.0f;
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-				if (U[dimX*dimY*k + i*dimY + j] > maxval) maxval = U[dimX*dimY*k + i*dimY + j];
-			}
-		}
-	}
-
-	if (maxval != 0.0f) {
-		for (i = 0; i<dimX; i++) {
-			for (j = 0; j<dimY; j++) {
-				for (k = 0; k<dimZ; k++) {
-					U[dimX*dimY*k + i*dimY + j] = U[dimX*dimY*k + i*dimY + j] / maxval;
-				}
-			}
-		}
-	}
-	else {
-		printf("%s \n", "Maximum value is zero!");
-		return 0;
-	}
-
-#pragma omp parallel for shared(U,Map) private(i, j, k, i1, j1, i2, j2, val1, val2)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-
-				Map[dimX*dimY*k + i*dimY + j] = 0;
-				//                 Map2[dimX*dimY*k + i*dimY + j] = 0; 
-
-				val1 = 0.0f; val2 = 0.0f;
-				for (i1 = -windowSize; i1 <= windowSize; i1++) {
-					for (j1 = -windowSize; j1 <= windowSize; j1++) {
-						i2 = i + i1;
-						j2 = j + j1;
-
-						if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
-							if (k == 0) {
-								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k + 1) + i2*dimY + j2], 2);
-								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                                                  
-							}
-							else if (k == dimZ - 1) {
-								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k - 1) + i2*dimY + j2], 2);
-								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
-							}
-							//                       else if (k == 1) {
-							//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
-							//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);  
-							//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);                           
-							//                       }
-							//                       else if (k == dimZ-2) {
-							//                           val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-1) + i2*dimY + j2],2); 
-							//                           val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+1) + i2*dimY + j2],2);      
-							//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2);                           
-							//                       }                      
-							else {
-								val1 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k - 1) + i2*dimY + j2], 2);
-								val2 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k + 1) + i2*dimY + j2], 2);
-								//                           val3 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k-2) + i2*dimY + j2],2); 
-								//                           val4 += pow(U[dimX*dimY*k + i2*dimY + j2] - U[dimX*dimY*(k+2) + i2*dimY + j2],2);  
-							}
-						}
-					}
-				}
-
-				val1 = 0.111f*val1; val2 = 0.111f*val2;
-				//                  val3 = 0.111f*val3; val4 = 0.111f*val4;
-				if ((val1 <= thresh_val) && (val2 <= thresh_val)) Map[dimX*dimY*k + i*dimY + j] = 1;
-				//                  if ((val3 <= thresh_val) && (val4 <= thresh_val)) Map2[dimX*dimY*k + i*dimY + j] = 1;                        
-			}
-		}
-	}
-	return 1;
-}
-
-float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ)
-{
-	int i, j, k, i1, j1, i2, j2, counter;
-#pragma omp parallel for shared(Map) private(i, j, k, i1, j1, i2, j2, counter)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			for (k = 0; k<dimZ; k++) {
-
-				counter = 0;
-				for (i1 = -3; i1 <= 3; i1++) {
-					for (j1 = -3; j1 <= 3; j1++) {
-						i2 = i + i1;
-						j2 = j + j1;
-						if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY)) {
-							if (Map[dimX*dimY*k + i2*dimY + j2] == 0) counter++;
-						}
-					}
-				}
-				if (counter < 24) Map[dimX*dimY*k + i*dimY + j] = 1;
-			}
-		}
-	}
-	return *Map;
-}
-
-
-/*********************3D *********************/
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h
deleted file mode 100644
index 13fce5a..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/LLT_model_core.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-
-#define EPS 0.01
-
-/* 2D functions */
-#ifdef __cplusplus
-extern "C" {
-#endif
-float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ);
-float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau);
-
-float der3D(float *U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ);
-float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned short *Map, int switcher, int dimX, int dimY, int dimZ, float lambda, float tau);
-
-float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ);
-float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ);
-
-//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c
deleted file mode 100644
index acfb464..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "PatchBased_Regul_core.h"
-
-/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
- * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
- * 
- * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
- *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
- *
- * Input Parameters:
- * 1. Image (2D or 3D) [required]
- * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional]
- * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional]
- * 4. h - parameter for the PB penalty function [optional]
- * 5. lambda - regularization parameter  [optional]
-
- * Output:
- * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
- *
- * 2D denoising example in Matlab:   
-   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-   ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); 
- 
- * D. Kazantsev *
- * 02/07/2014
- * Harwell, UK
- */
-
-/*2D version function */
-float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda)
-{
-    int i, j, i_n, j_n, i_m, j_m, i_p, j_p, i_l, j_l, i1, j1, i2, j2, i3, j3, i5,j5, count, SimilW_full;
-    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob, t1;
-                 
-    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
-    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
-    h2 = h*h;
-    denh2 = 1/(2*h2);   
-    
-     /*Gaussian kernel */
-    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full,sizeof(float));
-    count = 0;
-    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
-        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
-            t1 = pow(((float)i_n), 2) + pow(((float)j_n), 2);
-            Eucl_Vec[count] = exp(-(t1)/(2*SimilW*SimilW));
-            count = count + 1;                       
-        }} /*main neighb loop */   
-    
-    /*The NLM code starts here*/         
-    /* setting OMP here */
-    #pragma omp parallel for shared (A, B, dimX, dimY, Eucl_Vec, lambda, denh2) private(denom, i, j, WeightGlob, count,  i1, j1, i2, j2, i3, j3, i5, j5, Weight_norm, normsum, i_m, j_m, i_n, j_n, i_l, j_l, i_p, j_p, Weight,  value)
-    
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-             if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY))) {
-          
-                /* Massive Search window loop */
-                Weight_norm = 0; value = 0.0;
-                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
-                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
-                       /*checking boundaries*/
-                        i1 = i+i_m; j1 = j+j_m;
-                        
-                        WeightGlob = 0.0;
-                        /* if inside the searching window */                        
-                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
-                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
-                                 i2 = i1+i_l; j2 = j1+j_l;
-                                 
-                                 i3 = i+i_l; j3 = j+j_l;   /*coordinates of the inner patch loop */
-                                
-                                 count = 0; normsum = 0.0;
-                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
-                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
-                                         i5 = i2 + i_p; j5 = j2 + j_p;
-                                         normsum = normsum + Eucl_Vec[count]*pow(A[(i3+i_p)*dimY+(j3+j_p)]-A[i5*dimY+j5], 2);        
-                                         count = count + 1;
-                                     }}
-                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
-                                  else Weight = 0.0;
-                                 WeightGlob += Weight;
-                             }}                      
-      
-                         value += A[i1*dimY+j1]*WeightGlob;
-                         Weight_norm += WeightGlob;           
-                    }}      /*search window loop end*/
-                
-                /* the final loop to average all values in searching window with weights */
-                denom = 1 + lambda*Weight_norm;
-                B[i*dimY+j] = (A[i*dimY+j] + lambda*value)/denom;         
-             }
-        }}   /*main loop*/     
-    return (*B);
-    free(Eucl_Vec);    
-}
- 
-/*3D version*/ 
- float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda)       
- {
-    int SimilW_full, count, i, j, k,  i_n, j_n, k_n, i_m, j_m, k_m, i_p, j_p, k_p, i_l, j_l, k_l, i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5;
-    float *Eucl_Vec, h2, denh2, normsum, Weight, Weight_norm, value, denom, WeightGlob;
-        
-    /*SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
-    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
-    h2 = h*h;
-    denh2 = 1/(2*h2);
-    
-    /*Gaussian kernel */
-    Eucl_Vec = (float*) calloc (SimilW_full*SimilW_full*SimilW_full,sizeof(float));
-    count = 0;
-    for(i_n=-SimilW; i_n<=SimilW; i_n++) {
-        for(j_n=-SimilW; j_n<=SimilW; j_n++) {
-            for(k_n=-SimilW; k_n<=SimilW; k_n++) {
-                Eucl_Vec[count] = exp(-(pow((float)i_n, 2) + pow((float)j_n, 2) + pow((float)k_n, 2))/(2*SimilW*SimilW*SimilW));
-                count = count + 1;
-            }}} /*main neighb loop */
-    
-    /*The NLM code starts here*/         
-    /* setting OMP here */
-    #pragma omp parallel for shared (A, B, dimX, dimY, dimZ, Eucl_Vec, lambda, denh2) private(denom, i, j, k, WeightGlob,count,  i1, j1, k1, i2, j2, k2, i3, j3, k3, i5, j5, k5, Weight_norm, normsum, i_m, j_m, k_m, i_n, j_n, k_n, i_l, j_l, k_l, i_p, j_p, k_p, Weight, value)    
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-            if (((i >= padXY) && (i < dimX-padXY)) &&  ((j >= padXY) && (j < dimY-padXY)) &&  ((k >= padXY) && (k < dimZ-padXY))) {
-            /* take all elements around the pixel of interest */                             
-               /* Massive Search window loop */
-                Weight_norm = 0;  value = 0.0;
-                for(i_m=-SearchW; i_m<=SearchW; i_m++) {
-                    for(j_m=-SearchW; j_m<=SearchW; j_m++) {
-                        for(k_m=-SearchW; k_m<=SearchW; k_m++) {
-                         /*checking boundaries*/
-                        i1 = i+i_m; j1 = j+j_m; k1 = k+k_m;
-                        
-                        WeightGlob = 0.0;
-                        /* if inside the searching window */                        
-                         for(i_l=-SimilW; i_l<=SimilW; i_l++) {
-                             for(j_l=-SimilW; j_l<=SimilW; j_l++) {
-                                 for(k_l=-SimilW; k_l<=SimilW; k_l++) {                                 
-                                 i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
-                                 
-                                 i3 = i+i_l; j3 = j+j_l; k3 = k+k_l;   /*coordinates of the inner patch loop */
-                                
-                                 count = 0; normsum = 0.0;
-                                 for(i_p=-SimilW; i_p<=SimilW; i_p++) {
-                                     for(j_p=-SimilW; j_p<=SimilW; j_p++) {
-                                         for(k_p=-SimilW; k_p<=SimilW; k_p++) {
-                                         i5 = i2 + i_p; j5 = j2 + j_p; k5 = k2 + k_p;
-                                         normsum = normsum + Eucl_Vec[count]*pow(A[(dimX*dimY)*(k3+k_p)+(i3+i_p)*dimY+(j3+j_p)]-A[(dimX*dimY)*k5 + i5*dimY+j5], 2);        
-                                         count = count + 1;
-                                     }}}
-                                  if (normsum != 0) Weight = (exp(-normsum*denh2)); 
-                                  else Weight = 0.0;
-                                 WeightGlob += Weight;
-                             }}}                                                 
-                         value += A[(dimX*dimY)*k1 + i1*dimY+j1]*WeightGlob;
-                         Weight_norm += WeightGlob;
-             
-                    }}}      /*search window loop end*/
-                
-                /* the final loop to average all values in searching window with weights */
-                denom = 1 + lambda*Weight_norm;               
-                B[(dimX*dimY)*k + i*dimY+j] = (A[(dimX*dimY)*k + i*dimY+j] + lambda*value)/denom;      
-            }            
-        }}}   /*main loop*/              
-       free(Eucl_Vec);        
-       return *B;
-}
-
-float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
-{
-    /* padding-cropping function */
-    int i,j,k;    
-    if (NewSizeZ > 1) {    
-           for (i=0; i < NewSizeX; i++) {
-            for (j=0; j < NewSizeY; j++) {
-              for (k=0; k < NewSizeZ; k++) {
-                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
-                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
-                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
-                }
-            }}}   
-    }
-    else {
-        for (i=0; i < NewSizeX; i++) {
-            for (j=0; j < NewSizeY; j++) {
-                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
-                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
-                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
-                }
-            }}
-    }
-    return *Ap;
-}
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h
deleted file mode 100644
index d4a8a46..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/PatchBased_Regul_core.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#define _USE_MATH_DEFINES
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases).
-* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
-*
-* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
-*             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
-*
-* Input Parameters (mandatory):
-* 1. Image (2D or 3D)
-* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
-* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
-* 4. h - parameter for the PB penalty function
-* 5. lambda - regularization parameter
-
-* Output:
-* 1. regularized (denoised) Image (N x N)/volume (N x N x N)
-*
-* Quick 2D denoising example in Matlab:
-Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05);
-*
-* Please see more tests in a file:
-TestTemporalSmoothing.m
-
-*
-* Matlab + C/mex compilers needed
-* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
-*
-* D. Kazantsev *
-* 02/07/2014
-* Harwell, UK
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
-float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda);
-float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda);
-#ifdef __cplusplus
-}
-#endif
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c
deleted file mode 100644
index 4109a4b..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "SplitBregman_TV_core.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularization parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
-* u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
-*
-* References:
-* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
-* D. Kazantsev, 2016*
-*/
-
-
-/* 2D-case related Functions */
-/*****************************************************************/
-float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu)
-{
-    float sum, normConst;
-    int i,j,i1,i2,j1,j2;
-    normConst = 1.0f/(mu + 4.0f*lambda);
-    
-#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,sum)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            
-            sum = Dx[(i2)*dimY + (j)] - Dx[(i)*dimY + (j)] + Dy[(i)*dimY + (j2)] - Dy[(i)*dimY + (j)] - Bx[(i2)*dimY + (j)] + Bx[(i)*dimY + (j)] - By[(i)*dimY + (j2)] + By[(i)*dimY + (j)];
-            sum += (U[(i1)*dimY + (j)] + U[(i2)*dimY + (j)] + U[(i)*dimY + (j1)] + U[(i)*dimY + (j2)]);
-            sum *= lambda;
-            sum += mu*A[(i)*dimY + (j)];
-            U[(i)*dimY + (j)] = normConst*sum;
-        }}
-    return *U;
-}
-
-float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
-{
-    int i,j,i1,j1;
-    float val1, val11, val2, val22, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,val22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
-            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
-            
-            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
-            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-            
-            if (val1 !=0) Dx[(i)*dimY + (j)] = (val1/fabs(val1))*val11; else Dx[(i)*dimY + (j)] = 0;
-            if (val2 !=0) Dy[(i)*dimY + (j)] = (val2/fabs(val2))*val22; else Dy[(i)*dimY + (j)] = 0;
-            
-        }}
-    return 1;
-}
-float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda)
-{
-    int i,j,i1,j1;
-    float val1, val11, val2, denom, denom_lam;
-    denom_lam = 1.0f/lambda;
-    
-#pragma omp parallel for shared(U,denom_lam) private(i,j,i1,j1,val1,val11,val2,denom)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            val1 = (U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) + Bx[(i)*dimY + (j)];
-            val2 = (U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) + By[(i)*dimY + (j)];
-            
-            denom = sqrt(val1*val1 + val2*val2);
-            
-            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-            
-            if (denom != 0.0f) {
-                Dx[(i)*dimY + (j)] = val11*(val1/denom);
-                Dy[(i)*dimY + (j)] = val11*(val2/denom);
-            }
-            else {
-                Dx[(i)*dimY + (j)] = 0;
-                Dy[(i)*dimY + (j)] = 0;
-            }
-        }}
-    return 1;
-}
-float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY)
-{
-    int i,j,i1,j1;
-#pragma omp parallel for shared(U) private(i,j,i1,j1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            
-            Bx[(i)*dimY + (j)] = Bx[(i)*dimY + (j)] + ((U[(i1)*dimY + (j)] - U[(i)*dimY + (j)]) - Dx[(i)*dimY + (j)]);
-            By[(i)*dimY + (j)] = By[(i)*dimY + (j)] + ((U[(i)*dimY + (j1)] - U[(i)*dimY + (j)]) - Dy[(i)*dimY + (j)]);
-        }}
-    return 1;
-}
-
-
-/* 3D-case related Functions */
-/*****************************************************************/
-float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu)
-{
-    float normConst, d_val, b_val, sum;
-    int i,j,i1,i2,j1,j2,k,k1,k2;
-    normConst = 1.0f/(mu + 6.0f*lambda);
-#pragma omp parallel for shared(U) private(i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                i2 = i-1; if (i2 < 0) i2 = i+1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                j2 = j-1; if (j2 < 0) j2 = j+1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                k2 = k-1; if (k2 < 0) k2 = k+1;
-                
-                d_val = Dx[(dimX*dimY)*k + (i2)*dimY + (j)] - Dx[(dimX*dimY)*k + (i)*dimY + (j)] + Dy[(dimX*dimY)*k + (i)*dimY + (j2)] - Dy[(dimX*dimY)*k + (i)*dimY + (j)] + Dz[(dimX*dimY)*k2 + (i)*dimY + (j)] - Dz[(dimX*dimY)*k + (i)*dimY + (j)];
-                b_val = -Bx[(dimX*dimY)*k + (i2)*dimY + (j)] + Bx[(dimX*dimY)*k + (i)*dimY + (j)] - By[(dimX*dimY)*k + (i)*dimY + (j2)] + By[(dimX*dimY)*k + (i)*dimY + (j)] - Bz[(dimX*dimY)*k2 + (i)*dimY + (j)] + Bz[(dimX*dimY)*k + (i)*dimY + (j)];
-                sum =  d_val + b_val;
-                sum += U[(dimX*dimY)*k + (i1)*dimY + (j)] + U[(dimX*dimY)*k + (i2)*dimY + (j)] + U[(dimX*dimY)*k + (i)*dimY + (j1)] + U[(dimX*dimY)*k + (i)*dimY + (j2)] + U[(dimX*dimY)*k1 + (i)*dimY + (j)] + U[(dimX*dimY)*k2 + (i)*dimY + (j)];
-                sum *= lambda;
-                sum += mu*A[(dimX*dimY)*k + (i)*dimY + (j)];
-                U[(dimX*dimY)*k + (i)*dimY + (j)] = normConst*sum;
-            }}}
-    return *U;
-}
-
-float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
-{
-    int i,j,i1,j1,k,k1,index;
-    float val1, val11, val2, val22, val3, val33, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + (i)*dimY + (j);
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
-                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
-                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
-                
-                val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
-                val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-                val33 = fabs(val3) - denom_lam; if (val33 < 0) val33 = 0;
-                
-                if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
-                if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
-                if (val3 !=0) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0;
-                
-            }}}
-    return 1;
-}
-float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda)
-{
-    int i,j,i1,j1,k,k1,index;
-    float val1, val11, val2, val3, denom, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + (i)*dimY + (j);
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                val1 = (U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[index]) + Bx[index];
-                val2 = (U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[index]) + By[index];
-                val3 = (U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[index]) + Bz[index];
-                
-                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
-                
-                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-                
-                if (denom != 0.0f) {
-                    Dx[index] = val11*(val1/denom);
-                    Dy[index] = val11*(val2/denom);
-                    Dz[index] = val11*(val3/denom);
-                }
-                else {
-                    Dx[index] = 0;
-                    Dy[index] = 0;
-                    Dz[index] = 0;
-                }               
-            }}}
-    return 1;
-}
-float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ)
-{
-    int i,j,k,i1,j1,k1;
-#pragma omp parallel for shared(U) private(i,j,k,i1,j1,k1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                Bx[(dimX*dimY)*k + (i)*dimY + (j)] = Bx[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i1)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dx[(dimX*dimY)*k + (i)*dimY + (j)]);
-                By[(dimX*dimY)*k + (i)*dimY + (j)] = By[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k + (i)*dimY + (j1)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dy[(dimX*dimY)*k + (i)*dimY + (j)]);
-                Bz[(dimX*dimY)*k + (i)*dimY + (j)] = Bz[(dimX*dimY)*k + (i)*dimY + (j)] + ((U[(dimX*dimY)*k1 + (i)*dimY + (j)] - U[(dimX*dimY)*k + (i)*dimY + (j)]) - Dz[(dimX*dimY)*k + (i)*dimY + (j)]);
-                
-            }}}
-    return 1;
-}
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.h
deleted file mode 100644
index 6ed3ff9..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/SplitBregman_TV_core.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-
-#include "utils.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularization parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
-* u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
-*
-* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References:
-* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
-* D. Kazantsev, 2016*
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
-float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
-float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
-float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
-float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY);
-
-float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda, float mu);
-float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
-float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
-float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
-
-#ifdef __cplusplus
-}
-#endif
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c
deleted file mode 100644
index 4139d10..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "TGV_PD_core.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for 
- * Total Generilized Variation (TGV)-L2 model (2D case only)
- *
- * Input Parameters:
- * 1. Noisy image/volume (2D)
- * 2. lambda - regularization parameter
- * 3. parameter to control first-order term (alpha1)
- * 4. parameter to control the second-order term (alpha0)
- * 5. Number of CP iterations
- *
- * Output:
- * Filtered/regularized image 
- *
- * Example:
- * figure;
- * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
- * u0 = Im + .03*randn(size(Im)); % adding noise
- * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
- *
- * References:
- * K. Bredies "Total Generalized Variation"
- *
- * 28.11.16/Harwell
- */
- 
-
-
-
-/*Calculating dual variable P (using forward differences)*/
-float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma)
-{
-    int i,j;
-#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            if (i == dimX-1) P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i-1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
-            else P1[i*dimY + (j)] = P1[i*dimY + (j)] + sigma*((U[(i + 1)*dimY + (j)] - U[i*dimY + (j)])  - V1[i*dimY + (j)]);
-            if (j == dimY-1) P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j-1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
-            else  P2[i*dimY + (j)] = P2[i*dimY + (j)] + sigma*((U[(i)*dimY + (j+1)] - U[i*dimY + (j)])  - V2[i*dimY + (j)]);
-        }}
-    return 1;
-}
-/*Projection onto convex set for P*/
-float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1)
-{
-    float grad_magn;
-    int i,j;
-#pragma omp parallel for shared(P1,P2) private(i,j,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            grad_magn = sqrt(pow(P1[i*dimY + (j)],2) + pow(P2[i*dimY + (j)],2));
-            grad_magn = grad_magn/alpha1;
-            if (grad_magn > 1.0) {
-                P1[i*dimY + (j)] = P1[i*dimY + (j)]/grad_magn;
-                P2[i*dimY + (j)] = P2[i*dimY + (j)]/grad_magn;
-            }
-        }}
-    return 1;
-}
-/*Calculating dual variable Q (using forward differences)*/
-float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma)
-{
-    int i,j;
-    float q1, q2, q11, q22;
-#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,q1,q2,q11,q22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            if (i == dimX-1)
-            { q1 = (V1[(i-1)*dimY + (j)] - V1[i*dimY + (j)]);
-              q11 = (V2[(i-1)*dimY + (j)] - V2[i*dimY + (j)]);
-            }
-            else {
-                q1 = (V1[(i+1)*dimY + (j)] - V1[i*dimY + (j)]);
-                q11 = (V2[(i+1)*dimY + (j)] - V2[i*dimY + (j)]);
-            }
-            if (j == dimY-1) {
-                q2 = (V2[(i)*dimY + (j-1)] - V2[i*dimY + (j)]);
-                q22 = (V1[(i)*dimY + (j-1)] - V1[i*dimY + (j)]);
-            }
-            else {
-                q2 = (V2[(i)*dimY + (j+1)] - V2[i*dimY + (j)]);
-                q22 = (V1[(i)*dimY + (j+1)] - V1[i*dimY + (j)]);
-            }
-            Q1[i*dimY + (j)] = Q1[i*dimY + (j)] + sigma*(q1);
-            Q2[i*dimY + (j)] = Q2[i*dimY + (j)] + sigma*(q2);
-            Q3[i*dimY + (j)] = Q3[i*dimY + (j)]  + sigma*(0.5f*(q11 + q22));
-        }}
-    return 1;
-}
-
-float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0)
-{
-    float grad_magn;
-    int i,j;
-#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            grad_magn = sqrt(pow(Q1[i*dimY + (j)],2) + pow(Q2[i*dimY + (j)],2) + 2*pow(Q3[i*dimY + (j)],2));
-            grad_magn = grad_magn/alpha0;
-            if (grad_magn > 1.0) {
-                Q1[i*dimY + (j)] = Q1[i*dimY + (j)]/grad_magn;
-                Q2[i*dimY + (j)] = Q2[i*dimY + (j)]/grad_magn;
-                Q3[i*dimY + (j)] = Q3[i*dimY + (j)]/grad_magn;
-            }
-        }}
-    return 1;
-}
-/* Divergence and projection for P*/
-float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-    int i,j;
-    float P_v1, P_v2, div;
-#pragma omp parallel for shared(U,A,P1,P2) private(i,j,P_v1,P_v2,div)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            if (i == 0) P_v1 = (P1[i*dimY + (j)]);
-            else P_v1 = (P1[i*dimY + (j)] - P1[(i-1)*dimY + (j)]);
-            if (j == 0) P_v2 = (P2[i*dimY + (j)]);
-            else  P_v2 = (P2[i*dimY + (j)] - P2[(i)*dimY + (j-1)]);
-            div = P_v1 + P_v2;
-            U[i*dimY + (j)] = (lambda*(U[i*dimY + (j)] + tau*div) + tau*A[i*dimY + (j)])/(lambda + tau);
-        }}
-    return *U;
-}
-/*get updated solution U*/
-float newU(float *U, float *U_old, int dimX, int dimY, int dimZ)
-{
-    int i;
-#pragma omp parallel for shared(U,U_old) private(i)
-    for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2*U[i] - U_old[i];
-    return *U;
-}
-
-/*get update for V*/
-float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau)
-{
-    int i,j;
-    float q1, q11, q2, q22, div1, div2;
-#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i,j, q1, q11, q2, q22, div1, div2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            if (i == 0) {
-                q1 = (Q1[i*dimY + (j)]);
-                q11 = (Q3[i*dimY + (j)]);
-            }
-            else {
-                q1 = (Q1[i*dimY + (j)] - Q1[(i-1)*dimY + (j)]);
-                q11 = (Q3[i*dimY + (j)] - Q3[(i-1)*dimY + (j)]);
-            }
-            if (j == 0) {
-                q2 = (Q2[i*dimY + (j)]);
-                q22 = (Q3[i*dimY + (j)]);
-            }
-            else  {
-                q2 = (Q2[i*dimY + (j)] - Q2[(i)*dimY + (j-1)]);
-                q22 = (Q3[i*dimY + (j)] - Q3[(i)*dimY + (j-1)]);
-            }
-            div1 = q1 + q22;
-            div2 = q2 + q11;
-            V1[i*dimY + (j)] = V1[i*dimY + (j)] + tau*(P1[i*dimY + (j)] + div1);
-            V2[i*dimY + (j)] = V2[i*dimY + (j)] + tau*(P2[i*dimY + (j)] + div2);
-        }}
-    return 1;
-}
-/*********************3D *********************/
-
-/*Calculating dual variable P (using forward differences)*/
-float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
-{
-    int i,j,k;
-#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                if (i == dimX-1) P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i-1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
-                else P1[dimX*dimY*k + i*dimY + (j)] = P1[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i + 1)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V1[dimX*dimY*k + i*dimY + (j)]);
-                if (j == dimY-1) P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j-1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
-                else  P2[dimX*dimY*k + i*dimY + (j)] = P2[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*k + (i)*dimY + (j+1)] - U[dimX*dimY*k + i*dimY + (j)])  - V2[dimX*dimY*k + i*dimY + (j)]);
-                if (k == dimZ-1) P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k-1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
-                else  P3[dimX*dimY*k + i*dimY + (j)] = P3[dimX*dimY*k + i*dimY + (j)] + sigma*((U[dimX*dimY*(k+1) + (i)*dimY + (j)] - U[dimX*dimY*k + i*dimY + (j)])  - V3[dimX*dimY*k + i*dimY + (j)]);
-            }}}
-    return 1;
-}
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.h
deleted file mode 100644
index d5378df..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/TGV_PD_core.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for
-* Total Generilized Variation (TGV)-L2 model (2D case only)
-*
-* Input Parameters:
-* 1. Noisy image/volume (2D)
-* 2. lambda - regularization parameter
-* 3. parameter to control first-order term (alpha1)
-* 4. parameter to control the second-order term (alpha0)
-* 5. Number of CP iterations
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .03*randn(size(Im)); % adding noise
-* tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
-*
-* to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References:
-* K. Bredies "Total Generalized Variation"
-*
-* 28.11.16/Harwell
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-/* 2D functions */
-float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma);
-float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1);
-float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float sigma);
-float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0);
-float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau);
-float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau);
-float newU(float *U, float *U_old, int dimX, int dimY, int dimZ);
-//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c b/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c
deleted file mode 100644
index 0e83d2c..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "utils.h"
-
-/* Copy Image */
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
-{
-	int j;
-#pragma omp parallel for shared(A, U) private(j)
-	for (j = 0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
-	return *U;
-}
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.h b/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.h
deleted file mode 100644
index 53463a3..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_CPU/utils.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-//#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-//#include <stdio.h>
-#include "omp.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
deleted file mode 100644
index 178af00..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
+++ /dev/null
@@ -1,270 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-#include "Diff4th_GPU_kernel.h"
-
-#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        exit(EXIT_FAILURE);
-    }
-}
-
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-#define sizeT (sizeX*sizeY*sizeZ)
-#define epsilon 0.00000001
-
-/////////////////////////////////////////////////
-// 2D Image denosing - Second Step (The second derrivative)
-__global__ void Diff4th2D_derriv(float* B, float* A, float *A0, int N, int M, float sigma, int iter, float tau, float lambda)
-{
-    float gradXXc = 0, gradYYc = 0;
-    int i = blockIdx.x*blockDim.x + threadIdx.x;
-    int j = blockIdx.y*blockDim.y + threadIdx.y;
-    
-    int index = j + i*N;
-    
-    if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) {
-        return;    }
-    
-    int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index;
-    int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index;
-    int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index;
-    int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index;
-    
-    gradXXc = B[indexN] + B[indexS] - 2*B[index] ;
-    gradYYc = B[indexW] + B[indexE] - 2*B[index] ;
-    A[index]  = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc));
-}
-
-// 2D Image denosing - The First Step
-__global__ void Diff4th2D(float* A, float* B, int N, int M, float sigma, int iter, float tau)
-{
-    float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, sq_sum, xy_2,  V_norm, V_orth, c, c_sq;
-    
-    int i = blockIdx.x*blockDim.x + threadIdx.x;
-    int j = blockIdx.y*blockDim.y + threadIdx.y;
-    
-    int index = j + i*N;
-    
-    V_norm = 0.0f; V_orth = 0.0f;
-    
-    if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) {
-        return;    }
-    
-    int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index;
-    int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index;
-    int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index;
-    int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index;
-    int indexNW = (j-1)+(i-1)*(N); if (A[indexNW] == 0) indexNW = index;
-    int indexNE = (j+1)+(i-1)*(N); if (A[indexNE] == 0) indexNE = index;
-    int indexWS = (j-1)+(i+1)*(N); if (A[indexWS] == 0) indexWS = index;
-    int indexES = (j+1)+(i+1)*(N); if (A[indexES] == 0) indexES = index;
-    
-    gradX = 0.5f*(A[indexN]-A[indexS]);
-    gradX_sq = gradX*gradX;
-    gradXX = A[indexN] + A[indexS] - 2*A[index];
-    
-    gradY = 0.5f*(A[indexW]-A[indexE]);
-    gradY_sq = gradY*gradY;
-    gradYY = A[indexW] + A[indexE] - 2*A[index];
-    
-    gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]);
-    xy_2 = 2.0f*gradX*gradY*gradXY;
-    sq_sum =  gradX_sq + gradY_sq;
-    
-    if (sq_sum <= epsilon) {
-        V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/epsilon;
-        V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/epsilon; }
-    else  {
-        V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/sq_sum;
-        V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/sq_sum;  }
-    
-    c = 1.0f/(1.0f + sq_sum/sigma);
-    c_sq = c*c;
-    B[index] =  c_sq*V_norm + c*V_orth;
-}
-
-/////////////////////////////////////////////////
-// 3D data parocerssing
-__global__ void Diff4th3D_derriv(float *B, float *A, float *A0, int N, int M, int Z, float sigma, int iter, float tau, float lambda)
-{
-    float gradXXc = 0, gradYYc = 0, gradZZc = 0;
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = xIndex + M*yIndex + N*M*zIndex;
-    
-    if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) {
-        return;    }
-    
-    int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index;
-    int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index;
-    int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index;
-    int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index;
-    int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index;
-    int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index;
-    
-    gradXXc = B[indexN] + B[indexS] - 2*B[index] ;
-    gradYYc = B[indexW] + B[indexE] - 2*B[index] ;
-    gradZZc = B[indexU] + B[indexD] - 2*B[index] ;   
-        
-    A[index]  = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc + gradZZc));    
-}
-
-__global__ void Diff4th3D(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau)
-{
-    float gradX, gradX_sq, gradY, gradY_sq, gradZ, gradZ_sq, gradXX, gradYY, gradZZ, gradXY, gradXZ, gradYZ, sq_sum, xy_2, xyz_1, xyz_2, V_norm, V_orth, c, c_sq;
-    
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = xIndex + M*yIndex + N*M*zIndex;
-    V_norm = 0.0f; V_orth = 0.0f;
-    
-    if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) {
-        return;    }
-    
-    B[index] = 0;
-    
-    int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index;
-    int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index;
-    int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index;
-    int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index;
-    int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index;
-    int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index;
-    
-    int indexNW = (xIndex-1) + M*(yIndex-1) + N*M*zIndex;  if (A[indexNW] == 0) indexNW = index;
-    int indexNE = (xIndex-1) + M*(yIndex+1) + N*M*zIndex;  if (A[indexNE] == 0) indexNE = index;
-    int indexWS =  (xIndex+1) + M*(yIndex-1) + N*M*zIndex; if (A[indexWS] == 0) indexWS = index;
-    int indexES = (xIndex+1) + M*(yIndex+1) + N*M*zIndex;  if (A[indexES] == 0) indexES = index;
-    
-    int indexUW = (xIndex-1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUW] == 0) indexUW = index;
-    int indexUE = (xIndex+1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUE] == 0) indexUE = index;
-    int indexDW =  (xIndex-1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDW] == 0) indexDW = index;
-    int indexDE = (xIndex+1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDE] == 0) indexDE = index;
-    
-    int indexUN = (xIndex) + M*(yIndex-1) + N*M*(zIndex-1);  if (A[indexUN] == 0) indexUN = index;
-    int indexUS = (xIndex) + M*(yIndex+1) + N*M*(zIndex-1);  if (A[indexUS] == 0) indexUS = index;
-    int indexDN =  (xIndex) + M*(yIndex-1) + N*M*(zIndex+1); if (A[indexDN] == 0) indexDN = index;
-    int indexDS = (xIndex) + M*(yIndex+1) + N*M*(zIndex+1);  if (A[indexDS] == 0) indexDS = index;
-    
-    gradX = 0.5f*(A[indexN]-A[indexS]);
-    gradX_sq = gradX*gradX;
-    gradXX = A[indexN] + A[indexS] - 2*A[index];
-    
-    gradY = 0.5f*(A[indexW]-A[indexE]);
-    gradY_sq = gradY*gradY;
-    gradYY = A[indexW] + A[indexE] - 2*A[index];
-    
-    gradZ = 0.5f*(A[indexU]-A[indexD]);
-    gradZ_sq = gradZ*gradZ;
-    gradZZ = A[indexU] + A[indexD] - 2*A[index];
-    
-    gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]);
-    gradXZ = 0.25f*(A[indexUW] - A[indexUE] - A[indexDW] + A[indexDE]);
-    gradYZ = 0.25f*(A[indexUN] - A[indexUS] - A[indexDN] + A[indexDS]);
-    
-    xy_2  = 2.0f*gradX*gradY*gradXY;
-    xyz_1 = 2.0f*gradX*gradZ*gradXZ;
-    xyz_2 = 2.0f*gradY*gradZ*gradYZ;
-    
-    sq_sum =  gradX_sq + gradY_sq + gradZ_sq;
-    
-    if (sq_sum <= epsilon) {
-        V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/epsilon;
-        V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/epsilon;  }
-    else  {
-        V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/sq_sum;
-        V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/sq_sum;  }
-    
-    c = 1;
-    if ((1.0f + sq_sum/sigma) != 0.0f)  {c = 1.0f/(1.0f + sq_sum/sigma);}
-    
-    c_sq = c*c;
-    B[index] =  c_sq*V_norm + c*V_orth;
-}
-
-/******************************************************/
-/********* HOST FUNCTION*************/
-extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda)
-{
-     int deviceCount = -1; // number of devices
-    cudaGetDeviceCount(&deviceCount);
-    if (deviceCount == 0) {
-        fprintf(stderr, "No CUDA devices found\n");
-        return;
-    }    
-    
-      int BLKXSIZE, BLKYSIZE,BLKZSIZE;
-      float *Ad, *Bd, *Cd;
-      sigma = sigma*sigma;
-    
-    if (Z == 0){
-        // 4th order diffusion for 2D case     
-        BLKXSIZE = 8;
-        BLKYSIZE = 16;
-        
-        dim3 dimBlock(BLKXSIZE,BLKYSIZE);
-        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
-        
-        checkCudaErrors(cudaMalloc((void**)&Ad,N*M*sizeof(float)));
-        checkCudaErrors(cudaMalloc((void**)&Bd,N*M*sizeof(float)));
-        checkCudaErrors(cudaMalloc((void**)&Cd,N*M*sizeof(float)));
-        
-        checkCudaErrors(cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors(cudaMemcpy(Bd,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors(cudaMemcpy(Cd,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
-        
-        int n = 1;
-        while (n <= iter) {
-            Diff4th2D<<<dimGrid,dimBlock>>>(Bd, Cd, N, M, sigma, iter, tau);
-            cudaDeviceSynchronize();
-            checkCudaErrors( cudaPeekAtLastError() );
-            Diff4th2D_derriv<<<dimGrid,dimBlock>>>(Cd, Bd, Ad, N, M, sigma, iter, tau, lambda);
-            cudaDeviceSynchronize();
-            checkCudaErrors( cudaPeekAtLastError() );
-            n++;
-        }
-        checkCudaErrors(cudaMemcpy(B,Bd,N*M*sizeof(float),cudaMemcpyDeviceToHost));
-        cudaFree(Ad); cudaFree(Bd); cudaFree(Cd);
-    }
-    
-    if (Z != 0){
-        // 4th order diffusion for 3D case
-        BLKXSIZE = 8;
-        BLKYSIZE = 8;
-        BLKZSIZE = 8;        
-        
-        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
-        
-        checkCudaErrors(cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)));
-        checkCudaErrors(cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)));
-        checkCudaErrors(cudaMalloc((void**)&Cd,N*M*Z*sizeof(float)));
-        
-        checkCudaErrors(cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors(cudaMemcpy(Bd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));        
-        checkCudaErrors(cudaMemcpy(Cd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
-        
-        int n = 1;
-        while (n <= iter) {
-            Diff4th3D<<<dimGrid,dimBlock>>>(Bd, Cd, N, M, Z, sigma, iter, tau);
-            cudaDeviceSynchronize();
-            checkCudaErrors( cudaPeekAtLastError() );
-            Diff4th3D_derriv<<<dimGrid,dimBlock>>>(Cd, Bd, Ad, N, M, Z, sigma, iter, tau, lambda);
-            cudaDeviceSynchronize();
-            checkCudaErrors( cudaPeekAtLastError() );
-            n++;
-        }
-        checkCudaErrors(cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
-        cudaFree(Ad); cudaFree(Bd); cudaFree(Cd);
-    }
-}
-\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h b/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
deleted file mode 100644
index cfbb45a..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __DIFF_HO_H_
-#define __DIFF_HO_H_
-
-extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda);
-
-#endif 
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
deleted file mode 100644
index 17da3a8..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
+++ /dev/null
@@ -1,239 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-#include "NLM_GPU_kernel.h"
-
-#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        exit(EXIT_FAILURE);
-    }
-}
-
-extern __shared__ float sharedmem[];
-
-// run PB den kernel here
-__global__ void NLM_kernel(float *Ad, float* Bd, float *Eucl_Vec_d, int N, int M, int Z, int SearchW, int SimilW, int SearchW_real, int SearchW_full, int SimilW_full, int padXY, float h2, float lambda,  dim3 imagedim, dim3 griddim, dim3 kerneldim, dim3 sharedmemdim, int nUpdatePerThread, float neighborsize)
-{
-    
-    int  i1, j1, k1, i2, j2, k2, i3, j3, k3, i_l, j_l, k_l, count;
-    float value, Weight_norm, normsum, Weight;
-    
-    int bidx = blockIdx.x;
-    int bidy = blockIdx.y%griddim.y;
-    int bidz = (int)((blockIdx.y)/griddim.y);
-    
-    // global index for block endpoint
-    int beidx = __mul24(bidx,blockDim.x);
-    int beidy = __mul24(bidy,blockDim.y);
-    int beidz = __mul24(bidz,blockDim.z);
-    
-    int tid = __mul24(threadIdx.z,__mul24(blockDim.x,blockDim.y)) +
-            __mul24(threadIdx.y,blockDim.x) + threadIdx.x;
-    
-    #ifdef __DEVICE_EMULATION__
-            printf("tid : %d", tid);
-    #endif
-            
-    // update shared memory
-    int nthreads = blockDim.x*blockDim.y*blockDim.z;
-    int sharedMemSize = sharedmemdim.x * sharedmemdim.y * sharedmemdim.z;
-    for(int i=0; i<nUpdatePerThread; i++)
-    {
-        int sid = tid + i*nthreads; // index in shared memory
-        if (sid < sharedMemSize)
-        {
-            // global x/y/z index in volume
-            int gidx, gidy, gidz;
-            int sidx, sidy, sidz, tid;
-            
-            sidz = sid / (sharedmemdim.x*sharedmemdim.y);
-            tid  = sid - sidz*(sharedmemdim.x*sharedmemdim.y);
-            sidy = tid / (sharedmemdim.x);
-            sidx = tid - sidy*(sharedmemdim.x);
-            
-            gidx = (int)sidx - (int)kerneldim.x + (int)beidx;
-            gidy = (int)sidy - (int)kerneldim.y + (int)beidy;
-            gidz = (int)sidz - (int)kerneldim.z + (int)beidz;
-            
-            // Neumann boundary condition
-            int cx = (int) min(max(0,gidx),imagedim.x-1);
-            int cy = (int) min(max(0,gidy),imagedim.y-1);
-            int cz = (int) min(max(0,gidz),imagedim.z-1);
-            
-            int gid = cz*imagedim.x*imagedim.y + cy*imagedim.x + cx;
-            
-            sharedmem[sid] = Ad[gid];
-        }
-    }
-    __syncthreads();
-    
-    // global index of the current voxel in the input volume
-    int idx = beidx + threadIdx.x;
-    int idy = beidy + threadIdx.y;
-    int idz = beidz + threadIdx.z;
-    
-    if (Z == 1) {
-        /* 2D case */
-        /*checking boundaries to be within the image and avoid padded spaces */
-        if( idx >= padXY && idx < (imagedim.x - padXY) &&
-                idy >= padXY && idy < (imagedim.y - padXY))
-        {
-            int i_centr = threadIdx.x + (SearchW); /*indices of the centrilized (main) pixel */
-            int j_centr = threadIdx.y + (SearchW); /*indices of the centrilized (main) pixel */
-            
-            if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M)) {
-                
-                Weight_norm = 0; value = 0.0;
-                /* Massive Search window loop */
-                for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) {
-                    for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) {
-                        /* if inside the searching window */
-                        count = 0; normsum = 0.0;
-                        for(i_l=-SimilW; i_l<=SimilW; i_l++) {
-                            for(j_l=-SimilW; j_l<=SimilW; j_l++) {
-                                i2 = i1+i_l; j2 = j1+j_l;
-                                i3 = i_centr+i_l; j3 = j_centr+j_l;  /*coordinates of the inner patch loop */                                
-                                if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M)) {
-                                       if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M)) {
-                                            normsum += Eucl_Vec_d[count]*pow((sharedmem[(j3)*sharedmemdim.x+(i3)] - sharedmem[j2*sharedmemdim.x+i2]), 2);
-                                            }}
-                                        count++;
-                                }}
-                                if (normsum != 0) Weight = (expf(-normsum/h2));
-                                else Weight = 0.0;
-                                Weight_norm += Weight;
-                                value += sharedmem[j1*sharedmemdim.x+i1]*Weight;
-                            }}      
-                                
-                if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm;
-                else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx];
-            }
-        }      /*boundary conditions end*/
-    }
-    else {
-        /*3D case*/
-        /*checking boundaries to be within the image and avoid padded spaces */
-        if( idx >= padXY && idx < (imagedim.x - padXY) &&
-                idy >= padXY && idy < (imagedim.y - padXY) &&
-                idz >= padXY && idz < (imagedim.z - padXY) )
-        {
-            int i_centr = threadIdx.x + SearchW; /*indices of the centrilized (main) pixel */
-            int j_centr = threadIdx.y + SearchW; /*indices of the centrilized (main) pixel */
-            int k_centr = threadIdx.z + SearchW; /*indices of the centrilized (main) pixel */
-            
-            if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M) && (k_centr > 0) && (k_centr < Z)) {
-                
-                Weight_norm = 0; value = 0.0;
-                /* Massive Search window loop */
-                for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) {
-                    for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) {
-                        for(k1 = k_centr - SearchW_real ; k1<= k_centr + SearchW_real ; k1++) {
-                            /* if inside the searching window */
-                            count = 0; normsum = 0.0;
-                            for(i_l=-SimilW; i_l<=SimilW; i_l++) {
-                                for(j_l=-SimilW; j_l<=SimilW; j_l++) {
-                                    for(k_l=-SimilW; k_l<=SimilW; k_l++) {
-                                        i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
-                                        i3 = i_centr+i_l; j3 = j_centr+j_l; k3 = k_centr+k_l;   /*coordinates of the inner patch loop */                              
-                                                    if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M) && (k2 > 0) && (k2 < Z)) {
-                                                        if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M) && (k3 > 0) && (k3 < Z)) {
-                                                            normsum += Eucl_Vec_d[count]*pow((sharedmem[(k3)*sharedmemdim.x*sharedmemdim.y + (j3)*sharedmemdim.x+(i3)] - sharedmem[(k2)*sharedmemdim.x*sharedmemdim.y + j2*sharedmemdim.x+i2]), 2);
-                                                        }}
-                                                    count++;
-                                                }}}
-                                       if (normsum != 0) Weight = (expf(-normsum/h2));
-                                       else Weight = 0.0;
-                                       Weight_norm += Weight;
-                                       value += sharedmem[k1*sharedmemdim.x*sharedmemdim.y + j1*sharedmemdim.x+i1]*Weight;                                                                
-                        }}}      /* BIG search window loop end*/
-                
-               
-                if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm;
-                else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx];
-            }
-        }      /* boundary conditions end */
-    }
-}
-        
-/////////////////////////////////////////////////
-// HOST FUNCTION
-extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float h2, float lambda)
-{
-    int deviceCount = -1; // number of devices
-    cudaGetDeviceCount(&deviceCount);
-    if (deviceCount == 0) {
-        fprintf(stderr, "No CUDA devices found\n");
-        return;
-    }
-    
-//     cudaDeviceReset();
-    
-    int padXY, SearchW_full, SimilW_full,  blockWidth, blockHeight, blockDepth, nBlockX, nBlockY, nBlockZ, kernel_depth;
-    float *Ad, *Bd, *Eucl_Vec_d;
-    
-    if (dimension == 2) {
-        blockWidth  = 16;
-        blockHeight = 16;
-        blockDepth  = 1;
-        Z = 1;
-        kernel_depth = 0;
-    }
-    else {
-        blockWidth  = 8;
-        blockHeight = 8;
-        blockDepth  = 8;
-        kernel_depth = SearchW;
-    }
-    
-    // compute how many blocks are needed
-    nBlockX = ceil((float)N / (float)blockWidth);
-    nBlockY = ceil((float)M / (float)blockHeight);
-    nBlockZ = ceil((float)Z / (float)blockDepth);
-    
-    dim3 dimGrid(nBlockX,nBlockY*nBlockZ);
-    dim3 dimBlock(blockWidth, blockHeight, blockDepth);
-    dim3 imagedim(N,M,Z);
-    dim3 griddim(nBlockX,nBlockY,nBlockZ);
-    
-    dim3 kerneldim(SearchW,SearchW,kernel_depth);
-    dim3 sharedmemdim((SearchW*2)+blockWidth,(SearchW*2)+blockHeight,(kernel_depth*2)+blockDepth);
-    int sharedmemsize = sizeof(float)*sharedmemdim.x*sharedmemdim.y*sharedmemdim.z;
-    int updateperthread = ceil((float)(sharedmemdim.x*sharedmemdim.y*sharedmemdim.z)/(float)(blockWidth*blockHeight*blockDepth));
-    float neighborsize = (2*SearchW+1)*(2*SearchW+1)*(2*kernel_depth+1);
-    
-    padXY = SearchW + 2*SimilW; /* padding sizes */
-    
-    SearchW_full = 2*SearchW + 1; /* the full searching window  size */
-    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
-    
-    /*allocate space for images on device*/
-    checkCudaErrors( cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)) );
-    checkCudaErrors( cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)) );
-    /*allocate space for vectors on device*/
-    if (dimension == 2) {
-        checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*sizeof(float)) );
-        checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
-    }
-    else {
-        checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*SimilW_full*sizeof(float)) );
-        checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
-    }
-    
-    /* copy data from the host to device */
-    checkCudaErrors( cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice) );
-    
-    // Run CUDA kernel here
-    NLM_kernel<<<dimGrid,dimBlock,sharedmemsize>>>(Ad, Bd, Eucl_Vec_d, M, N, Z, SearchW, SimilW, SearchW_real, SearchW_full, SimilW_full, padXY, h2, lambda, imagedim, griddim, kerneldim, sharedmemdim, updateperthread, neighborsize);
-    
-    checkCudaErrors( cudaPeekAtLastError() );
-//     gpuErrchk( cudaDeviceSynchronize() );
-    
-    checkCudaErrors( cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost) );
-    cudaFree(Ad); cudaFree(Bd); cudaFree(Eucl_Vec_d);
-}
diff --git a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h b/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
deleted file mode 100644
index bc9d4a3..0000000
--- a/Wrappers/Matlab/mex_compile/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __NLMREG_KERNELS_H_
-#define __NLMREG_KERNELS_H_
-
-extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambda);
-
-#endif