From bfe917daf4a5d6ce17f1b5d2497a0f7a345f88a5 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Wed, 17 Apr 2019 12:03:27 +0100
Subject: var reg for GPU, ROF TV model

---
 src/Core/regularisers_CPU/ROF_TV_core.c      |  5 +-
 src/Core/regularisers_CPU/ROF_TV_core.h      | 12 ++---
 src/Core/regularisers_GPU/TV_ROF_GPU_core.cu | 81 ++++++++++++++--------------
 src/Core/regularisers_GPU/TV_ROF_GPU_core.h  |  4 +-
 src/Python/src/cpu_regularisers.pyx          |  8 +--
 src/Python/src/gpu_regularisers.pyx          | 62 +++++++++++++++------
 6 files changed, 100 insertions(+), 72 deletions(-)

(limited to 'src')

diff --git a/src/Core/regularisers_CPU/ROF_TV_core.c b/src/Core/regularisers_CPU/ROF_TV_core.c
index 955cabc..6951268 100644
--- a/src/Core/regularisers_CPU/ROF_TV_core.c
+++ b/src/Core/regularisers_CPU/ROF_TV_core.c
@@ -30,17 +30,16 @@ int sign(float x) {
 
 
 /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
- *
  *
  * Input Parameters:
  * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
+ * 2. lambda - regularization parameter (a constant or the same size as input (1))
  * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
  * 4. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
  * 5. eplsilon: tolerance constant
  *
  * Output:
- * [1] Regularized image/volume
+ * [1] Regularised image/volume
  * [2] Information vector which contains [iteration no., reached tolerance]
  *
  * This function is based on the paper by
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.h b/src/Core/regularisers_CPU/ROF_TV_core.h
index 8b5e2e4..7e29ddb 100644
--- a/src/Core/regularisers_CPU/ROF_TV_core.h
+++ b/src/Core/regularisers_CPU/ROF_TV_core.h
@@ -27,29 +27,25 @@ limitations under the License.
 
 /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
  *
- * 
  * Input Parameters:
  * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
+ * 2. lambda - regularization parameter (a constant or the same size as input (1))
  * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
  * 4. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
- * 5. eplsilon: tolerance constant 
+ * 5. eplsilon: tolerance constant
  *
  * Output:
- * [1] Regularized image/volume 
+ * [1] Regularised image/volume
  * [2] Information vector which contains [iteration no., reached tolerance]
  *
  * This function is based on the paper by
  * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
- *
- * D. Kazantsev, 2016-18
  */
- 
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
-
 CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float *lambda, int lambda_is_arr, float tau, long dimX, long dimY, long dimZ);
 CCPI_EXPORT float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ);
 CCPI_EXPORT float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ);
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
index 193cf53..c6c6e88 100755
--- a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -27,17 +27,16 @@ limitations under the License.
 *
 * Input Parameters:
 * 1. Noisy image/volume [REQUIRED]
-* 2. lambda - regularization parameter [REQUIRED]
-* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED]
-* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+* 2. lambda - regularization parameter (a constant or the same size as input (1))
+* 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+* 4. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
 * 5. eplsilon: tolerance constant
-
- * Output:
- * [1] Filtered/regularized image/volume
- * [2] Information vector which contains [iteration no., reached tolerance]
-
-
- * This function is based on the paper by
+*
+* Output:
+* [1] Regularised image/volume
+* [2] Information vector which contains [iteration no., reached tolerance]
+*
+* This function is based on the paper by
 * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
 */
 
@@ -121,27 +120,25 @@ __host__ __device__ int sign (float x)
 		}
 	}
 
-    __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M)
+    __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float *lambdaPar_d, int lambda_is_arr, float tau, int N, int M)
     {
 		int i2, j2;
-		float dv1,dv2;
+		float dv1,dv2,lambda_val;
 		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
 
-        int index = i + N*j;
+    int index = i + N*j;
+    lambda_val = *(lambdaPar_d + index* lambda_is_arr);
 
         if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
-
 				/* boundary conditions (Neumann reflections) */
                 i2 = i - 1; if (i2 < 0) i2 = i+1;
                 j2 = j - 1; if (j2 < 0) j2 = j+1;
-
 				/* divergence components  */
                 dv1 = D1[index] - D1[j2*N + i];
                 dv2 = D2[index] - D2[j*N + i2];
 
-                Update[index] += tau*(lambda*(dv1 + dv2) - (Update[index] - Input[index]));
-
+                Update[index] += tau*(lambda_val*(dv1 + dv2) - (Update[index] - Input[index]));
 		}
 	}
 /*********************3D case****************************/
@@ -265,19 +262,20 @@ __host__ __device__ int sign (float x)
 		}
 	}
 
-    __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ)
+    __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float *lambda, int lambda_is_arr, float tau, int dimX, int dimY, int dimZ)
     {
-		float dv1, dv2, dv3;
+		float dv1, dv2, dv3, lambda_val;
 		int i1,i2,k1,j1,j2,k2;
 		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
 
-        int index = (dimX*dimY)*k + j*dimX+i;
+    int index = (dimX*dimY)*k + j*dimX+i;
+    lambda_val = *(lambda + index* lambda_is_arr);
 
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+    if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
 
-					/* symmetric boundary conditions (Neuman) */
+		/* symmetric boundary conditions (Neuman) */
                     i1 = i + 1; if (i1 >= dimX) i1 = i-1;
                     i2 = i - 1; if (i2 < 0) i2 = i+1;
                     j1 = j + 1; if (j1 >= dimY) j1 = j-1;
@@ -290,7 +288,7 @@ __host__ __device__ int sign (float x)
                     dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
                     dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
 
-                    Update[index] += tau*(lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
+                    Update[index] += tau*(lambda_val*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
 
 		}
 	}
@@ -348,7 +346,7 @@ __global__ void ROFResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu
 
 /////////////////////////////////////////////////
 ///////////////// HOST FUNCTION /////////////////
-extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z)
+extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z)
 {
      int deviceCount = -1; // number of devices
      cudaGetDeviceCount(&deviceCount);
@@ -360,10 +358,10 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
         re = 0.0f;
 	int ImSize, count, n;
 	count = 0; n = 0;
-        float *d_input, *d_update, *d_D1, *d_D2, *d_update_prev=NULL;
+  float *d_input, *d_update, *d_D1, *d_D2, *d_update_prev=NULL, *lambdaPar_d=NULL;
 
 	if (Z == 0) Z = 1;
-	ImSize = N*M*Z;
+	      ImSize = N*M*Z;
         CHECK(cudaMalloc((void**)&d_input,ImSize*sizeof(float)));
         CHECK(cudaMalloc((void**)&d_update,ImSize*sizeof(float)));
         CHECK(cudaMalloc((void**)&d_D1,ImSize*sizeof(float)));
@@ -373,6 +371,16 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
         CHECK(cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
         CHECK(cudaMemcpy(d_update,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
 
+        /*dealing with spatially variant reglariser */
+        if (lambda_is_arr == 1) {
+          CHECK(cudaMalloc((void**)&lambdaPar_d,ImSize*sizeof(float)));
+          CHECK(cudaMemcpy(lambdaPar_d,lambdaPar,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        }
+        else {
+          CHECK(cudaMalloc((void**)&lambdaPar_d,1*sizeof(float)));
+          CHECK(cudaMemcpy(lambdaPar_d,lambdaPar,1*sizeof(float),cudaMemcpyHostToDevice));
+        }
+
         if (Z == 1) {
              // TV - 2D case
             dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
@@ -391,7 +399,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
 		            D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
                 CHECK(cudaDeviceSynchronize());
                 /*running main kernel*/
-                TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M);
+                TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar_d, lambda_is_arr, tau, N, M);
                 CHECK(cudaDeviceSynchronize());
 
                 if ((epsil != 0.0f) && (n % 5 == 0)) {
@@ -417,7 +425,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
             }
         }
 	 else {
-	    // TV - 3D case
+	           // TV - 3D case
             dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
             dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
 
@@ -431,7 +439,6 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
               checkCudaErrors( cudaDeviceSynchronize() );
               checkCudaErrors(cudaPeekAtLastError() );
               }
-
                 /* calculate differences */
                 D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
                 CHECK(cudaDeviceSynchronize());
@@ -440,7 +447,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
                 D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
                 CHECK(cudaDeviceSynchronize());
                 /*running main kernel*/
-                TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z);
+                TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar_d, lambda_is_arr, tau, N, M, Z);
                 CHECK(cudaDeviceSynchronize());
 
                 if ((epsil != 0.0f) && (n % 5 == 0)) {
@@ -461,12 +468,8 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
                 re = (reduction/reduction2);
                 if (re < epsil)  count++;
                 if (count > 3) break;
-
-            }
-
-
+              }
             }
-
             CHECK(cudaFree(d_D3));
         }
         CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
@@ -475,9 +478,9 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f
         CHECK(cudaFree(d_update));
         CHECK(cudaFree(d_D1));
         CHECK(cudaFree(d_D2));
+        CHECK(cudaFree(lambdaPar_d));
 
 	      infovector[0] = (float)(n);  /*iterations number (if stopped earlier based on tolerance)*/
         infovector[1] = re;  /* reached tolerance */
-
         return 0;
 }
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
index 0a75124..30514ad 100755
--- a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
@@ -3,6 +3,6 @@
 #include "CCPiDefines.h"
 #include <stdio.h>
 
-extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z);
+extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z);
 
-#endif 
+#endif
diff --git a/src/Python/src/cpu_regularisers.pyx b/src/Python/src/cpu_regularisers.pyx
index a2c4c32..9855eca 100644
--- a/src/Python/src/cpu_regularisers.pyx
+++ b/src/Python/src/cpu_regularisers.pyx
@@ -18,7 +18,7 @@ import cython
 import numpy as np
 cimport numpy as np
 
-cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int arrayscal, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
+cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
 cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int dimX, int dimY, int dimZ);
 cdef extern float SB_TV_CPU_main(float *Input, float *Output, float *infovector, float mu, int iter, float epsil, int methodTV, int dimX, int dimY, int dimZ);
 cdef extern float LLT_ROF_CPU_main(float *Input, float *Output, float *infovector, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ);
@@ -721,20 +721,20 @@ def MASK_CORR_CPU_2D(np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData,
                     np.ndarray[np.uint8_t, ndim=1, mode="c"] select_classes,
                      int total_classesNum,
                      int CorrectionWindow):
-    
+
     cdef long dims[2]
     dims[0] = maskData.shape[0]
     dims[1] = maskData.shape[1]
 
     select_classes_length = select_classes.shape[0]
-    
+
     cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] mask_upd = \
             np.zeros([dims[0],dims[1]], dtype='uint8')
     cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] corr_regions = \
             np.zeros([dims[0],dims[1]], dtype='uint8')
 
     # Run the function to process given MASK
-    Mask_merge_main(&maskData[0,0], &mask_upd[0,0], &corr_regions[0,0], &select_classes[0], select_classes_length, 
+    Mask_merge_main(&maskData[0,0], &mask_upd[0,0], &corr_regions[0,0], &select_classes[0], select_classes_length,
     total_classesNum, CorrectionWindow, dims[1], dims[0], 1)
     return (mask_upd,corr_regions)
 
diff --git a/src/Python/src/gpu_regularisers.pyx b/src/Python/src/gpu_regularisers.pyx
index 84ee981..8cd8c93 100644
--- a/src/Python/src/gpu_regularisers.pyx
+++ b/src/Python/src/gpu_regularisers.pyx
@@ -20,7 +20,7 @@ cimport numpy as np
 
 CUDAErrorMessage = 'CUDA error'
 
-cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z);
+cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z);
 cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int N, int M, int Z);
 cdef extern int TV_SB_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int N, int M, int Z);
 cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float *infovector, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau,  float epsil, int N, int M, int Z);
@@ -177,7 +177,7 @@ def Diff4th_GPU(inputData,
 #********************** Total-variation ROF *********************#
 #****************************************************************#
 def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
-                     float regularisation_parameter,
+                     regularisation_parameter,
                      int iterations,
                      float time_marching_parameter,
                      float tolerance_param):
@@ -185,26 +185,39 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
     cdef long dims[2]
     dims[0] = inputData.shape[0]
     dims[1] = inputData.shape[1]
-
+    cdef float lambdareg
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] reg
     cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
 		    np.zeros([dims[0],dims[1]], dtype='float32')
     cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \
             np.ones([2], dtype='float32')
 
-    # Running CUDA code here
-    if (TV_ROF_GPU_main(
-            &inputData[0,0], &outputData[0,0], &infovec[0],
-                       regularisation_parameter,
+    if isinstance (regularisation_parameter, np.ndarray):
+        reg = regularisation_parameter.copy()
+        # Running CUDA code here
+        if (TV_ROF_GPU_main(&inputData[0,0], &outputData[0,0], &infovec[0], 
+                       &reg[0,0], 1,
                        iterations,
                        time_marching_parameter,
                        tolerance_param,
                        dims[1], dims[0], 1)==0):
-        return (outputData,infovec)
+                return (outputData,infovec)
+        else:
+            raise ValueError(CUDAErrorMessage);
     else:
-        raise ValueError(CUDAErrorMessage);
+        lambdareg = regularisation_parameter
+        if (TV_ROF_GPU_main(&inputData[0,0], &outputData[0,0], &infovec[0],
+                       &lambdareg,  0,
+                       iterations,
+                       time_marching_parameter,
+                       tolerance_param,
+                       dims[1], dims[0], 1)==0):
+            return (outputData,infovec)
+        else:
+            raise ValueError(CUDAErrorMessage);
 
 def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
-                     float regularisation_parameter,
+                     regularisation_parameter,
                      int iterations,
                      float time_marching_parameter,
                      float tolerance_param):
@@ -213,23 +226,40 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
     dims[0] = inputData.shape[0]
     dims[1] = inputData.shape[1]
     dims[2] = inputData.shape[2]
-
+    cdef float lambdareg
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] reg
     cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
 		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
     cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \
             np.ones([2], dtype='float32')
 
-    # Running CUDA code here
-    if (TV_ROF_GPU_main(
+    if isinstance (regularisation_parameter, np.ndarray):
+        reg = regularisation_parameter.copy()
+        # Running CUDA code here
+        if (TV_ROF_GPU_main(
             &inputData[0,0,0], &outputData[0,0,0], &infovec[0],
-                       regularisation_parameter,
+                       &reg[0,0,0], 1,
                        iterations,
                        time_marching_parameter,
                        tolerance_param,
                        dims[2], dims[1], dims[0])==0):
-        return (outputData,infovec)
+            return (outputData,infovec)
+        else:
+            raise ValueError(CUDAErrorMessage);
     else:
-        raise ValueError(CUDAErrorMessage);
+        lambdareg = regularisation_parameter
+        # Running CUDA code here
+        if (TV_ROF_GPU_main(
+            &inputData[0,0,0], &outputData[0,0,0], &infovec[0],
+                       &lambdareg,  0,
+                       iterations,
+                       time_marching_parameter,
+                       tolerance_param,
+                       dims[2], dims[1], dims[0])==0):
+            return (outputData,infovec)
+        else:
+            raise ValueError(CUDAErrorMessage);
+
 #****************************************************************#
 #********************** Total-variation FGP *********************#
 #****************************************************************#
-- 
cgit v1.2.3