diff options
| author | vagrant <vagrant@localhost.localdomain> | 2019-02-28 15:00:39 +0000 | 
|---|---|---|
| committer | vagrant <vagrant@localhost.localdomain> | 2019-02-28 15:00:39 +0000 | 
| commit | 364a703de9f31b35d4301f3e913f519be9d3a14f (patch) | |
| tree | a398909ff87b22745829657f3e62b0439a64ad77 | |
| parent | 7bb99cfd904b23c041be273ffc2746296e6eb814 (diff) | |
| parent | 4c728cf72345f7ab7967380cb536529fd9b1403d (diff) | |
Merge remote-tracking branch 'remotes/origin/master' into newdirstructure
| -rw-r--r-- | Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py | 231 | ||||
| -rw-r--r-- | Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py | 161 | ||||
| -rw-r--r-- | Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py | 309 | ||||
| -rw-r--r-- | Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_SX.py | 117 | ||||
| -rw-r--r-- | Wrappers/Python/demos/SoftwareX_supp/Readme.md | 26 | ||||
| -rw-r--r-- | Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5 | bin | 0 -> 2408 bytes | |||
| -rw-r--r-- | Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5 | bin | 0 -> 2408 bytes | |||
| -rw-r--r-- | Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 | bin | 0 -> 2408 bytes | |||
| -rw-r--r-- | demos/demoMatlab_denoise.m | 104 | ||||
| -rw-r--r-- | src/Core/regularisers_GPU/TGV_GPU_core.cu | 487 | ||||
| -rw-r--r-- | src/Core/regularisers_GPU/TGV_GPU_core.h | 2 | 
11 files changed, 1158 insertions, 279 deletions
diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py new file mode 100644 index 0000000..01491d9 --- /dev/null +++ b/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +This demo scripts support the following publication:  +"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with  +proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner, + Philip J. Withers; Software X, 2019 +____________________________________________________________________________ +* Reads real tomographic data (stored at Zenodo) +--- https://doi.org/10.5281/zenodo.2578893 +* Reconstructs using TomoRec software +* Saves reconstructed images  +____________________________________________________________________________ +>>>>> Dependencies: <<<<< +1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox +2. TomoRec: conda install -c dkazanc tomorec +or install from https://github.com/dkazanc/TomoRec +3. libtiff if one needs to save tiff images: +    install pip install libtiff + +@author: Daniil Kazantsev, e:mail daniil.kazantsev@diamond.ac.uk +GPLv3 license (ASTRA toolbox) +""" +import numpy as np +import matplotlib.pyplot as plt +import h5py +from tomorec.supp.suppTools import normaliser +import time + +# load dendritic projection data +h5f = h5py.File('data/DendrData_3D.h5','r') +dataRaw = h5f['dataRaw'][:] +flats = h5f['flats'][:] +darks = h5f['darks'][:] +angles_rad = h5f['angles_rad'][:] +h5f.close() +#%% +# normalise the data [detectorsVert, Projections, detectorsHoriz] +data_norm = normaliser(dataRaw, flats, darks, log='log') +del dataRaw, darks, flats + +intens_max = 2.3 +plt.figure()  +plt.subplot(131) +plt.imshow(data_norm[:,150,:],vmin=0, vmax=intens_max) +plt.title('2D Projection (analytical)') +plt.subplot(132) +plt.imshow(data_norm[300,:,:],vmin=0, vmax=intens_max) +plt.title('Sinogram view') +plt.subplot(133) +plt.imshow(data_norm[:,:,600],vmin=0, vmax=intens_max) +plt.title('Tangentogram view') +plt.show() + +detectorHoriz = np.size(data_norm,2) +det_y_crop = [i for i in range(0,detectorHoriz-22)] +N_size = 950 # reconstruction domain +time_label = int(time.time()) +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("%%%%%%%%%%%%Reconstructing with FBP method %%%%%%%%%%%%%%%%%") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +from tomorec.methodsDIR import RecToolsDIR + +RectoolsDIR = RecToolsDIR(DetectorsDimH = np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal) +                    DetectorsDimV = 100,  # DetectorsDimV # detector dimension (vertical) for 3D case only +                    AnglesVec = angles_rad, # array of angles in radians +                    ObjSize = N_size, # a scalar to define reconstructed object dimensions +                    device='gpu') + +FBPrec = RectoolsDIR.FBP(data_norm[0:100,:,det_y_crop]) + +sliceSel = 50 +max_val = 0.003 +plt.figure()  +plt.subplot(131) +plt.imshow(FBPrec[sliceSel,:,:],vmin=0, vmax=max_val, cmap="gray") +plt.title('FBP Reconstruction, axial view') + +plt.subplot(132) +plt.imshow(FBPrec[:,sliceSel,:],vmin=0, vmax=max_val, cmap="gray") +plt.title('FBP Reconstruction, coronal view') + +plt.subplot(133) +plt.imshow(FBPrec[:,:,sliceSel],vmin=0, vmax=max_val, cmap="gray") +plt.title('FBP Reconstruction, sagittal view') +plt.show() + +# saving to tiffs (16bit) +""" +from libtiff import TIFF +FBPrec += np.abs(np.min(FBPrec)) +multiplier = (int)(65535/(np.max(FBPrec))) + +# saving to tiffs (16bit) +for i in range(0,np.size(FBPrec,0)): +    tiff = TIFF.open('Dendr_FBP'+'_'+str(i)+'.tiff', mode='w') +    tiff.write_image(np.uint16(FBPrec[i,:,:]*multiplier)) +    tiff.close() +""" +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("Reconstructing with ADMM method using TomoRec software") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +# initialise TomoRec ITERATIVE reconstruction class ONCE +from tomorec.methodsIR import RecToolsIR +RectoolsIR = RecToolsIR(DetectorsDimH =  np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal) +                    DetectorsDimV = 100,  # DetectorsDimV # detector dimension (vertical) for 3D case only +                    AnglesVec = angles_rad, # array of angles in radians +                    ObjSize = N_size, # a scalar to define reconstructed object dimensions +                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip) +                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE') +                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets +                    tolerance = 1e-08, # tolerance to stop outer iterations earlier +                    device='gpu') +#%% +print ("Reconstructing with ADMM method using SB-TV penalty") +RecADMM_reg_sbtv = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop], +                              rho_const = 2000.0, \ +                              iterationsADMM = 15, \ +                              regularisation = 'SB_TV', \ +                              regularisation_parameter = 0.00085,\ +                              regularisation_iterations = 50) + +sliceSel = 50 +max_val = 0.003 +plt.figure()  +plt.subplot(131) +plt.imshow(RecADMM_reg_sbtv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="gray") +plt.title('3D ADMM-SB-TV Reconstruction, axial view') + +plt.subplot(132) +plt.imshow(RecADMM_reg_sbtv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="gray") +plt.title('3D ADMM-SB-TV Reconstruction, coronal view') + +plt.subplot(133) +plt.imshow(RecADMM_reg_sbtv[:,:,sliceSel],vmin=0, vmax=max_val, cmap="gray") +plt.title('3D ADMM-SB-TV Reconstruction, sagittal view') +plt.show() + + +# saving to tiffs (16bit) +""" +from libtiff import TIFF +multiplier = (int)(65535/(np.max(RecADMM_reg_sbtv))) +for i in range(0,np.size(RecADMM_reg_sbtv,0)): +    tiff = TIFF.open('Dendr_ADMM_SBTV'+'_'+str(i)+'.tiff', mode='w') +    tiff.write_image(np.uint16(RecADMM_reg_sbtv[i,:,:]*multiplier)) +    tiff.close() +""" +# Saving recpnstructed data with a unique time label +np.save('Dendr_ADMM_SBTV'+str(time_label)+'.npy', RecADMM_reg_sbtv) +del RecADMM_reg_sbtv +#%% +print ("Reconstructing with ADMM method using ROF-LLT penalty") +RecADMM_reg_rofllt = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop], +                              rho_const = 2000.0, \ +                              iterationsADMM = 15, \ +                              regularisation = 'LLT_ROF', \ +                              regularisation_parameter = 0.0009,\ +                              regularisation_parameter2 = 0.0007,\ +                              time_marching_parameter = 0.001,\ +                              regularisation_iterations = 550) + +sliceSel = 50 +max_val = 0.003 +plt.figure()  +plt.subplot(131) +plt.imshow(RecADMM_reg_rofllt[sliceSel,:,:],vmin=0, vmax=max_val) +plt.title('3D ADMM-ROFLLT Reconstruction, axial view') + +plt.subplot(132) +plt.imshow(RecADMM_reg_rofllt[:,sliceSel,:],vmin=0, vmax=max_val) +plt.title('3D ADMM-ROFLLT Reconstruction, coronal view') + +plt.subplot(133) +plt.imshow(RecADMM_reg_rofllt[:,:,sliceSel],vmin=0, vmax=max_val) +plt.title('3D ADMM-ROFLLT Reconstruction, sagittal view') +plt.show() + +# saving to tiffs (16bit) +""" +from libtiff import TIFF +multiplier = (int)(65535/(np.max(RecADMM_reg_rofllt))) +for i in range(0,np.size(RecADMM_reg_rofllt,0)): +    tiff = TIFF.open('Dendr_ADMM_ROFLLT'+'_'+str(i)+'.tiff', mode='w') +    tiff.write_image(np.uint16(RecADMM_reg_rofllt[i,:,:]*multiplier)) +    tiff.close() +""" + +# Saving recpnstructed data with a unique time label +np.save('Dendr_ADMM_ROFLLT'+str(time_label)+'.npy', RecADMM_reg_rofllt) +del RecADMM_reg_rofllt +#%% +print ("Reconstructing with ADMM method using TGV penalty") +RecADMM_reg_tgv = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop], +                              rho_const = 2000.0, \ +                              iterationsADMM = 15, \ +                              regularisation = 'TGV', \ +                              regularisation_parameter = 0.01,\ +                              regularisation_iterations = 500) + +sliceSel = 50 +max_val = 0.003 +plt.figure()  +plt.subplot(131) +plt.imshow(RecADMM_reg_tgv[sliceSel,:,:],vmin=0, vmax=max_val) +plt.title('3D ADMM-TGV Reconstruction, axial view') + +plt.subplot(132) +plt.imshow(RecADMM_reg_tgv[:,sliceSel,:],vmin=0, vmax=max_val) +plt.title('3D ADMM-TGV Reconstruction, coronal view') + +plt.subplot(133) +plt.imshow(RecADMM_reg_tgv[:,:,sliceSel],vmin=0, vmax=max_val) +plt.title('3D ADMM-TGV Reconstruction, sagittal view') +plt.show() + +# saving to tiffs (16bit) +""" +from libtiff import TIFF +multiplier = (int)(65535/(np.max(RecADMM_reg_tgv))) +for i in range(0,np.size(RecADMM_reg_tgv,0)): +    tiff = TIFF.open('Dendr_ADMM_TGV'+'_'+str(i)+'.tiff', mode='w') +    tiff.write_image(np.uint16(RecADMM_reg_tgv[i,:,:]*multiplier)) +    tiff.close() +""" +# Saving recpnstructed data with a unique time label +np.save('Dendr_ADMM_TGV'+str(time_label)+'.npy', RecADMM_reg_tgv) +del RecADMM_reg_tgv +#%%
\ No newline at end of file diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py new file mode 100644 index 0000000..59ffc0e --- /dev/null +++ b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +This demo scripts support the following publication:  +"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with  +proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner, + Philip J. Withers; Software X, 2019 +____________________________________________________________________________ +* Reads data which is previosly generated by TomoPhantom software (Zenodo link) +--- https://doi.org/10.5281/zenodo.2578893 +* Optimises for the regularisation parameters which later used in the script: +Demo_SimulData_Recon_SX.py +____________________________________________________________________________ +>>>>> Dependencies: <<<<< +>>>>> Dependencies: <<<<< +1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox +2. TomoRec: conda install -c dkazanc tomorec +or install from https://github.com/dkazanc/TomoRec + +@author: Daniil Kazantsev, e:mail daniil.kazantsev@diamond.ac.uk +GPLv3 license (ASTRA toolbox) +""" +#import timeit +import matplotlib.pyplot as plt +import numpy as np +import h5py +from ccpi.supp.qualitymetrics import QualityTools + +# loading the data  +h5f = h5py.File('data/TomoSim_data1550671417.h5','r') +phantom = h5f['phantom'][:] +projdata_norm = h5f['projdata_norm'][:] +proj_angles = h5f['proj_angles'][:] +h5f.close() + +[Vert_det, AnglesNum, Horiz_det] = np.shape(projdata_norm) +N_size = Vert_det + +sliceSel = 128 +#plt.gray() +plt.figure()  +plt.subplot(131) +plt.imshow(phantom[sliceSel,:,:],vmin=0, vmax=1) +plt.title('3D Phantom, axial view') + +plt.subplot(132) +plt.imshow(phantom[:,sliceSel,:],vmin=0, vmax=1) +plt.title('3D Phantom, coronal view') + +plt.subplot(133) +plt.imshow(phantom[:,:,sliceSel],vmin=0, vmax=1) +plt.title('3D Phantom, sagittal view') +plt.show() + +intens_max = 240 +plt.figure()  +plt.subplot(131) +plt.imshow(projdata_norm[:,sliceSel,:],vmin=0, vmax=intens_max) +plt.title('2D Projection (erroneous)') +plt.subplot(132) +plt.imshow(projdata_norm[sliceSel,:,:],vmin=0, vmax=intens_max) +plt.title('Sinogram view') +plt.subplot(133) +plt.imshow(projdata_norm[:,:,sliceSel],vmin=0, vmax=intens_max) +plt.title('Tangentogram view') +plt.show() +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("Reconstructing with ADMM method using TomoRec software") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +# initialise TomoRec ITERATIVE reconstruction class ONCE +from tomorec.methodsIR import RecToolsIR +RectoolsIR = RecToolsIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal) +                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only +                    AnglesVec = proj_angles, # array of angles in radians +                    ObjSize = N_size, # a scalar to define reconstructed object dimensions +                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip) +                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE') +                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets +                    tolerance = 1e-08, # tolerance to stop outer iterations earlier +                    device='gpu') +#%% +param_space = 30 +reg_param_sb_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters +erros_vec_sbtv = np.zeros((param_space)) # a vector of errors + +print ("Reconstructing with ADMM method using SB-TV penalty") +for i in range(0,param_space): +    RecADMM_reg_sbtv = RectoolsIR.ADMM(projdata_norm, +                                  rho_const = 2000.0, \ +                                  iterationsADMM = 15, \ +                                  regularisation = 'SB_TV', \ +                                  regularisation_parameter = reg_param_sb_vec[i],\ +                                  regularisation_iterations = 50) +    # calculate errors  +    Qtools = QualityTools(phantom, RecADMM_reg_sbtv) +    erros_vec_sbtv[i] = Qtools.rmse() +    print("RMSE for regularisation parameter {} for ADMM-SB-TV is {}".format(reg_param_sb_vec[i],erros_vec_sbtv[i])) + +plt.figure()  +plt.plot(erros_vec_sbtv) + +# Saving generated data with a unique time label +h5f = h5py.File('Optim_admm_sbtv.h5', 'w') +h5f.create_dataset('reg_param_sb_vec', data=reg_param_sb_vec) +h5f.create_dataset('erros_vec_sbtv', data=erros_vec_sbtv) +h5f.close() +#%% +param_space = 30 +reg_param_rofllt_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters +erros_vec_rofllt = np.zeros((param_space)) # a vector of errors + +print ("Reconstructing with ADMM method using ROF-LLT penalty") +for i in range(0,param_space): +    RecADMM_reg_rofllt = RectoolsIR.ADMM(projdata_norm, +                                  rho_const = 2000.0, \ +                                  iterationsADMM = 15, \ +                                  regularisation = 'LLT_ROF', \ +                                  regularisation_parameter = reg_param_rofllt_vec[i],\ +                                  regularisation_parameter2 = 0.005,\ +                                  regularisation_iterations = 600) +    # calculate errors  +    Qtools = QualityTools(phantom, RecADMM_reg_rofllt) +    erros_vec_rofllt[i] = Qtools.rmse() +    print("RMSE for regularisation parameter {} for ADMM-ROF-LLT is {}".format(reg_param_rofllt_vec[i],erros_vec_rofllt[i])) + +plt.figure()  +plt.plot(erros_vec_rofllt) + +# Saving generated data with a unique time label +h5f = h5py.File('Optim_admm_rofllt.h5', 'w') +h5f.create_dataset('reg_param_rofllt_vec', data=reg_param_rofllt_vec) +h5f.create_dataset('erros_vec_rofllt', data=erros_vec_rofllt) +h5f.close() +#%% +param_space = 30 +reg_param_tgv_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters +erros_vec_tgv = np.zeros((param_space)) # a vector of errors + +print ("Reconstructing with ADMM method using TGV penalty") +for i in range(0,param_space): +    RecADMM_reg_tgv = RectoolsIR.ADMM(projdata_norm, +                                  rho_const = 2000.0, \ +                                  iterationsADMM = 15, \ +                                  regularisation = 'TGV', \ +                                  regularisation_parameter = reg_param_tgv_vec[i],\ +                                  regularisation_iterations = 600) +    # calculate errors  +    Qtools = QualityTools(phantom, RecADMM_reg_tgv) +    erros_vec_tgv[i] = Qtools.rmse() +    print("RMSE for regularisation parameter {} for ADMM-TGV is {}".format(reg_param_tgv_vec[i],erros_vec_tgv[i])) + +plt.figure()  +plt.plot(erros_vec_tgv) + +# Saving generated data with a unique time label +h5f = h5py.File('Optim_admm_tgv.h5', 'w') +h5f.create_dataset('reg_param_tgv_vec', data=reg_param_tgv_vec) +h5f.create_dataset('erros_vec_tgv', data=erros_vec_tgv) +h5f.close() +#%%
\ No newline at end of file diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py new file mode 100644 index 0000000..93b0cef --- /dev/null +++ b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +This demo scripts support the following publication:  +"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with  +proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner, + Philip J. Withers; Software X, 2019 +____________________________________________________________________________ +* Reads data which is previously generated by TomoPhantom software (Zenodo link) +--- https://doi.org/10.5281/zenodo.2578893 +* Reconstruct using optimised regularisation parameters (see Demo_SimulData_ParOptimis_SX.py) +____________________________________________________________________________ +>>>>> Dependencies: <<<<< +1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox +2. TomoRec: conda install -c dkazanc tomorec +or install from https://github.com/dkazanc/TomoRec + +@author: Daniil Kazantsev, e:mail daniil.kazantsev@diamond.ac.uk +GPLv3 license (ASTRA toolbox) +""" +#import timeit +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import numpy as np +import h5py +from ccpi.supp.qualitymetrics import QualityTools +from scipy.signal import gaussian + +# loading the data  +h5f = h5py.File('data/TomoSim_data1550671417.h5','r') +phantom = h5f['phantom'][:] +projdata_norm = h5f['projdata_norm'][:] +proj_angles = h5f['proj_angles'][:] +h5f.close() + +[Vert_det, AnglesNum, Horiz_det] = np.shape(projdata_norm) +N_size = Vert_det + +# loading optmisation parameters (the result of running Demo_SimulData_ParOptimis_SX) +h5f = h5py.File('optim_param/Optim_admm_sbtv.h5','r') +reg_param_sb_vec = h5f['reg_param_sb_vec'][:] +erros_vec_sbtv = h5f['erros_vec_sbtv'][:] +h5f.close() + +h5f = h5py.File('optim_param/Optim_admm_rofllt.h5','r') +reg_param_rofllt_vec = h5f['reg_param_rofllt_vec'][:] +erros_vec_rofllt = h5f['erros_vec_rofllt'][:] +h5f.close() + +h5f = h5py.File('optim_param/Optim_admm_tgv.h5','r') +reg_param_tgv_vec = h5f['reg_param_tgv_vec'][:] +erros_vec_tgv = h5f['erros_vec_tgv'][:] +h5f.close() + +index_minSBTV = min(xrange(len(erros_vec_sbtv)), key=erros_vec_sbtv.__getitem__) +index_minROFLLT = min(xrange(len(erros_vec_rofllt)), key=erros_vec_rofllt.__getitem__) +index_minTGV = min(xrange(len(erros_vec_tgv)), key=erros_vec_tgv.__getitem__) +# assign optimal regularisation parameters: +optimReg_sbtv = reg_param_sb_vec[index_minSBTV] +optimReg_rofllt = reg_param_rofllt_vec[index_minROFLLT] +optimReg_tgv = reg_param_tgv_vec[index_minTGV] +#%% +# plot loaded data +sliceSel = 128 +#plt.figure()  +fig, (ax1, ax2) = plt.subplots(figsize=(15, 5), ncols=2) +plt.rcParams.update({'xtick.labelsize': 'x-small'}) +plt.rcParams.update({'ytick.labelsize':'x-small'}) +plt.subplot(121) +one = plt.imshow(phantom[sliceSel,:,:],vmin=0, vmax=1, interpolation='none', cmap="PuOr") +fig.colorbar(one, ax=ax1) +plt.title('3D Phantom, axial (X-Y) view') +plt.subplot(122) +two = plt.imshow(phantom[:,sliceSel,:],vmin=0, vmax=1,interpolation='none', cmap="PuOr") +fig.colorbar(two, ax=ax2) +plt.title('3D Phantom, coronal (Y-Z) view') +""" +plt.subplot(133) +plt.imshow(phantom[:,:,sliceSel],vmin=0, vmax=1, cmap="PuOr") +plt.title('3D Phantom, sagittal view') + +""" +plt.show() +#%% +intens_max = 220 +plt.figure()  +plt.rcParams.update({'xtick.labelsize': 'x-small'}) +plt.rcParams.update({'ytick.labelsize':'x-small'}) +plt.subplot(131) +plt.imshow(projdata_norm[:,sliceSel,:],vmin=0, vmax=intens_max, cmap="PuOr") +plt.xlabel('X-detector', fontsize=16) +plt.ylabel('Z-detector', fontsize=16) +plt.title('2D Projection (X-Z) view', fontsize=19) +plt.subplot(132) +plt.imshow(projdata_norm[sliceSel,:,:],vmin=0, vmax=intens_max, cmap="PuOr") +plt.xlabel('X-detector', fontsize=16) +plt.ylabel('Projection angle', fontsize=16) +plt.title('Sinogram (X-Y) view', fontsize=19) +plt.subplot(133) +plt.imshow(projdata_norm[:,:,sliceSel],vmin=0, vmax=intens_max, cmap="PuOr") +plt.xlabel('Projection angle', fontsize=16) +plt.ylabel('Z-detector', fontsize=16) +plt.title('Vertical (Y-Z) view', fontsize=19) +plt.show() +#plt.savefig('projdata.pdf', format='pdf', dpi=1200) +#%% +# initialise TomoRec DIRECT reconstruction class ONCE +from tomorec.methodsDIR import RecToolsDIR +RectoolsDIR = RecToolsDIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal) +                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only +                    AnglesVec = proj_angles, # array of angles in radians +                    ObjSize = N_size, # a scalar to define reconstructed object dimensions +                    device = 'gpu') +#%% +print ("Reconstruction using FBP from TomoRec") +recFBP= RectoolsDIR.FBP(projdata_norm) # FBP reconstruction +#%% +x0, y0 = 0, 127 # These are in _pixel_ coordinates!! +x1, y1 = 255, 127 + +sliceSel = int(0.5*N_size) +max_val = 1 +plt.figure(figsize = (20,5)) +gs1 = gridspec.GridSpec(1, 3) +gs1.update(wspace=0.1, hspace=0.05) # set the spacing between axes.  +ax1 = plt.subplot(gs1[0]) +plt.imshow(recFBP[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr") +ax1.plot([x0, x1], [y0, y1], 'ko-', linestyle='--') +plt.colorbar(ax=ax1) +plt.title('FBP Reconstruction, axial (X-Y) view', fontsize=19) +ax1.set_aspect('equal') +ax3 = plt.subplot(gs1[1]) +plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2) +plt.plot(recFBP[sliceSel,sliceSel,0:N_size],linestyle='--',color='g') +plt.title('Profile', fontsize=19) +ax2 = plt.subplot(gs1[2]) +plt.imshow(recFBP[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr") +plt.title('FBP Reconstruction, coronal (Y-Z) view', fontsize=19) +ax2.set_aspect('equal') +plt.show() +#plt.savefig('FBP_phantom.pdf', format='pdf', dpi=1600) + +# calculate errors  +Qtools = QualityTools(phantom, recFBP) +RMSE_fbp = Qtools.rmse() +print("Root Mean Square Error for FBP is {}".format(RMSE_fbp)) + +# SSIM measure +Qtools = QualityTools(phantom[128,:,:]*255, recFBP[128,:,:]*235) +win = np.array([gaussian(11, 1.5)]) +win2d = win * (win.T) +ssim_fbp = Qtools.ssim(win2d) +print("Mean SSIM for FBP is {}".format(ssim_fbp[0])) +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("Reconstructing with ADMM method using TomoRec software") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +# initialise TomoRec ITERATIVE reconstruction class ONCE +from tomorec.methodsIR import RecToolsIR +RectoolsIR = RecToolsIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal) +                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only +                    AnglesVec = proj_angles, # array of angles in radians +                    ObjSize = N_size, # a scalar to define reconstructed object dimensions +                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip) +                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE') +                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets +                    tolerance = 1e-08, # tolerance to stop outer iterations earlier +                    device='gpu') +#%% +print ("Reconstructing with ADMM method using SB-TV penalty") +RecADMM_reg_sbtv = RectoolsIR.ADMM(projdata_norm, +                                  rho_const = 2000.0, \ +                                  iterationsADMM = 25, \ +                                  regularisation = 'SB_TV', \ +                                  regularisation_parameter = optimReg_sbtv,\ +                                  regularisation_iterations = 50) + +sliceSel = int(0.5*N_size) +max_val = 1 +plt.figure(figsize = (20,3)) +gs1 = gridspec.GridSpec(1, 4) +gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes.  +ax1 = plt.subplot(gs1[0]) +plt.plot(reg_param_sb_vec, erros_vec_sbtv, color='k',linewidth=2) +plt.xlabel('Regularisation parameter', fontsize=16) +plt.ylabel('RMSE value', fontsize=16) +plt.title('Regularisation selection', fontsize=19) +ax2 = plt.subplot(gs1[1]) +plt.imshow(RecADMM_reg_sbtv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr") +ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--') +plt.title('ADMM-SBTV (X-Y) view', fontsize=19) +#ax2.set_aspect('equal') +ax3 = plt.subplot(gs1[2]) +plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2) +plt.plot(RecADMM_reg_sbtv[sliceSel,sliceSel,0:N_size],linestyle='--',color='g') +plt.title('Profile', fontsize=19) +ax4 = plt.subplot(gs1[3]) +plt.imshow(RecADMM_reg_sbtv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr") +plt.title('ADMM-SBTV (Y-Z) view', fontsize=19) +plt.colorbar(ax=ax4) +plt.show() +plt.savefig('SBTV_phantom.pdf', format='pdf', dpi=1600) + +# calculate errors  +Qtools = QualityTools(phantom, RecADMM_reg_sbtv) +RMSE_admm_sbtv = Qtools.rmse() +print("Root Mean Square Error for ADMM-SB-TV is {}".format(RMSE_admm_sbtv)) + +# SSIM measure +Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_sbtv[128,:,:]*235) +win = np.array([gaussian(11, 1.5)]) +win2d = win * (win.T) +ssim_admm_sbtv = Qtools.ssim(win2d) +print("Mean SSIM ADMM-SBTV is {}".format(ssim_admm_sbtv[0])) +#%% +print ("Reconstructing with ADMM method using ROFLLT penalty") +RecADMM_reg_rofllt = RectoolsIR.ADMM(projdata_norm, +                                  rho_const = 2000.0, \ +                                  iterationsADMM = 25, \ +                                  regularisation = 'LLT_ROF', \ +                                  regularisation_parameter = optimReg_rofllt,\ +                                  regularisation_parameter2 = 0.0085,\ +                                  regularisation_iterations = 600) + +sliceSel = int(0.5*N_size) +max_val = 1 +plt.figure(figsize = (20,3)) +gs1 = gridspec.GridSpec(1, 4) +gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes.  +ax1 = plt.subplot(gs1[0]) +plt.plot(reg_param_rofllt_vec, erros_vec_rofllt, color='k',linewidth=2) +plt.xlabel('Regularisation parameter', fontsize=16) +plt.ylabel('RMSE value', fontsize=16) +plt.title('Regularisation selection', fontsize=19) +ax2 = plt.subplot(gs1[1]) +plt.imshow(RecADMM_reg_rofllt[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr") +ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--') +plt.title('ADMM-ROFLLT (X-Y) view', fontsize=19) +#ax2.set_aspect('equal') +ax3 = plt.subplot(gs1[2]) +plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2) +plt.plot(RecADMM_reg_rofllt[sliceSel,sliceSel,0:N_size],linestyle='--',color='g') +plt.title('Profile', fontsize=19) +ax4 = plt.subplot(gs1[3]) +plt.imshow(RecADMM_reg_rofllt[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr") +plt.title('ADMM-ROFLLT (Y-Z) view', fontsize=19) +plt.colorbar(ax=ax4) +plt.show() +#plt.savefig('ROFLLT_phantom.pdf', format='pdf', dpi=1600) + +# calculate errors  +Qtools = QualityTools(phantom, RecADMM_reg_rofllt) +RMSE_admm_rofllt = Qtools.rmse() +print("Root Mean Square Error for ADMM-ROF-LLT is {}".format(RMSE_admm_rofllt)) + +# SSIM measure +Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_rofllt[128,:,:]*235) +win = np.array([gaussian(11, 1.5)]) +win2d = win * (win.T) +ssim_admm_rifllt = Qtools.ssim(win2d) +print("Mean SSIM ADMM-ROFLLT is {}".format(ssim_admm_rifllt[0])) +#%% +print ("Reconstructing with ADMM method using TGV penalty") +RecADMM_reg_tgv = RectoolsIR.ADMM(projdata_norm, +                                  rho_const = 2000.0, \ +                                  iterationsADMM = 25, \ +                                  regularisation = 'TGV', \ +                                  regularisation_parameter = optimReg_tgv,\ +                                  regularisation_iterations = 600) +#%% +sliceSel = int(0.5*N_size) +max_val = 1 +plt.figure(figsize = (20,3)) +gs1 = gridspec.GridSpec(1, 4) +gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes.  +ax1 = plt.subplot(gs1[0]) +plt.plot(reg_param_tgv_vec, erros_vec_tgv, color='k',linewidth=2) +plt.xlabel('Regularisation parameter', fontsize=16) +plt.ylabel('RMSE value', fontsize=16) +plt.title('Regularisation selection', fontsize=19) +ax2 = plt.subplot(gs1[1]) +plt.imshow(RecADMM_reg_tgv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr") +ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--') +plt.title('ADMM-TGV (X-Y) view', fontsize=19) +#ax2.set_aspect('equal') +ax3 = plt.subplot(gs1[2]) +plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2) +plt.plot(RecADMM_reg_tgv[sliceSel,sliceSel,0:N_size],linestyle='--',color='g') +plt.title('Profile', fontsize=19) +ax4 = plt.subplot(gs1[3]) +plt.imshow(RecADMM_reg_tgv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr") +plt.title('ADMM-TGV (Y-Z) view', fontsize=19) +plt.colorbar(ax=ax4) +plt.show() +#plt.savefig('TGV_phantom.pdf', format='pdf', dpi=1600) + +# calculate errors  +Qtools = QualityTools(phantom, RecADMM_reg_tgv) +RMSE_admm_tgv = Qtools.rmse() +print("Root Mean Square Error for ADMM-TGV is {}".format(RMSE_admm_tgv)) + +# SSIM measure +#Create a 2d gaussian for the window parameter +Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_tgv[128,:,:]*235) +win = np.array([gaussian(11, 1.5)]) +win2d = win * (win.T) +ssim_admm_tgv = Qtools.ssim(win2d) +print("Mean SSIM ADMM-TGV is {}".format(ssim_admm_tgv[0])) +#%%
\ No newline at end of file diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_SX.py new file mode 100644 index 0000000..cdf4325 --- /dev/null +++ b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_SX.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +This demo scripts support the following publication:  +"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with  +proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner, + Philip J. Withers; Software X, 2019 +____________________________________________________________________________ +* Runs TomoPhantom software to simulate tomographic projection data with +some imaging errors and noise +* Saves the data into hdf file to be uploaded in reconstruction scripts +__________________________________________________________________________ + +>>>>> Dependencies: <<<<< +1. TomoPhantom software for phantom and data generation + +@author: Daniil Kazantsev, e:mail daniil.kazantsev@diamond.ac.uk +Apache 2.0 license +""" +import timeit +import os +import matplotlib.pyplot as plt +import numpy as np +import tomophantom +from tomophantom import TomoP3D +from tomophantom.supp.flatsgen import flats +from tomophantom.supp.normraw import normaliser_sim + +print ("Building 3D phantom using TomoPhantom software") +tic=timeit.default_timer() +model = 16 # select a model number from the library +N_size = 256 # Define phantom dimensions using a scalar value (cubic phantom) +path = os.path.dirname(tomophantom.__file__) +path_library3D = os.path.join(path, "Phantom3DLibrary.dat") +#This will generate a N_size x N_size x N_size phantom (3D) +phantom_tm = TomoP3D.Model(model, N_size, path_library3D) +toc=timeit.default_timer() +Run_time = toc - tic +print("Phantom has been built in {} seconds".format(Run_time)) + +sliceSel = int(0.5*N_size) +#plt.gray() +plt.figure()  +plt.subplot(131) +plt.imshow(phantom_tm[sliceSel,:,:],vmin=0, vmax=1) +plt.title('3D Phantom, axial view') + +plt.subplot(132) +plt.imshow(phantom_tm[:,sliceSel,:],vmin=0, vmax=1) +plt.title('3D Phantom, coronal view') + +plt.subplot(133) +plt.imshow(phantom_tm[:,:,sliceSel],vmin=0, vmax=1) +plt.title('3D Phantom, sagittal view') +plt.show() + +# Projection geometry related parameters: +Horiz_det = int(np.sqrt(2)*N_size) # detector column count (horizontal) +Vert_det = N_size # detector row count (vertical) (no reason for it to be > N) +angles_num = int(0.35*np.pi*N_size); # angles number +angles = np.linspace(0.0,179.9,angles_num,dtype='float32') # in degrees +angles_rad = angles*(np.pi/180.0) +#%% +print ("Building 3D analytical projection data with TomoPhantom") +projData3D_analyt= TomoP3D.ModelSino(model, N_size, Horiz_det, Vert_det, angles, path_library3D) + +intens_max = N_size +sliceSel = int(0.5*N_size) +plt.figure()  +plt.subplot(131) +plt.imshow(projData3D_analyt[:,sliceSel,:],vmin=0, vmax=intens_max) +plt.title('2D Projection (analytical)') +plt.subplot(132) +plt.imshow(projData3D_analyt[sliceSel,:,:],vmin=0, vmax=intens_max) +plt.title('Sinogram view') +plt.subplot(133) +plt.imshow(projData3D_analyt[:,:,sliceSel],vmin=0, vmax=intens_max) +plt.title('Tangentogram view') +plt.show() +#%% +print ("Simulate flat fields, add noise and normalise projections...") +flatsnum = 20 # generate 20 flat fields +flatsSIM = flats(Vert_det, Horiz_det, maxheight = 0.1, maxthickness = 3, sigma_noise = 0.2, sigmasmooth = 3, flatsnum=flatsnum) + +plt.figure()  +plt.imshow(flatsSIM[0,:,:],vmin=0, vmax=1) +plt.title('A selected simulated flat-field') +#%% +# Apply normalisation of data and add noise +flux_intensity = 60000 # controls the level of noise  +sigma_flats = 0.01 # contro the level of noise in flats (higher creates more ring artifacts) +projData3D_norm = normaliser_sim(projData3D_analyt, flatsSIM, sigma_flats, flux_intensity) + +intens_max = N_size +sliceSel = int(0.5*N_size) +plt.figure()  +plt.subplot(131) +plt.imshow(projData3D_norm[:,sliceSel,:],vmin=0, vmax=intens_max) +plt.title('2D Projection (erroneous)') +plt.subplot(132) +plt.imshow(projData3D_norm[sliceSel,:,:],vmin=0, vmax=intens_max) +plt.title('Sinogram view') +plt.subplot(133) +plt.imshow(projData3D_norm[:,:,sliceSel],vmin=0, vmax=intens_max) +plt.title('Tangentogram view') +plt.show() +#%% +import h5py +import time +time_label = int(time.time()) +# Saving generated data with a unique time label +h5f = h5py.File('TomoSim_data'+str(time_label)+'.h5', 'w') +h5f.create_dataset('phantom', data=phantom_tm) +h5f.create_dataset('projdata_norm', data=projData3D_norm) +h5f.create_dataset('proj_angles', data=angles_rad) +h5f.close() +#%%
\ No newline at end of file diff --git a/Wrappers/Python/demos/SoftwareX_supp/Readme.md b/Wrappers/Python/demos/SoftwareX_supp/Readme.md new file mode 100644 index 0000000..54e83f1 --- /dev/null +++ b/Wrappers/Python/demos/SoftwareX_supp/Readme.md @@ -0,0 +1,26 @@ + +# SoftwareX publication [1] supporting files + +## Decription: +The scripts here support publication in SoftwareX journal [1] to ensure reproducibility of the research. The scripts linked with data shared at Zenodo.  + +## Data: +Data is shared at Zenodo [here](https://doi.org/10.5281/zenodo.2578893) + +## Dependencies: +1. [ASTRA toolbox](https://github.com/astra-toolbox/astra-toolbox): `conda install -c astra-toolbox astra-toolbox` +2. [TomoRec](https://github.com/dkazanc/TomoRec): `conda install -c dkazanc tomorec` +3. [Tomophantom](https://github.com/dkazanc/TomoPhantom): `conda install tomophantom -c ccpi` + +## Files description:  +- `Demo_SimulData_SX.py` - simulates 3D projection data using [Tomophantom](https://github.com/dkazanc/TomoPhantom) software. One can skip this module if the data is taken from [Zenodo](https://doi.org/10.5281/zenodo.2578893) +- `Demo_SimulData_ParOptimis_SX.py` - runs computationally extensive calculations for optimal regularisation parameters, the result are saved into directory `optim_param`. This script can be also skipped.  +- `Demo_SimulData_Recon_SX.py` - using established regularisation parameters, one runs iterative reconstruction +- `Demo_RealData_Recon_SX.py` - runs real data reconstructions. Can be quite intense on memory so reduce the size of the reconstructed volume if needed.  + +### References: +[1] "CCPi-Regularisation Toolkit for computed tomographic image reconstruction with proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner and Philip J. Withers; SoftwareX, 2019.  + +### Acknowledgments: +CCPi-RGL software is a product of the [CCPi](https://www.ccpi.ac.uk/) group, STFC SCD software developers and Diamond Light Source (DLS). Any relevant questions/comments can be e-mailed to Daniil Kazantsev at dkazanc@hotmail.com + diff --git a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5 b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5 Binary files differnew file mode 100644 index 0000000..63bc4fd --- /dev/null +++ b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5 diff --git a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5 b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5 Binary files differnew file mode 100644 index 0000000..03c0c14 --- /dev/null +++ b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5 diff --git a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 Binary files differnew file mode 100644 index 0000000..056d915 --- /dev/null +++ b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 diff --git a/demos/demoMatlab_denoise.m b/demos/demoMatlab_denoise.m index 5135129..5e92ee1 100644 --- a/demos/demoMatlab_denoise.m +++ b/demos/demoMatlab_denoise.m @@ -13,87 +13,119 @@ Im = double(imread('lena_gray_512.tif'))/255;  % loading image  u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;  figure; imshow(u0, [0 1]); title('Noisy image'); -lambda_reg = 0.03; % regularsation parameter for all methods  %%  fprintf('Denoise using the ROF-TV model (CPU) \n'); +lambda_reg = 0.017; % regularsation parameter for all methods  tau_rof = 0.0025; % time-marching constant  -iter_rof = 750; % number of ROF iterations +iter_rof = 1200; % number of ROF iterations  tic; u_rof = ROF_TV(single(u0), lambda_reg, iter_rof, tau_rof); toc;   energyfunc_val_rof = TV_energy(single(u_rof),single(u0),lambda_reg, 1);  % get energy function value  rmseROF = (RMSE(u_rof(:),Im(:)));  fprintf('%s %f \n', 'RMSE error for ROF-TV is:', rmseROF); +[ssimval] = ssim(u_rof*255,single(Im)*255); +fprintf('%s %f \n', 'MSSIM error for ROF-TV is:', ssimval);  figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');  %%  % fprintf('Denoise using the ROF-TV model (GPU) \n');  % tau_rof = 0.0025; % time-marching constant  -% iter_rof = 750; % number of ROF iterations +% iter_rof = 1200; % number of ROF iterations  % tic; u_rofG = ROF_TV_GPU(single(u0), lambda_reg, iter_rof, tau_rof); toc;  % figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)');  %%  fprintf('Denoise using the FGP-TV model (CPU) \n'); -iter_fgp = 1300; % number of FGP iterations -epsil_tol =  1.0e-06; % tolerance +lambda_reg = 0.033; +iter_fgp = 300; % number of FGP iterations +epsil_tol =  1.0e-09; % tolerance  tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc;   energyfunc_val_fgp = TV_energy(single(u_fgp),single(u0),lambda_reg, 1); % get energy function value  rmseFGP = (RMSE(u_fgp(:),Im(:)));  fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmseFGP); +[ssimval] = ssim(u_fgp*255,single(Im)*255); +fprintf('%s %f \n', 'MSSIM error for FGP-TV is:', ssimval);  figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)'); -  %%  % fprintf('Denoise using the FGP-TV model (GPU) \n'); -% iter_fgp = 1300; % number of FGP iterations -% epsil_tol =  1.0e-06; % tolerance +% iter_fgp = 300; % number of FGP iterations +% epsil_tol =  1.0e-09; % tolerance  % tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc;   % figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)');  %%  fprintf('Denoise using the SB-TV model (CPU) \n'); -iter_sb = 150; % number of SB iterations -epsil_tol =  1.0e-06; % tolerance +iter_sb = 80; % number of SB iterations +epsil_tol =  1.0e-08; % tolerance  tic; u_sb = SB_TV(single(u0), lambda_reg, iter_sb, epsil_tol); toc;   energyfunc_val_sb = TV_energy(single(u_sb),single(u0),lambda_reg, 1);  % get energy function value  rmseSB = (RMSE(u_sb(:),Im(:)));  fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmseSB); +[ssimval] = ssim(u_sb*255,single(Im)*255); +fprintf('%s %f \n', 'MSSIM error for SB-TV is:', ssimval);  figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)');  %%  % fprintf('Denoise using the SB-TV model (GPU) \n'); -% iter_sb = 150; % number of SB iterations +% iter_sb = 80; % number of SB iterations  % epsil_tol =  1.0e-06; % tolerance  % tic; u_sbG = SB_TV_GPU(single(u0), lambda_reg, iter_sb, epsil_tol); toc;   % figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)');  %% +fprintf('Denoise using Nonlinear-Diffusion model (CPU) \n'); +iter_diff = 450; % number of diffusion iterations +lambda_regDiff = 0.025; % regularisation for the diffusivity  +sigmaPar = 0.015; % edge-preserving parameter +tau_param = 0.02; % time-marching constant  +tic; u_diff = NonlDiff(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc;  +rmseDiffus = (RMSE(u_diff(:),Im(:))); +fprintf('%s %f \n', 'RMSE error for Nonlinear Diffusion is:', rmseDiffus); +[ssimval] = ssim(u_diff*255,single(Im)*255); +fprintf('%s %f \n', 'MSSIM error for NDF is:', ssimval); +figure; imshow(u_diff, [0 1]); title('Diffusion denoised image (CPU)'); +%% +% fprintf('Denoise using Nonlinear-Diffusion model (GPU) \n'); +% iter_diff = 450; % number of diffusion iterations +% lambda_regDiff = 0.025; % regularisation for the diffusivity  +% sigmaPar = 0.015; % edge-preserving parameter +% tau_param = 0.025; % time-marching constant  +% tic; u_diff_g = NonlDiff_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc;  +% figure; imshow(u_diff_g, [0 1]); title('Diffusion denoised image (GPU)'); +%%  fprintf('Denoise using the TGV model (CPU) \n'); -lambda_TGV = 0.045; % regularisation parameter +lambda_TGV = 0.034; % regularisation parameter  alpha1 = 1.0; % parameter to control the first-order term -alpha0 = 2.0; % parameter to control the second-order term -iter_TGV = 1500; % number of Primal-Dual iterations for TGV +alpha0 = 1.0; % parameter to control the second-order term +iter_TGV = 500; % number of Primal-Dual iterations for TGV  tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc;   rmseTGV = (RMSE(u_tgv(:),Im(:)));  fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV); +[ssimval] = ssim(u_tgv*255,single(Im)*255); +fprintf('%s %f \n', 'MSSIM error for TGV is:', ssimval);  figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)'); - +%%  % fprintf('Denoise using the TGV model (GPU) \n'); -% lambda_TGV = 0.045; % regularisation parameter +% lambda_TGV = 0.034; % regularisation parameter  % alpha1 = 1.0; % parameter to control the first-order term -% alpha0 = 2.0; % parameter to control the second-order term -% iter_TGV = 1500; % number of Primal-Dual iterations for TGV +% alpha0 = 1.0; % parameter to control the second-order term +% iter_TGV = 500; % number of Primal-Dual iterations for TGV  % tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc;   % rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:)));  % fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu); +% [ssimval] = ssim(u_tgv_gpu*255,single(Im)*255); +% fprintf('%s %f \n', 'MSSIM error for TGV is:', ssimval);  % figure; imshow(u_tgv_gpu, [0 1]); title('TGV denoised image (GPU)');  %%  fprintf('Denoise using the ROF-LLT model (CPU) \n'); -lambda_ROF = lambda_reg; % ROF regularisation parameter -lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter -iter_LLT = 1; % iterations  +lambda_ROF = 0.016; % ROF regularisation parameter +lambda_LLT = lambda_reg*0.25; % LLT regularisation parameter +iter_LLT = 500; % iterations   tau_rof_llt = 0.0025; % time-marching constant   tic; u_rof_llt = LLT_ROF(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc;   rmseROFLLT = (RMSE(u_rof_llt(:),Im(:)));  fprintf('%s %f \n', 'RMSE error for TGV is:', rmseROFLLT); +[ssimval] = ssim(u_rof_llt*255,single(Im)*255); +fprintf('%s %f \n', 'MSSIM error for ROFLLT is:', ssimval);  figure; imshow(u_rof_llt, [0 1]); title('ROF-LLT denoised image (CPU)');  %%  % fprintf('Denoise using the ROF-LLT model (GPU) \n'); -% lambda_ROF = lambda_reg; % ROF regularisation parameter -% lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter +% lambda_ROF = 0.016; % ROF regularisation parameter +% lambda_LLT = lambda_reg*0.25; % LLT regularisation parameter  % iter_LLT = 500; % iterations   % tau_rof_llt = 0.0025; % time-marching constant   % tic; u_rof_llt_g = LLT_ROF_GPU(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc;  @@ -101,32 +133,16 @@ figure; imshow(u_rof_llt, [0 1]); title('ROF-LLT denoised image (CPU)');  % fprintf('%s %f \n', 'RMSE error for TGV is:', rmseROFLLT_g);  % figure; imshow(u_rof_llt_g, [0 1]); title('ROF-LLT denoised image (GPU)');  %% -fprintf('Denoise using Nonlinear-Diffusion model (CPU) \n'); -iter_diff = 800; % number of diffusion iterations -lambda_regDiff = 0.025; % regularisation for the diffusivity  -sigmaPar = 0.015; % edge-preserving parameter -tau_param = 0.025; % time-marching constant  -tic; u_diff = NonlDiff(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc;  -rmseDiffus = (RMSE(u_diff(:),Im(:))); -fprintf('%s %f \n', 'RMSE error for Nonlinear Diffusion is:', rmseDiffus); -figure; imshow(u_diff, [0 1]); title('Diffusion denoised image (CPU)'); -%% -% fprintf('Denoise using Nonlinear-Diffusion model (GPU) \n'); -% iter_diff = 800; % number of diffusion iterations -% lambda_regDiff = 0.025; % regularisation for the diffusivity  -% sigmaPar = 0.015; % edge-preserving parameter -% tau_param = 0.025; % time-marching constant  -% tic; u_diff_g = NonlDiff_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc;  -% figure; imshow(u_diff_g, [0 1]); title('Diffusion denoised image (GPU)'); -%%  fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');  iter_diff = 800; % number of diffusion iterations  lambda_regDiff = 3.5; % regularisation for the diffusivity  -sigmaPar = 0.02; % edge-preserving parameter +sigmaPar = 0.021; % edge-preserving parameter  tau_param = 0.0015; % time-marching constant   tic; u_diff4 = Diffusion_4thO(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc;   rmseDiffHO = (RMSE(u_diff4(:),Im(:)));  fprintf('%s %f \n', 'RMSE error for Fourth-order anisotropic diffusion is:', rmseDiffHO); +[ssimval] = ssim(u_diff4*255,single(Im)*255); +fprintf('%s %f \n', 'MSSIM error for DIFF4th is:', ssimval);  figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)');  %%  % fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n'); @@ -146,10 +162,12 @@ tic; [H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow,  %%  fprintf('Denoise using Non-local Total Variation (CPU) \n');  iter_nltv = 3; % number of nltv iterations -lambda_nltv = 0.05; % regularisation parameter for nltv +lambda_nltv = 0.055; % regularisation parameter for nltv  tic; u_nltv = Nonlocal_TV(single(u0), H_i, H_j, 0, Weights, lambda_nltv, iter_nltv); toc;   rmse_nltv = (RMSE(u_nltv(:),Im(:)));  fprintf('%s %f \n', 'RMSE error for Non-local Total Variation is:', rmse_nltv); +[ssimval] = ssim(u_nltv*255,single(Im)*255); +fprintf('%s %f \n', 'MSSIM error for NLTV is:', ssimval);  figure; imagesc(u_nltv, [0 1]); colormap(gray); daspect([1 1 1]); title('Non-local Total Variation denoised image (CPU)');  %%  %>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< % diff --git a/src/Core/regularisers_GPU/TGV_GPU_core.cu b/src/Core/regularisers_GPU/TGV_GPU_core.cu index e4abf72..849219b 100644 --- a/src/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/src/Core/regularisers_GPU/TGV_GPU_core.cu @@ -38,49 +38,49 @@ limitations under the License.   * [1] K. Bredies "Total Generalized Variation"   */ +    +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 +  #define BLKXSIZE 8  #define BLKYSIZE 8 -#define BLKZSIZE 8     -     -#define BLKXSIZE2D 8 -#define BLKYSIZE2D 8 -#define EPS 1.0e-7 -#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +#define BLKZSIZE 8 +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )  /********************************************************************/  /***************************2D Functions*****************************/  /********************************************************************/ -__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma) +__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma)  {     -	int num_total = dimX*dimY; -	const int i = blockDim.x * blockIdx.x + threadIdx.x; -        const int j = blockDim.y * blockIdx.y + threadIdx.y; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; -        int index = i + dimX*j;        +        long index = i + (dimX)*j;        -        if (index < num_total) {   +        if ((i < dimX) && (j < dimY)) {          /* symmetric boundary conditions (Neuman) */                      if ((i >= 0) && (i < dimX-1))  P1[index] += sigma*((U[(i+1) + dimX*j] - U[index])  - V1[index]);  -        else P1[index] += sigma*(-V1[index]);  +        else if  (i == dimX-1) P1[index] -= sigma*(V1[index]);  +        else P1[index] = 0.0f;           if ((j >= 0) && (j < dimY-1))  P2[index] += sigma*((U[i + dimX*(j+1)] - U[index])  - V2[index]); -        else P2[index] += sigma*(-V2[index]);                     +        else if  (j == dimY-1) P2[index] -= sigma*(V2[index]);                     +        else P2[index] = 0.0f;   	}  	return;  }  -__global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1) +__global__ void ProjP_2D_kernel(float *P1, float *P2, long dimX, long dimY, float alpha1)  {     	float grad_magn; -	int num_total = dimX*dimY; -	const int i = blockDim.x * blockIdx.x + threadIdx.x; -        const int j = blockDim.y * blockIdx.y + threadIdx.y; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; -        int index = i + dimX*j; +        long index = i + (dimX)*j;     -        if (index < num_total) {             -            grad_magn = sqrtf(pow(P1[index],2) + pow(P2[index],2)); +        if ((i < dimX) && (j < dimY)) {             +            grad_magn = sqrtf(powf(P1[index],2) + powf(P2[index],2));              grad_magn = grad_magn/alpha1;              if (grad_magn > 1.0f) {                  P1[index] /= grad_magn; @@ -90,17 +90,15 @@ __global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float  	return;  }  -__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma) +__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma)  {          float q1, q2, q11, q22; -	int num_total = dimX*dimY; -	 -	const int i = blockDim.x * blockIdx.x + threadIdx.x; -        const int j = blockDim.y * blockIdx.y + threadIdx.y; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; -        int index = i + dimX*j;       +        long index = i + (dimX)*j;           -        if (index < num_total) { +        if ((i < dimX) && (j < dimY)) {              q1 = 0.0f; q2  = 0.0f; q11  = 0.0f; q22  = 0.0f;  	        if ((i >= 0) && (i < dimX-1))  {             @@ -120,18 +118,16 @@ __global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, floa  	return;  }  -__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0) +__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alpha0)  {  	float grad_magn; -        int num_total = dimX*dimY; - -	const int i = blockDim.x * blockIdx.x + threadIdx.x; -        const int j = blockDim.y * blockIdx.y + threadIdx.y; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; -        int index = i + dimX*j;         +        long index = i + (dimX)*j;      -        if (index < num_total) { -            grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2)); +        if ((i < dimX) && (j < dimY)) {   +            grad_magn = sqrt(powf(Q1[index],2) + powf(Q2[index],2) + 2*powf(Q3[index],2));              grad_magn = grad_magn/alpha0;              if (grad_magn > 1.0f) {                  Q1[index] /= grad_magn; @@ -142,26 +138,26 @@ __global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int d  	return;  }  -__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau) +__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau)  {  	float P_v1, P_v2, div; -	int num_total = dimX*dimY; -	 -	const int i = blockDim.x * blockIdx.x + threadIdx.x; -        const int j = blockDim.y * blockIdx.y + threadIdx.y;        +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; -        int index = i + dimX*j; +        long index = i + (dimX)*j;     -        if (index < num_total) {  -	P_v1 = 0.0f; P_v2 = 0.0f; -         -        if (i == 0) P_v1 = P1[index]; -        if (i == dimX-1) P_v1 = -P1[(i-1) + dimX*j]; +        if ((i < dimX) && (j < dimY)) { +			                  if ((i > 0) && (i < dimX-1)) P_v1 = P1[index] - P1[(i-1) + dimX*j]; +        else if (i == dimX-1) P_v1 = -P1[(i-1) + dimX*j]; +        else if (i == 0) P_v1 = P1[index]; +        else P_v1 = 0.0f; -        if (j == 0) P_v2 = P2[index]; -        if (j == dimY-1) P_v2 = -P2[i + dimX*(j-1)];        	if ((j > 0) && (j < dimY-1))  P_v2 = P2[index] - P2[i + dimX*(j-1)]; +      	else if (j == dimY-1) P_v2 = -P2[i + dimX*(j-1)]; +      	else if (j == 0) P_v2 = P2[index]; +      	else P_v2 = 0.0f; +          div = P_v1 + P_v2;          U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); @@ -169,18 +165,19 @@ __global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, in  	return;  }  -__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau) +__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau)  {  	float q1, q3_x, q2, q3_y, div1, div2; -	int num_total = dimX*dimY; -	int i1, j1; +	long i1, j1; -	const int i = blockDim.x * blockIdx.x + threadIdx.x; -        const int j = blockDim.y * blockIdx.y + threadIdx.y; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; -        int index = i + dimX*j;       +        long index = i + (dimX)*j;         -	if (index < num_total) {    +        if ((i < dimX) && (j < dimY)) {      + +	q1 = 0.0f; q3_x = 0.0f; q2 = 0.0f; q3_y = 0.0f; div1 = 0.0f; div2= 0.0f;  	    i1 = (i-1) + dimX*j;              j1 = (i) + dimX*(j-1); @@ -222,94 +219,95 @@ __global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float  	return;  }  -__global__ void copyIm_TGV_kernel(float *U, float *U_old, int N, int M, int num_total) +__global__ void copyIm_TGV_kernel(float *U, float *U_old, long dimX, long dimY)  { -    int xIndex = blockDim.x * blockIdx.x + threadIdx.x; -    int yIndex = blockDim.y * blockIdx.y + threadIdx.y; -     -    int index = xIndex + N*yIndex; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +         +        long index = i + (dimX)*j;     -    if (index < num_total)   { +    if ((i < dimX) && (j < dimY)) {          U_old[index] = U[index];      }  } -__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total) +__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, long dimX, long dimY)  { -    int xIndex = blockDim.x * blockIdx.x + threadIdx.x; -    int yIndex = blockDim.y * blockIdx.y + threadIdx.y; -     -    int index = xIndex + N*yIndex; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +         +        long index = i + (dimX)*j;     -    if (index < num_total)   { +    if ((i < dimX) && (j < dimY)) {          V1_old[index] = V1[index];          V2_old[index] = V2[index];      }  } -__global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total) +__global__ void newU_kernel(float *U, float *U_old, long dimX, long dimY)  { -    int xIndex = blockDim.x * blockIdx.x + threadIdx.x; -    int yIndex = blockDim.y * blockIdx.y + threadIdx.y; -     -    int index = xIndex + N*yIndex; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +         +        long index = i + (dimX)*j;     -    if (index < num_total)	{ +    if ((i < dimX) && (j < dimY)) {          U[index] = 2.0f*U[index] - U_old[index];      }  } -__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total) +__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, long dimX, long dimY)  { -    int xIndex = blockDim.x * blockIdx.x + threadIdx.x; -    int yIndex = blockDim.y * blockIdx.y + threadIdx.y; -     -    int index = xIndex + N*yIndex; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +         +        long index = i + (dimX)*j;     -    if (index < num_total)	{ +    if ((i < dimX) && (j < dimY)) {          V1[index] = 2.0f*V1[index] - V1_old[index];          V2[index] = 2.0f*V2[index] - V2_old[index];        }  } +  /********************************************************************/  /***************************3D Functions*****************************/  /********************************************************************/ -__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma) +__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma)  {     -	int index; -	const int i = blockDim.x * blockIdx.x + threadIdx.x; -        const int j = blockDim.y * blockIdx.y + threadIdx.y; -        const int k = blockDim.z * blockIdx.z + threadIdx.z; - -   	int num_total = dimX*dimY*dimZ; -         -        index = (dimX*dimY)*k + i*dimX+j;     -        if (index < num_total) {                 +	long index; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z; +                +        index = (dimX*dimY)*k + i*dimX+j; +             +        if ((i < dimX) && (j < dimY)  && (k < dimZ)) {                         /* symmetric boundary conditions (Neuman) */                          if ((i >= 0) && (i < dimX-1)) P1[index] += sigma*((U[(dimX*dimY)*k + (i+1)*dimX+j] - U[index])  - V1[index]);   -	    else P1[index] += sigma*(-V1[index]);  +	    else if (i == dimX-1) P1[index] -= sigma*(V1[index]);  +	    else P1[index] = 0.0f;  	    if ((j >= 0) && (j < dimY-1)) P2[index] += sigma*((U[(dimX*dimY)*k + i*dimX+(j+1)] - U[index])  - V2[index]);         -	    else P2[index] += sigma*(-V2[index]);                 +	    else if (j == dimY-1) P2[index] -= sigma*(V2[index]);                 +	    else P2[index] = 0.0f;	            	    if ((k >= 0) && (k < dimZ-1)) P3[index] += sigma*((U[(dimX*dimY)*(k+1) + i*dimX+(j)] - U[index])  - V3[index]);         -	    else P3[index] += sigma*(-V3[index]);                	     -	 }	 +      	    else if (k == dimZ-1) P3[index] -= sigma*(V3[index]);                	     +      	    else P3[index] = 0.0f; +	 }	   	return; -}  +} -__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float alpha1) +__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1)  {     	float grad_magn; -   	int index; -   	int num_total = dimX*dimY*dimZ; -   	 -	int i = blockDim.x * blockIdx.x + threadIdx.x; -        int j = blockDim.y * blockIdx.y + threadIdx.y; -        int k = blockDim.z * blockIdx.z + threadIdx.z; +	long index; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z;          index = (dimX*dimY)*k + i*dimX+j;     -        if (index < num_total) {             -            grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1; +        if ((i < dimX) && (j < dimY)  && (k < dimZ)) {                      +            grad_magn = (sqrtf(powf(P1[index],2) + powf(P2[index],2) + powf(P3[index],2)))/alpha1;              if (grad_magn > 1.0f) {                  P1[index] /= grad_magn;                  P2[index] /= grad_magn; @@ -319,23 +317,22 @@ __global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int d  	return;  } -__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float sigma) +__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma)  { -	int index;  -        float q1, q2, q3, q11, q22, q33, q44, q55, q66; -   	int num_total = dimX*dimY*dimZ; -   	 -	int i = blockDim.x * blockIdx.x + threadIdx.x; -        int j = blockDim.y * blockIdx.y + threadIdx.y; -        int k = blockDim.z * blockIdx.z + threadIdx.z; +        float q1, q2, q3, q11, q22, q33, q44, q55, q66; +	long index; +	 +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z;          index = (dimX*dimY)*k + i*dimX+j;  -        int i1 = (dimX*dimY)*k + (i+1)*dimX+j; -        int j1 = (dimX*dimY)*k + (i)*dimX+(j+1); -        int k1 = (dimX*dimY)*(k+1) + (i)*dimX+(j); +        long i1 = (dimX*dimY)*k + (i+1)*dimX+j; +        long j1 = (dimX*dimY)*k + (i)*dimX+(j+1); +        long k1 = (dimX*dimY)*(k+1) + (i)*dimX+(j); -        if (index < num_total) { +        if ((i < dimX) && (j < dimY)  && (k < dimZ)) {              	q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;  	        /* boundary conditions (Neuman) */ @@ -362,20 +359,18 @@ __global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, floa  	return;  } -__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0) +__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0)  {  	float grad_magn; -	int index; -   	int num_total = dimX*dimY*dimZ; -   	 -	int i = blockDim.x * blockIdx.x + threadIdx.x; -        int j = blockDim.y * blockIdx.y + threadIdx.y; -        int k = blockDim.z * blockIdx.z + threadIdx.z;         +	long index; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z;                index = (dimX*dimY)*k + i*dimX+j;  -        if (index < num_total) {	 -	grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2)); +        if ((i < dimX) && (j < dimY)  && (k < dimZ)) {            +	grad_magn = sqrtf(powf(Q1[index],2) + powf(Q2[index],2) + powf(Q3[index],2) + 2.0f*powf(Q4[index],2) + 2.0f*powf(Q5[index],2) + 2.0f*powf(Q6[index],2));              grad_magn = grad_magn/alpha0;              if (grad_magn > 1.0f) {                  Q1[index] /= grad_magn; @@ -388,60 +383,56 @@ __global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, floa  	}  	return;  }  -__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float lambda, float tau) +__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau)  {  	float P_v1, P_v2, P_v3, div; -	int index; -   	int num_total = dimX*dimY*dimZ; -   	 -   	 -	int i = blockDim.x * blockIdx.x + threadIdx.x; -        int j = blockDim.y * blockIdx.y + threadIdx.y; -        int k = blockDim.z * blockIdx.z + threadIdx.z;        +	long index; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z;                index = (dimX*dimY)*k + i*dimX+j;  -        int i1 = (dimX*dimY)*k + (i-1)*dimX+j; -        int j1 = (dimX*dimY)*k + (i)*dimX+(j-1); -        int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);         -         -        if (index < num_total) {  -	P_v1 = 0.0f; P_v2 = 0.0f; P_v3 = 0.0f; +        long i1 = (dimX*dimY)*k + (i-1)*dimX+j; +        long j1 = (dimX*dimY)*k + (i)*dimX+(j-1); +        long k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);         -        if (i == 0) P_v1 = P1[index]; -        if (i == dimX-1) P_v1 = -P1[i1]; +        if ((i < dimX) && (j < dimY)  && (k < dimZ)) {            +          if ((i > 0) && (i < dimX-1)) P_v1 = P1[index] - P1[i1]; +        else if (i == dimX-1) P_v1 = -P1[i1]; +        else if (i == 0) P_v1 = P1[index]; +        else P_v1 = 0.0f; -        if (j == 0) P_v2 = P2[index]; -        if (j == dimY-1) P_v2 = -P2[j1];        	if ((j > 0) && (j < dimY-1))  P_v2 = P2[index] - P2[j1]; -         -        if (k == 0) P_v3 = P3[index]; -        if (k == dimZ-1) P_v3 = -P3[k1]; -      	if ((k > 0) && (k < dimZ-1))  P_v3 = P3[index] - P3[k1]; -      -                       +      	else if (j == dimY-1) P_v2 = -P2[j1]; +        else if (j == 0) P_v2 = P2[index]; +        else P_v2 = 0.0f; + +      	if ((k > 0) && (k < dimZ-1))  P_v3 = P3[index] - P3[k1];                 +      	else if (k == dimZ-1) P_v3 = -P3[k1]; +        else if (k == 0) P_v3 = P3[index]; +        else P_v3 = 0.0f;         +                               div = P_v1 + P_v2 + P_v3;          U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);               	}  	return;  } -__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float tau) +__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau)  {  	float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3; -	int index; -	int num_total = dimX*dimY*dimZ; -	 -	int i = blockDim.x * blockIdx.x + threadIdx.x; -        int j = blockDim.y * blockIdx.y + threadIdx.y; -        int k = blockDim.z * blockIdx.z + threadIdx.z; +	long index; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z;          index = (dimX*dimY)*k + i*dimX+j;  -        int i1 = (dimX*dimY)*k + (i-1)*dimX+j; -        int j1 = (dimX*dimY)*k + (i)*dimX+(j-1); -        int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);     +        long i1 = (dimX*dimY)*k + (i-1)*dimX+j; +        long j1 = (dimX*dimY)*k + (i)*dimX+(j-1); +        long k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);              /* Q1 - Q11, Q2 - Q22, Q3 -  Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/        -        if (index < num_total) {   +        if ((i < dimX) && (j < dimY)  && (k < dimZ)) {               	 /* boundary conditions (Neuman) */                      if ((i > 0) && (i < dimX-1)) { @@ -507,64 +498,60 @@ __global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float  	return;  }  -__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total) +__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, long dimX, long dimY, long dimZ)  { -    int index; -	 -    int i = blockDim.x * blockIdx.x + threadIdx.x; -    int j = blockDim.y * blockIdx.y + threadIdx.y; -    int k = blockDim.z * blockIdx.z + threadIdx.z;     +    long index; +    const long i = blockDim.x * blockIdx.x + threadIdx.x; +    const long j = blockDim.y * blockIdx.y + threadIdx.y; +    const long k = blockDim.z * blockIdx.z + threadIdx.z;       index = (dimX*dimY)*k + j*dimX+i; -    if (index < num_total) {	 +    if ((i < dimX) && (j < dimY)  && (k < dimZ)) {                   	U_old[index] = U[index];	      }  } -__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total) +__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)  { -    int index; -	 -    int i = blockDim.x * blockIdx.x + threadIdx.x; -    int j = blockDim.y * blockIdx.y + threadIdx.y; -    int k = blockDim.z * blockIdx.z + threadIdx.z;     +	long index; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z;       index = (dimX*dimY)*k + j*dimX+i; -    if (index < num_total) {	 +    if ((i < dimX) && (j < dimY)  && (k < dimZ)) {           	        	V1_old[index] = V1[index];  	V2_old[index] = V2[index];  	V3_old[index] = V3[index];	      }  } -__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total) +__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ)  { -     int index; -	 -     int i = blockDim.x * blockIdx.x + threadIdx.x; -     int j = blockDim.y * blockIdx.y + threadIdx.y; -     int k = blockDim.z * blockIdx.z + threadIdx.z;     +	long index; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z;       index = (dimX*dimY)*k + j*dimX+i; -    if (index < num_total) { +    if ((i < dimX) && (j < dimY)  && (k < dimZ)) {             	   U[index] = 2.0f*U[index] - U_old[index];      }  }   -__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total) +__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)  { -     int index; -	 -     int i = blockDim.x * blockIdx.x + threadIdx.x; -     int j = blockDim.y * blockIdx.y + threadIdx.y; -     int k = blockDim.z * blockIdx.z + threadIdx.z;     +	long index; +	const long i = blockDim.x * blockIdx.x + threadIdx.x; +        const long j = blockDim.y * blockIdx.y + threadIdx.y; +        const long k = blockDim.z * blockIdx.z + threadIdx.z;       index = (dimX*dimY)*k + j*dimX+i; -    if (index < num_total) { +    if ((i < dimX) && (j < dimY)  && (k < dimZ)) {             	   V1[index] = 2.0f*V1[index] - V1_old[index];  	   V2[index] = 2.0f*V2[index] - V2_old[index];  	   V3[index] = 2.0f*V3[index] - V3_old[index]; @@ -576,14 +563,20 @@ __global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/  extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ)  { -	int dimTotal, dev = 0; -	CHECK(cudaSetDevice(dev)); -	 -	dimTotal = dimX*dimY*dimZ; + +        int deviceCount = -1; // number of devices +        cudaGetDeviceCount(&deviceCount); +        if (deviceCount == 0) { +         fprintf(stderr, "No CUDA devices found\n"); +        return -1; +        }	 + +	long dimTotal = (long)(dimX*dimY*dimZ); +          float *U_old, *d_U0, *d_U, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma; -        tau = pow(L2,-0.5); -        sigma = pow(L2,-0.5); +        tau = powf(L2,-0.5f); +        sigma = tau;          CHECK(cudaMalloc((void**)&d_U0,dimTotal*sizeof(float)));          CHECK(cudaMalloc((void**)&d_U,dimTotal*sizeof(float))); @@ -611,41 +604,51 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo          if (dimZ == 1) {  	/*2D case */ -        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); -        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); +	dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); +	dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));          for(int n=0; n < iterationsNumb; n++) {  	    /* Calculate Dual Variable P */ -            DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma); -      	    CHECK(cudaDeviceSynchronize()); +            DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, (long)(dimX), (long)(dimY), sigma); +      	    checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*Projection onto convex set for P*/ -            ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1); -            CHECK(cudaDeviceSynchronize()); +            ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, (long)(dimX), (long)(dimY), alpha1); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );                          /* Calculate Dual Variable Q */ -            DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma); -            CHECK(cudaDeviceSynchronize()); +            DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), sigma); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*Projection onto convex set for Q*/ -            ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, dimX, dimY, alpha0); -            CHECK(cudaDeviceSynchronize()); +            ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, (long)(dimX), (long)(dimY), alpha0); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*saving U into U_old*/ -            copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal); -            CHECK(cudaDeviceSynchronize()); +            copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY)); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*adjoint operation  -> divergence and projection of P*/ -            DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, dimX, dimY, lambda, tau); -            CHECK(cudaDeviceSynchronize()); +            DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, (long)(dimX), (long)(dimY), lambda, tau); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*get updated solution U*/ -            newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal); -            CHECK(cudaDeviceSynchronize()); +            newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY)); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*saving V into V_old*/ -            copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal); -            CHECK(cudaDeviceSynchronize()); +            copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, (long)(dimX), (long)(dimY)); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /* upd V*/ -            UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau); -            CHECK(cudaDeviceSynchronize()); +            UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), tau); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*get new V*/ -            newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal); -            CHECK(cudaDeviceSynchronize());             +            newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, (long)(dimX), (long)(dimY)); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );  	    }          }          else { @@ -671,35 +674,45 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo          for(int n=0; n < iterationsNumb; n++) {  	    /* Calculate Dual Variable P */ -            DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma); -	    CHECK(cudaDeviceSynchronize()); +            DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), sigma); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*Projection onto convex set for P*/ -            ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, alpha1); -            CHECK(cudaDeviceSynchronize()); +            ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), alpha1); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /* Calculate Dual Variable Q */ -            DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, sigma); -            CHECK(cudaDeviceSynchronize()); +            DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), sigma); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );               /*Projection onto convex set for Q*/ -            ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, alpha0); -            CHECK(cudaDeviceSynchronize()); +            ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), alpha0); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*saving U into U_old*/ -            copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal); -            CHECK(cudaDeviceSynchronize()); +            copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ)); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*adjoint operation  -> divergence and projection of P*/ -            DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, dimX, dimY, dimZ, lambda, tau); -            CHECK(cudaDeviceSynchronize()); +            DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, tau); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*get updated solution U*/ -            newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal); -            CHECK(cudaDeviceSynchronize()); +            newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ)); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*saving V into V_old*/ -            copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);            -            CHECK(cudaDeviceSynchronize()); +            copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));            +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /* upd V*/ -            UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, tau); -            CHECK(cudaDeviceSynchronize()); +            UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );              /*get new V*/ -            newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal); -            CHECK(cudaDeviceSynchronize());             +            newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ)); +            checkCudaErrors( cudaDeviceSynchronize() ); +            checkCudaErrors(cudaPeekAtLastError() );  	        }          CHECK(cudaFree(Q4)); @@ -724,5 +737,7 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo          CHECK(cudaFree(V2));          CHECK(cudaFree(V1_old));          CHECK(cudaFree(V2_old)); + +        cudaDeviceReset();          return 0;  } diff --git a/src/Core/regularisers_GPU/TGV_GPU_core.h b/src/Core/regularisers_GPU/TGV_GPU_core.h index 9f73d1c..e8f9c6e 100644 --- a/src/Core/regularisers_GPU/TGV_GPU_core.h +++ b/src/Core/regularisers_GPU/TGV_GPU_core.h @@ -1,6 +1,8 @@  #ifndef __TGV_GPU_H__  #define __TGV_GPU_H__ +  #include "CCPiDefines.h" +#include <memory.h>  #include <stdio.h>  extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);  | 
