summaryrefslogtreecommitdiffstats
path: root/matlab/mex/mexCopyDataHelpFunctions.cpp
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2014-10-30 17:41:58 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2015-01-19 11:18:24 +0100
commit50598bfd0c4959da66780fe695755e2be1a28b87 (patch)
treea12ab13e2005478fb01804525b1124de7641cab7 /matlab/mex/mexCopyDataHelpFunctions.cpp
parented56403b30389501d1df7fbbeb763abbe7b09654 (diff)
downloadastra-50598bfd0c4959da66780fe695755e2be1a28b87.tar.gz
astra-50598bfd0c4959da66780fe695755e2be1a28b87.tar.bz2
astra-50598bfd0c4959da66780fe695755e2be1a28b87.tar.xz
astra-50598bfd0c4959da66780fe695755e2be1a28b87.zip
Refactor astra_mex_data3d
(Modified) patch by Nicola ViganĂ²
Diffstat (limited to 'matlab/mex/mexCopyDataHelpFunctions.cpp')
-rw-r--r--matlab/mex/mexCopyDataHelpFunctions.cpp257
1 files changed, 257 insertions, 0 deletions
diff --git a/matlab/mex/mexCopyDataHelpFunctions.cpp b/matlab/mex/mexCopyDataHelpFunctions.cpp
new file mode 100644
index 0000000..bbcad30
--- /dev/null
+++ b/matlab/mex/mexCopyDataHelpFunctions.cpp
@@ -0,0 +1,257 @@
+/*
+-----------------------------------------------------------------------
+Copyright 2013 iMinds-Vision Lab, University of Antwerp
+
+Contact: astra@ua.ac.be
+Website: http://astra.ua.ac.be
+
+
+This file is part of the
+All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox").
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "mexCopyDataHelpFunctions.h"
+
+#include "mexHelpFunctions.h"
+
+#define HAVE_OMP
+
+#define ROUND_DOWN(x, s) ((x) & ~((s)-1))
+
+#ifdef HAVE_OMP
+# include <omp.h>
+#endif
+
+#if defined(__SSE2__)
+# include <emmintrin.h>
+
+# define STORE_32F_64F_CORE8(in, out, count, base) \
+ {\
+ const __m128 inV0 = *((const __m128 *) &in[count + 0 + base]);\
+ const __m128 inV1 = *((const __m128 *) &in[count + 4 + base]);\
+\
+ *((__m128d *)&out[count + 0 + base]) = _mm_cvtps_pd(inV0);\
+ *((__m128d *)&out[count + 2 + base]) = _mm_cvtps_pd(_mm_movehl_ps(inV0, inV0));\
+\
+ *((__m128d *)&out[count + 4 + base]) = _mm_cvtps_pd(inV1);\
+ *((__m128d *)&out[count + 6 + base]) = _mm_cvtps_pd(_mm_movehl_ps(inV1, inV1));\
+ }
+# define STORE_64F_32F_CORE8(in, out, count, base) \
+ {\
+ const __m128d inV0 = *((const __m128d *) &in[count + 0 + base]);\
+ const __m128d inV1 = *((const __m128d *) &in[count + 2 + base]);\
+\
+ const __m128d inV2 = *((const __m128d *) &in[count + 4 + base]);\
+ const __m128d inV3 = *((const __m128d *) &in[count + 6 + base]);\
+\
+ *((__m128 *)&out[count + 0 + base]) = _mm_movelh_ps(_mm_cvtpd_ps(inV0), _mm_cvtpd_ps(inV1));\
+ *((__m128 *)&out[count + 4 + base]) = _mm_movelh_ps(_mm_cvtpd_ps(inV2), _mm_cvtpd_ps(inV3));\
+ }
+# define STORE_32F_32F_CORE8(in, out, count, base) \
+ {\
+ *((__m128 *)&out[count + 0 + base]) = *((const __m128 *)&in[count + 0 + base]);\
+ *((__m128 *)&out[count + 4 + base]) = *((const __m128 *)&in[count + 4 + base]);\
+ }
+# define STORE_CONST_32F_CORE8(val, out, count, base) \
+ {\
+ *((__m128 *)&out[count + 0 + base]) = val;\
+ *((__m128 *)&out[count + 4 + base]) = val;\
+ }
+#else
+# define STORE_32F_64F_CORE8(in, out, count, base) \
+ {\
+ out[count + 0 + base] = (double)in[count + 0 + base];\
+ out[count + 1 + base] = (double)in[count + 1 + base];\
+ out[count + 2 + base] = (double)in[count + 2 + base];\
+ out[count + 3 + base] = (double)in[count + 3 + base];\
+ out[count + 4 + base] = (double)in[count + 4 + base];\
+ out[count + 5 + base] = (double)in[count + 5 + base];\
+ out[count + 6 + base] = (double)in[count + 6 + base];\
+ out[count + 7 + base] = (double)in[count + 7 + base];\
+ }
+# define STORE_64F_32F_CORE8(in, out, count, base) \
+ {\
+ out[count + 0 + base] = (float)in[count + 0 + base];\
+ out[count + 1 + base] = (float)in[count + 1 + base];\
+ out[count + 2 + base] = (float)in[count + 2 + base];\
+ out[count + 3 + base] = (float)in[count + 3 + base];\
+ out[count + 4 + base] = (float)in[count + 4 + base];\
+ out[count + 5 + base] = (float)in[count + 5 + base];\
+ out[count + 6 + base] = (float)in[count + 6 + base];\
+ out[count + 7 + base] = (float)in[count + 7 + base];\
+ }
+# define STORE_32F_32F_CORE8(in, out, count, base) \
+ {\
+ out[count + 0 + base] = in[count + 0 + base];\
+ out[count + 1 + base] = in[count + 1 + base];\
+ out[count + 2 + base] = in[count + 2 + base];\
+ out[count + 3 + base] = in[count + 3 + base];\
+ out[count + 4 + base] = in[count + 4 + base];\
+ out[count + 5 + base] = in[count + 5 + base];\
+ out[count + 6 + base] = in[count + 6 + base];\
+ out[count + 7 + base] = in[count + 7 + base];\
+ }
+#endif
+
+const char * warnDataTypeNotSupported = "Data type not supported: nothing was copied";
+
+void
+_copyMexToCFloat32Array(const mxArray * const inArray, astra::float32 * const out)
+{
+ const long long tot_size = mxGetNumberOfElements(inArray);
+ const long long block = 32;
+ const long long totRoundedPixels = ROUND_DOWN(tot_size, block);
+
+ if (mxIsDouble(inArray)) {
+ const double * const pdMatlabData = mxGetPr(inArray);
+
+#pragma omp for nowait
+ for (long long count = 0; count < totRoundedPixels; count += block) {
+ STORE_64F_32F_CORE8(pdMatlabData, out, count, 0);
+ STORE_64F_32F_CORE8(pdMatlabData, out, count, 8);
+ STORE_64F_32F_CORE8(pdMatlabData, out, count, 16);
+ STORE_64F_32F_CORE8(pdMatlabData, out, count, 24);
+ }
+#pragma omp for nowait
+ for (long long count = totRoundedPixels; count < tot_size; count++) {
+ out[count] = pdMatlabData[count];
+ }
+ }
+ else if (mxIsSingle(inArray)) {
+ const float * const pfMatlabData = (const float *)mxGetData(inArray);
+
+#pragma omp for nowait
+ for (long long count = 0; count < totRoundedPixels; count += block) {
+ STORE_32F_32F_CORE8(pfMatlabData, out, count, 0);
+ STORE_32F_32F_CORE8(pfMatlabData, out, count, 8);
+ STORE_32F_32F_CORE8(pfMatlabData, out, count, 16);
+ STORE_32F_32F_CORE8(pfMatlabData, out, count, 24);
+ }
+#pragma omp for nowait
+ for (long long count = totRoundedPixels; count < tot_size; count++) {
+ out[count] = pfMatlabData[count];
+ }
+ }
+ else {
+#pragma omp single nowait
+ mexWarnMsgIdAndTxt("ASTRA_MEX:wrong_datatype", warnDataTypeNotSupported);
+ }
+}
+
+void
+_copyCFloat32ArrayToMex(const astra::float32 * const in, mxArray * const outArray)
+{
+ const long long tot_size = mxGetNumberOfElements(outArray);
+ const long long block = 32;
+ const long long tot_rounded_size = ROUND_DOWN(tot_size, block);
+
+ if (mxIsDouble(outArray)) {
+ double * const pdMatlabData = mxGetPr(outArray);
+
+#pragma omp for nowait
+ for (long long count = 0; count < tot_rounded_size; count += block) {
+ STORE_32F_64F_CORE8(in, pdMatlabData, count, 0);
+ STORE_32F_64F_CORE8(in, pdMatlabData, count, 8);
+ STORE_32F_64F_CORE8(in, pdMatlabData, count, 16);
+ STORE_32F_64F_CORE8(in, pdMatlabData, count, 24);
+ }
+#pragma omp for nowait
+ for (long long count = tot_rounded_size; count < tot_size; count++) {
+ pdMatlabData[count] = in[count];
+ }
+ }
+ else if (mxIsSingle(outArray)) {
+ float * const pfMatlabData = (float *) mxGetData(outArray);
+
+#pragma omp for nowait
+ for (long long count = 0; count < tot_rounded_size; count += block) {
+ STORE_32F_32F_CORE8(in, pfMatlabData, count, 0);
+ STORE_32F_32F_CORE8(in, pfMatlabData, count, 8);
+ STORE_32F_32F_CORE8(in, pfMatlabData, count, 16);
+ STORE_32F_32F_CORE8(in, pfMatlabData, count, 24);
+ }
+#pragma omp for nowait
+ for (long long count = tot_rounded_size; count < tot_size; count++) {
+ pfMatlabData[count] = in[count];
+ }
+ }
+ else {
+#pragma omp single nowait
+ mexWarnMsgIdAndTxt("ASTRA_MEX:wrong_datatype", warnDataTypeNotSupported);
+ }
+}
+
+void
+_initializeCFloat32Array(const astra::float32 & val, astra::float32 * const out,
+ const size_t & tot_size)
+{
+#ifdef __SSE2__
+ const long long block = 32;
+ const long long tot_rounded_size = ROUND_DOWN(tot_size, block);
+
+ const __m128 vecVal = _mm_set1_ps(val);
+
+ {
+#pragma omp for nowait
+ for (long long count = 0; count < tot_rounded_size; count += block) {
+ STORE_CONST_32F_CORE8(vecVal, out, count, 0);
+ STORE_CONST_32F_CORE8(vecVal, out, count, 8);
+ STORE_CONST_32F_CORE8(vecVal, out, count, 16);
+ STORE_CONST_32F_CORE8(vecVal, out, count, 24);
+ }
+#else
+ const long long tot_rounded_size = 0;
+ {
+#endif
+#pragma omp for nowait
+ for (long long count = tot_rounded_size; count < (long long)tot_size; count++) {
+ out[count] = val;
+ }
+ }
+}
+
+void
+copyMexToCFloat32Array(const mxArray * const in,
+ astra::float32 * const out, const size_t &tot_size)
+{
+#pragma omp parallel
+ {
+ // fill with scalar value
+ if (mex_is_scalar(in)) {
+ astra::float32 fValue = 0.f;
+ if (!mxIsEmpty(in)) {
+ fValue = (astra::float32)mxGetScalar(in);
+ }
+ _initializeCFloat32Array(fValue, out, tot_size);
+ }
+ // fill with array value
+ else {
+ _copyMexToCFloat32Array(in, out);
+ }
+ }
+}
+
+void
+copyCFloat32ArrayToMex(const float * const in, mxArray * const outArray)
+{
+#pragma omp parallel
+ {
+ _copyCFloat32ArrayToMex(in, outArray);
+ }
+}