/* ----------------------------------------------------------------------- Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp 2014-2015, CWI, Amsterdam Contact: astra@uantwerpen.be Website: http://sf.net/projects/astra-toolbox This file is part of the ASTRA Toolbox. The ASTRA Toolbox is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The ASTRA Toolbox is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with the ASTRA Toolbox. If not, see . ----------------------------------------------------------------------- $Id$ */ #include "astra/Fourier.h" namespace astra { /* Copyright Takuya OOURA, 1996-2001 You may use, copy, modify and distribute this code for any purpose (include commercial use) and without fee. Source: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html Fast Fourier/Cosine/Sine Transform dimension :one data length :power of 2 decimation :frequency radix :split-radix data :inplace table :use functions cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, float32 *, int *, float32 *); void rdft(int, int, float32 *, int *, float32 *); void ddct(int, int, float32 *, int *, float32 *); void ddst(int, int, float32 *, int *, float32 *); void dfct(int, float32 *, float32 *, int *, float32 *); void dfst(int, float32 *, float32 *, int *, float32 *); macro definitions USE_CDFT_PTHREADS : default=not defined CDFT_THREADS_BEGIN_N : must be >= 512, default=8192 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536 USE_CDFT_WINTHREADS : default=not defined CDFT_THREADS_BEGIN_N : must be >= 512, default=32768 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288 -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k ip[0] = 0; // first time only cdft(2*n, 1, a, ip, w); ip[0] = 0; // first time only cdft(2*n, -1, a, ip, w); [parameters] 2*n :data length (int) n >= 1, n = power of 2 a[0...2*n-1] :input/output data (float32 *) input data a[2*j] = Re(x[j]), a[2*j+1] = Im(x[j]), 0<=j= 2+sqrt(n) strictly, length of ip >= 2+(1<<(int)(log(n+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of cdft(2*n, -1, a, ip, w); is cdft(2*n, 1, a, ip, w); for (j = 0; j <= 2 * n - 1; j++) { a[j] *= 1.0 / n; } . -------- Real DFT / Inverse of Real DFT -------- [definition] RDFT R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k ip[0] = 0; // first time only rdft(n, 1, a, ip, w); ip[0] = 0; // first time only rdft(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (float32 *) output data a[2*k] = R[k], 0<=k input data a[2*j] = R[j], 0<=j= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of rdft(n, 1, a, ip, w); is rdft(n, -1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] IDCT (excluding scale) C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k ip[0] = 0; // first time only ddct(n, 1, a, ip, w); ip[0] = 0; // first time only ddct(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (float32 *) output data a[k] = C[k], 0<=k= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddct(n, -1, a, ip, w); is a[0] *= 0.5; ddct(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DST (Discrete Sine Transform) / Inverse of DST -------- [definition] IDST (excluding scale) S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 ip[0] = 0; // first time only ddst(n, 1, a, ip, w); ip[0] = 0; // first time only ddst(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (float32 *) input data a[j] = A[j], 0 output data a[k] = S[k], 0= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddst(n, -1, a, ip, w); is a[0] *= 0.5; ddst(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- [definition] C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n [usage] ip[0] = 0; // first time only dfct(n, a, t, ip, w); [parameters] n :data length - 1 (int) n >= 2, n = power of 2 a[0...n] :input/output data (float32 *) output data a[k] = C[k], 0<=k<=n t[0...n/2] :work area (float32 *) ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); is a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); for (j = 0; j <= n; j++) { a[j] *= 2.0 / n; } . -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- [definition] S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 a[0...n-1] :input/output data (float32 *) output data a[k] = S[k], 0= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of dfst(n, a, t, ip, w); is dfst(n, a, t, ip, w); for (j = 1; j <= n - 1; j++) { a[j] *= 2.0 / n; } . Appendix : The cos/sin table is recalculated when the larger table required. w[] and ip[] are compatible with all routines. */ void cdft(int n, int isgn, float32 *a, int *ip, float32 *w) { void makewt(int nw, int *ip, float32 *w); void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w); void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w); int nw; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } if (isgn >= 0) { cftfsub(n, a, ip, nw, w); } else { cftbsub(n, a, ip, nw, w); } } void rdft(int n, int isgn, float32 *a, int *ip, float32 *w) { void makewt(int nw, int *ip, float32 *w); void makect(int nc, int *ip, float32 *c); void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w); void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w); void rftfsub(int n, float32 *a, int nc, float32 *c); void rftbsub(int n, float32 *a, int nc, float32 *c); int nw, nc; float32 xi; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 2)) { nc = n >> 2; makect(nc, ip, w + nw); } if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } } void ddct(int n, int isgn, float32 *a, int *ip, float32 *w) { void makewt(int nw, int *ip, float32 *w); void makect(int nc, int *ip, float32 *c); void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w); void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w); void rftfsub(int n, float32 *a, int nc, float32 *c); void rftbsub(int n, float32 *a, int nc, float32 *c); void dctsub(int n, float32 *a, int nc, float32 *c); int j, nw, nc; float32 xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = a[j] - a[j - 1]; a[j] += a[j - 1]; } a[1] = a[0] - xr; a[0] += xr; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } dctsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = a[j] - a[j + 1]; a[j] += a[j + 1]; } a[n - 1] = xr; } } void ddst(int n, int isgn, float32 *a, int *ip, float32 *w) { void makewt(int nw, int *ip, float32 *w); void makect(int nc, int *ip, float32 *c); void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w); void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w); void rftfsub(int n, float32 *a, int nc, float32 *c); void rftbsub(int n, float32 *a, int nc, float32 *c); void dstsub(int n, float32 *a, int nc, float32 *c); int j, nw, nc; float32 xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = -a[j] - a[j - 1]; a[j] -= a[j - 1]; } a[1] = a[0] + xr; a[0] -= xr; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } dstsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = -a[j] - a[j + 1]; a[j] -= a[j + 1]; } a[n - 1] = -xr; } } void dfct(int n, float32 *a, float32 *t, int *ip, float32 *w) { void makewt(int nw, int *ip, float32 *w); void makect(int nc, int *ip, float32 *c); void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w); void rftfsub(int n, float32 *a, int nc, float32 *c); void dctsub(int n, float32 *a, int nc, float32 *c); int j, k, l, m, mh, nw, nc; float32 xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } m = n >> 1; yi = a[m]; xi = a[0] + a[n]; a[0] -= a[n]; t[0] = xi - yi; t[m] = xi + yi; if (n > 2) { mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] - a[n - j]; xi = a[j] + a[n - j]; yr = a[k] - a[n - k]; yi = a[k] + a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi - yi; t[k] = xi + yi; } t[mh] = a[mh] + a[n - mh]; a[mh] -= a[n - mh]; dctsub(m, a, nc, w + nw); if (m > 4) { cftfsub(m, a, ip, nw, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, ip, nw, w); } a[n - 1] = a[0] - a[1]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] + a[j + 1]; a[2 * j - 1] = a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dctsub(m, t, nc, w + nw); if (m > 4) { cftfsub(m, t, ip, nw, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, ip, nw, w); } a[n - l] = t[0] - t[1]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = t[j] - t[j + 1]; a[k + l] = t[j] + t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 0; j < mh; j++) { k = m - j; t[j] = t[m + k] - t[m + j]; t[k] = t[m + k] + t[m + j]; } t[mh] = t[m + mh]; m = mh; } a[l] = t[0]; a[n] = t[2] - t[1]; a[0] = t[2] + t[1]; } else { a[1] = a[0]; a[2] = t[0]; a[0] = t[1]; } } void dfst(int n, float32 *a, float32 *t, int *ip, float32 *w) { void makewt(int nw, int *ip, float32 *w); void makect(int nc, int *ip, float32 *c); void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w); void rftfsub(int n, float32 *a, int nc, float32 *c); void dstsub(int n, float32 *a, int nc, float32 *c); int j, k, l, m, mh, nw, nc; float32 xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } if (n > 2) { m = n >> 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] + a[n - j]; xi = a[j] - a[n - j]; yr = a[k] + a[n - k]; yi = a[k] - a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi + yi; t[k] = xi - yi; } t[0] = a[mh] - a[n - mh]; a[mh] += a[n - mh]; a[0] = a[m]; dstsub(m, a, nc, w + nw); if (m > 4) { cftfsub(m, a, ip, nw, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, ip, nw, w); } a[n - 1] = a[1] - a[0]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] - a[j + 1]; a[2 * j - 1] = -a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dstsub(m, t, nc, w + nw); if (m > 4) { cftfsub(m, t, ip, nw, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, ip, nw, w); } a[n - l] = t[1] - t[0]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = -t[j] - t[j + 1]; a[k + l] = t[j] - t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; t[j] = t[m + k] + t[m + j]; t[k] = t[m + k] - t[m + j]; } t[0] = t[m + mh]; m = mh; } a[l] = t[0]; } a[0] = 0; } /* -------- initializing routines -------- */ #include void makewt(int nw, int *ip, float32 *w) { void makeipt(int nw, int *ip); int j, nwh, nw0, nw1; float32 delta, wn4r, wk1r, wk1i, wk3r, wk3i; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; delta = atan(1.0) / nwh; wn4r = cos(delta * nwh); w[0] = 1; w[1] = wn4r; if (nwh == 4) { w[2] = cos(delta * 2); w[3] = sin(delta * 2); } else if (nwh > 4) { makeipt(nw, ip); w[2] = 0.5 / cos(delta * 2); w[3] = 0.5 / cos(delta * 6); for (j = 4; j < nwh; j += 4) { w[j] = cos(delta * j); w[j + 1] = sin(delta * j); w[j + 2] = cos(3 * delta * j); w[j + 3] = -sin(3 * delta * j); } } nw0 = 0; while (nwh > 2) { nw1 = nw0 + nwh; nwh >>= 1; w[nw1] = 1; w[nw1 + 1] = wn4r; if (nwh == 4) { wk1r = w[nw0 + 4]; wk1i = w[nw0 + 5]; w[nw1 + 2] = wk1r; w[nw1 + 3] = wk1i; } else if (nwh > 4) { wk1r = w[nw0 + 4]; wk3r = w[nw0 + 6]; w[nw1 + 2] = 0.5 / wk1r; w[nw1 + 3] = 0.5 / wk3r; for (j = 4; j < nwh; j += 4) { wk1r = w[nw0 + 2 * j]; wk1i = w[nw0 + 2 * j + 1]; wk3r = w[nw0 + 2 * j + 2]; wk3i = w[nw0 + 2 * j + 3]; w[nw1 + j] = wk1r; w[nw1 + j + 1] = wk1i; w[nw1 + j + 2] = wk3r; w[nw1 + j + 3] = wk3i; } } nw0 = nw1; } } } void makeipt(int nw, int *ip) { int j, l, m, m2, p, q; ip[2] = 0; ip[3] = 16; m = 2; for (l = nw; l > 32; l >>= 2) { m2 = m << 1; q = m2 << 3; for (j = m; j < m2; j++) { p = ip[j] << 2; ip[m + j] = p; ip[m2 + j] = p + q; } m = m2; } } void makect(int nc, int *ip, float32 *c) { int j, nch; float32 delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; delta = atan(1.0) / nch; c[0] = cos(delta * nch); c[nch] = 0.5 * c[0]; for (j = 1; j < nch; j++) { c[j] = 0.5 * cos(delta * j); c[nc - j] = 0.5 * sin(delta * j); } } } /* -------- child routines -------- */ #ifdef USE_CDFT_PTHREADS #define USE_CDFT_THREADS #ifndef CDFT_THREADS_BEGIN_N #define CDFT_THREADS_BEGIN_N 8192 #endif #ifndef CDFT_4THREADS_BEGIN_N #define CDFT_4THREADS_BEGIN_N 65536 #endif #include #include #include #define cdft_thread_t pthread_t #define cdft_thread_create(thp,func,argp) { \ if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #define cdft_thread_wait(th) { \ if (pthread_join(th, NULL) != 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #endif /* USE_CDFT_PTHREADS */ #ifdef USE_CDFT_WINTHREADS #define USE_CDFT_THREADS #ifndef CDFT_THREADS_BEGIN_N #define CDFT_THREADS_BEGIN_N 32768 #endif #ifndef CDFT_4THREADS_BEGIN_N #define CDFT_4THREADS_BEGIN_N 524288 #endif #include #include #include #define cdft_thread_t HANDLE #define cdft_thread_create(thp,func,argp) { \ DWORD thid; \ *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \ if (*(thp) == 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #define cdft_thread_wait(th) { \ WaitForSingleObject(th, INFINITE); \ CloseHandle(th); \ } #endif /* USE_CDFT_WINTHREADS */ void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w) { void bitrv2(int n, int *ip, float32 *a); void bitrv216(float32 *a); void bitrv208(float32 *a); void cftf1st(int n, float32 *a, float32 *w); void cftrec4(int n, float32 *a, int nw, float32 *w); void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w); void cftfx41(int n, float32 *a, int nw, float32 *w); void cftf161(float32 *a, float32 *w); void cftf081(float32 *a, float32 *w); void cftf040(float32 *a); void cftx020(float32 *a); #ifdef USE_CDFT_THREADS void cftrec4_th(int n, float32 *a, int nw, float32 *w); #endif /* USE_CDFT_THREADS */ if (n > 8) { if (n > 32) { cftf1st(n, a, &w[nw - (n >> 2)]); #ifdef USE_CDFT_THREADS if (n > CDFT_THREADS_BEGIN_N) { cftrec4_th(n, a, nw, w); } else #endif /* USE_CDFT_THREADS */ if (n > 512) { cftrec4(n, a, nw, w); } else if (n > 128) { cftleaf(n, 1, a, nw, w); } else { cftfx41(n, a, nw, w); } bitrv2(n, ip, a); } else if (n == 32) { cftf161(a, &w[nw - 8]); bitrv216(a); } else { cftf081(a, w); bitrv208(a); } } else if (n == 8) { cftf040(a); } else if (n == 4) { cftx020(a); } } void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w) { void bitrv2conj(int n, int *ip, float32 *a); void bitrv216neg(float32 *a); void bitrv208neg(float32 *a); void cftb1st(int n, float32 *a, float32 *w); void cftrec4(int n, float32 *a, int nw, float32 *w); void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w); void cftfx41(int n, float32 *a, int nw, float32 *w); void cftf161(float32 *a, float32 *w); void cftf081(float32 *a, float32 *w); void cftb040(float32 *a); void cftx020(float32 *a); #ifdef USE_CDFT_THREADS void cftrec4_th(int n, float32 *a, int nw, float32 *w); #endif /* USE_CDFT_THREADS */ if (n > 8) { if (n > 32) { cftb1st(n, a, &w[nw - (n >> 2)]); #ifdef USE_CDFT_THREADS if (n > CDFT_THREADS_BEGIN_N) { cftrec4_th(n, a, nw, w); } else #endif /* USE_CDFT_THREADS */ if (n > 512) { cftrec4(n, a, nw, w); } else if (n > 128) { cftleaf(n, 1, a, nw, w); } else { cftfx41(n, a, nw, w); } bitrv2conj(n, ip, a); } else if (n == 32) { cftf161(a, &w[nw - 8]); bitrv216neg(a); } else { cftf081(a, w); bitrv208neg(a); } } else if (n == 8) { cftb040(a); } else if (n == 4) { cftx020(a); } } void bitrv2(int n, int *ip, float32 *a) { int j, j1, k, k1, l, m, nh, nm; float32 xr, xi, yr, yi; m = 1; for (l = n >> 2; l > 8; l >>= 2) { m <<= 1; } nh = n >> 1; nm = 4 * m; if (l == 8) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + 2 * ip[m + k]; k1 = 4 * k + 2 * ip[m + j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + 2 * ip[m + k]; j1 = k1 + 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= 2; k1 -= nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh + 2; k1 += nh + 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh - nm; k1 += 2 * nm - 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } else { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + ip[m + k]; k1 = 4 * k + ip[m + j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + ip[m + k]; j1 = k1 + 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } } void bitrv2conj(int n, int *ip, float32 *a) { int j, j1, k, k1, l, m, nh, nm; float32 xr, xi, yr, yi; m = 1; for (l = n >> 2; l > 8; l >>= 2) { m <<= 1; } nh = n >> 1; nm = 4 * m; if (l == 8) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + 2 * ip[m + k]; k1 = 4 * k + 2 * ip[m + j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + 2 * ip[m + k]; j1 = k1 + 2; k1 += nh; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= 2; k1 -= nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh + 2; k1 += nh + 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh - nm; k1 += 2 * nm - 2; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; } } else { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + ip[m + k]; k1 = 4 * k + ip[m + j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + ip[m + k]; j1 = k1 + 2; k1 += nh; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; j1 += nm; k1 += nm; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; } } } void bitrv216(float32 *a) { float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x7r = a[14]; x7i = a[15]; x8r = a[16]; x8i = a[17]; x10r = a[20]; x10i = a[21]; x11r = a[22]; x11i = a[23]; x12r = a[24]; x12i = a[25]; x13r = a[26]; x13i = a[27]; x14r = a[28]; x14i = a[29]; a[2] = x8r; a[3] = x8i; a[4] = x4r; a[5] = x4i; a[6] = x12r; a[7] = x12i; a[8] = x2r; a[9] = x2i; a[10] = x10r; a[11] = x10i; a[14] = x14r; a[15] = x14i; a[16] = x1r; a[17] = x1i; a[20] = x5r; a[21] = x5i; a[22] = x13r; a[23] = x13i; a[24] = x3r; a[25] = x3i; a[26] = x11r; a[27] = x11i; a[28] = x7r; a[29] = x7i; } void bitrv216neg(float32 *a) { float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i, x15r, x15i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x6r = a[12]; x6i = a[13]; x7r = a[14]; x7i = a[15]; x8r = a[16]; x8i = a[17]; x9r = a[18]; x9i = a[19]; x10r = a[20]; x10i = a[21]; x11r = a[22]; x11i = a[23]; x12r = a[24]; x12i = a[25]; x13r = a[26]; x13i = a[27]; x14r = a[28]; x14i = a[29]; x15r = a[30]; x15i = a[31]; a[2] = x15r; a[3] = x15i; a[4] = x7r; a[5] = x7i; a[6] = x11r; a[7] = x11i; a[8] = x3r; a[9] = x3i; a[10] = x13r; a[11] = x13i; a[12] = x5r; a[13] = x5i; a[14] = x9r; a[15] = x9i; a[16] = x1r; a[17] = x1i; a[18] = x14r; a[19] = x14i; a[20] = x6r; a[21] = x6i; a[22] = x10r; a[23] = x10i; a[24] = x2r; a[25] = x2i; a[26] = x12r; a[27] = x12i; a[28] = x4r; a[29] = x4i; a[30] = x8r; a[31] = x8i; } void bitrv208(float32 *a) { float32 x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; x1r = a[2]; x1i = a[3]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x6r = a[12]; x6i = a[13]; a[2] = x4r; a[3] = x4i; a[6] = x6r; a[7] = x6i; a[8] = x1r; a[9] = x1i; a[12] = x3r; a[13] = x3i; } void bitrv208neg(float32 *a) { float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x6r = a[12]; x6i = a[13]; x7r = a[14]; x7i = a[15]; a[2] = x7r; a[3] = x7i; a[4] = x3r; a[5] = x3i; a[6] = x5r; a[7] = x5i; a[8] = x1r; a[9] = x1i; a[10] = x6r; a[11] = x6i; a[12] = x2r; a[13] = x2i; a[14] = x4r; a[15] = x4i; } void cftf1st(int n, float32 *a, float32 *w) { int j, j0, j1, j2, j3, k, m, mh; float32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = a[1] + a[j2 + 1]; x1r = a[0] - a[j2]; x1i = a[1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j2] = x1r - x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; csc1 = w[2]; csc3 = w[3]; wd1r = 1; wd1i = 0; wd3r = 1; wd3i = 0; k = 0; for (j = 2; j < mh - 2; j += 4) { k += 4; wk1r = csc1 * (wd1r + w[k]); wk1i = csc1 * (wd1i + w[k + 1]); wk3r = csc3 * (wd3r + w[k + 2]); wk3i = csc3 * (wd3i + w[k + 3]); wd1r = w[k]; wd1i = w[k + 1]; wd3r = w[k + 2]; wd3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = a[j + 1] + a[j2 + 1]; x1r = a[j] - a[j2]; x1i = a[j + 1] - a[j2 + 1]; y0r = a[j + 2] + a[j2 + 2]; y0i = a[j + 3] + a[j2 + 3]; y1r = a[j + 2] - a[j2 + 2]; y1i = a[j + 3] - a[j2 + 3]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 + 2] + a[j3 + 2]; y2i = a[j1 + 3] + a[j3 + 3]; y3r = a[j1 + 2] - a[j3 + 2]; y3i = a[j1 + 3] - a[j3 + 3]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j + 2] = y0r + y2r; a[j + 3] = y0i + y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j1 + 2] = y0r - y2r; a[j1 + 3] = y0i - y2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = y1r - y3i; x0i = y1i + y3r; a[j2 + 2] = wd1r * x0r - wd1i * x0i; a[j2 + 3] = wd1r * x0i + wd1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; x0r = y1r + y3i; x0i = y1i - y3r; a[j3 + 2] = wd3r * x0r + wd3i * x0i; a[j3 + 3] = wd3r * x0i - wd3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; y0r = a[j0 - 2] + a[j2 - 2]; y0i = a[j0 - 1] + a[j2 - 1]; y1r = a[j0 - 2] - a[j2 - 2]; y1i = a[j0 - 1] - a[j2 - 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 - 2] + a[j3 - 2]; y2i = a[j1 - 1] + a[j3 - 1]; y3r = a[j1 - 2] - a[j3 - 2]; y3i = a[j1 - 1] - a[j3 - 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j0 - 2] = y0r + y2r; a[j0 - 1] = y0i + y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j1 - 2] = y0r - y2r; a[j1 - 1] = y0i - y2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = y1r - y3i; x0i = y1i + y3r; a[j2 - 2] = wd1i * x0r - wd1r * x0i; a[j2 - 1] = wd1i * x0i + wd1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; x0r = y1r + y3i; x0i = y1i - y3r; a[j3 - 2] = wd3i * x0r + wd3r * x0i; a[j3 - 1] = wd3i * x0i - wd3r * x0r; } wk1r = csc1 * (wd1r + wn4r); wk1i = csc1 * (wd1i + wn4r); wk3r = csc3 * (wd3r - wn4r); wk3i = csc3 * (wd3i - wn4r); j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] + a[j2 - 2]; x0i = a[j0 - 1] + a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = a[j0 - 1] - a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i + x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 - 2] = wk1r * x0r - wk1i * x0i; a[j2 - 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 - 2] = wk3r * x0r + wk3i * x0i; a[j3 - 1] = wk3r * x0i - wk3i * x0r; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); x0r = a[j0 + 2] + a[j2 + 2]; x0i = a[j0 + 3] + a[j2 + 3]; x1r = a[j0 + 2] - a[j2 + 2]; x1i = a[j0 + 3] - a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j0 + 2] = x0r + x2r; a[j0 + 3] = x0i + x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 + 2] = wk1i * x0r - wk1r * x0i; a[j2 + 3] = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 + 2] = wk3i * x0r + wk3r * x0i; a[j3 + 3] = wk3i * x0i - wk3r * x0r; } void cftb1st(int n, float32 *a, float32 *w) { int j, j0, j1, j2, j3, k, m, mh; float32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = -a[1] - a[j2 + 1]; x1r = a[0] - a[j2]; x1i = -a[1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j2] = x1r + x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r - x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; csc1 = w[2]; csc3 = w[3]; wd1r = 1; wd1i = 0; wd3r = 1; wd3i = 0; k = 0; for (j = 2; j < mh - 2; j += 4) { k += 4; wk1r = csc1 * (wd1r + w[k]); wk1i = csc1 * (wd1i + w[k + 1]); wk3r = csc3 * (wd3r + w[k + 2]); wk3i = csc3 * (wd3i + w[k + 3]); wd1r = w[k]; wd1i = w[k + 1]; wd3r = w[k + 2]; wd3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = -a[j + 1] - a[j2 + 1]; x1r = a[j] - a[j2]; x1i = -a[j + 1] + a[j2 + 1]; y0r = a[j + 2] + a[j2 + 2]; y0i = -a[j + 3] - a[j2 + 3]; y1r = a[j + 2] - a[j2 + 2]; y1i = -a[j + 3] + a[j2 + 3]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 + 2] + a[j3 + 2]; y2i = a[j1 + 3] + a[j3 + 3]; y3r = a[j1 + 2] - a[j3 + 2]; y3i = a[j1 + 3] - a[j3 + 3]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j + 2] = y0r + y2r; a[j + 3] = y0i - y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j1 + 2] = y0r - y2r; a[j1 + 3] = y0i + y2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = y1r + y3i; x0i = y1i + y3r; a[j2 + 2] = wd1r * x0r - wd1i * x0i; a[j2 + 3] = wd1r * x0i + wd1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; x0r = y1r - y3i; x0i = y1i - y3r; a[j3 + 2] = wd3r * x0r + wd3i * x0i; a[j3 + 3] = wd3r * x0i - wd3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = -a[j0 + 1] - a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = -a[j0 + 1] + a[j2 + 1]; y0r = a[j0 - 2] + a[j2 - 2]; y0i = -a[j0 - 1] - a[j2 - 1]; y1r = a[j0 - 2] - a[j2 - 2]; y1i = -a[j0 - 1] + a[j2 - 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 - 2] + a[j3 - 2]; y2i = a[j1 - 1] + a[j3 - 1]; y3r = a[j1 - 2] - a[j3 - 2]; y3i = a[j1 - 1] - a[j3 - 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i - x2i; a[j0 - 2] = y0r + y2r; a[j0 - 1] = y0i - y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j1 - 2] = y0r - y2r; a[j1 - 1] = y0i + y2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = y1r + y3i; x0i = y1i + y3r; a[j2 - 2] = wd1i * x0r - wd1r * x0i; a[j2 - 1] = wd1i * x0i + wd1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; x0r = y1r - y3i; x0i = y1i - y3r; a[j3 - 2] = wd3i * x0r + wd3r * x0i; a[j3 - 1] = wd3i * x0i - wd3r * x0r; } wk1r = csc1 * (wd1r + wn4r); wk1i = csc1 * (wd1i + wn4r); wk3r = csc3 * (wd3r - wn4r); wk3i = csc3 * (wd3i - wn4r); j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] + a[j2 - 2]; x0i = -a[j0 - 1] - a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = -a[j0 - 1] + a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i - x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 - 2] = wk1r * x0r - wk1i * x0i; a[j2 - 1] = wk1r * x0i + wk1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 - 2] = wk3r * x0r + wk3i * x0i; a[j3 - 1] = wk3r * x0i - wk3i * x0r; x0r = a[j0] + a[j2]; x0i = -a[j0 + 1] - a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = -a[j0 + 1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); x0r = a[j0 + 2] + a[j2 + 2]; x0i = -a[j0 + 3] - a[j2 + 3]; x1r = a[j0 + 2] - a[j2 + 2]; x1i = -a[j0 + 3] + a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j0 + 2] = x0r + x2r; a[j0 + 3] = x0i - x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 + 2] = wk1i * x0r - wk1r * x0i; a[j2 + 3] = wk1i * x0i + wk1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 + 2] = wk3i * x0r + wk3r * x0i; a[j3 + 3] = wk3i * x0i - wk3r * x0r; } #ifdef USE_CDFT_THREADS struct cdft_arg_st { int n0; int n; float32 *a; int nw; float32 *w; }; typedef struct cdft_arg_st cdft_arg_t; void cftrec4_th(int n, float32 *a, int nw, float32 *w) { void *cftrec1_th(void *p); void *cftrec2_th(void *p); int i, idiv4, m, nthread; cdft_thread_t th[4]; cdft_arg_t ag[4]; nthread = 2; idiv4 = 0; m = n >> 1; if (n > CDFT_4THREADS_BEGIN_N) { nthread = 4; idiv4 = 1; m >>= 1; } for (i = 0; i < nthread; i++) { ag[i].n0 = n; ag[i].n = m; ag[i].a = &a[i * m]; ag[i].nw = nw; ag[i].w = w; if (i != idiv4) { cdft_thread_create(&th[i], cftrec1_th, &ag[i]); } else { cdft_thread_create(&th[i], cftrec2_th, &ag[i]); } } for (i = 0; i < nthread; i++) { cdft_thread_wait(th[i]); } } void *cftrec1_th(void *p) { int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w); void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w); void cftmdl1(int n, float32 *a, float32 *w); int isplt, j, k, m, n, n0, nw; float32 *a, *w; n0 = ((cdft_arg_t *) p)->n0; n = ((cdft_arg_t *) p)->n; a = ((cdft_arg_t *) p)->a; nw = ((cdft_arg_t *) p)->nw; w = ((cdft_arg_t *) p)->w; m = n0; while (m > 512) { m >>= 2; cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); } cftleaf(m, 1, &a[n - m], nw, w); k = 0; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } return (void *) 0; } void *cftrec2_th(void *p) { int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w); void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w); void cftmdl2(int n, float32 *a, float32 *w); int isplt, j, k, m, n, n0, nw; float32 *a, *w; n0 = ((cdft_arg_t *) p)->n0; n = ((cdft_arg_t *) p)->n; a = ((cdft_arg_t *) p)->a; nw = ((cdft_arg_t *) p)->nw; w = ((cdft_arg_t *) p)->w; k = 1; m = n0; while (m > 512) { m >>= 2; k <<= 2; cftmdl2(m, &a[n - m], &w[nw - m]); } cftleaf(m, 0, &a[n - m], nw, w); k >>= 1; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } return (void *) 0; } #endif /* USE_CDFT_THREADS */ void cftrec4(int n, float32 *a, int nw, float32 *w) { int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w); void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w); void cftmdl1(int n, float32 *a, float32 *w); int isplt, j, k, m; m = n; while (m > 512) { m >>= 2; cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); } cftleaf(m, 1, &a[n - m], nw, w); k = 0; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } } int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w) { void cftmdl1(int n, float32 *a, float32 *w); void cftmdl2(int n, float32 *a, float32 *w); int i, isplt, m; if ((k & 3) != 0) { isplt = k & 1; if (isplt != 0) { cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]); } else { cftmdl2(n, &a[j - n], &w[nw - n]); } } else { m = n; for (i = k; (i & 3) == 0; i >>= 2) { m <<= 2; } isplt = i & 1; if (isplt != 0) { while (m > 128) { cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]); m >>= 2; } } else { while (m > 128) { cftmdl2(m, &a[j - m], &w[nw - m]); m >>= 2; } } } return isplt; } void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w) { void cftmdl1(int n, float32 *a, float32 *w); void cftmdl2(int n, float32 *a, float32 *w); void cftf161(float32 *a, float32 *w); void cftf162(float32 *a, float32 *w); void cftf081(float32 *a, float32 *w); void cftf082(float32 *a, float32 *w); if (n == 512) { cftmdl1(128, a, &w[nw - 64]); cftf161(a, &w[nw - 8]); cftf162(&a[32], &w[nw - 32]); cftf161(&a[64], &w[nw - 8]); cftf161(&a[96], &w[nw - 8]); cftmdl2(128, &a[128], &w[nw - 128]); cftf161(&a[128], &w[nw - 8]); cftf162(&a[160], &w[nw - 32]); cftf161(&a[192], &w[nw - 8]); cftf162(&a[224], &w[nw - 32]); cftmdl1(128, &a[256], &w[nw - 64]); cftf161(&a[256], &w[nw - 8]); cftf162(&a[288], &w[nw - 32]); cftf161(&a[320], &w[nw - 8]); cftf161(&a[352], &w[nw - 8]); if (isplt != 0) { cftmdl1(128, &a[384], &w[nw - 64]); cftf161(&a[480], &w[nw - 8]); } else { cftmdl2(128, &a[384], &w[nw - 128]); cftf162(&a[480], &w[nw - 32]); } cftf161(&a[384], &w[nw - 8]); cftf162(&a[416], &w[nw - 32]); cftf161(&a[448], &w[nw - 8]); } else { cftmdl1(64, a, &w[nw - 32]); cftf081(a, &w[nw - 8]); cftf082(&a[16], &w[nw - 8]); cftf081(&a[32], &w[nw - 8]); cftf081(&a[48], &w[nw - 8]); cftmdl2(64, &a[64], &w[nw - 64]); cftf081(&a[64], &w[nw - 8]); cftf082(&a[80], &w[nw - 8]); cftf081(&a[96], &w[nw - 8]); cftf082(&a[112], &w[nw - 8]); cftmdl1(64, &a[128], &w[nw - 32]); cftf081(&a[128], &w[nw - 8]); cftf082(&a[144], &w[nw - 8]); cftf081(&a[160], &w[nw - 8]); cftf081(&a[176], &w[nw - 8]); if (isplt != 0) { cftmdl1(64, &a[192], &w[nw - 32]); cftf081(&a[240], &w[nw - 8]); } else { cftmdl2(64, &a[192], &w[nw - 64]); cftf082(&a[240], &w[nw - 8]); } cftf081(&a[192], &w[nw - 8]); cftf082(&a[208], &w[nw - 8]); cftf081(&a[224], &w[nw - 8]); } } void cftmdl1(int n, float32 *a, float32 *w) { int j, j0, j1, j2, j3, k, m, mh; float32 wn4r, wk1r, wk1i, wk3r, wk3i; float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = a[1] + a[j2 + 1]; x1r = a[0] - a[j2]; x1i = a[1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j2] = x1r - x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; k = 0; for (j = 2; j < mh; j += 2) { k += 4; wk1r = w[k]; wk1i = w[k + 1]; wk3r = w[k + 2]; wk3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = a[j + 1] + a[j2 + 1]; x1r = a[j] - a[j2]; x1i = a[j + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; } j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); } void cftmdl2(int n, float32 *a, float32 *w) { int j, j0, j1, j2, j3, k, kr, m, mh; float32 wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; mh = n >> 3; m = 2 * mh; wn4r = w[1]; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] - a[j2 + 1]; x0i = a[1] + a[j2]; x1r = a[0] + a[j2 + 1]; x1i = a[1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wn4r * (x2r - x2i); y0i = wn4r * (x2i + x2r); a[0] = x0r + y0r; a[1] = x0i + y0i; a[j1] = x0r - y0r; a[j1 + 1] = x0i - y0i; y0r = wn4r * (x3r - x3i); y0i = wn4r * (x3i + x3r); a[j2] = x1r - y0i; a[j2 + 1] = x1i + y0r; a[j3] = x1r + y0i; a[j3 + 1] = x1i - y0r; k = 0; kr = 2 * m; for (j = 2; j < mh; j += 2) { k += 4; wk1r = w[k]; wk1i = w[k + 1]; wk3r = w[k + 2]; wk3i = w[k + 3]; kr -= 4; wd1i = w[kr]; wd1r = w[kr + 1]; wd3i = w[kr + 2]; wd3r = w[kr + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] - a[j2 + 1]; x0i = a[j + 1] + a[j2]; x1r = a[j] + a[j2 + 1]; x1i = a[j + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wk1r * x0r - wk1i * x0i; y0i = wk1r * x0i + wk1i * x0r; y2r = wd1r * x2r - wd1i * x2i; y2i = wd1r * x2i + wd1i * x2r; a[j] = y0r + y2r; a[j + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wk3r * x1r + wk3i * x1i; y0i = wk3r * x1i - wk3i * x1r; y2r = wd3r * x3r + wd3i * x3i; y2i = wd3r * x3i - wd3i * x3r; a[j2] = y0r + y2r; a[j2 + 1] = y0i + y2i; a[j3] = y0r - y2r; a[j3 + 1] = y0i - y2i; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] - a[j2 + 1]; x0i = a[j0 + 1] + a[j2]; x1r = a[j0] + a[j2 + 1]; x1i = a[j0 + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wd1i * x0r - wd1r * x0i; y0i = wd1i * x0i + wd1r * x0r; y2r = wk1i * x2r - wk1r * x2i; y2i = wk1i * x2i + wk1r * x2r; a[j0] = y0r + y2r; a[j0 + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wd3i * x1r + wd3r * x1i; y0i = wd3i * x1i - wd3r * x1r; y2r = wk3i * x3r + wk3r * x3i; y2i = wk3i * x3i - wk3r * x3r; a[j2] = y0r + y2r; a[j2 + 1] = y0i + y2i; a[j3] = y0r - y2r; a[j3 + 1] = y0i - y2i; } wk1r = w[m]; wk1i = w[m + 1]; j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] - a[j2 + 1]; x0i = a[j0 + 1] + a[j2]; x1r = a[j0] + a[j2 + 1]; x1i = a[j0 + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wk1r * x0r - wk1i * x0i; y0i = wk1r * x0i + wk1i * x0r; y2r = wk1i * x2r - wk1r * x2i; y2i = wk1i * x2i + wk1r * x2r; a[j0] = y0r + y2r; a[j0 + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wk1i * x1r - wk1r * x1i; y0i = wk1i * x1i + wk1r * x1r; y2r = wk1r * x3r - wk1i * x3i; y2i = wk1r * x3i + wk1i * x3r; a[j2] = y0r - y2r; a[j2 + 1] = y0i - y2i; a[j3] = y0r + y2r; a[j3 + 1] = y0i + y2i; } void cftfx41(int n, float32 *a, int nw, float32 *w) { void cftf161(float32 *a, float32 *w); void cftf162(float32 *a, float32 *w); void cftf081(float32 *a, float32 *w); void cftf082(float32 *a, float32 *w); if (n == 128) { cftf161(a, &w[nw - 8]); cftf162(&a[32], &w[nw - 32]); cftf161(&a[64], &w[nw - 8]); cftf161(&a[96], &w[nw - 8]); } else { cftf081(a, &w[nw - 8]); cftf082(&a[16], &w[nw - 8]); cftf081(&a[32], &w[nw - 8]); cftf081(&a[48], &w[nw - 8]); } } void cftf161(float32 *a, float32 *w) { float32 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; wn4r = w[1]; wk1r = w[2]; wk1i = w[3]; x0r = a[0] + a[16]; x0i = a[1] + a[17]; x1r = a[0] - a[16]; x1i = a[1] - a[17]; x2r = a[8] + a[24]; x2i = a[9] + a[25]; x3r = a[8] - a[24]; x3i = a[9] - a[25]; y0r = x0r + x2r; y0i = x0i + x2i; y4r = x0r - x2r; y4i = x0i - x2i; y8r = x1r - x3i; y8i = x1i + x3r; y12r = x1r + x3i; y12i = x1i - x3r; x0r = a[2] + a[18]; x0i = a[3] + a[19]; x1r = a[2] - a[18]; x1i = a[3] - a[19]; x2r = a[10] + a[26]; x2i = a[11] + a[27]; x3r = a[10] - a[26]; x3i = a[11] - a[27]; y1r = x0r + x2r; y1i = x0i + x2i; y5r = x0r - x2r; y5i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y9r = wk1r * x0r - wk1i * x0i; y9i = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; y13r = wk1i * x0r - wk1r * x0i; y13i = wk1i * x0i + wk1r * x0r; x0r = a[4] + a[20]; x0i = a[5] + a[21]; x1r = a[4] - a[20]; x1i = a[5] - a[21]; x2r = a[12] + a[28]; x2i = a[13] + a[29]; x3r = a[12] - a[28]; x3i = a[13] - a[29]; y2r = x0r + x2r; y2i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y10r = wn4r * (x0r - x0i); y10i = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; y14r = wn4r * (x0r + x0i); y14i = wn4r * (x0i - x0r); x0r = a[6] + a[22]; x0i = a[7] + a[23]; x1r = a[6] - a[22]; x1i = a[7] - a[23]; x2r = a[14] + a[30]; x2i = a[15] + a[31]; x3r = a[14] - a[30]; x3i = a[15] - a[31]; y3r = x0r + x2r; y3i = x0i + x2i; y7r = x0r - x2r; y7i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y11r = wk1i * x0r - wk1r * x0i; y11i = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; y15r = wk1r * x0r - wk1i * x0i; y15i = wk1r * x0i + wk1i * x0r; x0r = y12r - y14r; x0i = y12i - y14i; x1r = y12r + y14r; x1i = y12i + y14i; x2r = y13r - y15r; x2i = y13i - y15i; x3r = y13r + y15r; x3i = y13i + y15i; a[24] = x0r + x2r; a[25] = x0i + x2i; a[26] = x0r - x2r; a[27] = x0i - x2i; a[28] = x1r - x3i; a[29] = x1i + x3r; a[30] = x1r + x3i; a[31] = x1i - x3r; x0r = y8r + y10r; x0i = y8i + y10i; x1r = y8r - y10r; x1i = y8i - y10i; x2r = y9r + y11r; x2i = y9i + y11i; x3r = y9r - y11r; x3i = y9i - y11i; a[16] = x0r + x2r; a[17] = x0i + x2i; a[18] = x0r - x2r; a[19] = x0i - x2i; a[20] = x1r - x3i; a[21] = x1i + x3r; a[22] = x1r + x3i; a[23] = x1i - x3r; x0r = y5r - y7i; x0i = y5i + y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); x0r = y5r + y7i; x0i = y5i - y7r; x3r = wn4r * (x0r - x0i); x3i = wn4r * (x0i + x0r); x0r = y4r - y6i; x0i = y4i + y6r; x1r = y4r + y6i; x1i = y4i - y6r; a[8] = x0r + x2r; a[9] = x0i + x2i; a[10] = x0r - x2r; a[11] = x0i - x2i; a[12] = x1r - x3i; a[13] = x1i + x3r; a[14] = x1r + x3i; a[15] = x1i - x3r; x0r = y0r + y2r; x0i = y0i + y2i; x1r = y0r - y2r; x1i = y0i - y2i; x2r = y1r + y3r; x2i = y1i + y3i; x3r = y1r - y3r; x3i = y1i - y3i; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x0r - x2r; a[3] = x0i - x2i; a[4] = x1r - x3i; a[5] = x1i + x3r; a[6] = x1r + x3i; a[7] = x1i - x3r; } void cftf162(float32 *a, float32 *w) { float32 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r, x2i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; wn4r = w[1]; wk1r = w[4]; wk1i = w[5]; wk3r = w[6]; wk3i = -w[7]; wk2r = w[8]; wk2i = w[9]; x1r = a[0] - a[17]; x1i = a[1] + a[16]; x0r = a[8] - a[25]; x0i = a[9] + a[24]; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); y0r = x1r + x2r; y0i = x1i + x2i; y4r = x1r - x2r; y4i = x1i - x2i; x1r = a[0] + a[17]; x1i = a[1] - a[16]; x0r = a[8] + a[25]; x0i = a[9] - a[24]; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); y8r = x1r - x2i; y8i = x1i + x2r; y12r = x1r + x2i; y12i = x1i - x2r; x0r = a[2] - a[19]; x0i = a[3] + a[18]; x1r = wk1r * x0r - wk1i * x0i; x1i = wk1r * x0i + wk1i * x0r; x0r = a[10] - a[27]; x0i = a[11] + a[26]; x2r = wk3i * x0r - wk3r * x0i; x2i = wk3i * x0i + wk3r * x0r; y1r = x1r + x2r; y1i = x1i + x2i; y5r = x1r - x2r; y5i = x1i - x2i; x0r = a[2] + a[19]; x0i = a[3] - a[18]; x1r = wk3r * x0r - wk3i * x0i; x1i = wk3r * x0i + wk3i * x0r; x0r = a[10] + a[27]; x0i = a[11] - a[26]; x2r = wk1r * x0r + wk1i * x0i; x2i = wk1r * x0i - wk1i * x0r; y9r = x1r - x2r; y9i = x1i - x2i; y13r = x1r + x2r; y13i = x1i + x2i; x0r = a[4] - a[21]; x0i = a[5] + a[20]; x1r = wk2r * x0r - wk2i * x0i; x1i = wk2r * x0i + wk2i * x0r; x0r = a[12] - a[29]; x0i = a[13] + a[28]; x2r = wk2i * x0r - wk2r * x0i; x2i = wk2i * x0i + wk2r * x0r; y2r = x1r + x2r; y2i = x1i + x2i; y6r = x1r - x2r; y6i = x1i - x2i; x0r = a[4] + a[21]; x0i = a[5] - a[20]; x1r = wk2i * x0r - wk2r * x0i; x1i = wk2i * x0i + wk2r * x0r; x0r = a[12] + a[29]; x0i = a[13] - a[28]; x2r = wk2r * x0r - wk2i * x0i; x2i = wk2r * x0i + wk2i * x0r; y10r = x1r - x2r; y10i = x1i - x2i; y14r = x1r + x2r; y14i = x1i + x2i; x0r = a[6] - a[23]; x0i = a[7] + a[22]; x1r = wk3r * x0r - wk3i * x0i; x1i = wk3r * x0i + wk3i * x0r; x0r = a[14] - a[31]; x0i = a[15] + a[30]; x2r = wk1i * x0r - wk1r * x0i; x2i = wk1i * x0i + wk1r * x0r; y3r = x1r + x2r; y3i = x1i + x2i; y7r = x1r - x2r; y7i = x1i - x2i; x0r = a[6] + a[23]; x0i = a[7] - a[22]; x1r = wk1i * x0r + wk1r * x0i; x1i = wk1i * x0i - wk1r * x0r; x0r = a[14] + a[31]; x0i = a[15] - a[30]; x2r = wk3i * x0r - wk3r * x0i; x2i = wk3i * x0i + wk3r * x0r; y11r = x1r + x2r; y11i = x1i + x2i; y15r = x1r - x2r; y15i = x1i - x2i; x1r = y0r + y2r; x1i = y0i + y2i; x2r = y1r + y3r; x2i = y1i + y3i; a[0] = x1r + x2r; a[1] = x1i + x2i; a[2] = x1r - x2r; a[3] = x1i - x2i; x1r = y0r - y2r; x1i = y0i - y2i; x2r = y1r - y3r; x2i = y1i - y3i; a[4] = x1r - x2i; a[5] = x1i + x2r; a[6] = x1r + x2i; a[7] = x1i - x2r; x1r = y4r - y6i; x1i = y4i + y6r; x0r = y5r - y7i; x0i = y5i + y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[8] = x1r + x2r; a[9] = x1i + x2i; a[10] = x1r - x2r; a[11] = x1i - x2i; x1r = y4r + y6i; x1i = y4i - y6r; x0r = y5r + y7i; x0i = y5i - y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[12] = x1r - x2i; a[13] = x1i + x2r; a[14] = x1r + x2i; a[15] = x1i - x2r; x1r = y8r + y10r; x1i = y8i + y10i; x2r = y9r - y11r; x2i = y9i - y11i; a[16] = x1r + x2r; a[17] = x1i + x2i; a[18] = x1r - x2r; a[19] = x1i - x2i; x1r = y8r - y10r; x1i = y8i - y10i; x2r = y9r + y11r; x2i = y9i + y11i; a[20] = x1r - x2i; a[21] = x1i + x2r; a[22] = x1r + x2i; a[23] = x1i - x2r; x1r = y12r - y14i; x1i = y12i + y14r; x0r = y13r + y15i; x0i = y13i - y15r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[24] = x1r + x2r; a[25] = x1i + x2i; a[26] = x1r - x2r; a[27] = x1i - x2i; x1r = y12r + y14i; x1i = y12i - y14r; x0r = y13r - y15i; x0i = y13i + y15r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[28] = x1r - x2i; a[29] = x1i + x2r; a[30] = x1r + x2i; a[31] = x1i - x2r; } void cftf081(float32 *a, float32 *w) { float32 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = w[1]; x0r = a[0] + a[8]; x0i = a[1] + a[9]; x1r = a[0] - a[8]; x1i = a[1] - a[9]; x2r = a[4] + a[12]; x2i = a[5] + a[13]; x3r = a[4] - a[12]; x3i = a[5] - a[13]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[2] + a[10]; x0i = a[3] + a[11]; x1r = a[2] - a[10]; x1i = a[3] - a[11]; x2r = a[6] + a[14]; x2i = a[7] + a[15]; x3r = a[6] - a[14]; x3i = a[7] - a[15]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[8] = y1r + y5r; a[9] = y1i + y5i; a[10] = y1r - y5r; a[11] = y1i - y5i; a[12] = y3r - y7i; a[13] = y3i + y7r; a[14] = y3r + y7i; a[15] = y3i - y7r; a[0] = y0r + y4r; a[1] = y0i + y4i; a[2] = y0r - y4r; a[3] = y0i - y4i; a[4] = y2r - y6i; a[5] = y2i + y6r; a[6] = y2r + y6i; a[7] = y2i - y6r; } void cftf082(float32 *a, float32 *w) { float32 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = w[1]; wk1r = w[2]; wk1i = w[3]; y0r = a[0] - a[9]; y0i = a[1] + a[8]; y1r = a[0] + a[9]; y1i = a[1] - a[8]; x0r = a[4] - a[13]; x0i = a[5] + a[12]; y2r = wn4r * (x0r - x0i); y2i = wn4r * (x0i + x0r); x0r = a[4] + a[13]; x0i = a[5] - a[12]; y3r = wn4r * (x0r - x0i); y3i = wn4r * (x0i + x0r); x0r = a[2] - a[11]; x0i = a[3] + a[10]; y4r = wk1r * x0r - wk1i * x0i; y4i = wk1r * x0i + wk1i * x0r; x0r = a[2] + a[11]; x0i = a[3] - a[10]; y5r = wk1i * x0r - wk1r * x0i; y5i = wk1i * x0i + wk1r * x0r; x0r = a[6] - a[15]; x0i = a[7] + a[14]; y6r = wk1i * x0r - wk1r * x0i; y6i = wk1i * x0i + wk1r * x0r; x0r = a[6] + a[15]; x0i = a[7] - a[14]; y7r = wk1r * x0r - wk1i * x0i; y7i = wk1r * x0i + wk1i * x0r; x0r = y0r + y2r; x0i = y0i + y2i; x1r = y4r + y6r; x1i = y4i + y6i; a[0] = x0r + x1r; a[1] = x0i + x1i; a[2] = x0r - x1r; a[3] = x0i - x1i; x0r = y0r - y2r; x0i = y0i - y2i; x1r = y4r - y6r; x1i = y4i - y6i; a[4] = x0r - x1i; a[5] = x0i + x1r; a[6] = x0r + x1i; a[7] = x0i - x1r; x0r = y1r - y3i; x0i = y1i + y3r; x1r = y5r - y7r; x1i = y5i - y7i; a[8] = x0r + x1r; a[9] = x0i + x1i; a[10] = x0r - x1r; a[11] = x0i - x1i; x0r = y1r + y3i; x0i = y1i - y3r; x1r = y5r + y7r; x1i = y5i + y7i; a[12] = x0r - x1i; a[13] = x0i + x1r; a[14] = x0r + x1i; a[15] = x0i - x1r; } void cftf040(float32 *a) { float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[4]; x0i = a[1] + a[5]; x1r = a[0] - a[4]; x1i = a[1] - a[5]; x2r = a[2] + a[6]; x2i = a[3] + a[7]; x3r = a[2] - a[6]; x3i = a[3] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x1r - x3i; a[3] = x1i + x3r; a[4] = x0r - x2r; a[5] = x0i - x2i; a[6] = x1r + x3i; a[7] = x1i - x3r; } void cftb040(float32 *a) { float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[4]; x0i = a[1] + a[5]; x1r = a[0] - a[4]; x1i = a[1] - a[5]; x2r = a[2] + a[6]; x2i = a[3] + a[7]; x3r = a[2] - a[6]; x3i = a[3] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x1r + x3i; a[3] = x1i - x3r; a[4] = x0r - x2r; a[5] = x0i - x2i; a[6] = x1r - x3i; a[7] = x1i + x3r; } void cftx020(float32 *a) { float32 x0r, x0i; x0r = a[0] - a[2]; x0i = a[1] - a[3]; a[0] += a[2]; a[1] += a[3]; a[2] = x0r; a[3] = x0i; } void rftfsub(int n, float32 *a, int nc, float32 *c) { int j, k, kk, ks, m; float32 wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } void rftbsub(int n, float32 *a, int nc, float32 *c) { int j, k, kk, ks, m; float32 wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } void dctsub(int n, float32 *a, int nc, float32 *c) { int j, k, kk, ks, m; float32 wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[j] - wkr * a[k]; a[j] = wkr * a[j] + wki * a[k]; a[k] = xr; } a[m] *= c[0]; } void dstsub(int n, float32 *a, int nc, float32 *c) { int j, k, kk, ks, m; float32 wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[k] - wkr * a[j]; a[k] = wkr * a[k] + wki * a[j]; a[j] = xr; } a[m] *= c[0]; } }