7
int atlas_spotrf2(const int N, MRSESDataType *A, const int lda)
10
MRSESDataType Ajj, *Ac=A, *An=A+lda;
12
for (j=0; j != N; j++)
14
Ajj = Ac[j] - cblas_sdot(j, Ac, 1, Ac, 1);
17
Ac[j] = Ajj = sqrt(Ajj);
20
cblas_sgemv(CblasColMajor, CblasTrans, j, N-j-1, -1,
21
An, lda, Ac, 1, 1, An+j, lda);
22
cblas_sscal(N-j-1, 1/Ajj, An+j, lda);
37
#define TYPE MRSESDataType
43
inline int ATL_potrfU_4(TYPE *A, const short int lda)
45
TYPE *pA1=A+lda, *pA2=pA1+lda, *pA3=pA2+lda;
46
TYPE L11 = *A, L21 = *pA1, L31 = *pA2, L41 = *pA3;
47
TYPE L22 = pA1[1], L32 = pA2[1], L42 = pA3[1];
48
TYPE L33 = pA2[2], L43 = pA3[2];
59
*pA1 = L21; *pA2 = L31; *pA3 = L41;
63
pA1[1] = L22 = sqrt(L22);
65
L32 = (L32 - L31*L21) * L22;
66
L42 = (L42 - L41*L21) * L22;
67
L33 -= L31*L31 + L32*L32;
68
pA2[1] = L32; pA3[1] = L42;
71
pA2[2] = L33 = sqrt(L33);
72
L43 = (L43 - L41*L31 - L42*L32) / L33;
73
L44 -= L41*L41 + L42*L42 + L43*L43;
90
inline int ATL_potrfU_3(TYPE *A, const short int lda)
92
TYPE *pA1=A+lda, *pA2=pA1+lda;
93
register TYPE L11 = *A, L21 = *pA1, L31 = *pA2;
94
register TYPE L22=pA1[1], L32=pA2[1];
95
register TYPE L33=pA2[2];
100
*A = L11 = sqrt(L11);
101
L11 = ATL_rone / L11;
104
*pA1 = L21; *pA2 = L31;
109
L32 = (L32 - L31*L21) / L22;
110
L33 -= L31*L31 + L32*L32;
111
pA1[1] = L22; pA2[1] = L32;
125
inline int ATL_potrfU_2(TYPE *A, const short int lda)
128
register TYPE L11=*A, L21=*pA1, L22 = pA1[1];
132
*A = L11 = sqrt(L11);
133
*pA1 = L21 = L21 / L11;
146
#define INDEX short int
148
gsl_cblas_strsm_clut_1 (const short int M, const short int N, const float *A, float *B, const short int lda)
150
register short int i, j, k;
153
for (i = 0; i < N; i++) {
154
for (j = 0; j < M; j++) {
155
Ajj = A[lda * j + j];
156
Bij = B[lda * i + j] / Ajj;
158
B[lda * i + j] = Bij;
159
for (k = j + 1; k < M; k++) {
160
B[lda * i + k] -= A[k * lda + j] * Bij;
167
gsl_cblas_ssyrk_cut_m11 (const short int N, const short int M, const float *A, float *C, const short int lda) {
168
register short int i, j, k;
170
for (i = 0; i < N; i++) {
171
for (j = 0; j <= i; j++) {
173
for (k = 0; k < M; k++) {
174
temp += A[i * lda + k] * A[j * lda + k];
176
C[i * lda + j] -= temp;
181
inline void gsl_spotrf_step(const short int M, const short int N, float *A, const short int lda) {
182
register short int i, j, k;
184
float *B = A + M*lda;
187
for (i = 0; i < N; i++) {
188
for (j = 0; j < M; j++) {
191
for (k = 0; k < j; k++) {
192
sum += A[j * lda + k] * B[i * lda + k];
195
B[i * lda + j] = (B[i * lda + j] - sum) / A[j * lda + j];
198
for (j = 0; j <= i; j++) {
201
for (k = 0; k < M; k++) {
202
temp += B[i * lda + k] * B[j * lda + k];
205
C[i * lda + j] -= temp;
211
int atlas_spotrf_u(const short int N, TYPE *A, const short int lda)
214
short int Nleft, Nright, ierr;
220
ierr = atlas_spotrf_u(Nleft, A, lda);
223
Ac = A + lda * Nleft;
224
An = Ac + Nleft; //SHIFT;
225
gsl_spotrf_step(Nleft, Nright, A, lda);
228
// gsl_cblas_strsm_clut_1(Nleft, Nright, A, Ac, lda);
229
// gsl_cblas_ssyrk_cut_m11(Nright, Nleft, Ac, An, lda);
232
// cblas_strsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans,
233
// CblasNonUnit, Nleft, Nright, ONE, A, lda, Ac, lda);
237
// cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans, Nright, Nleft,
238
// ATL_rnone, Ac, lda, ATL_rone, An, lda);
241
ierr = atlas_spotrf_u(Nright, An, lda);
242
if (ierr) return(ierr+Nleft);
246
else if (N==4) return(ATL_potrfU_4(A, lda));
247
else if (N==3) return(ATL_potrfU_3(A, lda));
248
else if (N==2) return(ATL_potrfU_2(A, lda));
251
if (*A > ATL_rzero) *A = sqrt(*A);