bzr branch
http://suren.me/webbzr/ani/mrses
1
by Suren A. Chilingaryan
Initial import |
1 |
#! /usr/bin/perl
|
2 |
||
3 |
||
4 |
# Command line: <N> <K>.  Both values are required; they control how many
# macros the rest of the script generates ($N bounds the numbered
# DECLARE_/COMPUTE_/SAVE_/SUM_ macro families, $K bounds the per-column
# expansion inside DECLARE_TX/DECLARE_VX and the spu_madd chains).
if (@ARGV < 2) {
    print("Usage: $0 <N> <K> (12 4)\n");
    exit(0);
}

my ($N, $K) = @ARGV[0, 1];

# Output header.  The bareword handle `out` is kept on purpose: every print
# statement in the rest of the script writes to it.  The three-argument form
# keeps the mode separate from the file name, and the result is checked so
# that a failed open is reported instead of silently generating nothing.
open(out, '>', 'vec_potrf_mtxmul.h')
    or die "Cannot open vec_potrf_mtxmul.h for writing: $!\n";
|
13 |
||
14 |
# Fixed macros, emitted verbatim: token-pasting accessors T/R/V and the
# single-register declarations built on top of them.
my @base_macros = (
    '#define T(i,j) T_ ## i ## _ ## j',
    '#define R(i,j) R_ ## i ## _ ## j',
    '#define V(i) V_ ## i',
    '#define DECLARE_T(i, j, A) register vector float T(i,j) = *((A) + i*lda + j);',
    '#define DECLARE_R(i, j) register vector float R(i,j) = spu_splats((float)0.0);',
    '#define DECLARE_V(i, A) register vector float V(i) = *((A) + i);',
);
for my $macro (@base_macros) {
    print out $macro . "\n";
}

# DECLARE_TX(i, A) and DECLARE_VX(A) expand to one DECLARE_T / DECLARE_V per
# column index below $K.
my @row_decls;
my @vec_decls;
for (my $col = 0; $col < $K; $col++) {
    push @row_decls, " DECLARE_T(i, $col, A)";
    push @vec_decls, " DECLARE_V($col, A)";
}
print out join('', '#define DECLARE_TX(i, A)', @row_decls, "\n");
print out join('', '#define DECLARE_VX(A)',    @vec_decls, "\n");
|
29 |
||
30 |
#$rout = "#define DECLARE_RX(i)";
|
|
31 |
#for (my $i=0; $i < $N/2; $i++) {
|
|
32 |
# $rout .= " DECLARE_R(i, $i)";
|
|
33 |
#}
|
|
34 |
#print out "$rout\n";
|
|
35 |
||
36 |
# Base cases of the cumulative DECLARE_Tn / DECLARE_Rn / DECLARE_VRn families.
print out "#define DECLARE_T1(A) DECLARE_TX(0, A)\n"
        . "#define DECLARE_R1 DECLARE_R(0,0)\n"
        . "#define DECLARE_VR1 DECLARE_R(0,0)\n";
# Disabled in the original script, kept for reference:
#print out "#define DECLARE_V1(A) DECLARE_VX(A)\n";

# Each macro number $next re-uses macro $prev and appends what index $prev adds.
for (my $prev = 1; $prev < $N; $prev++) {
    my $next = $prev + 1;

    # DECLARE_Tn: previous rows plus row $prev.
    print out "#define DECLARE_T${next}(A) DECLARE_T${prev}(A) DECLARE_TX($prev, A)\n";
    # Disabled in the original script, kept for reference:
    # print out "#define DECLARE_V". ($i+1) ."(A) DECLARE_V$i(A) DECLARE_VX(A)\n";
    # print out "#define DECLARE_R". ($i+1) ." DECLARE_R$i DECLARE_RX($i)\n";

    # DECLARE_VRn: previous accumulators plus R($prev,0).
    print out "#define DECLARE_VR${next} DECLARE_VR${prev} DECLARE_R($prev,0)\n";

    # DECLARE_Rn: previous accumulators plus R(0..$prev, $prev).
    my $new_regs = join('', map { " DECLARE_R($_, $prev)" } 0 .. $prev);
    print out "#define DECLARE_R${next} DECLARE_R${prev}" . $new_regs . "\n";
}
print out "\n";
|
56 |
||
57 |
# Single multiply-add steps; `var` is the accumulator expression being wrapped.
print out '#define COMPUTE_T(i, j, l, var) spu_madd(T(i,l), T(j,l), var)' . "\n";
print out '#define COMPUTE_V(A, l, k, var) spu_madd(V(k), A[l*lda + k], var)' . "\n";
# Disabled in the original script, kept for reference:
#spu_madd(Arow, A[l * lda + k], temp[l]);

# Nested multiply-add chains.  The seed already covers column indices 0 and 1
# regardless of $K; every further index below $K wraps one more COMPUTE_T /
# COMPUTE_V call around the expression built so far.
# Disabled seed from the original script, kept for reference:
#$out = "COMPUTE_T(i, j, 1, spu_mul(T(i,0), T(j,0)))";
my $row_chain = 'COMPUTE_T(i, j, 1, spu_madd(T(i,0), T(j,0), R(i,j)))';
my $vec_chain = 'COMPUTE_V(A, l, 1, spu_madd(V(0), A[l*lda], R(l,0)))';
my $depth = 2;
while ($depth < $K) {
    $row_chain = "COMPUTE_T(i, j, $depth, $row_chain)";
    $vec_chain = "COMPUTE_V(A, l, $depth, $vec_chain)";
    $depth++;
}

# Disabled variants from the original script, kept for reference:
#print out "#define COMPUTE_TX(i, j, C) *((C) + i * ldc + j) += sum_across_float4($out);\n";
#print out "#define SAVE_TX(i, j, C) *((C) + i * ldc + j) = spu_extract(R(i,j),0);\n";
print out '#define COMPUTE_TX(i, j, C) R(i,j) = ' . $row_chain . ";\n";
print out '#define COMPUTE_VX(A, l) R(l,0) = ' . $vec_chain . ";\n";
print out '#define SAVE_TX(i, j, C) *((C) + i * ldc + j) = sum_across_float4(R(i,j));' . "\n";

# Base cases of the cumulative COMPUTE_Tn / COMPUTE_Vn / SAVE_Tn families.
print out "#define COMPUTE_T1(C) COMPUTE_TX(0, 0, C)\n";
print out "#define COMPUTE_V1(A) COMPUTE_VX(A, 0)\n";
print out "#define SAVE_T1(C) SAVE_TX(0, 0, C)\n";

# Macro number $next re-uses macro $prev, adds the ($prev,$prev) entry first
# and then the entries (0..$prev-1, $prev).
for (my $prev = 1; $prev < $N; $prev++) {
    my $next     = $prev + 1;
    my @partners = 0 .. $prev - 1;

    # Disabled symmetric variant from the original script, kept for reference:
    # $out .= " COMPUTE_TX($j,$i,C) COMPUTE_TX($i,$j,C)";
    my $compute_row = "#define COMPUTE_T${next}(C) COMPUTE_T${prev}(C) COMPUTE_TX($prev,$prev,C)"
                    . join('', map { " COMPUTE_TX($_,$prev,C)" } @partners);
    my $compute_vec = "#define COMPUTE_V${next}(A) COMPUTE_V${prev}(A) COMPUTE_VX(A,$prev)";
    my $save_row    = "#define SAVE_T${next}(C) SAVE_T${prev}(C) SAVE_TX($prev,$prev,C)"
                    . join('', map { " SAVE_TX($_,$prev,C)" } @partners);

    print out $compute_row . "\n";
    print out $compute_vec . "\n";
    print out $save_row . "\n";
}
print out "\n";
|
95 |
||
96 |
# Build the C statements that copy the lanes of `tmp5` to their destinations.
#
# Arguments:
#   $el - array reference; accepted so existing callers keep working, but its
#         contents are not consulted
#   $c  - array reference of C lvalue strings
#
# Returns a single string holding, for every index $i of @$c, the text
# "<lvalue> = spu_extract(tmp5,$i);", concatenated without separators
# (the empty string when @$c is empty).
sub save {
    my ($el, $c) = @_;

    my $out = "";
    # The index is lexical: the previous loop counted in the package
    # variable $i and left it modified after every call.
    for my $i (0 .. $#{$c}) {
        $out .= $c->[$i] . " = spu_extract(tmp5,$i);";
    }
    return $out;
}
|
|
108 |
||
109 |
# SUM_Tn(C) and SUM_Vn(C): accumulator registers are reduced four at a time
# with HSUM (missing operands of the last group are filled with "zero"),
# and the lanes of tmp5 are then stored.
for (my $last = 0; $last < $N; $last++) {
    my $count = $last + 1;

    # SUM_Tn: every R(col, row) with col <= row <= $last, together with the
    # C expression that receives it.
    my @regs;
    my @targets;
    for my $row (0 .. $last) {
        for my $col (0 .. $row) {
            push @regs,    "R($col, $row)";
            push @targets, "*((C) + $row * ldc + $col)";
        }
    }

    my $sum_t = "#define SUM_T${count}(C)";
    while (@regs) {
        my @quad = splice(@regs,    0, 4);
        my @dest = splice(@targets, 0, 4);
        push @quad, "zero" while @quad < 4;
        $sum_t .= " HSUM($quad[0], $quad[1], $quad[2], $quad[3]);";
        # save() emits one spu_extract store per entry of @dest.
        $sum_t .= save(\@quad, \@dest);
    }
    print out $sum_t . "\n";

    # SUM_Vn: R(0..$last, 0); each group of four is stored as a whole vector
    # into consecutive C[] slots.
    my @lanes = map { "R($_, 0)" } 0 .. $last;
    my $sum_v = "#define SUM_V${count}(C)";
    my $slot  = 0;
    while (@lanes) {
        my @quad = splice(@lanes, 0, 4);
        push @quad, "zero" while @quad < 4;
        $sum_v .= " HSUM($quad[0], $quad[1], $quad[2], $quad[3]);";
        $sum_v .= " C[$slot] = tmp5;";
        $slot++;
    }
    print out $sum_v . "\n";
}
|
|
156 |
||
157 |
# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so the result is checked instead of being ignored.
close(out) or die "Cannot finish writing vec_potrf_mtxmul.h: $!\n";