/ani/mrses

To get this branch, use:
bzr branch http://suren.me/webbzr/ani/mrses
1 by Suren A. Chilingaryan
Initial import
1
#! /usr/bin/perl
#
# Generates vec_potrf_mtxmul.h, a header made of C preprocessor macros
# (DECLARE_*, COMPUTE_*, SAVE_*, SUM_*) built around SPU intrinsics such as
# spu_madd, spu_splats and spu_extract.
#
# Arguments:
#   N - number of cumulative macro levels to generate (DECLARE_T1..DECLARE_TN,
#       COMPUTE_T1..COMPUTE_TN, SUM_T1..SUM_TN, ...)
#   K - number of vector columns handled per row (DECLARE_TX / DECLARE_VX
#       expand to K declarations, COMPUTE_TX / COMPUTE_VX nest K multiply-adds)
#
# Example: <script> 12 4

if ($#ARGV < 1) {
    print("Usage: $0 <N> <K> (12 4)\n");
    exit(0);
}

my $N = $ARGV[0];
my $K = $ARGV[1];

# The bareword handle "out" is kept because the rest of the script prints to
# it.  The three-argument form keeps the mode separate from the file name, and
# the result is checked: without the check a failed open would make every
# later print silently write nothing.
open(out, '>', 'vec_potrf_mtxmul.h')
    or die "Cannot open vec_potrf_mtxmul.h for writing: $!\n";

# Name-pasting macros (T, R, V) and the single-register declaration macros.
# Every entry is written verbatim, followed by a newline.
for my $macro (
    '#define T(i,j) T_ ## i ## _ ## j',
    '#define R(i,j) R_ ## i ## _ ## j',
    '#define V(i) V_ ## i',
    '#define DECLARE_T(i, j, A) register vector float T(i,j) = *((A) + i*lda + j);',
    '#define DECLARE_R(i, j) register vector float R(i,j) = spu_splats((float)0.0);',
    '#define DECLARE_V(i, A) register vector float V(i) = *((A) + i);',
) {
    print out "$macro\n";
}

# DECLARE_TX(i, A) declares the K registers T(i,0)..T(i,K-1) of row i;
# DECLARE_VX(A) declares the K registers V(0)..V(K-1).
my @row_decls = ();
my @vec_decls = ();
for (my $col = 0; $col < $K; $col++) {
    push @row_decls, " DECLARE_T(i, $col, A)";
    push @vec_decls, " DECLARE_V($col, A)";
}
$out  = "#define DECLARE_TX(i, A)" . join("", @row_decls);
$vout = "#define DECLARE_VX(A)" . join("", @vec_decls);
print out "$out\n";
print out "$vout\n";

# Disabled in the original: a DECLARE_RX(i) macro covering N/2 columns.
#$rout = "#define DECLARE_RX(i)";
#for (my $i=0; $i < $N/2; $i++) {
#    $rout .= " DECLARE_R(i, $i)";
#}
#print out "$rout\n";

# Level-1 base cases of the cumulative declaration macros.
print out join("\n",
    '#define DECLARE_T1(A) DECLARE_TX(0, A)',
    '#define DECLARE_R1 DECLARE_R(0,0)',
    '#define DECLARE_VR1 DECLARE_R(0,0)',
), "\n";
#print out "#define DECLARE_V1(A) DECLARE_VX(A)\n";

# Each level size+1 expands to the previous level plus the declarations for
# one more row: DECLARE_TX(size, A) for T, R(size,0) for the vector result,
# and the column R(0,size)..R(size,size) for the triangular result.
for (my $size = 1; $size < $N; $size++) {
    my $next = $size + 1;
    my @column = map(" DECLARE_R($_, $size)", 0 .. $size);

    print out "#define DECLARE_T${next}(A) DECLARE_T${size}(A) DECLARE_TX($size, A)\n";
#    print out "#define DECLARE_V". ($i+1) ."(A) DECLARE_V$i(A) DECLARE_VX(A)\n";

#    print out "#define DECLARE_R". ($i+1) ." DECLARE_R$i DECLARE_RX($i)\n";

    print out "#define DECLARE_VR${next} DECLARE_VR${size} DECLARE_R($size,0)\n";

    print out "#define DECLARE_R${next} DECLARE_R${size}", @column, "\n";
}
print out "\n";

# Single multiply-add steps: COMPUTE_T accumulates T(i,l)*T(j,l) into var,
# COMPUTE_V accumulates V(k)*A[l*lda + k] into var.
print out '#define COMPUTE_T(i, j, l, var) spu_madd(T(i,l), T(j,l), var)' . "\n";

print out '#define COMPUTE_V(A, l, k, var) spu_madd(V(k), A[l*lda + k], var)' . "\n";
#spu_madd(Arow, A[l * lda + k], temp[l]);


# Build the nested multiply-add chains.  The innermost term (column 0) is
# seeded with the accumulator register, column 1 wraps it, and every further
# column up to K-1 wraps the expression built so far.
#$out = "COMPUTE_T(i, j,  1, spu_mul(T(i,0), T(j,0)))";
$out  = 'COMPUTE_T(i, j,  1, spu_madd(T(i,0), T(j,0), R(i,j)))';
$vout = 'COMPUTE_V(A, l, 1, spu_madd(V(0), A[l*lda], R(l,0)))';
for (my $col = 2; $col < $K; $col++) {
    $out  = sprintf('COMPUTE_T(i, j, %s, %s)', $col, $out);
    $vout = sprintf('COMPUTE_V(A, l, %s, %s)', $col, $vout);
}

# COMPUTE_TX / COMPUTE_VX store the chain back into the accumulator register;
# SAVE_TX writes the horizontal sum of the accumulator to C.
#print out "#define COMPUTE_TX(i, j, C) *((C) + i * ldc + j) += sum_across_float4($out);\n";
print out '#define COMPUTE_TX(i, j, C) R(i,j) = ' . $out . ";\n";
print out '#define COMPUTE_VX(A, l) R(l,0) = ' . $vout . ";\n";
print out '#define SAVE_TX(i, j, C) *((C) + i * ldc + j) = sum_across_float4(R(i,j));' . "\n";
#print out "#define SAVE_TX(i, j, C) *((C) + i * ldc + j) = spu_extract(R(i,j),0);\n";

# Level-1 base cases of the cumulative compute/save macros.
for my $macro (
    '#define COMPUTE_T1(C) COMPUTE_TX(0, 0, C)',
    '#define COMPUTE_V1(A) COMPUTE_VX(A, 0)',
    '#define SAVE_T1(C) SAVE_TX(0, 0, C)',
) {
    print out "$macro\n";
}

# Levels 2..N of COMPUTE_T, COMPUTE_V and SAVE_T.  Each level reuses the
# previous one and adds the terms of one more row: the diagonal element
# (row,row) first, then the elements (col,row) for col < row.
for (my $row = 1; $row < $N; $row++) {
    my $next = $row + 1;

    $out  = "#define COMPUTE_T${next}(C) COMPUTE_T${row}(C) COMPUTE_TX($row,$row,C)";
    $vout = "#define COMPUTE_V${next}(A) COMPUTE_V${row}(A) COMPUTE_VX(A,$row)";
    $sout = "#define SAVE_T${next}(C) SAVE_T${row}(C) SAVE_TX($row,$row,C)";

    foreach my $col (0 .. $row - 1) {
        # The mirrored COMPUTE_TX(row,col) term is disabled in the original:
#	$out .= " COMPUTE_TX($j,$i,C) COMPUTE_TX($i,$j,C)";
        $out  .= " COMPUTE_TX($col,$row,C)";
        $sout .= " SAVE_TX($col,$row,C)";
    }

    print out $out, "\n", $vout, "\n", $sout, "\n";
}
print out "\n";

# save(\@el, \@c)
#
# Builds the C statements that copy the lanes of the vector "tmp5" into the
# given destinations: lane 0 goes to $c->[0], lane 1 to $c->[1], and so on.
#
#   $el - array ref of the source expressions; accepted so existing callers
#         keep working, but not needed to build the output
#   $c  - array ref of C lvalue expressions, one per lane to store
#
# Returns the concatenated statements as one string (empty when @$c is empty).
sub save {
    my ($el, $c) = @_;
    my @targets = @{$c};

    my $out = "";
    # The lane index is lexical; the previous undeclared $i wrote to the
    # package global of the same name on every call.
    for my $lane (0 .. $#targets) {
        $out .= "$targets[$lane] = spu_extract(tmp5,$lane);";
    }
    return $out;
}

# SUM_T<n>(C) and SUM_V<n>(C) for n = 1..N.
#
# Accumulator registers are reduced four at a time with HSUM(a, b, c, d); the
# generated code then reads the result from "tmp5".  A final incomplete group
# is padded with the symbol "zero" so HSUM always receives four arguments.
for (my $i = 0; $i < $N; $i++) {
    my $level = $i + 1;

    # SUM_T: walk the registers R(m,l) with m <= l <= i, and after each group
    # of four store the lanes of tmp5 to *((C) + l * ldc + m) via save().
    my $sum_t = "#define SUM_T${level}(C)";
    my @el = ();
    my @c  = ();
    for my $l (0 .. $i) {
        for my $m (0 .. $l) {
            push @el, "R($m, $l)";
            push @c,  "*((C) + $l * ldc + $m)";
            if (@el == 4) {
                $sum_t .= " HSUM($el[0], $el[1], $el[2], $el[3]);";
                $sum_t .= save(\@el, \@c);
                @el = ();
                @c  = ();
            }
        }
    }
    if (@el) {
        # @c is deliberately not padded, so only the real lanes are stored.
        push @el, "zero" while @el < 4;
        $sum_t .= " HSUM($el[0], $el[1], $el[2], $el[3]);";
        $sum_t .= save(\@el, \@c);
    }

    print out "$sum_t\n";

    # SUM_V: reduce R(0,0)..R(i,0) four at a time and assign each tmp5 to
    # consecutive elements C[0], C[1], ...  A separate array is used here; the
    # original re-declared "my @el" in the same scope, masking the first one.
    my $sum_v = "#define SUM_V${level}(C)";
    my @vel  = ();
    my $cidx = 0;
    for my $l (0 .. $i) {
        push @vel, "R($l, 0)";
        if (@vel == 4) {
            $sum_v .= " HSUM($vel[0], $vel[1], $vel[2], $vel[3]);";
            $sum_v .= " C[$cidx] = tmp5;";
            $cidx++;
            @vel = ();
        }
    }

    if (@vel) {
        push @vel, "zero" while @vel < 4;
        $sum_v .= " HSUM($vel[0], $vel[1], $vel[2], $vel[3]);";
        $sum_v .= " C[$cidx] = tmp5;";
    }

    print out "$sum_v\n";
}

# Buffered write errors (for example a full disk) only surface at close time,
# so the result is checked rather than leaving a truncated header unnoticed.
close(out) or die "Cannot close vec_potrf_mtxmul.h: $!\n";