1
2
3
4
5
6
7
8
9
10
11
12
13 #include <asm/ppc_asm.h>
14 #include <asm/asm-offsets.h>
15
/*
 * Pointer registers. r3 and r4 are used as received on entry; r5 is first
 * copied to CTR by the function body (mtctr r5) and only afterwards
 * overwritten with the address of the constant table.
 */
16 #define rHP r3 /* base for the lwz/stw accesses to the five state words */
17 #define rWP r4 /* base used by LOAD_DATA to fetch the input words */
18 #define rKP r5 /* base used by the LOAD_Kn1 macros (constant table) */
19
/*
 * r14-r21 are the message-schedule registers. The round macros operate on
 * them with SPE ev* instructions, so each one carries a pair of 32-bit
 * schedule words. INITIALIZE/FINALIZE save and restore them with 64-bit
 * evstdw/evldw accesses.
 */
20 #define rW0 r14
21 #define rW1 r15
22 #define rW2 r16
23 #define rW3 r17
24 #define rW4 r18
25 #define rW5 r19
26 #define rW6 r20
27 #define rW7 r21
28
/*
 * r6-r10 hold the five 32-bit working values. They are loaded from
 * 0..16(rHP) before the loop, passed to the round macros as a..e, and
 * stored back to the same offsets after every block. INITIALIZE does not
 * save them.
 */
29 #define rH0 r6
30 #define rH1 r7
31 #define rH2 r8
32 #define rH3 r9
33 #define rH4 r10
34
/* Scratch registers used inside the round macros. */
35 #define rT0 r22 /* target of ev* instructions (64-bit use); saved by INITIALIZE */
36 #define rT1 r0 /* scratch; r0 is also what FINALIZE zeroes and stores */
37 #define rT2 r11 /* scratch for the integer (non-ev) instructions */
38 #define rT3 r12 /* holds a freshly loaded word in the last two R_00_15 calls, later state word 0 */
39
/* Current round constant; written only by the LOAD_Kn1 macros (evlwwsplat). */
40 #define rK r23
41
/*
 * Round-constant loaders. The round macros paste their k argument into the
 * name (LOAD_K##k##1), so k = 0 expands to nothing and k = 1..4 reloads rK
 * from the word at byte offset 0, 4, 8 or 12 of the table rKP points to.
 * evlwwsplat is the SPE "load word and splat" form, i.e. the loaded word
 * is placed in both 32-bit halves of rK.
 */
42 #define LOAD_K01 /* k = 0: keep the constant already in rK */
43
44 #define LOAD_K11 \
45 evlwwsplat rK,0(rKP); /* table word 0 */
46
47 #define LOAD_K21 \
48 evlwwsplat rK,4(rKP); /* table word 1 */
49
50 #define LOAD_K31 \
51 evlwwsplat rK,8(rKP); /* table word 2 */
52
53 #define LOAD_K41 \
54 evlwwsplat rK,12(rKP); /* table word 3 */
55
/*
 * INITIALIZE - prologue of the transform function.
 * Opens a 128-byte stack frame and saves r14-r23 (rW0-rW7, rT0, rK) into
 * 8-byte slots at offsets 8..80. evstdw stores the full 64-bit register,
 * which is required because the round macros modify these registers with
 * ev* instructions. FINALIZE relies on exactly this layout.
 */
56 #define INITIALIZE \
57 stwu r1,-128(r1); /* allocate the frame; old r1 is stored at its base */ \
58 evstdw r14,8(r1); /* rW0 */ \
59 evstdw r15,16(r1); /* rW1 */ \
60 evstdw r16,24(r1); /* rW2 */ \
61 evstdw r17,32(r1); /* rW3 */ \
62 evstdw r18,40(r1); /* rW4 */ \
63 evstdw r19,48(r1); /* rW5 */ \
64 evstdw r20,56(r1); /* rW6 */ \
65 evstdw r21,64(r1); /* rW7 */ \
66 evstdw r22,72(r1); /* rT0 */ \
67 evstdw r23,80(r1); /* rK */
68
69
/*
 * FINALIZE - epilogue of the transform function.
 * Reloads r14-r23 from the slots written by INITIALIZE, then stores a zero
 * word over the first 4 bytes of each 8-byte slot before releasing the
 * frame. The second word of every slot is left as it is.
 * NOTE(review): the zero stores look like a scrub of saved register
 * contents so they do not linger on the stack; why only half of each slot
 * is cleared cannot be determined from this file - confirm the intent.
 * Clobbers r0.
 */
70 #define FINALIZE \
71 evldw r14,8(r1); /* restore the 64-bit registers saved by INITIALIZE */ \
72 evldw r15,16(r1); \
73 evldw r16,24(r1); \
74 evldw r17,32(r1); \
75 evldw r18,40(r1); \
76 evldw r19,48(r1); \
77 evldw r20,56(r1); \
78 evldw r21,64(r1); \
79 evldw r22,72(r1); \
80 evldw r23,80(r1); \
81 xor r0,r0,r0; /* r0 = 0 */ \
82 stw r0,8(r1); /* zero the first word of each save slot */ \
83 stw r0,16(r1); \
84 stw r0,24(r1); \
85 stw r0,32(r1); \
86 stw r0,40(r1); \
87 stw r0,48(r1); \
88 stw r0,56(r1); \
89 stw r0,64(r1); \
90 stw r0,72(r1); \
91 stw r0,80(r1); \
92 addi r1,r1,128; /* release the 128-byte frame */
93
/*
 * LOAD_DATA(reg, off) fetches one 32-bit input word into reg; NEXT_BLOCK is
 * invoked once per block by the function body.
 *
 * __BIG_ENDIAN__ build: the word is read with a plain lwz at byte offset
 * off from rWP, and NEXT_BLOCK advances rWP by 64 after the block.
 *
 * Other builds: lwbrx performs a byte-reversed load at rWP and rWP is then
 * advanced by 4, so off is ignored and the words must be requested in
 * ascending order. After the sixteen loads of a block rWP has already moved
 * 64 bytes, hence NEXT_BLOCK is empty.
 */
94 #ifdef __BIG_ENDIAN__
95 #define LOAD_DATA(reg, off) \
96 lwz reg,off(rWP); /* word at fixed offset from the block start */
97 #define NEXT_BLOCK \
98 addi rWP,rWP,64; /* step to the next 64-byte block */
99 #else
100 #define LOAD_DATA(reg, off) \
101 lwbrx reg,0,rWP; /* byte-reversed load at the running pointer */ \
102 addi rWP,rWP,4; /* advance to the next word */
103 #define NEXT_BLOCK /* pointer already advanced by LOAD_DATA */
104 #endif
105
/*
 * R_00_15 - two interleaved SHA-1 rounds whose message words come straight
 * from the input buffer.
 *
 *   a..e  working values. Round 1 treats them as (A,B,C,D,E) and
 *         accumulates into e. Round 2 treats (e,a,b,c,d) as (A,B,C,D,E)
 *         and accumulates into d. b and a are rotated right by 2
 *         (= left by 30) once they have served as B.
 *   w0    receives the first input word (temporary only).
 *   w1    receives the second input word; on exit evmergelo has packed the
 *         low words of w1 and w0 into w1.
 *   k     selects LOAD_K<k>1. The load sits ahead of both uses of rK, so a
 *         non-zero k already applies to this invocation.
 *   off   byte offset of the first word (only used by the lwz variant of
 *         LOAD_DATA).
 *
 * Round function: F = (B and C) or (D and not B).
 * Lines tagged "1:" belong to the first round, "2:" to the second.
 * Clobbers rT0, rT1, rT2 (and rK when k is non-zero).
 */
106 #define R_00_15(a, b, c, d, e, w0, w1, k, off) \
107 LOAD_DATA(w0, off) /* 1: fetch W */ \
108 and rT2,b,c; /* 1: B and C */ \
109 LOAD_K##k##1 /* optional reload of rK */ \
110 andc rT1,d,b; /* 1: D and not B */ \
111 rotrwi rT0,a,27; /* 1: A rotated left by 5 */ \
112 or rT2,rT2,rT1; /* 1: F */ \
113 add e,e,rT0; /* 1: E += rotl(A,5) */ \
114 rotrwi b,b,2; /* 1: B = rotl(B,30) */ \
115 add e,e,w0; /* 1: E += W */ \
116 LOAD_DATA(w1, off+4) /* 2: fetch W */ \
117 add e,e,rT2; /* 1: E += F */ \
118 and rT1,a,b; /* 2: B and C (b is already rotated) */ \
119 add e,e,rK; /* 1: E += K */ \
120 andc rT2,c,a; /* 2: D and not B */ \
121 add d,d,rK; /* 2: E += K */ \
122 or rT2,rT2,rT1; /* 2: F */ \
123 rotrwi rT0,e,27; /* 2: A rotated left by 5 */ \
124 add d,d,w1; /* 2: E += W */ \
125 rotrwi a,a,2; /* 2: B = rotl(B,30) */ \
126 add d,d,rT0; /* 2: E += rotl(A,5) */ \
127 evmergelo w1,w1,w0; /* pack both loaded words into w1 */ \
128 add d,d,rT2 /* 2: E += F */
129
/*
 * R_16_19 - two interleaved SHA-1 rounds with the same round function as
 * R_00_15, but with the message words derived from earlier ones.
 *
 *   a..e      working values, same role rotation as in R_00_15 (round 1
 *             accumulates into e, round 2 into d).
 *   w0        word pair that is updated in place:
 *             w0 = rotl1(w0 xor merge(w7,w6) xor w4 xor w1), both halves at
 *             once. evmergelohi builds the operand that straddles w6/w7.
 *   w1,w4,    earlier word pairs that feed the xor above (read only).
 *   w6,w7
 *   k         selects LOAD_K<k>1. The load is placed after evaddw, the only
 *             reader of rK here, so a non-zero k first applies to the
 *             invocation that follows.
 *
 * evaddw adds rK to both new words. Round 1 consumes the low word of rT0
 * directly, and evmergehi copies the high word into the low half of rT1 for
 * round 2.
 * Round function: F = (B and C) or (D and not B).
 * Clobbers rT0, rT1, rT2 (and rK when k is non-zero).
 */
130 #define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
131 and rT2,b,c; /* 1: B and C */ \
132 evmergelohi rT0,w7,w6; /* pair taken from low(w7) / high(w6) */ \
133 andc rT1,d,b; /* 1: D and not B */ \
134 evxor w0,w0,rT0; /* W ^= merged pair */ \
135 or rT1,rT1,rT2; /* 1: F */ \
136 evxor w0,w0,w4; /* W ^= w4 */ \
137 add e,e,rT1; /* 1: E += F */ \
138 evxor w0,w0,w1; /* W ^= w1 */ \
139 rotrwi rT2,a,27; /* 1: A rotated left by 5 */ \
140 evrlwi w0,w0,1; /* rotate both new words left by 1 */ \
141 add e,e,rT2; /* 1: E += rotl(A,5) */ \
142 evaddw rT0,w0,rK; /* both words plus K */ \
143 rotrwi b,b,2; /* 1: B = rotl(B,30) */ \
144 LOAD_K##k##1 /* optional reload of rK for later rounds */ \
145 evmergehi rT1,rT1,rT0; /* low(rT1) = high(rT0): W+K for round 2 */ \
146 add e,e,rT0; /* 1: E += W + K */ \
147 add d,d,rT1; /* 2: E += W + K */ \
148 and rT2,a,b; /* 2: B and C */ \
149 andc rT1,c,a; /* 2: D and not B */ \
150 rotrwi rT0,e,27; /* 2: A rotated left by 5 */ \
151 or rT1,rT1,rT2; /* 2: F */ \
152 add d,d,rT0; /* 2: E += rotl(A,5) */ \
153 rotrwi a,a,2; /* 2: B = rotl(B,30) */ \
154 add d,d,rT1 /* 2: E += F */
155
/*
 * R_20_39 - two interleaved SHA-1 rounds using the parity round function.
 *
 * Parameters and message-word handling are the same as in R_16_19: w0 is
 * replaced by rotl1(w0 xor merge(w7,w6) xor w4 xor w1), rK is added to both
 * new words, and a non-zero k reloads rK after its last use here. Round 1
 * accumulates into e with (a,b,c,d,e) as (A,B,C,D,E); round 2 accumulates
 * into d with (e,a,b,c,d) in those roles.
 *
 * Round function: F = B xor C xor D.
 * Clobbers rT0, rT1, rT2 (and rK when k is non-zero).
 */
156 #define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
157 evmergelohi rT0,w7,w6; /* pair taken from low(w7) / high(w6) */ \
158 xor rT2,b,c; /* 1: B xor C */ \
159 evxor w0,w0,rT0; /* W ^= merged pair */ \
160 xor rT2,rT2,d; /* 1: F */ \
161 evxor w0,w0,w4; /* W ^= w4 */ \
162 add e,e,rT2; /* 1: E += F */ \
163 evxor w0,w0,w1; /* W ^= w1 */ \
164 rotrwi rT2,a,27; /* 1: A rotated left by 5 */ \
165 evrlwi w0,w0,1; /* rotate both new words left by 1 */ \
166 add e,e,rT2; /* 1: E += rotl(A,5) */ \
167 evaddw rT0,w0,rK; /* both words plus K */ \
168 rotrwi b,b,2; /* 1: B = rotl(B,30) */ \
169 LOAD_K##k##1 /* optional reload of rK for later rounds */ \
170 evmergehi rT1,rT1,rT0; /* low(rT1) = high(rT0): W+K for round 2 */ \
171 add e,e,rT0; /* 1: E += W + K */ \
172 xor rT2,a,b; /* 2: B xor C */ \
173 add d,d,rT1; /* 2: E += W + K */ \
174 xor rT2,rT2,c; /* 2: F */ \
175 rotrwi rT0,e,27; /* 2: A rotated left by 5 */ \
176 add d,d,rT2; /* 2: E += F */ \
177 rotrwi a,a,2; /* 2: B = rotl(B,30) */ \
178 add d,d,rT0 /* 2: E += rotl(A,5) */
179
/*
 * R_40_59 - two interleaved SHA-1 rounds using the majority round function.
 *
 * Parameters and message-word handling are the same as in R_16_19: w0 is
 * replaced by rotl1(w0 xor merge(w7,w6) xor w4 xor w1), rK is added to both
 * new words, and a non-zero k reloads rK after its last use here. Round 1
 * accumulates into e with (a,b,c,d,e) as (A,B,C,D,E); round 2 accumulates
 * into d with (e,a,b,c,d) in those roles.
 *
 * Round function: F = (B and C) or (D and (B or C)).
 * In round 2 rT0 doubles as a 32-bit temporary, because rT1 still holds
 * the W+K value until it has been added to d.
 * Clobbers rT0, rT1, rT2 (and rK when k is non-zero).
 */
180 #define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
181 and rT2,b,c; /* 1: B and C */ \
182 evmergelohi rT0,w7,w6; /* pair taken from low(w7) / high(w6) */ \
183 or rT1,b,c; /* 1: B or C */ \
184 evxor w0,w0,rT0; /* W ^= merged pair */ \
185 and rT1,d,rT1; /* 1: D and (B or C) */ \
186 evxor w0,w0,w4; /* W ^= w4 */ \
187 or rT2,rT2,rT1; /* 1: F */ \
188 evxor w0,w0,w1; /* W ^= w1 */ \
189 add e,e,rT2; /* 1: E += F */ \
190 evrlwi w0,w0,1; /* rotate both new words left by 1 */ \
191 rotrwi rT2,a,27; /* 1: A rotated left by 5 */ \
192 evaddw rT0,w0,rK; /* both words plus K */ \
193 add e,e,rT2; /* 1: E += rotl(A,5) */ \
194 LOAD_K##k##1 /* optional reload of rK for later rounds */ \
195 evmergehi rT1,rT1,rT0; /* low(rT1) = high(rT0): W+K for round 2 */ \
196 rotrwi b,b,2; /* 1: B = rotl(B,30) */ \
197 add e,e,rT0; /* 1: E += W + K */ \
198 and rT2,a,b; /* 2: B and C */ \
199 or rT0,a,b; /* 2: B or C */ \
200 add d,d,rT1; /* 2: E += W + K */ \
201 and rT0,c,rT0; /* 2: D and (B or C) */ \
202 rotrwi a,a,2; /* 2: B = rotl(B,30) */ \
203 or rT2,rT2,rT0; /* 2: F */ \
204 rotrwi rT0,e,27; /* 2: A rotated left by 5 */ \
205 add d,d,rT2; /* 2: E += F */ \
206 add d,d,rT0 /* 2: E += rotl(A,5) */
207
/*
 * R_60_79 - the last twenty rounds expand to exactly the same code as
 * R_20_39 (same round function); only the constant in rK differs, and that
 * is loaded by the caller's choice of k in an earlier invocation.
 */
208 #define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
209 R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
210
/*
 * ppc_spe_sha1_transform - run the SHA-1 block transform over consecutive
 * 64-byte input blocks, updating the state in place.
 *
 * Register usage on entry, as read by the code below:
 *   r3 (rHP)  address of five 32-bit state words (read at start, rewritten
 *             after every block)
 *   r4 (rWP)  address of the input data; advanced by 64 bytes per block
 *   r5        loop count, moved to CTR; one block is consumed per iteration
 *
 * NOTE(review): the loop is bottom-tested with bdnz, so the body always
 * runs before the count is checked. A count of 0 is therefore not treated
 * as "nothing to do" - callers presumably guarantee count >= 1; verify
 * against the C glue code.
 *
 * Each round macro performs two rounds, so the 40 invocations below make up
 * the 80 rounds of one block. The a..e arguments rotate with a period of
 * five invocations; after 40 of them rH0..rH4 again line up with state
 * words 0..4. The k argument is non-zero on the first R_00_15 and on the
 * last invocation before each change of round function, which is where rK
 * is reloaded.
 */
211 _GLOBAL(ppc_spe_sha1_transform)
212 INITIALIZE /* open frame, save r14-r23 */
213
214 lwz rH0,0(rHP) /* load state words 0..4 into rH0..rH4 */
215 lwz rH1,4(rHP)
216 mtctr r5 /* CTR = loop count; must happen before r5 is reused as rKP */
217 lwz rH2,8(rHP)
218 lis rKP,PPC_SPE_SHA1_K@h /* rKP = address of the constant table (high half) */
219 lwz rH3,12(rHP)
220 ori rKP,rKP,PPC_SPE_SHA1_K@l /* ... low half */
221 lwz rH4,16(rHP)
222
223 ppc_spe_sha1_main:
224 R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0) /* rounds 0-1, (re)loads table word 0 into rK */
225 R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
226 R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
227 R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
228 R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
229 R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
230 R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48) /* rT3 stands in as the temporary word register */
231 R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56) /* rounds 14-15; rW0..rW7 now hold all 16 input words */
232
233 R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
234 R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2) /* rounds 18-19, then rK = table word 1 */
235
236 R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
237 R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
238 R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
239 R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
240 R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
241 R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
242 R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
243 R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
244 R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
245 R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3) /* rounds 38-39, then rK = table word 2 */
246
247 R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
248 R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
249 R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
250 R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
251 R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
252 R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
253 R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
254 R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
255 R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
256 R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4) /* rounds 58-59, then rK = table word 3 */
257
258 R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
259 R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
260 R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
261 R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
262 R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
263 R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
264 R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
265 lwz rT3,0(rHP) /* start re-reading the pre-block state, interleaved with the last rounds */
266 R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
267 lwz rW1,4(rHP) /* rW1 is not an operand of any remaining round, reuse it */
268 R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
269 lwz rW2,8(rHP) /* likewise rW2 */
270 R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0) /* rounds 78-79 */
271 lwz rW3,12(rHP)
272 NEXT_BLOCK /* advance rWP if LOAD_DATA did not already do so */
273 lwz rW4,16(rHP)
274
275 add rH0,rH0,rT3 /* new state = previous state + result of the 80 rounds */
276 stw rH0,0(rHP) /* write back; rH0..rH4 also carry the state into the next iteration */
277 add rH1,rH1,rW1
278 stw rH1,4(rHP)
279 add rH2,rH2,rW2
280 stw rH2,8(rHP)
281 add rH3,rH3,rW3
282 stw rH3,12(rHP)
283 add rH4,rH4,rW4
284 stw rH4,16(rHP)
285
286 bdnz ppc_spe_sha1_main /* decrement CTR, loop while it is non-zero */
287
288 FINALIZE /* restore r14-r23, scrub save slots, release frame */
289 blr
290
/*
 * Round-constant table read by LOAD_K11..LOAD_K41 at byte offsets 0, 4, 8
 * and 12; one 32-bit constant per group of twenty rounds, in round order.
 * NOTE(review): the code visible here only ever reads this table, yet it is
 * placed in the writable .data section - consider whether a read-only
 * section would be more appropriate.
 * NOTE(review): ".align 4" is a power-of-two exponent on some GAS targets
 * and a byte count on others; confirm the intended alignment for this one.
 */
291 .data
292 .align 4
293 PPC_SPE_SHA1_K:
294 .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6