1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 #include <asm/ppc_asm.h>
  14 #include "aes-spe-regs.h"
  15 
  /*
   * EAD(in, bpos) - select one byte of `in` as an index into the table
   * addressed by rT0.
   *
   * rlwimi rotates `in` left by 28-((bpos+3)%4)*8 and inserts bits 20..27
   * (IBM numbering, mask 0x00000ff0) of the result into rT0; every other
   * bit of rT0 is preserved. The net effect is that the selected byte,
   * multiplied by 16, replaces bits 4..11 of rT0:
   *   bpos 0 -> bits  0..7  of `in` (least significant byte)
   *   bpos 1 -> bits  8..15
   *   bpos 2 -> bits 16..23
   *   bpos 3 -> bits 24..31 (most significant byte)
   * NOTE(review): this only forms "base + 16 * byte" if the table base in
   * rT0 has nothing meaningful in bits 4..11 (e.g. a 4 KiB aligned table
   * with 16-byte entries) - confirm against the code that sets up rT0.
   *
   * The trailing ';' is a statement separator so that several of these
   * macros can be expanded onto a single line.
   */
  16 #define EAD(in, bpos) \
  17         rlwimi          rT0,in,28-((bpos+3)%4)*8,20,27;
  18 
  /*
   * DAD(in, bpos) - same byte selection as EAD, but the byte is inserted
   * unscaled into bits 24..31 (IBM numbering, mask 0x000000ff) of rT1.
   * The rotate amount is 24-((bpos+3)%4)*8, so bpos 0 again selects the
   * least significant byte of `in` and bpos 3 the most significant one.
   * NOTE(review): presumably rT1 points to a 256-byte aligned byte table -
   * confirm against the code that sets up rT1.
   */
  19 #define DAD(in, bpos) \
  20         rlwimi          rT1,in,24-((bpos+3)%4)*8,24,31;
  21 
  /*
   * LWH(out, off) - "load word high": load the 32-bit word at off(rT0).
   * evlwwsplat writes the loaded word to both halves of the 64-bit SPE
   * register `out`, so after this the upper half holds the table word.
   */
  22 #define LWH(out, off) \
  23         evlwwsplat      out,off(rT0);   
  24 
  /*
   * LWL(out, off) - "load word low": plain 32-bit load from off(rT0).
   * The routines below issue LWH/LAH and then LWL/LAL on the same register
   * to build a register holding two different table words.
   * NOTE(review): this relies on lwz leaving the upper 32 bits of the
   * 64-bit register untouched - confirm against the core manual.
   */
  25 #define LWL(out, off) \
  26         lwz             out,off(rT0);   
  27 
  /*
   * LBZ(out, tab, off) - load the byte at off(tab), zero-extended, into
   * `out`.
   */
  28 #define LBZ(out, tab, off) \
  29         lbz             out,off(tab);   
  30 
  /*
   * Composite helpers: "LAx" macros first compute the table address from a
   * byte of `in` (EAD -> rT0, DAD -> rT1) and then load from it; "LBx"
   * macros only load and therefore reuse whatever address the most recent
   * EAD/DAD left in rT0/rT1.
   */

  /* LAH: calculate address in rT0, then load word into both halves (LWH) */
  31 #define LAH(out, in, bpos, off) \
  32         EAD(in, bpos)                    \
  33         LWH(out, off)
  34 
  /* LAL: calculate address in rT0, then load word into the lower half (LWL) */
  35 #define LAL(out, in, bpos, off) \
  36         EAD(in, bpos)                    \
  37         LWL(out, off)
  38 
  /* LAE: calculate address in rT0, then load the single byte at 8(rT0) */
  39 #define LAE(out, in, bpos) \
  40         EAD(in, bpos)                    \
  41         LBZ(out, rT0, 8)
  42 
  /* LBE: load the single byte at 8(rT0) using the address already in rT0 */
  43 #define LBE(out) \
  44         LBZ(out, rT0, 8)                
  45 
  /* LAD: calculate address in rT1, then load the single byte at 0(rT1) */
  46 #define LAD(out, in, bpos) \
  47         DAD(in, bpos)                    \
  48         LBZ(out, rT1, 0)
  49 
  /* LBD: load the single byte at 0(rT1) using the address already in rT1 */
  50 #define LBD(out) \
  51         LBZ(out, rT1, 0)
  52 
  53 
  54 
  55 
  56 
  57 
  58 
  59 
  60 
  61 
  62 
  /*
   * ppc_encrypt_block - table-driven encryption rounds on the state held
   * in rD0-rD3. Leaf routine: it returns with blr and neither saves nor
   * restores any register.
   *
   * Register usage as visible in the code below:
   *   rD0-rD3  in:  state words; individual bytes are picked out via EAD
   *            out: the four words loaded from rKP+32 .. rKP+44 (rKP as it
   *                 stands after the loop)
   *   rW0-rW3  out: four words packed from the final single-byte lookups
   *   rW4-rW7  scratch
   *   rT0      table address; bits 4..11 are overwritten by every EAD
   *   rKP      key pointer; read at offsets 16..44, advanced by 32 per
   *            loop iteration
   *   CTR      loop counter consumed by bdnz
   *
   * Structure: every loop iteration performs two rounds, followed by one
   * more word-lookup round and one byte-lookup round after the loop.
   *
   * The routine does not combine rW0-rW3 with rD0-rD3 on exit.
   * NOTE(review): presumably the caller XORs the two sets to apply the
   * last key and has already XORed the first key words into rD0-rD3
   * before the call - confirm with the callers.
   */
  63 _GLOBAL(ppc_encrypt_block)
             /*
              * Pre-issue three table loads for the first round. Each round
              * body below ends by issuing the same three loads for the
              * round that follows it.
              */
  64         LAH(rW4, rD1, 2, 4)
  65         LAH(rW6, rD0, 3, 0)
  66         LAH(rW3, rD0, 1, 8)
  67 ppc_encrypt_block_loop:
             /*
              * First round of the iteration. LAH fills the upper (and
              * lower) half of a work register, a later LAL/LWL on the same
              * register replaces the lower half, so every rWn ends up
              * holding two table words.
              */
  68         LAH(rW0, rD3, 0, 12)
  69         LAL(rW0, rD0, 0, 12)
  70         LAH(rW1, rD1, 0, 12)
  71         LAH(rW2, rD2, 1, 8)
  72         LAL(rW2, rD3, 1, 8)
  73         LAL(rW3, rD1, 1, 8)
  74         LAL(rW4, rD2, 2, 4)
  75         LAL(rW6, rD1, 3, 0)
  76         LAH(rW5, rD3, 2, 4)
  77         LAL(rW5, rD0, 2, 4)
  78         LAH(rW7, rD2, 3, 0)
             /*
              * From here on the remaining lookups are interleaved with the
              * XOR reduction: an EAD sets up rT0 and the matching LWL/LWH
              * follows one instruction later.
              *   rD1 = (8 bytes at rKP+16) ^ rW0 ^ rW2 ^ rW4 ^ rW6
              *   rD3 = (8 bytes at rKP+24) ^ rW1 ^ rW3 ^ rW5 ^ rW7
              * evmergehi then copies the upper word of rD1/rD3 into the
              * lower word of rD0/rD2.
              */
  79         evldw           rD1,16(rKP)
  80         EAD(rD3, 3)
  81         evxor           rW2,rW2,rW4
  82         LWL(rW7, 0)
  83         evxor           rW2,rW2,rW6
  84         EAD(rD2, 0)
  85         evxor           rD1,rD1,rW2
  86         LWL(rW1, 12)
  87         evxor           rD1,rD1,rW0
  88         evldw           rD3,24(rKP)
  89         evmergehi       rD0,rD0,rD1
             /* new rD1/rD0 are ready: start the next round's lookups */
  90         EAD(rD1, 2)
  91         evxor           rW3,rW3,rW5
  92         LWH(rW4, 4)
  93         evxor           rW3,rW3,rW7
  94         EAD(rD0, 3)
  95         evxor           rD3,rD3,rW3
  96         LWH(rW6, 0)
  97         evxor           rD3,rD3,rW1
  98         EAD(rD0, 1)
  99         evmergehi       rD2,rD2,rD3
 100         LWH(rW3, 8)
             /*
              * Second round of the iteration: identical instruction
              * sequence, key material taken from rKP+32 and rKP+40.
              */
 101         LAH(rW0, rD3, 0, 12)
 102         LAL(rW0, rD0, 0, 12)
 103         LAH(rW1, rD1, 0, 12)
 104         LAH(rW2, rD2, 1, 8)
 105         LAL(rW2, rD3, 1, 8)
 106         LAL(rW3, rD1, 1, 8)
 107         LAL(rW4, rD2, 2, 4)
 108         LAL(rW6, rD1, 3, 0)
 109         LAH(rW5, rD3, 2, 4)
 110         LAL(rW5, rD0, 2, 4)
 111         LAH(rW7, rD2, 3, 0)
 112         evldw           rD1,32(rKP)
 113         EAD(rD3, 3)
 114         evxor           rW2,rW2,rW4
 115         LWL(rW7, 0)
 116         evxor           rW2,rW2,rW6
 117         EAD(rD2, 0)
 118         evxor           rD1,rD1,rW2
 119         LWL(rW1, 12)
 120         evxor           rD1,rD1,rW0
 121         evldw           rD3,40(rKP)
 122         evmergehi       rD0,rD0,rD1
 123         EAD(rD1, 2)
 124         evxor           rW3,rW3,rW5
 125         LWH(rW4, 4)
 126         evxor           rW3,rW3,rW7
 127         EAD(rD0, 3)
 128         evxor           rD3,rD3,rW3
 129         LWH(rW6, 0)
 130         evxor           rD3,rD3,rW1
 131         EAD(rD0, 1)
 132         evmergehi       rD2,rD2,rD3
 133         LWH(rW3, 8)
             /* step the key pointer past the 32 bytes used by both rounds */
 134         addi            rKP,rKP,32
 135         bdnz            ppc_encrypt_block_loop
             /*
              * After the loop: one more round of word lookups with key
              * material from rKP+16 and rKP+24.
              */
 136         LAH(rW0, rD3, 0, 12)
 137         LAL(rW0, rD0, 0, 12)
 138         LAH(rW1, rD1, 0, 12)
 139         LAH(rW2, rD2, 1, 8)
 140         LAL(rW2, rD3, 1, 8)
 141         LAL(rW3, rD1, 1, 8)
 142         LAL(rW4, rD2, 2, 4)
 143         LAH(rW5, rD3, 2, 4)
 144         LAL(rW6, rD1, 3, 0)
 145         LAL(rW5, rD0, 2, 4)
 146         LAH(rW7, rD2, 3, 0)
 147         evldw           rD1,16(rKP)
 148         EAD(rD3, 3)
 149         evxor           rW2,rW2,rW4
 150         LWL(rW7, 0)
 151         evxor           rW2,rW2,rW6
 152         EAD(rD2, 0)
 153         evxor           rD1,rD1,rW2
 154         LWL(rW1, 12)
 155         evxor           rD1,rD1,rW0
 156         evldw           rD3,24(rKP)
 157         evmergehi       rD0,rD0,rD1
             /*
              * Instead of pre-issuing word loads for another round, the
              * tail of this round already starts the single-byte lookups
              * (byte at offset 8 of the selected table entry) of the final
              * step.
              */
 158         EAD(rD1, 0)
 159         evxor           rW3,rW3,rW5
 160         LBE(rW2)
 161         evxor           rW3,rW3,rW7
 162         EAD(rD0, 1)
 163         evxor           rD3,rD3,rW3
 164         LBE(rW6)
 165         evxor           rD3,rD3,rW1
 166         EAD(rD0, 0)
 167         evmergehi       rD2,rD2,rD3
 168         LBE(rW1)
             /*
              * Final step: look up one byte per state byte and pack four of
              * them into each of rW0-rW3. The first byte of each word is
              * the lbz result itself (bits 24..31); rlwimi then inserts the
              * others at bits 16..23 (shift 8), 8..15 (shift 16) and 0..7
              * (shift 24). The lwz instructions scheduled in between fetch
              * the words at rKP+32 .. rKP+44 into rD0-rD3.
              * NOTE(review): LAE(rW1, rD0, 0) below appears to repeat the
              * lookup already done by EAD(rD0, 0) / LBE(rW1) just above -
              * rD0 is not modified in between.
              */
 169         LAE(rW0, rD3, 0)
 170         LAE(rW1, rD0, 0)
 171         LAE(rW4, rD2, 1)
 172         LAE(rW5, rD3, 1)
 173         LAE(rW3, rD2, 0)
 174         LAE(rW7, rD1, 1)
 175         rlwimi          rW0,rW4,8,16,23
 176         rlwimi          rW1,rW5,8,16,23
 177         LAE(rW4, rD1, 2)
 178         LAE(rW5, rD2, 2)
 179         rlwimi          rW2,rW6,8,16,23
 180         rlwimi          rW3,rW7,8,16,23
 181         LAE(rW6, rD3, 2)
 182         LAE(rW7, rD0, 2)
 183         rlwimi          rW0,rW4,16,8,15
 184         rlwimi          rW1,rW5,16,8,15
 185         LAE(rW4, rD0, 3)
 186         LAE(rW5, rD1, 3)
 187         rlwimi          rW2,rW6,16,8,15
 188         lwz             rD0,32(rKP)
 189         rlwimi          rW3,rW7,16,8,15
 190         lwz             rD1,36(rKP)
 191         LAE(rW6, rD2, 3)
 192         LAE(rW7, rD3, 3)
 193         rlwimi          rW0,rW4,24,0,7
 194         lwz             rD2,40(rKP)
 195         rlwimi          rW1,rW5,24,0,7
 196         lwz             rD3,44(rKP)
 197         rlwimi          rW2,rW6,24,0,7
 198         rlwimi          rW3,rW7,24,0,7
 199         blr
 200 
 201 
 202 
 203 
 204 
 205 
 206 
 207 
 208 
 209 
 210 
 /*
  * ppc_decrypt_block - table-driven decryption rounds on the state held
  * in rD0-rD3. Leaf routine: it returns with blr and neither saves nor
  * restores any register.
  *
  * Register usage as visible in the code below:
  *   rD0-rD3  in:  state words; individual bytes are picked out via
  *                 EAD/DAD
  *            out: the four words loaded from rKP+32 .. rKP+44 (rKP as it
  *                 stands after the loop)
  *   rW0-rW3  out: four words packed from the final single-byte lookups
  *   rW4-rW7  scratch
  *   rT0      word table address; bits 4..11 are overwritten by every EAD
  *   rT1      byte table address used only in the final step; its low
  *            8 bits are overwritten by every DAD
  *   rKP      key pointer; read at offsets 16..44, advanced by 32 per
  *            loop iteration
  *   CTR      loop counter consumed by bdnz
  *
  * Structure: every loop iteration performs two rounds, followed by one
  * more word-lookup round and one byte-lookup round after the loop.
  *
  * The routine does not combine rW0-rW3 with rD0-rD3 on exit.
  * NOTE(review): presumably the caller XORs the two sets to apply the
  * last key and has already XORed the first key words into rD0-rD3
  * before the call - confirm with the callers.
  */
 211 _GLOBAL(ppc_decrypt_block)
             /*
              * Pre-issue three table loads for the first round. Each round
              * body below ends by issuing the same three loads for the
              * round that follows it.
              */
 212         LAH(rW0, rD1, 0, 12)
 213         LAH(rW6, rD0, 3, 0)
 214         LAH(rW3, rD0, 1, 8)
 215 ppc_decrypt_block_loop:
             /*
              * First round of the iteration. LAH fills the upper (and
              * lower) half of a work register, a later LAL/LWL on the same
              * register replaces the lower half, so every rWn ends up
              * holding two table words.
              */
 216         LAH(rW1, rD3, 0, 12)
 217         LAL(rW0, rD2, 0, 12)
 218         LAH(rW2, rD2, 1, 8)
 219         LAL(rW2, rD3, 1, 8)
 220         LAH(rW4, rD3, 2, 4)
 221         LAL(rW4, rD0, 2, 4)
 222         LAL(rW6, rD1, 3, 0)
 223         LAH(rW5, rD1, 2, 4)
 224         LAH(rW7, rD2, 3, 0)
 225         LAL(rW7, rD3, 3, 0)
 226         LAL(rW3, rD1, 1, 8)
             /*
              * The remaining lookups are interleaved with the XOR
              * reduction: an EAD sets up rT0 and the matching LWL/LWH
              * follows one instruction later.
              *   rD1 = (8 bytes at rKP+16) ^ rW0 ^ rW2 ^ rW4 ^ rW6
              *   rD3 = (8 bytes at rKP+24) ^ rW1 ^ rW3 ^ rW5 ^ rW7
              * evmergehi then copies the upper word of rD1/rD3 into the
              * lower word of rD0/rD2.
              */
 227         evldw           rD1,16(rKP)
 228         EAD(rD0, 0)
 229         evxor           rW4,rW4,rW6
 230         LWL(rW1, 12)
 231         evxor           rW0,rW0,rW4
 232         EAD(rD2, 2)
 233         evxor           rW0,rW0,rW2
 234         LWL(rW5, 4)
 235         evxor           rD1,rD1,rW0
 236         evldw           rD3,24(rKP)
 237         evmergehi       rD0,rD0,rD1
             /* new rD1/rD0 are ready: start the next round's lookups */
 238         EAD(rD1, 0)
 239         evxor           rW3,rW3,rW7
 240         LWH(rW0, 12)
 241         evxor           rW3,rW3,rW1
 242         EAD(rD0, 3)
 243         evxor           rD3,rD3,rW3
 244         LWH(rW6, 0)
 245         evxor           rD3,rD3,rW5
 246         EAD(rD0, 1)
 247         evmergehi       rD2,rD2,rD3
 248         LWH(rW3, 8)
             /*
              * Second round of the iteration: identical instruction
              * sequence, key material taken from rKP+32 and rKP+40.
              */
 249         LAH(rW1, rD3, 0, 12)
 250         LAL(rW0, rD2, 0, 12)
 251         LAH(rW2, rD2, 1, 8)
 252         LAL(rW2, rD3, 1, 8)
 253         LAH(rW4, rD3, 2, 4)
 254         LAL(rW4, rD0, 2, 4)
 255         LAL(rW6, rD1, 3, 0)
 256         LAH(rW5, rD1, 2, 4)
 257         LAH(rW7, rD2, 3, 0)
 258         LAL(rW7, rD3, 3, 0)
 259         LAL(rW3, rD1, 1, 8)
 260         evldw            rD1,32(rKP)
 261         EAD(rD0, 0)
 262         evxor           rW4,rW4,rW6
 263         LWL(rW1, 12)
 264         evxor           rW0,rW0,rW4
 265         EAD(rD2, 2)
 266         evxor           rW0,rW0,rW2
 267         LWL(rW5, 4)
 268         evxor           rD1,rD1,rW0
 269         evldw           rD3,40(rKP)
 270         evmergehi       rD0,rD0,rD1
 271         EAD(rD1, 0)
 272         evxor           rW3,rW3,rW7
 273         LWH(rW0, 12)
 274         evxor           rW3,rW3,rW1
 275         EAD(rD0, 3)
 276         evxor           rD3,rD3,rW3
 277         LWH(rW6, 0)
 278         evxor           rD3,rD3,rW5
 279         EAD(rD0, 1)
 280         evmergehi       rD2,rD2,rD3
 281         LWH(rW3, 8)
             /* step the key pointer past the 32 bytes used by both rounds */
 282         addi            rKP,rKP,32
 283         bdnz            ppc_decrypt_block_loop
             /*
              * After the loop: one more round of word lookups with key
              * material from rKP+16 and rKP+24.
              */
 284         LAH(rW1, rD3, 0, 12)
 285         LAL(rW0, rD2, 0, 12)
 286         LAH(rW2, rD2, 1, 8)
 287         LAL(rW2, rD3, 1, 8)
 288         LAH(rW4, rD3, 2, 4)
 289         LAL(rW4, rD0, 2, 4)
 290         LAL(rW6, rD1, 3, 0)
 291         LAH(rW5, rD1, 2, 4)
 292         LAH(rW7, rD2, 3, 0)
 293         LAL(rW7, rD3, 3, 0)
 294         LAL(rW3, rD1, 1, 8)
 295         evldw            rD1,16(rKP)
 296         EAD(rD0, 0)
 297         evxor           rW4,rW4,rW6
 298         LWL(rW1, 12)
 299         evxor           rW0,rW0,rW4
 300         EAD(rD2, 2)
 301         evxor           rW0,rW0,rW2
 302         LWL(rW5, 4)
 303         evxor           rD1,rD1,rW0
 304         evldw           rD3,24(rKP)
 305         evmergehi       rD0,rD0,rD1
             /*
              * Instead of pre-issuing word loads for another round, the
              * tail of this round already starts the single-byte lookups
              * of the final step. These go through DAD/LBD, i.e. the byte
              * table addressed by rT1, not the word table in rT0.
              */
 306         DAD(rD1, 0)
 307         evxor           rW3,rW3,rW7
 308         LBD(rW0)
 309         evxor           rW3,rW3,rW1
 310         DAD(rD0, 1)
 311         evxor           rD3,rD3,rW3
 312         LBD(rW6)
 313         evxor           rD3,rD3,rW5
 314         DAD(rD0, 0)
 315         evmergehi       rD2,rD2,rD3
 316         LBD(rW3)
             /*
              * Final step: look up one byte per state byte and pack four of
              * them into each of rW0-rW3. The first byte of each word is
              * the lbz result itself (bits 24..31); rlwimi then inserts the
              * others at bits 16..23 (shift 8), 8..15 (shift 16) and 0..7
              * (shift 24). The lwz instructions scheduled in between fetch
              * the words at rKP+32 .. rKP+44 into rD0-rD3.
              */
 317         LAD(rW2, rD3, 0)
 318         LAD(rW1, rD2, 0)
 319         LAD(rW4, rD2, 1)
 320         LAD(rW5, rD3, 1)
 321         LAD(rW7, rD1, 1)
 322         rlwimi          rW0,rW4,8,16,23
 323         rlwimi          rW1,rW5,8,16,23
 324         LAD(rW4, rD3, 2)
 325         LAD(rW5, rD0, 2)
 326         rlwimi          rW2,rW6,8,16,23
 327         rlwimi          rW3,rW7,8,16,23
 328         LAD(rW6, rD1, 2)
 329         LAD(rW7, rD2, 2)
 330         rlwimi          rW0,rW4,16,8,15
 331         rlwimi          rW1,rW5,16,8,15
 332         LAD(rW4, rD0, 3)
 333         LAD(rW5, rD1, 3)
 334         rlwimi          rW2,rW6,16,8,15
 335         lwz             rD0,32(rKP)
 336         rlwimi          rW3,rW7,16,8,15
 337         lwz             rD1,36(rKP)
 338         LAD(rW6, rD2, 3)
 339         LAD(rW7, rD3, 3)
 340         rlwimi          rW0,rW4,24,0,7
 341         lwz             rD2,40(rKP)
 342         rlwimi          rW1,rW5,24,0,7
 343         lwz             rD3,44(rKP)
 344         rlwimi          rW2,rW6,24,0,7
 345         rlwimi          rW3,rW7,24,0,7
 346         blr