root/arch/x86/crypto/sha256_ni_asm.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /*
   2  * Intel SHA Extensions optimized implementation of a SHA-256 update function
   3  *
   4  * This file is provided under a dual BSD/GPLv2 license.  When using or
   5  * redistributing this file, you may do so under either license.
   6  *
   7  * GPL LICENSE SUMMARY
   8  *
   9  * Copyright(c) 2015 Intel Corporation.
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of version 2 of the GNU General Public License as
  13  * published by the Free Software Foundation.
  14  *
  15  * This program is distributed in the hope that it will be useful, but
  16  * WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * General Public License for more details.
  19  *
  20  * Contact Information:
  21  *      Sean Gulley <sean.m.gulley@intel.com>
  22  *      Tim Chen <tim.c.chen@linux.intel.com>
  23  *
  24  * BSD LICENSE
  25  *
  26  * Copyright(c) 2015 Intel Corporation.
  27  *
  28  * Redistribution and use in source and binary forms, with or without
  29  * modification, are permitted provided that the following conditions
  30  * are met:
  31  *
  32  *      * Redistributions of source code must retain the above copyright
  33  *        notice, this list of conditions and the following disclaimer.
  34  *      * Redistributions in binary form must reproduce the above copyright
  35  *        notice, this list of conditions and the following disclaimer in
  36  *        the documentation and/or other materials provided with the
  37  *        distribution.
  38  *      * Neither the name of Intel Corporation nor the names of its
  39  *        contributors may be used to endorse or promote products derived
  40  *        from this software without specific prior written permission.
  41  *
  42  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  43  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  44  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  45  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  46  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  47  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  48  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  49  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  50  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  51  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  52  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  53  *
  54  */
  55 
  56 #include <linux/linkage.h>
  57 
  58 #define DIGEST_PTR      %rdi    /* 1st arg; 32 bytes are loaded from and stored back to this address */
  59 #define DATA_PTR        %rsi    /* 2nd arg; advanced by 64 after every block */
  60 #define NUM_BLKS        %rdx    /* 3rd arg; converted to bytes, then to the end-of-data pointer */
  61 
  62 #define SHA256CONSTANTS %rax    /* address of the K256 table */
  63 
  64 #define MSG             %xmm0   /* message words plus constants, fed to sha256rnds2 */
  65 #define STATE0          %xmm1   /* holds ABEF at the top of each block iteration */
  66 #define STATE1          %xmm2   /* holds CDGH at the top of each block iteration */
  67 #define MSGTMP0         %xmm3   /* MSGTMP0-3: rotating window of message-schedule words */
  68 #define MSGTMP1         %xmm4
  69 #define MSGTMP2         %xmm5
  70 #define MSGTMP3         %xmm6
  71 #define MSGTMP4         %xmm7   /* scratch: digest reordering and palignr temporary */
  72 
  73 #define SHUF_MASK       %xmm8   /* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */
  74 
  75 #define ABEF_SAVE       %xmm9   /* copy of STATE0 taken before the rounds of a block */
  76 #define CDGH_SAVE       %xmm10  /* copy of STATE1 taken before the rounds of a block */
  77 
  78 /*
  79  * Intel SHA Extensions optimized implementation of a SHA-256 update function
  80  *
  81  * The function takes a pointer to the current hash values, a pointer to the
  82  * input data, and a number of 64-byte blocks to process.  Once all blocks have
  83  * been processed, the digest buffer is updated with the resulting hash value.
  84  * The function only processes complete blocks; there is no functionality to
  85  * store partial blocks.  All message padding and hash value initialization must
  86  * be done outside the update function.
  87  *
  88  * The indented lines in the loop are instructions related to rounds processing.
  89  * The non-indented lines are instructions related to the message schedule.
  90  *
  91  * void sha256_ni_transform(uint32_t *digest, const void *data,
  92  *              uint32_t numBlocks);
  93  * digest: pointer to the 32-byte hash state, read on entry and rewritten on exit
  94  * data: pointer to input data
  95  * numBlocks: number of 64-byte blocks to process (read from the full 64-bit %rdx)
  96  */
  97 
  98 .text
  99 .align 32
  100 ENTRY(sha256_ni_transform)
  101         /* Clobbers %rax, %rdx, %rsi, %xmm0-%xmm10 and flags; nothing is saved or restored here. */
  102         shl             $6, NUM_BLKS            /* blocks * 64 = bytes; uses all 64 bits of %rdx; ZF set if 0 */
  103         jz              .Ldone_hash             /* zero bytes: return without reading or writing the digest */
  104         add             DATA_PTR, NUM_BLKS      /* NUM_BLKS now holds the pointer to the end of the data */
  105 
  106         /*
  107          * load initial hash values
  108          * Need to reorder these appropriately
  109          * DCBA, HGFE -> ABEF, CDGH
  110          */
  111         movdqu          0*16(DIGEST_PTR), STATE0        /* unaligned load, first 16 bytes of the digest */
  112         movdqu          1*16(DIGEST_PTR), STATE1        /* unaligned load, second 16 bytes of the digest */
  113 
  114         pshufd          $0xB1, STATE0,  STATE0          /* CDAB */
  115         pshufd          $0x1B, STATE1,  STATE1          /* EFGH */
  116         movdqa          STATE0, MSGTMP4                 /* keep CDAB; palignr below overwrites STATE0 */
  117         palignr         $8, STATE1,  STATE0             /* ABEF */
  118         pblendw         $0xF0, MSGTMP4, STATE1          /* CDGH */
  119 
  120         movdqa          PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK        /* aligned load; the mask is .align 16 */
  121         lea             K256(%rip), SHA256CONSTANTS                     /* constants are addressed as N*16(SHA256CONSTANTS) */
  122 
  123 .Lloop0:        /* one iteration per 64-byte block */
  124         /* Save hash values for addition after rounds */
  125         movdqa          STATE0, ABEF_SAVE
  126         movdqa          STATE1, CDGH_SAVE
  127 
  128         /* Rounds 0-3 */
  129         movdqu          0*16(DATA_PTR), MSG             /* unaligned load of the first 16 input bytes */
  130         pshufb          SHUF_MASK, MSG                  /* reverse the byte order inside each dword */
  131         movdqa          MSG, MSGTMP0                    /* keep the swapped words for the message schedule */
  132                 paddd           0*16(SHA256CONSTANTS), MSG      /* add the four constants for this group */
  133                 sha256rnds2     STATE0, STATE1                  /* MSG (%xmm0) is the implicit third operand */
  134                 pshufd          $0x0E, MSG, MSG                 /* move the high two dwords of MSG to the low half */
  135                 sha256rnds2     STATE1, STATE0                  /* operands swapped relative to the previous sha256rnds2 */
  136 
  137         /* Rounds 4-7 */
  138         movdqu          1*16(DATA_PTR), MSG
  139         pshufb          SHUF_MASK, MSG
  140         movdqa          MSG, MSGTMP1
  141                 paddd           1*16(SHA256CONSTANTS), MSG
  142                 sha256rnds2     STATE0, STATE1
  143                 pshufd          $0x0E, MSG, MSG
  144                 sha256rnds2     STATE1, STATE0
  145         sha256msg1      MSGTMP1, MSGTMP0                /* first message-schedule step on MSGTMP0 */
  146 
  147         /* Rounds 8-11 */
  148         movdqu          2*16(DATA_PTR), MSG
  149         pshufb          SHUF_MASK, MSG
  150         movdqa          MSG, MSGTMP2
  151                 paddd           2*16(SHA256CONSTANTS), MSG
  152                 sha256rnds2     STATE0, STATE1
  153                 pshufd          $0x0E, MSG, MSG
  154                 sha256rnds2     STATE1, STATE0
  155         sha256msg1      MSGTMP2, MSGTMP1
  156 
  157         /* Rounds 12-15 (last group loaded from memory; first sha256msg2) */
  158         movdqu          3*16(DATA_PTR), MSG
  159         pshufb          SHUF_MASK, MSG
  160         movdqa          MSG, MSGTMP3
  161                 paddd           3*16(SHA256CONSTANTS), MSG
  162                 sha256rnds2     STATE0, STATE1
  163         movdqa          MSGTMP3, MSGTMP4                /* MSGTMP4 is scratch for the palignr below */
  164         palignr         $4, MSGTMP2, MSGTMP4            /* upper 3 dwords of MSGTMP2 followed by low dword of MSGTMP3 */
  165         paddd           MSGTMP4, MSGTMP0
  166         sha256msg2      MSGTMP3, MSGTMP0                /* MSGTMP0 is the message input of rounds 16-19 */
  167                 pshufd          $0x0E, MSG, MSG
  168                 sha256rnds2     STATE1, STATE0
  169         sha256msg1      MSGTMP3, MSGTMP2
  170 
  171         /* Rounds 16-19 (from here on MSG is copied from a schedule register, not loaded from memory) */
  172         movdqa          MSGTMP0, MSG
  173                 paddd           4*16(SHA256CONSTANTS), MSG
  174                 sha256rnds2     STATE0, STATE1
  175         movdqa          MSGTMP0, MSGTMP4
  176         palignr         $4, MSGTMP3, MSGTMP4
  177         paddd           MSGTMP4, MSGTMP1
  178         sha256msg2      MSGTMP0, MSGTMP1
  179                 pshufd          $0x0E, MSG, MSG
  180                 sha256rnds2     STATE1, STATE0
  181         sha256msg1      MSGTMP0, MSGTMP3
  182 
  183         /* Rounds 20-23 */
  184         movdqa          MSGTMP1, MSG
  185                 paddd           5*16(SHA256CONSTANTS), MSG
  186                 sha256rnds2     STATE0, STATE1
  187         movdqa          MSGTMP1, MSGTMP4
  188         palignr         $4, MSGTMP0, MSGTMP4
  189         paddd           MSGTMP4, MSGTMP2
  190         sha256msg2      MSGTMP1, MSGTMP2
  191                 pshufd          $0x0E, MSG, MSG
  192                 sha256rnds2     STATE1, STATE0
  193         sha256msg1      MSGTMP1, MSGTMP0
  194 
  195         /* Rounds 24-27 */
  196         movdqa          MSGTMP2, MSG
  197                 paddd           6*16(SHA256CONSTANTS), MSG
  198                 sha256rnds2     STATE0, STATE1
  199         movdqa          MSGTMP2, MSGTMP4
  200         palignr         $4, MSGTMP1, MSGTMP4
  201         paddd           MSGTMP4, MSGTMP3
  202         sha256msg2      MSGTMP2, MSGTMP3
  203                 pshufd          $0x0E, MSG, MSG
  204                 sha256rnds2     STATE1, STATE0
  205         sha256msg1      MSGTMP2, MSGTMP1
  206 
  207         /* Rounds 28-31 */
  208         movdqa          MSGTMP3, MSG
  209                 paddd           7*16(SHA256CONSTANTS), MSG
  210                 sha256rnds2     STATE0, STATE1
  211         movdqa          MSGTMP3, MSGTMP4
  212         palignr         $4, MSGTMP2, MSGTMP4
  213         paddd           MSGTMP4, MSGTMP0
  214         sha256msg2      MSGTMP3, MSGTMP0
  215                 pshufd          $0x0E, MSG, MSG
  216                 sha256rnds2     STATE1, STATE0
  217         sha256msg1      MSGTMP3, MSGTMP2
  218 
  219         /* Rounds 32-35 */
  220         movdqa          MSGTMP0, MSG
  221                 paddd           8*16(SHA256CONSTANTS), MSG
  222                 sha256rnds2     STATE0, STATE1
  223         movdqa          MSGTMP0, MSGTMP4
  224         palignr         $4, MSGTMP3, MSGTMP4
  225         paddd           MSGTMP4, MSGTMP1
  226         sha256msg2      MSGTMP0, MSGTMP1
  227                 pshufd          $0x0E, MSG, MSG
  228                 sha256rnds2     STATE1, STATE0
  229         sha256msg1      MSGTMP0, MSGTMP3
  230 
  231         /* Rounds 36-39 */
  232         movdqa          MSGTMP1, MSG
  233                 paddd           9*16(SHA256CONSTANTS), MSG
  234                 sha256rnds2     STATE0, STATE1
  235         movdqa          MSGTMP1, MSGTMP4
  236         palignr         $4, MSGTMP0, MSGTMP4
  237         paddd           MSGTMP4, MSGTMP2
  238         sha256msg2      MSGTMP1, MSGTMP2
  239                 pshufd          $0x0E, MSG, MSG
  240                 sha256rnds2     STATE1, STATE0
  241         sha256msg1      MSGTMP1, MSGTMP0
  242 
  243         /* Rounds 40-43 */
  244         movdqa          MSGTMP2, MSG
  245                 paddd           10*16(SHA256CONSTANTS), MSG
  246                 sha256rnds2     STATE0, STATE1
  247         movdqa          MSGTMP2, MSGTMP4
  248         palignr         $4, MSGTMP1, MSGTMP4
  249         paddd           MSGTMP4, MSGTMP3
  250         sha256msg2      MSGTMP2, MSGTMP3
  251                 pshufd          $0x0E, MSG, MSG
  252                 sha256rnds2     STATE1, STATE0
  253         sha256msg1      MSGTMP2, MSGTMP1
  254 
  255         /* Rounds 44-47 */
  256         movdqa          MSGTMP3, MSG
  257                 paddd           11*16(SHA256CONSTANTS), MSG
  258                 sha256rnds2     STATE0, STATE1
  259         movdqa          MSGTMP3, MSGTMP4
  260         palignr         $4, MSGTMP2, MSGTMP4
  261         paddd           MSGTMP4, MSGTMP0
  262         sha256msg2      MSGTMP3, MSGTMP0
  263                 pshufd          $0x0E, MSG, MSG
  264                 sha256rnds2     STATE1, STATE0
  265         sha256msg1      MSGTMP3, MSGTMP2
  266 
  267         /* Rounds 48-51 (last group that issues sha256msg1) */
  268         movdqa          MSGTMP0, MSG
  269                 paddd           12*16(SHA256CONSTANTS), MSG
  270                 sha256rnds2     STATE0, STATE1
  271         movdqa          MSGTMP0, MSGTMP4
  272         palignr         $4, MSGTMP3, MSGTMP4
  273         paddd           MSGTMP4, MSGTMP1
  274         sha256msg2      MSGTMP0, MSGTMP1
  275                 pshufd          $0x0E, MSG, MSG
  276                 sha256rnds2     STATE1, STATE0
  277         sha256msg1      MSGTMP0, MSGTMP3
  278 
  279         /* Rounds 52-55 */
  280         movdqa          MSGTMP1, MSG
  281                 paddd           13*16(SHA256CONSTANTS), MSG
  282                 sha256rnds2     STATE0, STATE1
  283         movdqa          MSGTMP1, MSGTMP4
  284         palignr         $4, MSGTMP0, MSGTMP4
  285         paddd           MSGTMP4, MSGTMP2
  286         sha256msg2      MSGTMP1, MSGTMP2
  287                 pshufd          $0x0E, MSG, MSG
  288                 sha256rnds2     STATE1, STATE0
  289 
  290         /* Rounds 56-59 (the sha256msg2 below produces the input of rounds 60-63) */
  291         movdqa          MSGTMP2, MSG
  292                 paddd           14*16(SHA256CONSTANTS), MSG
  293                 sha256rnds2     STATE0, STATE1
  294         movdqa          MSGTMP2, MSGTMP4
  295         palignr         $4, MSGTMP1, MSGTMP4
  296         paddd           MSGTMP4, MSGTMP3
  297         sha256msg2      MSGTMP2, MSGTMP3
  298                 pshufd          $0x0E, MSG, MSG
  299                 sha256rnds2     STATE1, STATE0
  300 
  301         /* Rounds 60-63 (no message-schedule instructions in this group) */
  302         movdqa          MSGTMP3, MSG
  303                 paddd           15*16(SHA256CONSTANTS), MSG
  304                 sha256rnds2     STATE0, STATE1
  305                 pshufd          $0x0E, MSG, MSG
  306                 sha256rnds2     STATE1, STATE0
  307 
  308         /* Add current hash values with previously saved */
  309         paddd           ABEF_SAVE, STATE0
  310         paddd           CDGH_SAVE, STATE1
  311 
  312         /* Increment data pointer and loop if more to process */
  313         add             $64, DATA_PTR                   /* advance one 64-byte block */
  314         cmp             NUM_BLKS, DATA_PTR              /* NUM_BLKS is the end pointer computed at entry */
  315         jne             .Lloop0                         /* exits only when DATA_PTR equals the end pointer */
  316 
  317         /* Write hash values back in the correct order */
  318         pshufd          $0x1B, STATE0,  STATE0          /* FEBA */
  319         pshufd          $0xB1, STATE1,  STATE1          /* DCHG */
  320         movdqa          STATE0, MSGTMP4                 /* keep FEBA; pblendw below overwrites STATE0 */
  321         pblendw         $0xF0, STATE1,  STATE0          /* DCBA */
  322         palignr         $8, MSGTMP4, STATE1             /* HGFE */
  323 
  324         movdqu          STATE0, 0*16(DIGEST_PTR)        /* unaligned stores back to the caller's digest */
  325         movdqu          STATE1, 1*16(DIGEST_PTR)
  326 
  327 .Ldone_hash:    /* also reached directly from the jz at entry when the byte count is 0 */
  328 
  329         ret
  330 ENDPROC(sha256_ni_transform)
 331 
  332 .section        .rodata.cst256.K256, "aM", @progbits, 256
  333 .align 64
  334 K256:   /* SHA-256 round constants: 64 x 32-bit (256 bytes); each row is one 16-byte group read as N*16(SHA256CONSTANTS) */
  335         .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  336         .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  337         .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  338         .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  339         .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  340         .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  341         .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  342         .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  343         .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  344         .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  345         .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  346         .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  347         .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  348         .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  349         .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  350         .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
 351 
  352 .section        .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
  353 .align 16       /* the mask is loaded with movdqa, which needs a 16-byte aligned address */
  354 PSHUFFLE_BYTE_FLIP_MASK:        /* pshufb control that reverses the byte order within each 32-bit dword */
  355         .octa 0x0c0d0e0f08090a0b0405060700010203

/* [<][>][^][v][top][bottom][index][help] */