/*
 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
 *
 * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	@ Symbolic names for the NEON quad registers used throughout.
	SHASH		.req	q0	@ hash key H, loaded from *r3
	SHASH2		.req	q1	@ lo(H) ^ hi(H) in both halves (Karatsuba)
	T1		.req	q2	@ scratch / current 16-byte input block
	T2		.req	q3	@ scratch
	MASK		.req	q4	@ reduction constant, 0xe1 bytes << 57
	XL		.req	q5	@ running digest / low 128 bits of product
	XM		.req	q6	@ middle Karatsuba partial product
	XH		.req	q7	@ high 128 bits of product
	IN1		.req	q7	@ byte-rotated input block; deliberately
					@ aliases XH — IN1 is consumed (veor into
					@ XL) before XH is first written below

	@ 64-bit d-register views of the halves of the q registers above.
	SHASH_L		.req	d0	@ low  64 bits of SHASH
	SHASH_H		.req	d1	@ high 64 bits of SHASH
	SHASH2_L	.req	d2	@ low  64 bits of SHASH2
	T1_L		.req	d4	@ low  64 bits of T1
	MASK_L		.req	d8	@ low  64 bits of MASK
	XL_L		.req	d10
	XL_H		.req	d11
	XM_L		.req	d12
	XM_H		.req	d13
	XH_L		.req	d14

	.text
	.fpu		crypto-neon-fp-armv8

	/*
	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
	 *			   struct ghash_key const *k, const char *head)
	 *
	 * AAPCS inputs: r0 = number of full 16-byte blocks at src,
	 * r1 = 16-byte digest (read and written back), r2 = source data,
	 * r3 = hash key, [sp] = optional extra "head" block pointer
	 * (NULL if none), processed before the blocks at src.
	 * Clobbers: q0-q7, ip, condition flags. No stack space is used.
	 */
ENTRY(pmull_ghash_update)
	vld1.64		{SHASH}, [r3]		@ load hash key H
	vld1.64		{XL}, [r1]		@ load current digest
	vmov.i8		MASK, #0xe1
	vext.8		SHASH2, SHASH, SHASH, #8 @ swap 64-bit halves of H
	vshl.u64	MASK, MASK, #57		@ GHASH reduction constant
	veor		SHASH2, SHASH2, SHASH	@ each half = lo(H) ^ hi(H)

	/* do the head block first, if supplied */
	ldr		ip, [sp]		@ 5th argument: head pointer
	teq		ip, #0
	beq		0f
	vld1.64		{T1}, [ip]		@ head is the first block
	teq		r0, #0			@ Z := no src blocks follow
	b		1f			@ skip load/decrement this pass

0:	vld1.64		{T1}, [r2]!		@ next block, post-increment src
	subs		r0, r0, #1		@ Z := this is the last block

1:	/* multiply XL by SHASH in GF(2^128) */
#ifndef CONFIG_CPU_BIG_ENDIAN
	vrev64.8	T1, T1			@ GHASH input is big endian
#endif
	vext.8		T2, XL, XL, #8
	vext.8		IN1, T1, T1, #8		@ byte-rotated copy of block
	veor		T1, T1, T2
	veor		XL, XL, IN1		@ fold block into the digest

	@ Karatsuba: three 64x64->128 carry-less multiplies instead of four
	vmull.p64	XH, SHASH_H, XL_H	@ a1 * b1
	veor		T1, T1, XL
	vmull.p64	XL, SHASH_L, XL_L	@ a0 * b0
	vmull.p64	XM, SHASH2_L, T1_L	@ (a1 + a0)(b1 + b0)

	@ combine partial products into the 256-bit result XH:XM:XL
	vext.8		T1, XL, XH, #8
	veor		T2, XL, XH
	veor		XM, XM, T1
	veor		XM, XM, T2		@ XM = middle 128 bits
	vmull.p64	T2, XL_L, MASK_L	@ first reduction step

	vmov		XH_L, XM_H		@ shift XM up into XH:XL
	vmov		XM_H, XL_L

	@ second reduction step: fold high bits back modulo the field poly
	veor		XL, XM, T2
	vext.8		T2, XL, XL, #8
	vmull.p64	XL, XL_L, MASK_L
	veor		T2, T2, XH
	veor		XL, XL, T2

	bne		0b			@ loop on flags from teq/subs

	vst1.64		{XL}, [r1]		@ store updated digest
	bx		lr
ENDPROC(pmull_ghash_update)