1/*
2 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
3 *
4 * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
	@ Register roles. NOTE: IN1 deliberately aliases q7/XH — IN1 is only
	@ live before the first vmull.p64 writes XH, so the overlap is safe.
	SHASH		.req	q0	@ hash key H (loaded from *r3)
	SHASH2		.req	q1	@ low half holds H.lo ^ H.hi (Karatsuba term)
	T1		.req	q2	@ scratch / current input block
	T2		.req	q3	@ scratch
	MASK		.req	q4	@ reduction constant, 0xe1 << 57 per lane
	XL		.req	q5	@ running digest / low product limb
	XM		.req	q6	@ middle (Karatsuba) product limb
	XH		.req	q7	@ high product limb
	IN1		.req	q7	@ byte-rotated input (dead before XH is written)

	@ D-register (64-bit half) views of the Q registers above,
	@ used as vmull.p64 operands (qN = d(2N):d(2N+1)).
	SHASH_L		.req	d0
	SHASH_H		.req	d1
	SHASH2_L	.req	d2
	T1_L		.req	d4
	MASK_L		.req	d8
	XL_L		.req	d10
	XL_H		.req	d11
	XM_L		.req	d12
	XM_H		.req	d13
	XH_L		.req	d14

	.text
	.fpu		crypto-neon-fp-armv8	@ required for vmull.p64 (PMULL)
37
38	/*
39	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
40	 *			   struct ghash_key const *k, const char *head)
41	 */
42ENTRY(pmull_ghash_update)
43	vld1.64		{SHASH}, [r3]
44	vld1.64		{XL}, [r1]
45	vmov.i8		MASK, #0xe1
46	vext.8		SHASH2, SHASH, SHASH, #8
47	vshl.u64	MASK, MASK, #57
48	veor		SHASH2, SHASH2, SHASH
49
50	/* do the head block first, if supplied */
51	ldr		ip, [sp]
52	teq		ip, #0
53	beq		0f
54	vld1.64		{T1}, [ip]
55	teq		r0, #0
56	b		1f
57
580:	vld1.64		{T1}, [r2]!
59	subs		r0, r0, #1
60
611:	/* multiply XL by SHASH in GF(2^128) */
62#ifndef CONFIG_CPU_BIG_ENDIAN
63	vrev64.8	T1, T1
64#endif
65	vext.8		T2, XL, XL, #8
66	vext.8		IN1, T1, T1, #8
67	veor		T1, T1, T2
68	veor		XL, XL, IN1
69
70	vmull.p64	XH, SHASH_H, XL_H		@ a1 * b1
71	veor		T1, T1, XL
72	vmull.p64	XL, SHASH_L, XL_L		@ a0 * b0
73	vmull.p64	XM, SHASH2_L, T1_L		@ (a1 + a0)(b1 + b0)
74
75	vext.8		T1, XL, XH, #8
76	veor		T2, XL, XH
77	veor		XM, XM, T1
78	veor		XM, XM, T2
79	vmull.p64	T2, XL_L, MASK_L
80
81	vmov		XH_L, XM_H
82	vmov		XM_H, XL_L
83
84	veor		XL, XM, T2
85	vext.8		T2, XL, XL, #8
86	vmull.p64	XL, XL_L, MASK_L
87	veor		T2, T2, XH
88	veor		XL, XL, T2
89
90	bne		0b
91
92	vst1.64		{XL}, [r1]
93	bx		lr
94ENDPROC(pmull_ghash_update)
95