1
2
3
4
5
6
7
8
9
10 #include <linux/linkage.h>
11
/*
 * Register aliases, substituted by the C preprocessor before assembly.
 * AT&T operand order is used throughout this file: "op source, destination".
 *
 * PASS0_SUMS..PASS3_SUMS
 *	One accumulator register per pass.  Each holds two 64-bit partial
 *	sums, which nh_sse2 adds together just before storing the result.
 * K0..K3
 *	Window of four 16-byte key strides.  _nh_stride consumes the oldest
 *	one and loads a new one, so the roles rotate from stride to stride.
 * T0..T7
 *	Scratch.  %xmm8-%xmm15 exist only in 64-bit mode.
 * KEY, MESSAGE, MESSAGE_LEN, HASH
 *	The four arguments of nh_sse2, in the first four integer argument
 *	registers of the System V AMD64 calling convention.
 */
#define PASS0_SUMS	%xmm0
#define PASS1_SUMS	%xmm1
#define PASS2_SUMS	%xmm2
#define PASS3_SUMS	%xmm3
#define K0		%xmm4
#define K1		%xmm5
#define K2		%xmm6
#define K3		%xmm7
#define T0		%xmm8
#define T1		%xmm9
#define T2		%xmm10
#define T3		%xmm11
#define T4		%xmm12
#define T5		%xmm13
#define T6		%xmm14
#define T7		%xmm15
#define KEY		%rdi
#define MESSAGE		%rsi
#define MESSAGE_LEN	%rdx
#define HASH		%rcx
32
/*
 * _nh_stride k0, k1, k2, k3, offset
 *
 * Process the 16-byte message stride at \offset(MESSAGE) for all four passes.
 *
 *   \k0, \k1, \k2	registers already holding the key strides for
 *			passes 0, 1 and 2
 *   \k3		register that receives the key stride for pass 3,
 *			loaded here from \offset(KEY)
 *   \offset		byte displacement applied to both MESSAGE and KEY
 *
 * For pass i the four 32-bit message words are added (wrapping, per lane) to
 * the four 32-bit words of key stride i.  Of the resulting words w0..w3, the
 * products w0*w2 and w1*w3 (32x32 => 64 bit, unsigned) are added to the low
 * and high 64-bit lanes of PASS<i>_SUMS respectively.
 *
 * Clobbers T1-T7 and \k0 (the pass-0 sum is built in place in \k0, so that
 * key stride is gone afterwards).  \k1 and \k2 are preserved and \k3 holds
 * the freshly loaded key stride on exit.  Neither KEY nor MESSAGE is advanced.
 */
.macro _nh_stride	k0, k1, k2, k3, offset

	/* Load the next message stride (unaligned load). */
	movdqu		\offset(MESSAGE), T1

	/* Load the next key stride (unaligned load). */
	movdqu		\offset(KEY), \k3

	/*
	 * Add the message words to each of the four key strides.  T1 must be
	 * copied to T2/T3 and added into \k0 before it is overwritten with
	 * the pass-1 sum.
	 */
	movdqa		T1, T2
	movdqa		T1, T3
	paddd		T1, \k0		/* pass 0 sum built in \k0: saves a move */
	paddd		\k1, T1		/* pass 1 sum in T1 */
	paddd		\k2, T2		/* pass 2 sum in T2 */
	paddd		\k3, T3		/* pass 3 sum in T3 */

	/*
	 * pmuludq multiplies dwords 0 and 2 of its operands.  Shuffle 0x10
	 * puts w0, w1 into those positions and shuffle 0x32 puts w2, w3 there,
	 * so each multiply yields (w0 * w2, w1 * w3) as two 64-bit lanes.
	 */
	pshufd		$0x10, \k0, T4
	pshufd		$0x32, \k0, \k0
	pshufd		$0x10, T1, T5
	pshufd		$0x32, T1, T1
	pshufd		$0x10, T2, T6
	pshufd		$0x32, T2, T2
	pshufd		$0x10, T3, T7
	pshufd		$0x32, T3, T3
	pmuludq		T4, \k0
	pmuludq		T5, T1
	pmuludq		T6, T2
	pmuludq		T7, T3

	/* Accumulate the 64-bit products, one accumulator per pass. */
	paddq		\k0, PASS0_SUMS
	paddq		T1, PASS1_SUMS
	paddq		T2, PASS2_SUMS
	paddq		T3, PASS3_SUMS
.endm
67
68
69
70
71
72
73
/*
 * nh_sse2
 *
 * ABI:	System V AMD64, leaf routine, no stack usage.
 * In:	KEY         (%rdi)  key material; MESSAGE_LEN + 0x30 bytes are read
 *	MESSAGE     (%rsi)  message bytes
 *	MESSAGE_LEN (%rdx)  message length in bytes
 *	HASH        (%rcx)  output buffer; 0x20 bytes are written
 * Out:	four 64-bit sums stored at HASH, in pass order 0, 1, 2, 3
 * Clobbers: %xmm0-%xmm15, %rdi, %rsi, %rdx, flags
 *
 * All loads and stores are unaligned-safe (movdqu).
 *
 * NOTE(review): the tail handling below only terminates correctly when
 * MESSAGE_LEN is a multiple of 0x10, and the loop bounds use signed
 * conditions, so MESSAGE_LEN is assumed to be far below 2^63.  The C
 * prototype is not visible in this file -- confirm both against the caller.
 */
ENTRY(nh_sse2)

	/*
	 * Prime the key window with the first three key strides and step KEY
	 * past them, so that \offset(KEY) inside _nh_stride addresses the
	 * stride that completes the window.
	 */
	movdqu		0x00(KEY), K0
	movdqu		0x10(KEY), K1
	movdqu		0x20(KEY), K2
	add		$0x30, KEY

	/* Zero the four per-pass accumulators. */
	pxor		PASS0_SUMS, PASS0_SUMS
	pxor		PASS1_SUMS, PASS1_SUMS
	pxor		PASS2_SUMS, PASS2_SUMS
	pxor		PASS3_SUMS, PASS3_SUMS

	/*
	 * Main loop: four strides (0x40 bytes) per iteration.  MESSAGE_LEN is
	 * kept biased by -0x40, so its sign tells whether a full group is
	 * left.  The K register arguments rotate by one position per stride
	 * and are back in their starting roles after four strides.
	 */
	sub		$0x40, MESSAGE_LEN
	jl		.Lloop4_done		/* fewer than 0x40 bytes in total */
.Lloop4:
	_nh_stride	K0, K1, K2, K3, 0x00
	_nh_stride	K1, K2, K3, K0, 0x10
	_nh_stride	K2, K3, K0, K1, 0x20
	_nh_stride	K3, K0, K1, K2, 0x30
	add		$0x40, KEY
	add		$0x40, MESSAGE
	sub		$0x40, MESSAGE_LEN
	jge		.Lloop4			/* another full group remains */

.Lloop4_done:
	/*
	 * MESSAGE_LEN is negative here; masking with 0x3f recovers the number
	 * of leftover bytes.  Up to three more strides are handled.
	 */
	and		$0x3f, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K0, K1, K2, K3, 0x00

	sub		$0x10, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K1, K2, K3, K0, 0x10

	sub		$0x10, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K2, K3, K0, K1, 0x20

.Ldone:
	/*
	 * Each accumulator holds two 64-bit partial sums (lo, hi).  Pair the
	 * low halves and the high halves of two passes, then add, so that each
	 * 16-byte store carries the final sums of two passes.
	 */
	movdqa		PASS0_SUMS, T0
	movdqa		PASS2_SUMS, T1
	punpcklqdq	PASS1_SUMS, T0		/* T0 = (pass0.lo, pass1.lo) */
	punpcklqdq	PASS3_SUMS, T1		/* T1 = (pass2.lo, pass3.lo) */
	punpckhqdq	PASS1_SUMS, PASS0_SUMS	/*    = (pass0.hi, pass1.hi) */
	punpckhqdq	PASS3_SUMS, PASS2_SUMS	/*    = (pass2.hi, pass3.hi) */
	paddq		PASS0_SUMS, T0
	paddq		PASS2_SUMS, T1
	movdqu		T0, 0x00(HASH)
	movdqu		T1, 0x10(HASH)
	ret
ENDPROC(nh_sse2)